fix: non utf-8 text files, refactor db model type

This commit is contained in:
neri 2024-06-09 22:33:13 +02:00
parent 9b397ee26a
commit b56f053e50
6 changed files with 156 additions and 97 deletions

2
Cargo.lock generated
View File

@ -523,7 +523,7 @@ dependencies = [
[[package]] [[package]]
name = "datatrash" name = "datatrash"
version = "2.5.0" version = "2.5.1"
dependencies = [ dependencies = [
"actix-files", "actix-files",
"actix-governor", "actix-governor",

View File

@ -1,6 +1,6 @@
[package] [package]
name = "datatrash" name = "datatrash"
version = "2.5.0" version = "2.5.1"
authors = ["neri"] authors = ["neri"]
edition = "2021" edition = "2021"

View File

@ -1,4 +1,4 @@
use std::{path::PathBuf, str::FromStr, time::SystemTime}; use std::{io::ErrorKind, time::SystemTime};
use actix_files::NamedFile; use actix_files::NamedFile;
use actix_web::{ use actix_web::{
@ -16,7 +16,12 @@ use std::path::Path;
use time::OffsetDateTime; use time::OffsetDateTime;
use tokio::fs; use tokio::fs;
use crate::{config::Config, deleter, mime_relations, template}; use crate::{
config::Config,
deleter,
file_info::{self, FileInfo},
mime_relations, template,
};
const TEXT_VIEW_SIZE_LIMIT: u64 = 512 * 1024; // 512KiB const TEXT_VIEW_SIZE_LIMIT: u64 = 512 * 1024; // 512KiB
@ -32,24 +37,27 @@ pub async fn download(
config: web::Data<Config>, config: web::Data<Config>,
) -> Result<HttpResponse, Error> { ) -> Result<HttpResponse, Error> {
let id = req.match_info().query("id"); let id = req.match_info().query("id");
let (file_id, file_name, valid_till, content_type, delete) = load_file_info(id, &db).await?; let file_info = file_info::find_by_id(id, &db)
let path = config.files_dir.join(&file_id); .await
.map_err(|db_err| {
log::error!("could not run select statement {:?}", db_err);
error::ErrorInternalServerError("could not run select statement")
})?
.ok_or_else(|| error::ErrorNotFound("file does not exist or has expired"))?;
let delete = file_info.delete_on_download;
let valid_till = file_info.valid_till;
let path = config.files_dir.join(&file_info.file_id);
let mime = Mime::from_str(&content_type).unwrap_or(APPLICATION_OCTET_STREAM); let mut response = match get_view_type(&file_info, &path, &req).await {
let computed_file_name = file_name.clone().unwrap_or_else(|| { ViewType::Raw => build_file_response(file_info, &path, false, &req),
let extension = mime_relations::get_extension(&mime).unwrap_or("txt"); ViewType::Download => build_file_response(file_info, &path, true, &req),
format!("{file_id}.{extension}") ViewType::Html => build_html_response(file_info, &path, &config, &req).await,
});
let mut response = match get_view_type(&req, &mime, &path, delete).await {
ViewType::Raw => build_file_response(false, &computed_file_name, path, mime, &req),
ViewType::Download => build_file_response(true, &computed_file_name, path, mime, &req),
ViewType::Html => build_html_response(file_name.as_deref(), &path, &config, &req).await,
}?; }?;
insert_cache_headers(&mut response, valid_till); insert_cache_headers(&mut response, valid_till);
if delete { if delete {
deleter::delete_by_id(&db, &file_id, &config.files_dir) deleter::delete_by_id(&db, id, &config.files_dir)
.await .await
.map_err(|db_err| { .map_err(|db_err| {
log::error!("could not delete file {:?}", db_err); log::error!("could not delete file {:?}", db_err);
@ -60,36 +68,14 @@ pub async fn download(
Ok(response) Ok(response)
} }
async fn load_file_info( async fn get_view_type(file_info: &FileInfo, file_path: &Path, req: &HttpRequest) -> ViewType {
id: &str, if file_info.delete_on_download || req.query_string().contains("dl") {
db: &web::Data<sqlx::Pool<sqlx::Postgres>>,
) -> Result<(String, Option<String>, OffsetDateTime, String, bool), Error> {
sqlx::query_as(
"SELECT file_id, file_name, valid_till, content_type, delete_on_download from files WHERE file_id = $1",
)
.bind(id)
.fetch_optional(db.as_ref())
.await
.map_err(|db_err| {
log::error!("could not run select statement {:?}", db_err);
error::ErrorInternalServerError("could not run select statement")
})?
.ok_or_else(|| error::ErrorNotFound("file does not exist or has expired"))
}
async fn get_view_type(
req: &HttpRequest,
mime: &Mime,
file_path: &Path,
delete_on_download: bool,
) -> ViewType {
if delete_on_download || req.query_string().contains("dl") {
return ViewType::Download; return ViewType::Download;
} }
if req.query_string().contains("raw") { if req.query_string().contains("raw") {
return ViewType::Raw; return ViewType::Raw;
} }
if !mime_relations::matches_text(mime) { if !mime_relations::matches_text(&file_info.content_type) {
return ViewType::Raw; return ViewType::Raw;
} }
if get_file_size(file_path).await >= TEXT_VIEW_SIZE_LIMIT { if get_file_size(file_path).await >= TEXT_VIEW_SIZE_LIMIT {
@ -100,7 +86,7 @@ async fn get_view_type(
if accept_mime == TEXT_HTML { if accept_mime == TEXT_HTML {
return ViewType::Html; return ViewType::Html;
} }
if mime_matches(&accept_mime, mime) { if mime_matches(&accept_mime, &file_info.content_type) {
break; break;
} }
} }
@ -122,28 +108,42 @@ async fn get_file_size(file_path: &Path) -> u64 {
} }
async fn build_html_response( async fn build_html_response(
file_name: Option<&str>, file_info: FileInfo,
path: &Path, path: &Path,
config: &Config, config: &Config,
req: &HttpRequest, req: &HttpRequest,
) -> Result<HttpResponse, Error> { ) -> Result<HttpResponse, Error> {
let content = fs::read_to_string(path).await.map_err(|file_err| { let content = match fs::read_to_string(path).await {
log::error!("file could not be read {:?}", file_err); Ok(content) => content,
error::ErrorInternalServerError("this file should be here but could not be found") Err(file_err) if file_err.kind() == ErrorKind::InvalidData => {
})?; // content may not be valid UTF-8, try to return the raw file instead
let html_view = template::build_html_view_template(&content, file_name, req, config); return build_file_response(file_info, path, false, req);
}
Err(file_err) => {
log::error!("file could not be read: {:?}", file_err);
return Err(error::ErrorInternalServerError(
"this file should be here but could not be found",
));
}
};
let html_view =
template::build_html_view_template(&content, file_info.file_name.as_deref(), req, config);
Ok(HttpResponse::Ok() Ok(HttpResponse::Ok()
.content_type(TEXT_HTML.to_string()) .content_type(TEXT_HTML.to_string())
.body(html_view)) .body(html_view))
} }
fn build_file_response( fn build_file_response(
file_info: FileInfo,
path: &Path,
download: bool, download: bool,
file_name: &str,
path: PathBuf,
mime: Mime,
req: &HttpRequest, req: &HttpRequest,
) -> Result<HttpResponse, Error> { ) -> Result<HttpResponse, Error> {
let file_name: String = file_info.file_name.unwrap_or_else(|| {
let file_id = file_info.file_id;
let extension = mime_relations::get_extension(&file_info.content_type).unwrap_or("txt");
format!("{file_id}.{extension}")
});
let content_disposition = ContentDisposition { let content_disposition = ContentDisposition {
disposition: if download { disposition: if download {
DispositionType::Attachment DispositionType::Attachment
@ -154,10 +154,10 @@ fn build_file_response(
}; };
let file = NamedFile::open(path) let file = NamedFile::open(path)
.map_err(|file_err| { .map_err(|file_err| {
log::error!("file could not be read {:?}", file_err); log::error!("file could not be read: {:?}", file_err);
error::ErrorInternalServerError("this file should be here but could not be found") error::ErrorInternalServerError("this file should be here but could not be found")
})? })?
.set_content_type(mime) .set_content_type(file_info.content_type)
.set_content_disposition(content_disposition); .set_content_disposition(content_disposition);
let mut response = file.into_response(req); let mut response = file.into_response(req);
@ -165,16 +165,19 @@ fn build_file_response(
Ok(response) Ok(response)
} }
fn get_disposition_params(filename: &str) -> Vec<DispositionParam> { fn get_disposition_params(filename: String) -> Vec<DispositionParam> {
let mut parameters = vec![DispositionParam::Filename(filename.to_owned())];
if !filename.is_ascii() { if !filename.is_ascii() {
parameters.push(DispositionParam::FilenameExt(ExtendedValue { vec![
charset: Charset::Ext(String::from("UTF-8")), DispositionParam::Filename(filename.clone()),
language_tag: None, DispositionParam::FilenameExt(ExtendedValue {
value: filename.to_owned().into_bytes(), charset: Charset::Ext(String::from("UTF-8")),
})); language_tag: None,
value: filename.into_bytes(),
}),
]
} else {
vec![DispositionParam::Filename(filename)]
} }
parameters
} }
const ALLOWED_CONTEXTS: [&str; 6] = ["audio", "document", "empty", "font", "image", "video"]; const ALLOWED_CONTEXTS: [&str; 6] = ["audio", "document", "empty", "font", "image", "video"];
@ -209,7 +212,7 @@ fn insert_cache_headers(response: &mut HttpResponse, valid_till: OffsetDateTime)
CacheControl(vec![ CacheControl(vec![
CacheDirective::Public, CacheDirective::Public,
CacheDirective::MustRevalidate, CacheDirective::MustRevalidate,
CacheDirective::MaxAge(valid_cache_seconds), // todo: expiry in seconds CacheDirective::MaxAge(valid_cache_seconds),
CacheDirective::NoTransform, CacheDirective::NoTransform,
CacheDirective::Extension("immutable".to_owned(), None), CacheDirective::Extension("immutable".to_owned(), None),
]) ])

69
src/file_info.rs Normal file
View File

@ -0,0 +1,69 @@
use mime::{Mime, APPLICATION_OCTET_STREAM};
use sqlx::{postgres::PgRow, FromRow, Row};
use time::OffsetDateTime;
use crate::multipart::UploadConfig;
/// Metadata describing one stored file, mirroring a row of the `files` table.
///
/// Instances are either built from an upload via [`FileInfo::new`] or decoded
/// from a database row through the `FromRow` implementation in this module.
pub struct FileInfo {
/// Identifier of the file; `find_by_id` looks rows up by this value.
pub file_id: String,
/// Name supplied with the upload (`UploadConfig::original_name`), if any.
pub file_name: Option<String>,
/// Timestamp stored in the `valid_till` column.
// NOTE(review): presumably the expiry time of the file — confirm against the deleter.
pub valid_till: OffsetDateTime,
/// MIME type of the file; persisted in its string form in the `content_type` column.
pub content_type: Mime,
/// Flag stored in the `delete_on_download` column.
pub delete_on_download: bool,
}
impl FileInfo {
pub fn new(file_id: String, upload_config: UploadConfig) -> Self {
Self {
file_id,
file_name: upload_config.original_name,
valid_till: upload_config.valid_till,
content_type: upload_config.content_type,
delete_on_download: upload_config.delete_on_download,
}
}
}
/// Looks up the metadata of the file identified by `id`.
///
/// Returns `Ok(Some(info))` when a matching row exists, `Ok(None)` when no
/// row matches, and the underlying `sqlx::Error` when the query or the row
/// decoding fails.
pub async fn find_by_id(
    id: &str,
    db: &sqlx::Pool<sqlx::Postgres>,
) -> Result<Option<FileInfo>, sqlx::Error> {
    const SELECT_BY_ID: &str =
        "SELECT file_id, file_name, valid_till, content_type, delete_on_download from files WHERE file_id = $1";

    // Rows are decoded through the `FromRow` implementation of `FileInfo`.
    let lookup = sqlx::query_as::<_, FileInfo>(SELECT_BY_ID).bind(id);
    lookup.fetch_optional(db).await
}
/// Persists `file_info` as a new row in the `Files` table.
///
/// The content type is stored in its string representation. Any database
/// failure is returned unchanged as `sqlx::Error`.
pub async fn create(file_info: &FileInfo, db: &sqlx::Pool<sqlx::Postgres>) -> Result<(), sqlx::Error> {
    // `Mime` is bound as text, so render it once up front.
    let content_type = file_info.content_type.to_string();

    let insert = sqlx::query(
        "INSERT INTO Files (file_id, file_name, content_type, valid_till, delete_on_download) \
         VALUES ($1, $2, $3, $4, $5)",
    )
    .bind(&file_info.file_id)
    .bind(&file_info.file_name)
    .bind(content_type)
    .bind(file_info.valid_till)
    .bind(file_info.delete_on_download);

    // The query result (affected row count) is of no interest to callers.
    insert.execute(db).await?;
    Ok(())
}
impl FromRow<'_, PgRow> for FileInfo {
fn from_row(row: &'_ PgRow) -> Result<Self, sqlx::Error> {
Ok(Self {
file_id: row.try_get("file_id")?,
file_name: row.try_get("file_name")?,
valid_till: row.try_get("valid_till")?,
content_type: row
.try_get_raw("content_type")?
.as_str()
.map_err(sqlx::Error::Decode)?
.parse()
.unwrap_or(APPLICATION_OCTET_STREAM),
delete_on_download: row.try_get("delete_on_download")?,
})
}
}

View File

@ -2,6 +2,7 @@ mod config;
mod db; mod db;
mod deleter; mod deleter;
mod download; mod download;
mod file_info;
mod mime_relations; mod mime_relations;
mod multipart; mod multipart;
mod rate_limit; mod rate_limit;
@ -53,8 +54,6 @@ async fn main() -> std::io::Result<()> {
let config = config::from_env().await; let config = config::from_env().await;
let (sender, receiver) = channel(8); let (sender, receiver) = channel(8);
log::info!("omnomnom");
let db = web::Data::new(pool.clone()); let db = web::Data::new(pool.clone());
let expiry_watch_sender = web::Data::new(sender); let expiry_watch_sender = web::Data::new(sender);
let bind_address = env::var("BIND_ADDRESS").unwrap_or_else(|_| "0.0.0.0:8000".to_owned()); let bind_address = env::var("BIND_ADDRESS").unwrap_or_else(|_| "0.0.0.0:8000".to_owned());
@ -78,6 +77,9 @@ async fn main() -> std::io::Result<()> {
.finish() .finish()
.unwrap(); .unwrap();
log::info!("Listening on {bind_address}");
log::info!("omnomnom");
let http_server = HttpServer::new({ let http_server = HttpServer::new({
move || { move || {
App::new() App::new()

View File

@ -1,8 +1,8 @@
use std::io::ErrorKind; use std::io::ErrorKind;
use crate::config::Config; use crate::config::Config;
use crate::multipart::UploadConfig; use crate::file_info::FileInfo;
use crate::{multipart, template}; use crate::{file_info, multipart, template};
use actix_files::NamedFile; use actix_files::NamedFile;
use actix_multipart::Multipart; use actix_multipart::Multipart;
use actix_web::http::header::LOCATION; use actix_web::http::header::LOCATION;
@ -33,52 +33,30 @@ pub async fn upload(
expiry_watch_sender: web::Data<Sender<()>>, expiry_watch_sender: web::Data<Sender<()>>,
config: web::Data<Config>, config: web::Data<Config>,
) -> Result<HttpResponse, Error> { ) -> Result<HttpResponse, Error> {
let (file_id, file_path) = create_unique_file(&config).await.map_err(|file_err| { let (file_id, file_path) = create_unique_file_id(&config).await.map_err(|file_err| {
log::error!("could not create file {:?}", file_err); log::error!("could not create file {:?}", file_err);
error::ErrorInternalServerError("could not create file") error::ErrorInternalServerError("could not create file")
})?; })?;
let upload_config = multipart::parse_multipart(payload, &file_path, &config).await?; let upload_config = multipart::parse_multipart(payload, &file_path, &config).await?;
let file_name = upload_config.original_name.clone(); let file_info = FileInfo::new(file_id, upload_config);
insert_file_metadata(&file_id, file_name, &file_path, &upload_config, db).await?; create_file(&file_info, &file_path, db).await?;
log::info!(
"create new file {} (valid_till: {}, content_type: {}, delete_on_download: {})",
file_id,
upload_config.valid_till,
upload_config.content_type,
upload_config.delete_on_download
);
expiry_watch_sender.send(()).await.unwrap(); expiry_watch_sender.send(()).await.unwrap();
let redirect = get_redirect_url(&file_id, upload_config.original_name.as_deref()); let redirect = get_redirect_url(&file_info.file_id, file_info.file_name.as_deref());
let url = template::get_file_url(&req, &file_id, upload_config.original_name.as_deref()); let url = template::get_file_url(&req, &file_info.file_id, file_info.file_name.as_deref());
Ok(HttpResponse::SeeOther() Ok(HttpResponse::SeeOther()
.insert_header((LOCATION, redirect)) .insert_header((LOCATION, redirect))
.body(format!("{url}\n"))) .body(format!("{url}\n")))
} }
async fn insert_file_metadata( async fn create_file(
file_id: &String, file_info: &FileInfo,
file_name: Option<String>,
file_path: &Path, file_path: &Path,
upload_config: &UploadConfig,
db: web::Data<sqlx::Pool<sqlx::Postgres>>, db: web::Data<sqlx::Pool<sqlx::Postgres>>,
) -> Result<(), Error> { ) -> Result<(), Error> {
let db_insert = sqlx::query( if let Err(db_err) = file_info::create(file_info, &db).await {
"INSERT INTO Files (file_id, file_name, content_type, valid_till, delete_on_download) \
VALUES ($1, $2, $3, $4, $5)",
)
.bind(file_id)
.bind(file_name)
.bind(&upload_config.content_type.to_string())
.bind(upload_config.valid_till)
.bind(upload_config.delete_on_download)
.execute(db.as_ref())
.await;
if let Err(db_err) = db_insert {
log::error!("could not insert into datebase {:?}", db_err); log::error!("could not insert into datebase {:?}", db_err);
if let Err(file_err) = fs::remove_file(file_path).await { if let Err(file_err) = fs::remove_file(file_path).await {
@ -88,10 +66,17 @@ async fn insert_file_metadata(
"could not insert file into database", "could not insert file into database",
)); ));
} }
log::info!(
"create new file {} (valid_till: {}, content_type: {}, delete_on_download: {})",
file_info.file_id,
file_info.valid_till,
file_info.content_type,
file_info.delete_on_download
);
Ok(()) Ok(())
} }
async fn create_unique_file( async fn create_unique_file_id(
config: &web::Data<Config>, config: &web::Data<Config>,
) -> Result<(String, PathBuf), std::io::Error> { ) -> Result<(String, PathBuf), std::io::Error> {
loop { loop {