From 1dc2fff0c1bffdc6dba8f5894f739a271fc35624 Mon Sep 17 00:00:00 2001 From: neri Date: Fri, 7 Oct 2022 12:18:23 +0200 Subject: [PATCH] load mime parents to determine text display --- Cargo.lock | 12 ++++++----- Cargo.toml | 6 ++++-- src/download.rs | 12 +++++------ src/main.rs | 1 + src/mime_relations.rs | 48 +++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 66 insertions(+), 13 deletions(-) create mode 100644 src/mime_relations.rs diff --git a/Cargo.lock b/Cargo.lock index 833a9c6..0084d37 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -427,7 +427,7 @@ dependencies = [ [[package]] name = "datatrash" -version = "1.1.8" +version = "1.1.9" dependencies = [ "actix-files", "actix-governor", @@ -437,12 +437,14 @@ dependencies = [ "futures-util", "governor", "htmlescape", + "lazy_static", "log", "mime", "rand", "sqlx", "time", "tokio", + "tree_magic_db", "tree_magic_mini", "url", "urlencoding", @@ -870,9 +872,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.3" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c8af84674fe1f223a982c933a0ee1086ac4d4052aa0fb8060c12c6ad838e754" +checksum = "4217ad341ebadf8d8e724e264f13e593e0648f5b3e94b3896a5df283be015ecc" [[package]] name = "jobserver" @@ -1607,9 +1609,9 @@ dependencies = [ [[package]] name = "time" -version = "0.3.14" +version = "0.3.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c3f9a28b618c3a6b9251b6908e9c99e04b9e5c02e6581ccbb67d59c34ef7f9b" +checksum = "d634a985c4d4238ec39cacaed2e7ae552fbd3c476b552c1deac3021b7d7eaf0c" dependencies = [ "itoa", "libc", diff --git a/Cargo.toml b/Cargo.toml index 68201ff..91eef87 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "datatrash" -version = "1.1.8" +version = "1.1.9" authors = ["neri"] edition = "2021" @@ -24,11 +24,13 @@ tokio = { version = "1.21.2", features = ["rt", "macros", "sync"] } actix-multipart = "0.4.0" futures-util = "0.3.24" rand = "0.8.5" -time = "0.3.14" +time = "0.3.15" htmlescape = "0.3.1" urlencoding = "2.1.2" tree_magic_mini = { version = "3.0.3", features = ["with-gpl-data"] } +tree_magic_db = "*" mime = "0.3.16" url = "2.3.1" actix-governor = "0.3.2" governor = "0.4.2" +lazy_static = "1.4.0" diff --git a/src/download.rs b/src/download.rs index 4265236..4e8c25a 100644 --- a/src/download.rs +++ b/src/download.rs @@ -17,8 +17,7 @@ use time::OffsetDateTime; use tokio::fs; use url::Url; -use crate::config::Config; -use crate::deleter; +use crate::{config::Config, deleter, mime_relations}; const TEXT_VIEW_HTML: &str = include_str!("../template/text-view.html"); const URL_VIEW_HTML: &str = include_str!("../template/url-view.html"); @@ -42,7 +41,8 @@ pub async fn download( path.push(&file_id); let mime = Mime::from_str(&content_type).unwrap_or(APPLICATION_OCTET_STREAM); - let mut response = match get_view_type(&req, &mime, &path, delete).await { + let mime = mime_relations::get_alias(&mime); + let mut response = match get_view_type(&req, mime, &path, delete).await { ViewType::Raw => build_file_response(false, &file_name, path, mime, &req).await, ViewType::Download => build_file_response(true, &file_name, path, mime, &req).await, ViewType::Html => build_text_response(&path).await, @@ -91,7 +91,7 @@ async fn get_view_type( if req.query_string().contains("raw") { return ViewType::Raw; } - if mime.type_() != mime::TEXT { + if !mime_relations::matches_text(mime) { return ViewType::Raw; } if get_file_size(file_path).await >= TEXT_VIEW_SIZE_LIMIT { @@ -146,7 +146,7 @@ async fn build_file_response( download: bool, file_name: &str, path: PathBuf, - mime: Mime, + mime: &Mime, req: &HttpRequest, ) -> Result { let content_disposition = ContentDisposition { @@ -162,7 +162,7 @@ async fn build_file_response( log::error!("file could not be read {:?}", file_err); error::ErrorInternalServerError("this file should be here but could not be found") })? - .set_content_type(mime) + .set_content_type(mime.clone()) .set_content_disposition(content_disposition); let mut response = file.into_response(req); diff --git a/src/main.rs b/src/main.rs index de1b0e6..6f4c45c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,6 +2,7 @@ mod config; mod db; mod deleter; mod download; +mod mime_relations; mod multipart; mod rate_limit; mod template; diff --git a/src/mime_relations.rs b/src/mime_relations.rs new file mode 100644 index 0000000..72dd856 --- /dev/null +++ b/src/mime_relations.rs @@ -0,0 +1,48 @@ +use std::{collections::HashMap, str::FromStr}; + +use lazy_static::lazy_static; +use mime::Mime; + +lazy_static! { + static ref ALIASES: HashMap = get_mime_aliases(); + static ref PARENTS: Vec<(Mime, Mime)> = get_mime_parent_relations(); +} + +fn get_mime_aliases() -> HashMap { + tree_magic_db::aliases() + .lines() + .flat_map(|line| line.split_once(' ')) + .flat_map(|(a, b)| Some((Mime::from_str(a).ok()?, Mime::from_str(b).ok()?))) + .collect() +} + +pub(crate) fn get_alias(mimetype: &Mime) -> &Mime { + match ALIASES.get(mimetype) { + Some(x) => x, + None => mimetype, + } +} + +fn get_mime_parent_relations() -> Vec<(Mime, Mime)> { + tree_magic_db::subclasses() + .lines() + .flat_map(|line| line.split_once(' ')) + .flat_map(|(child, parent)| { + Some((Mime::from_str(child).ok()?, Mime::from_str(parent).ok()?)) + }) + .collect() +} + +fn get_mime_parents(mimetype: &Mime) -> Vec<&Mime> { + PARENTS + .iter() + .filter_map(|(child, parent)| (child == mimetype).then_some(parent)) + .collect() +} + +pub(crate) fn matches_text(mime: &Mime) -> bool { + if mime.type_() == mime::TEXT { + return true; + } + return get_mime_parents(mime).into_iter().any(matches_text); +}