datatrash/src/download.rs

234 lines
7.8 KiB
Rust
Raw Normal View History

2022-06-29 23:04:03 +00:00
use std::{path::PathBuf, str::FromStr, time::SystemTime};
2021-09-09 20:18:42 +00:00
use actix_files::NamedFile;
use actix_web::{
error,
http::header::{
2022-06-29 23:04:03 +00:00
Accept, CacheControl, CacheDirective, Charset, ContentDisposition, DispositionParam,
DispositionType, Expires, ExtendedValue, Header, HeaderValue, HttpDate, TryIntoHeaderValue,
ACCEPT, CACHE_CONTROL, CONTENT_TYPE, EXPIRES, VARY,
},
web, Error, HttpRequest, HttpResponse,
};
use mime::{Mime, APPLICATION_OCTET_STREAM, TEXT_HTML};
2021-09-10 15:44:37 +00:00
use sqlx::postgres::PgPool;
2022-02-26 23:34:57 +00:00
use std::path::Path;
2022-06-29 23:04:03 +00:00
use time::OffsetDateTime;
2022-02-26 23:34:57 +00:00
use tokio::fs;
2021-09-09 20:18:42 +00:00
use url::Url;
use crate::{config::Config, deleter, mime_relations};
2021-09-09 20:18:42 +00:00
/// HTML template for the text view, embedded at compile time; its `{text}`
/// placeholder is substituted in `build_text_response`.
const TEXT_VIEW_HTML: &str = include_str!("../template/text-view.html");
/// HTML template for the link view, embedded at compile time; its
/// `{link_content}` and `{link_attribute}` placeholders are substituted in
/// `build_text_response`.
const URL_VIEW_HTML: &str = include_str!("../template/url-view.html");
/// Files whose size is at or above this many bytes are never rendered through
/// the HTML text view (see `get_view_type`).
const TEXT_VIEW_SIZE_LIMIT: u64 = 512 * 1024; // 512KiB
/// How a stored file is presented to the client; chosen by `get_view_type`
/// and consumed by `download`.
enum ViewType {
// Serve the file itself with an inline content disposition.
Raw,
// Serve the file itself with an attachment content disposition.
Download,
// Render the file's text inside one of the HTML templates.
Html,
}
pub async fn download(
req: HttpRequest,
db: web::Data<PgPool>,
config: web::Data<Config>,
) -> Result<HttpResponse, Error> {
let id = req.match_info().query("id");
let (file_id, file_name, valid_till, content_type, delete) = load_file_info(id, &db).await?;
let mut path = config.files_dir.clone();
path.push(&file_id);
let mime = Mime::from_str(&content_type).unwrap_or(APPLICATION_OCTET_STREAM);
let mut response = match get_view_type(&req, &mime, &path, delete).await {
2022-11-04 10:37:10 +00:00
ViewType::Raw => build_file_response(false, &file_name, path, mime, &req),
ViewType::Download => build_file_response(true, &file_name, path, mime, &req),
ViewType::Html => build_text_response(&path).await,
2022-06-29 23:04:03 +00:00
}?;
insert_cache_headers(&mut response, valid_till);
if delete {
deleter::delete_by_id(&db, &file_id, &config.files_dir)
.await
.map_err(|db_err| {
log::error!("could not delete file {:?}", db_err);
error::ErrorInternalServerError("could not delete file")
})?;
}
2022-06-29 23:04:03 +00:00
Ok(response)
}
2021-09-09 20:18:42 +00:00
async fn load_file_info(
id: &str,
db: &web::Data<sqlx::Pool<sqlx::Postgres>>,
2022-06-29 23:04:03 +00:00
) -> Result<(String, String, OffsetDateTime, String, bool), Error> {
2021-09-10 15:44:37 +00:00
sqlx::query_as(
"SELECT file_id, file_name, valid_till, content_type, delete_on_download from files WHERE file_id = $1",
2021-09-09 20:18:42 +00:00
)
.bind(id)
2021-09-10 15:44:37 +00:00
.fetch_optional(db.as_ref())
.await
.map_err(|db_err| {
log::error!("could not run select statement {:?}", db_err);
error::ErrorInternalServerError("could not run select statement")
})?
.ok_or_else(|| error::ErrorNotFound("file does not exist or has expired"))
2021-09-09 20:18:42 +00:00
}
async fn get_view_type(
req: &HttpRequest,
mime: &Mime,
file_path: &Path,
delete_on_download: bool,
) -> ViewType {
if delete_on_download || req.query_string().contains("dl") {
return ViewType::Download;
}
if req.query_string().contains("raw") {
return ViewType::Raw;
}
if !mime_relations::matches_text(mime) {
return ViewType::Raw;
}
if get_file_size(file_path).await >= TEXT_VIEW_SIZE_LIMIT {
return ViewType::Raw;
}
if let Ok(accept) = Accept::parse(req) {
2022-02-26 23:34:57 +00:00
for accept_mime in accept.ranked() {
if accept_mime == TEXT_HTML {
return ViewType::Html;
}
if mime_matches(&accept_mime, mime) {
break;
}
}
}
ViewType::Raw
}
/// Returns `true` when the `accept` entry covers `content`: both the type and
/// the subtype must either be equal or be the `*` wildcard on the `accept`
/// side.
fn mime_matches(accept: &Mime, content: &Mime) -> bool {
    // A concrete top-level type that differs rules the entry out immediately.
    if accept.type_() != mime::STAR && accept.type_() != content.type_() {
        return false;
    }
    accept.subtype() == mime::STAR || accept.subtype() == content.subtype()
}
/// Returns the length in bytes reported by the file's metadata, or `0` when
/// the metadata cannot be read.
async fn get_file_size(file_path: &Path) -> u64 {
    match fs::metadata(file_path).await {
        Ok(metadata) => metadata.len(),
        Err(_) => 0,
    }
}
2021-09-09 20:18:42 +00:00
async fn build_text_response(path: &Path) -> Result<HttpResponse, Error> {
let content = fs::read_to_string(path).await.map_err(|file_err| {
log::error!("file could not be read {:?}", file_err);
error::ErrorInternalServerError("this file should be here but could not be found")
})?;
let encoded = htmlescape::encode_minimal(&content);
2021-09-09 23:45:12 +00:00
let html = if !content.contains(&['\n', '\r'][..]) && Url::from_str(&content).is_ok() {
2021-09-09 20:18:42 +00:00
let attribute_encoded = htmlescape::encode_attribute(&content);
URL_VIEW_HTML
.replace("{link_content}", &encoded)
.replace("{link_attribute}", &attribute_encoded)
} else {
TEXT_VIEW_HTML.replace("{text}", &encoded)
};
2021-12-20 14:40:49 +00:00
Ok(HttpResponse::Ok()
.content_type(TEXT_HTML.to_string())
.body(html))
2021-09-09 20:18:42 +00:00
}
2022-11-04 10:37:10 +00:00
fn build_file_response(
2021-09-09 20:18:42 +00:00
download: bool,
file_name: &str,
2022-02-26 23:34:57 +00:00
path: PathBuf,
mime: Mime,
req: &HttpRequest,
2021-09-09 20:18:42 +00:00
) -> Result<HttpResponse, Error> {
let content_disposition = ContentDisposition {
disposition: if download {
DispositionType::Attachment
} else {
DispositionType::Inline
},
2021-09-11 00:08:47 +00:00
parameters: get_disposition_params(file_name),
2021-09-09 20:18:42 +00:00
};
let file = NamedFile::open(path)
.map_err(|file_err| {
log::error!("file could not be read {:?}", file_err);
error::ErrorInternalServerError("this file should be here but could not be found")
})?
.set_content_type(mime)
2021-09-09 20:18:42 +00:00
.set_content_disposition(content_disposition);
2022-06-29 23:04:03 +00:00
let mut response = file.into_response(req);
2022-11-22 19:46:37 +00:00
append_security_headers(&mut response, req, download);
Ok(response)
}
2022-11-22 19:46:37 +00:00
fn append_security_headers(response: &mut HttpResponse, req: &HttpRequest, download: bool) {
// if the browser is trying to fetch this resource in a secure context pretend the reponse is
// just binary data so it won't be executed
let sec_fetch_mode = req
.headers()
.get("sec-fetch-mode")
.and_then(|v| v.to_str().ok());
if !download && sec_fetch_mode.is_some() && sec_fetch_mode != Some("navigate") {
response.headers_mut().insert(
CONTENT_TYPE,
HeaderValue::from_str(APPLICATION_OCTET_STREAM.as_ref())
.expect("mime type can be encoded to header value"),
);
}
// the reponse varies based on these request headers
response
.headers_mut()
2022-11-22 19:46:37 +00:00
.append(VARY, HeaderValue::from_static("sec-fetch-mode"));
2021-09-09 20:18:42 +00:00
}
/// Builds the `Content-Disposition` parameters for `filename`.
///
/// The plain `filename` parameter is always present. Names containing
/// non-ASCII characters additionally get an extended parameter carrying the
/// UTF-8 bytes of the name.
fn get_disposition_params(filename: &str) -> Vec<DispositionParam> {
    let plain = DispositionParam::Filename(filename.to_owned());
    if filename.is_ascii() {
        return vec![plain];
    }

    let extended = DispositionParam::FilenameExt(ExtendedValue {
        charset: Charset::Ext(String::from("UTF-8")),
        language_tag: None,
        value: filename.as_bytes().to_vec(),
    });
    vec![plain, extended]
}
2022-06-29 23:04:03 +00:00
fn insert_cache_headers(response: &mut HttpResponse, valid_till: OffsetDateTime) {
if response.status().is_success() {
let valid_duration = valid_till - OffsetDateTime::now_utc();
2022-11-04 10:37:10 +00:00
let valid_cache_seconds =
valid_duration.whole_seconds().clamp(0, i64::from(u32::MAX)) as u32;
2022-06-29 23:04:03 +00:00
response.headers_mut().insert(
CACHE_CONTROL,
CacheControl(vec![
CacheDirective::Public,
CacheDirective::MustRevalidate,
CacheDirective::MaxAge(valid_cache_seconds), // todo: expiry in seconds
CacheDirective::NoTransform,
CacheDirective::Extension("immutable".to_owned(), None),
])
.try_into_value()
.unwrap(),
);
response.headers_mut().insert(
EXPIRES,
Expires(HttpDate::from(
SystemTime::now() + std::time::Duration::from_secs(valid_cache_seconds.into()),
))
.try_into_value()
.unwrap(),
);
}
response
.headers_mut()
2022-11-22 19:46:37 +00:00
.append(VARY, HeaderValue::from_name(ACCEPT));
2022-06-29 23:04:03 +00:00
}