stumpapp · aaronleopold · Mar 16, 2023 · Mar 1, 2023 · Mar 1, 2023 · Mar 11, 2023
diff --git a/apps/server/src/utils/http.rs b/apps/server/src/utils/http.rs
@@ -155,7 +155,7 @@ impl IntoResponse for NamedFile {
 		Response::builder()
 			.header(
 				header::CONTENT_TYPE,
-				ContentType::from_infer(&self.path_buf).to_string(),
+				ContentType::from_path(&self.path_buf).to_string(),
 			)
 			.header(
 				header::CONTENT_DISPOSITION,

diff --git a/core/Cargo.toml b/core/Cargo.toml
@@ -31,7 +31,7 @@ infer = "0.7.0"
 image = "0.24.2"
 webp = "0.2.2"
 zip = "0.5.13"
-epub = "1.2.3"
+epub = "1.2.4"
 unrar = { git = "https://github.com/aaronleopold/unrar.rs", branch = "aleopold--read-bytes" }
 data-encoding = "2.3.2"
 # include_dir = "0.7.2"

diff --git a/core/src/fs/media_file/epub.rs b/core/src/fs/media_file/epub.rs
@@ -10,15 +10,15 @@ use std::os::unix::prelude::MetadataExt;
 #[cfg(target_family = "windows")]
 use std::os::windows::prelude::*;
 
+const ACCEPTED_EPUB_COVER_MIMES: [&str; 2] = ["image/jpeg", "image/png"];
+const DEFAULT_EPUB_COVER_ID: &str = "cover";
+
 use crate::{
-	fs::{
-		checksum,
-		media_file::{get_content_type_from_mime, guess_content_type},
-	},
+	fs::checksum,
 	prelude::{errors::ProcessFileError, fs::ProcessedMediaFile, ContentType},
 };
 use epub::doc::EpubDoc;
-use tracing::{debug, error, warn};
+use tracing::{debug, error, trace, warn};
 
 /*
 epubcfi usually starts with /6, referring to spine element of package file
@@ -76,19 +76,73 @@ pub fn process(path: &Path) -> Result<ProcessedMediaFile, ProcessFileError> {
 }
 
 // TODO: change return type to make more sense
+/// Returns the cover image for the epub file. If a cover image cannot be extracted via the
+/// metadata, it will go through two rounds of fallback methods:
+///
+/// 1. Attempt to find a resource with the default ID of "cover"
+/// 2. Attempt to find a resource with a mime type of "image/jpeg" or "image/png", and weight the
+/// results based on how likely they are to be the cover. For example, if the file is named
+/// "cover.jpg", it's probably the cover. The entry with the highest weight, if any, will be
+/// returned. An error is returned when no cover resource can be read at all.
 pub fn get_cover(file: &str) -> Result<(ContentType, Vec<u8>), ProcessFileError> {
 	let mut epub_file = EpubDoc::new(file).map_err(|e| {
 		error!("Failed to open epub file: {}", e);
 		ProcessFileError::EpubOpenError(e.to_string())
 	})?;
 
-	let cover = epub_file.get_cover().map_err(|e| {
-		error!("Failed to get cover from epub file: {}", e);
-		ProcessFileError::EpubReadError(e.to_string())
-	})?;
+	let cover_id = epub_file.get_cover_id().unwrap_or_else(|_| {
+		debug!("Epub file does not contain cover metadata");
+		DEFAULT_EPUB_COVER_ID.to_string()
+	});
+
+	if let Ok(cover) = epub_file.get_resource(&cover_id) {
+		let mime = epub_file
+			.get_resource_mime(&cover_id)
+			.unwrap_or_else(|_| "image/png".to_string());
+
+		return Ok((ContentType::from(mime.as_str()), cover));
+	}
 
-	// FIXME: mime type
-	Ok((get_content_type_from_mime("image/png"), cover))
+	debug!(
+		"Explicit cover image could not be found, falling back to searching for best match..."
+	);
+	// Only the winning ID is cloned, so the borrow of `resources` ends before `get_resource`.
+	let best_match_id = epub_file
+		.resources
+		.iter()
+		.filter(|(_, (_, mime))| ACCEPTED_EPUB_COVER_MIMES.iter().any(|m| m == mime))
+		.map(|(id, (path, _))| {
+			trace!(name = ?path, "Found possible cover image");
+			// Weight the results based on how likely they are to be the cover, e.g. a file
+			// named "cover.jpg" is probably the cover. The file name is compared as a string
+			// because `Path::starts_with`/`Path::ends_with` only match whole path components
+			// and would therefore never match "cover.jpg" or a trailing "png".
+			// TODO: this is SUPER naive, and should be improved at some point...
+			let file_name = path.file_name().and_then(|n| n.to_str()).unwrap_or_default();
+			let weight = match file_name {
+				name if !name.starts_with("cover") => 0,
+				name if name.ends_with("png") => 100,
+				_ => 75,
+			};
+			(weight, id)
+		})
+		.max_by_key(|(weight, _)| *weight)
+		.map(|(_, id)| id.clone());
+
+	if let Some(id) = best_match_id {
+		if let Ok(c) = epub_file.get_resource(&id) {
+			let mime = epub_file
+				.get_resource_mime(&id)
+				.unwrap_or_else(|_| "image/png".to_string());
+
+			return Ok((ContentType::from(mime.as_str()), c));
+		}
+	}
+
+	error!("Failed to find cover for epub file");
+	Err(ProcessFileError::EpubReadError(
+		"Failed to find cover for epub file".to_string(),
+	))
 }
 
 pub fn get_epub_chapter(
@@ -108,15 +162,15 @@ pub fn get_epub_chapter(
 	})?;
 
 	let content_type = match epub_file.get_current_mime() {
-		Ok(mime) => get_content_type_from_mime(&mime),
+		Ok(mime) => ContentType::from(mime.as_str()),
 		Err(e) => {
-			warn!(
-				"Failed to get explicit definition of resource mime for {}: {}",
-				path, e
+			error!(
+				error = ?e,
+				chapter_path = ?path,
+				"Failed to get explicit resource mime for chapter. Returning default.",
 			);
 
-			// FIXME: when did I write this? lmao
-			guess_content_type("REMOVEME.xhml")
+			ContentType::XHTML
 		},
 	};
 
@@ -139,7 +193,7 @@ pub fn get_epub_resource(
 		ProcessFileError::EpubReadError(e.to_string())
 	})?;
 
-	Ok((get_content_type_from_mime(&content_type), contents))
+	Ok((ContentType::from(content_type.as_str()), contents))
 }
 
 pub fn normalize_resource_path(path: PathBuf, root: &str) -> PathBuf {
@@ -201,15 +255,15 @@ pub fn get_epub_resource_from_path(
 	// package.opf, etc.).
 	let content_type = match epub_file.get_resource_mime_by_path(adjusted_path.as_path())
 	{
-		Ok(mime) => get_content_type_from_mime(&mime),
+		Ok(mime) => ContentType::from(mime.as_str()),
 		Err(e) => {
 			warn!(
 				"Failed to get explicit definition of resource mime for {}: {}",
 				adjusted_path.as_path().to_str().unwrap(),
 				e
 			);
 
-			guess_content_type(adjusted_path.as_path().to_str().unwrap())
+			ContentType::from_path(adjusted_path.as_path())
 		},
 	};
 

diff --git a/core/src/fs/media_file/mod.rs b/core/src/fs/media_file/mod.rs
@@ -5,7 +5,7 @@ pub mod rar;
 pub mod zip;
 
 use std::path::Path;
-use tracing::{debug, warn};
+use tracing::debug;
 
 use crate::{
 	db::models::LibraryOptions,
@@ -16,13 +16,6 @@ use crate::{
 	},
 };
 
-// FIXME: this module does way too much. It should be cleaned up, way too many vaguely
-// similar things shoved in here with little distinction.
-
-// TODO: replace all these match statements with an custom enum that handles it all.
-// The enum itself will have some repetition, however it'll be cleaner than
-// doing this stuff over and over as this file currently does.
-
 // TODO: move trait, maybe merge with another.
 pub trait IsImage {
 	fn is_image(&self) -> bool;
@@ -39,101 +32,18 @@ pub fn process_comic_info(buffer: String) -> Option<MediaMetadata> {
 	}
 }
 
-fn temporary_content_workarounds(extension: &str) -> ContentType {
-	if extension == "opf" || extension == "ncx" {
-		return ContentType::XML;
-	}
-
-	ContentType::UNKNOWN
-}
-
-pub fn guess_content_type(file: &str) -> ContentType {
-	let file = Path::new(file);
-
-	let extension = file.extension().unwrap_or_default();
-	let extension = extension.to_string_lossy().to_string();
-
-	// TODO: if this fails manually check the extension
-	match ContentType::from_extension(&extension) {
-		Some(content_type) => content_type,
-		// None => ContentType::Any,
-		None => temporary_content_workarounds(&extension),
-	}
-}
-
-pub fn get_content_type_from_mime(mime: &str) -> ContentType {
-	ContentType::from(mime)
-}
-
-/// Guess the MIME type of a file based on its extension.
-pub fn guess_mime(path: &Path) -> Option<String> {
-	let extension = path.extension().and_then(|ext| ext.to_str());
-
-	if extension.is_none() {
-		warn!(
-			"Unable to guess mime for file without extension: {:?}",
-			path
-		);
-		return None;
-	}
-
-	let extension = extension.unwrap();
-
-	let content_type = ContentType::from_extension(extension);
-
-	if let Some(content_type) = content_type {
-		return Some(content_type.to_string());
-	}
-
-	// TODO: add more?
-	match extension.to_lowercase().as_str() {
-		"pdf" => Some("application/pdf".to_string()),
-		"epub" => Some("application/epub+zip".to_string()),
-		"zip" => Some("application/zip".to_string()),
-		"cbz" => Some("application/vnd.comicbook+zip".to_string()),
-		"rar" => Some("application/vnd.rar".to_string()),
-		"cbr" => Some("application/vnd.comicbook-rar".to_string()),
-		"png" => Some("image/png".to_string()),
-		"jpg" => Some("image/jpeg".to_string()),
-		"jpeg" => Some("image/jpeg".to_string()),
-		"webp" => Some("image/webp".to_string()),
-		"gif" => Some("image/gif".to_string()),
-		_ => None,
-	}
-}
-
-/// Infer the MIME type of a file. If the MIME type cannot be inferred via reading
-/// the first few bytes of the file, then the file extension is used via `guess_mime`.
-pub fn infer_mime_from_path(path: &Path) -> Option<String> {
-	match infer::get_from_path(path) {
-		Ok(mime) => {
-			debug!("Inferred mime for file {:?}: {:?}", path, mime);
-			mime.map(|m| m.mime_type().to_string())
-		},
-		Err(e) => {
-			warn!(
-				"Unable to infer mime for file {:?}: {:?}",
-				path,
-				e.to_string()
-			);
-
-			guess_mime(path)
-		},
-	}
-}
-
 pub fn get_page(
 	file: &str,
 	page: i32,
 ) -> Result<(ContentType, Vec<u8>), ProcessFileError> {
-	let mime = guess_mime(Path::new(file));
-
-	match mime.as_deref() {
-		Some("application/zip") => zip::get_image(file, page),
-		Some("application/vnd.comicbook+zip") => zip::get_image(file, page),
-		Some("application/vnd.rar") => rar::get_image(file, page),
-		Some("application/vnd.comicbook-rar") => rar::get_image(file, page),
-		Some("application/epub+zip") => {
+	let mime = ContentType::from_file(file).mime_type();
+
+	match mime.as_str() {
+		"application/zip" => zip::get_image(file, page),
+		"application/vnd.comicbook+zip" => zip::get_image(file, page),
+		"application/vnd.rar" => rar::get_image(file, page),
+		"application/vnd.comicbook-rar" => rar::get_image(file, page),
+		"application/epub+zip" => {
 			if page == 1 {
 				epub::get_cover(file)
 			} else {
@@ -142,7 +52,7 @@ pub fn get_page(
 				))
 			}
 		},
-		None => Err(ProcessFileError::Unknown(format!(
+		"unknown" => Err(ProcessFileError::Unknown(format!(
 			"Unable to determine mime type for file: {:?}",
 			file
 		))),
@@ -166,19 +76,16 @@ pub fn process(
 	path: &Path,
 	options: &LibraryOptions,
 ) -> Result<ProcessedMediaFile, ProcessFileError> {
-	debug!("Processing entry {:?} with options: {:?}", path, options);
-
-	let mime = infer_mime_from_path(path);
-
-	match mime.as_deref() {
-		Some("application/zip") => zip::process(path),
-		Some("application/vnd.comicbook+zip") => zip::process(path),
-		Some("application/vnd.rar") => process_rar(options.convert_rar_to_zip, path),
-		Some("application/vnd.comicbook-rar") => {
-			process_rar(options.convert_rar_to_zip, path)
-		},
-		Some("application/epub+zip") => epub::process(path),
-		None => Err(ProcessFileError::Unknown(format!(
+	debug!(?path, ?options, "Processing entry");
+	let mime = ContentType::from_path(path).mime_type();
+
+	match mime.as_str() {
+		"application/zip" => zip::process(path),
+		"application/vnd.comicbook+zip" => zip::process(path),
+		"application/vnd.rar" => process_rar(options.convert_rar_to_zip, path),
+		"application/vnd.comicbook-rar" => process_rar(options.convert_rar_to_zip, path),
+		"application/epub+zip" => epub::process(path),
+		"unknown" => Err(ProcessFileError::Unknown(format!(
 			"Unable to determine mime type for file: {:?}",
 			path
 		))),

diff --git a/core/src/fs/media_file/zip.rs b/core/src/fs/media_file/zip.rs
@@ -13,16 +13,10 @@ use crate::{
 };
 
 impl<'a> IsImage for ZipFile<'a> {
-	// FIXME: use infer here
 	fn is_image(&self) -> bool {
 		if self.is_file() {
-			let content_type = media_file::guess_content_type(self.name());
-			trace!(
-				"Content type of file {:?} is {:?}",
-				self.name(),
-				content_type
-			);
-
+			let content_type = ContentType::from_file(self.name());
+			trace!(name = self.name(), content_type = ?content_type, "ContentType of file");
 			return content_type.is_image();
 		}
 
@@ -147,7 +141,7 @@ pub fn get_image(
 		let mut contents = Vec::new();
 		// Note: guessing mime here since this file isn't accessible from the filesystem,
 		// it lives inside the zip file.
-		let content_type = media_file::guess_content_type(name);
+		let content_type = ContentType::from_file(name);
 
 		if images_seen + 1 == page && file.is_image() {
 			trace!("Found target image: {}", name);