Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: load epub cover with missing metadata and misc improvements #99

Merged
merged 3 commits into from
Mar 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion apps/server/src/utils/http.rs
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ impl IntoResponse for NamedFile {
Response::builder()
.header(
header::CONTENT_TYPE,
ContentType::from_infer(&self.path_buf).to_string(),
ContentType::from_path(&self.path_buf).to_string(),
)
.header(
header::CONTENT_DISPOSITION,
Expand Down
2 changes: 1 addition & 1 deletion core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ infer = "0.7.0"
image = "0.24.2"
webp = "0.2.2"
zip = "0.5.13"
epub = "1.2.3"
epub = "1.2.4"
unrar = { git = "https://github.com/aaronleopold/unrar.rs", branch = "aleopold--read-bytes" }
data-encoding = "2.3.2"
# include_dir = "0.7.2"
Expand Down
94 changes: 74 additions & 20 deletions core/src/fs/media_file/epub.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,15 @@ use std::os::unix::prelude::MetadataExt;
#[cfg(target_family = "windows")]
use std::os::windows::prelude::*;

const ACCEPTED_EPUB_COVER_MIMES: [&str; 2] = ["image/jpeg", "image/png"];
const DEFAULT_EPUB_COVER_ID: &str = "cover";

use crate::{
fs::{
checksum,
media_file::{get_content_type_from_mime, guess_content_type},
},
fs::checksum,
prelude::{errors::ProcessFileError, fs::ProcessedMediaFile, ContentType},
};
use epub::doc::EpubDoc;
use tracing::{debug, error, warn};
use tracing::{debug, error, trace, warn};

/*
epubcfi usually starts with /6, referring to spine element of package file
Expand Down Expand Up @@ -76,19 +76,73 @@ pub fn process(path: &Path) -> Result<ProcessedMediaFile, ProcessFileError> {
}

// TODO: change return type to make more sense
/// Returns the cover image for the epub file. If a cover image cannot be extracted via the
/// metadata, it will go through two rounds of fallback methods:
///
/// 1. Attempt to find a resource with the default ID of "cover"
/// 2. Attempt to find a resource with a mime type of "image/jpeg" or "image/png", and weight the
/// results based on how likely they are to be the cover. For example, if the cover is named
/// "cover.jpg", it's probably the cover. The entry with the highest weight, if any, will be
/// returned.
pub fn get_cover(file: &str) -> Result<(ContentType, Vec<u8>), ProcessFileError> {
let mut epub_file = EpubDoc::new(file).map_err(|e| {
error!("Failed to open epub file: {}", e);
ProcessFileError::EpubOpenError(e.to_string())
})?;

let cover = epub_file.get_cover().map_err(|e| {
error!("Failed to get cover from epub file: {}", e);
ProcessFileError::EpubReadError(e.to_string())
})?;
let cover_id = epub_file.get_cover_id().unwrap_or_else(|_| {
debug!("Epub file does not contain cover metadata");
DEFAULT_EPUB_COVER_ID.to_string()
});

if let Ok(cover) = epub_file.get_resource(&cover_id) {
let mime = epub_file
.get_resource_mime(&cover_id)
.unwrap_or_else(|_| "image/png".to_string());

return Ok((ContentType::from(mime.as_str()), cover));
}

// FIXME: mime type
Ok((get_content_type_from_mime("image/png"), cover))
debug!(
"Explicit cover image could not be found, falling back to searching for best match..."
);
// FIXME: this is a hack; I do NOT want to clone this entire hashmap...
let cloned_resources = epub_file.resources.clone();
let search_result = cloned_resources
.iter()
.filter(|(_, (_, mime))| {
ACCEPTED_EPUB_COVER_MIMES
.iter()
.any(|accepted_mime| accepted_mime == mime)
})
.map(|(id, (path, _))| {
trace!(name = ?path, "Found possible cover image");
// I want to weight the results based on how likely they are to be the cover.
// For example, if the cover is named "cover.jpg", it's probably the cover.
// TODO: this is SUPER naive, and should be improved at some point...
if path.starts_with("cover") {
let weight = if path.ends_with("png") { 100 } else { 75 };
(weight, id)
} else {
(0, id)
}
})
.max_by_key(|(weight, _)| *weight);

if let Some((_, id)) = search_result {
if let Ok(c) = epub_file.get_resource(id) {
let mime = epub_file
.get_resource_mime(id)
.unwrap_or_else(|_| "image/png".to_string());

return Ok((ContentType::from(mime.as_str()), c));
}
}

error!("Failed to find cover for epub file");
Err(ProcessFileError::EpubReadError(
"Failed to find cover for epub file".to_string(),
))
}

pub fn get_epub_chapter(
Expand All @@ -108,15 +162,15 @@ pub fn get_epub_chapter(
})?;

let content_type = match epub_file.get_current_mime() {
Ok(mime) => get_content_type_from_mime(&mime),
Ok(mime) => ContentType::from(mime.as_str()),
Err(e) => {
warn!(
"Failed to get explicit definition of resource mime for {}: {}",
path, e
error!(
error = ?e,
chapter_path = ?path,
"Failed to get explicit resource mime for chapter. Returning default.",
);

// FIXME: when did I write this? lmao
guess_content_type("REMOVEME.xhml")
ContentType::XHTML
},
};

Expand All @@ -139,7 +193,7 @@ pub fn get_epub_resource(
ProcessFileError::EpubReadError(e.to_string())
})?;

Ok((get_content_type_from_mime(&content_type), contents))
Ok((ContentType::from(content_type.as_str()), contents))
}

pub fn normalize_resource_path(path: PathBuf, root: &str) -> PathBuf {
Expand Down Expand Up @@ -201,15 +255,15 @@ pub fn get_epub_resource_from_path(
// package.opf, etc.).
let content_type = match epub_file.get_resource_mime_by_path(adjusted_path.as_path())
{
Ok(mime) => get_content_type_from_mime(&mime),
Ok(mime) => ContentType::from(mime.as_str()),
Err(e) => {
warn!(
"Failed to get explicit definition of resource mime for {}: {}",
adjusted_path.as_path().to_str().unwrap(),
e
);

guess_content_type(adjusted_path.as_path().to_str().unwrap())
ContentType::from_path(adjusted_path.as_path())
},
};

Expand Down
133 changes: 20 additions & 113 deletions core/src/fs/media_file/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ pub mod rar;
pub mod zip;

use std::path::Path;
use tracing::{debug, warn};
use tracing::debug;

use crate::{
db::models::LibraryOptions,
Expand All @@ -16,13 +16,6 @@ use crate::{
},
};

// FIXME: this module does way too much. It should be cleaned up, way too many vaguely
// similar things shoved in here with little distinction.

// TODO: replace all these match statements with a custom enum that handles it all.
// The enum itself will have some repetition; however, it'll be cleaner than
// doing this stuff over and over as this file currently does.

// TODO: move trait, maybe merge with another.
pub trait IsImage {
fn is_image(&self) -> bool;
Expand All @@ -39,101 +32,18 @@ pub fn process_comic_info(buffer: String) -> Option<MediaMetadata> {
}
}

fn temporary_content_workarounds(extension: &str) -> ContentType {
if extension == "opf" || extension == "ncx" {
return ContentType::XML;
}

ContentType::UNKNOWN
}

pub fn guess_content_type(file: &str) -> ContentType {
let file = Path::new(file);

let extension = file.extension().unwrap_or_default();
let extension = extension.to_string_lossy().to_string();

// TODO: if this fails manually check the extension
match ContentType::from_extension(&extension) {
Some(content_type) => content_type,
// None => ContentType::Any,
None => temporary_content_workarounds(&extension),
}
}

pub fn get_content_type_from_mime(mime: &str) -> ContentType {
ContentType::from(mime)
}

/// Guess the MIME type of a file based on its extension.
pub fn guess_mime(path: &Path) -> Option<String> {
let extension = path.extension().and_then(|ext| ext.to_str());

if extension.is_none() {
warn!(
"Unable to guess mime for file without extension: {:?}",
path
);
return None;
}

let extension = extension.unwrap();

let content_type = ContentType::from_extension(extension);

if let Some(content_type) = content_type {
return Some(content_type.to_string());
}

// TODO: add more?
match extension.to_lowercase().as_str() {
"pdf" => Some("application/pdf".to_string()),
"epub" => Some("application/epub+zip".to_string()),
"zip" => Some("application/zip".to_string()),
"cbz" => Some("application/vnd.comicbook+zip".to_string()),
"rar" => Some("application/vnd.rar".to_string()),
"cbr" => Some("application/vnd.comicbook-rar".to_string()),
"png" => Some("image/png".to_string()),
"jpg" => Some("image/jpeg".to_string()),
"jpeg" => Some("image/jpeg".to_string()),
"webp" => Some("image/webp".to_string()),
"gif" => Some("image/gif".to_string()),
_ => None,
}
}

/// Infer the MIME type of a file. If the MIME type cannot be inferred via reading
/// the first few bytes of the file, then the file extension is used via `guess_mime`.
pub fn infer_mime_from_path(path: &Path) -> Option<String> {
match infer::get_from_path(path) {
Ok(mime) => {
debug!("Inferred mime for file {:?}: {:?}", path, mime);
mime.map(|m| m.mime_type().to_string())
},
Err(e) => {
warn!(
"Unable to infer mime for file {:?}: {:?}",
path,
e.to_string()
);

guess_mime(path)
},
}
}

pub fn get_page(
file: &str,
page: i32,
) -> Result<(ContentType, Vec<u8>), ProcessFileError> {
let mime = guess_mime(Path::new(file));

match mime.as_deref() {
Some("application/zip") => zip::get_image(file, page),
Some("application/vnd.comicbook+zip") => zip::get_image(file, page),
Some("application/vnd.rar") => rar::get_image(file, page),
Some("application/vnd.comicbook-rar") => rar::get_image(file, page),
Some("application/epub+zip") => {
let mime = ContentType::from_file(file).mime_type();

match mime.as_str() {
"application/zip" => zip::get_image(file, page),
"application/vnd.comicbook+zip" => zip::get_image(file, page),
"application/vnd.rar" => rar::get_image(file, page),
"application/vnd.comicbook-rar" => rar::get_image(file, page),
"application/epub+zip" => {
if page == 1 {
epub::get_cover(file)
} else {
Expand All @@ -142,7 +52,7 @@ pub fn get_page(
))
}
},
None => Err(ProcessFileError::Unknown(format!(
"unknown" => Err(ProcessFileError::Unknown(format!(
"Unable to determine mime type for file: {:?}",
file
))),
Expand All @@ -166,19 +76,16 @@ pub fn process(
path: &Path,
options: &LibraryOptions,
) -> Result<ProcessedMediaFile, ProcessFileError> {
debug!("Processing entry {:?} with options: {:?}", path, options);

let mime = infer_mime_from_path(path);

match mime.as_deref() {
Some("application/zip") => zip::process(path),
Some("application/vnd.comicbook+zip") => zip::process(path),
Some("application/vnd.rar") => process_rar(options.convert_rar_to_zip, path),
Some("application/vnd.comicbook-rar") => {
process_rar(options.convert_rar_to_zip, path)
},
Some("application/epub+zip") => epub::process(path),
None => Err(ProcessFileError::Unknown(format!(
debug!(?path, ?options, "Processing entry");
let mime = ContentType::from_path(path).mime_type();

match mime.as_str() {
"application/zip" => zip::process(path),
"application/vnd.comicbook+zip" => zip::process(path),
"application/vnd.rar" => process_rar(options.convert_rar_to_zip, path),
"application/vnd.comicbook-rar" => process_rar(options.convert_rar_to_zip, path),
"application/epub+zip" => epub::process(path),
"unknown" => Err(ProcessFileError::Unknown(format!(
"Unable to determine mime type for file: {:?}",
path
))),
Expand Down
12 changes: 3 additions & 9 deletions core/src/fs/media_file/zip.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,10 @@ use crate::{
};

impl<'a> IsImage for ZipFile<'a> {
// FIXME: use infer here
fn is_image(&self) -> bool {
if self.is_file() {
let content_type = media_file::guess_content_type(self.name());
trace!(
"Content type of file {:?} is {:?}",
self.name(),
content_type
);

let content_type = ContentType::from_file(self.name());
trace!(name = self.name(), content_type = ?content_type, "ContentType of file");
return content_type.is_image();
}

Expand Down Expand Up @@ -147,7 +141,7 @@ pub fn get_image(
let mut contents = Vec::new();
// Note: guessing mime here since this file isn't accessible from the filesystem,
// it lives inside the zip file.
let content_type = media_file::guess_content_type(name);
let content_type = ContentType::from_file(name);

if images_seen + 1 == page && file.is_image() {
trace!("Found target image: {}", name);
Expand Down
Loading