Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🐛 Fix nested file structure support for ZIP/RAR format #353

Merged
merged 5 commits into from
Jun 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
243 changes: 173 additions & 70 deletions Cargo.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ uuid = "1.8.0"
walkdir = "2.4.0"
webp = "0.2.6"
xml-rs = "0.8.20" # XML reader/writer
zip = "0.6.6"
zip = "2.1.3"

[dev-dependencies]
temp-env = "0.3.6"
Expand Down
Binary file not shown.
7 changes: 3 additions & 4 deletions core/src/filesystem/archive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use std::{
};
use tracing::{trace, warn};
use walkdir::WalkDir;
use zip::write::FileOptions;
use zip::{write::FileOptions, CompressionMethod};

/// Creates a new zip file at `destination` from the contents of the folder `unpacked_path`.
pub(crate) fn zip_dir(
Expand All @@ -17,8 +17,8 @@ pub(crate) fn zip_dir(

let mut zip_writer = zip::ZipWriter::new(zip_file);

let options = FileOptions::default()
.compression_method(zip::CompressionMethod::Stored)
let options: FileOptions<'_, ()> = FileOptions::default()
.compression_method(CompressionMethod::Stored)
.unix_permissions(0o755);

trace!("Creating zip file at {:?}", destination);
Expand All @@ -35,7 +35,6 @@ pub(crate) fn zip_dir(
// Some unzip tools unzip files with directory paths correctly, some do not!
if path.is_file() {
trace!("Adding file to zip file: {:?} as {:?}", path, name);
#[allow(deprecated)]
zip_writer.start_file_from_path(name, options)?;
let mut f = File::open(path)?;

Expand Down
66 changes: 43 additions & 23 deletions core/src/filesystem/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,14 +71,35 @@ pub struct FileParts {
}

/// Convenience helpers for inspecting filesystem paths. An implementation for
/// `Path` is provided in this file.
pub trait PathUtils {
/// Returns the file name, file stem, and extension of the file.
fn file_parts(&self) -> FileParts;
/// Returns the result of `infer::get_from_path`.
fn infer_kind(&self) -> std::io::Result<Option<infer::Type>>;
/// Returns the content type of the file based on the extension.
fn naive_content_type(&self) -> ContentType;
/// Returns true if the file is hidden (i.e. starts with a dot). Also checks for
/// files within a __MACOSX directory.
fn is_hidden_file(&self) -> bool;
/// Returns true if the file should be ignored.
/// NOTE(review): the `Path` implementation appears to be the negation of
/// `is_supported` — confirm against the impl.
fn should_ignore(&self) -> bool;
/// Returns true if the file is a supported media file.
fn is_supported(&self) -> bool;
/// Returns true if the file is an image. For `Path`, this is a naive check
/// based on the extension.
fn is_img(&self) -> bool;
/// Returns true if the file is a thumbnail image. This calls the `is_img` function
/// from the same trait, and then checks if the file name is one of the following:
/// - cover
/// - thumbnail
/// - folder
///
/// These will *potentially* be reserved filenames in the future... Not sure
/// if this functionality will be kept.
fn is_thumbnail_img(&self) -> bool;
/// Returns true if the directory has any media files in it. This is a shallow
/// check, and will not check subdirectories.
fn dir_has_media(&self) -> bool;
/// Returns true if the directory has any media files in it. This is a deep
/// check, and will check *all* subdirectories.
fn dir_has_media_deep(&self) -> bool;
}

Expand Down Expand Up @@ -120,13 +141,33 @@ impl PathUtils for Path {
infer::get_from_path(self)
}

/// Derives the content type from the file extension alone, without reading the file.
///
/// Yields `ContentType::UNKNOWN` when the path has no extension, when the extension
/// is not valid UTF-8, or when the extension is empty.
fn naive_content_type(&self) -> ContentType {
    match self.extension().and_then(|ext| ext.to_str()) {
        Some(ext) if !ext.is_empty() => ContentType::from_extension(ext),
        _ => ContentType::UNKNOWN,
    }
}

/// Returns true if the file is hidden (i.e. starts with a dot).
fn is_hidden_file(&self) -> bool {
// If the file is contained inside of a __MACOSX directory, assume it is hidden.
// We don't want to deal with these files.
if self.starts_with("__MACOSX") {
return true;
}

let FileParts { file_name, .. } = self.file_parts();

file_name.starts_with('.')
}

// TODO(327): Remove infer usage
/// Returns true if the file is a supported media file. This is a strict check when
/// infer can determine the file type, and a loose extension-based check when infer cannot.
fn is_supported(&self) -> bool {
Expand All @@ -148,28 +189,11 @@ impl PathUtils for Path {
!self.is_supported()
}

/// Returns true if the file is an image. This is a strict check when infer
/// can determine the file type, and a loose extension-based check when infer cannot.
/// Returns true if the file is an image. This is a naive check based on the extension.
fn is_img(&self) -> bool {
if let Ok(Some(file_type)) = infer::get_from_path(self) {
return file_type.mime_type().starts_with("image/");
}

let FileParts { extension, .. } = self.file_parts();

extension.eq_ignore_ascii_case("jpg")
|| extension.eq_ignore_ascii_case("png")
|| extension.eq_ignore_ascii_case("jpeg")
self.naive_content_type().is_image()
}

/// Returns true if the file is a thumbnail image. This calls the `is_img` function
/// from the same trait, and then checks if the file name is one of the following:
/// - cover
/// - thumbnail
/// - folder
///
/// These will *potentially* be reserved filenames in the future... Not sure
/// if this functionality will be kept.
fn is_thumbnail_img(&self) -> bool {
if !self.is_img() {
return false;
Expand All @@ -180,8 +204,6 @@ impl PathUtils for Path {
is_accepted_cover_name(&file_stem)
}

/// Returns true if the directory has any media files in it. This is a shallow
/// check, and will not check subdirectories.
fn dir_has_media(&self) -> bool {
if !self.is_dir() {
return false;
Expand All @@ -205,8 +227,6 @@ impl PathUtils for Path {
}
}

/// Returns true if the directory has any media files in it. This is a deep
/// check, and will check *all* subdirectories.
fn dir_has_media_deep(&self) -> bool {
if !self.is_dir() {
return false;
Expand Down
38 changes: 22 additions & 16 deletions core/src/filesystem/media/epub.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,8 @@ impl FileProcessor for EpubProcessor {
for chapter in pages {
if chapter == 1 {
// Assume this is the cover page
// FIXME: This is wrong. I just don't want to deal with it right now...
content_types.insert(chapter, ContentType::JPEG);
let (content_type, _) = Self::get_cover_internal(&mut epub_file)?;
content_types.insert(chapter, content_type);
continue;
}

Expand Down Expand Up @@ -153,20 +153,9 @@ impl EpubProcessor {
EpubDoc::new(path).map_err(|e| FileError::EpubOpenError(e.to_string()))
}

/// Returns the cover image for the epub file. If a cover image cannot be extracted via the
/// metadata, it will go through two rounds of fallback methods:
///
/// 1. Attempt to find a resource with the default ID of "cover"
/// 2. Attempt to find a resource with a mime type of "image/jpeg" or "image/png", and weight the
/// results based on how likely they are to be the cover. For example, if the cover is named
/// "cover.jpg", it's probably the cover. The entry with the highest weight, if any, will be
/// returned.
pub fn get_cover(path: &str) -> Result<(ContentType, Vec<u8>), FileError> {
let mut epub_file = EpubDoc::new(path).map_err(|e| {
tracing::error!("Failed to open epub file: {}", e);
FileError::EpubOpenError(e.to_string())
})?;

fn get_cover_internal(
epub_file: &mut EpubDoc<BufReader<File>>,
) -> Result<(ContentType, Vec<u8>), FileError> {
let cover_id = epub_file.get_cover_id().unwrap_or_else(|| {
tracing::debug!("Epub file does not contain cover metadata");
DEFAULT_EPUB_COVER_ID.to_string()
Expand Down Expand Up @@ -214,6 +203,23 @@ impl EpubProcessor {
))
}

/// Returns the cover image for the epub file. If a cover image cannot be extracted via the
/// metadata, it will go through two rounds of fallback methods:
///
/// 1. Attempt to find a resource with the default ID of "cover"
/// 2. Attempt to find a resource with a mime type of "image/jpeg" or "image/png", and weight the
/// results based on how likely they are to be the cover. For example, if the cover is named
/// "cover.jpg", it's probably the cover. The entry with the heighest weight, if any, will be
/// returned.
pub fn get_cover(path: &str) -> Result<(ContentType, Vec<u8>), FileError> {
let mut epub_file = EpubDoc::new(path).map_err(|e| {
tracing::error!("Failed to open epub file: {}", e);
FileError::EpubOpenError(e.to_string())
})?;

EpubProcessor::get_cover_internal(&mut epub_file)
}

pub fn get_chapter(
path: &str,
chapter: usize,
Expand Down
9 changes: 9 additions & 0 deletions core/src/filesystem/media/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,4 +59,13 @@ mod tests {
.to_string_lossy()
.to_string()
}

// Path to the nested, macOS-compressed CBZ fixture, resolved relative to the crate's
// manifest directory.
//
// Note: each page should be 96623 bytes. The macOS metadata files should be 220 bytes, but
// ignored by the processor. Commenting the sizes for posterity.
pub fn get_nested_macos_compressed_cbz_path() -> String {
    let mut fixture = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    fixture.push("integration-tests/data/nested-macos-compressed.cbz");
    fixture.to_string_lossy().into_owned()
}
}
Loading
Loading