From 5f9871b81b8210e1a5432644ff908c739559aa19 Mon Sep 17 00:00:00 2001 From: Aaron Leopold <36278431+aaronleopold@users.noreply.github.com> Date: Sat, 7 Sep 2024 23:32:30 -0700 Subject: [PATCH] =?UTF-8?q?=E2=9A=A1=20Refactor=20thumbnail=20generation?= =?UTF-8?q?=20and=20scanner=20IO=20operations=20(#426)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * :zap: Refactor thumbnail generation * :zap: Refactor scanner IO operations Push up last nights changes * debug: push to new tag for testing * async-ify IO operations outside scan context * debug: add tracing * wip: rename library options and add fields requires migration still * wip: rename library options, fix frontend * handwrite migration 😬 * fix ui * NON FUNCTIONAL: wip migration kill me * debug: stop auto pushing * fix migration? * wip: support processing options and restructure * 🥪 lunch allocator experiment * super wip: distroless image, zip downgrade * cleanup docker stuff * Revert "debug: stop auto pushing" This reverts commit bd6da98e6604cb05b5a93030ba1a2613d3786239. * remove missed feature * fix job upsert after refresh * cleanup comments and wip review * Reapply "debug: stop auto pushing" This reverts commit f43c187f4bfad818dea79eb283bbdd0e49d76348. * cleanup --- apps/server/Cargo.toml | 1 - apps/server/src/errors.rs | 11 +- apps/server/src/http_server.rs | 6 +- apps/server/src/routers/api/v1/emailer.rs | 5 +- apps/server/src/routers/api/v1/library.rs | 177 +++--- apps/server/src/routers/api/v1/media.rs | 84 ++- apps/server/src/routers/api/v1/series.rs | 79 ++- apps/server/src/routers/api/v1/user.rs | 7 +- apps/server/src/routers/opds/v1_2.rs | 4 +- apps/server/src/routers/opds/v2_0.rs | 7 +- apps/server/src/routers/utoipa.rs | 2 +- core/benches/benchmarks/library_scanner.rs | 34 +- core/integration-tests/tests/scanner.rs | 4 +- core/integration-tests/tests/utils.rs | 26 +- .../migration.sql | 87 +++ core/prisma/schema.prisma | 18 +- core/src/config/stump_config.rs | 35 +- .../entity/library/{options.rs => config.rs} | 20 +- core/src/db/entity/library/entity.rs | 73 +-- core/src/db/entity/library/mod.rs | 4 +- core/src/db/entity/library/prisma_macros.rs | 14 +- core/src/db/entity/media/entity.rs | 4 +- core/src/db/entity/media/prisma_macros.rs | 2 +- core/src/db/entity/series/prisma_macros.rs | 2 +- core/src/db/migration.rs | 14 +- core/src/db/query/pagination.rs | 4 +- core/src/filesystem/common.rs | 87 ++- core/src/filesystem/hash.rs | 2 +- core/src/filesystem/image/mod.rs | 18 +- .../filesystem/image/thumbnail/generate.rs | 146 +++++ .../image/thumbnail/generation_job.rs | 209 ++++--- .../src/filesystem/image/thumbnail/manager.rs | 173 ------ core/src/filesystem/image/thumbnail/mod.rs | 172 +----- core/src/filesystem/image/thumbnail/utils.rs | 77 +++ .../analyze_media_job/task_page_count.rs | 4 +- core/src/filesystem/media/builder.rs | 24 +- .../src/filesystem/media/{ => format}/epub.rs | 22 +- core/src/filesystem/media/format/mod.rs | 4 + core/src/filesystem/media/{ => format}/pdf.rs | 23 +- core/src/filesystem/media/{ => format}/rar.rs | 21 +- core/src/filesystem/media/{ => format}/zip.rs | 24 +- core/src/filesystem/media/mod.rs | 15 +- core/src/filesystem/media/process.rs | 191 ++++++- .../filesystem/media/{common.rs => utils.rs} | 0 .../filesystem/scanner/library_scan_job.rs | 117 ++-- .../src/filesystem/scanner/series_scan_job.rs | 34 +- core/src/filesystem/scanner/utils.rs | 539 ++++++++++++++---- core/src/job/progress.rs | 4 +- core/src/job/scheduler.rs | 11 +- 
core/src/lib.rs | 2 +- core/src/opds/v2_0/link.rs | 4 +- core/src/opds/v2_0/publication.rs | 12 +- docker/Dockerfile | 22 +- docker/collect_stats.sh | 40 ++ docker/entrypoint.sh | 30 +- .../guides/configuration/server-options.md | 20 + .../browser/src/__mocks__/resizeObserver.ts | 2 - .../src/components/jobs/JobOverlay.tsx | 108 ++-- .../library/createOrUpdate/schema.ts | 18 +- .../sections/FileConversionOptions.tsx | 4 +- .../sections/IgnoreRulesConfig.tsx | 2 +- .../sections/ScannerOptInFeatures.tsx | 89 +++ .../library/createOrUpdate/sections/index.ts | 1 + .../createLibrary/CreateLibraryForm.tsx | 4 +- .../createLibrary/CreateLibraryScene.tsx | 8 +- .../scenes/createLibrary/LibraryReview.tsx | 14 + .../tabs/settings/LibrarySettingsRouter.tsx | 4 +- .../FileConversionOptionsPatchForm.tsx | 4 +- .../settings/options/IgnoreRulesPatchForm.tsx | 10 +- .../settings/options/ScannerBehaviorScene.tsx | 4 +- .../options/ScannerFeaturesPatchForm.tsx | 53 ++ .../thumbnails/ThumbnailSettingsScene.tsx | 6 +- .../settings/server/email/emailers/schema.ts | 2 +- packages/client/src/hooks/useCoreEvent.ts | 3 + packages/client/src/stores/job.ts | 7 +- .../components/src/progress/ProgressBar.tsx | 14 +- packages/components/tailwind/tailwind.js | 9 + packages/i18n/src/locales/en.json | 19 +- packages/types/generated.ts | 14 +- 79 files changed, 1936 insertions(+), 1234 deletions(-) create mode 100644 core/prisma/migrations/20240904025817_rename_library_options_and_addtl_configs/migration.sql rename core/src/db/entity/library/{options.rs => config.rs} (75%) create mode 100644 core/src/filesystem/image/thumbnail/generate.rs delete mode 100644 core/src/filesystem/image/thumbnail/manager.rs create mode 100644 core/src/filesystem/image/thumbnail/utils.rs rename core/src/filesystem/media/{ => format}/epub.rs (96%) create mode 100644 core/src/filesystem/media/format/mod.rs rename core/src/filesystem/media/{ => format}/pdf.rs (93%) rename core/src/filesystem/media/{ => format}/rar.rs (96%) rename core/src/filesystem/media/{ => format}/zip.rs (95%) rename core/src/filesystem/media/{common.rs => utils.rs} (100%) create mode 100755 docker/collect_stats.sh create mode 100644 packages/browser/src/components/library/createOrUpdate/sections/ScannerOptInFeatures.tsx create mode 100644 packages/browser/src/scenes/library/tabs/settings/options/ScannerFeaturesPatchForm.tsx diff --git a/apps/server/Cargo.toml b/apps/server/Cargo.toml index af290bb1b..4d6de4705 100644 --- a/apps/server/Cargo.toml +++ b/apps/server/Cargo.toml @@ -66,4 +66,3 @@ openssl = { version = "0.10.61", features = ["vendored"] } [target.x86_64-unknown-linux-musl.dependencies] openssl = { version = "0.10.61", features = ["vendored"] } - diff --git a/apps/server/src/errors.rs b/apps/server/src/errors.rs index d8976266e..074e71c55 100644 --- a/apps/server/src/errors.rs +++ b/apps/server/src/errors.rs @@ -11,7 +11,10 @@ use prisma_client_rust::{ }; use stump_core::{ error::CoreError, - filesystem::{image::ProcessorError, FileError}, + filesystem::{ + image::{ProcessorError, ThumbnailGenerateError}, + FileError, + }, job::error::JobManagerError, opds::v2_0::OPDSV2Error, CoreEvent, @@ -178,6 +181,12 @@ impl From for APIError { } } +impl From<ThumbnailGenerateError> for APIError { + fn from(value: ThumbnailGenerateError) -> Self { + APIError::InternalServerError(value.to_string()) + } +} + impl APIError { pub fn forbidden_discreet() -> APIError { APIError::Forbidden(String::from( diff --git a/apps/server/src/http_server.rs b/apps/server/src/http_server.rs index 19d72b07c..9c35e9ec2
100644 --- a/apps/server/src/http_server.rs +++ b/apps/server/src/http_server.rs @@ -20,9 +20,9 @@ use stump_core::config::StumpConfig; pub async fn run_http_server(config: StumpConfig) -> ServerResult<()> { let core = StumpCore::new(config.clone()).await; - if let Err(err) = core.run_migrations().await { - tracing::error!("Failed to run migrations: {:?}", err); - return Err(ServerError::ServerStartError(err.to_string())); + if let Err(error) = core.run_migrations().await { + tracing::error!(?error, "Failed to run migrations"); + return Err(ServerError::ServerStartError(error.to_string())); } core.get_job_controller() diff --git a/apps/server/src/routers/api/v1/emailer.rs b/apps/server/src/routers/api/v1/emailer.rs index 122681527..52b347a9b 100644 --- a/apps/server/src/routers/api/v1/emailer.rs +++ b/apps/server/src/routers/api/v1/emailer.rs @@ -15,10 +15,11 @@ use stump_core::{ AttachmentMeta, EmailerConfig, EmailerConfigInput, EmailerSendRecord, EmailerSendTo, Media, RegisteredEmailDevice, SMTPEmailer, User, UserPermission, }, - filesystem::{read_entire_file, ContentType, FileParts, PathUtils}, + filesystem::{ContentType, FileParts, PathUtils}, prisma::{emailer, emailer_send_record, registered_email_device, user, PrismaClient}, AttachmentPayload, EmailContentType, }; +use tokio::fs; use utoipa::ToSchema; use crate::{ @@ -517,7 +518,7 @@ async fn send_attachment_email( extension, .. } = PathBuf::from(&book.path).file_parts(); - let content = read_entire_file(book.path)?; + let content = fs::read(book.path).await?; // TODO: should error? match (content.len(), max_attachment_size_bytes) { diff --git a/apps/server/src/routers/api/v1/library.rs b/apps/server/src/routers/api/v1/library.rs index d1431eaa5..b82ca4255 100644 --- a/apps/server/src/routers/api/v1/library.rs +++ b/apps/server/src/routers/api/v1/library.rs @@ -10,6 +10,7 @@ use serde::{Deserialize, Serialize}; use serde_qs::axum::QsQuery; use specta::Type; use std::path; +use tokio::fs; use tracing::{debug, error, trace}; use utoipa::ToSchema; @@ -17,9 +18,11 @@ use stump_core::{ config::StumpConfig, db::{ entity::{ - library_series_ids_media_ids_include, library_thumbnails_deletion_include, - macros::{library_tags_select, series_or_library_thumbnail}, - FileStatus, Library, LibraryOptions, LibraryScanMode, LibraryStats, Media, + macros::{ + library_series_ids_media_ids_include, library_tags_select, + library_thumbnails_deletion_include, series_or_library_thumbnail, + }, + FileStatus, Library, LibraryConfig, LibraryScanMode, LibraryStats, Media, Series, TagName, User, UserPermission, }, query::pagination::{Pageable, Pagination, PaginationQuery}, @@ -27,19 +30,19 @@ use stump_core::{ }, filesystem::{ analyze_media_job::AnalyzeMediaJob, - get_unknown_thumnail, + get_thumbnail, image::{ - self, generate_thumbnail, place_thumbnail, remove_thumbnails, ImageFormat, - ImageProcessorOptions, ThumbnailGenerationJob, ThumbnailGenerationJobParams, + self, generate_book_thumbnail, place_thumbnail, remove_thumbnails, + GenerateThumbnailOptions, ImageFormat, ImageProcessorOptions, + ThumbnailGenerationJob, ThumbnailGenerationJobParams, }, - read_entire_file, scanner::LibraryScanJob, - ContentType, FileParts, PathUtils, + ContentType, }, prisma::{ last_library_visit, library::{self, WhereParam}, - library_options, + library_config, media::{self, OrderByParam as MediaOrderByParam}, series::{self, OrderByParam as SeriesOrderByParam}, tag, user, @@ -195,7 +198,7 @@ async fn get_libraries( .library() .find_many(where_conditions.clone()) 
.with(library::tags::fetch(vec![])) - .with(library::library_options::fetch()) + .with(library::config::fetch()) .order_by(order_by); if !is_unpaged { @@ -388,7 +391,7 @@ async fn get_library_by_id( .chain([library_not_hidden_from_user_filter(user)]) .collect(), ) - .with(library::library_options::fetch()) + .with(library::config::fetch()) .with(library::tags::fetch(vec![])) .exec() .await? @@ -562,36 +565,21 @@ async fn get_library_media( Ok(Json(Pageable::from(media))) } -pub(crate) fn get_library_thumbnail( +pub(crate) async fn get_library_thumbnail( id: &str, first_series: series_or_library_thumbnail::Data, first_book: Option, image_format: Option, config: &StumpConfig, ) -> APIResult<(ContentType, Vec)> { - if let Some(format) = image_format.clone() { - let extension = format.extension(); - - let path = config - .get_thumbnails_dir() - .join(format!("{}.{}", id, extension)); + let generated_thumb = + get_thumbnail(config.get_thumbnails_dir(), id, image_format.clone()).await?; - if path.exists() { - tracing::trace!(?path, id, "Found generated library thumbnail"); - return Ok((ContentType::from(format), read_entire_file(path)?)); - } - } - - if let Some(path) = get_unknown_thumnail(id, config.get_thumbnails_dir()) { - tracing::debug!(path = ?path, id, "Found library thumbnail that does not align with config"); - let FileParts { extension, .. } = path.file_parts(); - return Ok(( - ContentType::from_extension(extension.as_str()), - read_entire_file(path)?, - )); + if let Some((content_type, bytes)) = generated_thumb { + Ok((content_type, bytes)) + } else { + get_series_thumbnail(&first_series.id, first_book, image_format, config).await } - - get_series_thumbnail(&first_series.id, first_book, image_format, config) } // TODO: ImageResponse for utoipa @@ -645,14 +633,15 @@ async fn get_library_thumbnail_handler( .ok_or(APIError::NotFound("Library has no series".to_string()))?; let first_book = first_series.media.first().cloned(); - let library_options = first_series + let library_config = first_series .library .as_ref() - .map(|l| l.library_options.clone()) - .map(LibraryOptions::from); - let image_format = library_options.and_then(|o| o.thumbnail_config.map(|c| c.format)); + .map(|l| l.config.clone()) + .map(LibraryConfig::from); + let image_format = library_config.and_then(|o| o.thumbnail_config.map(|c| c.format)); get_library_thumbnail(&id, first_series, first_book, image_format, &ctx.config) + .await .map(ImageResponse::from) } @@ -714,7 +703,7 @@ async fn patch_library_thumbnail( ]) .with( media::series::fetch() - .with(series::library::fetch().with(library::library_options::fetch())), + .with(series::library::fetch().with(library::config::fetch())), ) .exec() .await? @@ -731,8 +720,8 @@ async fn patch_library_thumbnail( .ok_or(APIError::NotFound(String::from("Series relation missing")))? .library()? .ok_or(APIError::NotFound(String::from("Library relation missing")))?; - let thumbnail_options = library - .library_options()? + let image_options = library + .config()? 
.thumbnail_config .to_owned() .map(ImageProcessorOptions::try_from) @@ -745,11 +734,20 @@ async fn patch_library_thumbnail( }) .with_page(target_page); - let format = thumbnail_options.format.clone(); - let path_buf = generate_thumbnail(&id, &media.path, thumbnail_options, &ctx.config)?; + let format = image_options.format.clone(); + let (_, path_buf, _) = generate_book_thumbnail( + &media, + GenerateThumbnailOptions { + image_options, + core_config: ctx.config.as_ref().clone(), + force_regen: true, + }, + ) + .await?; + Ok(ImageResponse::from(( ContentType::from(format), - read_entire_file(path_buf)?, + fs::read(path_buf).await?, ))) } @@ -792,11 +790,11 @@ async fn replace_library_thumbnail( ), } - let path_buf = place_thumbnail(&library_id, ext, &bytes, &ctx.config)?; + let path_buf = place_thumbnail(&library_id, ext, &bytes, &ctx.config).await?; Ok(ImageResponse::from(( content_type, - read_entire_file(path_buf)?, + fs::read(path_buf).await?, ))) } @@ -815,7 +813,6 @@ async fn replace_library_thumbnail( (status = 500, description = "Internal server error") ) )] -// TODO: make this a queuable job async fn delete_library_thumbnails( Path(id): Path, State(ctx): State, @@ -884,12 +881,12 @@ async fn generate_library_thumbnails( library::id::equals(id.clone()), library_not_hidden_from_user_filter(&user), ]) - .with(library::library_options::fetch()) + .with(library::config::fetch()) .exec() .await? .ok_or(APIError::NotFound("Library not found".to_string()))?; - let library_options = library.library_options()?.to_owned(); - let existing_options = if let Some(config) = library_options.thumbnail_config { + let library_config = library.config()?.to_owned(); + let existing_options = if let Some(config) = library_config.thumbnail_config { // I hard error here so that we don't accidentally generate thumbnails in an invalid or // otherwise undesired way per the existing (but not properly parsed) config Some(ImageProcessorOptions::try_from(config)?) @@ -1242,7 +1239,7 @@ pub struct CreateLibrary { pub scan_mode: Option, /// Optional options to apply to the library. When not provided, the default options will be used. 
#[specta(optional)] - pub library_options: Option, + pub config: Option, } #[utoipa::path( @@ -1289,33 +1286,39 @@ async fn create_library( // TODO(prisma-nested-create): Refactor once nested create is supported // https://github.com/Brendonovich/prisma-client-rust/issues/44 - let library_options_arg = input.library_options.unwrap_or_default(); + let library_config = input.config.unwrap_or_default(); let transaction_result: Result = db ._transaction() .with_timeout(Duration::seconds(30).num_milliseconds() as u64) .run(|client| async move { - let ignore_rules = (!library_options_arg.ignore_rules.is_empty()) - .then(|| library_options_arg.ignore_rules.as_bytes()) + let ignore_rules = (!library_config.ignore_rules.is_empty()) + .then(|| library_config.ignore_rules.as_bytes()) .transpose()?; - let thumbnail_config = library_options_arg + let thumbnail_config = library_config .thumbnail_config .map(|options| options.as_bytes()) .transpose()?; - let library_options = client - .library_options() + let library_config = client + .library_config() .create(vec![ - library_options::convert_rar_to_zip::set( - library_options_arg.convert_rar_to_zip, + library_config::convert_rar_to_zip::set( + library_config.convert_rar_to_zip, + ), + library_config::hard_delete_conversions::set( + library_config.hard_delete_conversions, ), - library_options::hard_delete_conversions::set( - library_options_arg.hard_delete_conversions, + library_config::process_metadata::set( + library_config.process_metadata, ), - library_options::library_pattern::set( - library_options_arg.library_pattern.to_string(), + library_config::generate_file_hashes::set( + library_config.generate_file_hashes, ), - library_options::thumbnail_config::set(thumbnail_config), - library_options::ignore_rules::set(ignore_rules), + library_config::library_pattern::set( + library_config.library_pattern.to_string(), + ), + library_config::thumbnail_config::set(thumbnail_config), + library_config::ignore_rules::set(ignore_rules), ]) .exec() .await?; @@ -1368,7 +1371,7 @@ async fn create_library( .create( input.name.to_owned(), input.path.to_owned(), - library_options::id::equals(library_options.id.clone()), + library_config::id::equals(library_config.id.clone()), chain_optional_iter( [library::description::set(input.description.to_owned())], [(!library_tags.is_empty()).then(|| { @@ -1384,21 +1387,21 @@ async fn create_library( .exec() .await?; - let library_options = client - .library_options() + let library_config = client + .library_config() .update( - library_options::id::equals(library_options.id), + library_config::id::equals(library_config.id), vec![ - library_options::library::connect(library::id::equals( + library_config::library::connect(library::id::equals( library.id.clone(), )), - library_options::library_id::set(Some(library.id.clone())), + library_config::library_id::set(Some(library.id.clone())), ], ) .exec() .await?; - Ok(Library::from((library, library_options))) + Ok(Library::from((library, library_config))) }) .await; @@ -1436,7 +1439,7 @@ pub struct UpdateLibrary { #[specta(optional)] pub tags: Option>, /// The updated options of the library. - pub library_options: LibraryOptions, + pub config: LibraryConfig, /// Optional flag to indicate how the library should be automatically scanned after update. Default is `BATCHED`. 
#[serde(default)] pub scan_mode: Option, @@ -1493,28 +1496,34 @@ async fn update_library( ._transaction() .with_timeout(Duration::seconds(30).num_milliseconds() as u64) .run(|client| async move { - let library_options = input.library_options.to_owned(); - let ignore_rules = (!library_options.ignore_rules.is_empty()) - .then(|| library_options.ignore_rules.as_bytes()) + let library_config = input.config.to_owned(); + let ignore_rules = (!library_config.ignore_rules.is_empty()) + .then(|| library_config.ignore_rules.as_bytes()) .transpose()?; - let thumbnail_config = library_options + let thumbnail_config = library_config .thumbnail_config .map(|options| options.as_bytes()) .transpose()?; client - .library_options() + .library_config() .update( - library_options::id::equals(library_options.id.unwrap_or_default()), + library_config::id::equals(library_config.id.unwrap_or_default()), vec![ - library_options::convert_rar_to_zip::set( - library_options.convert_rar_to_zip, + library_config::convert_rar_to_zip::set( + library_config.convert_rar_to_zip, + ), + library_config::hard_delete_conversions::set( + library_config.hard_delete_conversions, + ), + library_config::process_metadata::set( + library_config.process_metadata, ), - library_options::hard_delete_conversions::set( - library_options.hard_delete_conversions, + library_config::generate_file_hashes::set( + library_config.generate_file_hashes, ), - library_options::ignore_rules::set(ignore_rules), - library_options::thumbnail_config::set(thumbnail_config), + library_config::ignore_rules::set(ignore_rules), + library_config::thumbnail_config::set(thumbnail_config), ], ) .exec() diff --git a/apps/server/src/routers/api/v1/media.rs b/apps/server/src/routers/api/v1/media.rs index c3ac00942..e95cda400 100644 --- a/apps/server/src/routers/api/v1/media.rs +++ b/apps/server/src/routers/api/v1/media.rs @@ -23,7 +23,7 @@ use stump_core::{ finished_reading_session_with_book_pages, media_thumbnail, reading_session_with_book_pages, }, - ActiveReadingSession, FinishedReadingSession, LibraryOptions, Media, + ActiveReadingSession, FinishedReadingSession, LibraryConfig, Media, PageDimension, PageDimensionsEntity, ProgressUpdateReturn, User, UserPermission, }, @@ -32,13 +32,12 @@ use stump_core::{ }, filesystem::{ analyze_media_job::AnalyzeMediaJob, - get_unknown_thumnail, + get_page_async, get_thumbnail, image::{ - generate_thumbnail, place_thumbnail, remove_thumbnails, ImageFormat, - ImageProcessorOptions, + generate_book_thumbnail, place_thumbnail, remove_thumbnails, + GenerateThumbnailOptions, ImageFormat, ImageProcessorOptions, }, - media::get_page, - read_entire_file, ContentType, FileParts, PathUtils, + ContentType, }, prisma::{ active_reading_session, finished_reading_session, library, @@ -47,6 +46,7 @@ use stump_core::{ }, Ctx, }; +use tokio::fs; use tracing::error; use utoipa::ToSchema; @@ -355,7 +355,7 @@ pub fn apply_media_restrictions_for_user(user: &User) -> Vec { )] /// Get all media accessible to the requester. This is a paginated request, and /// has various pagination params available. -#[tracing::instrument(skip(ctx))] +#[tracing::instrument(err, ret, skip(ctx))] async fn get_media( filter_query: QsQuery>, pagination_query: Query, @@ -961,7 +961,7 @@ async fn get_media_page( page, id ))) } else { - Ok(get_page(&media.path, page, &ctx.config)?.into()) + Ok(get_page_async(&media.path, page, &ctx.config).await?.into()) } } @@ -991,46 +991,29 @@ pub(crate) async fn get_media_thumbnail_by_id( .await? 
.ok_or_else(|| APIError::NotFound("Book not found".to_string()))?; - let library_options = book + let library_config = book .series - .and_then(|s| s.library.map(|l| l.library_options)) - .map(LibraryOptions::from); - let image_format = library_options.and_then(|o| o.thumbnail_config.map(|c| c.format)); + .and_then(|s| s.library.map(|l| l.config)) + .map(LibraryConfig::from); + let image_format = library_config.and_then(|o| o.thumbnail_config.map(|c| c.format)); - get_media_thumbnail(&book.id, &book.path, image_format, config) + get_media_thumbnail(&book.id, &book.path, image_format, config).await } -pub(crate) fn get_media_thumbnail( +pub(crate) async fn get_media_thumbnail( id: &str, path: &str, - target_format: Option<ImageFormat>, + image_format: Option<ImageFormat>, config: &StumpConfig, ) -> APIResult<(ContentType, Vec<u8>)> { - if let Some(format) = target_format { - let extension = format.extension(); - let thumbnail_path = config - .get_thumbnails_dir() - .join(format!("{}.{}", id, extension)); - - if thumbnail_path.exists() { - tracing::trace!(path = ?thumbnail_path, media_id = id, "Found generated media thumbnail"); - return Ok((ContentType::from(format), read_entire_file(thumbnail_path)?)); - } - } + let generated_thumb = + get_thumbnail(config.get_thumbnails_dir(), id, image_format).await?; - if let Some(path) = get_unknown_thumnail(id, config.get_thumbnails_dir()) { - // If there exists a file that starts with the media id in the thumbnails dir, - // then return it. This might happen if a user manually regenerates thumbnails - // via the API without updating the thumbnail config... - tracing::debug!(path = ?path, media_id = id, "Found media thumbnail that does not align with config"); - let FileParts { extension, .. } = path.file_parts(); - return Ok(( - ContentType::from_extension(extension.as_str()), - read_entire_file(path)?, - )); + if let Some((content_type, bytes)) = generated_thumb { + Ok((content_type, bytes)) + } else { + Ok(get_page_async(path, 1, config).await?) } - - Ok(get_page(path, 1, config)?) } // TODO: ImageResponse as body type @@ -1120,7 +1103,7 @@ async fn patch_media_thumbnail( .find_first(where_params) .with( media::series::fetch() - .with(series::library::fetch().with(library::library_options::fetch())), + .with(series::library::fetch().with(library::config::fetch())), ) .exec() .await? @@ -1135,8 +1118,8 @@ .ok_or(APIError::NotFound(String::from("Series relation missing")))? .library()? .ok_or(APIError::NotFound(String::from("Library relation missing")))?; - let thumbnail_options = library - .library_options()? + let image_options = library + .config()?
.thumbnail_config .to_owned() .map(ImageProcessorOptions::try_from) @@ -1149,11 +1132,20 @@ async fn patch_media_thumbnail( }) .with_page(target_page); - let format = thumbnail_options.format.clone(); - let path_buf = generate_thumbnail(&id, &media.path, thumbnail_options, &ctx.config)?; + let format = image_options.format.clone(); + let (_, path_buf, _) = generate_book_thumbnail( + &media, + GenerateThumbnailOptions { + image_options, + core_config: ctx.config.as_ref().clone(), + force_regen: true, + }, + ) + .await?; + Ok(ImageResponse::from(( ContentType::from(format), - read_entire_file(path_buf)?, + fs::read(path_buf).await?, ))) } @@ -1216,11 +1208,11 @@ async fn replace_media_thumbnail( ); } - let path_buf = place_thumbnail(&book_id, ext, &bytes, &ctx.config)?; + let path_buf = place_thumbnail(&book_id, ext, &bytes, &ctx.config).await?; Ok(ImageResponse::from(( content_type, - read_entire_file(path_buf)?, + fs::read(path_buf).await?, ))) } diff --git a/apps/server/src/routers/api/v1/series.rs b/apps/server/src/routers/api/v1/series.rs index deb583892..c8a19fb4e 100644 --- a/apps/server/src/routers/api/v1/series.rs +++ b/apps/server/src/routers/api/v1/series.rs @@ -17,7 +17,7 @@ use stump_core::{ macros::{ finished_reading_session_series_complete, series_or_library_thumbnail, }, - LibraryOptions, Media, Series, User, UserPermission, + LibraryConfig, Media, Series, User, UserPermission, }, query::{ ordering::QueryOrder, @@ -27,14 +27,13 @@ use stump_core::{ }, filesystem::{ analyze_media_job::AnalyzeMediaJob, - get_unknown_thumnail, + get_thumbnail, image::{ - generate_thumbnail, place_thumbnail, remove_thumbnails, ImageFormat, - ImageProcessorOptions, + generate_book_thumbnail, place_thumbnail, remove_thumbnails, + GenerateThumbnailOptions, ImageFormat, ImageProcessorOptions, }, - read_entire_file, scanner::SeriesScanJob, - ContentType, FileParts, PathUtils, + ContentType, }, prisma::{ active_reading_session, finished_reading_session, library, @@ -44,6 +43,7 @@ use stump_core::{ series_metadata, }, }; +use tokio::fs; use tracing::{error, trace}; use utoipa::ToSchema; @@ -515,35 +515,19 @@ async fn get_recently_added_series_handler( Ok(Json(recently_added_series)) } -pub(crate) fn get_series_thumbnail( +pub(crate) async fn get_series_thumbnail( id: &str, first_book: Option, image_format: Option, config: &StumpConfig, ) -> APIResult<(ContentType, Vec)> { - let thumbnails_dir = config.get_thumbnails_dir(); + let generated_thumb = + get_thumbnail(config.get_thumbnails_dir(), id, image_format.clone()).await?; - if let Some(format) = image_format.clone() { - let extension = format.extension(); - let path = thumbnails_dir.join(format!("{}.{}", id, extension)); - - if path.exists() { - tracing::trace!(?path, id, "Found generated series thumbnail"); - return Ok((ContentType::from(format), read_entire_file(path)?)); - } - } - - if let Some(path) = get_unknown_thumnail(id, thumbnails_dir) { - tracing::debug!(path = ?path, id, "Found series thumbnail that does not align with config"); - let FileParts { extension, .. 
} = path.file_parts(); - return Ok(( - ContentType::from_extension(extension.as_str()), - read_entire_file(path)?, - )); - } - - if let Some(first_book) = first_book { - get_media_thumbnail(&first_book.id, &first_book.path, image_format, config) + if let Some((content_type, bytes)) = generated_thumb { + Ok((content_type, bytes)) + } else if let Some(first_book) = first_book { + get_media_thumbnail(&first_book.id, &first_book.path, image_format, config).await } else { Err(APIError::NotFound( "Series does not have a thumbnail".to_string(), @@ -551,7 +535,7 @@ pub(crate) fn get_series_thumbnail( } } -// TODO: ImageResponse type for body +/// Returns the thumbnail image for a series #[utoipa::path( get, path = "/api/v1/series/:id/thumbnail", @@ -566,7 +550,7 @@ pub(crate) fn get_series_thumbnail( (status = 500, description = "Internal server error."), ) )] -/// Returns the thumbnail image for a series +#[tracing::instrument(err, skip(ctx))] async fn get_series_thumbnail_handler( Path(id): Path, State(ctx): State, @@ -601,13 +585,11 @@ async fn get_series_thumbnail_handler( .ok_or(APIError::NotFound("Series not found".to_string()))?; let first_book = series.media.into_iter().next(); - let library_options = series - .library - .map(|l| l.library_options) - .map(LibraryOptions::from); - let image_format = library_options.and_then(|o| o.thumbnail_config.map(|c| c.format)); + let library_config = series.library.map(|l| l.config).map(LibraryConfig::from); + let image_format = library_config.and_then(|o| o.thumbnail_config.map(|c| c.format)); get_series_thumbnail(&id, first_book, image_format, &ctx.config) + .await .map(ImageResponse::from) } @@ -686,7 +668,7 @@ async fn patch_series_thumbnail( .find_first(media_where_params) .with( media::series::fetch() - .with(series::library::fetch().with(library::library_options::fetch())), + .with(series::library::fetch().with(library::config::fetch())), ) .exec() .await? @@ -701,8 +683,8 @@ async fn patch_series_thumbnail( .ok_or(APIError::NotFound(String::from("Series relation missing")))? .library()? .ok_or(APIError::NotFound(String::from("Library relation missing")))?; - let thumbnail_options = library - .library_options()? + let image_options = library + .config()? 
.thumbnail_config .to_owned() .map(ImageProcessorOptions::try_from) @@ -715,11 +697,20 @@ async fn patch_series_thumbnail( }) .with_page(target_page); - let format = thumbnail_options.format.clone(); - let path_buf = generate_thumbnail(&id, &media.path, thumbnail_options, &ctx.config)?; + let format = image_options.format.clone(); + let (_, path_buf, _) = generate_book_thumbnail( + &media, + GenerateThumbnailOptions { + image_options, + core_config: ctx.config.as_ref().clone(), + force_regen: true, + }, + ) + .await?; + Ok(ImageResponse::from(( ContentType::from(format), - read_entire_file(path_buf)?, + fs::read(path_buf).await?, ))) } @@ -782,11 +773,11 @@ async fn replace_series_thumbnail( ), } - let path_buf = place_thumbnail(&series_id, ext, &bytes, &ctx.config)?; + let path_buf = place_thumbnail(&series_id, ext, &bytes, &ctx.config).await?; Ok(ImageResponse::from(( content_type, - read_entire_file(path_buf)?, + fs::read(path_buf).await?, ))) } diff --git a/apps/server/src/routers/api/v1/user.rs b/apps/server/src/routers/api/v1/user.rs index 76d95043b..787c93eae 100644 --- a/apps/server/src/routers/api/v1/user.rs +++ b/apps/server/src/routers/api/v1/user.rs @@ -19,14 +19,13 @@ use stump_core::{ }, query::pagination::{Pageable, Pagination, PaginationQuery}, }, - filesystem::{ - get_unknown_image, read_entire_file, ContentType, FileParts, PathUtils, - }, + filesystem::{get_unknown_image, ContentType, FileParts, PathUtils}, prisma::{ age_restriction, session, user, user_login_activity, user_preferences, PrismaClient, }, }; +use tokio::fs; use tower_sessions::Session; use tracing::{debug, trace}; use utoipa::ToSchema; @@ -1107,7 +1106,7 @@ async fn get_user_avatar( if let Some(local_file) = get_unknown_image(base_path) { let FileParts { extension, .. } = local_file.file_parts(); let content_type = ContentType::from_extension(extension.as_str()); - let bytes = read_entire_file(local_file)?; + let bytes = fs::read(local_file).await?; Ok(ImageResponse::new(content_type, bytes)) } else { Err(APIError::NotFound("User avatar not found".to_string())) diff --git a/apps/server/src/routers/opds/v1_2.rs b/apps/server/src/routers/opds/v1_2.rs index 7cab843ed..dd5ca16be 100644 --- a/apps/server/src/routers/opds/v1_2.rs +++ b/apps/server/src/routers/opds/v1_2.rs @@ -8,8 +8,8 @@ use prisma_client_rust::{chrono, Direction}; use stump_core::{ db::{entity::UserPermission, query::pagination::PageQuery}, filesystem::{ + get_page_async, image::{GenericImageProcessor, ImageProcessor, ImageProcessorOptions}, - media::get_page, ContentType, }, opds::v1_2::{ @@ -638,7 +638,7 @@ async fn get_book_page( } let (content_type, image_buffer) = - get_page(book.path.as_str(), correct_page, &ctx.config)?; + get_page_async(book.path.as_str(), correct_page, &ctx.config).await?; handle_opds_image_response(content_type, image_buffer) } diff --git a/apps/server/src/routers/opds/v2_0.rs b/apps/server/src/routers/opds/v2_0.rs index e5d128e57..2e43fd7fd 100644 --- a/apps/server/src/routers/opds/v2_0.rs +++ b/apps/server/src/routers/opds/v2_0.rs @@ -1,3 +1,5 @@ +use std::path::PathBuf; + use axum::{ extract::{Path, Query, State}, middleware, @@ -17,7 +19,7 @@ use stump_core::{ }, query::pagination::PageQuery, }, - filesystem::media::get_page, + filesystem::get_page_async, opds::v2_0::{ authentication::{ OPDSAuthenticationDocument, OPDSAuthenticationDocumentBuilder, @@ -842,7 +844,8 @@ async fn fetch_book_page_for_user( .await? 
.ok_or(APIError::NotFound(String::from("Book not found")))?; - let (content_type, image_buffer) = get_page(book.path.as_str(), page, &ctx.config)?; + let (content_type, image_buffer) = + get_page_async(PathBuf::from(book.path), page, &ctx.config).await?; Ok(ImageResponse::new(content_type, image_buffer)) } diff --git a/apps/server/src/routers/utoipa.rs b/apps/server/src/routers/utoipa.rs index 2e9af8b6f..bd6ab8add 100644 --- a/apps/server/src/routers/utoipa.rs +++ b/apps/server/src/routers/utoipa.rs @@ -135,7 +135,7 @@ use super::api::{ ), components( schemas( - Library, LibraryOptions, Media, ReadingList, ActiveReadingSession, FinishedReadingSession, Series, Tag, User, + Library, LibraryConfig, Media, ReadingList, ActiveReadingSession, FinishedReadingSession, Series, Tag, User, UserPreferences, LibraryPattern, LibraryScanMode, LogLevel, ClaimResponse, StumpVersion, FileStatus, PageableDirectoryListing, DirectoryListing, DirectoryListingFile, CursorInfo, PageInfo, PageableLibraries, diff --git a/core/benches/benchmarks/library_scanner.rs b/core/benches/benchmarks/library_scanner.rs index 952d691c3..10acf5fca 100644 --- a/core/benches/benchmarks/library_scanner.rs +++ b/core/benches/benchmarks/library_scanner.rs @@ -10,11 +10,11 @@ use stump_core::{ config::StumpConfig, db::{ create_client_with_url, - entity::{Library, LibraryOptions}, + entity::{Library, LibraryConfig}, }, filesystem::scanner::LibraryScanJob, job::{Executor, WorkerCtx, WrappedJob}, - prisma::{library, library_options, PrismaClient}, + prisma::{library, library_config, PrismaClient}, }; use tempfile::{Builder as TempDirBuilder, TempDir}; use tokio::{ @@ -40,7 +40,7 @@ impl Display for BenchmarkSize { } fn full_scan(c: &mut Criterion) { - static SIZES: [BenchmarkSize; 3] = [ + static SIZES: [BenchmarkSize; 4] = [ BenchmarkSize { series_count: 10, media_per_series: 10, @@ -54,10 +54,10 @@ fn full_scan(c: &mut Criterion) { media_per_series: 100, }, // Note: This benchmark is a time hog, so I have commented it out for now - // BenchmarkSize { - // series_count: 100, - // media_per_series: 1000, - // }, + BenchmarkSize { + series_count: 100, + media_per_series: 1000, + }, ]; let mut group = c.benchmark_group("full_scan"); @@ -129,7 +129,7 @@ async fn create_test_library( let library_temp_dir = TempDirBuilder::new().prefix("ROOT").tempdir()?; let library_temp_dir_path = library_temp_dir.path().to_str().unwrap().to_string(); - let library_options = client.library_options().create(vec![]).exec().await?; + let library_config = client.library_config().create(vec![]).exec().await?; let id = Uuid::new_v4().to_string(); let library = client @@ -137,21 +137,19 @@ async fn create_test_library( .create( id.clone(), library_temp_dir_path.clone(), - library_options::id::equals(library_options.id.clone()), + library_config::id::equals(library_config.id.clone()), vec![library::id::set(id.clone())], ) .exec() .await?; - let library_options = client - .library_options() + let library_config = client + .library_config() .update( - library_options::id::equals(library_options.id), + library_config::id::equals(library_config.id), vec![ - library_options::library::connect(library::id::equals( - library.id.clone(), - )), - library_options::library_id::set(Some(library.id.clone())), + library_config::library::connect(library::id::equals(library.id.clone())), + library_config::library_id::set(Some(library.id.clone())), ], ) .exec() @@ -192,7 +190,7 @@ async fn create_test_library( tracing::info!("Library created!"); let library = Library { - 
library_options: LibraryOptions::from(library_options), + config: LibraryConfig::from(library_config), ..Library::from(library) }; @@ -208,7 +206,7 @@ async fn setup_test( let job = WrappedJob::new(LibraryScanJob { id: library.id.clone(), path: library.path.clone(), - options: Some(library.library_options.clone()), + options: Some(library.config.clone()), }); let job_id = Uuid::new_v4().to_string(); diff --git a/core/integration-tests/tests/scanner.rs b/core/integration-tests/tests/scanner.rs index e41675ac8..ebf8cc0c3 100644 --- a/core/integration-tests/tests/scanner.rs +++ b/core/integration-tests/tests/scanner.rs @@ -48,7 +48,7 @@ async fn series_based_library_batch_scan() -> CoreResult<()> { let ctx = Ctx::mock().await; let client = &ctx.db; - let (library, _library_options, _tmp) = + let (library, _library_config, _tmp) = TempLibrary::create(client, LibraryPattern::SeriesBased, LibraryScanMode::None) .await?; @@ -75,7 +75,7 @@ async fn collection_based_library_batch_scan() -> CoreResult<()> { let ctx = Ctx::mock().await; let client = &ctx.db; - let (library, _library_options, _tmp) = TempLibrary::create( + let (library, _library_config, _tmp) = TempLibrary::create( client, LibraryPattern::CollectionBased, LibraryScanMode::None, diff --git a/core/integration-tests/tests/utils.rs b/core/integration-tests/tests/utils.rs index b6f1266ab..428eb566a 100644 --- a/core/integration-tests/tests/utils.rs +++ b/core/integration-tests/tests/utils.rs @@ -11,7 +11,7 @@ use stump_core::{ fs::scanner::scan, job::{persist_new_job, runner::RunnerCtx, LibraryScanJob}, prelude::{CoreResult, Ctx}, - prisma::{library, library_options, PrismaClient}, + prisma::{library, library_config, PrismaClient}, }; // https://web.mit.edu/rust-lang_v1.25/arch/amd64_ubuntu1404/share/doc/rust/html/book/second-edition/ch11-03-test-organization.html @@ -147,7 +147,7 @@ impl TempLibrary { client: &PrismaClient, pattern: LibraryPattern, scan_mode: LibraryScanMode, - ) -> CoreResult<(library::Data, library_options::Data, TempLibrary)> { + ) -> CoreResult<(library::Data, library_config::Data, TempLibrary)> { let temp_library = match pattern { LibraryPattern::CollectionBased => TempLibrary::collection_library()?, LibraryPattern::SeriesBased => TempLibrary::series_library()?, @@ -161,7 +161,7 @@ impl TempLibrary { /// A helper to create a collection based library used in the epub tests. 
pub async fn epub_library( client: &PrismaClient, - ) -> CoreResult<(library::Data, library_options::Data, TempLibrary)> { + ) -> CoreResult<(library::Data, library_config::Data, TempLibrary)> { let _tmp = TempLibrary::collection_library()?; let (library, options) = _tmp.insert(client, LibraryScanMode::Batched).await?; @@ -183,7 +183,7 @@ impl TempLibrary { &self, client: &PrismaClient, scan_mode: LibraryScanMode, - ) -> CoreResult<(library::Data, library_options::Data)> { + ) -> CoreResult<(library::Data, library_config::Data)> { let (library, options) = create_library( client, self.get_name(), @@ -321,29 +321,29 @@ pub async fn create_library( library_path: &str, pattern: LibraryPattern, scan_mode: LibraryScanMode, -) -> CoreResult<(library::Data, library_options::Data)> { - let library_options_result = client - .library_options() - .create(vec![library_options::library_pattern::set( +) -> CoreResult<(library::Data, library_config::Data)> { + let library_config_result = client + .library_config() + .create(vec![library_config::library_pattern::set( pattern.to_string(), )]) .exec() .await; assert!( - library_options_result.is_ok(), + library_config_result.is_ok(), "Failed to create library options: {:?}", - library_options_result + library_config_result ); - let library_options = library_options_result.unwrap(); + let library_config = library_config_result.unwrap(); let library = client .library() .create( name.into(), library_path.into(), - library_options::id::equals(library_options.id.clone()), + library_config::id::equals(library_config.id.clone()), vec![], ) .exec() @@ -362,5 +362,5 @@ pub async fn create_library( // println!("Created library at {:?}", library_path); - Ok((library, library_options)) + Ok((library, library_config)) } diff --git a/core/prisma/migrations/20240904025817_rename_library_options_and_addtl_configs/migration.sql b/core/prisma/migrations/20240904025817_rename_library_options_and_addtl_configs/migration.sql new file mode 100644 index 000000000..bd26fa980 --- /dev/null +++ b/core/prisma/migrations/20240904025817_rename_library_options_and_addtl_configs/migration.sql @@ -0,0 +1,87 @@ +BEGIN TRANSACTION; +CREATE TABLE "library_configs" ( + "id" TEXT NOT NULL PRIMARY KEY, + "library_pattern" TEXT NOT NULL DEFAULT 'SERIES_BASED', + "convert_rar_to_zip" BOOLEAN NOT NULL DEFAULT FALSE, + "hard_delete_conversions" BOOLEAN NOT NULL DEFAULT FALSE, + "generate_file_hashes" BOOLEAN NOT NULL DEFAULT FALSE, + "process_metadata" BOOLEAN NOT NULL DEFAULT TRUE, + "thumbnail_config" BLOB, + "ignore_rules" BLOB, + "library_id" TEXT +); +INSERT INTO "library_configs" ( + "convert_rar_to_zip", + "hard_delete_conversions", + "id", + "library_id", + "library_pattern", + "thumbnail_config" + ) +SELECT "convert_rar_to_zip", + "hard_delete_conversions", + "id", + "library_id", + "library_pattern", + "thumbnail_config" +FROM "library_options"; +-- RedefineTables +PRAGMA foreign_keys = OFF; +CREATE TABLE "new_libraries" ( + "id" TEXT NOT NULL PRIMARY KEY, + "name" TEXT NOT NULL, + "description" TEXT, + "path" TEXT NOT NULL, + "status" TEXT NOT NULL DEFAULT 'READY', + "updated_at" DATETIME NOT NULL, + "created_at" DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP, + "emoji" TEXT, + "config_id" TEXT NOT NULL, + "job_schedule_config_id" TEXT, + CONSTRAINT "libraries_config_id_fkey" FOREIGN KEY ("config_id") REFERENCES "library_configs" ("id") ON DELETE RESTRICT ON UPDATE CASCADE, + CONSTRAINT "libraries_job_schedule_config_id_fkey" FOREIGN KEY ("job_schedule_config_id") REFERENCES 
"job_schedule_configs" ("id") ON DELETE + SET NULL ON UPDATE CASCADE +); +INSERT INTO "new_libraries" ( + "created_at", + "description", + "emoji", + "id", + "config_id", + "job_schedule_config_id", + "name", + "path", + "status", + "updated_at" + ) +SELECT "created_at", + "description", + "emoji", + "id", + "library_options_id", + "job_schedule_config_id", + "name", + "path", + "status", + "updated_at" +FROM "libraries"; +DROP TABLE "libraries"; +ALTER TABLE "new_libraries" + RENAME TO "libraries"; +CREATE UNIQUE INDEX "libraries_name_key" ON "libraries" ("name"); +CREATE UNIQUE INDEX "libraries_path_key" ON "libraries" ("path"); +CREATE UNIQUE INDEX "libraries_config_id_key" ON "libraries" ("config_id"); +/* + Warnings: + + - You are about to drop the `library_options` table. If the table is not empty, all the data it contains will be lost. + - You are about to drop the column `library_options_id` on the `libraries` table. All the data in the column will be lost. + - Added the required column `config_id` to the `libraries` table without a default value. This is not possible if the table is not empty. + */ +-- DropTable +PRAGMA foreign_keys = off; +DROP TABLE "library_options"; +PRAGMA foreign_keys = ON; +PRAGMA foreign_key_check; +PRAGMA foreign_keys = ON; +COMMIT; \ No newline at end of file diff --git a/core/prisma/schema.prisma b/core/prisma/schema.prisma index 7ad7740b1..275ec91ec 100644 --- a/core/prisma/schema.prisma +++ b/core/prisma/schema.prisma @@ -96,8 +96,8 @@ model Library { series Series[] - library_options LibraryOptions @relation(fields: [library_options_id], references: [id]) - library_options_id String @unique + config LibraryConfig @relation(fields: [config_id], references: [id]) + config_id String @unique tags Tag[] hidden_from_users User[] @@ -109,18 +109,22 @@ model Library { @@map("libraries") } -model LibraryOptions { +model LibraryConfig { id String @id @default(uuid()) + library_pattern String @default("SERIES_BASED") // SERIES_BASED or COLLECTION_BASED + // Scanning opt-in settings convert_rar_to_zip Boolean @default(false) hard_delete_conversions Boolean @default(false) - library_pattern String @default("SERIES_BASED") // SERIES_BASED or COLLECTION_BASED - thumbnail_config Bytes? // { size_factor: "...", format: "...", quality: ... } - ignore_rules Bytes? // ["glob1", "glob2", ...] + generate_file_hashes Boolean @default(false) + process_metadata Boolean @default(true) + + thumbnail_config Bytes? // { size_factor: "...", format: "...", quality: ... } + ignore_rules Bytes? // ["glob1", "glob2", ...] library_id String? library Library? 
- @@map("library_options") + @@map("library_configs") } model LastLibraryVisit { diff --git a/core/src/config/stump_config.rs b/core/src/config/stump_config.rs index 454f86983..35e85deb7 100644 --- a/core/src/config/stump_config.rs +++ b/core/src/config/stump_config.rs @@ -29,7 +29,8 @@ pub mod env_keys { pub const HASH_COST_KEY: &str = "HASH_COST"; pub const SESSION_TTL_KEY: &str = "SESSION_TTL"; pub const SESSION_EXPIRY_INTERVAL_KEY: &str = "SESSION_EXPIRY_CLEANUP_INTERVAL"; - pub const SCANNER_CHUNK_SIZE_KEY: &str = "STUMP_SCANNER_CHUNK_SIZE"; + pub const MAX_SCANNER_CONCURRENCY_KEY: &str = "STUMP_MAX_SCANNER_CONCURRENCY"; + pub const MAX_THUMBNAIL_CONCURRENCY_KEY: &str = "STUMP_MAX_THUMBNAIL_CONCURRENCY"; } use env_keys::*; @@ -38,7 +39,8 @@ pub mod defaults { pub const DEFAULT_SESSION_TTL: i64 = 3600 * 24 * 3; // 3 days pub const DEFAULT_ACCESS_TOKEN_TTL: i64 = 3600 * 24; // 1 days pub const DEFAULT_SESSION_EXPIRY_CLEANUP_INTERVAL: u64 = 60 * 60 * 24; // 24 hours - pub const DEFAULT_SCANNER_CHUNK_SIZE: usize = 100; + pub const DEFAULT_MAX_SCANNER_CONCURRENCY: usize = 200; + pub const DEFAULT_MAX_THUMBNAIL_CONCURRENCY: usize = 50; } use defaults::*; @@ -148,12 +150,20 @@ pub struct StumpConfig { #[env_key(SESSION_EXPIRY_INTERVAL_KEY)] pub expired_session_cleanup_interval: u64, - /// The size of chunks to use throughout scanning the filesystem. This is used to - /// limit the number of files that are processed at once. Realistically, you are bound - /// by I/O constraints, but perhaps you can squeeze out some performance by tweaking this. - #[default_value(DEFAULT_SCANNER_CHUNK_SIZE)] - #[env_key(SCANNER_CHUNK_SIZE_KEY)] - pub scanner_chunk_size: usize, + /// The maximum number of concurrent files which may be processed by a scanner. This is used + /// to limit/increase the number of files that are processed at once. This may be useful for those + /// with high or low performance systems to configure to their needs. + #[default_value(DEFAULT_MAX_SCANNER_CONCURRENCY)] + #[env_key(MAX_SCANNER_CONCURRENCY_KEY)] + pub max_scanner_concurrency: usize, + + /// The maximum number of concurrent files which may be processed by a thumbnail generator. This is used + /// to limit/increase the number of images that are processed at once. Image generation can be + /// resource intensive, so this may be useful for those with high or low performance systems to + /// configure to their needs. 
+ #[default_value(DEFAULT_MAX_THUMBNAIL_CONCURRENCY)] + #[env_key(MAX_THUMBNAIL_CONCURRENCY_KEY)] + pub max_thumbnail_concurrency: usize, } impl StumpConfig { @@ -295,7 +305,8 @@ mod tests { session_ttl: None, access_token_ttl: None, expired_session_cleanup_interval: None, - scanner_chunk_size: None, + max_scanner_concurrency: None, + max_thumbnail_concurrency: None, }; partial_config.apply_to_config(&mut config); @@ -329,7 +340,8 @@ mod tests { expired_session_cleanup_interval: Some( DEFAULT_SESSION_EXPIRY_CLEANUP_INTERVAL ), - scanner_chunk_size: Some(DEFAULT_SCANNER_CHUNK_SIZE), + max_scanner_concurrency: Some(DEFAULT_MAX_SCANNER_CONCURRENCY), + max_thumbnail_concurrency: Some(DEFAULT_MAX_THUMBNAIL_CONCURRENCY), } ); @@ -377,8 +389,9 @@ mod tests { access_token_ttl: DEFAULT_ACCESS_TOKEN_TTL, expired_session_cleanup_interval: DEFAULT_SESSION_EXPIRY_CLEANUP_INTERVAL, - scanner_chunk_size: DEFAULT_SCANNER_CHUNK_SIZE, custom_templates_dir: None, + max_scanner_concurrency: DEFAULT_MAX_SCANNER_CONCURRENCY, + max_thumbnail_concurrency: DEFAULT_MAX_THUMBNAIL_CONCURRENCY, } ); }, diff --git a/core/src/db/entity/library/options.rs b/core/src/db/entity/library/config.rs similarity index 75% rename from core/src/db/entity/library/options.rs rename to core/src/db/entity/library/config.rs index 74bc32cf1..c5e854409 100644 --- a/core/src/db/entity/library/options.rs +++ b/core/src/db/entity/library/config.rs @@ -2,16 +2,18 @@ use serde::{Deserialize, Serialize}; use specta::Type; use utoipa::ToSchema; -use crate::{filesystem::image::ImageProcessorOptions, prisma::library_options}; +use crate::{filesystem::image::ImageProcessorOptions, prisma::library_config}; use super::{IgnoreRules, LibraryPattern}; #[derive(Debug, Clone, Deserialize, Serialize, Type, ToSchema, Default)] -pub struct LibraryOptions { +pub struct LibraryConfig { #[specta(optional)] pub id: Option, pub convert_rar_to_zip: bool, pub hard_delete_conversions: bool, + pub generate_file_hashes: bool, + pub process_metadata: bool, pub library_pattern: LibraryPattern, pub thumbnail_config: Option, #[serde(default)] @@ -22,19 +24,21 @@ pub struct LibraryOptions { pub library_id: Option, } -impl LibraryOptions { +impl LibraryConfig { pub fn is_collection_based(&self) -> bool { self.library_pattern == LibraryPattern::CollectionBased } } // TODO: This should probably be a TryFrom, as annoying as that is -impl From for LibraryOptions { - fn from(data: library_options::Data) -> LibraryOptions { - LibraryOptions { +impl From for LibraryConfig { + fn from(data: library_config::Data) -> LibraryConfig { + LibraryConfig { id: Some(data.id), convert_rar_to_zip: data.convert_rar_to_zip, hard_delete_conversions: data.hard_delete_conversions, + generate_file_hashes: data.generate_file_hashes, + process_metadata: data.process_metadata, library_pattern: LibraryPattern::from(data.library_pattern), thumbnail_config: data.thumbnail_config.map(|config| { ImageProcessorOptions::try_from(config).unwrap_or_default() @@ -49,8 +53,8 @@ impl From for LibraryOptions { } } -impl From<&library_options::Data> for LibraryOptions { - fn from(data: &library_options::Data) -> LibraryOptions { +impl From<&library_config::Data> for LibraryConfig { + fn from(data: &library_config::Data) -> LibraryConfig { data.clone().into() } } diff --git a/core/src/db/entity/library/entity.rs b/core/src/db/entity/library/entity.rs index 8d75c699c..8c85e49dd 100644 --- a/core/src/db/entity/library/entity.rs +++ b/core/src/db/entity/library/entity.rs @@ -6,30 +6,10 @@ use utoipa::ToSchema; use 
crate::{ db::entity::{Cursor, Series, Tag}, - prisma::{self, library}, + prisma::{library, library_config}, }; -use super::LibraryOptions; - -////////////////////////////////////////////// -//////////////// PRISMA MACROS /////////////// -////////////////////////////////////////////// - -library::include!(library_series_ids_media_ids_include { - series: include { - media: select { id } - } -}); - -library::include!(library_thumbnails_deletion_include { - series: include { - media: select { id } - } -}); - -/////////////////////////////////////////////// -//////////////////// MODELS /////////////////// -/////////////////////////////////////////////// +use super::LibraryConfig; #[derive(Debug, Clone, Deserialize, Serialize, Type, ToSchema)] pub struct Library { @@ -51,8 +31,8 @@ pub struct Library { pub series: Option>, /// The tags associated with this library. Will be `None` only if the relation is not loaded. pub tags: Option>, - /// The options of the library. Will be Default only if the relation is not loaded. - pub library_options: LibraryOptions, + /// The configuration for the library. Will be Default only if the relation is not loaded. + pub config: LibraryConfig, } impl Cursor for Library { @@ -149,26 +129,22 @@ pub struct LibraryStats { in_progress_books: u64, } -/////////////////////////////////////////////// -////////////////// CONVERSIONS //////////////// -/////////////////////////////////////////////// - -impl From for Library { - fn from(data: prisma::library::Data) -> Library { - let series = match data.series() { - Ok(series) => Some(series.iter().map(|s| s.to_owned().into()).collect()), - Err(_e) => None, - }; +impl From for Library { + fn from(data: library::Data) -> Library { + let series = data + .series() + .ok() + .map(|series| series.iter().map(|s| s.to_owned().into()).collect()); - let tags = match data.tags() { - Ok(tags) => Some(tags.iter().map(|tag| tag.to_owned().into()).collect()), - Err(_e) => None, - }; + let tags = data + .tags() + .ok() + .map(|tags| tags.iter().map(|tag| tag.to_owned().into()).collect()); - let library_options = match data.library_options() { - Ok(library_options) => library_options.to_owned().into(), - Err(_e) => LibraryOptions::default(), - }; + let config = data.config().map_or_else( + |_| LibraryConfig::default(), + |config| config.to_owned().into(), + ); Library { id: data.id, @@ -180,18 +156,13 @@ impl From for Library { updated_at: data.updated_at.to_rfc3339(), series, tags, - library_options, + config, } } } -impl From<(prisma::library::Data, prisma::library_options::Data)> for Library { - fn from( - (library, library_options): ( - prisma::library::Data, - prisma::library_options::Data, - ), - ) -> Library { +impl From<(library::Data, library_config::Data)> for Library { + fn from((library, library_config): (library::Data, library_config::Data)) -> Library { let series = match library.series() { Ok(series) => Some(series.iter().map(|s| s.to_owned().into()).collect()), Err(_e) => None, @@ -212,7 +183,7 @@ impl From<(prisma::library::Data, prisma::library_options::Data)> for Library { updated_at: library.updated_at.to_rfc3339(), series, tags, - library_options: LibraryOptions::from(library_options), + config: LibraryConfig::from(library_config), } } } diff --git a/core/src/db/entity/library/mod.rs b/core/src/db/entity/library/mod.rs index 6a08b5578..21d5a3f9c 100644 --- a/core/src/db/entity/library/mod.rs +++ b/core/src/db/entity/library/mod.rs @@ -1,9 +1,9 @@ +mod config; mod entity; -mod options; pub(crate) mod prisma_macros; mod 
rules; pub(crate) mod utils; +pub use config::*; pub use entity::*; -pub use options::*; pub use rules::*; diff --git a/core/src/db/entity/library/prisma_macros.rs b/core/src/db/entity/library/prisma_macros.rs index f794ab0bf..52a02f9d0 100644 --- a/core/src/db/entity/library/prisma_macros.rs +++ b/core/src/db/entity/library/prisma_macros.rs @@ -10,5 +10,17 @@ library::select!(library_tags_select { library::select!(library_path_with_options_select { path - library_options + config +}); + +library::include!(library_series_ids_media_ids_include { + series: include { + media: select { id } + } +}); + +library::include!(library_thumbnails_deletion_include { + series: include { + media: select { id } + } }); diff --git a/core/src/db/entity/media/entity.rs b/core/src/db/entity/media/entity.rs index fbc797753..49cc9b1eb 100644 --- a/core/src/db/entity/media/entity.rs +++ b/core/src/db/entity/media/entity.rs @@ -6,7 +6,7 @@ use utoipa::ToSchema; use crate::{ db::{ - entity::{common::Cursor, LibraryOptions, MediaMetadata, Series, Tag}, + entity::{common::Cursor, LibraryConfig, MediaMetadata, Series, Tag}, FileStatus, }, error::CoreError, @@ -119,7 +119,7 @@ impl TryFrom for Media { #[derive(Default)] pub struct MediaBuilderOptions { pub series_id: String, - pub library_options: LibraryOptions, + pub library_config: LibraryConfig, } impl From for Media { diff --git a/core/src/db/entity/media/prisma_macros.rs b/core/src/db/entity/media/prisma_macros.rs index af99226da..e8d8b4773 100644 --- a/core/src/db/entity/media/prisma_macros.rs +++ b/core/src/db/entity/media/prisma_macros.rs @@ -12,7 +12,7 @@ media::select!(media_thumbnail { path series: select { library: select { - library_options + config } } }); diff --git a/core/src/db/entity/series/prisma_macros.rs b/core/src/db/entity/series/prisma_macros.rs index 044048c32..89fe75f46 100644 --- a/core/src/db/entity/series/prisma_macros.rs +++ b/core/src/db/entity/series/prisma_macros.rs @@ -11,6 +11,6 @@ series::select!((book_filters: Vec) => series_or_library_thumb path } library: select { - library_options + config } }); diff --git a/core/src/db/migration.rs b/core/src/db/migration.rs index fca64d314..ddb864701 100644 --- a/core/src/db/migration.rs +++ b/core/src/db/migration.rs @@ -20,14 +20,18 @@ pub async fn run_migrations(client: &prisma::PrismaClient) -> CoreResult<()> { .await .map_err(|e| CoreError::MigrationError(e.to_string()))?; - tracing::info!("Migrations complete!"); + tracing::debug!("Database push completed!"); } #[cfg(not(debug_assertions))] - client - ._migrate_deploy() - .await - .map_err(|e| CoreError::MigrationError(e.to_string()))?; + { + client + ._migrate_deploy() + .await + .map_err(|e| CoreError::MigrationError(e.to_string()))?; + + tracing::info!("Database migration completed!"); + } Ok(()) } diff --git a/core/src/db/query/pagination.rs b/core/src/db/query/pagination.rs index f3d145c98..7ed883b3c 100644 --- a/core/src/db/query/pagination.rs +++ b/core/src/db/query/pagination.rs @@ -230,7 +230,7 @@ pub struct PageLinks { pub next: Option, } -#[derive(Serialize, Type, ToSchema)] +#[derive(Debug, Serialize, Type, ToSchema)] pub struct PageInfo { /// The number of pages available. pub total_pages: u32, @@ -277,7 +277,7 @@ impl From for CursorInfo { } } -#[derive(Serialize, Type, ToSchema)] +#[derive(Debug, Serialize, Type, ToSchema)] // OK, this is SO annoying... 
#[aliases(PageableDirectoryListing = Pageable)] pub struct Pageable { diff --git a/core/src/filesystem/common.rs b/core/src/filesystem/common.rs index 5099a9b34..ab80860b6 100644 --- a/core/src/filesystem/common.rs +++ b/core/src/filesystem/common.rs @@ -1,43 +1,66 @@ use globset::GlobSet; use std::{ ffi::OsStr, - fs::File, - io::Read, path::{Path, PathBuf}, }; +use tokio::{fs, io}; use tracing::error; use walkdir::WalkDir; -use super::{media::is_accepted_cover_name, ContentType, FileError}; +use super::{image::ImageFormat, media::is_accepted_cover_name, ContentType}; pub const ACCEPTED_IMAGE_EXTENSIONS: [&str; 8] = ["jpg", "png", "jpeg", "jxl", "webp", "gif", "avif", "heif"]; -pub fn read_entire_file>(path: P) -> Result, FileError> { - let mut file = File::open(path)?; - - let mut buf = Vec::new(); - file.read_to_end(&mut buf)?; - - Ok(buf) +pub async fn get_thumbnail( + parent: impl AsRef, + name: &str, + format: Option, +) -> io::Result)>> { + let thumbnails_dir = parent.as_ref().to_path_buf(); + + let path = match format { + Some(format) => { + let file_path = + thumbnails_dir.join(format!("{}.{}", name, format.extension())); + + if fs::metadata(&file_path).await.is_ok() { + Some(file_path) + } else { + find_thumbnail(&thumbnails_dir, name).await + } + }, + _ => find_thumbnail(&thumbnails_dir, name).await, + }; + + if let Some(path) = path { + let FileParts { extension, .. } = path.file_parts(); + fs::read(path).await.map(|bytes| { + let content_type = ContentType::from_extension(&extension); + Some((content_type, bytes)) + }) + } else { + Ok(None) + } } -/// A function that returns the path of a thumbnail image, if it exists. -/// This should be used when the thumbnail extension is not known. -pub fn get_unknown_thumnail(id: &str, mut thumbnails_dir: PathBuf) -> Option { +pub async fn find_thumbnail(parent: &Path, name: &str) -> Option { + let mut thumbnails_dir = parent.to_path_buf(); + for extension in ACCEPTED_IMAGE_EXTENSIONS.iter() { - thumbnails_dir.push(format!("{}.{}", id, extension)); + let path = parent.join(format!("{}.{}", name, extension)); - if thumbnails_dir.exists() { - return Some(thumbnails_dir); + if fs::metadata(&path).await.is_ok() { + return Some(path); } - thumbnails_dir.pop(); + thumbnails_dir.push(format!("{}.{}", name, extension)); } None } +// TODO(perf): Async-ify pub fn get_unknown_image(mut base_path: PathBuf) -> Option { for extension in ACCEPTED_IMAGE_EXTENSIONS.iter() { base_path.set_extension(extension); @@ -245,33 +268,3 @@ impl PathUtils for Path { }) } } - -#[cfg(test)] -mod tests { - use std::io::Write; - - use tempfile::TempDir; - - use super::*; - - #[test] - fn test_read_entire_file() { - let temp_dir = TempDir::new().unwrap(); - let temp_file = temp_dir.path().join("temp_file.txt"); - - File::create(&temp_file) - .unwrap() - .write_all(b"Test data") - .unwrap(); - - let data = read_entire_file(&temp_file).unwrap(); - assert_eq!(data, b"Test data"); - } - - #[test] - fn test_read_entire_file_non_existent() { - let path = "non_existent_file.txt"; - let result = read_entire_file(path); - assert!(result.is_err()); - } -} diff --git a/core/src/filesystem/hash.rs b/core/src/filesystem/hash.rs index ece3c60b0..647427b6a 100644 --- a/core/src/filesystem/hash.rs +++ b/core/src/filesystem/hash.rs @@ -30,7 +30,7 @@ fn read(file: &std::fs::File, offset: u64, size: u64) -> Result, io::Err } pub fn generate(path: &str, bytes: u64) -> Result { - let file = std::fs::File::open(path).unwrap(); + let file = std::fs::File::open(path)?; let mut ring_context = 
Context::new(&SHA256); diff --git a/core/src/filesystem/image/mod.rs b/core/src/filesystem/image/mod.rs index 746abedd4..26064b5fd 100644 --- a/core/src/filesystem/image/mod.rs +++ b/core/src/filesystem/image/mod.rs @@ -13,11 +13,7 @@ pub use process::{ ImageFormat, ImageProcessor, ImageProcessorOptions, ImageResizeMode, ImageResizeOptions, }; -pub use thumbnail::{ - generate_thumbnail, generate_thumbnails, place_thumbnail, remove_thumbnails, - ThumbnailGenerationJob, ThumbnailGenerationJobParams, ThumbnailGenerationJobVariant, - ThumbnailGenerationOutput, ThumbnailManager, -}; +pub use thumbnail::*; #[cfg(test)] mod tests { @@ -44,12 +40,12 @@ mod tests { .to_string() } - pub fn get_test_avif_path() -> String { - PathBuf::from(env!("CARGO_MANIFEST_DIR")) - .join("integration-tests/data/example.avif") - .to_string_lossy() - .to_string() - } + // pub fn get_test_avif_path() -> String { + // PathBuf::from(env!("CARGO_MANIFEST_DIR")) + // .join("integration-tests/data/example.avif") + // .to_string_lossy() + // .to_string() + // } // TODO(339): Avif + Jxl support // pub fn get_test_jxl_path() -> String { diff --git a/core/src/filesystem/image/thumbnail/generate.rs b/core/src/filesystem/image/thumbnail/generate.rs new file mode 100644 index 000000000..a3c9b96bc --- /dev/null +++ b/core/src/filesystem/image/thumbnail/generate.rs @@ -0,0 +1,146 @@ +use std::path::PathBuf; + +use tokio::{fs, sync::oneshot, task::spawn_blocking}; + +use crate::{ + config::StumpConfig, + filesystem::{ + get_page, + image::{ + GenericImageProcessor, ImageFormat, ImageProcessor, ImageProcessorOptions, + WebpProcessor, + }, + FileError, + }, + prisma::media, +}; + +/// An error enum for thumbnail generation errors +#[derive(Debug, thiserror::Error)] +pub enum ThumbnailGenerateError { + #[error("Could not write to disk: {0}")] + WriteFailed(#[from] std::io::Error), + #[error("{0}")] + FileError(#[from] FileError), + #[error("Did not receive thumbnail generation result")] + ResultNeverReceived, + #[error("Something unexpected went wrong: {0}")] + Unknown(String), +} + +/// The options for generating a thumbnail +#[derive(Debug, Clone)] +pub struct GenerateThumbnailOptions { + pub image_options: ImageProcessorOptions, + pub core_config: StumpConfig, + pub force_regen: bool, +} + +/// A type alias for whether a thumbnail was generated or not during the generation process. This is +/// not indicative of success or failure, but rather whether the thumbnail was newly generated or +/// already existed. +pub type DidGenerate = bool; +/// The output of a thumbnail generation operation +pub type GenerateOutput = (Vec, PathBuf, DidGenerate); + +/// The main function for generating a thumbnail for a book. This should be called from within the +/// scope of a blocking task in the [generate_book_thumbnail] function. 
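Every async wrapper introduced by this patch follows the same shape: hand the CPU- and IO-heavy closure to `tokio::task::spawn_blocking`, then ferry its result back over a `oneshot` channel so the async caller can await it. A minimal, self-contained sketch of that shape (the `run_blocking` name and the `Option` return are illustrative, not part of the patch):

    use tokio::{sync::oneshot, task::spawn_blocking};

    async fn run_blocking<T, F>(work: F) -> Option<T>
    where
        T: Send + 'static,
        F: FnOnce() -> T + Send + 'static,
    {
        let (tx, rx) = oneshot::channel();
        let handle = spawn_blocking(move || {
            // Sending only fails if the receiver was dropped first
            let _ = tx.send(work());
        });
        match rx.await {
            Ok(value) => Some(value),
            Err(_) => {
                // The sender was dropped (e.g. the closure panicked); since `abort` cannot
                // stop an already-running blocking thread, wait it out before giving up
                let _ = handle.await;
                None
            },
        }
    }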
+fn do_generate_book_thumbnail(
+	book_path: &str,
+	file_name: &str,
+	config: StumpConfig,
+	options: ImageProcessorOptions,
+) -> Result<GenerateOutput, FileError> {
+	let (_, page_data) = get_page(book_path, options.page.unwrap_or(1), &config)?;
+	let ext = options.format.extension();
+
+	let thumbnail_path = config
+		.get_thumbnails_dir()
+		.join(format!("{}.{}", &file_name, ext));
+
+	match options.format {
+		ImageFormat::Webp => WebpProcessor::generate(&page_data, options),
+		_ => GenericImageProcessor::generate(&page_data, options),
+	}
+	.map(|buf| (buf, thumbnail_path, true))
+}
+
+/// Generate a thumbnail for a book, returning the thumbnail data, the path to the thumbnail file,
+/// and a boolean indicating whether the thumbnail was generated or not. If the thumbnail already
+/// exists and `force_regen` is false, the function will return the existing thumbnail data.
+#[tracing::instrument(skip_all)]
+pub async fn generate_book_thumbnail(
+	book: &media::Data,
+	GenerateThumbnailOptions {
+		image_options,
+		core_config,
+		force_regen,
+	}: GenerateThumbnailOptions,
+) -> Result<GenerateOutput, ThumbnailGenerateError> {
+	let book_path = book.path.clone();
+	let file_name = book.id.clone();
+
+	let file_path = core_config.get_thumbnails_dir().join(format!(
+		"{}.{}",
+		&file_name,
+		image_options.format.extension()
+	));
+
+	if let Err(e) = fs::metadata(&file_path).await {
+		// A `NotFound` error is expected here, but anything else is unexpected
+		if e.kind() != std::io::ErrorKind::NotFound {
+			tracing::error!(error = ?e, "IO error while checking for file existence?");
+		}
+	} else if !force_regen {
+		match fs::read(&file_path).await {
+			Ok(thumbnail) => return Ok((thumbnail, PathBuf::from(&file_path), false)),
+			Err(e) => {
+				// Realistically, this shouldn't happen if we can grab the metadata, but it isn't a
+				// big deal if it does. We can just regenerate the thumbnail in the event something
+				// is wrong with the file.
+				tracing::error!(error = ?e, "Failed to read thumbnail file from disk! Regenerating...");
+			},
+		}
+	}
+
+	let (tx, rx) = oneshot::channel();
+
+	// Spawn a blocking task to handle the IO-intensive operations:
+	// 1. Pulling the page data from the book file
+	// 2. Generating the thumbnail from said page data
+	let handle = spawn_blocking({
+		let book_path = book_path.clone();
+		let file_name = file_name.clone();
+
+		move || {
+			let _send_result = tx.send(do_generate_book_thumbnail(
+				&book_path,
+				&file_name,
+				core_config,
+				image_options,
+			));
+			tracing::trace!(
+				is_err = _send_result.is_err(),
+				"Sending generate result to channel"
+			);
+		}
+	});
+
+	let generate_result = if let Ok(recv) = rx.await {
+		recv?
+	} else {
+		// Note: `abort` has no effect on blocking threads which have already been spawned,
+		// so we just have to wait for the thread to finish.
+ // See: https://docs.rs/tokio/latest/tokio/task/fn.spawn_blocking.html + handle + .await + .map_err(|e| ThumbnailGenerateError::Unknown(e.to_string()))?; + return Err(ThumbnailGenerateError::ResultNeverReceived); + }; + + // Write the thumbnail to the filesystem + let (thumbnail, thumbnail_path, did_generate) = generate_result; + fs::write(&thumbnail_path, &thumbnail).await?; + + Ok((thumbnail, thumbnail_path, did_generate)) +} diff --git a/core/src/filesystem/image/thumbnail/generation_job.rs b/core/src/filesystem/image/thumbnail/generation_job.rs index 0fc6e9a0d..600743cb2 100644 --- a/core/src/filesystem/image/thumbnail/generation_job.rs +++ b/core/src/filesystem/image/thumbnail/generation_job.rs @@ -1,7 +1,18 @@ +use std::{ + pin::pin, + sync::{ + atomic::{AtomicUsize, Ordering}, + Arc, + }, +}; + +use futures::{stream::FuturesUnordered, StreamExt}; use serde::{Deserialize, Serialize}; use specta::Type; +use tokio::sync::Semaphore; use crate::{ + filesystem::image::ImageProcessorOptions, job::{ error::JobError, JobExecuteLog, JobExt, JobOutputExt, JobProgress, JobTaskOutput, WorkerCtx, WorkingState, WrappedJob, @@ -10,8 +21,8 @@ use crate::{ }; use super::{ - manager::{ParThumbnailGenerationOutput, ThumbnailManager}, - ImageProcessorOptions, + generate::{generate_book_thumbnail, GenerateThumbnailOptions}, + ThumbnailGenerateError, }; // Note: I am type aliasing for the sake of clarity in what the provided Strings represent @@ -61,9 +72,13 @@ pub enum ThumbnailGenerationTask { } #[derive(Clone, Serialize, Deserialize, Default, Debug, Type)] +// Note: This container attribute is used to ensure future additions to the struct do not break deserialization +#[serde(default)] pub struct ThumbnailGenerationOutput { /// The total number of files that were visited during the thumbnail generation visited_files: u64, + /// The number of thumbnails that were skipped (already existed and not force regenerated) + skipped_files: u64, /// The number of thumbnails that were generated generated_thumbnails: u64, /// The number of thumbnails that were removed @@ -73,6 +88,7 @@ pub struct ThumbnailGenerationOutput { impl JobOutputExt for ThumbnailGenerationOutput { fn update(&mut self, updated: Self) { self.visited_files += updated.visited_files; + self.skipped_files += updated.skipped_files; self.generated_thumbnails += updated.generated_thumbnails; self.removed_thumbnails += updated.removed_thumbnails; } @@ -149,28 +165,9 @@ impl JobExt for ThumbnailGenerationJob { ThumbnailGenerationJobVariant::MediaGroup(media_ids) => media_ids.clone(), }; - // TODO Should find a way to keep the same ThumbnailManager around for the whole job execution - let manager = ThumbnailManager::new(ctx.config.clone()) - .map_err(|e| JobError::TaskFailed(e.to_string()))?; - - let media_ids = if !self.params.force_regenerate { - // if we aren't force regenerating, we can skip the init if all media already have thumbnails - media_ids - .into_iter() - .filter(|id| !manager.has_thumbnail(id.as_str())) - .collect::>() - } else { - media_ids - }; - - let tasks = media_ids - .chunks(ctx.config.scanner_chunk_size) - .map(|chunk| ThumbnailGenerationTask::GenerateBatch(chunk.to_vec())) - .collect(); - Ok(WorkingState { output: Some(Self::Output::default()), - tasks, + tasks: vec![ThumbnailGenerationTask::GenerateBatch(media_ids)].into(), completed_tasks: 0, logs: vec![], }) @@ -184,9 +181,6 @@ impl JobExt for ThumbnailGenerationJob { let mut output = Self::Output::default(); let mut logs = vec![]; - let mut manager = 
ThumbnailManager::new(ctx.config.clone()) - .map_err(|e| JobError::TaskFailed(e.to_string()))?; - match task { ThumbnailGenerationTask::GenerateBatch(media_ids) => { let media = ctx @@ -197,50 +191,31 @@ impl JobExt for ThumbnailGenerationJob { .await .map_err(|e| JobError::TaskFailed(e.to_string()))?; - if self.params.force_regenerate { - let media_ids_to_remove = media - .iter() - .filter(|m| manager.has_thumbnail(m.id.as_str())) - .map(|m| m.id.clone()) - .collect::>(); - ctx.report_progress(JobProgress::msg( - format!("Removing {} thumbnails", media_ids_to_remove.len()) - .as_str(), - )); - let JobTaskOutput { - output: sub_output, - logs: sub_logs, - .. - } = safely_remove_batch(&media_ids_to_remove, &mut manager); - output.update(sub_output); - logs.extend(sub_logs); - } - - let media_to_generate_thumbnails = if self.params.force_regenerate { - media - } else { - media - .into_iter() - .filter(|m| !manager.has_thumbnail(m.id.as_str())) - .collect::>() - }; - - ctx.report_progress(JobProgress::msg( - format!( - "Generating {} thumbnails", - media_to_generate_thumbnails.len() - ) - .as_str(), + let task_count = media.len() as i32; + ctx.report_progress(JobProgress::subtask_position_msg( + "Generating thumbnails", + 1, + task_count, )); let JobTaskOutput { output: sub_output, logs: sub_logs, .. } = safely_generate_batch( - &media_to_generate_thumbnails, - self.options.clone(), - &mut manager, - ); + &media, + GenerateThumbnailOptions { + image_options: self.options.clone(), + core_config: ctx.config.as_ref().clone(), + force_regen: self.params.force_regenerate, + }, + |position| { + ctx.report_progress(JobProgress::subtask_position( + position as i32, + task_count, + )) + }, + ) + .await; output.update(sub_output); logs.extend(sub_logs); }, @@ -254,67 +229,75 @@ impl JobExt for ThumbnailGenerationJob { } } -pub fn safely_remove_batch>( - ids: &[S], - manager: &mut ThumbnailManager, +#[tracing::instrument(skip_all)] +pub async fn safely_generate_batch( + books: &[media::Data], + options: GenerateThumbnailOptions, + reporter: impl Fn(usize), ) -> JobTaskOutput { let mut output = ThumbnailGenerationOutput::default(); let mut logs = vec![]; - for id in ids { - manager.remove_thumbnail(id).map_or_else( - |error| { - tracing::error!(error = ?error, "Failed to remove thumbnail"); + let max_concurrency = options.core_config.max_thumbnail_concurrency; + let semaphore = Arc::new(Semaphore::new(max_concurrency)); + tracing::debug!( + max_concurrency, + "Semaphore created for thumbnail generation" + ); + + let futures = books + .iter() + .map(|book| { + let semaphore = semaphore.clone(); + let options = options.clone(); + let path = book.path.clone(); + + async move { + if semaphore.available_permits() == 0 { + tracing::trace!(?path, "Waiting for permit for thumbnail generation"); + } + let _permit = semaphore.acquire().await.map_err(|e| { + (ThumbnailGenerateError::Unknown(e.to_string()), path.clone()) + })?; + tracing::trace!(?path, "Acquired permit for thumbnail generation"); + generate_book_thumbnail(book, options) + .await + .map_err(|e| (e, path)) + } + }) + .collect::>(); + + // An atomic usize to keep track of the current position in the stream + // to report progress to the UI + let atomic_cursor = Arc::new(AtomicUsize::new(1)); + + let mut futures = pin!(futures); + + while let Some(gen_output) = futures.next().await { + match gen_output { + Ok((_, _, did_generate)) => { + if did_generate { + output.generated_thumbnails += 1; + } else { + // If we didn't generate a thumbnail, and have a 
success result, + // then we skipped it + output.skipped_files += 1; + } + }, + Err((error, path)) => { logs.push( JobExecuteLog::error(format!( - "Failed to remove thumbnail: {:?}", + "Failed to generate thumbnail: {:?}", error.to_string() )) - .with_ctx(format!("Media ID: {}", id.as_ref())), + .with_ctx(format!("Media path: {}", path)), ); }, - |_| output.removed_thumbnails += 1, - ); - } - output.visited_files = ids.len() as u64; - - JobTaskOutput { - output, - logs, - subtasks: vec![], + } + // We visit every file, regardless of success or failure + output.visited_files += 1; + reporter(atomic_cursor.fetch_add(1, Ordering::SeqCst)); } -} - -pub fn safely_generate_batch( - media: &[media::Data], - options: ImageProcessorOptions, - manager: &mut ThumbnailManager, -) -> JobTaskOutput { - let mut output = ThumbnailGenerationOutput::default(); - - let ParThumbnailGenerationOutput { - created_thumbnails, - errors, - } = manager.generate_thumbnails_par(media, options.clone()); - let created_media_id_thumbnails = created_thumbnails - .into_iter() - .map(|(id, _)| id) - .collect::>(); - manager.track_thumbnails(&created_media_id_thumbnails, options); - - output.visited_files = (created_media_id_thumbnails.len() + errors.len()) as u64; - output.generated_thumbnails = created_media_id_thumbnails.len() as u64; - - let logs = errors - .into_iter() - .map(|(path, error)| { - JobExecuteLog::error(format!( - "Failed to generate thumbnail: {:?}", - error.to_string() - )) - .with_ctx(format!("{:?}", path)) - }) - .collect(); JobTaskOutput { output, diff --git a/core/src/filesystem/image/thumbnail/manager.rs b/core/src/filesystem/image/thumbnail/manager.rs deleted file mode 100644 index 83b15e0b6..000000000 --- a/core/src/filesystem/image/thumbnail/manager.rs +++ /dev/null @@ -1,173 +0,0 @@ -use std::{collections::HashMap, fs::File, io::Write, path::PathBuf, sync::Arc}; - -use rayon::iter::{IntoParallelIterator, ParallelIterator}; - -use crate::{ - config::StumpConfig, - filesystem::{ - image::{ - GenericImageProcessor, ImageFormat, ImageProcessor, ImageProcessorOptions, - WebpProcessor, - }, - media, FileError, - }, - prisma::media as prisma_media, -}; - -#[derive(Default)] -pub struct ParThumbnailGenerationOutput { - pub created_thumbnails: Vec<(String, PathBuf)>, - pub errors: Vec<(PathBuf, FileError)>, -} - -pub struct ThumbnailManager { - config: Arc, - thumbnail_contents: HashMap, -} - -impl ThumbnailManager { - pub fn new(config: Arc) -> Result { - // This hashmap will hold the id : PathBuf for each item in the thumbnail dir. - let mut thumbnail_contents = HashMap::new(); - - // Take inventory of the thumbnail_dir's contents - let read_dir = config.get_thumbnails_dir().read_dir()?; - for item in read_dir.into_iter().filter_map(Result::ok) { - let path = item.path(); - // Test if the path has a filename, if it does, add it to the hashmap - if let Some(file_name) = item.path().file_name() { - let file_name = file_name.to_string_lossy().to_string(); - thumbnail_contents.insert(file_name, path); - } else { - tracing::warn!(?path, "Thumbnail file has no filename?"); - } - } - - Ok(Self { - config, - thumbnail_contents, - }) - } - - pub fn has_thumbnail>(&self, media_id: S) -> bool { - self.thumbnail_contents.contains_key(media_id.as_ref()) - } - - /// Inserts thumbnails into the manager's internal hashmap for future reference, - /// it will assume generation is not necessary. 
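The manager being deleted here kept an in-memory `HashMap` inventory of the thumbnail directory and had to be rebuilt for every task; the replacement code simply asks the filesystem, which is the actual source of truth. A rough equivalent of the check that replaces the bookkeeping, assuming the `<id>.<ext>` naming scheme used throughout (the helper name is illustrative, not from the patch):

    use std::path::Path;
    use tokio::fs;

    async fn thumbnail_exists(thumbnails_dir: &Path, id: &str, extensions: &[&str]) -> bool {
        for ext in extensions {
            // A successful metadata call means the file exists and is accessible
            if fs::metadata(thumbnails_dir.join(format!("{id}.{ext}"))).await.is_ok() {
                return true;
            }
        }
        false
    }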
- pub fn track_thumbnails( - &mut self, - media_ids: &[String], - options: ImageProcessorOptions, - ) { - let base_path = self.config.get_thumbnails_dir(); - let ext = options.format.extension(); - - for id in media_ids { - let thumbnail_path = base_path.join(format!("{}.{}", id, ext)); - if thumbnail_path.exists() { - self.thumbnail_contents.insert(id.clone(), thumbnail_path); - } - } - } - - fn do_generate_thumbnail( - &self, - media_item: &prisma_media::Data, - options: ImageProcessorOptions, - ) -> Result { - let media_id = media_item.id.clone(); - let media_path = media_item.path.clone(); - - let (_, page_data) = - media::get_page(&media_path, options.page.unwrap_or(1), &self.config)?; - let ext = options.format.extension(); - - let thumbnail_path = self - .config - .get_thumbnails_dir() - .join(format!("{}.{}", &media_id, ext)); - - if !thumbnail_path.exists() { - let image_buffer = match options.format { - ImageFormat::Webp => WebpProcessor::generate(&page_data, options)?, - _ => GenericImageProcessor::generate(&page_data, options)?, - }; - - let mut image_file = File::create(&thumbnail_path)?; - image_file.write_all(&image_buffer)?; - } else { - tracing::trace!( - ?thumbnail_path, - media_id, - "Thumbnail already exists for media" - ) - } - - Ok(thumbnail_path) - } - - pub fn generate_thumbnail( - &mut self, - media_item: &prisma_media::Data, - options: ImageProcessorOptions, - ) -> Result { - let path = self.do_generate_thumbnail(media_item, options)?; - self.thumbnail_contents - .insert(media_item.id.clone(), path.clone()); - Ok(path) - } - - pub fn generate_thumbnails_par( - &self, - media: &[prisma_media::Data], - options: ImageProcessorOptions, - ) -> ParThumbnailGenerationOutput { - let mut output = ParThumbnailGenerationOutput::default(); - - // TODO: make this chunk size configurable - for chunk in media.chunks(5) { - let results = chunk - .into_par_iter() - .map(|m| { - ( - m.id.clone(), - m.path.clone(), - self.do_generate_thumbnail(m, options.clone()), - ) - }) - .collect::>(); - - let (errors, generated) = results.into_iter().fold( - (vec![], vec![]), - |(mut errors, mut generated), (id, path, res)| { - match res { - Ok(generated_path) => generated.push((id, generated_path)), - Err(err) => errors.push((PathBuf::from(path), err)), - } - (errors, generated) - }, - ); - - output.errors = errors; - output.created_thumbnails = generated; - } - - output - } - - pub fn remove_thumbnail>( - &mut self, - media_id: S, - ) -> Result<(), FileError> { - let media_id = media_id.as_ref(); - if let Some(path) = self.thumbnail_contents.get(media_id) { - std::fs::remove_file(path)?; - self.thumbnail_contents.remove(media_id); - } else { - tracing::warn!(?media_id, "Thumbnail not found in manager"); - } - - Ok(()) - } -} diff --git a/core/src/filesystem/image/thumbnail/mod.rs b/core/src/filesystem/image/thumbnail/mod.rs index 9f79fcff1..9c624c312 100644 --- a/core/src/filesystem/image/thumbnail/mod.rs +++ b/core/src/filesystem/image/thumbnail/mod.rs @@ -1,172 +1,12 @@ -use std::{fs::File, io::Write, path::PathBuf}; - -use rayon::prelude::{IntoParallelIterator, ParallelIterator}; -use tracing::{debug, error, trace}; - -// TODO(perf): This is too slow. A couple of notes: -// - We need to spawn blocking threads for the image processing, currently using rayon which is ideal for CPU-bound tasks -// - Stop chunking. 
Let the OS thread scheduler handle things for us -// - I think we need to break from this struct and go back to functional, the lifetime constraints dealing with self are a pain when exploring -// threading options -// See https://ryhl.io/blog/async-what-is-blocking/ -> Summary for good table - +mod generate; mod generation_job; -mod manager; +mod utils; +pub use generate::{ + generate_book_thumbnail, GenerateThumbnailOptions, ThumbnailGenerateError, +}; pub use generation_job::{ ThumbnailGenerationJob, ThumbnailGenerationJobParams, ThumbnailGenerationJobVariant, ThumbnailGenerationOutput, }; -pub use manager::ThumbnailManager; - -use crate::{ - config::StumpConfig, - db::entity::Media, - filesystem::{media, FileError}, -}; - -use super::{ - process::ImageProcessor, webp::WebpProcessor, GenericImageProcessor, ImageFormat, - ImageProcessorOptions, -}; - -pub fn place_thumbnail( - id: &str, - ext: &str, - bytes: &[u8], - config: &StumpConfig, -) -> Result { - let thumbnail_path = config.get_thumbnails_dir().join(format!("{}.{}", id, ext)); - - let mut image_file = File::create(&thumbnail_path)?; - image_file.write_all(bytes)?; - - Ok(thumbnail_path) -} - -pub fn generate_thumbnail( - id: &str, - media_path: &str, - options: ImageProcessorOptions, - config: &StumpConfig, -) -> Result { - let (_, buf) = media::get_page(media_path, options.page.unwrap_or(1), config)?; - let ext = options.format.extension(); - - let thumbnail_path = config.get_thumbnails_dir().join(format!("{}.{}", &id, ext)); - if !thumbnail_path.exists() { - // TODO: this will be more complicated once more specialized processors are added... - let image_buffer = if options.format == ImageFormat::Webp { - WebpProcessor::generate(&buf, options)? - } else { - GenericImageProcessor::generate(&buf, options)? - }; - - let mut image_file = File::create(&thumbnail_path)?; - image_file.write_all(&image_buffer)?; - } else { - trace!(?thumbnail_path, id, "Thumbnail already exists for media"); - } - - Ok(thumbnail_path) -} - -pub fn generate_thumbnails( - media: &[Media], - options: ImageProcessorOptions, - config: &StumpConfig, -) -> Result, FileError> { - trace!("Enter generate_thumbnails"); - - let mut generated_paths = Vec::with_capacity(media.len()); - - // TODO: configurable chunk size? - // Split the array into chunks of 5 images - for (idx, chunk) in media.chunks(5).enumerate() { - trace!(chunk = idx + 1, "Processing chunk for thumbnail generation"); - let results = chunk - .into_par_iter() - .map(|m| { - generate_thumbnail( - m.id.as_str(), - m.path.as_str(), - options.clone(), - config, - ) - }) - .filter_map(|res| { - if res.is_err() { - error!(error = ?res.err(), "Error generating thumbnail!"); - None - } else { - res.ok() - } - }) - .collect::>(); - - debug!(num_generated = results.len(), "Generated thumbnail batch"); - - generated_paths.extend(results); - } - - Ok(generated_paths) -} - -pub const THUMBNAIL_CHUNK_SIZE: usize = 5; - -/// Deletes thumbnails and returns the number deleted if successful, returns -/// [FileError] otherwise. 
-pub fn remove_thumbnails( - id_list: &[String], - thumbnails_dir: PathBuf, -) -> Result { - let found_thumbnails = thumbnails_dir - .read_dir() - .ok() - .map(|dir| dir.into_iter()) - .map(|iter| { - iter.filter_map(|entry| { - entry.ok().and_then(|entry| { - let path = entry.path(); - let file_name = path.file_name()?.to_str()?.to_string(); - - if id_list.iter().any(|id| file_name.starts_with(id)) { - Some(path) - } else { - None - } - }) - }) - }) - .map(|iter| iter.collect::>()) - .unwrap_or_default(); - - let found_thumbnails_count = found_thumbnails.len(); - tracing::debug!(found_thumbnails_count, "Found thumbnails to remove"); - - let mut deleted_thumbnails_count = 0; - - for (idx, chunk) in found_thumbnails.chunks(THUMBNAIL_CHUNK_SIZE).enumerate() { - trace!(chunk = idx + 1, "Processing chunk for thumbnail removal"); - let results = chunk - .into_par_iter() - .map(|path| { - std::fs::remove_file(path)?; - Ok(()) - }) - .filter_map(|res: Result<(), FileError>| { - if res.is_err() { - error!(error = ?res.err(), "Error deleting thumbnail!"); - None - } else { - res.ok() - } - }) - .collect::>(); - - trace!(deleted_count = results.len(), "Deleted thumbnail batch"); - deleted_thumbnails_count += results.len() as u64; - } - - Ok(deleted_thumbnails_count) -} +pub use utils::*; diff --git a/core/src/filesystem/image/thumbnail/utils.rs b/core/src/filesystem/image/thumbnail/utils.rs new file mode 100644 index 000000000..790284733 --- /dev/null +++ b/core/src/filesystem/image/thumbnail/utils.rs @@ -0,0 +1,77 @@ +use std::path::PathBuf; + +use crate::{config::StumpConfig, filesystem::FileError}; +use rayon::prelude::{IntoParallelIterator, ParallelIterator}; +use tokio::fs; +use tracing::{error, trace}; + +pub async fn place_thumbnail( + id: &str, + ext: &str, + bytes: &[u8], + config: &StumpConfig, +) -> Result { + let thumbnail_path = config.get_thumbnails_dir().join(format!("{}.{}", id, ext)); + fs::write(&thumbnail_path, bytes).await?; + Ok(thumbnail_path) +} + +pub const THUMBNAIL_CHUNK_SIZE: usize = 500; + +// TODO(perf): Async-ify +/// Deletes thumbnails and returns the number deleted if successful, returns +/// [FileError] otherwise. 
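Note the chunk size jump from 5 to 500: removal is a cheap unlink syscall rather than image decoding, so much larger batches are presumably safe. The chunked parallel delete below, reduced to its core (illustrative helper, logging trimmed):

    use rayon::prelude::*;
    use std::path::PathBuf;

    fn remove_files(paths: &[PathBuf]) -> u64 {
        paths
            .par_iter()
            // Fan each deletion out on the rayon pool; skip (rather than abort on) failures
            .filter_map(|path| std::fs::remove_file(path).ok())
            .count() as u64
    }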
+pub fn remove_thumbnails(
+	id_list: &[String],
+	thumbnails_dir: PathBuf,
+) -> Result<u64, FileError> {
+	let found_thumbnails = thumbnails_dir
+		.read_dir()
+		.ok()
+		.map(|dir| dir.into_iter())
+		.map(|iter| {
+			iter.filter_map(|entry| {
+				entry.ok().and_then(|entry| {
+					let path = entry.path();
+					let file_name = path.file_name()?.to_str()?.to_string();
+
+					if id_list.iter().any(|id| file_name.starts_with(id)) {
+						Some(path)
+					} else {
+						None
+					}
+				})
+			})
+		})
+		.map(|iter| iter.collect::<Vec<PathBuf>>())
+		.unwrap_or_default();
+
+	let found_thumbnails_count = found_thumbnails.len();
+	tracing::debug!(found_thumbnails_count, "Found thumbnails to remove");
+
+	let mut deleted_thumbnails_count = 0;
+
+	for (idx, chunk) in found_thumbnails.chunks(THUMBNAIL_CHUNK_SIZE).enumerate() {
+		trace!(chunk = idx + 1, "Processing chunk for thumbnail removal");
+		let results = chunk
+			.into_par_iter()
+			.map(|path| {
+				std::fs::remove_file(path)?;
+				Ok(())
+			})
+			.filter_map(|res: Result<(), FileError>| {
+				if res.is_err() {
+					error!(error = ?res.err(), "Error deleting thumbnail!");
+					None
+				} else {
+					res.ok()
+				}
+			})
+			.collect::<Vec<()>>();
+
+		trace!(deleted_count = results.len(), "Deleted thumbnail batch");
+		deleted_thumbnails_count += results.len() as u64;
+	}
+
+	Ok(deleted_thumbnails_count)
+}
diff --git a/core/src/filesystem/media/analyze_media_job/task_page_count.rs b/core/src/filesystem/media/analyze_media_job/task_page_count.rs
index 146e36150..193a220fb 100644
--- a/core/src/filesystem/media/analyze_media_job/task_page_count.rs
+++ b/core/src/filesystem/media/analyze_media_job/task_page_count.rs
@@ -1,7 +1,7 @@
 use crate::{
 	filesystem::{
 		analyze_media_job::{utils::fetch_media_with_metadata, AnalyzeMediaOutput},
-		media::process::get_page_count,
+		media::process::get_page_count_async,
 	},
 	job::{error::JobError, WorkerCtx},
 	prisma::{media, media_metadata},
@@ -27,7 +27,7 @@ pub(crate) async fn execute(
 	let media_item = fetch_media_with_metadata(&id, ctx).await?;
 	let path = media_item.path;
 
-	let page_count = get_page_count(&path, &ctx.config)?;
+	let page_count = get_page_count_async(&path, &ctx.config).await?;
 	output.page_counts_analyzed += 1;
 
 	// Check if a metadata update is needed
diff --git a/core/src/filesystem/media/builder.rs b/core/src/filesystem/media/builder.rs
index 267206701..86cfa6b45 100644
--- a/core/src/filesystem/media/builder.rs
+++ b/core/src/filesystem/media/builder.rs
@@ -1,13 +1,10 @@
-use std::{
-	path::{Path, PathBuf},
-	sync::Arc,
-};
+use std::path::{Path, PathBuf};
 
 use prisma_client_rust::chrono::{DateTime, FixedOffset, Utc};
 
 use crate::{
 	config::StumpConfig,
-	db::entity::{LibraryOptions, Media, Series},
+	db::entity::{LibraryConfig, Media, Series},
 	filesystem::{process, FileParts, PathUtils, SeriesJson},
 	CoreError, CoreResult,
 };
@@ -15,21 +12,21 @@ use crate::{
 pub struct MediaBuilder {
 	path: PathBuf,
 	series_id: String,
-	library_options: LibraryOptions,
-	config: Arc<StumpConfig>,
+	library_config: LibraryConfig,
+	config: StumpConfig,
 }
 
 impl MediaBuilder {
 	pub fn new(
 		path: &Path,
 		series_id: &str,
-		library_options: LibraryOptions,
-		config: &Arc<StumpConfig>,
+		library_config: LibraryConfig,
+		config: &StumpConfig,
 	) -> Self {
 		Self {
 			path: path.to_path_buf(),
 			series_id: series_id.to_string(),
-			library_options,
+			library_config,
 			config: config.clone(),
 		}
 	}
@@ -44,7 +41,7 @@ impl MediaBuilder {
 
 	pub fn build(self) -> CoreResult<Media> {
 		let mut processed_entry =
-			process(&self.path, self.library_options.into(), &self.config)?;
+			process(&self.path, self.library_config.into(), &self.config)?;
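	// `library_config.into()` resolves via the `From<LibraryConfig> for FileProcessorOptions`
	// conversion added in process.rs, so the per-library toggles (RAR conversion, hashing,
	// metadata) reach the processor without passing the whole entity through.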
tracing::trace!(?processed_entry, "Processed entry"); @@ -193,14 +190,13 @@ mod tests { fn build_media_test_helper(path: String) -> Result { let path = Path::new(&path); - let library_options = LibraryOptions { + let library_config = LibraryConfig { convert_rar_to_zip: false, hard_delete_conversions: false, ..Default::default() }; let series_id = "series_id"; - let config = Arc::new(StumpConfig::debug()); - MediaBuilder::new(path, series_id, library_options, &config).build() + MediaBuilder::new(path, series_id, library_config, &StumpConfig::debug()).build() } } diff --git a/core/src/filesystem/media/epub.rs b/core/src/filesystem/media/format/epub.rs similarity index 96% rename from core/src/filesystem/media/epub.rs rename to core/src/filesystem/media/format/epub.rs index a2982e946..53b3cd168 100644 --- a/core/src/filesystem/media/epub.rs +++ b/core/src/filesystem/media/format/epub.rs @@ -6,12 +6,15 @@ const DEFAULT_EPUB_COVER_ID: &str = "cover"; use crate::{ config::StumpConfig, db::entity::MediaMetadata, - filesystem::{content_type::ContentType, error::FileError, hash}, + filesystem::{ + content_type::ContentType, + error::FileError, + hash, + media::process::{FileProcessor, FileProcessorOptions, ProcessedFile}, + }, }; use epub::doc::EpubDoc; -use super::process::{FileProcessor, FileProcessorOptions, ProcessedFile}; - // TODO: lots of smells in this file, needs a touch up :) /// A file processor for EPUB files. @@ -64,7 +67,10 @@ impl FileProcessor for EpubProcessor { fn process( path: &str, - _: FileProcessorOptions, + FileProcessorOptions { + generate_file_hashes, + .. + }: FileProcessorOptions, _: &StumpConfig, ) -> Result { tracing::debug!(?path, "processing epub"); @@ -72,13 +78,16 @@ impl FileProcessor for EpubProcessor { let path_buf = PathBuf::from(path); let epub_file = Self::open(path)?; - tracing::trace!(?epub_file.metadata, "Processing raw EPUB metadata"); let pages = epub_file.get_num_pages() as i32; + // Note: The metadata is already parsed by the EPUB library, so might as well use it let metadata = MediaMetadata::from(epub_file.metadata); + let hash = generate_file_hashes + .then(|| EpubProcessor::hash(path)) + .flatten(); Ok(ProcessedFile { path: path_buf, - hash: EpubProcessor::hash(path), + hash, metadata: Some(metadata), pages, }) @@ -388,6 +397,7 @@ mod tests { FileProcessorOptions { convert_rar_to_zip: false, delete_conversion_source: false, + ..Default::default() }, &config, ); diff --git a/core/src/filesystem/media/format/mod.rs b/core/src/filesystem/media/format/mod.rs new file mode 100644 index 000000000..17afbb278 --- /dev/null +++ b/core/src/filesystem/media/format/mod.rs @@ -0,0 +1,4 @@ +pub mod epub; +pub mod pdf; +pub mod rar; +pub mod zip; diff --git a/core/src/filesystem/media/pdf.rs b/core/src/filesystem/media/format/pdf.rs similarity index 93% rename from core/src/filesystem/media/pdf.rs rename to core/src/filesystem/media/format/pdf.rs index ea96cef06..7e2960d65 100644 --- a/core/src/filesystem/media/pdf.rs +++ b/core/src/filesystem/media/format/pdf.rs @@ -12,13 +12,17 @@ use crate::{ config::StumpConfig, db::entity::MediaMetadata, filesystem::{ - archive::create_zip_archive, error::FileError, hash, image::ImageFormat, + archive::create_zip_archive, + error::FileError, + hash, + image::ImageFormat, + media::process::{ + FileConverter, FileProcessor, FileProcessorOptions, ProcessedFile, + }, ContentType, FileParts, PathUtils, }, }; -use super::{process::FileConverter, FileProcessor, FileProcessorOptions, ProcessedFile}; - /// A file processor for PDF 
files. pub struct PdfProcessor; @@ -60,17 +64,25 @@ impl FileProcessor for PdfProcessor { fn process( path: &str, - _: FileProcessorOptions, + FileProcessorOptions { + generate_file_hashes, + .. + }: FileProcessorOptions, _: &StumpConfig, ) -> Result { let file = FileOptions::cached().open(path)?; let pages = file.pages().count() as i32; + // Note: The metadata is already parsed by the PDF library, so might as well use it + // PDF metadata is generally poop though let metadata = file.trailer.info_dict.map(MediaMetadata::from); + let hash = generate_file_hashes + .then(|| PdfProcessor::hash(path)) + .flatten(); Ok(ProcessedFile { path: PathBuf::from(path), - hash: PdfProcessor::hash(path), + hash, metadata, pages, }) @@ -272,6 +284,7 @@ mod tests { FileProcessorOptions { convert_rar_to_zip: false, delete_conversion_source: false, + ..Default::default() }, &config, ); diff --git a/core/src/filesystem/media/rar.rs b/core/src/filesystem/media/format/rar.rs similarity index 96% rename from core/src/filesystem/media/rar.rs rename to core/src/filesystem/media/format/rar.rs index 5b9404dab..17eb7d1e6 100644 --- a/core/src/filesystem/media/rar.rs +++ b/core/src/filesystem/media/format/rar.rs @@ -15,14 +15,17 @@ use crate::{ error::FileError, hash::{self, HASH_SAMPLE_COUNT, HASH_SAMPLE_SIZE}, image::ImageFormat, - media::common::metadata_from_buf, - zip::ZipProcessor, + media::{ + process::{ + FileConverter, FileProcessor, FileProcessorOptions, ProcessedFile, + }, + utils::metadata_from_buf, + zip::ZipProcessor, + }, FileParts, PathUtils, }, }; -use super::{process::FileConverter, FileProcessor, FileProcessorOptions, ProcessedFile}; - /// A file processor for RAR files. pub struct RarProcessor; @@ -114,9 +117,10 @@ impl FileProcessor for RarProcessor { return ZipProcessor::process(zip_path, options, config); } - debug!(path, "Processing RAR"); - - let hash: Option = RarProcessor::hash(path); + let hash = options + .generate_file_hashes + .then(|| RarProcessor::hash(path)) + .flatten(); let mut archive = RarProcessor::open_for_processing(path)?; let mut pages = 0; @@ -135,7 +139,7 @@ impl FileProcessor for RarProcessor { continue; } - if entry.filename.as_os_str() == "ComicInfo.xml" { + if entry.filename.as_os_str() == "ComicInfo.xml" && options.process_metadata { let (data, rest) = header.read()?; metadata_buf = Some(data); archive = rest; @@ -346,6 +350,7 @@ mod tests { FileProcessorOptions { convert_rar_to_zip: true, delete_conversion_source: true, + ..Default::default() }, &config, ); diff --git a/core/src/filesystem/media/zip.rs b/core/src/filesystem/media/format/zip.rs similarity index 95% rename from core/src/filesystem/media/zip.rs rename to core/src/filesystem/media/format/zip.rs index d1fc0cd19..fc0b64e16 100644 --- a/core/src/filesystem/media/zip.rs +++ b/core/src/filesystem/media/format/zip.rs @@ -7,13 +7,14 @@ use crate::{ content_type::ContentType, error::FileError, hash, - media::common::{metadata_from_buf, sort_file_names}, + media::{ + process::{FileProcessor, FileProcessorOptions, ProcessedFile}, + utils::{metadata_from_buf, sort_file_names}, + }, FileParts, PathUtils, }, }; -use super::{FileProcessor, FileProcessorOptions, ProcessedFile}; - /// A file processor for ZIP files. pub struct ZipProcessor; @@ -57,18 +58,21 @@ impl FileProcessor for ZipProcessor { fn process( path: &str, - _: FileProcessorOptions, + FileProcessorOptions { + generate_file_hashes, + process_metadata, + .. 
+ }: FileProcessorOptions, _: &StumpConfig, ) -> Result { - debug!(path, "Processing zip"); - - let hash = ZipProcessor::hash(path); let zip_file = File::open(path)?; let mut archive = zip::ZipArchive::new(zip_file)?; let mut metadata = None; let mut pages = 0; + let hash = generate_file_hashes.then(|| Self::hash(path)).flatten(); + for i in 0..archive.len() { let mut file = archive.by_index(i)?; @@ -91,9 +95,8 @@ impl FileProcessor for ZipProcessor { let content_type = path.naive_content_type(); let FileParts { file_name, .. } = path.file_parts(); - if file_name == "ComicInfo.xml" { + if file_name == "ComicInfo.xml" && process_metadata { trace!("Found ComicInfo.xml"); - // we have the first few bytes of the file in buf, so we need to read the rest and make it a string let mut contents = Vec::new(); file.read_to_end(&mut contents)?; let contents = String::from_utf8_lossy(&contents).to_string(); @@ -269,6 +272,7 @@ mod tests { FileProcessorOptions { convert_rar_to_zip: false, delete_conversion_source: false, + ..Default::default() }, &config, ); @@ -285,6 +289,7 @@ mod tests { FileProcessorOptions { convert_rar_to_zip: false, delete_conversion_source: false, + ..Default::default() }, &config, ); @@ -301,6 +306,7 @@ mod tests { FileProcessorOptions { convert_rar_to_zip: false, delete_conversion_source: false, + ..Default::default() }, &config, ); diff --git a/core/src/filesystem/media/mod.rs b/core/src/filesystem/media/mod.rs index 76322ee1f..e10c15849 100644 --- a/core/src/filesystem/media/mod.rs +++ b/core/src/filesystem/media/mod.rs @@ -1,19 +1,14 @@ pub mod analyze_media_job; mod builder; -mod common; -pub(crate) mod epub; -pub mod pdf; +mod format; mod process; -pub mod rar; -pub mod zip; +mod utils; pub use crate::filesystem::media::epub::EpubProcessor; pub(crate) use builder::{MediaBuilder, SeriesBuilder}; -pub use common::is_accepted_cover_name; -pub use process::{ - get_content_type_for_page, get_content_types_for_pages, get_page, process, - FileProcessor, FileProcessorOptions, ProcessedFile, SeriesJson, -}; +pub use format::*; +pub use process::*; +pub use utils::is_accepted_cover_name; #[cfg(test)] pub(crate) mod tests { diff --git a/core/src/filesystem/media/process.rs b/core/src/filesystem/media/process.rs index db2d4a1bd..fe2729b04 100644 --- a/core/src/filesystem/media/process.rs +++ b/core/src/filesystem/media/process.rs @@ -6,11 +6,12 @@ use std::{ }; use serde::{Deserialize, Serialize}; +use tokio::{sync::oneshot, task::spawn_blocking}; use tracing::debug; use crate::{ config::StumpConfig, - db::entity::{LibraryOptions, MediaMetadata, SeriesMetadata}, + db::entity::{LibraryConfig, MediaMetadata, SeriesMetadata}, filesystem::{ content_type::ContentType, epub::EpubProcessor, error::FileError, image::ImageFormat, pdf::PdfProcessor, @@ -19,30 +20,43 @@ use crate::{ use super::{rar::RarProcessor, zip::ZipProcessor}; -#[derive(Debug)] +/// A struct representing the options for processing a file. This is a subset of [LibraryConfig] +/// and is used to pass options to the [FileProcessor] implementations. 
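The two new flags are consumed by the format processors with a small `bool::then` gate, so the expensive work only happens when the library asks for it. The pattern, isolated (the `maybe_hash` and `compute_hash` names are illustrative, not from the patch):

    fn maybe_hash(path: &str, generate_file_hashes: bool) -> Option<String> {
        // `then` yields Some(compute_hash(..)) only when the flag is set, and the
        // inner Option (hashing itself may decline) is flattened away
        generate_file_hashes.then(|| compute_hash(path)).flatten()
    }

    fn compute_hash(path: &str) -> Option<String> {
        // Stand-in for e.g. `RarProcessor::hash`, which returns Option<String>
        let _ = path;
        None
    }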
+#[derive(Debug, Default)]
 pub struct FileProcessorOptions {
+	/// Whether to convert RAR files to ZIP files after processing
 	pub convert_rar_to_zip: bool,
+	/// Whether to delete the source file after converting it, if [FileProcessorOptions::convert_rar_to_zip] is true
 	pub delete_conversion_source: bool,
+	/// Whether to generate a file hash for the file
+	pub generate_file_hashes: bool,
+	/// Whether to process metadata for the file
+	pub process_metadata: bool,
 }
 
-impl From<LibraryOptions> for FileProcessorOptions {
-	fn from(options: LibraryOptions) -> Self {
+impl From<LibraryConfig> for FileProcessorOptions {
+	fn from(options: LibraryConfig) -> Self {
 		Self {
 			convert_rar_to_zip: options.convert_rar_to_zip,
 			delete_conversion_source: options.hard_delete_conversions,
+			generate_file_hashes: options.generate_file_hashes,
+			process_metadata: options.process_metadata,
 		}
 	}
 }
 
-impl From<&LibraryOptions> for FileProcessorOptions {
-	fn from(options: &LibraryOptions) -> Self {
+impl From<&LibraryConfig> for FileProcessorOptions {
+	fn from(options: &LibraryConfig) -> Self {
 		Self {
 			convert_rar_to_zip: options.convert_rar_to_zip,
 			delete_conversion_source: options.hard_delete_conversions,
+			generate_file_hashes: options.generate_file_hashes,
+			process_metadata: options.process_metadata,
 		}
 	}
 }
 
+// TODO(perf): Implement generic hasher which just takes X bytes from the file (and async version)
 /// Trait defining a standard API for processing files throughout Stump. Every
 /// supported file type should implement this trait.
 pub trait FileProcessor {
@@ -120,7 +134,9 @@ pub struct ProcessedFile {
 	pub pages: i32,
 }
 
-// TODO(perf): Async-ify this and use blocking threads in the processors?
+/// A function to process a file in a blocking manner. This will call the appropriate
+/// [FileProcessor::process] implementation based on the file's mime type, or return an
+/// error if the file type is not supported.
 pub fn process(
 	path: &Path,
 	options: FileProcessorOptions,
@@ -144,6 +160,46 @@
 	}
 }
 
+/// A function to process a file in the context of a spawned, blocking task. This will call the
+/// [process] function and send the result back out through a oneshot channel.
+#[tracing::instrument(err, fields(path = %path.as_ref().display()))]
+pub async fn process_async(
+	path: impl AsRef<Path>,
+	options: FileProcessorOptions,
+	config: &StumpConfig,
+) -> Result<ProcessedFile, FileError> {
+	let (tx, rx) = oneshot::channel();
+
+	let handle = spawn_blocking({
+		let path = path.as_ref().to_path_buf();
+		let config = config.clone();
+
+		move || {
+			let send_result = tx.send(process(path.as_path(), options, &config));
+			tracing::trace!(
+				is_err = send_result.is_err(),
+				"Sending result of sync process"
+			);
+		}
+	});
+
+	let processed_file = if let Ok(recv) = rx.await {
+		recv?
+	} else {
+		handle
+			.await
+			.map_err(|e| FileError::UnknownError(e.to_string()))?;
+		return Err(FileError::UnknownError(
+			"Failed to receive processed file".to_string(),
+		));
+	};
+
+	Ok(processed_file)
+}
+
+/// A function to extract the bytes of a page from a file in a blocking manner. This will call the
+/// appropriate [FileProcessor::get_page] implementation based on the file's mime type, or return an
+/// error if the file type is not supported.
 pub fn get_page(
 	path: &str,
 	page: i32,
@@ -164,6 +220,46 @@
 	}
 }
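With these wrappers in place, call sites inside async job handlers collapse to one-liners. A hedged usage sketch (the book path is hypothetical, and the import path assumes the `pub use process::*;` re-export in media/mod.rs):

    use stump_core::{config::StumpConfig, filesystem::media::get_page_async};

    async fn read_first_page(config: &StumpConfig) -> Result<(), Box<dyn std::error::Error>> {
        // Hypothetical CBZ path; pages are 1-indexed per the docs above
        let (_content_type, bytes) = get_page_async("/books/example.cbz", 1, config).await?;
        assert!(!bytes.is_empty());
        Ok(())
    }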
+/// A function to extract the bytes of a page from a file in the context of a spawned, blocking task.
+/// This will call the [get_page] function and send the result back out through a oneshot channel.
+#[tracing::instrument(err, fields(path = %path.as_ref().display()))]
+pub async fn get_page_async(
+	path: impl AsRef<Path>,
+	page: i32,
+	config: &StumpConfig,
+) -> Result<(ContentType, Vec<u8>), FileError> {
+	let (tx, rx) = oneshot::channel();
+
+	let handle = spawn_blocking({
+		let path = path.as_ref().to_path_buf();
+		let config = config.clone();
+
+		move || {
+			let send_result =
+				tx.send(get_page(path.to_str().unwrap_or_default(), page, &config));
+			tracing::trace!(
+				is_err = send_result.is_err(),
+				"Sending result of sync get_page"
+			);
+		}
+	});
+
+	let page_result = if let Ok(recv) = rx.await {
+		recv?
+	} else {
+		handle
+			.await
+			.map_err(|e| FileError::UnknownError(e.to_string()))?;
+		return Err(FileError::UnknownError(
+			"Failed to receive page content".to_string(),
+		));
+	};
+
+	Ok(page_result)
+}
+
+/// Get the number of pages in a file. This will call the appropriate [FileProcessor::get_page_count]
+/// implementation based on the file's mime type, or return an error if the file type is not supported.
 pub fn get_page_count(path: &str, config: &StumpConfig) -> Result<i32, FileError> {
 	let mime = ContentType::from_file(path).mime_type();
@@ -180,6 +276,46 @@ pub fn get_page_count(path: &str, config: &StumpConfig) -> Result<i32, FileError>
 	}
 }
 
+/// Get the number of pages in a file in the context of a spawned, blocking task. This will call
+/// the [get_page_count] function and send the result back out through a oneshot channel.
+#[tracing::instrument(err, fields(path = %path.as_ref().display()))]
+pub async fn get_page_count_async(
+	path: impl AsRef<Path>,
+	config: &StumpConfig,
+) -> Result<i32, FileError> {
+	let (tx, rx) = oneshot::channel();
+
+	let handle = spawn_blocking({
+		let path = path.as_ref().to_path_buf();
+		let config = config.clone();
+
+		move || {
+			let send_result =
+				tx.send(get_page_count(path.to_str().unwrap_or_default(), &config));
+			tracing::trace!(
+				is_err = send_result.is_err(),
+				"Sending result of sync get_page_count"
+			);
+		}
+	});
+
+	let page_count = if let Ok(recv) = rx.await {
+		recv?
+	} else {
+		handle
+			.await
+			.map_err(|e| FileError::UnknownError(e.to_string()))?;
+		return Err(FileError::UnknownError(
+			"Failed to receive page count".to_string(),
+		));
+	};
+
+	Ok(page_count)
+}
+
+/// Get the content types of a list of pages of a file. This will call the appropriate
+/// [FileProcessor::get_page_content_types] implementation based on the file's mime type, or return an
+/// error if the file type is not supported.
 pub fn get_content_types_for_pages(
 	path: &str,
 	pages: Vec<i32>,
@@ -203,7 +339,7 @@
 /// # Arguments
 /// * `path` - The path to the file
 /// * `page` - The page number to get the content type for, 1-indexed
-pub fn get_content_type_for_page(
+fn get_content_type_for_page_sync(
 	path: &str,
 	page: i32,
 ) -> Result<ContentType, FileError> {
@@ -224,3 +360,42 @@
 	Ok(result.get(&page).cloned().unwrap_or(ContentType::UNKNOWN))
 }
+
+/// Get the content type for a specific page of a file in the context of a spawned, blocking task.
+/// This will call the [get_content_type_for_page_sync] function and send the result back out through
+/// a oneshot channel.
+#[tracing::instrument(err, fields(path = %path.as_ref().display()))]
+pub async fn get_content_type_for_page(
+	path: impl AsRef<Path>,
+	page: i32,
+) -> Result<ContentType, FileError> {
+	let (tx, rx) = oneshot::channel();
+
+	let handle = spawn_blocking({
+		let path = path.as_ref().to_path_buf();
+
+		move || {
+			let send_result = tx.send(get_content_type_for_page_sync(
+				path.to_str().unwrap_or_default(),
+				page,
+			));
+			tracing::trace!(
+				is_err = send_result.is_err(),
+				"Sending result of sync get_content_type_for_page"
+			);
+		}
+	});
+
+	let content_type = if let Ok(recv) = rx.await {
+		recv?
+ } else { + handle + .await + .map_err(|e| FileError::UnknownError(e.to_string()))?; + return Err(FileError::UnknownError( + "Failed to receive content type for page".to_string(), + )); + }; + + Ok(content_type) +} diff --git a/core/src/filesystem/media/common.rs b/core/src/filesystem/media/utils.rs similarity index 100% rename from core/src/filesystem/media/common.rs rename to core/src/filesystem/media/utils.rs diff --git a/core/src/filesystem/scanner/library_scan_job.rs b/core/src/filesystem/scanner/library_scan_job.rs index c4adb0adf..17abd6c59 100644 --- a/core/src/filesystem/scanner/library_scan_job.rs +++ b/core/src/filesystem/scanner/library_scan_job.rs @@ -1,6 +1,3 @@ -use rayon::iter::{ - Either, IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator, -}; use std::{collections::VecDeque, path::PathBuf}; use serde::{Deserialize, Serialize}; @@ -13,18 +10,15 @@ use specta::Type; use crate::{ db::{ - entity::{CoreJobOutput, LibraryOptions, Series}, + entity::{CoreJobOutput, LibraryConfig}, FileStatus, SeriesDAO, DAO, }, - filesystem::{ - image::{ThumbnailGenerationJob, ThumbnailGenerationJobParams}, - SeriesBuilder, - }, + filesystem::image::{ThumbnailGenerationJob, ThumbnailGenerationJobParams}, job::{ error::JobError, Executor, JobExecuteLog, JobExt, JobOutputExt, JobProgress, JobTaskOutput, WorkerCtx, WorkerSendExt, WorkingState, WrappedJob, }, - prisma::{library, library_options, media, series, PrismaClient}, + prisma::{library, library_config, media, series, PrismaClient}, utils::chain_optional_iter, CoreEvent, }; @@ -32,9 +26,9 @@ use crate::{ use super::{ series_scan_job::SeriesScanTask, utils::{ - handle_create_media, handle_missing_media, handle_missing_series, - handle_visit_media, MediaBuildOperationCtx, MediaOperationOutput, - MissingSeriesOutput, + handle_missing_media, handle_missing_series, safely_build_and_insert_media, + safely_build_series, visit_and_update_media, MediaBuildOperation, + MediaOperationOutput, MissingSeriesOutput, }, walk_library, walk_series, WalkedLibrary, WalkedSeries, WalkerCtx, }; @@ -63,7 +57,7 @@ pub struct InitTaskInput { pub struct LibraryScanJob { pub id: String, pub path: String, - pub options: Option, + pub options: Option, } impl LibraryScanJob { @@ -133,21 +127,21 @@ impl JobExt for LibraryScanJob { // Note: We ignore the potential self.options here in the event that it was // updated since being queued. This is perhaps a bit overly cautious, but it's // just one additional query. - let library_options = ctx + let library_config = ctx .db - .library_options() - .find_first(vec![library_options::library::is(vec![ + .library_config() + .find_first(vec![library_config::library::is(vec![ library::id::equals(self.id.clone()), library::path::equals(self.path.clone()), ])]) .exec() .await? 
- .map(LibraryOptions::from) + .map(LibraryConfig::from) .ok_or(JobError::InitFailed("Library not found".to_string()))?; - let is_collection_based = library_options.is_collection_based(); - let ignore_rules = library_options.ignore_rules.build()?; + let is_collection_based = library_config.is_collection_based(); + let ignore_rules = library_config.ignore_rules.build()?; - self.options = Some(library_options); + self.options = Some(library_config); ctx.report_progress(JobProgress::msg("Performing task discovery")); let WalkedLibrary { @@ -265,7 +259,7 @@ impl JobExt for LibraryScanJob { let mut logs = vec![]; let mut subtasks = vec![]; - let chunk_size = ctx.config.scanner_chunk_size; + let max_concurrency = ctx.config.max_scanner_concurrency; match task { LibraryScanTask::Init(input) => { @@ -327,53 +321,38 @@ impl JobExt for LibraryScanJob { } if !series_to_create.is_empty() { - ctx.report_progress(JobProgress::msg(&format!( - "Building {} series entities", - series_to_create.len() - ))); - // TODO: remove this DAO!! - let series_dao = SeriesDAO::new(ctx.db.clone()); + ctx.report_progress(JobProgress::msg("Building new series")); + + let task_count = series_to_create.len() as i32; + let (built_series, failure_logs) = safely_build_series( + &self.id, + series_to_create, + ctx.config.as_ref(), + |position| { + ctx.report_progress(JobProgress::subtask_position( + position as i32, + task_count, + )) + }, + ) + .await; - let (built_series, failure_logs): (Vec<_>, Vec<_>) = series_to_create - .par_iter() - .enumerate() - .map(|(idx, path_buf)| { - ctx.report_progress(JobProgress::subtask_position_msg( - format!("Building series {}", path_buf.display()) - .as_str(), - current_subtask_index + (idx as i32), - total_subtask_count as i32, - )); - ( - path_buf, - SeriesBuilder::new(path_buf.as_path(), &self.id).build(), - ) - }) - .partition_map::<_, _, _, Series, JobExecuteLog>( - |(path_buf, result)| match result { - Ok(s) => Either::Left(s), - Err(e) => Either::Right( - JobExecuteLog::error(e.to_string()) - .with_ctx(path_buf.to_string_lossy().to_string()), - ), - }, - ); current_subtask_index += (built_series.len() + failure_logs.len()) as i32; logs.extend(failure_logs); - // TODO: make this configurable - let chunks = built_series.chunks(400); + // TODO: remove this DAO!! + let series_dao = SeriesDAO::new(ctx.db.clone()); + + let chunks = built_series.chunks(200); let chunk_count = chunks.len(); - tracing::debug!(chunk_count, "Batch inserting new series"); + tracing::trace!(chunk_count, "Batch inserting new series"); + for (idx, chunk) in chunks.enumerate() { - ctx.report_progress(JobProgress::msg( - format!( - "Processing series insertion chunk {} of {}", - idx + 1, - chunk_count - ) - .as_str(), + ctx.report_progress(JobProgress::subtask_position_msg( + "Inserting built series in batches", + (idx + 1) as i32, + chunk_count as i32, )); let result = series_dao.create_many(chunk.to_vec()).await; match result { @@ -555,11 +534,11 @@ impl JobExt for LibraryScanJob { created_media, logs: new_logs, .. - } = handle_create_media( - MediaBuildOperationCtx { + } = safely_build_and_insert_media( + MediaBuildOperation { series_id: series_id.clone(), - library_options: self.options.clone().unwrap_or_default(), - chunk_size, + library_config: self.options.clone().unwrap_or_default(), + max_concurrency, }, ctx, paths, @@ -585,11 +564,11 @@ impl JobExt for LibraryScanJob { updated_media, logs: new_logs, .. 
- } = handle_visit_media( - MediaBuildOperationCtx { + } = visit_and_update_media( + MediaBuildOperation { series_id: series_id.clone(), - library_options: self.options.clone().unwrap_or_default(), - chunk_size, + library_config: self.options.clone().unwrap_or_default(), + max_concurrency, }, ctx, paths, diff --git a/core/src/filesystem/scanner/series_scan_job.rs b/core/src/filesystem/scanner/series_scan_job.rs index eeb358d94..cc16b7cac 100644 --- a/core/src/filesystem/scanner/series_scan_job.rs +++ b/core/src/filesystem/scanner/series_scan_job.rs @@ -6,7 +6,7 @@ use specta::Type; use crate::{ db::{ entity::{ - macros::library_path_with_options_select, CoreJobOutput, LibraryOptions, + macros::library_path_with_options_select, CoreJobOutput, LibraryConfig, }, FileStatus, }, @@ -22,8 +22,8 @@ use crate::{ use super::{ utils::{ - handle_create_media, handle_missing_media, handle_visit_media, - MediaBuildOperationCtx, MediaOperationOutput, + handle_missing_media, safely_build_and_insert_media, visit_and_update_media, + MediaBuildOperation, MediaOperationOutput, }, walk_series, WalkedSeries, WalkerCtx, }; @@ -40,7 +40,7 @@ pub enum SeriesScanTask { pub struct SeriesScanJob { pub id: String, pub path: String, - pub options: Option, + pub options: Option, } impl SeriesScanJob { @@ -109,17 +109,17 @@ impl JobExt for SeriesScanJob { .ok_or(JobError::InitFailed( "Associated library not found".to_string(), ))?; - let library_options = LibraryOptions::from(library.library_options); - let ignore_rules = library_options.ignore_rules.build()?; + let library_config = LibraryConfig::from(library.config); + let ignore_rules = library_config.ignore_rules.build()?; // If the library is collection-priority, any child directories are 'ignored' and their // files are part of / folded into the top-most folder (series). // If the library is not collection-priority, each subdirectory is its own series. // Therefore, we only scan one level deep when walking a series whose library is not // collection-priority to avoid scanning duplicates which are part of other series - let max_depth = (!library_options.is_collection_based()).then_some(1); + let max_depth = (!library_config.is_collection_based()).then_some(1); - self.options = Some(library_options); + self.options = Some(library_config); let WalkedSeries { series_is_missing, @@ -220,7 +220,7 @@ impl JobExt for SeriesScanJob { let mut output = Self::Output::default(); let mut logs = vec![]; - let chunk_size = ctx.config.scanner_chunk_size; + let max_concurrency = ctx.config.max_scanner_concurrency; match task { SeriesScanTask::MarkMissingMedia(paths) => { @@ -249,11 +249,11 @@ impl JobExt for SeriesScanJob { created_media, logs: new_logs, .. - } = handle_create_media( - MediaBuildOperationCtx { + } = safely_build_and_insert_media( + MediaBuildOperation { series_id: self.id.clone(), - library_options: self.options.clone().unwrap_or_default(), - chunk_size, + library_config: self.options.clone().unwrap_or_default(), + max_concurrency, }, ctx, paths, @@ -278,11 +278,11 @@ impl JobExt for SeriesScanJob { updated_media, logs: new_logs, .. 
-				} = handle_visit_media(
-					MediaBuildOperationCtx {
+				} = visit_and_update_media(
+					MediaBuildOperation {
 						series_id: self.id.clone(),
-						library_options: self.options.clone().unwrap_or_default(),
-						chunk_size,
+						library_config: self.options.clone().unwrap_or_default(),
+						max_concurrency,
 					},
 					ctx,
 					paths,
diff --git a/core/src/filesystem/scanner/utils.rs b/core/src/filesystem/scanner/utils.rs
index c12c6808d..62b26c937 100644
--- a/core/src/filesystem/scanner/utils.rs
+++ b/core/src/filesystem/scanner/utils.rs
@@ -1,19 +1,33 @@
-use std::{collections::VecDeque, path::PathBuf};
+use std::{
+	collections::VecDeque,
+	path::{Path, PathBuf},
+	pin::pin,
+	sync::{
+		atomic::{AtomicUsize, Ordering},
+		Arc,
+	},
+	time::Instant,
+};
 
+use futures::{stream::FuturesUnordered, StreamExt};
 use prisma_client_rust::{
 	chrono::{DateTime, Utc},
 	QueryError,
 };
-use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
+use tokio::{
+	sync::{oneshot, Semaphore},
+	task::spawn_blocking,
+};
 use walkdir::DirEntry;
 
 use crate::{
+	config::StumpConfig,
 	db::{
-		entity::{LibraryOptions, Media},
+		entity::{LibraryConfig, Media, Series},
 		FileStatus,
 	},
 	error::{CoreError, CoreResult},
-	filesystem::MediaBuilder,
+	filesystem::{MediaBuilder, SeriesBuilder},
 	job::{error::JobError, JobExecuteLog, JobProgress, WorkerCtx, WorkerSendExt},
 	prisma::{media, media_metadata, series, PrismaClient},
 	CoreEvent,
@@ -277,111 +291,341 @@ pub(crate) async fn handle_missing_media(
 	output
 }
 
-pub(crate) struct MediaBuildOperationCtx {
+/// Builds a series from the given path
+///
+/// # Arguments
+/// * `for_library` - The library ID to associate the series with
+/// * `path` - The path to the series on disk
+async fn build_series(for_library: &str, path: &Path) -> CoreResult<Series> {
+	let (tx, rx) = oneshot::channel();
+
+	// Spawn a blocking task to handle the IO-intensive operations:
+	let handle = spawn_blocking({
+		let path = path.to_path_buf();
+		let for_library = for_library.to_string();
+
+		move || {
+			let send_result = tx.send(SeriesBuilder::new(&path, &for_library).build());
+			tracing::trace!(
+				is_err = send_result.is_err(),
+				"Sending build result to channel"
+			);
+		}
+	});
+
+	let build_result = if let Ok(recv) = rx.await {
+		recv?
+	} else {
+		handle
+			.await
+			.map_err(|e| CoreError::Unknown(e.to_string()))?;
+		return Err(CoreError::Unknown(
+			"Failed to receive build result".to_string(),
+		));
+	};
+
+	Ok(build_result)
+}
+
+/// Safely builds a series from a list of paths concurrently, with a maximum concurrency limit
+/// as defined by the core configuration.
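+///
+/// At its core this is a semaphore-bounded fan-out over a `FuturesUnordered` stream. A
+/// minimal, self-contained sketch of that pattern (the `bounded_fan_out` function and its
+/// `input.len()` body are illustrative stand-ins, not items from this crate):
+///
+/// ```no_run
+/// use std::sync::Arc;
+///
+/// use futures::{stream::FuturesUnordered, StreamExt};
+/// use tokio::sync::Semaphore;
+///
+/// async fn bounded_fan_out(inputs: Vec<String>, max_concurrency: usize) -> Vec<usize> {
+///     let semaphore = Arc::new(Semaphore::new(max_concurrency));
+///     let futures = inputs
+///         .into_iter()
+///         .map(|input| {
+///             let semaphore = semaphore.clone();
+///             async move {
+///                 // Holding the permit for the whole body caps how many
+///                 // bodies can run at once
+///                 let _permit = semaphore.acquire().await.expect("semaphore closed");
+///                 input.len() // stand-in for the real (expensive) work
+///             }
+///         })
+///         .collect::<FuturesUnordered<_>>();
+///     // Results arrive in completion order, not submission order
+///     futures.collect::<Vec<_>>().await
+/// }
+/// ```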
+///
+/// # Arguments
+/// * `for_library` - The library ID to associate the series with
+/// * `paths` - A list of paths to build series from
+/// * `core_config` - The core configuration
+/// * `reporter` - A function to report progress to the UI
+pub(crate) async fn safely_build_series(
+	for_library: &str,
+	paths: Vec<PathBuf>,
+	core_config: &StumpConfig,
+	reporter: impl Fn(usize),
+) -> (Vec<Series>, Vec<JobExecuteLog>) {
+	let mut logs = vec![];
+	let mut created_series = Vec::with_capacity(paths.len());
+
+	let max_concurrency = core_config.max_scanner_concurrency;
+	let semaphore = Arc::new(Semaphore::new(max_concurrency));
+	tracing::debug!(max_concurrency, "Semaphore created for series creation");
+
+	let start = Instant::now();
+
+	let futures = paths
+		.iter()
+		.map(|path| {
+			let semaphore = semaphore.clone();
+			let path = path.clone();
+			let library_id = for_library.to_string();
+
+			async move {
+				if semaphore.available_permits() == 0 {
+					tracing::debug!(?path, "No permits available, waiting for one");
+				}
+				let _permit = semaphore
+					.acquire()
+					.await
+					.map_err(|e| (CoreError::Unknown(e.to_string()), path.clone()))?;
+				tracing::trace!(?path, "Acquired permit for series creation");
+				build_series(&library_id, &path)
+					.await
+					.map_err(|e| (e, path.clone()))
+			}
+		})
+		.collect::<FuturesUnordered<_>>();
+
+	// An atomic usize to keep track of the current position in the stream
+	// to report progress to the UI
+	let atomic_cursor = Arc::new(AtomicUsize::new(1));
+
+	let mut futures = pin!(futures);
+
+	while let Some(result) = futures.next().await {
+		match result {
+			Ok(series) => {
+				created_series.push(series);
+			},
+			Err((error, path)) => {
+				logs.push(
+					JobExecuteLog::error(format!(
+						"Failed to build series: {:?}",
+						error.to_string()
+					))
+					.with_ctx(format!("Path: {:?}", path)),
+				);
+			},
+		}
+		// We visit every file, regardless of success or failure
+		reporter(atomic_cursor.fetch_add(1, Ordering::SeqCst));
+	}
+
+	let success_count = created_series.len();
+	let error_count = logs.len();
+	tracing::debug!(elapsed = ?start.elapsed(), success_count, error_count, "Finished batch of series");
+
+	(created_series, logs)
+}
+
+pub(crate) struct MediaBuildOperation {
 	pub series_id: String,
-	pub library_options: LibraryOptions,
-	pub chunk_size: usize,
+	pub library_config: LibraryConfig,
+	pub max_concurrency: usize,
+}
+
+/// Builds a media from the given path
+///
+/// # Arguments
+/// * `path` - The path to the media on disk
+/// * `series_id` - The series ID to associate the media with
+/// * `existing_book` - An optional existing media to rebuild
+/// * `library_config` - The library configuration
+/// * `config` - The core configuration
+async fn build_book(
+	path: &Path,
+	series_id: &str,
+	existing_book: Option<Media>,
+	library_config: LibraryConfig,
+	config: &StumpConfig,
+) -> CoreResult<Media> {
+	let (tx, rx) = oneshot::channel();
+
+	// Spawn a blocking task to handle the IO-intensive operations:
+	let handle = spawn_blocking({
+		let path = path.to_path_buf();
+		let series_id = series_id.to_string();
+		let library_config = library_config.clone();
+		let config = config.clone();
+
+		move || {
+			let builder = MediaBuilder::new(&path, &series_id, library_config, &config);
+			let send_result = tx.send(if let Some(existing_book) = existing_book {
+				builder.rebuild(&existing_book)
+			} else {
+				builder.build()
+			});
+			tracing::trace!(
+				is_err = send_result.is_err(),
+				"Sending build result to channel"
+			);
+		}
+	});
+
+	let build_result = if let Ok(recv) = rx.await {
+		recv?
+	} else {
+		handle
+			.await
+			.map_err(|e| CoreError::Unknown(e.to_string()))?;
+		return Err(CoreError::Unknown(
+			"Failed to receive build result".to_string(),
+		));
+	};
+
+	Ok(build_result)
 }
 
-// TODO(perf): don't use rayon for this IO-bound task
-pub(crate) async fn handle_create_media(
-	build_ctx: MediaBuildOperationCtx,
+/// Safely builds media from a list of paths concurrently, with a maximum concurrency limit
+/// as defined by the core configuration. The media is then inserted into the database.
+///
+/// # Arguments
+/// * `MediaBuildOperation` - The operation configuration for building media
+/// * `worker_ctx` - The worker context
+/// * `paths` - A list of paths to build media from
+pub(crate) async fn safely_build_and_insert_media(
+	MediaBuildOperation {
+		series_id,
+		library_config,
+		max_concurrency,
+	}: MediaBuildOperation,
 	worker_ctx: &WorkerCtx,
 	paths: Vec<PathBuf>,
 ) -> Result<MediaOperationOutput, JobError> {
 	if paths.is_empty() {
-		tracing::debug!("No media to create");
+		tracing::trace!("No media to create?");
 		return Ok(MediaOperationOutput::default());
 	}
 
 	let mut output = MediaOperationOutput::default();
-	let MediaBuildOperationCtx {
-		series_id,
-		library_options,
-		chunk_size,
-	} = build_ctx;
-
-	let path_chunks = paths.chunks(chunk_size);
-	for (idx, chunk) in path_chunks.enumerate() {
-		tracing::trace!(chunk_idx = idx, chunk_len = chunk.len(), "Processing chunk");
-		let mut built_media = chunk
-			.par_iter()
-			.map(|path_buf| {
-				(
-					path_buf.to_owned(),
-					MediaBuilder::new(
-						path_buf,
-						&series_id,
-						library_options.clone(),
-						&worker_ctx.config,
-					)
-					.build(),
-				)
-			})
-			.collect::<VecDeque<(PathBuf, CoreResult<Media>)>>();
+	let mut logs = vec![];
+
+	let semaphore = Arc::new(Semaphore::new(max_concurrency));
+	tracing::debug!(max_concurrency, "Semaphore created for media creation");
+
+	worker_ctx.report_progress(JobProgress::msg("Building media from disk"));
+	let task_count = paths.len() as i32;
+	let start = Instant::now();
+
+	let futures = paths
+		.iter()
+		.map(|path| {
+			let semaphore = semaphore.clone();
+			let series_id = series_id.clone();
+			let library_config = library_config.clone();
+			let path = path.clone();
+
+			async move {
+				if semaphore.available_permits() == 0 {
+					tracing::debug!(?path, "No permits available, waiting for one");
+				}
+				let _permit = semaphore
+					.acquire()
+					.await
+					.map_err(|e| (CoreError::Unknown(e.to_string()), path.clone()))?;
+				tracing::trace!(?path, "Acquired permit for media creation");
+				build_book(&path, &series_id, None, library_config, &worker_ctx.config)
+					.await
+					.map_err(|e| (e, path.clone()))
+			}
+		})
+		.collect::<FuturesUnordered<_>>();
 
-		while let Some((media_path, build_result)) = built_media.pop_front() {
-			let Ok(generated) = build_result else {
-				tracing::error!(?media_path, "Failed to build media");
-				output.logs.push(
+	// An atomic usize to keep track of the current position in the stream
+	// to report progress to the UI
+	let atomic_cursor = Arc::new(AtomicUsize::new(1));
+
+	let mut futures = pin!(futures);
+	let mut books = VecDeque::with_capacity(paths.len());
+
+	while let Some(result) = futures.next().await {
+		match result {
+			Ok(book) => {
+				books.push_back(book);
+			},
+			Err((error, path)) => {
+				logs.push(
 					JobExecuteLog::error(format!(
-						"Failed to build media: {:?}",
-						build_result.unwrap_err().to_string()
+						"Failed to build book: {:?}",
+						error.to_string()
 					))
-					.with_ctx(media_path.to_string_lossy().to_string()),
+					.with_ctx(format!("Path: {:?}", path)),
 				);
-				continue;
-			};
+			},
+		}
+		worker_ctx.report_progress(JobProgress::subtask_position(
+			atomic_cursor.fetch_add(1, Ordering::SeqCst) as i32,
+			task_count,
+		));
+	}
 
-			match create_media(&worker_ctx.db, generated).await {
-				Ok(created_media) => {
-					output.created_media += 1;
-					worker_ctx.send_batch(vec![
-						JobProgress::msg(
-							format!("Inserted {}", media_path.display()).as_str(),
-						)
-						.into_worker_send(),
-						CoreEvent::CreatedMedia {
-							id: created_media.id,
-							series_id: series_id.clone(),
-						}
-						.into_worker_send(),
-					]);
-				},
-				Err(e) => {
-					tracing::error!(error = ?e, ?media_path, "Failed to create media");
-					output.logs.push(
-						JobExecuteLog::error(format!(
-							"Failed to create media: {:?}",
-							e.to_string()
-						))
-						.with_ctx(media_path.to_string_lossy().to_string()),
-					);
-				},
-			}
-		}
-	}
+	let success_count = books.len();
+	let error_count = logs.len();
+	tracing::debug!(
+		elapsed = ?start.elapsed(),
+		success_count, error_count,
+		"Built books from disk"
+	);
+
+	worker_ctx.report_progress(JobProgress::msg("Inserting books into database"));
+	let task_count = books.len() as i32;
+	let start = Instant::now();
+
+	let atomic_cursor = Arc::new(AtomicUsize::new(1));
+
+	// TODO: consider small batches of _batch instead?
+	while let Some(book) = books.pop_front() {
+		let path = book.path.clone();
+		match create_media(&worker_ctx.db, book).await {
+			Ok(created_media) => {
+				output.created_media += 1;
+				worker_ctx.send_batch(vec![
+					JobProgress::subtask_position(
+						atomic_cursor.fetch_add(1, Ordering::SeqCst) as i32,
+						task_count,
+					)
+					.into_worker_send(),
+					CoreEvent::CreatedMedia {
+						id: created_media.id,
+						series_id: series_id.clone(),
+					}
+					.into_worker_send(),
+				]);
+			},
+			Err(e) => {
+				worker_ctx.report_progress(JobProgress::subtask_position(
+					atomic_cursor.fetch_add(1, Ordering::SeqCst) as i32,
+					task_count,
+				));
+				tracing::error!(error = ?e, ?path, "Failed to create media");
+				logs.push(
+					JobExecuteLog::error(format!(
+						"Failed to create media: {:?}",
+						e.to_string()
+					))
+					.with_ctx(path),
+				);
+			},
+		}
+	}
+
+	let success_count = output.created_media;
+	let error_count = logs.len() - error_count; // Subtract the errors from the previous step
+	tracing::debug!(success_count, error_count, elapsed = ?start.elapsed(), "Inserted books into database");
+
+	// Merge the accumulated logs into the output so failures reach the job summary
+	output.logs.extend(logs);
+
 	Ok(output)
 }
 
-pub(crate) async fn handle_visit_media(
-	build_ctx: MediaBuildOperationCtx,
+/// Visits the media on disk and updates the database with the latest information. This is done
+/// concurrently with a maximum concurrency limit as defined by the core configuration.
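+///
+/// Each file funnels through `build_book` above, which offloads the blocking filesystem
+/// work onto the blocking thread pool and hands the result back over a oneshot channel.
+/// A minimal sketch of that hand-off (`offload` and `expensive_io` are illustrative
+/// stand-ins, not items from this crate):
+///
+/// ```no_run
+/// use tokio::{sync::oneshot, task::spawn_blocking};
+///
+/// async fn offload() -> Result<u64, String> {
+///     let (tx, rx) = oneshot::channel();
+///     let handle = spawn_blocking(move || {
+///         // Runs on the blocking pool so it can't stall the async runtime
+///         let _ = tx.send(expensive_io());
+///     });
+///     match rx.await {
+///         Ok(value) => Ok(value),
+///         Err(_) => {
+///             // The sender was dropped; surface the join error if the task panicked
+///             handle.await.map_err(|e| e.to_string())?;
+///             Err("failed to receive result".to_string())
+///         },
+///     }
+/// }
+///
+/// fn expensive_io() -> u64 {
+///     42 // stand-in for reading and parsing a file on disk
+/// }
+/// ```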
+///
+/// # Arguments
+/// * `MediaBuildOperation` - The operation configuration for visiting media
+/// * `worker_ctx` - The worker context
+/// * `paths` - A list of paths to visit media from
+pub(crate) async fn visit_and_update_media(
+	MediaBuildOperation {
+		series_id,
+		library_config,
+		max_concurrency,
+	}: MediaBuildOperation,
 	worker_ctx: &WorkerCtx,
 	paths: Vec<PathBuf>,
 ) -> Result<MediaOperationOutput, JobError> {
 	if paths.is_empty() {
-		tracing::debug!("No media to visit");
+		tracing::trace!("No media to visit?");
 		return Ok(MediaOperationOutput::default());
 	}
 
-	let mut output = MediaOperationOutput::default();
-	let MediaBuildOperationCtx {
-		series_id,
-		library_options,
-		chunk_size,
-	} = build_ctx;
 	let client = &worker_ctx.db;
+	let mut output = MediaOperationOutput::default();
 
 	let media = client
 		.media()
@@ -406,54 +650,109 @@ pub(crate) async fn handle_visit_media(
 		));
 	}
 
-	let chunks = media.chunks(chunk_size);
+	let semaphore = Arc::new(Semaphore::new(max_concurrency));
+	tracing::debug!(max_concurrency, "Semaphore created for media visit");
+
+	worker_ctx.report_progress(JobProgress::msg("Visiting media on disk"));
+	let task_count = media.len() as i32;
+	let start = Instant::now();
 
-	for (idx, chunk) in chunks.enumerate() {
-		tracing::trace!(chunk_idx = idx, chunk_len = chunk.len(), "Processing chunk");
-		let mut built_media = chunk
-			.par_iter()
-			.map(|m| {
-				MediaBuilder::new(
-					PathBuf::from(m.path.as_str()).as_path(),
+	let futures = media
+		.into_iter()
+		.map(|existing_book| {
+			let semaphore = semaphore.clone();
+			let series_id = series_id.clone();
+			let library_config = library_config.clone();
+			let path = PathBuf::from(existing_book.path.as_str());
+
+			async move {
+				if semaphore.available_permits() == 0 {
+					tracing::debug!(?path, "No permits available, waiting for one");
+				}
+				let _permit = semaphore
+					.acquire()
+					.await
+					.map_err(|e| (CoreError::Unknown(e.to_string()), path.clone()))?;
+				tracing::trace!(?path, "Acquired permit for media visit");
+				build_book(
+					path.as_path(),
 					&series_id,
-					library_options.clone(),
+					Some(existing_book),
+					library_config,
 					&worker_ctx.config,
 				)
-				.rebuild(m)
-			})
-			.collect::<VecDeque<CoreResult<Media>>>();
-
-		while let Some(build_result) = built_media.pop_front() {
-			match build_result {
-				Ok(generated) => {
-					tracing::warn!(
-						"Stump currently has minimal support for updating media",
-					);
-					match update_media(client, generated).await {
-						Ok(updated_media) => {
-							tracing::trace!(?updated_media, "Updated media");
-							// TODO: emit event
-							output.updated_media += 1;
-						},
-						Err(e) => {
-							tracing::error!(error = ?e, "Failed to update media");
-							output.logs.push(JobExecuteLog::error(format!(
-								"Failed to update media: {:?}",
-								e.to_string()
-							)));
-						},
-					}
-				},
-				Err(e) => {
-					tracing::error!(error = ?e, "Failed to build media");
-					output.logs.push(JobExecuteLog::error(format!(
-						"Failed to build media: {:?}",
-						e.to_string()
-					)));
-				},
+				.await
+				.map_err(|e| (e, path))
 			}
-		}
+		})
+		.collect::<FuturesUnordered<_>>();
+
+	// An atomic usize to keep track of the current position in the stream
+	// to report progress to the UI
+	let atomic_cursor = Arc::new(AtomicUsize::new(1));
+
+	let mut futures = pin!(futures);
+	let mut books = VecDeque::with_capacity(paths.len());
+
+	while let Some(result) = futures.next().await {
+		match result {
+			Ok(book) => {
+				books.push_back(book);
+			},
+			Err((error, path)) => {
+				output.logs.push(
+					JobExecuteLog::error(format!(
+						"Failed to build book: {:?}",
+						error.to_string()
+					))
+					.with_ctx(format!("Path: {:?}", path)),
+				);
+			},
+		}
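+		// Progress is reported for every visited file, whether the rebuild succeeded
+		// or errored above, so the subtask counter always reaches its total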
+		worker_ctx.report_progress(JobProgress::subtask_position(
+			atomic_cursor.fetch_add(1, Ordering::SeqCst) as i32,
+			task_count,
+		));
+	}
+
+	let success_count = books.len();
+	let error_count = output.logs.len();
+	tracing::debug!(elapsed = ?start.elapsed(), success_count, error_count, "Rebuilt books from disk");
+
+	worker_ctx.report_progress(JobProgress::msg("Updating media in database"));
+	let task_count = books.len() as i32;
+	let start = Instant::now();
+
+	let atomic_cursor = Arc::new(AtomicUsize::new(1));
+
+	// TODO: We don't use the updated book, so chunk these and update_many?
+	while let Some(book) = books.pop_front() {
+		let path = book.path.clone();
+		match update_media(&worker_ctx.db, book).await {
+			Ok(_) => {
+				output.updated_media += 1;
+			},
+			Err(e) => {
+				tracing::error!(error = ?e, ?path, "Failed to update media");
+				output.logs.push(
+					JobExecuteLog::error(format!(
+						"Failed to update media: {:?}",
+						e.to_string()
+					))
+					.with_ctx(path),
+				);
+			},
+		}
+
+		worker_ctx.report_progress(JobProgress::subtask_position(
+			atomic_cursor.fetch_add(1, Ordering::SeqCst) as i32,
+			task_count,
+		));
+	}
+
+	let success_count = output.updated_media;
+	let error_count = output.logs.len() - error_count; // Subtract the errors from the previous step
+	tracing::debug!(elapsed = ?start.elapsed(), success_count, error_count, "Updated books in database");
+
 	Ok(output)
 }
diff --git a/core/src/job/progress.rs b/core/src/job/progress.rs
index 8887ba242..4c11ebc59 100644
--- a/core/src/job/progress.rs
+++ b/core/src/job/progress.rs
@@ -37,7 +37,7 @@ pub struct JobProgress {
 	pub completed_subtasks: Option<i32>,
 	/// The number of subtasks that exist in the current task
 	#[specta(optional)]
-	pub remaining_subtasks: Option<i32>,
+	pub total_subtasks: Option<i32>,
 }
 
 impl JobProgress {
@@ -98,7 +98,7 @@ impl JobProgress {
 	pub fn subtask_position(index: i32, size: i32) -> Self {
 		Self {
 			completed_subtasks: Some(index),
-			remaining_subtasks: Some(size),
+			total_subtasks: Some(size),
 			..Default::default()
 		}
 	}
diff --git a/core/src/job/scheduler.rs b/core/src/job/scheduler.rs
index d11f2a533..2eca5355d 100644
--- a/core/src/job/scheduler.rs
+++ b/core/src/job/scheduler.rs
@@ -1,7 +1,7 @@
 use std::sync::Arc;
 
 use crate::{
-	db::entity::LibraryOptions,
+	db::entity::LibraryConfig,
 	filesystem::scanner::LibraryScanJob,
 	job::WrappedJob,
 	prisma::{job_schedule_config, library},
@@ -71,7 +71,7 @@ impl JobScheduler {
 				.find_many(vec![library::id::not_in_vec(
 					excluded_library_ids.clone(),
 				)])
-				.with(library::library_options::fetch())
+				.with(library::config::fetch())
 				.exec()
 				.await
 				.unwrap_or_else(|e| {
@@ -80,15 +80,14 @@ impl JobScheduler {
 				});
 
 			for library in libraries_to_scan.iter() {
-				// TODO: support default scan mode on libraries
-				// let scan_mode = library.default_scan_mode.clone();
 				let library_path = library.path.clone();
+				// TODO: optimize query with select!/include!
+				let options = library.config().ok().take();
 
 				let result = scheduler_ctx.enqueue_job(WrappedJob::new(LibraryScanJob {
 					id: library.id.clone(),
 					path: library_path,
-					options: options.map(LibraryOptions::from),
+					options: options.map(LibraryConfig::from),
 				}));
 				if result.is_err() {
 					tracing::error!(
diff --git a/core/src/lib.rs b/core/src/lib.rs
index ed6b4b680..095ebb658 100644
--- a/core/src/lib.rs
+++ b/core/src/lib.rs
@@ -345,7 +345,7 @@ mod tests {
 		file.write_all(format!("{}\n\n", ts_export::()?).as_bytes())?;
 		file.write_all(format!("{}\n\n", ts_export::()?).as_bytes())?;
 		file.write_all(format!("{}\n\n", ts_export::()?).as_bytes())?;
-		file.write_all(format!("{}\n\n", ts_export::<LibraryOptions>()?).as_bytes())?;
+		file.write_all(format!("{}\n\n", ts_export::<LibraryConfig>()?).as_bytes())?;
 		file.write_all(format!("{}\n\n", ts_export::()?).as_bytes())?;
 		file.write_all(format!("{}\n\n", ts_export::()?).as_bytes())?;
diff --git a/core/src/opds/v2_0/link.rs b/core/src/opds/v2_0/link.rs
index f71d56019..eb593bbcd 100644
--- a/core/src/opds/v2_0/link.rs
+++ b/core/src/opds/v2_0/link.rs
@@ -450,8 +450,8 @@ mod tests {
 			hidden_from_users: None,
 			job_schedule_config: None,
 			job_schedule_config_id: None,
-			library_options: None,
-			library_options_id: String::default(),
+			config: None,
+			config_id: String::default(),
 			path: String::default(),
 			series: None,
 			status: String::from("READY"),
diff --git a/core/src/opds/v2_0/publication.rs b/core/src/opds/v2_0/publication.rs
index 0e21f7de6..ca0b2af6c 100644
--- a/core/src/opds/v2_0/publication.rs
+++ b/core/src/opds/v2_0/publication.rs
@@ -103,7 +103,7 @@ impl OPDSPublication {
 		for book in books {
 			let links = OPDSPublication::links_for_book(&book, &finalizer)?;
-			let images = OPDSPublication::images_for_book(&book, &finalizer)?;
+			let images = OPDSPublication::images_for_book(&book, &finalizer).await?;
 
 			let position = positions.get(&book.id).cloned();
 
@@ -151,7 +151,7 @@ impl OPDSPublication {
 		book: books_as_publications::Data,
 	) -> CoreResult {
 		let links = OPDSPublication::links_for_book(&book, &finalizer)?;
-		let images = OPDSPublication::images_for_book(&book, &finalizer)?;
+		let images = OPDSPublication::images_for_book(&book, &finalizer).await?;
 
 		let series = book
 			.series
@@ -230,7 +230,7 @@ impl OPDSPublication {
 		Ok(publication)
 	}
 
-	fn images_for_book(
+	async fn images_for_book(
 		book: &books_as_publications::Data,
 		finalizer: &OPDSLinkFinalizer,
 	) -> CoreResult> {
@@ -243,9 +243,9 @@
 					book.id
 				)),
 			)
-			._type(OPDSLinkType::from(get_content_type_for_page(
-				&book.path, 1,
-			)?))
+			._type(OPDSLinkType::from(
+				get_content_type_for_page(&book.path, 1).await?,
+			))
 			.build()?
 			.with_auth(finalizer.format_link(AUTH_ROUTE)),
 		)
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 68b6fd60d..1143c2abb 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -78,22 +78,20 @@ RUN --mount=type=cache,target=/var/cache/apt --mount=type=cache,target=/var/lib/
 # Final Stage
 # ------------------------------------------------------------------------------
 
-FROM debian:buster-slim
+# FROM debian:buster-slim
+FROM gcr.io/distroless/cc-debian12:debug
 
-RUN apt-get update && apt-get install -y locales-all && rm -rf /var/lib/apt/lists/*; \
-	mkdir -p config && mkdir -p data && mkdir -p app
+RUN [ "/busybox/ln", "-s", "/busybox/sh", "/bin/sh" ]
+RUN ln -s /busybox/env /usr/bin/env
+
+# TODO(distroless): Ensure locales aren't messed up
+# RUN apt-get update && apt-get install -y locales-all && rm -rf /var/lib/apt/lists/*; \
+#	mkdir -p config && mkdir -p data && mkdir -p app
 
 COPY --from=builder /app/stump_server /app/stump
 COPY --from=pdfium /pdfium /opt/pdfium
 COPY --from=frontend /app/build /app/client
-COPY docker/entrypoint.sh /entrypoint.sh
-
-
-RUN chmod +x /entrypoint.sh; \
-	ln -s /opt/pdfium/lib/libpdfium.so /lib/libpdfium.so; \
-	echo "/usr/local/lib" >> /etc/ld.so.conf.d/mylibs.conf \
-	&& ldconfig; \
-	if [ ! -d "/app/client" ] || [ ! "$(ls -A /app/client)" ]; then exit 1; fi
+COPY --chmod=755 docker/entrypoint.sh /entrypoint.sh
 
 # Default Stump environment variables
 ENV STUMP_CONFIG_DIR=/config \
@@ -101,7 +99,7 @@ ENV STUMP_CONFIG_DIR=/config \
 	STUMP_PROFILE=release \
 	STUMP_PORT=10801 \
 	STUMP_IN_DOCKER=true \
-	PDFIUM_PATH=/lib/libpdfium.so \
+	PDFIUM_PATH=/opt/pdfium/lib/libpdfium.so \
 	API_VERSION=v1
 
 WORKDIR /app
diff --git a/docker/collect_stats.sh b/docker/collect_stats.sh
new file mode 100755
index 000000000..4b733597e
--- /dev/null
+++ b/docker/collect_stats.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+_STATS_FILE=${STATS_FILE:-"docker_stats.log"}
+_CONTAINER_NAME=${CONTAINER_NAME:-"stump"}
+_INTERVAL_SECS=${INTERVAL_SECS:-5}
+_DO_PLOT=${DO_PLOT:-false}
+
+if [ "$_DO_PLOT" = false ]; then
+	# Clear the stats file if it exists
+	> $_STATS_FILE
+
+	capture_stats() {
+		docker stats --no-stream --format "{{.CPUPerc}},{{.MemUsage}},{{.MemPerc}},{{.NetIO}}" $_CONTAINER_NAME | \
+		awk -F '[ /]' '{print $1 "," $2 "," $4 "," $6 $7}' >> $_STATS_FILE
+	}
+
+	while true; do
+		capture_stats
+		sleep $_INTERVAL_SECS
+	done
+else
+	# Requires gnuplot. Note the logged values keep unit suffixes (%, MiB, kB);
+	# gnuplot reads only the leading digits, so the Net I/O series is approximate
+	gnuplot <<EOF
+set terminal png size 800,600
+set output 'docker_stats.png'
+set datafile separator ","
+set title "Docker Stats for $_CONTAINER_NAME"
+set xlabel "Time (samples)"
+set ylabel "CPU Usage (%)"
+set y2label "Memory Usage (MiB)"
+set grid
+set key outside
+set ytics nomirror
+set y2tics
+
+# Plot CPU, memory usage, memory percentage, and network I/O
+plot "$_STATS_FILE" using 0:1 with lines title "CPU (%)" axes x1y1, \\
+     "$_STATS_FILE" using 0:2 with lines title "Memory (MiB)" axes x1y2, \\
+     "$_STATS_FILE" using 0:3 with lines title "Memory (%)" axes x1y1, \\
+     "$_STATS_FILE" using 0:4 with lines title "Net I/O" axes x1y2
+EOF
+fi
+
diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh
index 52a28e323..329dcdecf 100644
--- a/docker/entrypoint.sh
+++ b/docker/entrypoint.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env sh
 
 # Depending on the values passed for PUID/PGID via environment variables,
 # either starts the stump server daemon as root or as a regular user
@@ -16,35 +16,21 @@ if [[ "$PUID" -lt 100 && "$PUID" -ne 0 ]]; then
 fi
 
+# TODO(distroless) ensure that the following checks don't cause issues after moving to distroless
 ## Add stump group if it doesn't already exist
-if [[ -z "$(getent group "$PGID" | cut -d':' -f1)" ]]; then
+if ! grep -q "^${GROUP}:" /etc/group; then
 	echo "Adding group $GROUP with gid $PGID"
-	addgroup --gid "$PGID" "$GROUP"
-else
-	echo "Group gid $PGID already exists"
-	# If the group name is not stump, we need to update GROUP as to avoid errors later
-	if [[ "$(getent group "$PGID" | cut -d':' -f1)" != "$GROUP" ]]; then
-		GROUP="$(getent group "$PGID" | cut -d':' -f1)"
-		echo "Group name '$GROUP' does not match expected name 'stump'. Using '$GROUP' instead."
-	fi
+	addgroup -g $PGID $GROUP
 fi
 
 ## Add stump user if it doesn't already exist
-if [[ -z "$(getent passwd "$PUID" | cut -d':' -f1)" ]]; then
+if ! grep -q "^${USER}:" /etc/passwd; then
 	echo "Adding user $USER with uid $PUID"
-	adduser --system --shell /bin/bash --no-create-home --uid "$PUID" --gid "$PGID" "$USER"
-else
-	echo "User $USER with uid $PUID already exists"
-	# If the user name is not stump, we need to update USER as to avoid errors later
-	if [[ "$(getent passwd "$PUID" | cut -d':' -f1)" != "$USER" ]]; then
-		USER="$(getent passwd "$PUID" | cut -d':' -f1)"
-		echo "User name '$USER' does not match expected name 'stump'. Using '$USER' instead."
-	fi
-
+	adduser -u $PUID -G $GROUP -D -H $USER
 fi
 
 # If a TZ is set, symlink /etc/localtime to it
-if [[ -n "$TZ" ]]; then
+if [ -n "${TZ:-}" ]; then
 	echo "Setting timezone to $TZ"
 	rm -f /etc/localtime # Remove existing symlink if present (shouldn't be)
 	ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime
@@ -68,5 +54,5 @@
 else
 	# Run as non-root user
 	# NOTE: Omit "-l" switch to keep env vars
-	su "$USER" -c /app/stump
+	exec su $USER -s /app/stump -- "$@"
 fi
diff --git a/docs/pages/guides/configuration/server-options.md b/docs/pages/guides/configuration/server-options.md
index 427927f3e..8bccdb889 100644
--- a/docs/pages/guides/configuration/server-options.md
+++ b/docs/pages/guides/configuration/server-options.md
@@ -109,6 +109,26 @@ The available verbosity levels are:
 | `2`   | `DEBUG`, `INFO`, `WARN`, `ERROR`          |
 | `3`   | `TRACE`, `DEBUG`, `INFO`, `WARN`, `ERROR` |
 
+#### STUMP_MAX_SCANNER_CONCURRENCY
+
+The maximum number of files that may be processed concurrently by the scanner. Capping how many files are processed at once helps prevent the server from becoming overwhelmed on systems with limited resources.
+
+**Note:** The OS thread scheduler should prevent overload; however, you may want to set this value lower if you're running Stump on a system with limited resources.
+
+| Type    | Default Value |
+| ------- | ------------- |
+| Integer | `200`         |
+
+#### STUMP_MAX_THUMBNAIL_CONCURRENCY
+
+The maximum number of images that may be generated concurrently by the thumbnailer. Capping how many thumbnails are generated at once helps prevent the server from becoming overwhelmed on systems with limited resources.
+
+**Note:** Thumbnail generation is a CPU-intensive process, so you may want to set this value lower if you're running Stump on a system with limited resources.
+
+| Type    | Default Value |
+| ------- | ------------- |
+| Integer | `50`          |
+
 #### ENABLE_SWAGGER_UI
 
 Whether or not to enable Swagger UI. To learn more about what Swagger UI is, visit [swagger.io](https://swagger.io/).
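Both scanner limits above are ordinary server options, so they can be set like any other environment variable. An illustrative (not prescriptive) `docker run` invocation for a low-powered host, using the project's published image name:

```sh
docker run \
  -e STUMP_MAX_SCANNER_CONCURRENCY=50 \
  -e STUMP_MAX_THUMBNAIL_CONCURRENCY=10 \
  aaronleopold/stump
```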
diff --git a/packages/browser/src/__mocks__/resizeObserver.ts b/packages/browser/src/__mocks__/resizeObserver.ts index 0af16862a..7e2e84ef5 100644 --- a/packages/browser/src/__mocks__/resizeObserver.ts +++ b/packages/browser/src/__mocks__/resizeObserver.ts @@ -1,5 +1,3 @@ -/* eslint-disable @typescript-eslint/no-empty-function */ -// eslint-disable-next-line @typescript-eslint/ban-ts-comment // components will require this mock. global.ResizeObserver = class FakeResizeObserver { observe() {} diff --git a/packages/browser/src/components/jobs/JobOverlay.tsx b/packages/browser/src/components/jobs/JobOverlay.tsx index 899c76549..417bc37b5 100644 --- a/packages/browser/src/components/jobs/JobOverlay.tsx +++ b/packages/browser/src/components/jobs/JobOverlay.tsx @@ -1,55 +1,65 @@ import { useJobStore } from '@stump/client' import { ProgressBar, Text } from '@stump/components' +import { JobUpdate } from '@stump/types' import { AnimatePresence, motion } from 'framer-motion' import { useMemo } from 'react' export default function JobOverlay() { const storeJobs = useJobStore((state) => state.jobs) - // get the first job that is running from the activeJobs object + /** + * The first running job in the store, which is used to determine the progress of the job. + */ const firstRunningJob = useMemo( () => Object.values(storeJobs).find((job) => job.status === 'RUNNING'), [storeJobs], ) + /** + * The subtask counts for the job, which describe the smaller units of work that are + * being done within the job. This is more indicative of the actual work being done + */ + const subTaskCounts = useMemo( + () => (firstRunningJob ? calcSubTaskCounts(firstRunningJob) : null), + [firstRunningJob], + ) + /** + * The task counts for the job, which describe the overarching tasks for the main + * job. This doesn't relate to smaller units of work, but rather the larger tasks + * which encompass multiple subtasks. + */ + const taskCounts = useMemo( + () => (firstRunningJob ? calcTaskCounts(firstRunningJob) : null), + [firstRunningJob], + ) - const getSubTaskCounts = () => { - if (!firstRunningJob) return null - - const { completed_subtasks, remaining_subtasks } = firstRunningJob - if (remaining_subtasks != null && remaining_subtasks > 0) { - return getCounts(completed_subtasks, remaining_subtasks) - } else { - return null - } - } - const subTaskCounts = useMemo(getSubTaskCounts, [firstRunningJob]) - - const getTaskCounts = () => { - if (!firstRunningJob) return null - - const { completed_tasks, remaining_tasks } = firstRunningJob - if (remaining_tasks != null && remaining_tasks >= 0) { - return getCounts(completed_tasks, remaining_tasks) - } else { - return null - } - } - const taskCounts = useMemo(getTaskCounts, [firstRunningJob]) - + /** + * The percentage value for the progress bar, calculated from the subtask counts. + * Note that we don't care about the task counts here, as the subtask counts are more + * indicative of actual work being done. + */ const progressValue = useMemo(() => { if (subTaskCounts != null) { const { completed, total } = subTaskCounts return (completed / total) * 100 - } else if (taskCounts != null) { - const { completed, total } = taskCounts - return (completed / total) * 100 } - return null - }, [subTaskCounts, taskCounts]) - - const renderTaskCounts = () => `${taskCounts?.completed ?? 0}/${taskCounts?.total ?? 0}` - const renderSubTaskCounts = () => `${subTaskCounts?.completed ?? 0}/${subTaskCounts?.total ?? 
0}` + }, [subTaskCounts]) + /** + * The string representation of the task counts, which is used to display the total, overarching + * tasks that are being done in the job. + */ + const taskCountString = useMemo( + () => (taskCounts?.total ? `Tasks (${taskCounts?.completed ?? 0}/${taskCounts.total})` : null), + [taskCounts], + ) + /** + * The string representation of the subtask counts, which is used to display the total, smaller + * units of work that are being done in the job. + */ + const subTaskCountString = useMemo( + () => (subTaskCounts?.total ? `${subTaskCounts?.completed ?? 0}/${subTaskCounts.total}` : null), + [subTaskCounts], + ) return ( @@ -66,10 +76,16 @@ export default function JobOverlay() {
- Tasks ({renderTaskCounts()}) - {subTaskCounts && {renderSubTaskCounts()}} + {taskCountString && {taskCountString}} + {subTaskCounts && {subTaskCountString}}
- + +
)} @@ -77,14 +93,20 @@ export default function JobOverlay() { ) } -type TaskCount = { - completed: number - total: number -} -const getCounts = (completed: number | null, remaining: number | null): TaskCount => { - const total = (completed ?? 0) + (remaining ?? 0) +const calcTaskCounts = ({ completed_tasks, remaining_tasks }: JobUpdate) => { + if (remaining_tasks == null || !completed_tasks) return null + + const total = (completed_tasks ?? 0) + (remaining_tasks ?? 0) return { - completed: completed ?? 0, + completed: completed_tasks ?? 0, total, } } + +const calcSubTaskCounts = ({ completed_subtasks, total_subtasks }: JobUpdate) => { + if (total_subtasks == null) return null + return { + completed: completed_subtasks ?? 0, + total: total_subtasks, + } +} diff --git a/packages/browser/src/components/library/createOrUpdate/schema.ts b/packages/browser/src/components/library/createOrUpdate/schema.ts index 4936d8609..2c6cb275c 100644 --- a/packages/browser/src/components/library/createOrUpdate/schema.ts +++ b/packages/browser/src/components/library/createOrUpdate/schema.ts @@ -62,6 +62,7 @@ export const buildSchema = (existingLibraries: Library[], library?: Library) => z.object({ convert_rar_to_zip: z.boolean().default(false), description: z.string().nullable().optional(), + generate_file_hashes: z.boolean().default(false), hard_delete_conversions: z.boolean().default(false), ignore_rules: z .array( @@ -95,6 +96,7 @@ export const buildSchema = (existingLibraries: Library[], library?: Library) => message: 'Invalid library, parent directory already exists as library.', }), ), + process_metadata: z.boolean().default(true), scan_mode: z.string().refine(isLibraryScanMode).default('DEFAULT'), tags: z .array( @@ -127,23 +129,25 @@ export type CreateOrUpdateLibrarySchema = z.infer * provided an existing library (if editing) */ export const formDefaults = (library?: Library): CreateOrUpdateLibrarySchema => ({ - convert_rar_to_zip: library?.library_options.convert_rar_to_zip ?? false, + convert_rar_to_zip: library?.config.convert_rar_to_zip ?? false, description: library?.description, - hard_delete_conversions: library?.library_options.hard_delete_conversions ?? false, - ignore_rules: toFormIgnoreRules(library?.library_options.ignore_rules), - library_pattern: library?.library_options.library_pattern || 'SERIES_BASED', + generate_file_hashes: library?.config.generate_file_hashes ?? false, + hard_delete_conversions: library?.config.hard_delete_conversions ?? false, + ignore_rules: toFormIgnoreRules(library?.config.ignore_rules), + library_pattern: library?.config.library_pattern || 'SERIES_BASED', name: library?.name || '', path: library?.path || '', + process_metadata: library?.config.process_metadata ?? true, scan_mode: 'DEFAULT', tags: library?.tags?.map((t) => ({ label: t.name, value: t.name.toLowerCase() })), - thumbnail_config: library?.library_options.thumbnail_config + thumbnail_config: library?.config.thumbnail_config ? 
 			{
 				enabled: true,
-				...library?.library_options.thumbnail_config,
+				...library?.config.thumbnail_config,
 			}
 		: {
 				enabled: false,
-				format: 'Png',
+				format: 'Webp',
 				quality: undefined,
 				resize_options: undefined,
 			},
diff --git a/packages/browser/src/components/library/createOrUpdate/sections/FileConversionOptions.tsx b/packages/browser/src/components/library/createOrUpdate/sections/FileConversionOptions.tsx
index ec1d5372e..85f004ec4 100644
--- a/packages/browser/src/components/library/createOrUpdate/sections/FileConversionOptions.tsx
+++ b/packages/browser/src/components/library/createOrUpdate/sections/FileConversionOptions.tsx
@@ -46,8 +46,8 @@ export default function FileConversionOptions({ onDidChange }: Props) {
 	useEffect(() => {
 		if (!ctx?.library || !onDidChange) return
 
-		const existingConvertToZip = ctx.library.library_options.convert_rar_to_zip
-		const existingHardDelete = ctx.library.library_options.hard_delete_conversions
+		const existingConvertToZip = ctx.library.config.convert_rar_to_zip
+		const existingHardDelete = ctx.library.config.hard_delete_conversions
 
 		const { convertRarToZip, hardDeleteConversions } = debouncedOptions
 		if (convertRarToZip !== existingConvertToZip || hardDeleteConversions !== existingHardDelete) {
diff --git a/packages/browser/src/components/library/createOrUpdate/sections/IgnoreRulesConfig.tsx b/packages/browser/src/components/library/createOrUpdate/sections/IgnoreRulesConfig.tsx
index 4d7e95288..afa195494 100644
--- a/packages/browser/src/components/library/createOrUpdate/sections/IgnoreRulesConfig.tsx
+++ b/packages/browser/src/components/library/createOrUpdate/sections/IgnoreRulesConfig.tsx
@@ -93,7 +93,7 @@ export default function IgnoreRulesConfig() {
 		return null
 	}
 
-	const existingRules = ctx.library.library_options.ignore_rules
+	const existingRules = ctx.library.config.ignore_rules
 	const hasChanges = ignoreRules.some((rule) =>
 		existingRules?.every((glob) => glob !== rule.glob),
 	)
diff --git a/packages/browser/src/components/library/createOrUpdate/sections/ScannerOptInFeatures.tsx b/packages/browser/src/components/library/createOrUpdate/sections/ScannerOptInFeatures.tsx
new file mode 100644
index 000000000..81dbdd758
--- /dev/null
+++ b/packages/browser/src/components/library/createOrUpdate/sections/ScannerOptInFeatures.tsx
@@ -0,0 +1,89 @@
+import { Alert, CheckBox, Heading, Text } from '@stump/components'
+import { useLocaleContext } from '@stump/i18n'
+import React, { useEffect } from 'react'
+import { useFormContext } from 'react-hook-form'
+import { useDebouncedValue } from 'rooks'
+
+import { CreateOrUpdateLibrarySchema } from '@/components/library/createOrUpdate'
+import { useLibraryContextSafe } from '@/scenes/library/context'
+
+type Props = {
+	/**
+	 * A callback that is triggered when the form values change, debounced by 1 second.
+	 */
+	onDidChange?: (
+		values: Pick<CreateOrUpdateLibrarySchema, 'process_metadata' | 'generate_file_hashes'>,
+	) => void
+}
+
+export default function ScannerOptInFeatures({ onDidChange }: Props) {
+	const form = useFormContext<CreateOrUpdateLibrarySchema>()
+	const ctx = useLibraryContextSafe()
+	const isCreating = !ctx?.library
+
+	const [processMetadata, generateFileHashes] = form.watch([
+		'process_metadata',
+		'generate_file_hashes',
+	])
+	const [debouncedOptions] = useDebouncedValue({ generateFileHashes, processMetadata }, 1000)
+
+	const { t } = useLocaleContext()
+
+	/**
+	 * An effect that triggers the `onDidChange` callback when the form values change.
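+	 * The watched values are debounced by one second (via `useDebouncedValue` above),
+	 * so rapid toggling results in a single callback.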
+ */ + useEffect(() => { + if (!ctx?.library || !onDidChange) return + + const existingProcessMetadata = ctx.library.config.process_metadata + const existingHashFiles = ctx.library.config.generate_file_hashes + const { processMetadata, generateFileHashes } = debouncedOptions + + if (processMetadata !== existingProcessMetadata || generateFileHashes !== existingHashFiles) { + onDidChange({ + generate_file_hashes: generateFileHashes, + process_metadata: processMetadata, + }) + } + }, [ctx?.library, debouncedOptions, onDidChange]) + + return ( +
+
+ {t(getKey('section.heading'))} + + {t(getKey('section.description'))} + +
+ + {isCreating && ( + + {t(getKey('section.disclaimer'))} + + )} + + form.setValue('process_metadata', !processMetadata)} + {...form.register('process_metadata')} + /> + + form.setValue('generate_file_hashes', !generateFileHashes)} + {...form.register('generate_file_hashes')} + /> +
+ ) +} + +const LOCALE_KEY = 'createOrUpdateLibraryForm.fields.scannerFeatures' +const getKey = (key: string) => `${LOCALE_KEY}.${key}` diff --git a/packages/browser/src/components/library/createOrUpdate/sections/index.ts b/packages/browser/src/components/library/createOrUpdate/sections/index.ts index 7c66f6533..7a38af022 100644 --- a/packages/browser/src/components/library/createOrUpdate/sections/index.ts +++ b/packages/browser/src/components/library/createOrUpdate/sections/index.ts @@ -3,4 +3,5 @@ export { default as FileConversionOptions } from './FileConversionOptions' export { default as IgnoreRulesConfig } from './IgnoreRulesConfig' export { default as LibraryPattern } from './LibraryPattern' export { default as ScanMode } from './ScanMode' +export { default as ScannerOptInFeatures } from './ScannerOptInFeatures' export { default as ThumbnailConfig } from './ThumbnailConfig' diff --git a/packages/browser/src/scenes/createLibrary/CreateLibraryForm.tsx b/packages/browser/src/scenes/createLibrary/CreateLibraryForm.tsx index ea0b44f5b..6811ed55d 100644 --- a/packages/browser/src/scenes/createLibrary/CreateLibraryForm.tsx +++ b/packages/browser/src/scenes/createLibrary/CreateLibraryForm.tsx @@ -17,6 +17,7 @@ import { FileConversionOptions, LibraryPattern as LibraryPatternSection, ScanMode, + ScannerOptInFeatures, ThumbnailConfig, } from '../../components/library/createOrUpdate/sections' import IgnoreRulesConfig from '../../components/library/createOrUpdate/sections/IgnoreRulesConfig' @@ -112,8 +113,9 @@ export default function CreateLibraryForm({ existingLibraries, onSubmit, isLoadi return ( <> - + +
+
+ + + {state.process_metadata ? 'Yes' : 'No'} + +
+ +
+ + + {state.generate_file_hashes ? 'Yes' : 'No'} + +
+
diff --git a/packages/browser/src/scenes/library/tabs/settings/LibrarySettingsRouter.tsx b/packages/browser/src/scenes/library/tabs/settings/LibrarySettingsRouter.tsx index a02f227bd..9ae6ff1fb 100644 --- a/packages/browser/src/scenes/library/tabs/settings/LibrarySettingsRouter.tsx +++ b/packages/browser/src/scenes/library/tabs/settings/LibrarySettingsRouter.tsx @@ -39,9 +39,7 @@ export default function LibrarySettingsRouter() { const payload: UpdateLibrary = { ...library, ...updates, - library_options: updates.library_options - ? { ...library.library_options, ...updates.library_options } - : library.library_options, + config: updates.config ? { ...library.config, ...updates.config } : library.config, tags: updates.tags ? updates.tags : library?.tags?.map(({ name }) => name), } editLibrary(payload) diff --git a/packages/browser/src/scenes/library/tabs/settings/options/FileConversionOptionsPatchForm.tsx b/packages/browser/src/scenes/library/tabs/settings/options/FileConversionOptionsPatchForm.tsx index bad159d5c..1136d87fc 100644 --- a/packages/browser/src/scenes/library/tabs/settings/options/FileConversionOptionsPatchForm.tsx +++ b/packages/browser/src/scenes/library/tabs/settings/options/FileConversionOptionsPatchForm.tsx @@ -28,8 +28,8 @@ export default function FileConversionOptionsPatchForm() { hard_delete_conversions, }: Pick) => { patch({ - library_options: { - ...library.library_options, + config: { + ...library.config, convert_rar_to_zip, hard_delete_conversions, }, diff --git a/packages/browser/src/scenes/library/tabs/settings/options/IgnoreRulesPatchForm.tsx b/packages/browser/src/scenes/library/tabs/settings/options/IgnoreRulesPatchForm.tsx index 792534f35..cff720f88 100644 --- a/packages/browser/src/scenes/library/tabs/settings/options/IgnoreRulesPatchForm.tsx +++ b/packages/browser/src/scenes/library/tabs/settings/options/IgnoreRulesPatchForm.tsx @@ -8,7 +8,6 @@ import { CreateOrUpdateLibrarySchema, formDefaults, IgnoreRulesConfig, - LibraryPattern, } from '@/components/library/createOrUpdate' import { useLibraryManagement } from '../context' @@ -26,8 +25,8 @@ export default function IgnoreRulesPatchForm() { const handleSubmit = useCallback( ({ ignore_rules }: CreateOrUpdateLibrarySchema) => { patch({ - library_options: { - ...library.library_options, + config: { + ...library.config, ignore_rules: ignore_rules?.map(({ glob }) => glob), }, scan_mode: 'NONE', @@ -37,10 +36,7 @@ export default function IgnoreRulesPatchForm() { ) return ( -
- {/* Note: This component doesn't really belong here, but I didn't want to wrap it in its own form when it is just for display */} - {/* Should probably create a separate, non-formy variant */} - + ) diff --git a/packages/browser/src/scenes/library/tabs/settings/options/ScannerBehaviorScene.tsx b/packages/browser/src/scenes/library/tabs/settings/options/ScannerBehaviorScene.tsx index 0537cd344..97f541d79 100644 --- a/packages/browser/src/scenes/library/tabs/settings/options/ScannerBehaviorScene.tsx +++ b/packages/browser/src/scenes/library/tabs/settings/options/ScannerBehaviorScene.tsx @@ -1,11 +1,13 @@ import FileConversionOptionsPatchForm from './FileConversionOptionsPatchForm' import IgnoreRulesPatchForm from './IgnoreRulesPatchForm' +import ScannerFeaturesPatchForm from './ScannerFeaturesPatchForm' export default function GeneralFileOptionsScene() { return (
- + +
) } diff --git a/packages/browser/src/scenes/library/tabs/settings/options/ScannerFeaturesPatchForm.tsx b/packages/browser/src/scenes/library/tabs/settings/options/ScannerFeaturesPatchForm.tsx new file mode 100644 index 000000000..cbd5a11aa --- /dev/null +++ b/packages/browser/src/scenes/library/tabs/settings/options/ScannerFeaturesPatchForm.tsx @@ -0,0 +1,53 @@ +import { zodResolver } from '@hookform/resolvers/zod' +import { Form } from '@stump/components' +import React, { useCallback, useMemo } from 'react' +import { useForm } from 'react-hook-form' + +import { + buildSchema, + CreateOrUpdateLibrarySchema, + formDefaults, + LibraryPattern, + ScannerOptInFeatures, +} from '@/components/library/createOrUpdate' + +import { useLibraryManagement } from '../context' + +export default function ScannerFeaturesPatchForm() { + const { library, patch } = useLibraryManagement() + + const schema = useMemo(() => buildSchema([], library), [library]) + const form = useForm({ + defaultValues: formDefaults(library), + reValidateMode: 'onChange', + resolver: zodResolver(schema), + }) + + const handleSubmit = useCallback( + ({ + process_metadata, + generate_file_hashes, + }: Pick) => { + patch({ + config: { + ...library.config, + generate_file_hashes, + process_metadata, + }, + scan_mode: 'NONE', + }) + }, + [patch, library], + ) + + // Note: The underlying sub-form requires a form in the context, so I am wrapping it in one. However, the submit + // won't ever trigger, which is why there is the `onDidChange` callback. + return ( +
+ {/* Note: This component doesn't really belong here, but I didn't want to wrap it in its own form when it is just for display */} + {/* Should probably create a separate, non-formy variant */} + + + + ) +} diff --git a/packages/browser/src/scenes/library/tabs/settings/options/thumbnails/ThumbnailSettingsScene.tsx b/packages/browser/src/scenes/library/tabs/settings/options/thumbnails/ThumbnailSettingsScene.tsx index 77badea27..1d46e71d5 100644 --- a/packages/browser/src/scenes/library/tabs/settings/options/thumbnails/ThumbnailSettingsScene.tsx +++ b/packages/browser/src/scenes/library/tabs/settings/options/thumbnails/ThumbnailSettingsScene.tsx @@ -27,14 +27,14 @@ export default function ThumbnailSettingsScene() { const handleSubmit = useCallback( ({ thumbnail_config }: Pick) => { patch({ - library_options: { - ...library.library_options, + config: { + ...library.config, thumbnail_config: ensureValidThumbnailConfig(thumbnail_config), }, scan_mode: 'NONE', }) }, - [patch, library.library_options], + [patch, library.config], ) return ( diff --git a/packages/browser/src/scenes/settings/server/email/emailers/schema.ts b/packages/browser/src/scenes/settings/server/email/emailers/schema.ts index fe0635c55..ca7f0f9d5 100644 --- a/packages/browser/src/scenes/settings/server/email/emailers/schema.ts +++ b/packages/browser/src/scenes/settings/server/email/emailers/schema.ts @@ -36,11 +36,11 @@ export const formDefaults = (emailer?: SMTPEmailer) => ({ is_primary: emailer?.is_primary || true, max_attachment_size_bytes: emailer?.config.max_attachment_size_bytes ?? null, name: emailer?.name, + password: undefined, sender_display_name: emailer?.config.sender_display_name, sender_email: emailer?.config.sender_email, smtp_host: emailer?.config.smtp_host, smtp_port: emailer?.config.smtp_port, tls_enabled: emailer?.config.tls_enabled || false, username: emailer?.config.username, - password: undefined, }) diff --git a/packages/client/src/hooks/useCoreEvent.ts b/packages/client/src/hooks/useCoreEvent.ts index c3ea7f9f3..f6d5b6b0a 100644 --- a/packages/client/src/hooks/useCoreEvent.ts +++ b/packages/client/src/hooks/useCoreEvent.ts @@ -66,6 +66,9 @@ export function useCoreEventHandler({ liveRefetch }: Params = {}) { ]) } break + case 'CreatedMedia': + // We don't really care, should honestly remove this... 
+ break default: console.warn('Unhandled core event', event) } diff --git a/packages/client/src/stores/job.ts b/packages/client/src/stores/job.ts index 1baf6b3e4..9242c5488 100644 --- a/packages/client/src/stores/job.ts +++ b/packages/client/src/stores/job.ts @@ -50,7 +50,12 @@ export const useJobStore = createWithEqualityFn( status: status || existingJob.status, } } else { - draft.jobs[job.id] = job + draft.jobs[job.id] = { + ...job, + // This should be a safe assumption, as status events will always have a set status and + // the only time other events are sent would be updates to the job itself + status: job.status || 'RUNNING', + } } }), ) diff --git a/packages/components/src/progress/ProgressBar.tsx b/packages/components/src/progress/ProgressBar.tsx index da6ddb940..fe91317ae 100644 --- a/packages/components/src/progress/ProgressBar.tsx +++ b/packages/components/src/progress/ProgressBar.tsx @@ -9,12 +9,12 @@ type ColorVariant = Record export const PROGRESS_BAR_COLOR_VARIANTS: ColorVariant = { default: 'bg-gray-200 dark:bg-gray-800', - primary: 'bg-brand-100 dark:bg-brand-300/80', + primary: 'bg-fill-brand-secondary', 'primary-dark': 'bg-brand-200 dark:bg-brand-250', } export const PROGRESS_BAR_INDICATOR_COLOR_VARIANTS: ColorVariant = { default: 'bg-gray-800 dark:bg-gray-400', - primary: 'bg-brand-500 dark:bg-brand-400', + primary: 'bg-fill-brand/70', 'primary-dark': 'bg-brand-600 dark:bg-brand-500', } @@ -46,6 +46,7 @@ type BaseProps = React.ComponentPropsWithoutRef & export type ProgressBarProps = { className?: string value?: number | null + isIndeterminate?: boolean } & BaseProps const safeValue = (value: number | null) => { @@ -57,7 +58,7 @@ const safeValue = (value: number | null) => { export const ProgressBar = React.forwardRef< React.ElementRef, ProgressBarProps ->(({ className, value, variant, size, rounded, ...props }, ref) => { +>(({ className, value, variant, size, rounded, isIndeterminate, ...props }, ref) => { const adjustedValue = useMemo(() => safeValue(value ?? null), [value]) return ( @@ -78,8 +79,13 @@ export const ProgressBar = React.forwardRef< className={cx( 'h-full w-full flex-1 transition-all', PROGRESS_BAR_INDICATOR_COLOR_VARIANTS[variant || 'default'], + { + 'origin-left-to-right-indeterminate animate-indeterminate-progress': isIndeterminate, + }, )} - style={{ transform: `translateX(-${100 - (adjustedValue || 0)}%)` }} + style={ + isIndeterminate ? 
undefined : { transform: `translateX(-${100 - (adjustedValue || 0)}%)` } + } /> ) diff --git a/packages/components/tailwind/tailwind.js b/packages/components/tailwind/tailwind.js index a0ce3c3cb..ece54533a 100644 --- a/packages/components/tailwind/tailwind.js +++ b/packages/components/tailwind/tailwind.js @@ -53,6 +53,7 @@ module.exports = function (relativeFromRoot) { animation: { 'accordion-down': 'accordion-down 0.2s ease-out', 'accordion-up': 'accordion-up 0.2s ease-out', + 'indeterminate-progress': 'indeterminate-progress 1s infinite linear', }, fontFamily: { inter: ['Inter var', ...defaultTheme.fontFamily.sans], @@ -71,6 +72,14 @@ module.exports = function (relativeFromRoot) { from: { height: 'var(--radix-accordion-content-height)' }, to: { height: 0 }, }, + 'indeterminate-progress': { + '0%': { transform: ' translateX(0) scaleX(0)' }, + '40%': { transform: 'translateX(0) scaleX(0.4)' }, + '100%': { transform: 'translateX(100%) scaleX(0.5)' }, + }, + }, + transformOrigin: { + 'left-to-right-indeterminate': '0% 50%', }, ringColor: { DEFAULT: sharedColors.brand['500'], diff --git a/packages/i18n/src/locales/en.json b/packages/i18n/src/locales/en.json index ff087793d..95ef51c7e 100644 --- a/packages/i18n/src/locales/en.json +++ b/packages/i18n/src/locales/en.json @@ -234,6 +234,21 @@ "description": "Delete the original RAR/CBR file after conversion. This is irreversible" } }, + "scannerFeatures": { + "section": { + "heading": "Optional processing", + "description": "Opt-in processing features for the scanner", + "disclaimer": "These features involve additional file reads which may slow down the scan process" + }, + "processMetadata": { + "label": "Process metadata", + "description": "Extract any metadata from library files and store the results in the database" + }, + "generateFileHashes": { + "label": "Generate file hashes", + "description": "Generate a hash for each file in the library to detect duplicates" + } + }, "libraryPattern": { "section": { "heading": "Organization", @@ -312,7 +327,9 @@ "pattern": "Pattern", "ignoreRules": "Ignore rules", "convertRar": "Convert RAR/CBR", - "deleteConversions": "Delete converted files" + "deleteConversions": "Delete converted files", + "processMetadata": "Process metadata", + "generateFileHashes": "Generate file hashes" } }, "buttons": { diff --git a/packages/types/generated.ts b/packages/types/generated.ts index 91e506774..adb7a211c 100644 --- a/packages/types/generated.ts +++ b/packages/types/generated.ts @@ -32,14 +32,14 @@ export type CoreJobOutput = LibraryScanOutput | SeriesScanOutput | ThumbnailGene /** * An update event that is emitted by a job */ -export type JobUpdate = ({ status?: JobStatus | null; message?: string | null; completed_tasks?: number | null; remaining_tasks?: number | null; completed_subtasks?: number | null; remaining_subtasks?: number | null }) & { id: string } +export type JobUpdate = ({ status?: JobStatus | null; message?: string | null; completed_tasks?: number | null; remaining_tasks?: number | null; completed_subtasks?: number | null; total_subtasks?: number | null }) & { id: string } /** * A struct that represents a progress event that is emitted by a job. This behaves like a patch, * where the client will ignore any fields that are not present. This is done so all internal ops * can be done without needing to know the full state of the job. 
*/ -export type JobProgress = { status?: JobStatus | null; message?: string | null; completed_tasks?: number | null; remaining_tasks?: number | null; completed_subtasks?: number | null; remaining_subtasks?: number | null } +export type JobProgress = { status?: JobStatus | null; message?: string | null; completed_tasks?: number | null; remaining_tasks?: number | null; completed_subtasks?: number | null; total_subtasks?: number | null } /** * The data that is collected and updated during the execution of a library scan job @@ -52,7 +52,7 @@ export type ThumbnailGenerationJobVariant = ({ type: "SingleLibrary" } & string) export type ThumbnailGenerationJobParams = { variant: ThumbnailGenerationJobVariant; force_regenerate: boolean } -export type ThumbnailGenerationOutput = { visited_files: BigInt; generated_thumbnails: BigInt; removed_thumbnails: BigInt } +export type ThumbnailGenerationOutput = { visited_files: BigInt; skipped_files: BigInt; generated_thumbnails: BigInt; removed_thumbnails: BigInt } export type User = { id: string; username: string; is_server_owner: boolean; avatar_url: string | null; created_at: string; last_login: string | null; is_locked: boolean; permissions: UserPermission[]; max_sessions_allowed?: number | null; login_sessions_count?: number | null; user_preferences?: UserPreferences | null; login_activity?: LoginActivity[] | null; age_restriction?: AgeRestriction | null; active_reading_sessions?: ActiveReadingSession[] | null; finished_reading_sessions?: FinishedReadingSession[] | null } @@ -116,7 +116,7 @@ export type AttachmentMeta = { filename: string; media_id: string | null; size: export type FileStatus = "UNKNOWN" | "READY" | "UNSUPPORTED" | "ERROR" | "MISSING" -export type Library = { id: string; name: string; description: string | null; emoji: string | null; path: string; status: string; updated_at: string; series: Series[] | null; tags: Tag[] | null; library_options: LibraryOptions } +export type Library = { id: string; name: string; description: string | null; emoji: string | null; path: string; status: string; updated_at: string; series: Series[] | null; tags: Tag[] | null; config: LibraryConfig } export type LibraryPattern = "SERIES_BASED" | "COLLECTION_BASED" @@ -124,7 +124,7 @@ export type LibraryScanMode = "DEFAULT" | "NONE" export type IgnoreRules = string[] -export type LibraryOptions = { id?: string | null; convert_rar_to_zip: boolean; hard_delete_conversions: boolean; library_pattern: LibraryPattern; thumbnail_config: ImageProcessorOptions | null; ignore_rules?: IgnoreRules; library_id?: string | null } +export type LibraryConfig = { id?: string | null; convert_rar_to_zip: boolean; hard_delete_conversions: boolean; generate_file_hashes: boolean; process_metadata: boolean; library_pattern: LibraryPattern; thumbnail_config: ImageProcessorOptions | null; ignore_rules?: IgnoreRules; library_id?: string | null } export type LibraryStats = { series_count: BigInt; book_count: BigInt; total_bytes: BigInt; completed_books: BigInt; in_progress_books: BigInt } @@ -350,9 +350,9 @@ export type CreateOrUpdateEmailDevice = { name: string; email: string; forbidden */ export type PatchEmailDevice = { name: string | null; email: string | null; forbidden: boolean | null } -export type CreateLibrary = { name: string; path: string; description?: string | null; tags?: string[] | null; scan_mode?: LibraryScanMode | null; library_options?: LibraryOptions | null } +export type CreateLibrary = { name: string; path: string; description?: string | null; tags?: string[] | null; 
scan_mode?: LibraryScanMode | null; config?: LibraryConfig | null } -export type UpdateLibrary = { name: string; path: string; description?: string | null; emoji?: string | null; tags?: string[] | null; library_options: LibraryOptions; scan_mode?: LibraryScanMode | null } +export type UpdateLibrary = { name: string; path: string; description?: string | null; emoji?: string | null; tags?: string[] | null; config: LibraryConfig; scan_mode?: LibraryScanMode | null } export type UpdateLibraryExcludedUsers = { user_ids: string[] }
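A minimal consumer-side sketch of how the renamed subtask fields compose into a progress percentage, mirroring the `JobOverlay` logic above (the `subtaskPercent` helper is illustrative, not part of the package):

```ts
import type { JobUpdate } from '@stump/types'

// Returns a 0-100 value, or null when the update carries no subtask information
const subtaskPercent = ({ completed_subtasks, total_subtasks }: JobUpdate): number | null => {
	if (total_subtasks == null || total_subtasks <= 0) return null
	return Math.min(100, ((completed_subtasks ?? 0) / total_subtasks) * 100)
}
```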