From 21cced8d64be95560ca82654aaff1fd1bf1f49d7 Mon Sep 17 00:00:00 2001
From: Aaron Leopold <36278431+aaronleopold@users.noreply.github.com>
Date: Sun, 22 Dec 2024 10:09:05 -0700
Subject: [PATCH] :bug: Restore media/series when previously missing (#529)

* :bug: Correctly restore media/series when previously missing

* fix panic from overflow calc
---
 Cargo.lock                                     | 13 ++--
 core/src/db/entity/media/prisma_macros.rs      |  2 +
 .../filesystem/scanner/library_scan_job.rs     | 76 ++++++++++++++++++-
 .../src/filesystem/scanner/series_scan_job.rs  | 35 +++++++--
 core/src/filesystem/scanner/utils.rs           | 47 ++++++++++++
 core/src/filesystem/scanner/walk.rs            | 58 +++++++++++---
 6 files changed, 200 insertions(+), 31 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index f77d4657b..46ce094b3 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1466,11 +1466,10 @@ dependencies = [
 
 [[package]]
 name = "crossbeam-queue"
-version = "0.3.8"
+version = "0.3.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d1cfb3ea8a53f37c40dea2c7bedcbd88bdfae54f5e2175d6ecaff1c988353add"
+checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115"
 dependencies = [
- "cfg-if",
  "crossbeam-utils",
 ]

@@ -5172,9 +5171,9 @@ dependencies = [
 
 [[package]]
 name = "paste"
-version = "1.0.11"
+version = "1.0.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d01a5bd0424d00070b0098dd17ebca6f961a959dead1dbcbbbc1d1cd8d3deeba"
+checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
 
 [[package]]
 name = "pathdiff"
@@ -5836,9 +5835,9 @@ checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068"
 
 [[package]]
 name = "proc-macro2"
-version = "1.0.87"
+version = "1.0.88"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b3e4daa0dcf6feba26f985457cdf104d4b4256fc5a09547140f3631bb076b19a"
+checksum = "7c3a7fc5db1e57d5a779a352c8cdb57b29aa4c40cc69c3a68a7fedc815fbf2f9"
 dependencies = [
  "unicode-ident",
 ]
diff --git a/core/src/db/entity/media/prisma_macros.rs b/core/src/db/entity/media/prisma_macros.rs
index 49bf26620..6f60927d9 100644
--- a/core/src/db/entity/media/prisma_macros.rs
+++ b/core/src/db/entity/media/prisma_macros.rs
@@ -5,8 +5,10 @@ media::select!(media_id_select { id });
 media::select!(media_path_select { path });
 
 media::select!(media_path_modified_at_select {
+	id
 	path
 	modified_at
+	status
 });
 
 media::select!(media_thumbnail {
diff --git a/core/src/filesystem/scanner/library_scan_job.rs b/core/src/filesystem/scanner/library_scan_job.rs
index a2783000c..66049dc62 100644
--- a/core/src/filesystem/scanner/library_scan_job.rs
+++ b/core/src/filesystem/scanner/library_scan_job.rs
@@ -26,9 +26,9 @@ use crate::{
 use super::{
 	series_scan_job::SeriesScanTask,
 	utils::{
-		handle_missing_media, handle_missing_series, safely_build_and_insert_media,
-		safely_build_series, visit_and_update_media, MediaBuildOperation,
-		MediaOperationOutput, MissingSeriesOutput,
+		handle_missing_media, handle_missing_series, handle_restored_media,
+		safely_build_and_insert_media, safely_build_series, visit_and_update_media,
+		MediaBuildOperation, MediaOperationOutput, MissingSeriesOutput,
 	},
 	walk_library, walk_series, ScanOptions, WalkedLibrary, WalkedSeries, WalkerCtx,
 };
@@ -50,6 +50,7 @@ pub enum LibraryScanTask {
 pub struct InitTaskInput {
 	series_to_create: Vec<PathBuf>,
 	missing_series: Vec<PathBuf>,
+	recovered_series: Vec<String>,
 }
 
 /// A job that scans a library and updates the database with the results
@@ -152,6 +153,7 @@ impl JobExt for LibraryScanJob {
 		ctx.report_progress(JobProgress::msg("Performing task discovery"));
 		let WalkedLibrary {
 			series_to_create,
+			recovered_series,
 			series_to_visit,
 			missing_series,
 			library_is_missing,
@@ -171,6 +173,7 @@ impl JobExt for LibraryScanJob {
 			series_to_create = series_to_create.len(),
 			series_to_visit = series_to_visit.len(),
 			missing_series = missing_series.len(),
+			recovered_series = recovered_series.len(),
 			library_is_missing,
 			"Walked library"
 		);
@@ -193,6 +196,7 @@ impl JobExt for LibraryScanJob {
 		let init_task_input = InitTaskInput {
 			series_to_create: series_to_create.clone(),
 			missing_series,
+			recovered_series,
 		};
 
 		let series_to_visit = series_to_visit
@@ -275,17 +279,60 @@ impl JobExt for LibraryScanJob {
 				let InitTaskInput {
 					series_to_create,
 					missing_series,
+					recovered_series,
 				} = input;
 
+				let recovered_series_step_count =
+					if !recovered_series.is_empty() { 1 } else { 0 };
+
 				// Task count: build each series + 1 for insertion step, +1 for update tx on missing series
 				let total_subtask_count = (series_to_create.len() + 1)
-					+ usize::from(!missing_series.is_empty());
+					+ usize::from(!missing_series.is_empty())
+					+ recovered_series_step_count;
 				let mut current_subtask_index = 0;
 				ctx.report_progress(JobProgress::subtask_position(
 					current_subtask_index,
 					total_subtask_count as i32,
 				));
 
+				if !recovered_series.is_empty() {
+					ctx.report_progress(JobProgress::msg("Recovering series"));
+
+					let affected_rows = ctx
+						.db
+						.series()
+						.update_many(
+							vec![series::id::in_vec(recovered_series)],
+							vec![series::status::set(FileStatus::Ready.to_string())],
+						)
+						.exec()
+						.await
+						.map_or_else(
+							|error| {
+								tracing::error!(error = ?error, "Failed to recover series");
+								logs.push(JobExecuteLog::error(format!(
+									"Failed to recover series: {:?}",
+									error.to_string()
+								)));
+								0
+							},
+							|count| {
+								output.updated_series = count as u64;
+								count
+							},
+						);
+
+					ctx.report_progress(JobProgress::subtask_position(
+						(affected_rows > 0)
+							.then(|| {
+								current_subtask_index += 1;
+								current_subtask_index
+							})
+							.unwrap_or(0),
+						total_subtask_count as i32,
+					));
+				}
+
 				if !missing_series.is_empty() {
 					ctx.report_progress(JobProgress::msg("Handling missing series"));
 					let missing_series_str = missing_series
@@ -440,6 +487,7 @@ impl JobExt for LibraryScanJob {
 					series_is_missing,
 					media_to_create,
 					media_to_visit,
+					recovered_media,
 					missing_media,
 					seen_files,
 					ignored_files,
@@ -504,6 +552,8 @@ impl JobExt for LibraryScanJob {
 					[
 						(!missing_media.is_empty())
 							.then_some(SeriesScanTask::MarkMissingMedia(missing_media)),
+						(!recovered_media.is_empty())
+							.then_some(SeriesScanTask::RestoreMedia(recovered_media)),
 						(!media_to_create.is_empty())
 							.then_some(SeriesScanTask::CreateMedia(media_to_create)),
 						(!media_to_visit.is_empty())
@@ -523,6 +573,24 @@ impl JobExt for LibraryScanJob {
 				path: _series_path,
 				task: series_task,
 			} => match series_task {
+				SeriesScanTask::RestoreMedia(ids) => {
+					ctx.report_progress(JobProgress::msg("Restoring media entities"));
+					let MediaOperationOutput {
+						updated_media,
+						logs: new_logs,
+						..
+					} = handle_restored_media(ctx, &series_id, ids).await;
+					ctx.send_batch(vec![
+						JobProgress::msg("Restored media entities").into_worker_send(),
+						CoreEvent::CreatedOrUpdatedManyMedia {
+							count: updated_media,
+							series_id,
+						}
+						.into_worker_send(),
+					]);
+					output.updated_media += updated_media;
+					logs.extend(new_logs);
+				},
 				SeriesScanTask::MarkMissingMedia(paths) => {
 					ctx.report_progress(JobProgress::msg("Handling missing media"));
 					let MediaOperationOutput {
diff --git a/core/src/filesystem/scanner/series_scan_job.rs b/core/src/filesystem/scanner/series_scan_job.rs
index 5e4f2a876..43946afda 100644
--- a/core/src/filesystem/scanner/series_scan_job.rs
+++ b/core/src/filesystem/scanner/series_scan_job.rs
@@ -22,8 +22,8 @@ use crate::{
 
 use super::{
 	utils::{
-		handle_missing_media, safely_build_and_insert_media, visit_and_update_media,
-		MediaBuildOperation, MediaOperationOutput,
+		handle_missing_media, handle_restored_media, safely_build_and_insert_media,
+		visit_and_update_media, MediaBuildOperation, MediaOperationOutput,
 	},
 	walk_series, ScanOptions, WalkedSeries, WalkerCtx,
 };
@@ -32,6 +32,7 @@
 #[derive(Serialize, Deserialize)]
 pub enum SeriesScanTask {
 	MarkMissingMedia(Vec<PathBuf>),
+	RestoreMedia(Vec<String>),
 	CreateMedia(Vec<PathBuf>),
 	VisitMedia(Vec<PathBuf>),
 }
@@ -138,6 +139,7 @@ impl JobExt for SeriesScanJob {
 			series_is_missing,
 			media_to_create,
 			media_to_visit,
+			recovered_media,
 			missing_media,
 			seen_files,
 			ignored_files,
@@ -172,14 +174,13 @@ impl JobExt for SeriesScanJob {
 		let tasks = VecDeque::from(chain_optional_iter(
 			[],
 			[
-				missing_media
-					.is_empty()
+				(!missing_media.is_empty())
 					.then_some(SeriesScanTask::MarkMissingMedia(missing_media)),
-				media_to_create
-					.is_empty()
+				(!recovered_media.is_empty())
+					.then_some(SeriesScanTask::RestoreMedia(recovered_media)),
+				(!media_to_create.is_empty())
 					.then_some(SeriesScanTask::CreateMedia(media_to_create)),
-				media_to_visit
-					.is_empty()
+				(!media_to_visit.is_empty())
 					.then_some(SeriesScanTask::VisitMedia(media_to_visit)),
 			],
 		));
@@ -237,6 +238,24 @@ impl JobExt for SeriesScanJob {
 		let max_concurrency = ctx.config.max_scanner_concurrency;
 
 		match task {
+			SeriesScanTask::RestoreMedia(ids) => {
+				ctx.report_progress(JobProgress::msg("Restoring media entities"));
+				let MediaOperationOutput {
+					updated_media,
+					logs: new_logs,
+					..
+				} = handle_restored_media(ctx, &self.id, ids).await;
+				ctx.send_batch(vec![
+					JobProgress::msg("Restored media entities").into_worker_send(),
+					CoreEvent::CreatedOrUpdatedManyMedia {
+						count: updated_media,
+						series_id: self.id.clone(),
+					}
+					.into_worker_send(),
+				]);
+				output.updated_media += updated_media;
+				logs.extend(new_logs);
+			},
 			SeriesScanTask::MarkMissingMedia(paths) => {
 				ctx.report_progress(JobProgress::msg("Handling missing media"));
 				let MediaOperationOutput {
diff --git a/core/src/filesystem/scanner/utils.rs b/core/src/filesystem/scanner/utils.rs
index 57a602ea7..035f14bd7 100644
--- a/core/src/filesystem/scanner/utils.rs
+++ b/core/src/filesystem/scanner/utils.rs
@@ -279,6 +279,8 @@ pub(crate) struct MediaOperationOutput {
 	pub logs: Vec<JobExecuteLog>,
 }
 
+/// Handles missing media by updating the database with the latest information. A media is
+/// considered missing if it was previously marked as ready and is no longer found on disk.
 pub(crate) async fn handle_missing_media(
 	ctx: &WorkerCtx,
 	series_id: &str,
@@ -326,6 +328,51 @@ pub(crate) async fn handle_missing_media(
 	output
 }
 
+/// Handles restored media by updating the database with the latest information. A
+/// media is considered restored if it was previously marked as missing and has been
+/// found on disk.
+pub(crate) async fn handle_restored_media(
+	ctx: &WorkerCtx,
+	series_id: &str,
+	ids: Vec<String>,
+) -> MediaOperationOutput {
+	let mut output = MediaOperationOutput::default();
+
+	if ids.is_empty() {
+		tracing::debug!("No restored media to handle");
+		return output;
+	}
+
+	let _affected_rows = ctx
+		.db
+		.media()
+		.update_many(
+			vec![
+				media::series::is(vec![series::id::equals(series_id.to_string())]),
+				media::id::in_vec(ids),
+			],
+			vec![media::status::set(FileStatus::Ready.to_string())],
+		)
+		.exec()
+		.await
+		.map_or_else(
+			|error| {
+				tracing::error!(error = ?error, "Failed to restore recovered media");
+				output.logs.push(JobExecuteLog::error(format!(
+					"Failed to update recovered media: {:?}",
+					error.to_string()
+				)));
+				0
+			},
+			|count| {
+				output.updated_media += count as u64;
+				count
+			},
+		);
+
+	output
+}
+
 /// Builds a series from the given path
 ///
 /// # Arguments
diff --git a/core/src/filesystem/scanner/walk.rs b/core/src/filesystem/scanner/walk.rs
index 41816440a..f0539127f 100644
--- a/core/src/filesystem/scanner/walk.rs
+++ b/core/src/filesystem/scanner/walk.rs
@@ -12,7 +12,7 @@ use rayon::iter::{
 use walkdir::{DirEntry, WalkDir};
 
 use crate::{
-	db::entity::macros::media_path_modified_at_select,
+	db::{entity::macros::media_path_modified_at_select, FileStatus},
 	filesystem::{scanner::utils::file_updated_since_scan, PathUtils},
 	prisma::{media, series, PrismaClient},
 	CoreResult,
@@ -40,6 +40,8 @@ pub struct WalkedLibrary {
 	pub ignored_directories: u64,
 	/// The paths for series that need to be created
 	pub series_to_create: Vec<PathBuf>,
+	/// A list of series IDs that were previously marked as missing but have been found on disk
+	pub recovered_series: Vec<String>,
 	/// The paths for series that need to be visited. This differs from [`WalkedSeries::media_to_visit`] because
 	/// all series will always be visited in order to determine what media need to be reconciled in the series walk
 	pub series_to_visit: Vec<PathBuf>,
@@ -132,11 +134,11 @@ pub async fn walk_library(
 	);
 	let computation_start = std::time::Instant::now();
 
-	let (series_to_create, missing_series, series_to_visit) = {
+	let (series_to_create, missing_series, recovered_series, series_to_visit) = {
 		let existing_records = db
 			.series()
 			.find_many(vec![series::path::starts_with(path.to_string())])
-			.select(series::select!({ path }))
+			.select(series::select!({ id path status }))
 			.exec()
 			.await?;
 		if existing_records.is_empty() {
@@ -147,10 +149,10 @@ pub async fn walk_library(
 				.into_iter()
 				.map(|e| e.path().to_owned())
 				.collect::<Vec<PathBuf>>();
-			(series_to_create, vec![], vec![])
+			(series_to_create, vec![], vec![], vec![])
 		} else {
 			let existing_series_map = existing_records
-				.into_iter()
+				.iter()
 				.map(|s| (s.path.clone(), s.clone()))
 				.collect::<HashMap<String, _>>();

@@ -160,6 +162,15 @@ pub async fn walk_library(
 				.map(|(path, _)| PathBuf::from(path))
 				.collect::<Vec<PathBuf>>();

+			let recovered_series = existing_records
+				.into_iter()
+				.filter(|s| {
+					s.status == FileStatus::Missing.to_string()
+						&& PathBuf::from(path).exists()
+				})
+				.map(|s| s.id)
+				.collect::<Vec<String>>();
+
 			let (series_to_create, series_to_visit) = valid_entries
 				.par_iter()
 				.filter(|e| !missing_series.contains(&e.path().to_path_buf()))
@@ -175,17 +186,22 @@ pub async fn walk_library(
 					}
 				});

-			(series_to_create, missing_series, series_to_visit)
+			(
+				series_to_create,
+				missing_series,
+				recovered_series,
+				series_to_visit,
+			)
 		}
 	};

 	let to_create = series_to_create.len();
 	tracing::trace!(?series_to_create, "Found {to_create} series to create");

-	let is_missing = missing_series.len();
+	let missing_series_len = missing_series.len();
 	tracing::trace!(
 		?missing_series,
-		"Found {is_missing} series to mark as missing"
+		"Found {missing_series_len} series to mark as missing"
 	);

 	tracing::debug!(
@@ -197,6 +213,7 @@ pub async fn walk_library(
 		seen_directories,
 		ignored_directories,
 		series_to_create,
+		recovered_series,
 		series_to_visit,
 		missing_series,
 		library_is_missing,
@@ -215,6 +232,8 @@ pub struct WalkedSeries {
 	pub skipped_files: u64,
 	/// The paths for media that need to be created
 	pub media_to_create: Vec<PathBuf>,
+	/// A list of media IDs that were previously marked as missing but have been found on disk
+	pub recovered_media: Vec<String>,
 	/// The paths for media that need to be visited, i.e. the timestamp on disk has changed and
 	/// Stump will reconcile the media with the database
 	pub media_to_visit: Vec<PathBuf>,
@@ -319,6 +338,7 @@ pub async fn walk_series(
 			// modified, there is no point in visiting it
 			if let Some(media) = existing_media_map.get(entry_path_str.as_str()) {
 				let modified_at = media.modified_at.map(|dt| dt.to_rfc3339());
+				let is_missing = media.status == FileStatus::Missing.to_string();

 				if let Some(dt) = modified_at {
 					file_updated_since_scan(entry, dt)
@@ -329,9 +349,9 @@ pub async fn walk_series(
 								"Failed to determine if entry has been modified since last scan"
 							);
 						})
-						.unwrap_or_else(|_| options.should_visit_books())
+						.unwrap_or_else(|_| is_missing || options.should_visit_books())
 				} else {
-					options.should_visit_books()
+					is_missing || options.should_visit_books()
 				}
 			} else {
 				// If the media doesn't exist, we need to create it
@@ -353,18 +373,31 @@ pub async fn walk_series(
 		});

 	let missing_media = existing_media_map
-		.into_par_iter()
+		.par_iter()
 		.filter(|(path, _)| !PathBuf::from(path).exists())
 		.map(|(path, _)| PathBuf::from(path))
 		.collect::<Vec<PathBuf>>();

+	let recovered_media = existing_media_map
+		.into_par_iter()
+		.filter(|(path, media)| {
+			media.status == FileStatus::Missing.to_string()
+				&& PathBuf::from(path).exists()
+		})
+		.map(|(_, media)| media.id)
+		.collect::<Vec<String>>();
+
 	let to_create = media_to_create.len();
 	tracing::trace!(?media_to_create, "Found {to_create} media to create");

 	let to_visit = media_to_visit.len();
 	tracing::trace!(?media_to_visit, "Found {to_visit} media to visit");

-	let skipped_files = valid_entries_len - (to_create - to_visit) as u64;
+	let skipped_files = seen_files - (to_create + to_visit) as u64;
+	tracing::trace!(
+		skipped_files,
+		"Skipped files: {seen_files} - ({to_create} + {to_visit})"
+	);

 	let is_missing = missing_media.len();
 	tracing::trace!(
@@ -382,6 +415,7 @@ pub async fn walk_series(
 		ignored_files,
 		skipped_files,
 		media_to_create,
+		recovered_media,
 		media_to_visit,
 		missing_media,
 		series_is_missing: false,
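
Note (not part of the patch above): the restore path boils down to a status-aware reconciliation pass over records that already exist in the database. Anything previously marked ready whose file is gone is flagged missing, and anything previously marked missing whose file is back on disk is restored to ready, before the usual create/visit tasks run. The self-contained Rust sketch below illustrates only that classification idea; `FileStatus`, `Record`, and `classify` here are hypothetical stand-ins for this note, not the generated Prisma types or scanner helpers used in the patch, which perform the equivalent checks and then persist the result via update_many.

use std::path::Path;

// Simplified stand-in for the scanner's persisted file state.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum FileStatus {
    Ready,
    Missing,
}

// Simplified stand-in for an existing media/series row: an ID, a path, and a status.
struct Record {
    id: String,
    path: String,
    status: FileStatus,
}

// Split existing records into IDs that should be marked missing (previously ready,
// no longer on disk) and IDs that should be restored (previously missing, on disk again).
fn classify(records: &[Record]) -> (Vec<String>, Vec<String>) {
    let mut newly_missing = Vec::new();
    let mut recovered = Vec::new();
    for record in records {
        let on_disk = Path::new(&record.path).exists();
        match (record.status, on_disk) {
            (FileStatus::Ready, false) => newly_missing.push(record.id.clone()),
            (FileStatus::Missing, true) => recovered.push(record.id.clone()),
            _ => {} // database state already agrees with the filesystem
        }
    }
    (newly_missing, recovered)
}

fn main() {
    let records = vec![
        Record {
            id: "recovered-example".into(),
            // the temp dir exists on most systems, so this record is "recovered"
            path: std::env::temp_dir().to_string_lossy().into_owned(),
            status: FileStatus::Missing,
        },
        Record {
            id: "missing-example".into(),
            path: "/this/path/should/not/exist".into(),
            status: FileStatus::Ready,
        },
    ];
    let (newly_missing, recovered) = classify(&records);
    println!("mark missing: {newly_missing:?}, restore: {recovered:?}");
}

The patch applies this idea twice, keyed on FileStatus::Missing and PathBuf::exists: once per series in walk_library and once per media entry in walk_series, with handle_restored_media and the series update in the library scan job flipping the recovered rows back to FileStatus::Ready.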