-
Notifications
You must be signed in to change notification settings - Fork 88
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement Preprocessing of Pdfs Into Images + Setup Pdfium
- Loading branch information
1 parent
1b6cf0b
commit edde66a
Showing
5 changed files
with
114 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,26 +1,90 @@ | ||
#![doc = include_str!("../README.md")] | ||
|
||
/// Adds two numbers together | ||
/// | ||
/// # Example | ||
/// | ||
/// ```rust | ||
/// use chonky::add; | ||
use image::DynamicImage; | ||
use pdfium_render::prelude::*; | ||
|
||
use error_stack::Report; | ||
|
||
|
||
pub fn load_pdf<'a>(pdfium: &'a Pdfium, file_path: &str)-> Result<PdfDocument<'a>, Report<PdfiumError>>{ | ||
let pdf = pdfium | ||
.load_pdf_from_file(file_path, None)?; | ||
Ok(pdf) | ||
} | ||
|
||
/// Takes in a pdf document and returns a vector list where each page | ||
/// is processed into a raw image that can be later converted to any image format | ||
/// | ||
/// assert_eq!(add(1, 3), 4); | ||
/// ``` | ||
#[must_use] | ||
pub const fn add(left: u64, right: u64) -> u64 { | ||
left + right | ||
pub fn pdf_to_images(pdf: &PdfDocument) -> Result<Vec<DynamicImage>,Report<PdfiumError>>{ | ||
let mut images: Vec<DynamicImage> = Vec::new(); | ||
|
||
for page in pdf.pages().iter() { | ||
|
||
let resolution_width = 1000; //may adjust resolution depending on need | ||
|
||
// Render the entire page to an image | ||
let rendered_page = page | ||
.render_with_config( | ||
&PdfRenderConfig::new() | ||
.set_target_width(resolution_width), | ||
)?; // Renders the page to a PdfBitmap | ||
|
||
// Convert PdfBitmap to DynamicImage | ||
let dynamic_image = rendered_page.as_image(); | ||
images.push(dynamic_image); | ||
Check failure Code scanning / clippy mismatched types Error
mismatched types
Check failure Code scanning / clippy mismatched types Error
mismatched types
|
||
} | ||
|
||
Ok(images) | ||
} | ||
|
||
|
||
#[cfg(test)] | ||
mod tests { | ||
use super::*; | ||
|
||
#[test] | ||
fn it_works() { | ||
let result = add(2, 2); | ||
assert_eq!(result, 4); | ||
fn pdf_load_success() { | ||
let pdfium = Pdfium::new( | ||
Pdfium::bind_to_library(Pdfium::pdfium_platform_library_name_at_path("./libs/")).unwrap() | ||
); // creates instance so must be global | ||
|
||
let test_pdf_string = "tests/docs/test-doc.pdf"; | ||
|
||
let pdf = load_pdf(&pdfium, test_pdf_string); | ||
|
||
assert!(pdf.is_ok()); | ||
} | ||
|
||
#[test] | ||
fn pdf_load_failure() { | ||
let pdfium = Pdfium::new( | ||
Pdfium::bind_to_library(Pdfium::pdfium_platform_library_name_at_path("./libs/")).unwrap() | ||
); // creates instance so must be global | ||
|
||
let test_pdf_string = "tests/docs/invalid.pdf"; | ||
|
||
let pdf = load_pdf(&pdfium, test_pdf_string); | ||
|
||
assert!(pdf.is_err()); | ||
} | ||
|
||
#[test] | ||
fn pdf_image_conversion() { | ||
let pdfium = Pdfium::new( | ||
Pdfium::bind_to_library(Pdfium::pdfium_platform_library_name_at_path("./libs/")).unwrap() | ||
); // creates instance so must be global | ||
|
||
let test_pdf_string = "tests/docs/test-doc.pdf"; | ||
|
||
let pdf = load_pdf(&pdfium, test_pdf_string).unwrap(); | ||
|
||
let preprocessed_pdf = pdf_to_images(&pdf).unwrap(); | ||
|
||
let num_pages = 38; //number of pages of pdf | ||
|
||
assert_eq!(preprocessed_pdf.len(), num_pages) //length of vector should be number of pages | ||
|
||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
use std::env; | ||
use chonky::*; | ||
use pdfium_render::prelude::*; | ||
use error_stack::Report; | ||
|
||
fn main() -> Result<(), Report<PdfiumError>>{ | ||
let args: Vec<String> = env::args().collect();// read file path arguments | ||
|
||
if args.len() < 2 { | ||
panic!("Path to PDF not inputted") | ||
} | ||
|
||
let pdfium = Pdfium::new( | ||
Pdfium::bind_to_library(Pdfium::pdfium_platform_library_name_at_path("./libs/")).unwrap() | ||
); // creates instance so must be global | ||
|
||
let pdf = load_pdf(&pdfium, &args[1])?; | ||
|
||
let preprocessed_pdf = pdf_to_images(&pdf)?; | ||
|
||
//for now we will print all these images to a folder | ||
// this will be a seperate function in the future once knowledge about error-stack increases | ||
|
||
let output_folder = "./out"; | ||
|
||
for (index, image) in preprocessed_pdf.iter().enumerate() { | ||
// Generate a unique filename for each page image | ||
let file_path = format!("{}/page_{}.png", output_folder, index + 1); | ||
|
||
// Save the image as a PNG file | ||
image.save(&file_path).unwrap(); | ||
} | ||
|
||
Ok(()) | ||
} |
Binary file not shown.