diff --git a/Cargo.lock b/Cargo.lock index 1fb9fb8..aeb8070 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,6 +1,6 @@ [root] name = "stringsext" -version = "1.4.0" +version = "1.4.1" dependencies = [ "docopt 0.6.86 (registry+https://github.com/rust-lang/crates.io-index)", "encoding 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/Cargo.toml b/Cargo.toml index 3077a8c..43983d4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "stringsext" -version = "1.4.0" +version = "1.4.1" authors = ["Jens Getreu "] [dependencies] diff --git a/doc/src/stringsext--man.rst b/doc/src/stringsext--man.rst index 3f6577f..71d7712 100644 --- a/doc/src/stringsext--man.rst +++ b/doc/src/stringsext--man.rst @@ -27,9 +27,12 @@ search for multi-byte encoded strings in binary data. :Date: 2017-01-08 :Version: 1.3.1 + :Date: 2017-01-10 + :Version: 1.4.0 + :Author: Jens Getreu -:Date: 2017-01-10 -:Version: 1.4.0 +:Date: 2017-01-13 +:Version: 1.4.1 :Copyright: Apache License, Version 2.0 (for details see COPYING section) :Manual section: 1 :Manual group: Forensic Tools @@ -82,6 +85,10 @@ when **FILE** is ``-``, it reads standard input *stdin*. When invoked with ``stringsext -e ascii -c i`` **stringsext** can be used as *GNU strings* replacement. +Under Windows a Unicode editor is required. For first tests ``wordpad`` +should do. Choose the ``Courier New`` font or ``Segoe UI Symbol`` font to +see the flag symbols ⚑ (U+2691). + OPTIONS ======= diff --git a/src/finding.rs b/src/finding.rs index 0d2b733..24b8014 100644 --- a/src/finding.rs +++ b/src/finding.rs @@ -1,6 +1,7 @@ //! This module defines data structures to store and process found strings (findings) in memory. use std::io::prelude::*; use std::str; +use std; extern crate memmap; extern crate itertools; @@ -88,7 +89,7 @@ macro_rules! enc_str { impl Finding { /// Format and dump a Finding to the output channel, /// usually stdout. 
- pub fn print(&self, out: &mut Box) { + pub fn print(&self, out: &mut Box) -> Result<(), Box> { if ARGS.flag_control_chars == ControlChars::R { let ptr_str = match ARGS.flag_radix { @@ -98,15 +99,15 @@ impl Finding { None => "".to_string(), }; - let enc_str = if ARGS.flag_encoding.len() > 1 { - enc_str!(self)+"\t" - } else { - "".to_string() - }; + let enc_str = if ARGS.flag_encoding.len() > 1 { + enc_str!(self)+"\t" + } else { + "".to_string() + }; for l in self.s.lines() { - &out.write_all(format!("{}{}{}\n",ptr_str, enc_str, l).as_bytes() ); - } + try!(out.write_all(format!("{}{}{}\n",ptr_str, enc_str, l).as_bytes() )); + }; } else { let mut ptr_str = match ARGS.flag_radix { Some(Radix::X) => format!("{:7x} ", self.ptr), @@ -119,17 +120,18 @@ impl Finding { None => "", }; - let enc_str = if ARGS.flag_encoding.len() > 1 { + let enc_str = if ARGS.flag_encoding.len() > 1 { format!("{:14}\t",enc_str!(self)) - } else { + } else { "".to_string() - }; + }; for l in self.s.lines() { - &out.write_all(format!("{}{}{}\n",ptr_str, enc_str, l).as_bytes() ); + try!(out.write_all(format!("{}{}{}\n",ptr_str, enc_str, l).as_bytes() )); ptr_str = ptr_str_ff.to_string(); - } - } + }; + }; + Ok(()) } } @@ -269,7 +271,7 @@ macro_rules! filter { } else if s.len() >= minsize { out.push_str(&CONTROL_REPLACEMENT_STR); } - if is_last && $fc.last_str_is_incomplete + if is_last && $fc.last_str_is_incomplete && s.len() >= ARGS.flag_split_bytes.unwrap() as usize && inp.ends_with(&s) { if s.len() < minsize { // avoid printing twice @@ -334,11 +336,11 @@ impl FindingCollection { /// This method formats and dumps a `FindingCollection` to the output channel, /// usually `stdout`. #[allow(dead_code)] - pub fn print(&self, out: &mut Box) { - if (&self).v.len() == 0 { return }; + pub fn print(&self, out: &mut Box) -> Result<(), Box> { for finding in &self.v { - finding.print(out); - } + try!(finding.print(out)); + }; + Ok(()) } @@ -351,8 +353,9 @@ impl FindingCollection { /// in the new `Finding`. 
The actual content will be added with the next call of a /// `StringWriter` function (see below). - pub fn close_old_init_new_finding(&mut self, text_ptr: usize, mission: &'static Mission) { + pub fn close_old_init_new_finding(&mut self, text_ptr: usize) { + let mission = self.v.last().unwrap().mission; if self.v.last().unwrap().s.len() != 0 { // last is not empty filter!(self, mission); @@ -364,13 +367,13 @@ impl FindingCollection { mission: mission, s: String::with_capacity(FINDING_STR_CAPACITY) }); } else { - // The current finding is empty, we do not - // push a new finding, instead we - // only update the pointer of the current - // one. Content will come later anyway. - let last_finding = self.v.last_mut().unwrap(); - last_finding.ptr = text_ptr; - last_finding.mission = mission; + // The current finding is empty, we do not + // push a new finding, instead we + // only update the pointer of the current + // one. Content will come later anyway. + self.v.last_mut().unwrap().ptr= text_ptr; + // we don't need `self.v.last_mut().unwrap().mission = mission` + // because it is always the same mission }; } diff --git a/src/input.rs b/src/input.rs index 5506e22..150ed37 100644 --- a/src/input.rs +++ b/src/input.rs @@ -4,7 +4,7 @@ use scanner::ScannerPool; use std::path::Path; use std::io::prelude::*; use std::io::stdin; -use std::error::Error; +use std; extern crate memmap; use self::memmap::{Mmap, Protection}; extern crate itertools; @@ -177,7 +177,7 @@ pub const UTF8_LEN_MAX: u8 = 6; /// If `file_path_str` == None read from `stdin`, otherwise /// read from file. pub fn process_input(file_path_str: Option<&str>, mut sc: &mut ScannerPool) - -> Result<(), Box> { + -> Result<(), Box> { match file_path_str { Some(p) => { let f = try!(File::open(&Path::new(p))); @@ -193,7 +193,7 @@ pub fn process_input(file_path_str: Option<&str>, mut sc: &mut ScannerPool) /// In order to avoid additional copying the trait `memmap` is used to access /// the file contents. 
See: /// https://en.wikipedia.org/wiki/Memory-mapped_file -pub fn from_file(sc: &mut ScannerPool, file: &File) -> Result<(), Box> { +pub fn from_file(sc: &mut ScannerPool, file: &File) -> Result<(), Box> { let len = try!(file.metadata()).len() as usize; let mut byte_counter: usize = 0; while byte_counter + WIN_LEN <= len { @@ -217,7 +217,7 @@ pub fn from_file(sc: &mut ScannerPool, file: &File) -> Result<(), Box> { /// Streams the input pipe by cutting it into overlapping chunks and feeds the `ScannerPool`. /// This functions implements is own rotating input buffer. /// After each iteration the `byte_counter` is updated. -fn from_stdin(sc: &mut ScannerPool) -> Result<(), Box> { +fn from_stdin(sc: &mut ScannerPool) -> Result<(), Box> { let mut byte_counter: usize = 0; let stdin = stdin(); let mut stdin = stdin.lock(); diff --git a/src/main.rs b/src/main.rs index 1356e3e..9a225c8 100644 --- a/src/main.rs +++ b/src/main.rs @@ -36,6 +36,10 @@ use std::fmt; use std::process; use std::cmp::{Ord,Eq}; use std::cmp; +use std::thread::JoinHandle; +use std::io; +use std::num::ParseIntError; +use std::str::FromStr; extern crate memmap; extern crate itertools; @@ -45,10 +49,6 @@ extern crate scoped_threadpool; use std::thread; extern crate encoding; -use std::thread::JoinHandle; -use std::io; -use std::num::ParseIntError; -use std::str::FromStr; use encoding::EncodingRef; use encoding::label::encoding_from_whatwg_label; use encoding::all; @@ -332,14 +332,13 @@ impl Missions { } Ok( (enc_name, nbytes_min, filter1, filter2) ) - } } /// This function spawns and defines the behaviour of the _merger-thread_ who /// collects and prints the results produced by the worker threads. 
-fn main() { +fn main2() -> Result<(), Box> { if ARGS.flag_list_encodings { let list = all::encodings().iter().filter_map(|&e|e.whatwg_name()).sorted(); @@ -347,16 +346,16 @@ fn main() { for e in list { println!("{}",e); } - return; + return Ok(()); } if ARGS.flag_version { println!("Version {}, {}", VERSION.unwrap_or("unknown"), AUTHOR ); - return; + return Ok(()); } - let merger: JoinHandle<()>; + let merger: JoinHandle>>; // Scope for threads { let n_threads = MISSIONS.len(); @@ -367,13 +366,15 @@ fn main() { merger = thread::spawn(move || { let mut output = match ARGS.flag_output { Some(ref fname) => { - let f = File::create(&Path::new(fname.as_str())).unwrap(); + let f = try!(File::create(&Path::new(fname.as_str()))); Box::new(f) as Box }, None => Box::new(io::stdout()) as Box, }; + try!(output.write_all("\u{feff}".as_bytes())); 'outer: loop { + // collect let mut results = Vec::with_capacity(n_threads); for _ in 0..n_threads { results.push(match rx.recv() { @@ -384,47 +385,36 @@ fn main() { Err(_) => {break 'outer}, }); }; - // merge + // merge for finding in kmerge(&results) { - finding.print(&mut output); + try!(finding.print(&mut output)); }; - } + }; //println!("Merger terminated."); + Ok(()) }); // Default for is stdin. if (ARGS.arg_FILE.len() == 0) || ( (ARGS.arg_FILE.len() == 1) && ARGS.arg_FILE[0] == "-") { - match process_input(None, &mut sc) { - Err(e)=> { - writeln!(&mut std::io::stderr(), - "Error while reading from stdin: {}.", - e.to_string()).unwrap(); - process::exit(2); - }, - _ => {}, - } + try!(process_input(None, &mut sc)); } else { for ref file_path_str in ARGS.arg_FILE.iter() { - match process_input(Some(&file_path_str), &mut sc) { - Err(e)=> { - writeln!(&mut std::io::stderr(), - "Error: `{}` while processing file: `{}`.", - e.to_string(), file_path_str).unwrap(); - process::exit(2); - }, - _ => {}, - } - - } - } - + try!(process_input(Some(&file_path_str), &mut sc)); + }; + }; } // `tx` drops here, which "break"s the merger-loop. 
- merger.join().unwrap(); + merger.join().unwrap() //println!("All threads terminated."); } +fn main() { + if let Err(e) = main2() { + writeln!(&mut std::io::stderr(), "Error: `{}`.",e).unwrap(); + process::exit(1); + } +} #[cfg(test)] diff --git a/src/scanner.rs b/src/scanner.rs index 53ce2af..18427bf 100644 --- a/src/scanner.rs +++ b/src/scanner.rs @@ -54,7 +54,10 @@ //! 15. Repeat until the last chunk is reached. +use std; use std::str; +use std::io::Write; +use std::process; extern crate memmap; extern crate itertools; @@ -197,8 +200,13 @@ impl <'a> ScannerPool <'a> { }; scanner_state.completes_last_str = m.last_str_is_incomplete; match tx.send(m) { - Ok(_) => {}, - Err(_) => { panic!("Can not send FindingCollection:"); }, + Ok(_) => {}, + Err(e) => { + writeln!(&mut std::io::stderr(), + "Error: `{}`. Is the output stream writeable? Is there \ + enough space? ",e).unwrap(); + process::exit(1); + }, }; }); } @@ -221,9 +229,7 @@ impl <'a> ScannerPool <'a> { /// The `Finding`s are returned as a `FindingCollection` vector. /// After execution the `start` variable points to the first unprocessed Byte /// in `input`, usually in WIN_OVERLAP. - /// - /// Please note that this function is stateless (static)! /// fn scan_window <'b> (scanner_state:&ScannerState, @@ -244,7 +250,7 @@ impl <'a> ScannerPool <'a> { ret.completes_last_str = scanner_state.completes_last_str; - while remaining < WIN_STEP { // Never do mission.offset new search in overlapping space + loop { let (offset, err) = decoder.raw_feed(&input[remaining..], &mut *ret); //unprocessed = remaining + offset; @@ -259,12 +265,14 @@ impl <'a> ScannerPool <'a> { let _ = decoder.raw_finish(&mut *ret); // Is this really necessary? Why? break; } - - ret.close_old_init_new_finding(byte_counter+remaining, - scanner_state.mission); + // Never start new search in overlapping space + if remaining >= WIN_STEP { + let _ = decoder.raw_finish(&mut *ret); // Is this really necessary? Why? 
+ break; + } + ret.close_old_init_new_finding(byte_counter+remaining); // only the first finding should have this true ret.completes_last_str = false; - }; // unprocessed points to the first erroneous byte, remaining 1 byte beyond: @@ -282,7 +290,7 @@ impl <'a> ScannerPool <'a> { // This closes the current finding strings and adds an // empty one we have to remove with `close_finding_collection()` later. // Before processing the last finding, - ret.close_old_init_new_finding(byte_counter+remaining, scanner_state.mission); + ret.close_old_init_new_finding(byte_counter+remaining); // Remove empty surplus ret.close_finding_collection();