Skip to content

Commit

Permalink
prepend Unicode BOM, improve error handling
Browse files Browse the repository at this point in the history
- Windows editors do not assume UTF-8 unless they see a Unicode BOM. This
  is prepended now.

- The error handling at various places is improved and simplified.
  • Loading branch information
getreu committed Jan 13, 2017
1 parent 2237db1 commit 39e1ae7
Show file tree
Hide file tree
Showing 7 changed files with 89 additions and 81 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "stringsext"
version = "1.4.0"
version = "1.4.1"
authors = ["Jens Getreu <getreu@web.de>"]

[dependencies]
Expand Down
11 changes: 9 additions & 2 deletions doc/src/stringsext--man.rst
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,12 @@ search for multi-byte encoded strings in binary data.
:Date: 2017-01-08
:Version: 1.3.1
:Date: 2017-01-10
:Version: 1.4.0
:Author: Jens Getreu
:Date: 2017-01-10
:Version: 1.4.0
:Date: 2017-01-13
:Version: 1.4.1
:Copyright: Apache License, Version 2.0 (for details see COPYING section)
:Manual section: 1
:Manual group: Forensic Tools
Expand Down Expand Up @@ -82,6 +85,10 @@ when **FILE** is ``-``, it reads standard input *stdin*.
When invoked with ``stringsext -e ascii -c i`` **stringsext** can be
used as *GNU strings* replacement.

Under Windows a Unicode editor is required. For first tests ``wordpad``
should do. Choose the ``Courier New`` font or ``Segoe UI Symbol`` font to
see the flag symbols ⚑ (U+2691).

OPTIONS
=======

Expand Down
57 changes: 30 additions & 27 deletions src/finding.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
//! This module defines data structures to store and process found strings (findings) in memory.
use std::io::prelude::*;
use std::str;
use std;
extern crate memmap;
extern crate itertools;

Expand Down Expand Up @@ -88,7 +89,7 @@ macro_rules! enc_str {
impl Finding {
/// Format and dump a Finding to the output channel,
/// usually stdout.
pub fn print(&self, out: &mut Box<Write>) {
pub fn print(&self, out: &mut Box<Write>) -> Result<(), Box<std::io::Error>> {

if ARGS.flag_control_chars == ControlChars::R {
let ptr_str = match ARGS.flag_radix {
Expand All @@ -98,15 +99,15 @@ impl Finding {
None => "".to_string(),
};

let enc_str = if ARGS.flag_encoding.len() > 1 {
enc_str!(self)+"\t"
} else {
"".to_string()
};
let enc_str = if ARGS.flag_encoding.len() > 1 {
enc_str!(self)+"\t"
} else {
"".to_string()
};

for l in self.s.lines() {
&out.write_all(format!("{}{}{}\n",ptr_str, enc_str, l).as_bytes() );
}
try!(out.write_all(format!("{}{}{}\n",ptr_str, enc_str, l).as_bytes() ));
};
} else {
let mut ptr_str = match ARGS.flag_radix {
Some(Radix::X) => format!("{:7x} ", self.ptr),
Expand All @@ -119,17 +120,18 @@ impl Finding {
None => "",
};

let enc_str = if ARGS.flag_encoding.len() > 1 {
let enc_str = if ARGS.flag_encoding.len() > 1 {
format!("{:14}\t",enc_str!(self))
} else {
} else {
"".to_string()
};
};

for l in self.s.lines() {
&out.write_all(format!("{}{}{}\n",ptr_str, enc_str, l).as_bytes() );
try!(out.write_all(format!("{}{}{}\n",ptr_str, enc_str, l).as_bytes() ));
ptr_str = ptr_str_ff.to_string();
}
}
};
};
Ok(())
}
}

Expand Down Expand Up @@ -269,7 +271,7 @@ macro_rules! filter {
} else if s.len() >= minsize {
out.push_str(&CONTROL_REPLACEMENT_STR);
}
if is_last && $fc.last_str_is_incomplete
if is_last && $fc.last_str_is_incomplete
&& s.len() >= ARGS.flag_split_bytes.unwrap() as usize
&& inp.ends_with(&s) {
if s.len() < minsize { // avoid printing twice
Expand Down Expand Up @@ -334,11 +336,11 @@ impl FindingCollection {
/// This method formats and dumps a `FindingCollection` to the output channel,
/// usually `stdout`.
#[allow(dead_code)]
pub fn print(&self, out: &mut Box<Write>) {
if (&self).v.len() == 0 { return };
pub fn print(&self, out: &mut Box<Write>) -> Result<(), Box<std::io::Error>> {
for finding in &self.v {
finding.print(out);
}
try!(finding.print(out));
};
Ok(())
}


Expand All @@ -351,8 +353,9 @@ impl FindingCollection {
/// in the new `Finding`. The actual content will be added with the next call of a
/// `StringWriter` function (see below).
pub fn close_old_init_new_finding(&mut self, text_ptr: usize, mission: &'static Mission) {
pub fn close_old_init_new_finding(&mut self, text_ptr: usize) {

let mission = self.v.last().unwrap().mission;
if self.v.last().unwrap().s.len() != 0 { // last is not empty

filter!(self, mission);
Expand All @@ -364,13 +367,13 @@ impl FindingCollection {
mission: mission,
s: String::with_capacity(FINDING_STR_CAPACITY) });
} else {
// The current finding is empty, we do not
// push a new finding, instead we
// only update the pointer of the current
// one. Content will come later anyway.
let last_finding = self.v.last_mut().unwrap();
last_finding.ptr = text_ptr;
last_finding.mission = mission;
// The current finding is empty, we do not
// push a new finding, instead we
// only update the pointer of the current
// one. Content will come later anyway.
self.v.last_mut().unwrap().ptr= text_ptr;
// we don't need `self.v.last_mut().unwrap().mission = mission`
// because it is always the same mission
};
}

Expand Down
8 changes: 4 additions & 4 deletions src/input.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use scanner::ScannerPool;
use std::path::Path;
use std::io::prelude::*;
use std::io::stdin;
use std::error::Error;
use std;
extern crate memmap;
use self::memmap::{Mmap, Protection};
extern crate itertools;
Expand Down Expand Up @@ -177,7 +177,7 @@ pub const UTF8_LEN_MAX: u8 = 6;
/// If `file_path_str` == None read from `stdin`, otherwise
/// read from file.
pub fn process_input(file_path_str: Option<&str>, mut sc: &mut ScannerPool)
-> Result<(), Box<Error>> {
-> Result<(), Box<std::io::Error>> {
match file_path_str {
Some(p) => {
let f = try!(File::open(&Path::new(p)));
Expand All @@ -193,7 +193,7 @@ pub fn process_input(file_path_str: Option<&str>, mut sc: &mut ScannerPool)
/// In order to avoid additional copying the crate `memmap` is used to access
/// the file contents. See:
/// https://en.wikipedia.org/wiki/Memory-mapped_file
pub fn from_file(sc: &mut ScannerPool, file: &File) -> Result<(), Box<Error>> {
pub fn from_file(sc: &mut ScannerPool, file: &File) -> Result<(), Box<std::io::Error>> {
let len = try!(file.metadata()).len() as usize;
let mut byte_counter: usize = 0;
while byte_counter + WIN_LEN <= len {
Expand All @@ -217,7 +217,7 @@ pub fn from_file(sc: &mut ScannerPool, file: &File) -> Result<(), Box<Error>> {
/// Streams the input pipe by cutting it into overlapping chunks and feeds the `ScannerPool`.
/// This function implements its own rotating input buffer.
/// After each iteration the `byte_counter` is updated.
fn from_stdin(sc: &mut ScannerPool) -> Result<(), Box<Error>> {
fn from_stdin(sc: &mut ScannerPool) -> Result<(), Box<std::io::Error>> {
let mut byte_counter: usize = 0;
let stdin = stdin();
let mut stdin = stdin.lock();
Expand Down
62 changes: 26 additions & 36 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ use std::fmt;
use std::process;
use std::cmp::{Ord,Eq};
use std::cmp;
use std::thread::JoinHandle;
use std::io;
use std::num::ParseIntError;
use std::str::FromStr;

extern crate memmap;
extern crate itertools;
Expand All @@ -45,10 +49,6 @@ extern crate scoped_threadpool;
use std::thread;

extern crate encoding;
use std::thread::JoinHandle;
use std::io;
use std::num::ParseIntError;
use std::str::FromStr;
use encoding::EncodingRef;
use encoding::label::encoding_from_whatwg_label;
use encoding::all;
Expand Down Expand Up @@ -332,31 +332,30 @@ impl Missions {
}

Ok( (enc_name, nbytes_min, filter1, filter2) )

}
}


/// This function spawns and defines the behaviour of the _merger-thread_, which
/// collects and prints the results produced by the worker threads.
fn main() {
fn main2() -> Result<(), Box<std::io::Error>> {

if ARGS.flag_list_encodings {
let list = all::encodings().iter().filter_map(|&e|e.whatwg_name()).sorted();
// Available encodings
for e in list {
println!("{}",e);
}
return;
return Ok(());
}

if ARGS.flag_version {
println!("Version {}, {}", VERSION.unwrap_or("unknown"), AUTHOR );
return;
return Ok(());
}


let merger: JoinHandle<()>;
let merger: JoinHandle<Result<(), Box<std::io::Error>>>;
// Scope for threads
{
let n_threads = MISSIONS.len();
Expand All @@ -367,13 +366,15 @@ fn main() {
merger = thread::spawn(move || {
let mut output = match ARGS.flag_output {
Some(ref fname) => {
let f = File::create(&Path::new(fname.as_str())).unwrap();
let f = try!(File::create(&Path::new(fname.as_str())));
Box::new(f) as Box<Write>
},
None => Box::new(io::stdout()) as Box<Write>,
};
try!(output.write_all("\u{feff}".as_bytes()));

'outer: loop {
// collect
let mut results = Vec::with_capacity(n_threads);
for _ in 0..n_threads {
results.push(match rx.recv() {
Expand All @@ -384,47 +385,36 @@ fn main() {
Err(_) => {break 'outer},
});
};
// merge
// merge
for finding in kmerge(&results) {
finding.print(&mut output);
try!(finding.print(&mut output));
};
}
};
//println!("Merger terminated.");
Ok(())
});

// Default for <file> is stdin.
if (ARGS.arg_FILE.len() == 0) ||
( (ARGS.arg_FILE.len() == 1) && ARGS.arg_FILE[0] == "-") {
match process_input(None, &mut sc) {
Err(e)=> {
writeln!(&mut std::io::stderr(),
"Error while reading from stdin: {}.",
e.to_string()).unwrap();
process::exit(2);
},
_ => {},
}
try!(process_input(None, &mut sc));
} else {
for ref file_path_str in ARGS.arg_FILE.iter() {
match process_input(Some(&file_path_str), &mut sc) {
Err(e)=> {
writeln!(&mut std::io::stderr(),
"Error: `{}` while processing file: `{}`.",
e.to_string(), file_path_str).unwrap();
process::exit(2);
},
_ => {},
}

}
}

try!(process_input(Some(&file_path_str), &mut sc));
};
};
} // `tx` drops here, which "break"s the merger-loop.
merger.join().unwrap();
merger.join().unwrap()

//println!("All threads terminated.");
}

/// Program entry point.
///
/// Delegates all work to `main2()`. When `main2()` returns an error, the
/// error is written to stderr and the process terminates with exit code 1.
fn main() {
    match main2() {
        Ok(_) => {},
        Err(err) => {
            // Report on stderr so the message is not mixed into the regular output.
            writeln!(&mut std::io::stderr(), "Error: `{}`.",err).unwrap();
            process::exit(1);
        },
    }
}


#[cfg(test)]
Expand Down
Loading

0 comments on commit 39e1ae7

Please sign in to comment.