Skip to content

Commit

Permalink
add --workdir option to generate-names.pl, fixes #273
Browse files Browse the repository at this point in the history
  • Loading branch information
rbuels committed Jun 17, 2013
1 parent 8ac9ccd commit 206e0f4
Show file tree
Hide file tree
Showing 5 changed files with 50 additions and 10 deletions.
1 change: 1 addition & 0 deletions Makefile.PL
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ my %WriteMakefileArgs = (
"Exporter" => 0,
"Fcntl" => 0,
"File::Basename" => 0,
"File::Copy::Recursive" => 0,
"File::Path" => 2,
"File::Spec" => 0,
"File::Spec::Functions" => 0,
Expand Down
15 changes: 15 additions & 0 deletions bin/generate-names.pl
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,18 @@ =head1 OPTIONS
increasing this amount can speed up this script's running time
significantly.
=item --workdir <dir>
Use the given location for building the names index, copying the index
over to the destination location when fully built. By default, builds
the index in the output location.
Name indexing is a very I/O intensive operation, because the
filesystem is used to store intermediate results in order to keep the
RAM usage reasonable. If a fast filesystem (e.g. tmpfs) is available
and large enough, indexing can be sped up by using it to store the
intermediate results of name indexing.
=item --completionLimit <number>
Maximum number of completions to store for a given prefix. Default 20.
Expand Down Expand Up @@ -81,6 +93,7 @@ =head1 OPTIONS
my @includedTrackNames;

my $outDir = "data";
my $workDir;
my $verbose = 0;
my $help;
my $max_completions = 20;
Expand All @@ -95,6 +108,7 @@ =head1 OPTIONS
"verbose+" => \$verbose,
"thresh=i" => \$thresh,
"sortMem=i" => \$sort_mem,
"workdir=s" => \$workDir,
"totalNames=i" => \$est_total_name_records,
'tracks=s' => \@includedTrackNames,
'hashBits=i' => \$hash_bits,
Expand Down Expand Up @@ -150,6 +164,7 @@ =head1 OPTIONS

my $nameStore = Bio::JBrowse::HashStore->open(
dir => catdir( $outDir, "names" ),
work_dir => $workDir,
empty => 1,
sort_mem => $sort_mem,

Expand Down
5 changes: 5 additions & 0 deletions release-notes.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
{{$NEXT}}

* Added a `--workdir` option to `generate-names.pl` to allow name
index building on a faster filesystem than the one that will
ultimately store the name index. Thanks to Alexie Papanicolaou for
suggesting this. (issue #273).

* Fixed a bug with some types of BAM files in which not all BAM
features would be displayed. Thanks to Ignazio Carbone for
pointing this out. (issue #276).
Expand Down
35 changes: 26 additions & 9 deletions src/perl5/Bio/JBrowse/HashStore.pm
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ package Bio::JBrowse::HashStore;
use strict;
use warnings;

use Carp;

use JSON 2;

use File::Spec ();
Expand All @@ -49,12 +51,12 @@ sub open {
# source of data: defaults, overridden by open args, overridden by meta.json contents
my $self = bless { @_ }, $class;

$self->{final_dir} = $self->{dir} or croak "dir option required";
$self->{dir} = $self->{work_dir} || $self->{final_dir};

$self->empty if $self->{empty};

%$self = (
%$self,
meta => $self->_read_meta
);
$self->{meta} = $self->_read_meta;

$self->{hash_bits} ||= $self->{meta}{hash_bits} || 16;
$self->{meta}{hash_bits} = $self->{hash_bits};
Expand All @@ -64,7 +66,7 @@ sub open {
$self->{bucket_cache} = $self->_make_cache( size => 30 );
$self->{bucket_path_cache_by_key} = $self->_make_cache( size => 30 );

return bless $self, $class;
return $self;
}

sub _make_cache {
Expand All @@ -76,11 +78,26 @@ sub _make_cache {
# write out meta.json file when the store itself is destroyed
# Finalize the store when the object is destroyed: write meta.json into
# the directory the store was built in ($self->{dir}) and, when that
# directory differs from $self->{final_dir}, recursively copy the
# finished store over to the final directory.
#
# Failures are raised with die.  Note that Perl reports exceptions
# thrown from inside DESTROY as "(in cleanup)" warnings instead of
# propagating them to the caller.
sub DESTROY {
    my ( $self ) = @_;

    # An object whose construction failed before 'dir' was set has
    # nothing to finalize; without this guard the directory creation and
    # the meta.json path would be computed from an undefined directory.
    my $work_dir = $self->{dir};
    return unless defined $work_dir && length $work_dir;
    my $final_dir = $self->{final_dir};

    File::Path::mkpath( $work_dir );
    {
        # meta.json is written exactly once, in its own scope, so the
        # handle is gone before anything is copied.
        my $meta_path = $self->_meta_path;
        CORE::open my $out, '>', $meta_path or die "$! writing $meta_path";
        $out->print( JSON::to_json( $self->{meta} ) )
            or die "$! writing $meta_path";
        # Close explicitly: buffered write errors only surface at close,
        # and the file must be complete on disk before the copy below.
        CORE::close $out or die "$! writing $meta_path";
    }

    # free everything to flush buckets
    %$self = ();

    if ( defined $final_dir && $final_dir ne $work_dir ) {
        require File::Copy::Recursive;
        # dircopy returns false on failure; do not lose the index silently.
        File::Copy::Recursive::dircopy( $work_dir, $final_dir )
            or die "$! copying $work_dir to $final_dir";
    }

    return;
}
sub _meta_path {
File::Spec->catfile( shift->{dir}, 'meta.json' );
Expand Down
4 changes: 3 additions & 1 deletion tests/perl_tests/generate-names.pl.t
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,14 @@ use File::Temp;
use FileSlurping qw( slurp slurp_tree );

my $tempdir = new_volvox_sandbox();
my $temp2 = File::Temp->newdir( CLEANUP => $ENV{KEEP_ALL} ? 0 : 1 );
system $^X, 'bin/generate-names.pl', (
'--out' => "$tempdir",
'--workdir' => $temp2,
'--completionLimit' => 15
);
ok( ! $?, 'generate-names.pl also ran ok on volvox test data' );
is_deeply( read_names($tempdir), read_names('tests/data/volvox_formatted_names') );
is_deeply( read_names($tempdir), read_names('tests/data/volvox_formatted_names') ) or diag explain read_names($tempdir);

$tempdir = new_volvox_sandbox();
system $^X, 'bin/generate-names.pl', (
Expand Down

0 comments on commit 206e0f4

Please sign in to comment.