Skip to content

Commit

Permalink
upgrade to Bio::GFF3::LowLevel::Parser 1.7, and use its new max_lookb…
Browse files Browse the repository at this point in the history
…ack method
  • Loading branch information
rbuels committed Oct 2, 2013
1 parent 6ef024b commit 5fb859a
Show file tree
Hide file tree
Showing 6 changed files with 34 additions and 9 deletions.
2 changes: 1 addition & 1 deletion Makefile.PL
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ my %WriteMakefileArgs = (
"PREREQ_PM" => {
"Bio::Annotation::SimpleValue" => 0,
"Bio::FeatureIO" => 0,
"Bio::GFF3::LowLevel::Parser" => "1.4",
"Bio::GFF3::LowLevel::Parser" => "1.7",
"Bio::Index::Fasta" => 0,
"Bio::OntologyIO" => 0,
"Bio::Root::Version" => "1.006000",
Expand Down
14 changes: 14 additions & 0 deletions bin/flatfile-to-json.pl
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ =head1 USAGE
[ --nclChunk <chunk size for generated NCLs> ] \
[ --compress ] \
[ --sortMem <memory in bytes to use for sorting> ] \
[ --maxLookback <maximum number of features to buffer in gff3 files> ] \
=head1 ARGUMENTS
Expand Down Expand Up @@ -137,6 +138,19 @@ =head2 Optional
=back
=head2 GFF3-specific
=over 4
=item --maxLookback <integer>
Maximum number of features to keep in memory when parsing GFF3 files
(default 10000). If you are having trouble parsing a GFF3 file that
contains few '###' directives (which are important for parsing), try
setting this higher, provided your machine has enough memory.
=back
=head2 BED-specific
=over 4
Expand Down
5 changes: 5 additions & 0 deletions release-notes.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@

## Minor improvements

* Upgraded flatfile-to-json.pl to use a new version of
Bio::GFF3::LowLevel::Parser for GFF3 parsing. The new parser has a
lookback buffer limit (adjustable with --maxLookback) that makes it
easier to parse large GFF3 files that do not contain enough '###'
directives.

* Further improved the memory footprint and speed of
generate-names.pl. Thanks to Richard Hayes for his continued help
with testing improvements and reporting problems.
Expand Down
5 changes: 4 additions & 1 deletion src/perl5/Bio/JBrowse/Cmd/FlatFileToJson.pm
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ sub option_defaults {
out => 'data',
cssClass => 'feature',
sortMem => 1024 * 1024 * 512,
maxLookback => 10000
)
}

Expand All @@ -48,6 +49,7 @@ sub option_definitions {
"menuTemplate=s",
"arrowheadClass=s",
"subfeatureClasses=s",
"maxLookback=i",
"clientConfig=s",
"thinType=s",
"thickType=s",
Expand Down Expand Up @@ -129,7 +131,8 @@ sub make_gff_stream {
require Bio::GFF3::LowLevel::Parser;
require Bio::JBrowse::FeatureStream::GFF3_LowLevel;

my $p = Bio::GFF3::LowLevel::Parser->new( $self->opt('gff') );
my $p = Bio::GFF3::LowLevel::Parser->open( $self->opt('gff') );
$p->max_lookback( $self->opt('maxLookback') );

return Bio::JBrowse::FeatureStream::GFF3_LowLevel->new(
parser => $p,
Expand Down
13 changes: 8 additions & 5 deletions src/perl5/Bio/JBrowse/FeatureStream/GFF3_LowLevel.pm
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,22 @@ use base 'Bio::JBrowse::FeatureStream';

sub next_items {
my ( $self ) = @_;
while ( my $i = $self->{parser}->next_item ) {
return $self->_to_hashref( $i ) if $i->{child_features};
while ( my $items = $self->{parser}->next_item ) {
if( ref $items eq 'ARRAY' ) {
return map $self->_to_hashref( $_ ), @$items;
}
}
return;
}

use Carp::Always;

sub _to_hashref {
my ( $self, $f ) = @_;
# use Data::Dump 'dump';
# if( ref $f ne 'HASH' ) {
# Carp::confess( dump $f );
# }
$f = { %$f };
$f->{score} += 0 if defined $f->{score};
$f->{phase} += 0 if defined $f->{phase};

Expand All @@ -42,14 +45,14 @@ sub _to_hashref {
if( $h{child_features} ) {
$h{subfeatures} = [
map {
[ map $self->_to_hashref( $_ ), @$_ ]
[ map $self->_to_hashref( $_ ), map @$_, @$_ ]
} @{delete $h{child_features}}
];
}
if( $h{derived_features} ) {
$h{derived_features} = [
map {
[ map $self->_to_hashref( $_ ), @$_ ]
[ map $self->_to_hashref( $_ ), map @$_, @$_ ]
} @{$h{derived_features}}
];
}
Expand Down
4 changes: 2 additions & 2 deletions tests/perl_tests/featurestream.t
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ sub tempdir {

{
require Bio::GFF3::LowLevel::Parser;
my $p = Bio::GFF3::LowLevel::Parser->new( 'tests/data/redundant.gff3' );
my $p = Bio::GFF3::LowLevel::Parser->open( 'tests/data/redundant.gff3' );

my $s = Bio::JBrowse::FeatureStream::GFF3_LowLevel->new(
parser => $p,
Expand All @@ -43,7 +43,7 @@ sub tempdir {
'type' => 'gene'
}
]
) or diag explain \@i;
) or diag explain \@i, $p;
}

done_testing;

0 comments on commit 5fb859a

Please sign in to comment.