-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathAnalysis.pm
81 lines (63 loc) · 1.45 KB
/
Analysis.pm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
package Sequences;
#################################
#
# Sequences - helper routines for dealing with raw Solexa reads.
#
# Subroutines defined in this file:
#   is_solexa_fastq  - scans a quality line for characters whose ASCII
#                      value is greater than 73
#   sol2sanger       - lowers every quality character by 31 for pipeline
#                      version 1.4
#   get_read_lengths - indexes the '@' header lines of the given files by
#                      the length of the line that follows each header
#
# All export lists below are empty, so nothing is imported into the
# caller's namespace; call the routines fully qualified, e.g.
# Sequences::sol2sanger(...).
#################################
use strict;
use Exporter;
# Package globals consulted by Exporter, declared so they pass 'use strict'.
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
$VERSION = 0.1;
# Inherit import() from Exporter.
@ISA = qw(Exporter);
# Nothing is exported by default ...
@EXPORT = ();
# ... and nothing can be requested by name either.
@EXPORT_OK = qw();
# Both tag groups are defined but currently empty.
%EXPORT_TAGS = (DEFAULT => [qw()],
ALL =>[qw()]);
sub is_solexa_fastq {
    ## Report whether a FASTQ quality line contains any character whose
    ## ASCII value is greater than 73 ('I'), which this module treats as
    ## the mark of Solexa-encoded qualities.
    ##
    ## Argument:
    ##   quality line (string); a trailing newline is harmless because its
    ##   code (10) is below the threshold.
    ## Returns 1 if such a character is found, 0 otherwise (including for
    ## an empty or undefined line).
    my ($qual_line) = @_;
    return 0 unless defined $qual_line;

    # Split on the empty pattern to walk the line one character at a time.
    # (The previous pattern /|/ only behaved this way by accident: it is an
    # alternation of two empty patterns.)
    for my $char (split //, $qual_line) {
        return 1 if ord($char) > 73;
    }
    return 0;
}
sub sol2sanger {
    ## Convert a quality string written by Solexa pipeline version 1.4 to
    ## Sanger encoding by lowering every character's ASCII code by 31.
    ##
    ## Arguments (positional):
    ##   $vers  - pipeline version string; must contain a literal "1.4"
    ##   $quals - quality line; trailing CR/LF characters are ignored
    ## Returns the converted quality string.
    ## Dies with "error in sol2sanger " when the quality string is missing
    ## or empty, and with "No known version" for any other pipeline version.

    # Test for presence rather than truth so that a quality string of "0"
    # is not mistaken for a missing argument.
    die "error in sol2sanger " unless defined $_[1] && length $_[1];
    my ($vers, $quals) = @_;

    # The dot is escaped: unescaped it matched any character, so versions
    # such as "104" or "154" were wrongly accepted as 1.4.
    die "No known version\n\n" unless defined $vers && $vers =~ m/1\.4/;

    # Remove the line ending before converting. chomp alone leaves the "\r"
    # of a CRLF ending in place, and "\r" (13) minus 31 is not a valid
    # character code.
    $quals =~ s/[\r\n]+\z//;

    # 31 is the shift this routine applies; presumably the gap between the
    # Phred+64 and Phred+33 offsets (64 - 33) -- confirm against the
    # pipeline documentation.
    return join '', map { chr(ord($_) - 31) } split //, $quals;
}
sub get_read_lengths {
    ## Index the reads of one or more FASTQ files by read length.
    ##
    ## Argument:
    ##   reference to an array of FASTQ file names
    ## Returns a reference to a hash of the form
    ##   $lengths->{ <read length> }{ <file name> }{ <header line> } = 1
    ## where <header line> is the chomped '@' line of a record and
    ## <read length> is the length of the line directly after it.
    ## Dies if a file cannot be opened.
    ##
    ## Records are assumed to keep the sequence on a single line; a header
    ## with no following line (truncated file) is ignored.
    my ($files) = @_;
    my %hash;

    # Diagnostic kept from the original: echo the file list to STDERR.
    warn "@{$files}";

    foreach my $file (@{$files}) {
        # 'or' (not '||') so that the die is tied to open() itself; with
        # '||' the check was attached to the file-name string and an
        # unreadable file was silently skipped.
        open my $fh, '<', $file or die "can't open $file: $!\n\n";

        my $line = <$fh>;
        while (defined $line) {
            chomp $line;
            if ($line !~ m/^@/) {
                $line = <$fh>;
                next;
            }

            my $seq = <$fh>;
            last unless defined $seq;    # header at end of file, no read
            chomp $seq;
            $hash{ length $seq }{$file}{$line} = 1;

            # Step over the '+' separator and the quality line. '@' is a
            # legal first quality character, so without this a quality
            # line could be taken for the next header.
            my $separator = <$fh>;
            if (defined $separator && $separator =~ m/^\+/) {
                my $quality = <$fh>;     # read and discard
                $line = <$fh>;
            }
            else {
                # Not a separator: treat it as an ordinary line so that
                # input without '+' lines is still scanned line by line.
                $line = $separator;
            }
        }
        close $fh;
    }
    return \%hash;
}
1;