-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcrossReferenceClinVar.pl
133 lines (122 loc) · 3.24 KB
/
crossReferenceClinVar.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
#!/usr/bin/perl
#04 February 2017 - Adam D Scott -
use strict;
use warnings;
use IO::File;
use FileHandle;
my $usage = 'perl crossReferenceClinVar.pl <genome> <clinvar> <gender> <output>
';
die $usage , unless @ARGV == 4;
my ( $genome , $clinvar , $gender , $output ) = @ARGV;
my $IN1 = FileHandle->new( "$genome" , "r" );
if ( not defined $IN1 ) { die "ADSERROR: Could not open/read $genome\n"; }
my $IN2 = FileHandle->new( "$clinvar" , "r" );
if ( not defined $IN2 ) { die "ADSERROR: Could not open/read $clinvar\n"; }
my $OUT = FileHandle->new( "$output" , "w" );
if ( not defined $OUT ) { die "ADSERROR: Could not open/write $output\n"; }
my %snps;
my %rsIDs;
while ( my $line = $IN1->getline ) {
next if ( $line =~ m/^#/ );
chomp( $line );
my ( $rsID , $chr , $pos , $genotype ) = split( "\t" , $line );
my ( $allele1 , $allele2 ) = split // , $genotype;
if ( $chr eq "MT" ) { $allele2 = "."; }
if ( $gender eq "m" ) {
if ( $chr eq "X" or $chr eq "Y" ) {
$allele2 = ".";
}
}
my $gen = join( ":" , ( $chr , $pos ) );
$snps{$gen} = $allele1.":".$allele2;
$rsIDs{$gen} = $rsID;
}
$IN1->close();
sub removeLead {
my $str = shift;
my @str = split // , $str;
shift @str;
return join( "" , @str );
}
my $header = "RefStatus\tAltStatus\trsID\tChromosome\tPosition\tAllele1\tAllele2\t".$IN2->getline;
$OUT->print( $header );
print STDOUT $header;
print STDERR $header;
while ( my $line = $IN2->getline ) {
chomp( $line );
my ( $chr , $pos , $ref , $alt , $sig ) = ( split( "\t" , $line ) )[0,1,2,3,12];
if ( length $ref > 1 and length $alt == 1 ) {
$ref = &removeLead( $ref );
$alt = &removeLead( $alt );
}
my $gen = join( ":" , ( $chr , $pos ) );
if ( exists $snps{$gen} ) {
my ( $refCount , $altCount , $rstatus , $astatus ) = (0)x4;
my ( $allele1 , $allele2 ) = split( ":" , $snps{$gen} );
$rstatus = "absent";
if ( length $ref == 1 ) {
if ( $allele1 eq $ref ) {
$refCount += 1;
}
if ( $allele2 eq $ref ) {
$refCount += 1;
}
if ( $refCount == 2 ) {
$rstatus = "homozygous";
} elsif ( $refCount == 1 ) {
$rstatus = "heterozygous";
}
}
$astatus = "absent";
if ( length $alt == 1 ) {
if ( $allele1 eq $alt ) {
$altCount += 1;
}
if ( $allele2 eq $alt ) {
$altCount += 1;
}
if ( $altCount == 2 ) {
$astatus = "homozygous";
if ( $sig =~ /pathogenic/ig and $sig !~ /benign/ig ) {
print STDERR join( "\t" , ( $rstatus , $astatus , $rsIDs{$gen} , $chr , $pos , $allele1 , $allele2 , $line ) )."\n";
}
} elsif ( $altCount == 1 ) {
$astatus = "heterozygous";
if ( $sig =~ /pathogenic/ig and $sig !~ /benign/ig ) {
print STDOUT join( "\t" , ( $rstatus , $astatus , $rsIDs{$gen} , $chr , $pos , $allele1 , $allele2 , $line ) )."\n";
}
}
}
$OUT->print( join( "\t" , ( $rstatus , $astatus , $rsIDs{$gen} , $chr , $pos , $allele1 , $allele2 , $line ) )."\n" );
}
}
$IN2->close();
$OUT->close();
__DATA__
0 chrom
1 pos
2 ref
3 alt
4 measureset_type
5 measureset_id
6 rcv
7 allele_id
8 symbol
9 hgvs_c
10 hgvs_p
11 molecular_consequence
12 clinical_significance
13 pathogenic
14 benign
15 conflicted
16 review_status
17 gold_stars
18 all_submitters
19 all_traits
20 all_pmids
21 inheritance_modes
22 age_of_onset
23 prevalence
24 disease_mechanism
25 origin
26 xrefs