#!/usr/bin/env perl
#
# Merge a master cluster file with any number of additional cluster files.
#
# Usage: <script> master.clstr [other.clstr ...]
#
# Input format, as relied on by the parsing below:
#   * a line starting with ">" opens a new cluster;
#   * every other line is a member line, which starts with a running number;
#   * the representative member line ends with "*" and contains
#     "aa, >NAME..." or "nt, >NAME...".
#
# For every master cluster that has a representative, the cluster is printed
# to the currently selected output handle (STDOUT by default), followed by
# the non-representative members of the cluster with the same representative
# in each additional file, renumbered to continue the master numbering.
#
# The order of clusters needs to be identical in all files: each additional
# file is only ever read forwards, and clusters whose representative does not
# match the current master representative are skipped for good.

use strict;
use warnings;

# State of the additional cluster files, indexed by their position on the
# command line.
my @fhs;         # read handles
my @div_reps;    # representative of the cluster being buffered ("" = none)
my @div_seqs;    # array ref: buffered member lines, line ending removed

# Return the representative name if $line is a representative member line
# (ends with "*"), undef for any other line. Dies with "format error" when
# the line ends with "*" but carries no "aa, >NAME..." / "nt, >NAME..." part.
sub _representative_of {
    my ($line) = @_;

    return undef unless $line =~ /\*$/;
    return $2 if $line =~ /(aa|nt), >(.+)\.\.\./;
    die "format error $line";
}

# Forget the cluster currently buffered for additional file $i.
sub _reset_div_cluster {
    my ($i) = @_;

    $div_reps[$i] = "";
    $div_seqs[$i] = [];
    return;
}

# Print the buffered non-representative members of additional file $i,
# numbering them from $rep_no upwards, then clear the buffer.
# Returns the number of lines printed.
sub _emit_div_cluster {
    my ($i, $rep_no) = @_;

    my $next_no = $rep_no;
    for my $member (@{ $div_seqs[$i] }) {
        next if $member =~ /\*$/;    # the representative is already in the master
        (my $renumbered = $member) =~ s/^\d+/$next_no/;
        print $renumbered, "\n";
        $next_no++;
    }
    _reset_div_cluster($i);
    return $next_no - $rep_no;
}

# Advance additional file $i until the cluster represented by $master_rep has
# been read completely, print its non-representative members numbered from
# $rep_no, and return how many members were printed.
#
# Clusters with a different representative that are passed on the way are
# dropped. If the end of the file is reached without a match, 0 is returned
# and the last buffered cluster is kept, so a later call can still match it.
# Dies with "format error" on a malformed representative line.
sub process_this {
    my ($i, $master_rep, $rep_no) = @_;

    my $fh = $fhs[$i];
    while (my $line = <$fh>) {
        # Strip the line ending explicitly (not chop): a last line without a
        # newline must keep its final character, and CRLF input must parse.
        $line =~ s/\r?\n\z//;

        if ($line =~ /^>/) {
            # A header closes the cluster buffered so far.
            return _emit_div_cluster($i, $rep_no)
                if $div_reps[$i] eq $master_rep;
            _reset_div_cluster($i);
            next;
        }

        push @{ $div_seqs[$i] }, $line;
        my $rep = _representative_of($line);
        $div_reps[$i] = $rep if defined $rep;
    }

    # End of file: the last cluster has no closing header line.
    return _emit_div_cluster($i, $rep_no) if $div_reps[$i] eq $master_rep;
    return 0;
}

# Print one finished master cluster followed by the matching members of every
# additional file. Clusters without a representative are not printed.
sub _flush_master_cluster {
    my ($master_rep, $lines, $rep_no) = @_;

    # Compare against "" rather than testing truthiness, so that a
    # representative literally named "0" is still printed.
    return if $master_rep eq "";

    print $_, "\n" for @{$lines};
    for my $i (0 .. $#fhs) {
        # Each file continues the numbering where the previous one stopped.
        $rep_no += process_this($i, $master_rep, $rep_no);
    }
    return;
}

# Entry point: main($master_clstr, @other_clstr). Returns 0 on success and
# dies when a file cannot be opened or a representative line is malformed.
sub main {
    my ($master_clstr, @clstr) = @_;

    die "usage: $0 master.clstr [other.clstr ...]\n"
        unless defined $master_clstr;

    @fhs      = ();
    @div_reps = ();
    @div_seqs = ();
    for my $file (@clstr) {
        open(my $fh, '<', $file) or die "can not open $file: $!";
        push @fhs,      $fh;
        push @div_reps, "";
        push @div_seqs, [];
    }

    open(my $master_fh, '<', $master_clstr)
        or die "can not open $master_clstr: $!";

    my $master_rep   = "";    # representative of the current master cluster
    my @master_lines = ();    # header and member lines of that cluster
    my $rep_no       = 0;     # number of member lines in that cluster

    while (my $line = <$master_fh>) {
        $line =~ s/\r?\n\z//;

        if ($line =~ /^>/) {
            _flush_master_cluster($master_rep, \@master_lines, $rep_no);
            $master_rep   = "";
            @master_lines = ($line);
            $rep_no       = 0;
            next;
        }

        push @master_lines, $line;
        $rep_no++;
        my $rep = _representative_of($line);
        $master_rep = $rep if defined $rep;
    }
    # The last master cluster is not followed by another header.
    _flush_master_cluster($master_rep, \@master_lines, $rep_no);

    close($master_fh);
    close($_) for @fhs;
    return 0;
}

# Run only when executed as a script, so the file can also be loaded
# (require/do) without side effects.
exit(main(@ARGV)) unless caller;

1;