-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsplit_PDB.pl
executable file
·115 lines (96 loc) · 2.29 KB
/
split_PDB.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
#!/usr/bin/perl
## Pombert Lab 2020
my $version = '0.4a';
my $name = 'split_PDB.pl';
my $updated = '2022-03-01';
use strict;
use warnings;
use PerlIO::gzip;
use File::Basename;
use File::Path qw(make_path);
use Getopt::Long qw(GetOptions);
## Usage definition
my $USAGE = <<"OPTIONS";
NAME ${name}
VERSION ${version}
UPDATED ${updated}
SYNOPSIS Splits a PDB file into separate files, one per chain
USAGE EXAMPLE ${name} \\
-p files.pdb \\
-o output_folder \\
-e pdb
OPTIONS:
-p (--pdb) PDB input file (supports gzipped files)
-o (--outdir) Output directory [Default: ./]
-e (--ext) Desired file extension [Default: pdb]
OPTIONS
die "\n$USAGE\n" unless @ARGV;
## Defining options
my @pdb;
my $outdir = './';
my $ext = 'pdb';
GetOptions(
'p|pdb=s@{1,}' => \@pdb,
'o|outdir=s' => \$outdir,
'e|ext=s' => \$ext
);
## Output dir
unless (-d $outdir) {
make_path( $outdir, { mode => 0755 } ) or die "Can't create $outdir: $!\n";
}
## Working on PDB files
while (my $pdb = shift@pdb){
my ($filename, $dir) = fileparse($pdb);
my ($prefix) = $filename =~ /^(\w+)/;
## Working on PDB file
my $gzip = '';
if ($pdb =~ /.gz$/){ $gzip = ':gzip'; }
open PDB, "<$gzip", "$pdb" or die "Can't open PDB file $pdb: $!\n";
my @header;
my $chain;
my %chains;
my $type = '';
while (my $line = <PDB>){
chomp $line;
if ($line =~ /^HEADER|TITLE|SOURCE|KEYWDS|EXPDTA|REVDAT|JRNL/){
push (@header, $line);
}
## Check if chain is amino acids
elsif ($line =~ /^ATOM.{13}\w{3}\s(\w)/){
$type = 'aa';
$chain = $1;
push (@{$chains{$chain}}, $line);
}
## Check if chain is nucleotide or something else
elsif ($line =~ /^ATOM/){
$type = 'unknown';
$chain = undef;
}
## Only push if chain is amino acids
elsif ($line =~ /^TER/){
if ($type eq 'aa'){
if (defined $chain){
push (@{$chains{$chain}}, $line);
}
else {
print STDERR "File $pdb: $line\n";
}
}
}
}
if ($gzip eq ':gzip'){ binmode PDB, ":gzip(none)"; }
## Working on chains
for my $ch (keys %chains){
open OUT, ">", "$outdir/${prefix}_$ch.$ext" or die "Can't create PDB file $pdb in folder $outdir: $!\n";
## write header
foreach (@header){
print OUT "$_\n";
}
my @core = @{$chains{$ch}};
for (0..$#core){
print OUT "$core[$_]\n";
}
close OUT;
}
system("rm $pdb")
}