-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathoddt_ifp.py
executable file
·127 lines (100 loc) · 4.65 KB
/
oddt_ifp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#!/usr/bin/env python
# Copyright 2022 Informatics Matters Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys, argparse
import utils
from dm_job_utilities.dm_log import DmLog
from oddt import toolkit, fingerprints
# This uses ODDT. See https://oddt.readthedocs.io/en/latest/index.html
# For a description of the ODDT fingerprints see:
# https://oddt.readthedocs.io/en/latest/rst/oddt.html?highlight=dice#module-oddt.fingerprints
# Fingerprint generators keyed by the name used on the command line.
# Each entry takes the molecule to fingerprint followed by the protein.
fingerprint_types = {
    "if": lambda ligand, receptor: fingerprints.InteractionFingerprint(ligand, receptor),
    "sif": lambda ligand, receptor: fingerprints.SimpleInteractionFingerprint(ligand, receptor),
    "splif": lambda ligand, receptor: fingerprints.SPLIF(ligand, receptor),
    # PLEC is always generated with sparse=False.
    "plec": lambda ligand, receptor: fingerprints.PLEC(ligand, receptor, sparse=False),
}
# Similarity functions keyed by metric name; each one is called with two fingerprints.
metric_types = dict(
    tanimoto=fingerprints.tanimoto,
    dice=fingerprints.dice,
    splif=fingerprints.similarity_SPLIF,
)
def execute(inputs, protein, ligands, output, fingerprint='if', metric='dice', field_name='ODDT_IFP', interval=0):
    """Score molecules by fingerprint similarity to a set of reference ligands.

    A fingerprint is generated for every reference ligand against the protein.
    Each input molecule is fingerprinted the same way and compared with every
    reference fingerprint. The highest and the mean similarity are stored on
    the molecule as ``<field_name>_MAX`` and ``<field_name>_AVG`` and the
    molecule is written to the output file.

    :param inputs: path of the SDF file with the molecules to score
    :param protein: path of the protein PDB file (its first molecule is used)
    :param ligands: path of the SDF file with the reference ligands
    :param output: path of the SDF file to write (overwritten if it exists)
    :param fingerprint: key of ``fingerprint_types`` selecting the fingerprint
    :param metric: key of ``metric_types`` selecting the similarity function;
        replaced by 'splif' when the fingerprint is 'splif'
    :param field_name: base name of the two data fields added to each molecule
    :param interval: emit a progress event every ``interval`` records; a falsy
        value (0 or None) disables progress events
    :return: tuple of (number of input records read, number of errors)
    :raises KeyError: if ``fingerprint`` or ``metric`` is not a known key
    :raises ValueError: if no reference ligand could be read
    """
    count = 0
    success = 0
    errors = 0

    utils.expand_path(output)

    # SPLIF fingerprints are compared with their own similarity function, so
    # whatever metric was requested is overridden.
    if fingerprint == 'splif':
        metric = 'splif'

    # Resolve the callables once rather than on every molecule.
    make_fingerprint = fingerprint_types[fingerprint]
    similarity = metric_types[metric]

    protein_mol = next(toolkit.readfile('pdb', protein))
    protein_mol.protein = True

    ligand_mols = toolkit.readfile('sdf', ligands)
    input_mols = toolkit.readfile('sdf', inputs)

    # Unreadable reference ligands are skipped in the same way as unreadable inputs.
    fps = []
    for ligand_count, ligand_mol in enumerate(ligand_mols, start=1):
        if not ligand_mol:
            utils.log("Failed to read reference ligand", ligand_count)
            continue
        fps.append(make_fingerprint(ligand_mol, protein_mol))
    utils.log('Read {} ligands'.format(len(fps)))
    if not fps:
        # Without references the average below would divide by zero; fail
        # with a clear message before the output file is created.
        raise ValueError('No reference ligands could be read from {}'.format(ligands))

    writer = toolkit.Outputfile('sdf', output, overwrite=True)
    try:
        for mol in input_mols:
            count += 1
            if not mol:
                DmLog.emit_event("Failed to read molecule", count)
                errors += 1
            else:
                try:
                    mol_fp = make_fingerprint(mol, protein_mol)
                    scores = [similarity(ref_fp, mol_fp) for ref_fp in fps]
                    # The best score never drops below zero.
                    mol.data[field_name + '_MAX'] = max(0, *scores)
                    mol.data[field_name + '_AVG'] = sum(scores) / len(scores)
                    writer.write(mol)
                except RuntimeError as e:
                    utils.log("Failed", count, e)
                    errors += 1
                else:
                    # Only molecules that were actually written count as successes.
                    success += 1
                    if success % 10000 == 0:
                        DmLog.emit_cost(success)

            # Reported for every record, including unreadable ones.
            if interval and count % interval == 0:
                DmLog.emit_event("Processed {} records, {} errors".format(count, errors))
    finally:
        writer.close()

    return count, errors
def main():
    """Parse the command line, score the input molecules and report the totals.

    The parsed arguments are logged, ``execute`` is run with them, and the
    number of processed molecules and errors is emitted as a DmLog event
    followed by a DmLog cost equal to the number of molecules read.
    """
    # Example:
    #   ./oddt_ifp.py -i data/dhfr_candidates.sdf -p data/dhfr-receptor.pdb -l data/dhfr-ligand.mol -o foo.sdf
    parser = argparse.ArgumentParser(
        description='Score ligands against reference ligands using ODDT interaction fingerprints')

    parser.add_argument('-i', '--input', required=True, help="File with ligands to score (.sdf)")
    parser.add_argument('-p', '--protein', required=True, help="Protein in PDB format")
    parser.add_argument('-l', '--ligands', required=True, help="Reference ligands to score against (.sdf)")
    parser.add_argument('-o', '--output', required=True, help="Output file (.sdf)")
    parser.add_argument('-f', '--fingerprint', default='if', choices=['if', 'sif', 'splif', 'plec'],
                        help="Which fingerprint (if=InteractionFingerprint, sif=SimpleInteractionFingerprint," +
                             " splif=SPLIF, plec=PLEC)")
    parser.add_argument('-m', '--metric', default='dice', choices=['dice', 'tanimoto'],
                        help="Which metric (ignored if using splif fingerprint)")
    parser.add_argument('--field-name', default='ODDT_IFP', help="Base name for the output fields")
    # Default of 0 matches execute(): no progress events unless requested.
    parser.add_argument("--interval", type=int, default=0, help="Reporting interval (0 disables reporting)")

    args = parser.parse_args()
    utils.log("oddt_ifp.py: ", args)

    count, errors = execute(args.input, args.protein, args.ligands, args.output,
                            fingerprint=args.fingerprint, metric=args.metric,
                            field_name=args.field_name, interval=args.interval)

    DmLog.emit_event('Processed {} molecules. {} errors'.format(count, errors))
    DmLog.emit_cost(count)
# Run the command-line interface only when executed as a script, not on import.
if __name__ == '__main__':
    main()