Skip to content

Commit

Permalink
Revised amino acid similarity function. (#267)
Browse files Browse the repository at this point in the history
* Histidine hydrophilicity.

* Constant pH, for now. #266

* Using a lookup table for performance reasons.

* Effect of pKa.

* Amino float similarities.

* Amino test now has a grid view.

* New AA similarity in amino groups.

* Grid based amino report.

* make code now includes amino grid report.

* Remove defunct amino tests.
  • Loading branch information
electronicsbyjulie authored Jun 3, 2023
1 parent 7e42690 commit 8c15113
Show file tree
Hide file tree
Showing 35 changed files with 187 additions and 372 deletions.
5 changes: 3 additions & 2 deletions makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ all: $(DIRS) \
$(TESTS) \
$(APPS) \
$(REPORTS)
code: $(OBJS) $(TESTS) molecule_report $(APPS)
code: $(OBJS) $(TESTS) amino_report molecule_report $(APPS)
primarydock: $(DIRS) $(OBJS) $(BINDIR)/primarydock
pepteditor: $(DIRS) $(OBJS) $(BINDIR)/pepteditor

Expand Down Expand Up @@ -107,7 +107,8 @@ performance_test: $(BINDIR)/primarydock testdata/test_TAAR8.config testdata/TAAR
# low-tooling regression tests below
amino_report: REPORT="test/amino_test.approved.txt"
amino_report: test/amino_test
bash src/amino_tests.bash ARNDCEQGHILKMFPUSTWYV
./test/amino_test >test/amino_test.received.txt
diff --color --unified $(REPORT) test/amino_test.received.txt

atom_report: REPORT="test/atom_test.approved.txt"
atom_report: test/atom_test
Expand Down
106 changes: 69 additions & 37 deletions src/amino_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,55 +68,87 @@ int main(int argc, char** argv)
}
else
{
if (argc>1) letter = argv[1][0];
AminoAcid aa(letter);
if (argc>1)
{
letter = argv[1][0];
AminoAcid aa(letter);

cout << "Is tyrosine-like (i.e. has an aromatic ring and a separate H-bond acceptor)? "
<< (aa.is_tyrosine_like() ? "Y" : "N") << endl;
cout << "Charge: " << aa.get_charge() << endl;

Bond** bb = aa.get_rotatable_bonds();
if (bb && bb[0])
{
for (i=0; bb[i]; i++)
{
Atom** baa = bb[i]->get_moves_with_btom();
cout << bb[i]->atom->name << "-" << bb[i]->btom->name << " can rotate, bringing ";
cout << "Is tyrosine-like (i.e. has an aromatic ring and a separate H-bond acceptor)? "
<< (aa.is_tyrosine_like() ? "Y" : "N") << endl;

if (baa)
Bond** bb = aa.get_rotatable_bonds();
if (bb && bb[0])
{
for (i=0; bb[i]; i++)
{
for (j=0; baa[j]; j++)
Atom** baa = bb[i]->get_moves_with_btom();
cout << bb[i]->atom->name << "-" << bb[i]->btom->name << " can rotate, bringing ";

if (baa)
{
cout << baa[j]->name << " ";
for (j=0; baa[j]; j++)
{
cout << baa[j]->name << " ";
}
if (!j) cout << "zero atoms.";
}
if (!j) cout << "zero atoms.";
}
else cout << "no atoms.";
else cout << "no atoms.";

cout << endl;
delete[] baa;
cout << endl;
delete[] baa;
}
}
else cout << "No rotatable bonds." << endl;

cout << aa.get_name() << " hydrophilicity = " << aa.hydrophilicity() << endl;
cout << "Similarity to A " << aa.similarity_to('A') << endl;
cout << "Similarity to D " << aa.similarity_to('D') << endl;
cout << "Similarity to R " << aa.similarity_to('R') << endl;
cout << "Similarity to F " << aa.similarity_to('F') << endl;
cout << "Similarity to C " << aa.similarity_to('C') << endl;
cout << "Similarity to S " << aa.similarity_to('S') << endl;

const char* outfn = "test.pdb";
FILE* pf = fopen(outfn, "wb");
aa.save_pdb(pf);
fclose(pf);
cout << "Wrote " << outfn << endl;

const char* outfn2 = "test.sdf";
pf = fopen(outfn2, "wb");
aa.save_sdf(pf);
fclose(pf);
cout << "Wrote " << outfn2 << endl;
}
else cout << "No rotatable bonds." << endl;
else
{
const char* all_letters = "ARNDCUEQGHIKLMFPSTWYV";

cout << aa.get_name() << " hydrophilicity = " << aa.hydrophilicity() << endl;
cout << "Similarity to A " << aa.similarity_to('A') << endl;
cout << "Similarity to D " << aa.similarity_to('D') << endl;
cout << "Similarity to R " << aa.similarity_to('R') << endl;
cout << "Similarity to F " << aa.similarity_to('F') << endl;
cout << "Similarity to C " << aa.similarity_to('C') << endl;
cout << "Similarity to S " << aa.similarity_to('S') << endl;
int i, j, n = strlen(all_letters);
AminoAcid* all_aa[n+4];

const char* outfn = "test.pdb";
FILE* pf = fopen(outfn, "wb");
aa.save_pdb(pf);
fclose(pf);
cout << "Wrote " << outfn << endl;
for (i=0; i<n; i++)
{
all_aa[i] = new AminoAcid(all_letters[i]);
cout << " " << all_letters[i];
}
cout << endl;

const char* outfn2 = "test.sdf";
pf = fopen(outfn2, "wb");
aa.save_sdf(pf);
fclose(pf);
cout << "Wrote " << outfn2 << endl;
for (i=0; i<n; i++)
{
cout << all_letters[i] << " ";
for (j=0; j<n; j++)
{
float s = all_aa[i]->similarity_to(all_aa[j]);
char buffer[16];
sprintf(buffer, "%0.2f", s);
cout << " " << buffer;
}
cout << endl;
}
}
}

return 0;
Expand Down
12 changes: 0 additions & 12 deletions src/amino_tests.bash

This file was deleted.

77 changes: 50 additions & 27 deletions src/classes/aminoacid.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1524,36 +1524,50 @@ bool AminoAcid::can_reach(AminoAcid* other) const
else return false;
}

int AminoAcid::similarity_to(const char letter)
float AminoAcid::similarity_to(const char letter)
{
AminoAcid* a = nullptr;
if (!aa_defs[letter].SMILES.length()) return 0;
if (!aa_defs[letter].loaded)
{
a = new AminoAcid(letter);
delete a;
}
if (!aadef) return 0;

a = new AminoAcid(letter);
float s = similarity_to(a);
delete a;

return s;
}

float AminoAcid::similarity_to(const AminoAcid* aa)
{
if (!atoms || !aa->atoms) return 0;

int retval = 0;
if (aadef->hydrophilicity >= AMINOACID_HYDROPHILICITY_THRESHOLD
&&
aa_defs[letter].hydrophilicity >= AMINOACID_HYDROPHILICITY_THRESHOLD)
bool polar1 = (hydrophilicity() > 0.2);
bool polar2 = (aa->hydrophilicity() > 0.2);

int i, j;
float simil=0, divis=0;
for (i=0; atoms[i]; i++)
{
retval += fmin(aadef->hydrophilicity, aa_defs[letter].hydrophilicity)*2;
if (aadef->charged == aa_defs[letter].charged) retval += 1;
bool apol = fabs(atoms[i]->is_polar()) > 0.333;
if (apol != polar1) continue;

for (j=0; aa->atoms[j]; j++)
{
bool bpol = fabs(aa->atoms[j]->is_polar()) > 0.333;
if (bpol != polar2) continue;

float f = (apol && bpol) ? 1 : 0.333;
simil += f*atoms[i]->similarity_to(aa->atoms[j]);
divis += f;
}
}
if (aadef->hydrophilicity < AMINOACID_HYDROPHILICITY_THRESHOLD
&&
aa_defs[letter].hydrophilicity < AMINOACID_HYDROPHILICITY_THRESHOLD)
retval += 2;
// cout << aadef->hydrophilicity << "|" << aa_defs[letter].hydrophilicity << endl;
// return retval;

if (aadef->aromatic == aa_defs[letter].aromatic) retval += 2;
if (aadef->can_coord_metal == aa_defs[letter].can_coord_metal) retval += 1;
if (divis) simil /= divis;

return retval;
simil -= 0.5*fabs( fmin(1, fabs(hydrophilicity())) - fmin(1, fabs(aa->hydrophilicity())) );
simil += 0.5*(sgn(get_charge() * aa->get_charge()));
simil = fmax(0, fmin(1, simil));

return simil;
}

Ring* AminoAcid::get_most_distal_arom_ring()
Expand Down Expand Up @@ -1629,7 +1643,8 @@ bool AminoAcid::is_tyrosine_like()
for (i=0; atoms[i]; i++)
{
if (atoms[i]->is_backbone) continue;
if (atoms[i]->get_Z() == 7 || atoms[i]->get_Z() == 8 || atoms[i]->is_polar() < 0)
if (atoms[i]->get_Z() == 1) continue;
if (atoms[i]->is_polar() < -0.333)
{
bool part_of_arom_ring = false;

Expand All @@ -1651,7 +1666,8 @@ bool AminoAcid::is_tyrosine_like()
if (!part_of_arom_ring)
{
#if DBG_TYRLIKE
cout << atoms[i]->name << " seems to be ringless, yup not missing any important info, derp." << endl;
cout << atoms[i]->name << " seems to be ringless, polarity " << atoms[i]->is_polar()
<< " yup not missing any important info, derp." << endl;
#endif

return true;
Expand All @@ -1674,7 +1690,12 @@ bool AminoAcid::is_glycine()
return true;
}

bool AminoAcid::conditionally_basic()
float AminoAcid::sc_pKa() const
{
return aadef->sidechain_pKa;
}

bool AminoAcid::conditionally_basic() const
{
#if _allow_conditional_basicity
if (!atoms) return false;
Expand All @@ -1686,7 +1707,7 @@ bool AminoAcid::conditionally_basic()
if (atoms[i]->get_Z() == 1) continue;
if (atoms[i]->get_family() == PNICTOGEN)
{
if (aadef->sidechain_pKa >= 4 && aadef->sidechain_pKa < 7) return true;
if (aadef->sidechain_pKa >= 5 && aadef->sidechain_pKa < 7) return true;
}
}
#endif
Expand Down Expand Up @@ -1749,7 +1770,7 @@ Atom* AminoAcid::capable_of_inter(intera_type inter)
return retval;
}

float AminoAcid::hydrophilicity()
float AminoAcid::hydrophilicity() const
{
int i, count=0;
float total=0;
Expand All @@ -1761,6 +1782,8 @@ float AminoAcid::hydrophilicity()
int fam = atoms[i]->get_family();
if (Z==1) continue;

if (fam == PNICTOGEN && conditionally_basic()) total += protonation(sc_pKa())*2;

total += atoms[i]->hydrophilicity_rule();
count++;
}
Expand Down
9 changes: 5 additions & 4 deletions src/classes/aminoacid.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,8 @@ class AminoAcid : public Molecule
}
bool is_tyrosine_like(); // An amino acid is tyrosine-like if it has an aromatic ring and a non-backbone H-bond acceptor not part of the ring.
bool is_glycine(); // Glycine is a special case where there are no non-backbone heavy atoms.
bool conditionally_basic();
bool conditionally_basic() const;
float sc_pKa() const;
float get_reach() const
{
return aadef ? aadef->reach : 2.5;
Expand Down Expand Up @@ -144,13 +145,13 @@ class AminoAcid : public Molecule
// Intermol functions.
float get_intermol_binding(AminoAcid* neighbor, bool backbone_atoms_only = false);
float get_intermol_binding(AminoAcid** neighbors, bool backbone_atoms_only = false);
float hydrophilicity();
float hydrophilicity() const;

// Misc.
void delete_sidechain();
static Molecule** aas_to_mols(AminoAcid** aas);
int similarity_to(const char letter);
int similarity_to(const AminoAcid* aa) { return similarity_to(aa->get_letter()); }
float similarity_to(const char letter);
float similarity_to(const AminoAcid* aa);
Ring* get_most_distal_arom_ring();
std::string printable();

Expand Down
2 changes: 1 addition & 1 deletion src/classes/atom.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -595,7 +595,7 @@ float Atom::hydrophilicity_rule()
if (is_bonded_to("H") || is_bonded_to(CHALCOGEN)) total += 1;
}

total += fabs(get_charge());
total += 1.5*fabs(get_charge());

return total;
}
Expand Down
6 changes: 3 additions & 3 deletions src/classes/constants.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
#define any_element -5141
#define Avogadro 6.02214076e+23

#define pH 6.0

// Give the atoms a sort of lookahead to know what kind of potential binding they could have if only they would rotate properly.
#define intermol_ESP 0.05

Expand Down Expand Up @@ -265,9 +267,7 @@
// Whether to add the indicated partial charge to a neutral pnictogen, within pKa limits,
// if a negatively charged atom is nearby.
#define _ALLOW_PROTONATE_PNICTOGENS 1
#define pnictogen_partial_protonation 0.2
#define pn_protonation_pKa_min 5
#define _allow_conditional_basicity 0
#define _allow_conditional_basicity 1

#define prealign_iters 50
#define prealign_momenta_mult 0
Expand Down
12 changes: 3 additions & 9 deletions src/classes/group.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ float AtomGroup::compatibility(AminoAcid* aa)
float lgi = get_ionic(), lgh = get_polarity(), lgp = get_pi();

float aachg = aa->get_charge();
if (aa->conditionally_basic()) aachg += 0.5;
if (aa->conditionally_basic()) aachg += protonation(aa->sc_pKa());
if (lgi && aachg && sgn(lgi) != -sgn(aachg)) return 0;

if (aa->hydrophilicity() > 0.25)
Expand Down Expand Up @@ -622,12 +622,6 @@ std::vector<std::shared_ptr<ResidueGroup>> ResidueGroup::get_potential_side_chai
if (dirty[j]) continue;
AminoAcid* bb = aalist[j];

// Debug trap.
if (bb->get_residue_no() == 106)
{
dirty[j] = false;
}

CB = bb->get_atom("CB");
if (CB)
{
Expand Down Expand Up @@ -673,7 +667,7 @@ std::vector<std::shared_ptr<ResidueGroup>> ResidueGroup::get_potential_side_chai
}

float simil = aa->similarity_to(bb);
if (simil >= 4)
if (simil >= 0.333)
{
g->aminos.push_back(bb);
dirty[j] = true;
Expand Down Expand Up @@ -762,7 +756,7 @@ float GroupPair::get_potential()

if ((aa->get_charge() > 1 || aa->conditionally_basic()) && a->is_aldehyde())
{
partial += 60;
partial += protonation(aa->sc_pKa())*60;

#if _dbg_groupsel
cout << "Aldehyde-base potential for " << *a << "..." << *aa << " = " << partial << endl;
Expand Down
Loading

0 comments on commit 8c15113

Please sign in to comment.