Skip to content

Commit

Permalink
Add function to allow CIF parser to store string-valued auth_asym_id/…
Browse files Browse the repository at this point in the history
…chain-labels.

Related to #96.
  • Loading branch information
mittinatten committed Dec 10, 2023
1 parent 5481450 commit ceed1b0
Show file tree
Hide file tree
Showing 4 changed files with 133 additions and 16 deletions.
10 changes: 5 additions & 5 deletions src/cif.cc
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ static const auto atom_site_columns = std::vector<std::string>({
* TODO: Better solution needed. A previous version with std::move didn't work as expected,
* and sometimes caused seg-faults.
*/
static freesasa_cif_atom
static freesasa_cif_atom_lcl
freesasa_atom_from_site(const gemmi::cif::Table::Row &site)
{
const char *auth_atom_id;
Expand All @@ -142,9 +142,9 @@ freesasa_atom_from_site(const gemmi::cif::Table::Row &site)
auth_atom_id = site[5].c_str();
}

return {
return freesasa_cif_atom_lcl {
.group_PDB = site[0].c_str(),
.auth_asym_id = site[1][0],
.auth_asym_id = site[1].c_str(),
.auth_seq_id = site[2].c_str(),
.pdbx_PDB_ins_code = site[3].c_str(),
.auth_comp_id = site[4].c_str(),
Expand Down Expand Up @@ -175,7 +175,7 @@ structure_from_pred(const gemmi::cif::Document &doc,

if (discriminator(site)) continue;

freesasa_cif_atom atom = freesasa_atom_from_site(site);
auto atom = freesasa_atom_from_site(site);

if (!(structure_options & FREESASA_INCLUDE_HYDROGEN) && std::string(atom.type_symbol) == "H") {
continue;
Expand All @@ -189,7 +189,7 @@ structure_from_pred(const gemmi::cif::Document &doc,
continue;
}

freesasa_structure_add_cif_atom(structure, &atom, classifier, structure_options);
freesasa_structure_add_cif_atom_lcl(structure, &atom, classifier, structure_options);

// since this is in the interface between C and C++ code, some hackery is needed
free((void *)atom.auth_atom_id);
Expand Down
47 changes: 47 additions & 0 deletions src/freesasa.h
Original file line number Diff line number Diff line change
Expand Up @@ -337,8 +337,34 @@ struct freesasa_cif_atom {
const double Cartn_z;
};

/**
\private
Struct to store data about a mmCIF atom site.
With `auth_asym_id` as string (long chain label/LCL)
This is an intermediate type added in the process of migrating to long chain labels.
In the next major release `freesasa_cif_atom` will be changed to this form.
All string members are borrowed pointers; the struct does not own them.
@ingroup structure
*/
struct freesasa_cif_atom_lcl {
const char *group_PDB;
/* Chain label as a string; freesasa_structure_add_cif_atom_lcl() passes it on as the chain label. */
const char *auth_asym_id;
/* Residue sequence number as a string; forms the residue number together with the insertion code. */
const char *auth_seq_id;
/* Insertion code; a value starting with '?' is treated as "no insertion code",
   otherwise the first character is appended to auth_seq_id. */
const char *pdbx_PDB_ins_code;
/* Used as the residue name when the atom is added to a structure. */
const char *auth_comp_id;
/* Used as the atom name when the atom is added to a structure. */
const char *auth_atom_id;
const char *label_alt_id;
/* Used as the element symbol when the atom is added to a structure. */
const char *type_symbol;
/* Cartesian coordinates of the atom (x, y, z). */
const double Cartn_x;
const double Cartn_y;
const double Cartn_z;
};

/* In C a struct tag is not a type name by itself, so provide typedefs that let
   C code use the bare names. C++ already treats the struct names as types,
   hence the guard. */
#ifndef __cplusplus
typedef struct freesasa_cif_atom freesasa_cif_atom;
typedef struct freesasa_cif_atom_lcl freesasa_cif_atom_lcl;
#endif

/**
Expand Down Expand Up @@ -914,6 +940,27 @@ int freesasa_structure_add_cif_atom(freesasa_structure *structure,
freesasa_cif_atom *atom,
const freesasa_classifier *classifier,
int options);

/**
\private
Add an atom from a mmCIF file to a structure using strings for chain labels (LCL)
@param structure The structure to add to.
@param atom An atom site from a mmCIF file with long chain labels
@param classifier A ::freesasa_classifier to determine radius of atom and to
decide if to keep atom or not (see options).
@param options Structure options as in freesasa_structure_add_atom_wopt()
@return ::FREESASA_SUCCESS on normal execution. ::FREESASA_FAIL
if memory allocation fails or if halting at unknown
atom. ::FREESASA_WARN if skipping atom.
@ingroup structure
*/
int freesasa_structure_add_cif_atom_lcl(freesasa_structure *structure,
freesasa_cif_atom_lcl *atom,
const freesasa_classifier *classifier,
int options);
/**
Create new structure consisting of a selection chains from the
provided structure.
Expand Down
41 changes: 31 additions & 10 deletions src/structure.c
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ atom_new(const char *residue_name,
const char *residue_number,
const char *atom_name,
const char *symbol,
chain_label_t chain_label)
const chain_label_t chain_label)
{
struct atom *a = malloc(sizeof(struct atom));

Expand All @@ -183,11 +183,11 @@ atom_new(const char *residue_name,
a->line = NULL;
a->res_index = -1;

strncpy(a->atom_name, atom_name, sizeof(a->atom_name));
strncpy(a->res_name, residue_name, sizeof(a->res_name));
strncpy(a->res_number, residue_number, sizeof(a->res_number));
strncpy(a->symbol, symbol, sizeof(a->symbol));
strncpy(a->chain_label, chain_label, sizeof(chain_label_t));
snprintf(a->atom_name, sizeof(a->atom_name), "%s", atom_name);
snprintf(a->res_name, sizeof(a->res_name), "%s", residue_name);
snprintf(a->res_number, sizeof(a->res_number), "%s", residue_number);
snprintf(a->symbol, sizeof(a->symbol), "%s", symbol);
snprintf(a->chain_label, sizeof(chain_label_t), "%s", chain_label);

a->the_class = FREESASA_ATOM_UNKNOWN;
}
Expand Down Expand Up @@ -445,7 +445,7 @@ guess_symbol(char *symbol,
return FREESASA_SUCCESS;
}

int structure_has_chain(freesasa_structure *s, chain_label_t chain_label)
int structure_has_chain(freesasa_structure *s, const chain_label_t chain_label)
{
for (int i = 0; i < s->chains.n; ++i) {
if (strncmp(s->chains.labels[i], chain_label, sizeof(chain_label_t)) == 0) {
Expand All @@ -457,7 +457,7 @@ int structure_has_chain(freesasa_structure *s, chain_label_t chain_label)

static int
structure_add_chain(freesasa_structure *s,
chain_label_t chain_label,
const chain_label_t chain_label,
int i_latest_atom)
{
int n;
Expand All @@ -467,7 +467,7 @@ structure_add_chain(freesasa_structure *s,
return fail_msg("");

n = s->chains.n;
strncpy(s->chains.labels[n - 1], chain_label, sizeof(chain_label_t));
snprintf(s->chains.labels[n - 1], sizeof(chain_label_t), "%s", chain_label);
s->chains.short_labels[n - 1] = chain_label[0];
s->chains.short_labels[n] = '\0';

Expand Down Expand Up @@ -726,7 +726,7 @@ structure_add_atom_wopt_impl(freesasa_structure *structure,
const char *residue_name,
const char *residue_number,
const char *symbol,
chain_label_t chain_label,
const char *chain_label,
double x, double y, double z,
const freesasa_classifier *classifier,
int options)
Expand All @@ -740,6 +740,7 @@ structure_add_atom_wopt_impl(freesasa_structure *structure,
assert(atom_name);
assert(residue_name);
assert(residue_number);
assert(chain_label);

/* this option can not be used here, and needs to be unset */
options &= ~FREESASA_RADIUS_FROM_OCCUPANCY;
Expand Down Expand Up @@ -779,6 +780,7 @@ int freesasa_structure_add_atom_wopt(freesasa_structure *structure,
return structure_add_atom_wopt_impl(structure, atom_name, residue_name, residue_number, NULL,
my_chain_label, x, y, z, classifier, options);
}

int freesasa_structure_add_atom(freesasa_structure *structure,
const char *atom_name,
const char *residue_name,
Expand Down Expand Up @@ -813,6 +815,25 @@ int freesasa_structure_add_cif_atom(freesasa_structure *structure,
classifier, options);
}

/*
  Adds a single mmCIF atom site with a string-valued chain label to the
  structure.

  The residue number handed to the underlying implementation is built from
  auth_seq_id, followed by the first character of pdbx_PDB_ins_code unless
  that character is '?'. The result is truncated to fit the local buffer.

  Returns whatever structure_add_atom_wopt_impl() returns.
 */
int freesasa_structure_add_cif_atom_lcl(freesasa_structure *structure,
                                        freesasa_cif_atom_lcl *atom,
                                        const freesasa_classifier *classifier,
                                        int options)
{
    char res_number[PDB_ATOM_RES_NUMBER_STRL + 1];
    const char ins_code = atom->pdbx_PDB_ins_code[0];

    if (ins_code == '?') {
        /* no insertion code: the residue number is the sequence id alone */
        snprintf(res_number, sizeof res_number, "%s", atom->auth_seq_id);
    } else {
        snprintf(res_number, sizeof res_number, "%s%c", atom->auth_seq_id, ins_code);
    }

    return structure_add_atom_wopt_impl(structure,
                                        atom->auth_atom_id,
                                        atom->auth_comp_id,
                                        res_number,
                                        atom->type_symbol,
                                        atom->auth_asym_id,
                                        atom->Cartn_x, atom->Cartn_y, atom->Cartn_z,
                                        classifier, options);
}

freesasa_structure *
freesasa_structure_from_pdb(FILE *pdb_file,
const freesasa_classifier *classifier,
Expand Down
51 changes: 50 additions & 1 deletion tests/test_structure.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ const char rna[N][PDB_ATOM_RES_NAME_STRL + 1] = {
const char rnu[N][PDB_ATOM_RES_NUMBER_STRL + 1] = {" 1", " 1", " 1", " 1", " 1", " 2"};
const char symbol[N][PDB_ATOM_SYMBOL_STRL + 1] = {" C", " C", " O", " C", " S", "SE"};
const char cl[N] = {'A', 'A', 'A', 'A', 'A', 'A'};
const char *lcl[N] = {"AAA", "AAA", "AAA", "AAA", "AAA", "AAA"};
const double bfactors[N] = {1., 1., 1., 1., 1., 1.};

freesasa_structure *s;
Expand Down Expand Up @@ -119,6 +120,53 @@ START_TEST(test_add_atom)
}
END_TEST

/*
  Verifies freesasa_structure_add_cif_atom_lcl():
  - atoms added with string chain labels and no insertion code ("?") are
    stored with the expected names, residue names/numbers and symbols, and
    the single-character chain accessor reports the first character of the
    long label;
  - an atom with an insertion code gets that code appended to its residue
    number.
 */
START_TEST(test_cif)
{
    s = freesasa_structure_new();
    for (int i = 0; i < N; ++i) {
        struct freesasa_cif_atom_lcl atom = {
            .group_PDB = "",
            .auth_asym_id = lcl[i],
            .auth_seq_id = rnu[i],
            .pdbx_PDB_ins_code = "?",
            .auth_comp_id = rna[i],
            .auth_atom_id = an[i],
            .label_alt_id = "",
            .type_symbol = symbol[i],
            .Cartn_x = i,
            .Cartn_y = i,
            .Cartn_z = i,
        };
        ck_assert_int_eq(freesasa_structure_add_cif_atom_lcl(s, &atom, NULL, 0),
                         FREESASA_SUCCESS);
    }
    for (int i = 0; i < N; ++i) {
        ck_assert_str_eq(freesasa_structure_atom_name(s, i), an[i]);
        ck_assert_str_eq(freesasa_structure_atom_res_name(s, i), rna[i]);
        ck_assert_str_eq(freesasa_structure_atom_res_number(s, i), rnu[i]);
        ck_assert_str_eq(freesasa_structure_atom_symbol(s, i), symbol[i]);
        ck_assert_int_eq(freesasa_structure_atom_chain(s, i), lcl[i][0]);
    }

    /* Same atom data as index 0, but with insertion code "A". */
    struct freesasa_cif_atom_lcl atom = {
        .group_PDB = "",
        .auth_asym_id = lcl[0],
        .auth_seq_id = rnu[0],
        .pdbx_PDB_ins_code = "A",
        .auth_comp_id = rna[0],
        .auth_atom_id = an[0],
        .label_alt_id = "",
        .type_symbol = symbol[0],
        .Cartn_x = 1,
        .Cartn_y = 1,
        .Cartn_z = 1,
    };

    ck_assert_int_eq(freesasa_structure_add_cif_atom_lcl(s, &atom, NULL, 0),
                     FREESASA_SUCCESS);
    ck_assert_str_eq(freesasa_structure_atom_res_number(s, N), " 1A");
}
END_TEST

double a2r(const char *rn, const char *am)
{
return 1.0;
Expand Down Expand Up @@ -472,8 +520,9 @@ Suite *structure_suite(void)
TCase *tc_core = tcase_create("Core");
tcase_add_test(tc_core, test_structure_api);
tcase_add_test(tc_core, test_add_atom);
tcase_add_test(tc_core, test_cif);
if (INCLUDE_MEMERR_TESTS) {
tcase_add_test(tc_core, test_memerr);
tcase_add_test(tc_core, test_memerr);
}

TCase *tc_pdb = tcase_create("PDB");
Expand Down

0 comments on commit ceed1b0

Please sign in to comment.