Skip to content

Commit

Permalink
updated collapse feature
Browse files Browse the repository at this point in the history
  • Loading branch information
qiyunzhu committed Feb 5, 2021
1 parent 8df9540 commit 4dfb38a
Show file tree
Hide file tree
Showing 14 changed files with 5,367 additions and 18 deletions.
23 changes: 23 additions & 0 deletions woltka/biom.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,25 @@ def round_biom(table: biom.Table):
table.remove_empty(axis='observation')


def biom_add_metacol(table: biom.Table, dic, name, missing=''):
    """Attach one metadata column to the observations of a BIOM table.

    The table is modified in place; nothing is returned.

    Parameters
    ----------
    table : biom.Table
        Table to receive the metadata column.
    dic : dict
        Metadata column (feature-to-value mapping).
    name : str
        Metadata column name.
    missing : any type, optional
        Value assigned to features that are absent from the dictionary.
    """
    # build a {feature: {column name: value}} mapping covering every
    # observation in the table, falling back to `missing` when unmapped
    column = {}
    for feature in table.ids('observation'):
        column[feature] = {name: dic.get(feature, missing)}

    table.add_metadata(column, axis='observation')


def collapse_biom(table: biom.Table, mapping: dict, normalize=False):
"""Collapse a BIOM table in many-to-many mode.
Expand Down Expand Up @@ -158,6 +177,10 @@ def collapse_biom(table: biom.Table, mapping: dict, normalize=False):
table = table.filter(lambda data, id_, md: id_ in mapping,
axis='observation', inplace=False)

# stop if no feature left
if table.is_empty():
return table

# add mapping to table metadata
table.add_metadata({k: dict(part=v) for k, v in mapping.items()},
axis='observation')
Expand Down
8 changes: 6 additions & 2 deletions woltka/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,18 +241,22 @@ def merge_cmd(ctx, **kwargs):
type=click.Path(exists=True, dir_okay=False),
help='Path to input profile.')
@click.option(
'--map', '-m', 'map_fp', type=click.Path(exists=True),
'--map', '-m', 'map_fp', required=True,
type=click.Path(exists=True, dir_okay=False),
help=('Mapping of lower classification units to higher ones (supports '
'many-to-many relationships).'))
@click.option(
'--output', '-o', 'output_fp', required=True,
type=click.Path(writable=True, dir_okay=False),
help='Path to output profile.')
@click.option(
'--normalize', '-n', is_flag=True,
'--normalize', '-z', is_flag=True,
help=('Count each higher classification unit as 1/k (k is the number of '
'higher classification units mapped to a lower one). Otherwise, '
'count as one.'))
@click.option(
'--names', '-n', 'names_fp', type=click.Path(exists=True),
help='Names of higher classification units to append to output profile.')
@click.pass_context
def collapse_cmd(ctx, **kwargs):
"""Collapse a profile based on feature mapping.
Expand Down
33 changes: 28 additions & 5 deletions woltka/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from .file import openzip
from .biom import (
table_to_biom, biom_to_table, write_biom, filter_biom, round_biom,
collapse_biom)
biom_add_metacol, collapse_biom)


def prep_table(profile, samples=None, tree=None, rankdic=None, namedic=None,
Expand Down Expand Up @@ -440,6 +440,30 @@ def round_table(table):
del(table[3][i])


def table_add_metacol(table, dic, name, missing=''):
    """Add a metadata column to a table in place based on a dictionary.

    Parameters
    ----------
    table : biom.Table, or tuple of (list, list, list, list)
        Table to add metadata column (data, features, samples, metadata).
    dic : dict
        Metadata column (feature-to-value mapping).
    name : str
        Metadata column name.
    missing : any type, optional
        Default value if not found in dictionary.
    """
    # redirect to BIOM module; forward the caller's `missing` value so both
    # table representations fill unmapped features identically
    if isinstance(table, Table):
        biom_add_metacol(table, dic, name, missing=missing)
        return

    # tuple form: features (index 1) and metadata dicts (index 3) are
    # iterated in lockstep, and each metadata dict is updated in place
    for feature, metadatum in zip(table[1], table[3]):
        metadatum[name] = dic.get(feature, missing)


def collapse_table(table, mapping, normalize=False):
"""Collapse a table by many-to-many mapping.
Expand Down Expand Up @@ -470,18 +494,17 @@ def collapse_table(table, mapping, normalize=False):
width = len(samples)
res = defaultdict(lambda: [0] * width)
for datum, feature in zip(*table[:2]):
try:
targets = mapping[feature]
except KeyError:
if feature not in mapping:
continue
targets = mapping[feature]
if normalize:
k = 1 / len(targets)
datum = [x * k for x in datum]
for target in targets:
res[target] = list(map(add, res[target], datum))

# reformat table
res = list(res.values()), list(res.keys()), samples, [{}] * len(res)
res = list(res.values()), list(res.keys()), samples, [dict() for _ in res]

# round table
if normalize:
Expand Down
2 changes: 1 addition & 1 deletion woltka/tests/data/function/go/README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
# UniRef-toGO maps
# UniRef-to-GO maps

[Gene Ontology](http://geneontology.org/) (**GO**) is a classification system for gene functions. All GO terms can be traced to three top categories: **biological process**, **cellular component**, and **molecular function**. **GO slim** is a set of higher-level categories of GO terms.
Binary file added woltka/tests/data/function/go/name.txt.xz
Binary file not shown.
Binary file added woltka/tests/data/function/uniref.names.xz
Binary file not shown.
Loading

0 comments on commit 4dfb38a

Please sign in to comment.