From ad5856d16d089a7cbd5b9fd7e68f26da56c973e8 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Wed, 29 Mar 2017 09:38:05 -0700 Subject: [PATCH] BUG: fix #530 --- biom/table.py | 36 +++++++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/biom/table.py b/biom/table.py index c6b39874..a3d8182e 100644 --- a/biom/table.py +++ b/biom/table.py @@ -299,11 +299,37 @@ def vlen_list_of_str_formatter(grp, header, md, compression): lengths.append(len(m[header])) if not np.all(iterable_checks): - raise TypeError( - "Category %s not formatted correctly. Did you pass" - " --process-obs-metadata taxonomy when converting " - " from tsv? Please see Table.to_hdf5 docstring for" - " more information") + if header == 'taxonomy': + # attempt to handle the general case issue where the taxonomy + # was not split on semicolons and represented as a flat string + # instead of a list + def split_and_strip(i): + parts = i.split(';') + return [p.strip() for p in parts] + try: + new_md = [] + lengths = [] + for m in md: + parts = split_and_strip(m[header]) + new_md.append({header: parts}) + lengths.append(len(parts)) + old = deepcopy(md) # attempt to preserve the original metadata + md = new_md + except: + raise TypeError("Category '%s' is not formatted properly. The " + "most common issue is when 'taxonomy' is " + "represented as a flat string instead of a " + "list. An attempt was made to split this " + "field on a ';' to coerce it into a list but " + "it failed. An example entry (which is not " + "assured to be the problematic entry) is " + "below:\n%s" % (header, md[0][header])) + else: + raise TypeError( + "Category %s not formatted correctly. Did you pass" + " --process-obs-metadata taxonomy when converting " + " from tsv? Please see Table.to_hdf5 docstring for" + " more information" % header) max_list_len = max(lengths) shape = (len(md), max_list_len)