Skip to content

Commit

Permalink
Merge pull request #26 from ccb-hms/development
Browse files Browse the repository at this point in the history
Source Term ID Bug Fixes
  • Loading branch information
paynejason authored Feb 24, 2023
2 parents 4f11944 + 947e72a commit a0d301c
Show file tree
Hide file tree
Showing 5 changed files with 27 additions and 5 deletions.
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ All other arguments are the same, and have the same functionality:

`source_terms_ids` : tuple
Collection of identifiers for the given source terms
WARNING: While this argument is still available for the tagged term function, using it there is not recommended, because dictionaries do not necessarily preserve order. If using the TaggedTerm object, the source term IDs can be attached to the terms themselves to guarantee order.

`excl_deprecated` : bool
Exclude ontology terms stated as deprecated via `owl:deprecated true`
Expand Down Expand Up @@ -145,12 +146,14 @@ WARNING: Removing duplicates at any point does not guarantee which original term
The non-tagged functions both return a dictionary where the keys are the original terms and the values are the preprocessed terms.
The tagged function returns a list of TaggedTerm items with the following function contracts:
```python
def __init__(self, term=None, tags=[], original_term=None)
def __init__(self, term=None, tags=[], original_term=None, source_term_id=None)
def add_tags(self, new_tags)
def update_term(self, term)
def update_source_term_id(self, source_term_id)
def get_original_term(self)
def get_term(self)
def get_tags(self)
def get_source_term_id(self)
```
As mentioned in the mapping section above, this can then be passed directly to map_tagged_terms(), allowing for easy programmatic usage. Note that this allows multiple instances of the same preprocessed term with different tags.

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from setuptools import setup, find_packages

version = '2.0.1'
version = '2.0.2'
description = 'A tool for mapping free-text descriptions of (biomedical) entities to controlled terms in an ontology'
long_description = open('README.md').read()

Expand Down
3 changes: 2 additions & 1 deletion text2term/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,5 @@
from .mapper import Mapper
from .preprocess import preprocess_file
from .preprocess import preprocess_terms
from .preprocess import preprocess_tagged_terms
from .preprocess import preprocess_tagged_terms
from .tagged_terms import TaggedTerm
13 changes: 12 additions & 1 deletion text2term/t2t.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,16 @@ def map_tagged_terms(tagged_terms_dict, target_ontology, base_iris=(), excl_depr
if isinstance(tagged_terms_dict, dict):
terms = list(tagged_terms_dict.keys())
else:
terms = [tagged_term.get_term() for tagged_term in tagged_terms_dict]
terms = []
source_terms_id_list = []
for tagged_term in tagged_terms_dict:
terms.append(tagged_term.get_term())
if tagged_term.get_source_term_id() != None:
source_terms_id_list.append(tagged_term.get_source_term_id())
if len(source_terms_id_list) > 0:
source_terms_ids = tuple(source_terms_id_list)

# Run the mapper
df = map_terms(terms, target_ontology, base_iris=base_iris, excl_deprecated=excl_deprecated, \
max_mappings=max_mappings, min_score=min_score, mapper=mapper, output_file=output_file, \
save_graphs=save_graphs, source_terms_ids=source_terms_ids, use_cache=use_cache)
Expand Down Expand Up @@ -138,6 +147,8 @@ def map_terms(source_terms, target_ontology, base_iris=(), excl_deprecated=False
Data frame containing the generated ontology mappings
"""
if len(source_terms_ids) != len(source_terms):
if len(source_terms_ids) > 0:
sys.stderr.write("Warning: Source Term Ids are non-zero, but will not be used.")
source_terms_ids = onto_utils.generate_iris(len(source_terms))
if output_file == '':
timestamp = datetime.datetime.now().strftime("%d-%m-%YT%H-%M-%S")
Expand Down
9 changes: 8 additions & 1 deletion text2term/tagged_terms.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@

class TaggedTerm:
def __init__(self, term=None, tags=[], original_term=None):
def __init__(self, term=None, tags=[], original_term=None, source_term_id=None):
self.term = term
self.tags = tags
self.original_term = original_term
self.source_term_id = source_term_id

def __repr__(self):
return f"<TaggedTerm term:{self.term} tags:{self.tags} original_term:{self.original_term}"
Expand All @@ -14,6 +15,9 @@ def add_tags(self, new_tags):
def update_term(self, term):
self.term = term

def update_source_term_id(self, source_term_id):
self.source_term_id = source_term_id

def get_original_term(self):
return self.original_term

Expand All @@ -22,4 +26,7 @@ def get_term(self):

def get_tags(self):
return self.tags

def get_source_term_id(self):
return self.source_term_id

0 comments on commit a0d301c

Please sign in to comment.