Skip to content

Commit

Permalink
Add spacy NER tagger
Browse files Browse the repository at this point in the history
  • Loading branch information
andychisholm committed Mar 19, 2017
1 parent 18db2af commit e9477ec
Showing 1 changed file with 17 additions and 0 deletions.
17 changes: 17 additions & 0 deletions nel/process/tag.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
from subprocess import Popen, PIPE
from cStringIO import StringIO

import spacy

from .process import Process
from ..model import recognition
from ..doc import Mention, Chain, Candidate
Expand Down Expand Up @@ -40,6 +42,21 @@ def iter_options(cls):
if c != cls and isinstance(c, type) and issubclass(c, cls):
yield c

class SpacyTagger(Tagger):
    """ Named entity tagger backed by a spaCy pipeline """
    def __init__(self, spacy_model=None):
        """ Load the spaCy pipeline used for tagging.

        spacy_model: name passed to spacy.load; falls back to
        'en_default' when None (or otherwise falsy).
        """
        self.spacy_model = spacy_model or 'en_default'
        # Log the resolved model name, not the raw argument, so the message
        # is still meaningful when the default model is used.
        log.debug('Using spacy entity tagger (%s)...', self.spacy_model)
        self.nlp = spacy.load(self.spacy_model)

    def tag(self, doc):
        """ Tokenise doc.text with spaCy and yield one mention per entity.

        Side effect: doc.tokens is replaced with one Mention(offset, text)
        per spaCy token. Entities made up only of whitespace tokens are
        skipped; other entities are trimmed of leading/trailing whitespace
        tokens before being handed to self.mention_over_tokens.
        """
        spacy_doc = self.nlp(doc.text)
        doc.tokens = [Mention(t.idx, t.text) for t in spacy_doc]

        for ent in spacy_doc.ents:
            # Offsets (relative to the entity start) of non-whitespace tokens.
            tok_idxs = [i for i, tok in enumerate(ent) if not tok.is_space]
            if not tok_idxs:
                continue
            # tok_idxs is ascending, so its ends are the min and max offsets.
            begin = ent.start + tok_idxs[0]
            end = ent.start + tok_idxs[-1] + 1
            yield self.mention_over_tokens(doc, begin, end, ent.label_)

class CRFTagger(Tagger):
""" Conditional random field sequence tagger """
@classmethod
Expand Down

0 comments on commit e9477ec

Please sign in to comment.