Skip to content
This repository has been archived by the owner on Jan 10, 2023. It is now read-only.

Commit

Permalink
Write parser model after training (#425)
Browse files Browse the repository at this point in the history
  • Loading branch information
ringgaard authored Nov 9, 2019
1 parent 66d005d commit f2f9910
Show file tree
Hide file tree
Showing 16 changed files with 216 additions and 170 deletions.
2 changes: 1 addition & 1 deletion python/task/wiki.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ def wikidata_import(self, input, name=None):

def wikidata(self, dump=None):
"""Import Wikidata dump to frame format. It takes a Wikidata dump in JSON
format as inpput and converts each item and property to a SLING frame.
format as input and converts each item and property to a SLING frame.
Returns the item and property output files."""
if dump == None: dump = self.wikidata_dump()
with self.wf.namespace("wikidata"):
Expand Down
2 changes: 1 addition & 1 deletion sling/nlp/document/affix.cc
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ void AffixTable::Write(OutputStream *stream) const {
output.WriteVarint32(affix->form().size());
output.Write(affix->form());
output.WriteVarint32(affix->length());
if (affix->length() > 1) {
if (affix->length() > 0) {
CHECK(affix->shorter() != nullptr);
output.WriteVarint32(affix->shorter()->id());
}
Expand Down
35 changes: 0 additions & 35 deletions sling/nlp/document/document.cc
Original file line number Diff line number Diff line change
Expand Up @@ -46,22 +46,18 @@ CaseForm Token::Form() const {

void Span::Evoke(const Frame &frame) {
mention_.Add(document_->names_->n_evokes, frame);
document_->AddMention(frame.handle(), this);
}

void Span::Evoke(Handle frame) {
mention_.Add(document_->names_->n_evokes, frame);
document_->AddMention(frame, this);
}

void Span::Replace(Handle existing, Handle replacement) {
Handle n_evokes = document_->names_->n_evokes.handle();
FrameDatum *mention = mention_.store()->GetFrame(mention_.handle());
for (Slot *slot = mention->begin(); slot < mention->end(); ++slot) {
if (slot->name == n_evokes && slot->value == existing) {
document_->RemoveMention(existing, this);
slot->value = replacement;
document_->AddMention(replacement, this);
return;
}
}
Expand Down Expand Up @@ -268,9 +264,6 @@ Document::Document(const Frame &top, const DocumentNames *names)
Span *span = Insert(begin, end);
CHECK(span != nullptr) << "Crossing span: " << begin << "," << end;
span->mention_ = Frame(store(), mention->self);
for (const Slot &s : span->mention_) {
if (s.name == names_->n_evokes) AddMention(s.value, span);
}
} else if (slot->name == names_->n_theme.handle()) {
// Add thematic frame.
themes_.push_back(slot->value);
Expand Down Expand Up @@ -305,9 +298,6 @@ Document::Document(const Document &other, bool annotations)
for (const Span *s : other.spans_) {
Span *span = Insert(s->begin_, s->end_);
span->mention_ = Frame(store, store->Clone(s->mention_.handle()));
for (const Slot &s : span->mention_) {
if (s.name == names_->n_evokes) AddMention(s.value, span);
}
}

// Copy themes.
Expand Down Expand Up @@ -366,9 +356,6 @@ Document::Document(const Document &other,
if (b < 0 || e > length) continue;
Span *span = Insert(b, e);
span->mention_ = Frame(store, store->Clone(s->mention_.handle()));
for (const Slot &s : span->mention_) {
if (s.name == names_->n_evokes) AddMention(s.value, span);
}
}
}

Expand Down Expand Up @@ -511,13 +498,6 @@ void Document::DeleteSpan(Span *span) {
// Remove span from span index.
Remove(span);

// Remove all evoked frames from mention table.
for (const Slot &slot : span->mention_) {
if (slot.name == names_->n_evokes) {
RemoveMention(slot.value, span);
}
}

// Clear the reference to the mention frame. This will mark the span as
// deleted.
span->mention_ = Frame::nil();
Expand All @@ -537,20 +517,6 @@ void Document::AddExtra(Handle name, Handle value) {
extras_->emplace_back(name, value);
}

void Document::AddMention(Handle handle, Span *span) {
mentions_.emplace(handle, span);
}

void Document::RemoveMention(Handle handle, Span *span) {
auto interval = mentions_.equal_range(handle);
for (auto it = interval.first; it != interval.second; ++it) {
if (it->second == span) {
mentions_.erase(it);
break;
}
}
}

int Document::Locate(int position) const {
int index = 0;
int len = tokens_.size();
Expand Down Expand Up @@ -781,7 +747,6 @@ void Document::ClearAnnotations() {
for (Token &t : tokens_) t.span_ = nullptr;
for (Span *s : spans_) delete s;
spans_.clear();
mentions_.clear();
themes_.clear();
}

Expand Down
67 changes: 0 additions & 67 deletions sling/nlp/document/document.h
Original file line number Diff line number Diff line change
Expand Up @@ -374,62 +374,6 @@ class Document {
AddExtra(name.handle(), store()->AllocateString(value));
}

// Types for mapping from frame to spans that evoke it.
typedef std::unordered_multimap<Handle, Span *, HandleHash> MentionMap;
typedef std::pair<MentionMap::const_iterator, MentionMap::const_iterator>
ConstMentionIteratorPair;
typedef std::pair<MentionMap::iterator, MentionMap::iterator>
MentionIteratorPair;

// Iterator adapters for mention ranges.
class ConstMentionRange {
public:
explicit ConstMentionRange(const ConstMentionIteratorPair &interval)
: interval_(interval) {}
MentionMap::const_iterator begin() const { return interval_.first; }
MentionMap::const_iterator end() const { return interval_.second; }

private:
ConstMentionIteratorPair interval_;
};

class MentionRange {
public:
explicit MentionRange(const MentionIteratorPair &interval)
: interval_(interval) {}
MentionMap::iterator begin() { return interval_.first; }
MentionMap::iterator end() { return interval_.second; }

private:
MentionIteratorPair interval_;
};

// Iterates over all spans that evoke a frame, e.g.:
// for (const auto &it : document.EvokingSpans(h)) {
// Span *s = it.second;
// }
ConstMentionRange EvokingSpans(Handle handle) const {
return ConstMentionRange(mentions_.equal_range(handle));
}
ConstMentionRange EvokingSpans(const Frame &frame) const {
return ConstMentionRange(mentions_.equal_range(frame.handle()));
}

MentionRange EvokingSpans(Handle handle) {
return MentionRange(mentions_.equal_range(handle));
}
MentionRange EvokingSpans(const Frame &frame) {
return MentionRange(mentions_.equal_range(frame.handle()));
}

// Returns the number of spans evoking a frame.
int EvokingSpanCount(Handle handle) {
return mentions_.count(handle);
}
int EvokingSpanCount(const Frame &frame) {
return mentions_.count(frame.handle());
}

// Clears annotations (mentions and themes) from document.
void ClearAnnotations();

Expand All @@ -445,12 +389,6 @@ class Document {
// Removes the span from the span index.
void Remove(Span *span);

// Adds frame to mention mapping.
void AddMention(Handle handle, Span *span);

// Removes frame from mention mapping.
void RemoveMention(Handle handle, Span *span);

// Document frame.
Frame top_;

Expand All @@ -474,11 +412,6 @@ class Document {
// Additional slots that should be added to document.
Slots *extras_ = nullptr;

// Inverse mapping from frames to spans that can be used for looking up all
// mentions of a frame. The handles are tracked by the mention frame in the
// span.
MentionMap mentions_;

// Document symbol names.
const DocumentNames *names_;

Expand Down
7 changes: 5 additions & 2 deletions sling/nlp/document/lex.cc
Original file line number Diff line number Diff line change
Expand Up @@ -133,22 +133,25 @@ bool DocumentLexer::Lex(Document *document, Text lex) const {
if (objects.size() != current_object + 1) return false;

// Add mentions to document.
HandleSet added;
for (auto &m : markables) {
int begin = document->Locate(m.begin);
int end = document->Locate(m.end);
Span *span = document->AddSpan(begin, end);
if (m.object != -1) {
Array evoked(store, objects[m.object]);
for (int i = 0; i < evoked.length(); ++i) {
span->Evoke(evoked.get(i));
Handle frame = evoked.get(i);
span->Evoke(frame);
added.insert(frame);
}
}
}

// Add thematic frames. Do not add frames that are evoked by spans.
for (int theme : themes) {
Handle frame = objects[theme];
if (document->EvokingSpanCount(frame) == 0) {
if (added.count(frame) == 0) {
document->AddTheme(frame);
}
}
Expand Down
2 changes: 1 addition & 1 deletion sling/nlp/document/lexical-encoder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ void LexicalFeatures::LoadLexicon(Flow *flow) {
void LexicalFeatures::SaveLexicon(myelin::Flow *flow) const {
// Save word vocabulary.
Flow::Blob *vocabulary = flow->AddBlob("lexicon", "dict");
vocabulary->SetAttr("delimiter", 10);
vocabulary->SetAttr("delimiter", 0);
vocabulary->SetAttr("oov", lexicon_.oov());
auto normalization = lexicon_.normalization();
vocabulary->SetAttr("normalization", NormalizationString(normalization));
Expand Down
1 change: 1 addition & 0 deletions sling/nlp/parser/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ cc_library(
":roles",
"//sling/base",
"//sling/frame:store",
"//sling/frame:serialization",
"//sling/myelin:builder",
"//sling/myelin:compiler",
"//sling/myelin:gradient",
Expand Down
97 changes: 48 additions & 49 deletions sling/nlp/parser/action-table.cc
Original file line number Diff line number Diff line change
Expand Up @@ -38,37 +38,40 @@ void ActionTable::Init(Store *store) {
CHECK(top.valid());

// Get all the integer fields.
max_actions_per_token_ = top.GetInt("/table/max_actions_per_token");
frame_limit_ = top.GetInt("/table/frame_limit");
max_actions_per_token_ = top.GetInt("/table/max_actions_per_token", 5);
frame_limit_ = top.GetInt("/table/frame_limit", 5);

// Read the action index.
Array actions = top.Get("/table/actions").AsArray();
CHECK(actions.valid());

Handle action_type = store->LookupExisting("/table/action/type");
Handle action_length = store->LookupExisting("/table/action/length");
Handle action_source = store->LookupExisting("/table/action/source");
Handle action_target = store->LookupExisting("/table/action/target");
Handle action_role = store->LookupExisting("/table/action/role");
Handle action_label = store->LookupExisting("/table/action/label");
Handle n_type = store->Lookup("/table/action/type");
Handle n_length = store->Lookup("/table/action/length");
Handle n_source = store->Lookup("/table/action/source");
Handle n_target = store->Lookup("/table/action/target");
Handle n_role = store->Lookup("/table/action/role");
Handle n_label = store->Lookup("/table/action/label");
Handle n_delegate = store->Lookup("/table/action/delegate");
for (int i = 0; i < actions.length(); ++i) {
ParserAction action;
Frame item(store, actions.get(i));
CHECK(item.valid());

for (const Slot &slot : item) {
if (slot.name == action_type) {
if (slot.name == n_type) {
action.type = static_cast<ParserAction::Type>(slot.value.AsInt());
} else if (slot.name == action_length) {
} else if (slot.name == n_length) {
action.length = slot.value.AsInt();
} else if (slot.name == action_source) {
} else if (slot.name == n_source) {
action.source = slot.value.AsInt();
} else if (slot.name == action_target) {
} else if (slot.name == n_target) {
action.target = slot.value.AsInt();
} else if (slot.name == action_role) {
} else if (slot.name == n_role) {
action.role = slot.value;
} else if (slot.name == action_label) {
} else if (slot.name == n_label) {
action.label = slot.value;
} else if (slot.name == n_delegate) {
action.delegate = slot.value.AsInt();
}
}

Expand All @@ -82,66 +85,62 @@ void ActionTable::Save(const Store *global, const string &file) const {
}

string ActionTable::Serialize(const Store *global) const {
// Build frame with action table.
Store store(global);
Builder top(&store);
top.AddId("/table");
Builder table(&store);
table.AddId("/table");
Write(&table);

StringEncoder encoder(&store);
encoder.Encode(table.Create());
return encoder.buffer();
}

void ActionTable::Write(Builder *frame) const {
// Save the action table.
Handle action_type = store.Lookup("/table/action/type");
Handle action_length = store.Lookup("/table/action/length");
Handle action_source = store.Lookup("/table/action/source");
Handle action_target = store.Lookup("/table/action/target");
Handle action_role = store.Lookup("/table/action/role");
Handle action_label = store.Lookup("/table/action/label");

Array actions(&store, actions_.size());
Store *store = frame->store();
Handle n_type = store->Lookup("/table/action/type");
Handle n_length = store->Lookup("/table/action/length");
Handle n_source = store->Lookup("/table/action/source");
Handle n_target = store->Lookup("/table/action/target");
Handle n_role = store->Lookup("/table/action/role");
Handle n_label = store->Lookup("/table/action/label");
Handle n_delegate = store->Lookup("/table/action/delegate");

Array actions(store, actions_.size());
int index = 0;
for (const ParserAction &action : actions_) {
auto type = action.type;
Builder b(&store);
b.Add(action_type, static_cast<int>(type));
Builder b(store);
b.Add(n_type, static_cast<int>(type));

if (type == ParserAction::REFER || type == ParserAction::EVOKE) {
if (action.length > 0) {
b.Add(action_length, static_cast<int>(action.length));
b.Add(n_length, static_cast<int>(action.length));
}
}
if (type == ParserAction::ASSIGN ||
type == ParserAction::ELABORATE ||
type == ParserAction::CONNECT) {
if (action.source != 0) {
b.Add(action_source, static_cast<int>(action.source));
b.Add(n_source, static_cast<int>(action.source));
}
}
if (type == ParserAction::EMBED ||
type == ParserAction::REFER ||
type == ParserAction::CONNECT) {
if (action.target != 0) {
b.Add(action_target, static_cast<int>(action.target));
b.Add(n_target, static_cast<int>(action.target));
}
}
if (!action.role.IsNil()) b.Add(action_role, action.role);
if (!action.label.IsNil()) b.Add(action_label, action.label);
if (type == ParserAction::CASCADE) {
b.Add(n_delegate, static_cast<int>(action.delegate));
}
if (!action.role.IsNil()) b.Add(n_role, action.role);
if (!action.label.IsNil()) b.Add(n_label, action.label);
actions.set(index++, b.Create().handle());
}
top.Add("/table/actions", actions);

// Add artificial links to symbols used in serialization. This is needed as
// some action types might be unseen, so their corresponding symbols won't be
// serialized. However we still want handles to them during Load().
// For example, if we have only seen EVOKE, SHIFT, and STOP actions, then
// the symbol /table/fp/refer for REFER won't be serialized unless the table
// links to it.
std::vector<Handle> symbols = {
action_type, action_length, action_source, action_target,
action_role, action_label
};
Array symbols_array(&store, symbols);
top.Add("/table/symbols", symbols_array);

StringEncoder encoder(&store);
encoder.Encode(top.Create());
return encoder.buffer();
frame->Add("/table/actions", actions);
}

} // namespace nlp
Expand Down
Loading

0 comments on commit f2f9910

Please sign in to comment.