Skip to content
This repository has been archived by the owner on Jan 10, 2023. It is now read-only.

Commit

Permalink
Alias transfer (#326)
Browse files Browse the repository at this point in the history
  • Loading branch information
ringgaard authored Jan 22, 2019
1 parent e037430 commit 93c2ab1
Show file tree
Hide file tree
Showing 47 changed files with 1,013 additions and 539 deletions.
45 changes: 28 additions & 17 deletions app/lib/docview.js
Original file line number Diff line number Diff line change
Expand Up @@ -451,7 +451,7 @@ export class DocumentViewer extends Component {
}

BuildPanel(phrase, fidx) {
let mention = this.document.frames[fidx];
let frame = this.document.frames[fidx];
let panel = document.createElement("div");
panel.className = "panel";
panel.id = "p" + next_panel++;
Expand All @@ -462,9 +462,11 @@ export class DocumentViewer extends Component {

let title = document.createElement("span");
title.className = "panel-title";
title.appendChild(document.createTextNode(phrase));
titlebar.appendChild(title);
this.AddTypes(titlebar, mention.types);
if (phrase) {
title.appendChild(document.createTextNode(phrase));
titlebar.appendChild(title);
this.AddTypes(titlebar, frame.types);
}

let icon = document.createElement("span");
icon.className = "panel-icon";
Expand All @@ -476,19 +478,24 @@ export class DocumentViewer extends Component {
let contents = document.createElement("div");
contents.className = "panel-content"

let slots = mention.slots;
if (slots) {
for (let i = 0; i < slots.length; i += 2) {
let n = slots[i];
let v = slots[i + 1];
if (this.document.frames[n].id == "evokes" ||
this.document.frames[n].id == "is") {
let avm = this.BuildAVM(v, {});
contents.appendChild(avm);
if (phrase) {
let rendered = {};
let slots = frame.slots;
if (slots) {
for (let i = 0; i < slots.length; i += 2) {
let n = slots[i];
let v = slots[i + 1];
if (this.document.frames[n].id == "evokes" ||
this.document.frames[n].id == "is") {
let avm = this.BuildAVM(v, rendered);
contents.appendChild(avm);
}
}
}
} else {
let avm = this.BuildAVM(fidx, {});
contents.appendChild(avm);
}

panel.appendChild(contents);
return panel;
}
Expand All @@ -504,7 +511,11 @@ export class DocumentViewer extends Component {
let span = e.currentTarget;
let phrase = span.getAttribute("phrase");
let fidx = parseInt(span.getAttribute("frame"));
this.AddPanel('"' + phrase + '"', fidx);
if (phrase) {
this.AddPanel('"' + phrase + '"', fidx);
} else {
this.AddPanel(null, fidx);
}
}

ClosePanel(e) {
Expand All @@ -519,7 +530,6 @@ export class DocumentViewer extends Component {
chip.className = "chip";
chip.id = "t" + fidx;
chip.setAttribute("frame", fidx);
chip.setAttribute("phrase", name);
chip.appendChild(document.createTextNode(name));

return chip;
Expand Down Expand Up @@ -548,13 +558,14 @@ export class DocumentViewer extends Component {

let fidx = parseInt(span.getAttribute("frame"))
let mention = this.document.frames[fidx];
let rendered = {};
let slots = mention.slots;
if (slots) {
for (let i = 0; i < slots.length; i += 2) {
let n = slots[i];
let v = slots[i + 1];
if (this.document.frames[n].id == "evokes") {
let avm = this.BuildAVM(v, {});
let avm = this.BuildAVM(v, rendered);
callout.appendChild(avm);
}
}
Expand Down
2 changes: 1 addition & 1 deletion data/wiki/calendar.sling
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@
625: Q2662
626: Q2661
627: Q2664
628: Q2664
628: Q2660
629: Q2659
630: Q2657

Expand Down
2 changes: 1 addition & 1 deletion doc/guide/install.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ SLING uses [Bazel](https://bazel.build/) as the build system, so you need to
to build the SLING parser.

```shell
sudo apt-get install pkg-config zip g++ zlib1g-dev unzip python
sudo apt-get install pkg-config zip g++ zlib1g-dev unzip python2.7 python2.7-dev
wget -P /tmp https://github.com/bazelbuild/bazel/releases/download/0.13.0/bazel-0.13.0-installer-linux-x86_64.sh
chmod +x /tmp/bazel-0.13.0-installer-linux-x86_64.sh
sudo /tmp/bazel-0.13.0-installer-linux-x86_64.sh
Expand Down
6 changes: 3 additions & 3 deletions doc/guide/myelin.md
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,7 @@ flow = "flow" <version>

var = <#flags> (IN=1, OUT=2, REF=4, LEARNABLE=8, UNIQUE=16, from version 5)
<name$>
<#aliases> <alias$>
<#aliases> <alias$>*
<dtype$>
<shape>
<#bytes> value
Expand All @@ -397,11 +397,11 @@ blob = <#flags> (unused, from version 5)

func = <#flags> (TRAINING=1, from version 5)
<name$>
<#ops> <op$>
<#ops> <op$>*

cnx = <#flags> (unused, from version 5)
<name$>
<#vars> <var$>
<#vars> <var$>*

shape = <#dims> <size>*

Expand Down
73 changes: 19 additions & 54 deletions python/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,37 +120,6 @@
default=False,
action='store_true')

flags.define("--dryrun",
help="build worflows but do not run them",
default=False,
action='store_true')

flags.define("--monitor",
help="port number for task monitor (0 means no monitor)",
default=6767,
type=int,
metavar="PORT")

flags.define("--logdir",
help="directory where workflow logs are stored",
default="local/logs",
metavar="DIR")

def run_workflow(wf):
# In dryrun mode the workflow is just dumped without running it.
if flags.arg.dryrun:
print wf.wf.dump()
return

# Start workflow.
log.info("start workflow")
wf.wf.start()

# Wait until workflow completes. Poll every second to make the workflow
# interruptible.
done = False
while not done: done = wf.wf.wait(1000)

def download_corpora():
if flags.arg.download_wikidata or flags.arg.download_wikipedia:
wf = download.DownloadWorkflow("wiki-download")
Expand All @@ -164,7 +133,7 @@ def download_corpora():
for language in flags.arg.languages:
wf.download_wikipedia(language=language)

run_workflow(wf)
workflow.run(wf.wf)

def import_wiki():
if flags.arg.import_wikidata or flags.arg.import_wikipedia:
Expand All @@ -180,7 +149,7 @@ def import_wiki():
log.info("Import " + language + " wikipedia")
wf.wikipedia(language=language)

run_workflow(wf)
workflow.run(wf.wf)

def parse_wikipedia():
# Convert wikipedia pages to SLING documents.
Expand All @@ -189,36 +158,36 @@ def parse_wikipedia():
log.info("Parse " + language + " wikipedia")
wf = wiki.WikiWorkflow(language + "-wikipedia-parsing")
wf.parse_wikipedia(language=language)
run_workflow(wf)
workflow.run(wf.wf)

def fuse_items():
# Merge categories from wikipedias.
if flags.arg.merge_categories:
log.info("Merge wikipedia categories")
wf = wiki.WikiWorkflow("category-merging")
wf.merge_wikipedia_categories()
run_workflow(wf)
workflow.run(wf.wf)

# Invert categories.
if flags.arg.invert_categories:
log.info("Invert categories")
wf = wiki.WikiWorkflow("category-inversion")
wf.invert_wikipedia_categories()
run_workflow(wf)
workflow.run(wf.wf)

# Compute item popularity.
if flags.arg.compute_item_popularity:
log.info("Compute item popularity")
wf = wiki.WikiWorkflow("item-popularity")
wf.compute_item_popularity()
run_workflow(wf)
workflow.run(wf.wf)

# Fuse items.
if flags.arg.fuse_items:
log.info("Fuse items")
wf = wiki.WikiWorkflow("fuse-items")
wf.fuse_items()
run_workflow(wf)
workflow.run(wf.wf)


def build_knowledge_base():
Expand All @@ -227,31 +196,31 @@ def build_knowledge_base():
log.info("Build knowledge base repository")
wf = wiki.WikiWorkflow("knowledge-base")
wf.build_knowledge_base()
run_workflow(wf)
workflow.run(wf.wf)

# Extract item names from wikidata and wikipedia.
if flags.arg.extract_names:
for language in flags.arg.languages:
log.info("Extract " + language + " names")
wf = wiki.WikiWorkflow(language + "-name-extraction")
wf.extract_names(language=language)
run_workflow(wf)
workflow.run(wf.wf)

# Build name table.
if flags.arg.build_nametab:
for language in flags.arg.languages:
log.info("Build " + language + " name table")
wf = wiki.WikiWorkflow(language + "-name-table")
wf.build_name_table(language=language)
run_workflow(wf)
workflow.run(wf.wf)

# Build phrase table.
if flags.arg.build_phrasetab:
for language in flags.arg.languages:
log.info("Build " + language + " phrase table")
wf = wiki.WikiWorkflow(language + "-phrase-table")
wf.build_phrase_table(language=language)
run_workflow(wf)
workflow.run(wf.wf)

def train_embeddings():
# Extract vocabulary for word embeddings.
Expand All @@ -260,36 +229,36 @@ def train_embeddings():
log.info("Extract " + language + " vocabulary")
wf = embedding.EmbeddingWorkflow(language + "-vocabulary")
wf.extract_vocabulary(language=language)
run_workflow(wf)
workflow.run(wf.wf)

# Train word embeddings.
if flags.arg.train_word_embeddings:
for language in flags.arg.languages:
log.info("Train " + language + " word embeddings")
wf = embedding.EmbeddingWorkflow(language + "-word-embeddings")
wf.train_word_embeddings(language=language)
run_workflow(wf)
workflow.run(wf.wf)

# Extract vocabulary for fact and category embeddings.
if flags.arg.extract_fact_lexicon:
log.info("Extract fact and category lexicons")
wf = embedding.EmbeddingWorkflow("fact-lexicon")
wf.extract_fact_lexicon()
run_workflow(wf)
workflow.run(wf.wf)

# Extract facts from knowledge base.
if flags.arg.extract_facts:
log.info("Extract facts from knowledge base")
wf = embedding.EmbeddingWorkflow("fact-extraction")
wf.extract_facts()
run_workflow(wf)
workflow.run(wf.wf)

# Train fact and category embeddings.
if flags.arg.train_fact_embeddings:
log.info("Train fact and category embeddings")
wf = embedding.EmbeddingWorkflow("fact-embeddings")
wf.train_fact_embeddings()
run_workflow(wf)
workflow.run(wf.wf)


if __name__ == '__main__':
Expand All @@ -309,20 +278,16 @@ def train_embeddings():
flags.arg.build_nametab = True
flags.arg.build_phrasetab = True

# Start task monitor.
if flags.arg.monitor > 0: workflow.start_monitor(flags.arg.monitor)

# Run workflows.
workflow.startup()
download_corpora()
import_wiki()
parse_wikipedia()
fuse_items()
build_knowledge_base()
train_embeddings()

# Stop task monitor.
if flags.arg.monitor > 0: workflow.stop_monitor()
workflow.save_workflow_log(flags.arg.logdir)
workflow.shutdown()

# Done.
log.info("Done")

1 change: 1 addition & 0 deletions python/task/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

import os
import urllib2
import _strptime
import time

from workflow import *
Expand Down
7 changes: 5 additions & 2 deletions python/task/wiki.py
Original file line number Diff line number Diff line change
Expand Up @@ -583,7 +583,7 @@ def extract_names(self, aliases=None, language=None):
if aliases == None:
# Get language-dependent aliases from Wikidata and Wikipedia.
wikidata_aliases = self.wf.map(self.fused_items(),
"profile-alias-extractor",
"alias-extractor",
params={
"language": language,
"skip_aux": True,
Expand All @@ -599,7 +599,7 @@ def extract_names(self, aliases=None, language=None):
merged_aliases = self.wf.shuffle(aliases, len(names))

# Filter and select aliases.
self.wf.reduce(merged_aliases, names, "profile-alias-reducer",
self.wf.reduce(merged_aliases, names, "alias-reducer",
params={"language": language})
return names

Expand Down Expand Up @@ -648,8 +648,11 @@ def build_phrase_table(self, names=None, language=None):
with self.wf.namespace("phrase-table"):
builder = self.wf.task("phrase-table-builder")
builder.add_param("language", language)
builder.add_param("transfer_aliases", True)
self.wf.connect(self.wf.read(names, name="name-reader"), builder)
kb = self.knowledge_base()
repo = self.phrase_table(language)
builder.attach_input("commons", kb)
builder.attach_output("repository", repo)
return repo

Loading

0 comments on commit 93c2ab1

Please sign in to comment.