Skip to content
This repository has been archived by the owner on Jan 10, 2023. It is now read-only.

Alias transfer #326

Merged
merged 2 commits into from
Jan 22, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 28 additions & 17 deletions app/lib/docview.js
Original file line number Diff line number Diff line change
Expand Up @@ -451,7 +451,7 @@ export class DocumentViewer extends Component {
}

BuildPanel(phrase, fidx) {
let mention = this.document.frames[fidx];
let frame = this.document.frames[fidx];
let panel = document.createElement("div");
panel.className = "panel";
panel.id = "p" + next_panel++;
Expand All @@ -462,9 +462,11 @@ export class DocumentViewer extends Component {

let title = document.createElement("span");
title.className = "panel-title";
title.appendChild(document.createTextNode(phrase));
titlebar.appendChild(title);
this.AddTypes(titlebar, mention.types);
if (phrase) {
title.appendChild(document.createTextNode(phrase));
titlebar.appendChild(title);
this.AddTypes(titlebar, frame.types);
}

let icon = document.createElement("span");
icon.className = "panel-icon";
Expand All @@ -476,19 +478,24 @@ export class DocumentViewer extends Component {
let contents = document.createElement("div");
contents.className = "panel-content"

let slots = mention.slots;
if (slots) {
for (let i = 0; i < slots.length; i += 2) {
let n = slots[i];
let v = slots[i + 1];
if (this.document.frames[n].id == "evokes" ||
this.document.frames[n].id == "is") {
let avm = this.BuildAVM(v, {});
contents.appendChild(avm);
if (phrase) {
let rendered = {};
let slots = frame.slots;
if (slots) {
for (let i = 0; i < slots.length; i += 2) {
let n = slots[i];
let v = slots[i + 1];
if (this.document.frames[n].id == "evokes" ||
this.document.frames[n].id == "is") {
let avm = this.BuildAVM(v, rendered);
contents.appendChild(avm);
}
}
}
} else {
let avm = this.BuildAVM(fidx, {});
contents.appendChild(avm);
}

panel.appendChild(contents);
return panel;
}
Expand All @@ -504,7 +511,11 @@ export class DocumentViewer extends Component {
let span = e.currentTarget;
let phrase = span.getAttribute("phrase");
let fidx = parseInt(span.getAttribute("frame"));
this.AddPanel('"' + phrase + '"', fidx);
if (phrase) {
this.AddPanel('"' + phrase + '"', fidx);
} else {
this.AddPanel(null, fidx);
}
}

ClosePanel(e) {
Expand All @@ -519,7 +530,6 @@ export class DocumentViewer extends Component {
chip.className = "chip";
chip.id = "t" + fidx;
chip.setAttribute("frame", fidx);
chip.setAttribute("phrase", name);
chip.appendChild(document.createTextNode(name));

return chip;
Expand Down Expand Up @@ -548,13 +558,14 @@ export class DocumentViewer extends Component {

let fidx = parseInt(span.getAttribute("frame"))
let mention = this.document.frames[fidx];
let rendered = {};
let slots = mention.slots;
if (slots) {
for (let i = 0; i < slots.length; i += 2) {
let n = slots[i];
let v = slots[i + 1];
if (this.document.frames[n].id == "evokes") {
let avm = this.BuildAVM(v, {});
let avm = this.BuildAVM(v, rendered);
callout.appendChild(avm);
}
}
Expand Down
2 changes: 1 addition & 1 deletion data/wiki/calendar.sling
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@
625: Q2662
626: Q2661
627: Q2664
628: Q2664
628: Q2660
629: Q2659
630: Q2657

Expand Down
2 changes: 1 addition & 1 deletion doc/guide/install.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ SLING uses [Bazel](https://bazel.build/) as the build system, so you need to
to build the SLING parser.

```shell
sudo apt-get install pkg-config zip g++ zlib1g-dev unzip python
sudo apt-get install pkg-config zip g++ zlib1g-dev unzip python2.7 python2.7-dev
wget -P /tmp https://github.com/bazelbuild/bazel/releases/download/0.13.0/bazel-0.13.0-installer-linux-x86_64.sh
chmod +x /tmp/bazel-0.13.0-installer-linux-x86_64.sh
sudo /tmp/bazel-0.13.0-installer-linux-x86_64.sh
Expand Down
6 changes: 3 additions & 3 deletions doc/guide/myelin.md
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,7 @@ flow = "flow" <version>

var = <#flags> (IN=1, OUT=2, REF=4, LEARNABLE=8, UNIQUE=16, from version 5)
<name$>
<#aliases> <alias$>
<#aliases> <alias$>*
<dtype$>
<shape>
<#bytes> value
Expand All @@ -397,11 +397,11 @@ blob = <#flags> (unused, from version 5)

func = <#flags> (TRAINING=1, from version 5)
<name$>
<#ops> <op$>
<#ops> <op$>*

cnx = <#flags> (unused, from version 5)
<name$>
<#vars> <var$>
<#vars> <var$>*

shape = <#dims> <size>*

Expand Down
73 changes: 19 additions & 54 deletions python/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,37 +120,6 @@
default=False,
action='store_true')

flags.define("--dryrun",
help="build worflows but do not run them",
default=False,
action='store_true')

flags.define("--monitor",
help="port number for task monitor (0 means no monitor)",
default=6767,
type=int,
metavar="PORT")

flags.define("--logdir",
help="directory where workflow logs are stored",
default="local/logs",
metavar="DIR")

def run_workflow(wf):
# In dryrun mode the workflow is just dumped without running it.
if flags.arg.dryrun:
print wf.wf.dump()
return

# Start workflow.
log.info("start workflow")
wf.wf.start()

# Wait until workflow completes. Poll every second to make the workflow
# interruptible.
done = False
while not done: done = wf.wf.wait(1000)

def download_corpora():
if flags.arg.download_wikidata or flags.arg.download_wikipedia:
wf = download.DownloadWorkflow("wiki-download")
Expand All @@ -164,7 +133,7 @@ def download_corpora():
for language in flags.arg.languages:
wf.download_wikipedia(language=language)

run_workflow(wf)
workflow.run(wf.wf)

def import_wiki():
if flags.arg.import_wikidata or flags.arg.import_wikipedia:
Expand All @@ -180,7 +149,7 @@ def import_wiki():
log.info("Import " + language + " wikipedia")
wf.wikipedia(language=language)

run_workflow(wf)
workflow.run(wf.wf)

def parse_wikipedia():
# Convert wikipedia pages to SLING documents.
Expand All @@ -189,36 +158,36 @@ def parse_wikipedia():
log.info("Parse " + language + " wikipedia")
wf = wiki.WikiWorkflow(language + "-wikipedia-parsing")
wf.parse_wikipedia(language=language)
run_workflow(wf)
workflow.run(wf.wf)

def fuse_items():
# Merge categories from wikipedias.
if flags.arg.merge_categories:
log.info("Merge wikipedia categories")
wf = wiki.WikiWorkflow("category-merging")
wf.merge_wikipedia_categories()
run_workflow(wf)
workflow.run(wf.wf)

# Invert categories.
if flags.arg.invert_categories:
log.info("Invert categories")
wf = wiki.WikiWorkflow("category-inversion")
wf.invert_wikipedia_categories()
run_workflow(wf)
workflow.run(wf.wf)

# Compute item popularity.
if flags.arg.compute_item_popularity:
log.info("Compute item popularity")
wf = wiki.WikiWorkflow("item-popularity")
wf.compute_item_popularity()
run_workflow(wf)
workflow.run(wf.wf)

# Fuse items.
if flags.arg.fuse_items:
log.info("Fuse items")
wf = wiki.WikiWorkflow("fuse-items")
wf.fuse_items()
run_workflow(wf)
workflow.run(wf.wf)


def build_knowledge_base():
Expand All @@ -227,31 +196,31 @@ def build_knowledge_base():
log.info("Build knowledge base repository")
wf = wiki.WikiWorkflow("knowledge-base")
wf.build_knowledge_base()
run_workflow(wf)
workflow.run(wf.wf)

# Extract item names from wikidata and wikipedia.
if flags.arg.extract_names:
for language in flags.arg.languages:
log.info("Extract " + language + " names")
wf = wiki.WikiWorkflow(language + "-name-extraction")
wf.extract_names(language=language)
run_workflow(wf)
workflow.run(wf.wf)

# Build name table.
if flags.arg.build_nametab:
for language in flags.arg.languages:
log.info("Build " + language + " name table")
wf = wiki.WikiWorkflow(language + "-name-table")
wf.build_name_table(language=language)
run_workflow(wf)
workflow.run(wf.wf)

# Build phrase table.
if flags.arg.build_phrasetab:
for language in flags.arg.languages:
log.info("Build " + language + " phrase table")
wf = wiki.WikiWorkflow(language + "-phrase-table")
wf.build_phrase_table(language=language)
run_workflow(wf)
workflow.run(wf.wf)

def train_embeddings():
# Extract vocabulary for word embeddings.
Expand All @@ -260,36 +229,36 @@ def train_embeddings():
log.info("Extract " + language + " vocabulary")
wf = embedding.EmbeddingWorkflow(language + "-vocabulary")
wf.extract_vocabulary(language=language)
run_workflow(wf)
workflow.run(wf.wf)

# Train word embeddings.
if flags.arg.train_word_embeddings:
for language in flags.arg.languages:
log.info("Train " + language + " word embeddings")
wf = embedding.EmbeddingWorkflow(language + "-word-embeddings")
wf.train_word_embeddings(language=language)
run_workflow(wf)
workflow.run(wf.wf)

# Extract vocabulary for fact and category embeddings.
if flags.arg.extract_fact_lexicon:
log.info("Extract fact and category lexicons")
wf = embedding.EmbeddingWorkflow("fact-lexicon")
wf.extract_fact_lexicon()
run_workflow(wf)
workflow.run(wf.wf)

# Extract facts from knowledge base.
if flags.arg.extract_facts:
log.info("Extract facts from knowledge base")
wf = embedding.EmbeddingWorkflow("fact-extraction")
wf.extract_facts()
run_workflow(wf)
workflow.run(wf.wf)

# Train fact and category embeddings.
if flags.arg.train_fact_embeddings:
log.info("Train fact and category embeddings")
wf = embedding.EmbeddingWorkflow("fact-embeddings")
wf.train_fact_embeddings()
run_workflow(wf)
workflow.run(wf.wf)


if __name__ == '__main__':
Expand All @@ -309,20 +278,16 @@ def train_embeddings():
flags.arg.build_nametab = True
flags.arg.build_phrasetab = True

# Start task monitor.
if flags.arg.monitor > 0: workflow.start_monitor(flags.arg.monitor)

# Run workflows.
workflow.startup()
download_corpora()
import_wiki()
parse_wikipedia()
fuse_items()
build_knowledge_base()
train_embeddings()

# Stop task monitor.
if flags.arg.monitor > 0: workflow.stop_monitor()
workflow.save_workflow_log(flags.arg.logdir)
workflow.shutdown()

# Done.
log.info("Done")

1 change: 1 addition & 0 deletions python/task/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

import os
import urllib2
import _strptime
import time

from workflow import *
Expand Down
7 changes: 5 additions & 2 deletions python/task/wiki.py
Original file line number Diff line number Diff line change
Expand Up @@ -583,7 +583,7 @@ def extract_names(self, aliases=None, language=None):
if aliases == None:
# Get language-dependent aliases from Wikidata and Wikipedia.
wikidata_aliases = self.wf.map(self.fused_items(),
"profile-alias-extractor",
"alias-extractor",
params={
"language": language,
"skip_aux": True,
Expand All @@ -599,7 +599,7 @@ def extract_names(self, aliases=None, language=None):
merged_aliases = self.wf.shuffle(aliases, len(names))

# Filter and select aliases.
self.wf.reduce(merged_aliases, names, "profile-alias-reducer",
self.wf.reduce(merged_aliases, names, "alias-reducer",
params={"language": language})
return names

Expand Down Expand Up @@ -648,8 +648,11 @@ def build_phrase_table(self, names=None, language=None):
with self.wf.namespace("phrase-table"):
builder = self.wf.task("phrase-table-builder")
builder.add_param("language", language)
builder.add_param("transfer_aliases", True)
self.wf.connect(self.wf.read(names, name="name-reader"), builder)
kb = self.knowledge_base()
repo = self.phrase_table(language)
builder.attach_input("commons", kb)
builder.attach_output("repository", repo)
return repo

Loading