From 75452a3386839d112dffaa8deca4aa98bcd3aad9 Mon Sep 17 00:00:00 2001 From: Michael Ringgaard Date: Mon, 18 Nov 2019 10:41:14 +0100 Subject: [PATCH] Rearrange parser training modules (#430) --- sling/nlp/parser/BUILD | 35 +++++++++++++- sling/nlp/parser/caspar-trainer.cc | 2 +- .../parser/{trainer => }/frame-evaluation.cc | 2 +- .../parser/{trainer => }/frame-evaluation.h | 6 +-- sling/nlp/parser/parser-trainer.h | 2 +- sling/nlp/parser/tools/BUILD | 2 +- sling/nlp/parser/tools/parse.cc | 2 +- sling/nlp/parser/trainer/BUILD | 47 ------------------- .../parser/trainer/generate-transitions.cc | 41 ---------------- .../nlp/parser/trainer}/lexical_encoder.py | 0 sling/nlp/parser/trainer/pytorch_modules.py | 2 +- .../{trainer => }/transition-generator.cc | 3 +- .../{trainer => }/transition-generator.h | 6 +-- sling/pyapi/BUILD | 2 +- sling/pyapi/pyparser.cc | 2 +- tools/build-wheel.py | 1 - tools/buildall.sh | 1 - 17 files changed, 49 insertions(+), 107 deletions(-) rename sling/nlp/parser/{trainer => }/frame-evaluation.cc (99%) rename sling/nlp/parser/{trainer => }/frame-evaluation.h (97%) delete mode 100644 sling/nlp/parser/trainer/BUILD delete mode 100644 sling/nlp/parser/trainer/generate-transitions.cc rename {python/myelin => sling/nlp/parser/trainer}/lexical_encoder.py (100%) rename sling/nlp/parser/{trainer => }/transition-generator.cc (99%) rename sling/nlp/parser/{trainer => }/transition-generator.h (87%) diff --git a/sling/nlp/parser/BUILD b/sling/nlp/parser/BUILD index 73a4c8bf..aaf5797b 100644 --- a/sling/nlp/parser/BUILD +++ b/sling/nlp/parser/BUILD @@ -129,11 +129,43 @@ cc_library( alwayslink = 1, ) +cc_library( + name = "frame-evaluation", + hdrs = ["frame-evaluation.h"], + srcs = ["frame-evaluation.cc"], + deps = [ + "//sling/base", + "//sling/file", + "//sling/frame:object", + "//sling/frame:serialization", + "//sling/frame:store", + "//sling/nlp/document:document", + "//sling/nlp/document:document-corpus", + "//sling/string:strcat", + "//sling/string:printf", + ], +) + +cc_library( + name = "transition-generator", + hdrs = ["transition-generator.h"], + srcs = ["transition-generator.cc"], + deps = [ + ":parser-action", + "//sling/base", + "//sling/frame:object", + "//sling/frame:serialization", + "//sling/frame:store", + "//sling/nlp/document:document", + ], +) + cc_library( name = "parser-trainer", srcs = ["parser-trainer.cc"], hdrs = ["parser-trainer.h"], deps = [ + "frame-evaluation", ":parser-action", ":parser-features", ":roles", @@ -148,7 +180,6 @@ cc_library( "//sling/nlp/document:document-corpus", "//sling/nlp/document:lexical-encoder", "//sling/nlp/document:lexicon", - "//sling/nlp/parser/trainer:frame-evaluation", "//sling/task:learner", "//sling/util:mutex", "//sling/util:unicode", @@ -160,6 +191,7 @@ cc_library( srcs = ["caspar-trainer.cc"], deps = [ ":parser-trainer", + ":transition-generator", "//sling/base", "//sling/myelin:builder", "//sling/myelin:flow", @@ -167,7 +199,6 @@ cc_library( "//sling/myelin:learning", "//sling/nlp/document:document", "//sling/nlp/document:document-corpus", - "//sling/nlp/parser/trainer:transition-generator", ], alwayslink = 1, ) diff --git a/sling/nlp/parser/caspar-trainer.cc b/sling/nlp/parser/caspar-trainer.cc index d50a80c4..cabe05ff 100644 --- a/sling/nlp/parser/caspar-trainer.cc +++ b/sling/nlp/parser/caspar-trainer.cc @@ -19,7 +19,7 @@ #include "sling/myelin/learning.h" #include "sling/nlp/parser/action-table.h" #include "sling/nlp/parser/parser-trainer.h" -#include "sling/nlp/parser/trainer/transition-generator.h" +#include "sling/nlp/parser/transition-generator.h" namespace sling { namespace nlp { diff --git a/sling/nlp/parser/trainer/frame-evaluation.cc b/sling/nlp/parser/frame-evaluation.cc similarity index 99% rename from sling/nlp/parser/trainer/frame-evaluation.cc rename to sling/nlp/parser/frame-evaluation.cc index c15cefa2..f2694239 100644 --- a/sling/nlp/parser/trainer/frame-evaluation.cc +++ b/sling/nlp/parser/frame-evaluation.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "sling/nlp/parser/trainer/frame-evaluation.h" +#include "sling/nlp/parser/frame-evaluation.h" #include diff --git a/sling/nlp/parser/trainer/frame-evaluation.h b/sling/nlp/parser/frame-evaluation.h similarity index 97% rename from sling/nlp/parser/trainer/frame-evaluation.h rename to sling/nlp/parser/frame-evaluation.h index 7fdb784a..9775bcb8 100644 --- a/sling/nlp/parser/trainer/frame-evaluation.h +++ b/sling/nlp/parser/frame-evaluation.h @@ -14,8 +14,8 @@ // Frame evaluation. -#ifndef SLING_NLP_PARSER_TRAINER_FRAME_EVALUATION_H_ -#define SLING_NLP_PARSER_TRAINER_FRAME_EVALUATION_H_ +#ifndef SLING_NLP_PARSER_FRAME_EVALUATION_H_ +#define SLING_NLP_PARSER_FRAME_EVALUATION_H_ #include #include @@ -225,4 +225,4 @@ class FrameEvaluation { } // namespace nlp } // namespace sling -#endif // SLING_NLP_PARSER_TRAINER_FRAME_EVALUATION_H_ +#endif // SLING_NLP_PARSER_FRAME_EVALUATION_H_ diff --git a/sling/nlp/parser/parser-trainer.h b/sling/nlp/parser/parser-trainer.h index fc4329f6..1b66785a 100644 --- a/sling/nlp/parser/parser-trainer.h +++ b/sling/nlp/parser/parser-trainer.h @@ -27,10 +27,10 @@ #include "sling/nlp/document/document.h" #include "sling/nlp/document/document-corpus.h" #include "sling/nlp/document/lexical-encoder.h" +#include "sling/nlp/parser/frame-evaluation.h" #include "sling/nlp/parser/parser-action.h" #include "sling/nlp/parser/parser-features.h" #include "sling/nlp/parser/roles.h" -#include "sling/nlp/parser/trainer/frame-evaluation.h" #include "sling/task/learner.h" #include "sling/util/mutex.h" diff --git a/sling/nlp/parser/tools/BUILD b/sling/nlp/parser/tools/BUILD index 4fb5e255..82bed00e 100644 --- a/sling/nlp/parser/tools/BUILD +++ b/sling/nlp/parser/tools/BUILD @@ -15,7 +15,7 @@ cc_binary( "//sling/nlp/document:document-tokenizer", "//sling/nlp/document:lex", "//sling/nlp/parser", - "//sling/nlp/parser/trainer:frame-evaluation", + "//sling/nlp/parser:frame-evaluation", "//sling/string:printf", ], ) diff --git a/sling/nlp/parser/tools/parse.cc b/sling/nlp/parser/tools/parse.cc index dbe52c29..b20c2780 100644 --- a/sling/nlp/parser/tools/parse.cc +++ b/sling/nlp/parser/tools/parse.cc @@ -44,8 +44,8 @@ #include "sling/nlp/document/document-corpus.h" #include "sling/nlp/document/document-tokenizer.h" #include "sling/nlp/document/lex.h" +#include "sling/nlp/parser/frame-evaluation.h" #include "sling/nlp/parser/parser.h" -#include "sling/nlp/parser/trainer/frame-evaluation.h" #include "sling/string/printf.h" DEFINE_string(parser, "", "Input file with flow model"); diff --git a/sling/nlp/parser/trainer/BUILD b/sling/nlp/parser/trainer/BUILD deleted file mode 100644 index 2036aee7..00000000 --- a/sling/nlp/parser/trainer/BUILD +++ /dev/null @@ -1,47 +0,0 @@ -package(default_visibility = ["//visibility:public"]) - -cc_library( - name = "frame-evaluation", - hdrs = ["frame-evaluation.h"], - srcs = ["frame-evaluation.cc"], - deps = [ - "//sling/base", - "//sling/file", - "//sling/frame:object", - "//sling/frame:serialization", - "//sling/frame:store", - "//sling/nlp/document:document", - "//sling/nlp/document:document-corpus", - "//sling/string:strcat", - "//sling/string:printf", - ], -) - -cc_library( - name = "transition-generator", - hdrs = ["transition-generator.h"], - srcs = ["transition-generator.cc"], - deps = [ - "//sling/base", - "//sling/frame:object", - "//sling/frame:serialization", - "//sling/frame:store", - "//sling/nlp/document:document", - "//sling/nlp/parser:parser-action", - ], -) - -cc_binary( - name = "generate-transitions", - srcs = ["generate-transitions.cc"], - deps = [ - ":transition-generator", - "//sling/base", - "//sling/file:posix", - "//sling/frame:store", - "//sling/nlp/document:document", - "//sling/nlp/document:document-corpus", - "//sling/nlp/parser:parser-action", - ], -) - diff --git a/sling/nlp/parser/trainer/generate-transitions.cc b/sling/nlp/parser/trainer/generate-transitions.cc deleted file mode 100644 index 9c3de560..00000000 --- a/sling/nlp/parser/trainer/generate-transitions.cc +++ /dev/null @@ -1,41 +0,0 @@ -#include -#include -#include - -#include "sling/base/init.h" -#include "sling/base/logging.h" -#include "sling/base/types.h" -#include "sling/base/flags.h" -#include "sling/file/recordio.h" -#include "sling/frame/store.h" -#include "sling/frame/serialization.h" -#include "sling/nlp/document/document.h" -#include "sling/nlp/document/document-corpus.h" -#include "sling/nlp/parser/parser-action.h" -#include "sling/nlp/parser/trainer/transition-generator.h" - -DEFINE_string(corpus, "", "Input corpus"); - -using namespace sling::nlp; - -int main(int argc, char *argv[]) { - sling::InitProgram(&argc, &argv); - - sling::Store commons; - DocumentCorpus corpus(&commons, FLAGS_corpus); - commons.Freeze(); - - for (int i = 0;; ++i) { - sling::Store store(&commons); - Document *document = corpus.Next(&store); - if (document == nullptr) break; - - Generate(*document, [&](const ParserAction &action) { - std::cout << "Doc " << i << " " << action.ToString(&store) << "\n"; - }); - delete document; - } - - return 0; -} - diff --git a/python/myelin/lexical_encoder.py b/sling/nlp/parser/trainer/lexical_encoder.py similarity index 100% rename from python/myelin/lexical_encoder.py rename to sling/nlp/parser/trainer/lexical_encoder.py diff --git a/sling/nlp/parser/trainer/pytorch_modules.py b/sling/nlp/parser/trainer/pytorch_modules.py index bc505fd4..06122834 100644 --- a/sling/nlp/parser/trainer/pytorch_modules.py +++ b/sling/nlp/parser/trainer/pytorch_modules.py @@ -22,10 +22,10 @@ from cascade import Delegate from cascade import SoftmaxDelegate +from lexical_encoder import LexicalEncoder from parser_state import ParserState from trace import Trace -from sling.myelin.lexical_encoder import LexicalEncoder import sling import sling.myelin.nn as flownn import sling.myelin.flow as flow diff --git a/sling/nlp/parser/trainer/transition-generator.cc b/sling/nlp/parser/transition-generator.cc similarity index 99% rename from sling/nlp/parser/trainer/transition-generator.cc rename to sling/nlp/parser/transition-generator.cc index 391f7b28..aaf4d4fa 100644 --- a/sling/nlp/parser/trainer/transition-generator.cc +++ b/sling/nlp/parser/transition-generator.cc @@ -12,13 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "sling/nlp/parser/transition-generator.h" + #include #include #include #include "sling/frame/object.h" #include "sling/frame/store.h" -#include "sling/nlp/parser/trainer/transition-generator.h" namespace sling { namespace nlp { diff --git a/sling/nlp/parser/trainer/transition-generator.h b/sling/nlp/parser/transition-generator.h similarity index 87% rename from sling/nlp/parser/trainer/transition-generator.h rename to sling/nlp/parser/transition-generator.h index 47686161..ea82b127 100644 --- a/sling/nlp/parser/trainer/transition-generator.h +++ b/sling/nlp/parser/transition-generator.h @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -#ifndef SLING_NLP_PARSER_TRAINER_TRANSITION_GENERATOR_H_ -#define SLING_NLP_PARSER_TRAINER_TRANSITION_GENERATOR_H_ +#ifndef SLING_NLP_PARSER_TRANSITION_GENERATOR_H_ +#define SLING_NLP_PARSER_TRANSITION_GENERATOR_H_ #include @@ -37,4 +37,4 @@ void Generate(const Document &document, } // namespace nlp } // namespace sling -#endif // SLING_NLP_PARSER_TRAINER_TRANSITION_GENERATOR_H_ +#endif // SLING_NLP_PARSER_TRANSITION_GENERATOR_H_ diff --git a/sling/pyapi/BUILD b/sling/pyapi/BUILD index 205d52c9..74d73102 100644 --- a/sling/pyapi/BUILD +++ b/sling/pyapi/BUILD @@ -54,7 +54,7 @@ cc_library( "//sling/nlp/kb:facts", "//sling/nlp/kb:phrase-table", "//sling/nlp/parser", - "//sling/nlp/parser/trainer:frame-evaluation", + "//sling/nlp/parser:frame-evaluation", "//sling/nlp/wiki:wikidata-converter", "//sling/task:dashboard", "//sling/task:job", diff --git a/sling/pyapi/pyparser.cc b/sling/pyapi/pyparser.cc index 44967cd1..dad7ef3d 100644 --- a/sling/pyapi/pyparser.cc +++ b/sling/pyapi/pyparser.cc @@ -17,8 +17,8 @@ #include "sling/nlp/document/document.h" #include "sling/nlp/document/document-tokenizer.h" #include "sling/nlp/document/lex.h" +#include "sling/nlp/parser/frame-evaluation.h" #include "sling/nlp/parser/parser.h" -#include "sling/nlp/parser/trainer/frame-evaluation.h" #include "sling/pyapi/pyframe.h" #include "sling/pyapi/pystore.h" diff --git a/tools/build-wheel.py b/tools/build-wheel.py index 3c39ad56..543a1c5b 100644 --- a/tools/build-wheel.py +++ b/tools/build-wheel.py @@ -74,7 +74,6 @@ def sha256_content_checksum(data): 'python/myelin/__init__.py': '$DATA$/sling/myelin/__init__.py', 'python/myelin/builder.py': '$DATA$/sling/myelin/builder.py', 'python/myelin/flow.py': '$DATA$/sling/myelin/flow.py', - 'python/myelin/lexical_encoder.py': '$DATA$/sling/myelin/lexical_encoder.py', 'python/myelin/nn.py': '$DATA$/sling/myelin/nn.py', 'python/myelin/tf.py': '$DATA$/sling/myelin/tf.py', diff --git a/tools/buildall.sh b/tools/buildall.sh index 3e145860..3c02efbc 100755 --- a/tools/buildall.sh +++ b/tools/buildall.sh @@ -15,7 +15,6 @@ bazel build -c opt $* \ sling/nlp/ner:* \ sling/nlp/parser:* \ sling/nlp/parser/tools:* \ - sling/nlp/parser/trainer:* \ sling/nlp/wiki:* \ sling/pyapi:* \ sling/stream:* \