-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathveld_train.yaml
71 lines (66 loc) · 2.15 KB
/
veld_train.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# VELD metadata describing this training code. Compose treats top-level keys
# starting with "x-" as extension fields and does not interpret them itself.
x-veld:
  code:
    description: "A word2vec training setup"
    topic:
      - "NLP"
      - "Machine Learning"
      - "Word Embeddings"

    # Data consumed by the service: the container path it is mounted at and
    # the environment variable associated with it.
    input:
      - volume: /veld/input/
        environment_var: in_train_data_file
        description: "training data. Must be one single txt file, one sentence per line."
        file_type: "txt"
        content:
          - "NLP training data"
          - "Word Embeddings training data"
          - "raw text"

    # Data produced by the service.
    output:
      - volume: /veld/output/
        environment_var: out_model_file
        description: "self trained Word Embeddings word2vec model"
        file_type: "word2vec model"
        content:
          - "NLP model"
          - "Word Embeddings model"

    # Environment variables that configure a training run.
    config:
      # Free-text descriptions; no default is declared for these.
      - environment_var: train_data_description
        description: "short human description for the kind of training data"
        var_type: "str"
        optional: true
      - environment_var: model_description
        description: "short human description for the overall model and its purpose"
        var_type: "str"
        optional: true

      # word2vec hyperparameters. Each one declares a default, so each one is
      # marked optional, consistently with `epochs`.
      - environment_var: epochs
        description: "word2vec hyperparameter: number of training epochs"
        var_type: "int"
        optional: true
        default: 50
      - environment_var: vector_size
        description: "word2vec hyperparameter: number of dimensions of the word vectors"
        var_type: "int"
        optional: true
        default: 200
      - environment_var: window
        description: "word2vec hyperparameter: number of surrounding context words to be used for training."
        var_type: "int"
        optional: true
        default: 3
      - environment_var: min_count
        description: "word2vec hyperparameter: minimal number of occurrence for each word to be used for training."
        var_type: "int"
        optional: true
        default: 5
# Compose service that runs the training script inside the image built from
# this directory.
services:
  veld_train:
    build: .
    command: python /veld/code/train.py
    volumes:
      # Host directories bind-mounted at the container paths named in x-veld.
      # The ":z" option requests a shared SELinux label for the mount.
      - ./src/train/:/veld/code/:z
      - ./data/training_data/:/veld/input/:z
      - ./data/models/:/veld/output/:z
    environment:
      # One entry for every environment_var declared under x-veld. A null
      # value is resolved by Compose from the host environment; the variable
      # stays unset in the container when the host does not define it.
      in_train_data_file: null
      out_model_file: null
      train_data_description: null
      model_description: null
      # Hyperparameter values; they mirror the defaults declared in x-veld.
      epochs: 50
      vector_size: 200
      window: 3
      min_count: 5