# veld_publish_to_hf.yaml
# Metadata section describing this service. Top-level keys prefixed with `x-` are Compose
# extension fields: Docker Compose ignores them, so nothing here changes the container itself.
# NOTE(review): presumably consumed by VELD tooling - confirm the expected schema there.
x-veld:
  code:
    description: "simple service to push spacy models to huggingface. Important: Only works from
      spacy v3.* onwards!"
    topic:
      - "NLP"
      - "ETL"

    # Data the service reads, given as a path inside the container.
    input:
      - volume: /veld/input/
        file_type: "spaCy model"
        content: "NLP model"

    # Environment variables the service is configured with; the same names appear under the
    # `environment` key of the compose service in this file.
    config:
      - environment_var: model_name
        description: "name of the model, to be used for huggingface metadata. IMPORTANT: do not put
          double underscores into the model name, as this crashes spacy while publishing."
        var_type: "str"
        optional: true

      - environment_var: version
        description: "version of the model, to be used for huggingface metadata. IMPORTANT: spacy
          crashes when the version tag contains the character `v` in front of numeric+dot version
          identifiers: E.g. `v1.1` crashes, while `1.1` works."
        var_type: "str"
        optional: true

      # Unlike the two entries above, this one carries no `optional: true`.
      # The description is a double-quoted scalar, so its line breaks fold into single spaces;
      # the extra indentation of the list items below is for readability only.
      - environment_var: hf_token
        description: "huggingface authentication token.
          IMPORTANT: DON'T HARDCODE THE TOKEN HERE!
          Rather define this environment variable on the host, by either:
            - PREFERRED: do this manually before calling this docker compose service. On linux and
              mac, this can be done with `export hf_token=<TOKEN>` and on windows with
              `set hf_token=<TOKEN>` before launching a docker compose service.
            - LESS PREFERRED: only do this if you know .gitignore: you may persist it in a `.env`
              file (with the content simply being `hf_token=<TOKEN>`) file next to the chain
              veld yaml file, which docker would automatically load, and then add the `.env` to
              `.gitignore`!"
        var_type: "str"
# Compose service that builds the local image and runs the publishing script.
services:
  veld_publish_to_hf:
    build: .
    command: bash /veld/code/publish_to_hf.sh
    volumes:
      # Mounts the local `./src/` directory at `/veld/code/`, the directory `command` runs
      # the script from.
      # NOTE(review): nothing is mounted at `/veld/input/` here, although the metadata section
      # declares it as the input volume - presumably the caller adds that mount; confirm.
      - ./src/:/veld/code/
    environment:
      # No value is set here (null): Compose takes these from the host environment when they
      # are defined there, otherwise the variable stays unset in the container.
      model_name: null
      version: null
      # Interpolated by Compose from the host environment or a `.env` file, so the token
      # itself never has to be written into this file.
      hf_token: $hf_token