-
Notifications
You must be signed in to change notification settings - Fork 756
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Create a component to submit the Dataflow job to compute embeddings f…
…or code search. * Update Beam to 2.8.0 * Remove nmslib from Apache beam requirements.txt; it's not needed and appears to have problems installing on the Dataflow workers. * Spacy download was failing on Dataflow workers; reinstalling the spacy package as a pip package appears to fix this. * Fix some bugs in the workflow for building the Docker images.
- Loading branch information
Showing
11 changed files
with
155 additions
and
19 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,21 +1,34 @@ | ||
// TODO(jlewi): We should tag the image latest and then | ||
// use latest as a cache so that rebuilds are fast | ||
// https://cloud.google.com/cloud-build/docs/speeding-up-builds#using_a_cached_docker_image | ||
{ | ||
|
||
"steps": [ | ||
{ | ||
"name": "gcr.io/cloud-builders/docker", | ||
"args": ["build", "-t", "gcr.io/kubeflow-examples/code-search:" + std.extVar("tag"), | ||
"--label=git-versions=" + std.extVar("gitVersion"), | ||
"--build-arg", "BASE_IMAGE_TAG=1.11.0", | ||
"./docker/t2t"], | ||
"--label=git-versions=" + std.extVar("gitVersion"), | ||
"--build-arg", "BASE_IMAGE_TAG=1.11.0", | ||
"--file=docker/t2t/Dockerfile", "."], | ||
"waitFor": ["-"], | ||
}, | ||
{ | ||
"name": "gcr.io/cloud-builders/docker", | ||
"args": ["build", "-t", "gcr.io/kubeflow-examples/code-search-gpu:" + std.extVar("tag"), | ||
"--label=git-versions=" + std.extVar("gitVersion"), | ||
"--build-arg", "BASE_IMAGE_TAG=1.11.0-gpu", | ||
"./docker/t2t"], | ||
"--label=git-versions=" + std.extVar("gitVersion"), | ||
"--build-arg", "BASE_IMAGE_TAG=1.11.0-gpu", | ||
"--file=docker/t2t/Dockerfile", "."], | ||
"waitFor": ["-"], | ||
}, | ||
{ | ||
"name": "gcr.io/cloud-builders/docker", | ||
"args": ["build", "-t", "gcr.io/kubeflow-examples/code-search-dataflow:" + std.extVar("tag"), | ||
"--label=git-versions=" + std.extVar("gitVersion"), | ||
"--file=docker/t2t/Dockerfile.dataflow", "."], | ||
"waitFor": ["-"], | ||
}, | ||
], | ||
"images": ["gcr.io/kubeflow-examples/code-search:" + std.extVar("tag"), | ||
"gcr.io/kubeflow-examples/code-search-gpu:" + std.extVar("tag")], | ||
"gcr.io/kubeflow-examples/code-search-gpu:" + std.extVar("tag"), | ||
"gcr.io/kubeflow-examples/code-search-dataflow:" + std.extVar("tag")], | ||
} |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
14 changes: 14 additions & 0 deletions
14
code_search/kubeflow/components/submit-code-embeddings-job.jsonnet
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
// Submit a Dataflow job to compute the code embeddings using a trained model. | ||
// Emits the Job defined in submit-code-embeddings-job.libsonnet, wrapped in a | ||
// single-item list, for the current ksonnet environment. | ||
local k = import "k.libsonnet"; | ||
|
||
local experiments = import "experiments.libsonnet"; | ||
local lib = import "submit-code-embeddings-job.libsonnet"; | ||
local env = std.extVar("__ksonnet/environments"); | ||
local baseParams = std.extVar("__ksonnet/params").components["submit-code-embeddings-job"]; | ||
// The component's "experiment" param selects which entry of experiments.libsonnet is merged in. | ||
local experimentName = baseParams.experiment; | ||
// With object +, fields on the right override those on the left: the experiment's | ||
// values override the component params, and the derived name overrides both. | ||
local params = baseParams + experiments[experimentName] + { | ||
name: experimentName + "-embed-code", | ||
}; | ||
|
||
||
|
||
||
// std.prune recursively drops null and empty members from the generated list. | ||
std.prune(k.core.v1.list.new([lib.parts(params,env).job])) |
73 changes: 73 additions & 0 deletions
73
code_search/kubeflow/components/submit-code-embeddings-job.libsonnet
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
// Library backing the submit-code-embeddings-job component. | ||
{ | ||
// parts(params, env) builds the objects for this component. | ||
// params: reads name, image, project, targetDataset, dataDir, problem, jobName, | ||
// modelDir, workingDir, workerMachineType and numWorkers. | ||
// env: reads namespace. | ||
parts(params, env):: { | ||
// Submit a Dataflow job to compute the code embeddings using a trained model. | ||
// Declared with :: (hidden), so it is only rendered when referenced explicitly. | ||
job :: { | ||
apiVersion: "batch/v1", | ||
kind: "Job", | ||
metadata: { | ||
name: params.name, | ||
namespace: env.namespace, | ||
labels: { | ||
app: params.name, | ||
}, | ||
}, | ||
spec: { | ||
// NOTE(review): the batch/v1 JobSpec has no `replicas` field (it uses | ||
// parallelism/completions) — confirm and drop this line. | ||
replicas: 1, | ||
template: { | ||
metadata: { | ||
labels: { | ||
app: params.name, | ||
}, | ||
}, | ||
spec: { | ||
// Don't restart because all the job should do is launch the Dataflow job. | ||
// NOTE(review): restartPolicy "Never" only stops in-place container restarts; | ||
// the Job controller may still create replacement pods after a failure unless | ||
// backoffLimit is 0 — confirm whether resubmitting the Dataflow job is acceptable. | ||
restartPolicy: "Never", | ||
containers: [ | ||
{ | ||
name: "dataflow", | ||
image: params.image, | ||
// Runs the embeddings CLI module with the Dataflow runner; every flag | ||
// value below comes from params. | ||
command: [ | ||
"python2", | ||
"-m", | ||
"code_search.dataflow.cli.create_function_embeddings", | ||
"--runner=DataflowRunner", | ||
"--project=" + params.project, | ||
"--target_dataset=" + params.targetDataset, | ||
"--data_dir=" + params.dataDir, | ||
"--problem=" + params.problem, | ||
"--job_name=" + params.jobName, | ||
"--saved_model_dir=" + params.modelDir, | ||
// Temp and staging locations both live under params.workingDir. | ||
"--temp_location=" + params.workingDir + "/dataflow/temp", | ||
"--staging_location=" + params.workingDir + "/dataflow/staging", | ||
"--worker_machine_type=" + params.workerMachineType, | ||
"--num_workers=" + params.numWorkers, | ||
], | ||
env: [ | ||
{ | ||
// Path lies under the gcp-credentials volume mounted below; presumably | ||
// the user-gcp-sa secret contains a key named user-gcp-sa.json — confirm. | ||
name: "GOOGLE_APPLICATION_CREDENTIALS", | ||
value: "/secret/gcp-credentials/user-gcp-sa.json", | ||
}, | ||
], | ||
workingDir: "/src", | ||
volumeMounts: [ | ||
{ | ||
mountPath: "/secret/gcp-credentials", | ||
name: "gcp-credentials", | ||
}, | ||
], //volumeMounts | ||
}, | ||
], // containers | ||
// Backs the gcp-credentials mount above with the user-gcp-sa secret. | ||
volumes: [ | ||
{ | ||
name: "gcp-credentials", | ||
secret: { | ||
secretName: "user-gcp-sa", | ||
}, | ||
}, | ||
], | ||
}, // spec | ||
}, | ||
}, | ||
}, // job | ||
}, // parts | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,13 @@ | ||
astor~=0.7.0 | ||
apache-beam[gcp]~=2.6.0 | ||
apache-beam[gcp]~=2.8.0 | ||
Flask~=1.0.0 | ||
nltk~=3.3.0 | ||
nmslib~=1.7.0 | ||
# TODO(jlewi): nmslib builds are failing on Dataflow workers with Apache beam 2.8.0. | ||
# We shouldn't need nmslib in the Dataflow jobs. | ||
# nmslib~=1.7.0 | ||
oauth2client~=4.1.0 | ||
requests~=2.19.0 | ||
spacy~=2.0.0 | ||
tensor2tensor~=1.9.0 | ||
tensorflow~=1.11.0 | ||
pybind11~=2.2.4 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters