diff --git a/cmd/cli/deprecated/exec.go b/cmd/cli/deprecated/exec.go new file mode 100644 index 0000000000..4fd1832eed --- /dev/null +++ b/cmd/cli/deprecated/exec.go @@ -0,0 +1,17 @@ +package deprecated + +import ( + "github.com/spf13/cobra" +) + +func NewExecCommand() *cobra.Command { + cancelCmd := &cobra.Command{ + Use: "exec", + Deprecated: "exec was an experimental feature and no longer supported", + RunE: func(cmd *cobra.Command, cmdArgs []string) error { + return nil + }, + } + + return cancelCmd +} diff --git a/cmd/cli/exec/args.go b/cmd/cli/exec/args.go deleted file mode 100644 index b3e2df0a75..0000000000 --- a/cmd/cli/exec/args.go +++ /dev/null @@ -1,79 +0,0 @@ -package exec - -import ( - "fmt" - "strings" - - "github.com/spf13/pflag" -) - -// ExtractUnknownArgs extracts any long-form flags (--something) that are not -// currently configured for this command, they must be flags intended for the -// custom job type. -func ExtractUnknownArgs(flags *pflag.FlagSet, args []string) []string { - unknownArgs := []string{} - - for i := 0; i < len(args); i++ { - arg := args[i] - var field *pflag.Flag - - if arg[0] == '-' { - if arg[1] == '-' { - field = flags.Lookup(strings.SplitN(arg[2:], "=", 2)[0]) - } else { - for _, s := range arg[1:] { - field = flags.ShorthandLookup(string(s)) - if field == nil { - break - } - } - } - } else { - continue - } - - if field != nil { - if field.NoOptDefVal == "" && i+1 < len(args) && field.Value.String() == args[i+1] { - i++ - } - continue - } - - // Make sure we allow `--code=.` and `--code .` - if !strings.Contains(arg, "=") { - if i+1 < len(args) { - if args[i+1][0] != '-' { - arg = fmt.Sprintf("%s=%s", arg, args[i+1]) - } - } - } - - if arg == "--" { - continue - } - - unknownArgs = append(unknownArgs, arg) - } - - return unknownArgs -} - -func flagsToMap(flags []string) map[string]string { - m := make(map[string]string) - - for _, flag := range flags { - if flag == "--" { - continue // skip the user escaping the cmd args - } - - flagString := strings.TrimPrefix(flag, "-") - flagString = strings.TrimPrefix(flagString, "-") // just in case there's a second - - parts := strings.SplitN(flagString, "=", 2) - if len(parts) == 2 { - // if the flag has no value, it's probably a standalone bool - m[parts[0]] = parts[1] - } - } - - return m -} diff --git a/cmd/cli/exec/exec.go b/cmd/cli/exec/exec.go deleted file mode 100644 index 3996edd794..0000000000 --- a/cmd/cli/exec/exec.go +++ /dev/null @@ -1,276 +0,0 @@ -package exec - -import ( - "context" - "encoding/json" - "fmt" - "os" - "path/filepath" - "strings" - - "github.com/spf13/cobra" - "gopkg.in/alessio/shellescape.v1" - "k8s.io/kubectl/pkg/util/i18n" - - "github.com/bacalhau-project/bacalhau/cmd/util" - "github.com/bacalhau-project/bacalhau/cmd/util/flags/cliflags" - "github.com/bacalhau-project/bacalhau/cmd/util/hook" - "github.com/bacalhau-project/bacalhau/cmd/util/printer" - "github.com/bacalhau-project/bacalhau/pkg/lib/template" - "github.com/bacalhau-project/bacalhau/pkg/models" - "github.com/bacalhau-project/bacalhau/pkg/publicapi/apimodels" - "github.com/bacalhau-project/bacalhau/pkg/publicapi/client/v2" - "github.com/bacalhau-project/bacalhau/pkg/storage/inline" - "github.com/bacalhau-project/bacalhau/pkg/userstrings" - "github.com/bacalhau-project/bacalhau/pkg/util/templates" -) - -var ( - getLong = templates.LongDesc(i18n.T( - fmt.Sprintf(`Execute a specific job type. - -Allows for the execution of a job type with the given code, -without the need to create a container, or webassembly module. -By specifying the code with the '--code' flag you can ship the code -to the cluster for execution, specified by the remainder of the -command line. See examples below. - -Supported job types: - -%s - `, supportedJobTypes()), - )) - - //nolint:lll // Documentation - getExample = templates.Examples(i18n.T(` - # Execute the app.py script with Python - bacalhau exec --code app.py python app.py - - # Run a duckdb query against a CSV file - bacalhau exec -i src=...,dst=/inputs/data.csv duckdb "select * from /inputs/data.csv" -`)) -) - -type ExecOptions struct { - JobSettings *cliflags.JobSettings - TaskSettings *cliflags.TaskSettings - RunTimeSettings *cliflags.RunTimeSettings - Code string -} - -func NewExecOptions() *ExecOptions { - return &ExecOptions{ - JobSettings: cliflags.DefaultJobSettings(), - TaskSettings: cliflags.DefaultTaskSettings(), - RunTimeSettings: cliflags.DefaultRunTimeSettings(), - } -} - -func NewCmd() *cobra.Command { - options := NewExecOptions() - return NewCmdWithOptions(options) -} - -func NewCmdWithOptions(options *ExecOptions) *cobra.Command { - execCmd := &cobra.Command{ - Use: "exec [jobtype]", - Short: "Execute a specific job type", - Long: getLong, - Example: getExample, - Args: cobra.MinimumNArgs(1), - PreRunE: hook.RemoteCmdPreRunHooks, - PostRunE: hook.RemoteCmdPostRunHooks, - FParseErrWhitelist: cobra.FParseErrWhitelist{UnknownFlags: true}, - RunE: func(cmd *cobra.Command, cmdArgs []string) error { - // Find the unknown arguments from the original args. We only want to find the - // flags that are unknown. We will only support the long form for custom - // job types as we will want to use them as keys in template completions. - unknownArgs := ExtractUnknownArgs(cmd.Flags(), os.Args[1:]) - // initialize a new or open an existing repo merging any config file(s) it contains into cfg. - cfg, err := util.SetupRepoConfig(cmd) - if err != nil { - return fmt.Errorf("failed to setup repo: %w", err) - } - // create an api client - api, err := util.GetAPIClientV2(cmd, cfg) - if err != nil { - return fmt.Errorf("failed to create api client: %w", err) - } - return exec(cmd, cmdArgs, unknownArgs, api, options) - }, - } - - cliflags.RegisterJobFlags(execCmd, options.JobSettings) - cliflags.RegisterTaskFlags(execCmd, options.TaskSettings) - - execCmd.Flags().AddFlagSet(cliflags.NewRunTimeSettingsFlags(options.RunTimeSettings)) - execCmd.Flags().StringVar(&options.Code, "code", "", "Specifies the file, or directory of code to send with the request") - - return execCmd -} - -func exec(cmd *cobra.Command, cmdArgs []string, unknownArgs []string, api client.API, options *ExecOptions) error { - job, err := PrepareJob(cmd, cmdArgs, unknownArgs, options) - if err != nil { - return err - } - - job.Normalize() - err = job.ValidateSubmission() - if err != nil { - return fmt.Errorf("%s: %w", userstrings.JobSpecBad, err) - } - - resp, err := api.Jobs().Put(cmd.Context(), &apimodels.PutJobRequest{ - Job: job, - }) - if err != nil { - return fmt.Errorf("failed request: %w", err) - } - - job.ID = resp.JobID - jobProgressPrinter := printer.NewJobProgressPrinter(api, options.RunTimeSettings) - if err := jobProgressPrinter.PrintJobProgress(cmd.Context(), job, cmd); err != nil { - return fmt.Errorf("failed to print job execution: %w", err) - } - - return nil -} - -// Provides a string to diplay the currently available job types -func supportedJobTypes() string { - tpl, _ := NewTemplateMap(embeddedFiles, "templates") - var sb strings.Builder - for _, s := range tpl.AllTemplates() { - sb.WriteString(fmt.Sprintf(" * %s\n", s)) - } - return sb.String() -} - -//nolint:funlen -func PrepareJob(cmd *cobra.Command, cmdArgs []string, unknownArgs []string, options *ExecOptions) (*models.Job, error) { - var err error - var jobType, templateString string - var job *models.Job - - // Determine the job type and lookup the template for that type. If we - // don't have a template, then we don't know how to submit that job type. - jobType = cmdArgs[0] - - for i := range cmdArgs { - // If any parameters were quoted, we should make sure we try and add - // them back in after they were stripped for us. - if strings.Contains(cmdArgs[i], " ") { - cmdArgs[i] = shellescape.Quote(cmdArgs[i]) - } - } - - tpl, err := NewTemplateMap(embeddedFiles, "templates") - if err != nil { - return nil, fmt.Errorf("failed to find supported job types, templates missing") - } - - // Get the template string, or if we can't find one for this type, then - // provide a list of ones we _do_ support. - if templateString, err = tpl.Get(jobType); err != nil { - knownTypes := tpl.AllTemplates() - - supportedTypes := "" - if len(knownTypes) > 0 { - supportedTypes = "\nSupported types:\n" - - for _, kt := range knownTypes { - supportedTypes = supportedTypes + fmt.Sprintf(" * %s\n", kt) - } - } - - return nil, fmt.Errorf("the job type '%s' is not supported."+supportedTypes, jobType) - } - - // Convert the unknown args to a map which we can use to fill in the template - replacements := flagsToMap(unknownArgs) - - parser, err := template.NewParser(template.ParserParams{ - Replacements: replacements, - }) - - if err != nil { - return nil, fmt.Errorf("failed to create %s job when parsing template: %+w", jobType, err) - } - - tplResult, err := parser.ParseBytes([]byte(templateString)) - if err != nil { - return nil, fmt.Errorf("%s: %w", userstrings.JobSpecBad, err) - } - - // tplResult is now a []byte containing json for the job we will eventually submit. - if err = json.Unmarshal(tplResult, &job); err != nil { - return nil, fmt.Errorf("%s: %w", userstrings.JobSpecBad, err) - } - - // Attach the command line arguments that were provided to exec. These are passed through - // to the template as Command/Arguments. e.g. `bacalhau exec python app.py` will set - // Command -> python, and Arguments -> ["app.py"] - job.Tasks[0].Engine.Params["Command"] = jobType - job.Tasks[0].Engine.Params["Arguments"] = cmdArgs[1:] - - // Process --code if anything was specified. In future we may want to try and determine this - // ourselves where it is not specified, but it will likely be dependent on job type. - if options.Code != "" { - if err = addInlineContent(cmd.Context(), options.Code, job); err != nil { - return nil, err - } - } - - job.Labels, err = options.JobSettings.Labels() - job.Task().Publisher = options.TaskSettings.Publisher.Value() - job.Task().ResultPaths = options.TaskSettings.ResultPaths - job.Task().Env = options.TaskSettings.EnvironmentVariables - job.Task().InputSources = options.TaskSettings.InputSources.Values() - if err != nil { - return nil, fmt.Errorf("parsing job labels: %w", err) - } - job.Constraints, err = options.JobSettings.Constraints() - if err != nil { - return nil, fmt.Errorf("parsing job constraints: %w", err) - } - - // Set the execution timeouts - job.Tasks[0].Timeouts = &models.TimeoutConfig{ - TotalTimeout: options.TaskSettings.Timeout, - } - - return job, nil -} - -// addInlineContent will use codeLocation to determine if it is a single file or a -// directory and will attach to the job as an inline attachment. -func addInlineContent(ctx context.Context, codeLocation string, job *models.Job) error { - absPath, err := filepath.Abs(codeLocation) - if err != nil { - return err - } - - target := "/code" - - if finfo, err := os.Stat(absPath); err != nil { - return fmt.Errorf("file '%s' not found", codeLocation) - } else { - if !finfo.IsDir() { - target = fmt.Sprintf("/code/%s", finfo.Name()) - } - } - - specConfig, err := inline.NewStorage().Upload(ctx, absPath) - if err != nil { - return fmt.Errorf("failed to attach code '%s' to job submission: %w", codeLocation, err) - } - - job.Tasks[0].InputSources = append(job.Tasks[0].InputSources, &models.InputSource{ - Source: &specConfig, - Alias: "code", - Target: target, - }) - - return nil -} diff --git a/cmd/cli/exec/exec_test.go b/cmd/cli/exec/exec_test.go deleted file mode 100644 index 076811f186..0000000000 --- a/cmd/cli/exec/exec_test.go +++ /dev/null @@ -1,153 +0,0 @@ -//go:build unit || !integration - -package exec_test - -import ( - "testing" - - "github.com/spf13/cobra" - "github.com/stretchr/testify/suite" - - "github.com/bacalhau-project/bacalhau/cmd/cli/exec" - "github.com/bacalhau-project/bacalhau/pkg/models" -) - -type ExecSuite struct { - suite.Suite -} - -// In order for 'go test' to run this suite, we need to create -// a normal test function and pass our suite to suite.Run -func TestExecSuite(t *testing.T) { - suite.Run(t, new(ExecSuite)) -} - -type testCase struct { - name string - cmdLine []string - expectedUnknownArgs []string - expectedErrMsg string - jobCommand string - jobArguments []string - numInlinedAttachments int - numTotalAttachments int -} - -var testcases []testCase = []testCase{ - { - // bacalhau exec ruby -e "puts 'hello'" - name: "no ruby here", - cmdLine: []string{"ruby", "-e", "\"puts 'hello'\""}, - expectedUnknownArgs: []string{}, - expectedErrMsg: "the job type 'ruby' is not supported", - }, - { - // bacalhau exec python --version=3.10 -- -c "import this" - name: "zen of python", - cmdLine: []string{"python", "--version=3.10", "--", "-c", "import this"}, - expectedUnknownArgs: []string{"--version=3.10", "-c=import this"}, - expectedErrMsg: "", - jobCommand: "python", - jobArguments: []string{"-c", "'import this'"}, - numInlinedAttachments: 0, - numTotalAttachments: 0, - }, - { - // bacalhau exec -i src=http://127.0.0.1/test.csv,dst=/inputs/test.csv python app.py - name: "run a python app", - cmdLine: []string{"-i", "src=http://127.0.0.1/test.csv,dst=/inputs/test.csv", "python", "app.py", "-x"}, - expectedUnknownArgs: []string{"-x"}, - expectedErrMsg: "", - jobCommand: "python", - jobArguments: []string{"app.py"}, - numInlinedAttachments: 0, - numTotalAttachments: 1, - }, - { - // bacalhau exec -i src=http://127.0.0.1/test.csv,dst=/inputs/test.csv python app.py - name: "run a python app with some inputs", - cmdLine: []string{"-i", "src=http://127.0.0.1/test.csv,dst=/inputs/test.csv", "python", "app.py", "/inputs/test.csv"}, - expectedUnknownArgs: []string{}, - expectedErrMsg: "", - jobCommand: "python", - jobArguments: []string{"app.py", "/inputs/test.csv"}, - numInlinedAttachments: 0, - numTotalAttachments: 1, - }, - { - // bacalhau exec -i src=http://127.0.0.1/test.csv,dst=/inputs/test.csv python app.py --code main.go - name: "run a python app with a local file", - cmdLine: []string{"-i", "src=http://127.0.0.1/test.csv,dst=/inputs/test.csv", "python", "app.py", "--code=exec_test.go"}, - expectedUnknownArgs: []string{}, - expectedErrMsg: "", - jobCommand: "python", - jobArguments: []string{"app.py"}, - numInlinedAttachments: 1, - numTotalAttachments: 2, - }, - { - // bacalhau exec -i src=http://127.0.0.1/test.csv,dst=/inputs/test.csv duckdb "select * from /inputs/test.csv" - name: "duckdb", - cmdLine: []string{"-i", "src=http://127.0.0.1/test.csv,dst=/inputs/test.csv", "duckdb", "select * from /inputs/test.csv"}, - expectedUnknownArgs: []string{}, - expectedErrMsg: "", - jobCommand: "duckdb", - jobArguments: []string{"'select * from /inputs/test.csv'"}, - numInlinedAttachments: 0, - numTotalAttachments: 1, - }, -} - -func (s *ExecSuite) TestJobPreparation() { - for _, tc := range testcases { - s.Run(tc.name, func() { - options := exec.NewExecOptions() - cmd := exec.NewCmdWithOptions(options) - - testCaseF := s.testFuncForTestCase(tc) - - cmd.PreRunE = nil - cmd.PostRunE = nil - cmd.Run = func(cmd *cobra.Command, cmdArgs []string) { - unknownArgs := exec.ExtractUnknownArgs(cmd.Flags(), tc.cmdLine) - s.Require().Equal(tc.expectedUnknownArgs, unknownArgs) - - job, err := exec.PrepareJob(cmd, cmdArgs, unknownArgs, options) - _ = testCaseF(job, err) - } - - cmd.SetArgs(tc.cmdLine) - cmd.Execute() - }) - } - -} - -func (s *ExecSuite) testFuncForTestCase(tc testCase) func(*models.Job, error) bool { - return func(job *models.Job, err error) bool { - if tc.expectedErrMsg == "" { - s.Require().NoError(err) - } else { - s.Require().Error(err) - s.Require().Contains(err.Error(), tc.expectedErrMsg) - return false - } - - task := job.Task() - - s.Require().Equal(tc.jobCommand, task.Engine.Params["Command"], "command is incorrect") - s.Require().Equal(tc.jobArguments, task.Engine.Params["Arguments"], "arguments are incorrect") - - var inlineCount = 0 - for _, src := range task.InputSources { - if src.Source.Type == "inline" { - inlineCount += 1 - } - } - - s.Require().Equal(tc.numInlinedAttachments, inlineCount, "wrong number of inline attachments") - s.Require().Equal(tc.numTotalAttachments, len(task.InputSources), "wrong number of input sources") - - return true - } -} diff --git a/cmd/cli/exec/templates.go b/cmd/cli/exec/templates.go deleted file mode 100644 index e65fee7898..0000000000 --- a/cmd/cli/exec/templates.go +++ /dev/null @@ -1,77 +0,0 @@ -package exec - -import ( - "bufio" - "embed" - "fmt" - "io" - "io/fs" - "path" - "path/filepath" - "strings" - - "golang.org/x/exp/maps" -) - -//go:embed templates/*.tpl -var embeddedFiles embed.FS - -func ErrUnknownTemplate(name string) error { - return fmt.Errorf("unknown template specified: %s", name) -} - -type TemplateMap struct { - m map[string]string -} - -func NewTemplateMap(fSys fs.ReadDirFS, tplPath string) (*TemplateMap, error) { - entries, err := fSys.ReadDir(tplPath) - if err != nil { - return nil, err - } - - tpl := &TemplateMap{ - m: make(map[string]string), - } - - for _, entry := range entries { - if entry.IsDir() { - continue - } - - name := nameFromFile(entry.Name()) - - fd, err := fSys.Open(path.Join(tplPath, entry.Name())) - if err != nil { - return nil, err - } - defer fd.Close() - - reader := bufio.NewReader(fd) - data, err := io.ReadAll(reader) - if err != nil { - return nil, err - } - - tpl.m[strings.ToLower(name)] = string(data) - } - - return tpl, nil -} - -func (t *TemplateMap) Get(name string) (string, error) { - tpl, found := t.m[strings.ToLower(name)] - if !found { - return "", ErrUnknownTemplate(name) - } - - return tpl, nil -} - -func (t *TemplateMap) AllTemplates() []string { - return maps.Keys(t.m) -} - -func nameFromFile(filename string) string { - return strings.TrimSuffix(filename, filepath.Ext(filename)) -} diff --git a/cmd/cli/exec/templates/duckdb.tpl b/cmd/cli/exec/templates/duckdb.tpl deleted file mode 100644 index 0df78d4ac8..0000000000 --- a/cmd/cli/exec/templates/duckdb.tpl +++ /dev/null @@ -1,15 +0,0 @@ -{ - "Name": "DuckDB", - "Namespace": "default", - "Type": "batch", - "Count": 1, - "Tasks": [ - { - "Name": "execute", - "Engine": { - "Type": "duckdb", - "Params": {} - } - } - ] -} diff --git a/cmd/cli/exec/templates/python.tpl b/cmd/cli/exec/templates/python.tpl deleted file mode 100644 index a930570ab2..0000000000 --- a/cmd/cli/exec/templates/python.tpl +++ /dev/null @@ -1,17 +0,0 @@ -{ - "Name": "Python", - "Namespace": "default", - "Type": "batch", - "Count": 1, - "Tasks": [ - { - "Name": "execute", - "Engine": { - "Type": "python", - "Params": { - "Version": "{{or (index . "version") "3.11"}}" - } - } - } - ] -} diff --git a/cmd/cli/root.go b/cmd/cli/root.go index 8df28fa5ee..39c5ac8722 100644 --- a/cmd/cli/root.go +++ b/cmd/cli/root.go @@ -15,7 +15,6 @@ import ( "github.com/bacalhau-project/bacalhau/cmd/cli/deprecated" "github.com/bacalhau-project/bacalhau/cmd/cli/devstack" "github.com/bacalhau-project/bacalhau/cmd/cli/docker" - "github.com/bacalhau-project/bacalhau/cmd/cli/exec" "github.com/bacalhau-project/bacalhau/cmd/cli/job" "github.com/bacalhau-project/bacalhau/cmd/cli/node" "github.com/bacalhau-project/bacalhau/cmd/cli/serve" @@ -101,7 +100,6 @@ func NewRootCmd() *cobra.Command { configcli.NewCmd(), devstack.NewCmd(), docker.NewCmd(), - exec.NewCmd(), job.NewCmd(), node.NewCmd(), serve.NewCmd(), @@ -109,6 +107,7 @@ func NewRootCmd() *cobra.Command { wasm.NewCmd(), // deprecated command + deprecated.NewExecCommand(), deprecated.NewCancelCmd(), deprecated.NewCreateCmd(), deprecated.NewDescribeCmd(), diff --git a/cmd/util/flags/configflags/job_translation.go b/cmd/util/flags/configflags/job_translation.go index 050547ce4a..1c1bc82acb 100644 --- a/cmd/util/flags/configflags/job_translation.go +++ b/cmd/util/flags/configflags/job_translation.go @@ -1,18 +1,15 @@ package configflags import ( - "github.com/bacalhau-project/bacalhau/pkg/config" - "github.com/bacalhau-project/bacalhau/pkg/config/types" + legacy_types "github.com/bacalhau-project/bacalhau/pkg/config_legacy/types" ) var JobTranslationFlags = []Definition{ { - FlagName: "requester-job-translation-enabled", - ConfigPath: types.FeatureFlagsExecTranslationKey, - DefaultValue: config.Default.FeatureFlags.ExecTranslation, - Description: `Whether jobs should be translated at the requester node or not. Default: false`, - Deprecated: true, - EnvironmentVariables: []string{"BACALHAU_NODE_REQUESTER_TRANSLATIONENABLED"}, - DeprecatedMessage: makeDeprecationMessage(types.FeatureFlagsExecTranslationKey), + FlagName: "requester-job-translation-enabled", + ConfigPath: legacy_types.NodeRequesterTranslationEnabled, + DefaultValue: false, + Deprecated: true, + DeprecatedMessage: "job translation was an experimental feature and is no longer supported", }, } diff --git a/docker/custom-job-images/Makefile b/docker/custom-job-images/Makefile deleted file mode 100644 index a65e73cd2a..0000000000 --- a/docker/custom-job-images/Makefile +++ /dev/null @@ -1,25 +0,0 @@ -VERSION ?= 0.1 - -local: python-local duckdb-local - -build: python duckdb - -python: - @$(MAKE) -C python build - -python-local: - @$(MAKE) -C python local - -duckdb: - @$(MAKE) -C duckdb build - -duckdb-local: - @$(MAKE) -C duckdb local - -.PHONY: local python duckdb - - -python-test: - docker run --rm -it -v $(shell pwd)/python/test/single-file:/code bacalhauproject/exec-python-3.11:0.5 python /build/launcher.py -- python hello.py - docker run --rm -it -v $(shell pwd)/python/test/multi-file-reqtxt:/code bacalhauproject/exec-python-3.11:0.5 python /build/launcher.py -- python main.py - docker run --rm -it -v $(shell pwd)/python/test/multi-file-poetry:/code bacalhauproject/exec-python-3.11:0.5 python /build/launcher.py -- poetry run mfp diff --git a/docker/custom-job-images/README.md b/docker/custom-job-images/README.md deleted file mode 100644 index 298a5d385b..0000000000 --- a/docker/custom-job-images/README.md +++ /dev/null @@ -1,58 +0,0 @@ -# Custom Job Images - -This directory contains docker images used by the default custom job types, duckdb and python. -These images are used in the translation layer at the orchestrator, where custom job types are -converted into jobs for one of our supported execution environments (as of 1.2 this is docker -and wasm). - -These images make up a bundle that makes up 'custom job types', alongside the translation layer (that converts a 'python' job to a 'docker' job), and the template available to the CLI. - -## Images - -### Python - 3.11 - -`exec-python-3.11` provides a Python image with access to Python 3.11, build-essentials, and -a default set of installed requirements. To add more default requirements, add them to [python/base_requirements.txt](python/base_requirements.txt). - -The image expects a tgz to be mounted at /code from where there build/launcher.py process will: - -* Extract it -* Determine requirements method -* Install requirements -* Execute the command provided by the user - -If an /outputs folder exists, the stdout/stderr from the requirements installation process is written to /outputs/requirements.log for debugging. - -### DuckDB - -`exec-duckdb` provides an installation of duckdb installed in the image root folder. With appropriately mounted inputs, the user is able to specify all of the required parameters for running duckdb tasks (e.g. -csv -c "query") - -## Building - -Each image has two commands, `build` and `local`. - -`local` will build the image, and install it into the local docker engine allow for it to be used on the local machine. - -`build` will build the image and push it to docker hub. - -To use these tasks from the current folder, you can use: - -```shell -make python-local -make duckdb-local - -make python-build -make duckdb-build -``` - - -## Build problems? - -The makefiles provided attempt cross platform builds so that we are able to build on arm64 machines to be executed on amd64 machines. Depending on your setup, this may occasionally show the following error. - -``` -ERROR: Multiple platforms feature is currently not supported for docker driver. -Please switch to a different driver (eg. "docker buildx create --use") -``` - -following the instructions given when you run `docker buildx create --use` should get you building again. diff --git a/docker/custom-job-images/duckdb/Dockerfile b/docker/custom-job-images/duckdb/Dockerfile deleted file mode 100644 index 9b1f1ed0a0..0000000000 --- a/docker/custom-job-images/duckdb/Dockerfile +++ /dev/null @@ -1,17 +0,0 @@ -FROM --platform=$TARGETPLATFORM ubuntu:noble-20231126.1 -ARG TARGETPLATFORM -RUN echo "I am building for $TARGETPLATFORM" > /log - -RUN apt update && apt -yq upgrade && apt -yq install wget unzip -RUN if [ $TARGETPLATFORM = 'linux/arm64' ]; then \ - wget -O /tmp/ddb.zip "https://github.com/duckdb/duckdb/releases/download/v0.9.2/duckdb_cli-linux-aarch64.zip"; \ - else \ - wget -O /tmp/ddb.zip "https://github.com/duckdb/duckdb/releases/download/v0.9.2/duckdb_cli-linux-amd64.zip"; \ - fi -RUN unzip /tmp/ddb.zip -d /usr/local/bin - -LABEL org.opencontainers.image.source https://github.com/bacalhau-project/bacalhau-images -LABEL org.opencontainers.image.title "Bacalhau custom jobtype - Duckdb" -LABEL org.opencontainers.image.description "Duckdb for the bacalhau custom job type" -LABEL org.opencontainers.image.licenses Apache-2.0 -LABEL org.opencontainers.image.url https://bacalhau.org diff --git a/docker/custom-job-images/duckdb/Makefile b/docker/custom-job-images/duckdb/Makefile deleted file mode 100644 index 031e7852a5..0000000000 --- a/docker/custom-job-images/duckdb/Makefile +++ /dev/null @@ -1,23 +0,0 @@ -MACHINE = $(shell uname -m) -USERNAME ?= bacalhauproject -VERSION ?= 0.2 - -ifeq ($(MACHINE),x86_64) - MACHINE := amd64 -endif - -local: - @echo - Building local duckdb $(VERSION) - docker buildx build \ - --platform linux/$(MACHINE) \ - --label org.opencontainers.artifact.created=$(shell date -u +"%Y-%m-%dT%H:%M:%SZ") \ - -t $(USERNAME)/exec-duckdb:$(VERSION) --load . - -build: - @echo - Building duckdb $(VERSION) - docker buildx build \ - --platform linux/amd64,linux/arm64 \ - --label org.opencontainers.artifact.created=$(shell date -u +"%Y-%m-%dT%H:%M:%SZ") \ - -t $(USERNAME)/exec-duckdb:$(VERSION) --push . - -.PHONY: build local diff --git a/docker/custom-job-images/python/Dockerfile b/docker/custom-job-images/python/Dockerfile deleted file mode 100644 index b907ef56ed..0000000000 --- a/docker/custom-job-images/python/Dockerfile +++ /dev/null @@ -1,39 +0,0 @@ -FROM --platform=$TARGETPLATFORM python:3.11.7-bullseye - -RUN mkdir /build -WORKDIR /build - -RUN apt-get -yq update -RUN DEBIAN_FRONTEND=noninteractive apt-get install -y \ - make \ - build-essential \ - libssl-dev \ - zlib1g-dev \ - libbz2-dev \ - libreadline-dev \ - libsqlite3-dev \ - wget \ - curl \ - llvm \ - libncurses5-dev \ - libncursesw5-dev \ - xz-utils \ - tk-dev \ - libffi-dev \ - liblzma-dev \ - git - -RUN python -mpip install --upgrade pip -RUN python -mpip install poetry - -COPY base_requirements.txt /build -RUN python -mpip install -r /build/base_requirements.txt - -COPY launcher.py /build -CMD ["/build/launcher.py"] - -LABEL org.opencontainers.image.source https://github.com/bacalhau-project/bacalhau-images -LABEL org.opencontainers.image.title "Bacalhau custom jobtype - Python 3.11" -LABEL org.opencontainers.image.description "Python for the bacalhau custom job type" -LABEL org.opencontainers.image.licenses Apache-2.0 -LABEL org.opencontainers.image.url https://bacalhau.org diff --git a/docker/custom-job-images/python/Makefile b/docker/custom-job-images/python/Makefile deleted file mode 100644 index 7798374acd..0000000000 --- a/docker/custom-job-images/python/Makefile +++ /dev/null @@ -1,26 +0,0 @@ -MACHINE = $(shell uname -m) -USERNAME ?= bacalhauproject -VERSION ?= 0.5 - -ifeq ($(MACHINE),x86_64) - MACHINE := amd64 -endif - -local: - @echo - Building local python $(VERSION) - $(MACHINE) - docker buildx build \ - --platform linux/$(MACHINE) \ - -t $(USERNAME)/exec-python-3.11:$(VERSION) \ - --label org.opencontainers.artifact.created=$(shell date -u +"%Y-%m-%dT%H:%M:%SZ") \ - --load . - -build: - @echo - Building python $(VERSION) - docker buildx build \ - --platform linux/amd64,linux/arm64 \ - -t $(USERNAME)/exec-python-3.11:$(VERSION) \ - --label org.opencontainers.artifact.created=$(shell date -u +"%Y-%m-%dT%H:%M:%SZ") \ - --push . - - -.PHONY: build local diff --git a/docker/custom-job-images/python/base_requirements.txt b/docker/custom-job-images/python/base_requirements.txt deleted file mode 100644 index ac261435fe..0000000000 --- a/docker/custom-job-images/python/base_requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -# To have some dependencies pre-installed (at docker build time) add them -# to this requirements file. -pandas==2.1 -polar -requests diff --git a/docker/custom-job-images/python/launcher.py b/docker/custom-job-images/python/launcher.py deleted file mode 100755 index bd9fafe623..0000000000 --- a/docker/custom-job-images/python/launcher.py +++ /dev/null @@ -1,181 +0,0 @@ -#!/usr/bin/env python3 -import ast -import os -import shutil -import subprocess -import sys -from glob import glob - -IGNORE = ( - "*.pyc", - ".DS_Store", - "__pycache__", -) - -CODE_DIR = "/code" # The mounted code folder -OUTPUT_DIR = "/outputs" # The output folder - - -def main(): - working_dir = "/app" # Created by the shutil.copytree - - # it's possible we haven't been sent any code (and we're running via -c) - # so let's support not sending code. - if os.path.exists(CODE_DIR): - # Unpack the contents of /code to the working directory which - # will create that working_directory, ignoring the files that - # match the globs in IGNORE - ignore_pattern = shutil.ignore_patterns(*IGNORE) - shutil.copytree(CODE_DIR, working_dir, ignore=ignore_pattern) - os.chdir(working_dir) - - # The inline attachments will have adding the last part of the - # path when adding a directory, and so WORKING_DIR won't contain - # the code, it'll contain that directory. In these cases we'll - # just change the WORKING_DIR. - wd_list = os.listdir(working_dir) - if len(wd_list) == 1: - pth = os.path.join(working_dir, wd_list[0]) - if os.path.isdir(pth): - working_dir = pth - - # Figure out how to install requirements - for f in ( - single_file, - pyproject, - requirements_txt, - setup_py, - ): - if f(working_dir): - break - else: - # We will use the current directory as the working directory as - # we won't have created /app with the copy - working_dir = os.curdir - - # Run the program in that working directory - past = False - args = [] - for a in sys.argv: - if past: - args.append(a) - if a == "--": - past = True - - cmd = " ".join(args) - _ = subprocess.run(cmd, capture_output=False, shell=True, cwd=working_dir) - - -def to_requirements_log(stdoutBytes, stderrBytes): - if os.path.exists(OUTPUT_DIR): - name = os.path.join(OUTPUT_DIR, "requirements.log") - with open(name, "w") as f: - f.write("================================== STDOUT\n") - f.write(stdoutBytes.decode("utf-8")) - f.write("\n================================== STDERR\n") - f.write(stderrBytes.decode("utf-8")) - - -def single_file(working_dir): - """ - If we only find a single file ready to be deployed, we'll read pip install instructions - from the module doc (if it exists). - """ - installed = 0 - doclines = [] - files = glob("*.py", root_dir=working_dir) - - if len(files) == 1: - with open(os.path.join(working_dir, files[0])) as f: - mod = ast.parse(f.read()) - if not mod: - return False - - doc = ast.get_docstring(mod) - if not doc: - return False - - doclines = doc.split("\n") - - for line in doclines: - line = line.strip() - if line.startswith("pip"): - proc = subprocess.run( - f"python -m{line}", capture_output=True, shell=True, cwd=working_dir - ) - to_requirements_log(proc.stdout, proc.stderr) - - installed = installed + 1 - - return installed > 0 - - -def pyproject(working_dir): - """ - If there is a pyproject.toml we'll check to see if it is a poetry app, and if - so then we will get poetry to install dependencies. If not then we will attempt - to pip install them. - """ - pth = os.path.join(working_dir, "pyproject.toml") - if not os.path.exists(pth): - return False - - is_poetry = False - - with open(pth) as f: - contents = f.read() - is_poetry = "[tool.poetry]" in contents - - cmd = "poetry install" - if not is_poetry: - cmd = f"python -mpip install {pth}" - - proc = subprocess.run(cmd, capture_output=True, shell=True, cwd=working_dir) - to_requirements_log(proc.stdout, proc.stderr) - - return True - - -def requirements_txt(working_dir): - """ - Look for a requirements file (or several) based on common names to load the - dependencies from - """ - installed = 0 - files = ("dev-requirements.txt", "requirements-dev.txt", "requirements.txt") - for f in files: - pth = os.path.join(working_dir, f) - if os.path.exists(pth): - proc = subprocess.run( - f"python -mpip install -r {f}", - capture_output=True, - shell=True, - cwd=working_dir, - ) - to_requirements_log(proc.stdout, proc.stderr) - - installed = installed + 1 - - return installed > 0 - - -def setup_py(working_dir): - """ - Look for a setup.py file as a last resort and try to install it locally - """ - pth = os.path.join(working_dir, "setup.py") - if os.path.exists(pth): - proc = subprocess.run( - "python -m pip install -e .", - capture_output=True, - shell=True, - cwd=working_dir, - ) - to_requirements_log(proc.stdout, proc.stderr) - return True - - return False - - -if __name__ == "__main__": - main() diff --git a/docker/custom-job-images/python/test/multi-file-poetry/multi_file_poetry/__init__.py b/docker/custom-job-images/python/test/multi-file-poetry/multi_file_poetry/__init__.py deleted file mode 100644 index 9858d0be1d..0000000000 --- a/docker/custom-job-images/python/test/multi-file-poetry/multi_file_poetry/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -def main(): - from colorama import Fore - - print(Fore.BLUE + "Hello World") diff --git a/docker/custom-job-images/python/test/multi-file-poetry/poetry.lock b/docker/custom-job-images/python/test/multi-file-poetry/poetry.lock deleted file mode 100644 index 39de90bfbf..0000000000 --- a/docker/custom-job-images/python/test/multi-file-poetry/poetry.lock +++ /dev/null @@ -1,17 +0,0 @@ -# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand. - -[[package]] -name = "colorama" -version = "0.4.6" -description = "Cross-platform colored terminal text." -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" -files = [ - {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, - {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, -] - -[metadata] -lock-version = "2.0" -python-versions = "^3.10" -content-hash = "29aa9de81f853ba77bf312052e460b1f92e0290eed2c8cc67ba60ddd99b4ee19" diff --git a/docker/custom-job-images/python/test/multi-file-poetry/pyproject.toml b/docker/custom-job-images/python/test/multi-file-poetry/pyproject.toml deleted file mode 100644 index e2d762a6bd..0000000000 --- a/docker/custom-job-images/python/test/multi-file-poetry/pyproject.toml +++ /dev/null @@ -1,19 +0,0 @@ -[tool.poetry] -name = "multi-file-poetry" -version = "0.1.0" -description = "" -authors = ["Bacalhau Team"] -readme = "README.md" -packages = [{ include = "multi_file_poetry" }] - -[tool.poetry.dependencies] -python = "^3.10" -colorama = "^0.4.6" - - -[build-system] -requires = ["poetry-core"] -build-backend = "poetry.core.masonry.api" - -[tool.poetry.scripts] -mfp = "multi_file_poetry:main" diff --git a/docker/custom-job-images/python/test/multi-file-poetry/tests/__init__.py b/docker/custom-job-images/python/test/multi-file-poetry/tests/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/docker/custom-job-images/python/test/multi-file-reqtxt/hello.py b/docker/custom-job-images/python/test/multi-file-reqtxt/hello.py deleted file mode 100644 index 4cae309aad..0000000000 --- a/docker/custom-job-images/python/test/multi-file-reqtxt/hello.py +++ /dev/null @@ -1,5 +0,0 @@ -from colorama import Fore - - -def hello(): - print(Fore.GREEN + "Hello World!") diff --git a/docker/custom-job-images/python/test/multi-file-reqtxt/main.py b/docker/custom-job-images/python/test/multi-file-reqtxt/main.py deleted file mode 100644 index 71c4133de5..0000000000 --- a/docker/custom-job-images/python/test/multi-file-reqtxt/main.py +++ /dev/null @@ -1,4 +0,0 @@ -from hello import hello - -if __name__ == "__main__": - hello() diff --git a/docker/custom-job-images/python/test/multi-file-reqtxt/requirements.txt b/docker/custom-job-images/python/test/multi-file-reqtxt/requirements.txt deleted file mode 100644 index 3fcfb51b2a..0000000000 --- a/docker/custom-job-images/python/test/multi-file-reqtxt/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -colorama diff --git a/docker/custom-job-images/python/test/single-file/hello.py b/docker/custom-job-images/python/test/single-file/hello.py deleted file mode 100644 index aa09810df7..0000000000 --- a/docker/custom-job-images/python/test/single-file/hello.py +++ /dev/null @@ -1,6 +0,0 @@ -""" -pip install colorama -""" -from colorama import Fore - -print(Fore.RED + "Hello World!!") diff --git a/go.mod b/go.mod index 6be2986261..eae94395a9 100644 --- a/go.mod +++ b/go.mod @@ -78,7 +78,6 @@ require ( go.uber.org/zap v1.27.0 golang.org/x/crypto v0.27.0 golang.org/x/exp v0.0.0-20240213143201-ec583247a57a - gopkg.in/alessio/shellescape.v1 v1.0.0-20170105083845-52074bc9df61 k8s.io/apimachinery v0.29.0 k8s.io/kubectl v0.29.0 sigs.k8s.io/yaml v1.4.0 @@ -88,7 +87,6 @@ require ( github.com/KyleBanks/depth v1.2.1 // indirect github.com/OneOfOne/xxhash v1.2.8 // indirect github.com/agnivade/levenshtein v1.1.1 // indirect - github.com/alessio/shellescape v1.4.2 // indirect github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.4 // indirect github.com/aws/aws-sdk-go-v2/credentials v1.17.3 // indirect github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.15.1 // indirect diff --git a/go.sum b/go.sum index f908e81593..62f4697ba2 100644 --- a/go.sum +++ b/go.sum @@ -288,8 +288,6 @@ github.com/alecthomas/participle/v2 v2.1.0/go.mod h1:Y1+hAs8DHPmc3YUFzqllV+eSQ9l github.com/alecthomas/repr v0.2.0/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW57eE/O/4= github.com/alecthomas/units v0.0.0-20231202071711-9a357b53e9c9 h1:ez/4by2iGztzR4L0zgAOR8lTQK9VlyBVVd7G4omaOQs= github.com/alecthomas/units v0.0.0-20231202071711-9a357b53e9c9/go.mod h1:OMCwj8VM1Kc9e19TLln2VL61YJF0x1XFtfdL4JdbSyE= -github.com/alessio/shellescape v1.4.2 h1:MHPfaU+ddJ0/bYWpgIeUnQUqKrlJ1S7BfEYPM4uEoM0= -github.com/alessio/shellescape v1.4.2/go.mod h1:PZAiSCk0LJaZkiCSkPv8qIobYglO3FPpyFjDCtHLS30= github.com/alexbrainman/goissue34681 v0.0.0-20191006012335-3fc7a47baff5 h1:iW0a5ljuFxkLGPNem5Ui+KBjFJzKg4Fv2fnxe4dvzpM= github.com/alexbrainman/goissue34681 v0.0.0-20191006012335-3fc7a47baff5/go.mod h1:Y2QMoi1vgtOIfc+6DhrMOGkLoGzqSV2rKp4Sm+opsyA= github.com/andybalholm/brotli v1.0.5/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= @@ -1973,8 +1971,6 @@ google.golang.org/protobuf v1.34.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHh google.golang.org/protobuf v1.34.1/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg= google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw= -gopkg.in/alessio/shellescape.v1 v1.0.0-20170105083845-52074bc9df61 h1:8ajkpB4hXVftY5ko905id+dOnmorcS2CHNxxHLLDcFM= -gopkg.in/alessio/shellescape.v1 v1.0.0-20170105083845-52074bc9df61/go.mod h1:IfMagxm39Ys4ybJrDb7W3Ob8RwxftP0Yy+or/NVz1O8= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/ops/terraform/remote_files/scripts/start-bacalhau.sh b/ops/terraform/remote_files/scripts/start-bacalhau.sh index 252fbb148c..b7c30ad446 100644 --- a/ops/terraform/remote_files/scripts/start-bacalhau.sh +++ b/ops/terraform/remote_files/scripts/start-bacalhau.sh @@ -36,7 +36,6 @@ bacalhau serve \ --job-execution-timeout-bypass-client-id="${TRUSTED_CLIENT_IDS}" \ --ipfs-connect /ip4/127.0.0.1/tcp/5001 \ --api-port 1234 \ - --requester-job-translation-enabled \ --config DisableAnalytics \ --config labels="owner=bacalhau,name=node-${TERRAFORM_NODE_INDEX}"\ --config Compute.Orchestrators="${BACALHAU_ORCHESTRATORS}" \ diff --git a/pkg/config/migrate.go b/pkg/config/migrate.go index f2c0dc7c2e..5a0b5d26eb 100644 --- a/pkg/config/migrate.go +++ b/pkg/config/migrate.go @@ -90,9 +90,7 @@ func MigrateV1(in v1types.BacalhauConfig) (types.Bacalhau, error) { UpdateConfig: types.UpdateConfig{ Interval: types.Duration(in.Update.CheckFrequency), }, - FeatureFlags: types.FeatureFlags{ - ExecTranslation: in.Node.Requester.TranslationEnabled, - }, + FeatureFlags: types.FeatureFlags{}, } return out, nil } diff --git a/pkg/config/types/feature_flags.go b/pkg/config/types/feature_flags.go index b4b55a6dc5..8dc5379636 100644 --- a/pkg/config/types/feature_flags.go +++ b/pkg/config/types/feature_flags.go @@ -1,6 +1,4 @@ package types type FeatureFlags struct { - // ExecTranslation enables the execution translation feature. - ExecTranslation bool `yaml:"ExecTranslation,omitempty" json:"ExecTranslation,omitempty"` } diff --git a/pkg/config/types/generated_constants.go b/pkg/config/types/generated_constants.go index c805277289..f30724e5b5 100644 --- a/pkg/config/types/generated_constants.go +++ b/pkg/config/types/generated_constants.go @@ -31,7 +31,6 @@ const EnginesDisabledKey = "Engines.Disabled" const EnginesTypesDockerManifestCacheRefreshKey = "Engines.Types.Docker.ManifestCache.Refresh" const EnginesTypesDockerManifestCacheSizeKey = "Engines.Types.Docker.ManifestCache.Size" const EnginesTypesDockerManifestCacheTTLKey = "Engines.Types.Docker.ManifestCache.TTL" -const FeatureFlagsExecTranslationKey = "FeatureFlags.ExecTranslation" const InputSourcesDisabledKey = "InputSources.Disabled" const InputSourcesMaxRetryCountKey = "InputSources.MaxRetryCount" const InputSourcesReadTimeoutKey = "InputSources.ReadTimeout" diff --git a/pkg/config/types/generated_descriptions.go b/pkg/config/types/generated_descriptions.go index 897bb42dc2..f60305469b 100644 --- a/pkg/config/types/generated_descriptions.go +++ b/pkg/config/types/generated_descriptions.go @@ -33,7 +33,6 @@ var ConfigDescriptions = map[string]string{ EnginesTypesDockerManifestCacheRefreshKey: "Refresh specifies the refresh interval for cache entries.", EnginesTypesDockerManifestCacheSizeKey: "Size specifies the size of the Docker manifest cache.", EnginesTypesDockerManifestCacheTTLKey: "TTL specifies the time-to-live duration for cache entries.", - FeatureFlagsExecTranslationKey: "ExecTranslation enables the execution translation feature.", InputSourcesDisabledKey: "Disabled specifies a list of storages that are disabled.", InputSourcesMaxRetryCountKey: "ReadTimeout specifies the maximum number of attempts for reading from a storage.", InputSourcesReadTimeoutKey: "ReadTimeout specifies the maximum time allowed for reading from a storage.", diff --git a/pkg/models/constants.go b/pkg/models/constants.go index 1e3d0631b0..05c0a8c950 100644 --- a/pkg/models/constants.go +++ b/pkg/models/constants.go @@ -88,11 +88,6 @@ const ( MetaRequesterID = "bacalhau.org/requester.id" MetaClientID = "bacalhau.org/client.id" - // Job provenance metadata used to track the origin of a job where - // it may have been translated from another job. - MetaDerivedFrom = "bacalhau.org/derivedFrom" - MetaTranslatedBy = "bacalhau.org/translatedBy" - MetaServerInstallationID = "bacalhau.org/server.installation.id" MetaServerInstanceID = "bacalhau.org/server.instance.id" MetaClientInstallationID = "bacalhau.org/client.installation.id" diff --git a/pkg/node/requester.go b/pkg/node/requester.go index 96166b5285..f48ba7f6ee 100644 --- a/pkg/node/requester.go +++ b/pkg/node/requester.go @@ -32,7 +32,6 @@ import ( "github.com/bacalhau-project/bacalhau/pkg/routing/tracing" s3helper "github.com/bacalhau-project/bacalhau/pkg/s3" "github.com/bacalhau-project/bacalhau/pkg/system" - "github.com/bacalhau-project/bacalhau/pkg/translation" "github.com/bacalhau-project/bacalhau/pkg/util" "github.com/bacalhau-project/bacalhau/pkg/compute" @@ -195,11 +194,6 @@ func NewRequesterNode( resultTransformers = append(resultTransformers, resultSigner) } - var translationProvider translation.TranslatorProvider - if cfg.BacalhauConfig.FeatureFlags.ExecTranslation { - translationProvider = translation.NewStandardTranslatorsProvider() - } - jobTransformers := transformer.ChainedTransformer[*models.Job]{ transformer.JobFn(transformer.IDGenerator), transformer.NameOptional(), @@ -214,7 +208,6 @@ func NewRequesterNode( Store: jobStore, ComputeProxy: computeProxy, JobTransformer: jobTransformers, - TaskTranslator: translationProvider, ResultTransformer: resultTransformers, }) diff --git a/pkg/orchestrator/endpoint.go b/pkg/orchestrator/endpoint.go index caf28c0d78..fd5a02fbf4 100644 --- a/pkg/orchestrator/endpoint.go +++ b/pkg/orchestrator/endpoint.go @@ -2,13 +2,10 @@ package orchestrator import ( "context" - "encoding/base64" "fmt" "time" "github.com/google/uuid" - "github.com/pkg/errors" - "sigs.k8s.io/yaml" "github.com/bacalhau-project/bacalhau/pkg/analytics" "github.com/bacalhau-project/bacalhau/pkg/bacerrors" @@ -18,7 +15,6 @@ import ( "github.com/bacalhau-project/bacalhau/pkg/lib/concurrency" "github.com/bacalhau-project/bacalhau/pkg/models" "github.com/bacalhau-project/bacalhau/pkg/orchestrator/transformer" - "github.com/bacalhau-project/bacalhau/pkg/translation" ) type BaseEndpointParams struct { @@ -26,7 +22,6 @@ type BaseEndpointParams struct { Store jobstore.Store ComputeProxy compute.Endpoint JobTransformer transformer.JobTransformer - TaskTranslator translation.TranslatorProvider ResultTransformer transformer.ResultTransformer } @@ -35,7 +30,6 @@ type BaseEndpoint struct { store jobstore.Store computeProxy compute.Endpoint jobTransformer transformer.JobTransformer - taskTranslator translation.TranslatorProvider resultTransformer transformer.ResultTransformer } @@ -45,7 +39,6 @@ func NewBaseEndpoint(params *BaseEndpointParams) *BaseEndpoint { store: params.Store, computeProxy: params.ComputeProxy, jobTransformer: params.JobTransformer, - taskTranslator: params.TaskTranslator, resultTransformer: params.ResultTransformer, } } @@ -73,34 +66,6 @@ func (e *BaseEndpoint) SubmitJob(ctx context.Context, request *SubmitJobRequest) } submitEvent.JobID = job.ID - var translationEvent models.Event - - // We will only perform task translation in the orchestrator if we were provided with a provider - // that can give translators to perform the translation. - if e.taskTranslator != nil { - // Before we create an evaluation for the job, we want to check that none of the job's tasks - // need translating from a custom job type to a known job type (docker, wasm). If they do, - // then we will perform the translation and create the evaluation for the new job instead. - translatedJob, err := translation.Translate(ctx, e.taskTranslator, job) - if err != nil { - return nil, errors.Wrap(err, fmt.Sprintf("failed to translate job type: %s", job.Task().Engine.Type)) - } - - // If we have translated the job (i.e. at least one task was translated) then we will record the original - // job that was used to create the translated job. This will allow us to track the provenance of the job - // when using `describe` and will ensure only the original job is returned when using `list`. - if translatedJob != nil { - if b, err := yaml.Marshal(translatedJob); err != nil { - return nil, errors.Wrap(err, "failure converting job to JSON") - } else { - translatedJob.Meta[models.MetaDerivedFrom] = base64.StdEncoding.EncodeToString(b) - translationEvent = JobTranslatedEvent(job, translatedJob) - } - - job = translatedJob - } - } - txContext, err := e.store.BeginTx(ctx) if err != nil { return nil, fmt.Errorf("failed to begin transaction: %w", err) @@ -118,11 +83,6 @@ func (e *BaseEndpoint) SubmitJob(ctx context.Context, request *SubmitJobRequest) if err = e.store.AddJobHistory(txContext, job.ID, JobSubmittedEvent()); err != nil { return nil, err } - if translationEvent.Message != "" { - if err = e.store.AddJobHistory(txContext, job.ID, translationEvent); err != nil { - return nil, err - } - } eval := &models.Evaluation{ ID: uuid.NewString(), diff --git a/pkg/translation/translation.go b/pkg/translation/translation.go deleted file mode 100644 index 56f8bc2c2d..0000000000 --- a/pkg/translation/translation.go +++ /dev/null @@ -1,118 +0,0 @@ -// Package translation provides interfaces for translating from a Job to a -// different Job. This allow us to accept more job types than we have -// executors as we translate from the abstract type to the concrete executor. -// -// When presented with a Job, this package iterates through the tasks -// belonging to the job to determine whether any of the tasks have an -// Engine type that is not one of the core executors (docker or wasm). -// If it does not, then it returns immediately. -// -// For the discovered tasks, the TranslatorProvider is asked to provide an -// implementation of the Translator interface based on the task's engine type. -// The newly obtained Translator processes the task and returns a new task -// with a known engine type (docker or wasm). Depending on where the -// translation occurs, extra work might result in the generation of a derived -// job. - -package translation - -import ( - "context" - "errors" - "fmt" - - "github.com/bacalhau-project/bacalhau/pkg/lib/provider" - "github.com/bacalhau-project/bacalhau/pkg/models" - "github.com/bacalhau-project/bacalhau/pkg/translation/translators" - "github.com/bacalhau-project/bacalhau/pkg/util/idgen" -) - -// Translator defines what functions are required for a component that -// is able to translate from one job to another. It is important that -// implementers ensure that their implementation is reentrant - which -// means it should not use any mutable state after initialization. -type Translator interface { - provider.Providable - - Translate(*models.Task) (*models.Task, error) -} - -// TranslatorProvider is an alias for `provider.Provider[Translator]` -type TranslatorProvider interface { - provider.Provider[Translator] -} - -// NewStandardTranslatorsProvider returns a TranslatorProvider which maps names -// to implementations of the Translator interface -func NewStandardTranslatorsProvider() TranslatorProvider { - return provider.NewMappedProvider(map[string]Translator{ - "python": &translators.PythonTranslator{}, - "duckdb": &translators.DuckDBTranslator{}, - }) -} - -// Translate attempts to translate from one job to another, based on the engine type -// of the tasks in the job. After ensuring that each of the tasks is either a default -// (docker, wasm) or available via the provider, then a new Job is cloned from the -// original and the individual tasks updated. -func Translate(ctx context.Context, provider TranslatorProvider, original *models.Job) (*models.Job, error) { - if shouldTr, err := ShouldTranslate(ctx, provider, original.Tasks); err != nil { - return nil, err - } else { - // Nothing for us to do so we should return immediately - if !shouldTr { - return nil, nil - } - } - - newJob := original.Copy() - newJob.ID = idgen.NewJobID() - - var errs error - - for i := range newJob.Tasks { - task := newJob.Tasks[i] - kind := task.Engine.Type - - if models.IsDefaultEngineType(kind) { - continue // and leave this task in place - } - - if translator, err := provider.Get(ctx, kind); err != nil { - errs = errors.Join(errs, err) - } else { - t, err := translator.Translate(task) - if err != nil { - errs = errors.Join(errs, err) - continue - } - - // Copy the newly translated task over the top of the task - // that was copied from the original job - newJob.Tasks[i] = t - } - } - - return newJob, errs -} - -// ShouldTranslate works out whether we need to carry on with translation, that is -// are there any engine types specified that are not a default engine and we know -// how to translate. If not, then we can exit early. -func ShouldTranslate(ctx context.Context, provider TranslatorProvider, tasks []*models.Task) (bool, error) { - var errs error - needTranslationCount := 0 - - for i := range tasks { - kind := tasks[i].Engine.Type - if provider.Has(ctx, kind) { - needTranslationCount += 1 - } else if kind == models.EngineDocker || kind == models.EngineWasm || kind == models.EngineNoop { - continue - } else { - errs = errors.Join(errs, fmt.Errorf("unknown task type identified in translation: '%s'", kind)) - } - } - - return needTranslationCount > 0, errs -} diff --git a/pkg/translation/translation_test.go b/pkg/translation/translation_test.go deleted file mode 100644 index d71a694efb..0000000000 --- a/pkg/translation/translation_test.go +++ /dev/null @@ -1,200 +0,0 @@ -//go:build unit || !integration - -package translation_test - -import ( - "context" - "testing" - - "github.com/bacalhau-project/bacalhau/pkg/models" - "github.com/bacalhau-project/bacalhau/pkg/translation" - "github.com/stretchr/testify/suite" -) - -type TranslationTestSuite struct { - suite.Suite - ctx context.Context - provider translation.TranslatorProvider -} - -func TestTranslationTestSuite(t *testing.T) { - suite.Run(t, new(TranslationTestSuite)) -} - -func (s *TranslationTestSuite) SetupSuite() { - s.ctx = context.Background() - s.provider = translation.NewStandardTranslatorsProvider() -} - -var testcases = []struct { - name string - spec *models.SpecConfig - expected *models.SpecConfig -}{ - { - name: "python", - spec: &models.SpecConfig{ - Type: "python", - Params: map[string]interface{}{ - "Command": "python", - "Arguments": []interface{}{"-c", "print('Hello, world!')"}, - }, - }, - expected: &models.SpecConfig{ - Type: "docker", - Params: map[string]interface{}{ - "Image": "bacalhauproject/exec-python-3.11:0.5", - "Entrypoint": []string{}, - "Parameters": []string{ - "/build/launcher.py", "--", "python", "-c", "print('Hello, world!')", - }, - "EnvironmentVariables": []string{}, - "WorkingDirectory": "", - }, - }, - }, - { - name: "python with spaces", - spec: &models.SpecConfig{ - Type: "python", - Params: map[string]interface{}{ - "Command": "python", - "Arguments": []interface{}{"-c", `"import this"`}, - }, - }, - expected: &models.SpecConfig{ - Type: "docker", - Params: map[string]interface{}{ - "Image": "bacalhauproject/exec-python-3.11:0.5", - "Entrypoint": []string{}, - "Parameters": []string{ - "/build/launcher.py", "--", "python", "-c", `"import this"`, - }, - "EnvironmentVariables": []string{}, - "WorkingDirectory": "", - }, - }, - }, -} - -func (s *TranslationTestSuite) TestTranslate() { - for _, tc := range testcases { - s.Run(tc.name, func() { - job := &models.Job{ - ID: tc.name, - Tasks: []*models.Task{ - { - Name: "task1", - Engine: tc.spec, - }, - }, - } - - translated, err := translation.Translate(s.ctx, s.provider, job) - s.Require().NoError(err) - - s.Require().Equal(tc.expected, translated.Task().Engine) - }) - } -} - -func (s *TranslationTestSuite) TestTranslateWithInvalidEngine() { - job := &models.Job{ - ID: "invalid_engine", - Tasks: []*models.Task{ - { - Name: "task1", - Engine: &models.SpecConfig{ - Type: "invalid", - }, - }, - }, - } - - _, err := translation.Translate(s.ctx, s.provider, job) - s.Require().Error(err) -} - -func (s *TranslationTestSuite) TestTranslateWithDefaultEngine() { - job := &models.Job{ - ID: "invalid_engine", - Tasks: []*models.Task{ - { - Name: "task1", - Engine: &models.SpecConfig{ - Type: "docker", - }, - }, - }, - } - - translated, err := translation.Translate(s.ctx, s.provider, job) - s.Require().NoError(err) - s.Require().Nil(translated) -} - -func (s *TranslationTestSuite) TestTranslateWithMixedEngines() { - job := &models.Job{ - ID: "invalid_engine", - Tasks: []*models.Task{ - { - Name: "task1", - Engine: &models.SpecConfig{ - Type: "docker", - }, - }, - { - Name: "task2", - Engine: &models.SpecConfig{ - Type: "duckdb", - Params: map[string]interface{}{ - "Command": "duckdb", - "Arguments": []interface{}{"-csv", "-c", "select * from table;"}, - }, - }, - }, - }, - } - - translated, err := translation.Translate(s.ctx, s.provider, job) - s.Require().NoError(err) - s.Require().NotNil(translated) - - // Before - s.Require().Equal("docker", job.Tasks[0].Engine.Type) - s.Require().Equal("duckdb", job.Tasks[1].Engine.Type) - - // After - s.Require().Equal("docker", translated.Tasks[0].Engine.Type) - s.Require().Equal("docker", translated.Tasks[1].Engine.Type) -} - -func (s *TranslationTestSuite) TestShouldTranslateWithDefaultEngine() { - tasks := []*models.Task{ - { - Name: "task1", - Engine: &models.SpecConfig{ - Type: "docker", - }, - }, - } - - should, err := translation.ShouldTranslate(s.ctx, s.provider, tasks) - s.Require().NoError(err) - s.Require().False(should) -} - -func (s *TranslationTestSuite) TestShouldTranslateWithNonDefaultEngine() { - tasks := []*models.Task{ - { - Name: "task1", - Engine: &models.SpecConfig{ - Type: "python", - }, - }, - } - - should, err := translation.ShouldTranslate(s.ctx, s.provider, tasks) - s.Require().NoError(err) - s.Require().True(should) -} diff --git a/pkg/translation/translators/duckdb.go b/pkg/translation/translators/duckdb.go deleted file mode 100644 index fb98cf0ebc..0000000000 --- a/pkg/translation/translators/duckdb.go +++ /dev/null @@ -1,54 +0,0 @@ -package translators - -import ( - "context" - - "github.com/bacalhau-project/bacalhau/pkg/models" - "github.com/bacalhau-project/bacalhau/pkg/util" -) - -const DuckDBImage = "bacalhauproject/exec-duckdb:0.2" - -type DuckDBTranslator struct{} - -func (d *DuckDBTranslator) IsInstalled(context.Context) (bool, error) { - return true, nil -} - -func (d *DuckDBTranslator) Translate(original *models.Task) (*models.Task, error) { - dkrSpec, err := d.dockerEngine(original.Engine) - if err != nil { - return nil, err - } - - builder := original. - ToBuilder(). - Meta(models.MetaTranslatedBy, "translators/duckdb"). - Engine(dkrSpec) - - return builder.BuildOrDie(), nil -} - -func (d *DuckDBTranslator) dockerEngine(origin *models.SpecConfig) (*models.SpecConfig, error) { - // It'd be nice to use pkg/executor/docker/types/EngineSpec here, but it - // would mean adding a dependency on yet another package. - cmd := origin.Params["Command"].(string) - args, err := util.InterfaceToStringArray(origin.Params["Arguments"]) - if err != nil { - return nil, err - } - - params := []string{} - - params = append(params, cmd) - params = append(params, args...) - - spec := models.NewSpecConfig(models.EngineDocker) - spec.Params["Image"] = DuckDBImage - spec.Params["Entrypoint"] = []string{} - spec.Params["Parameters"] = params - spec.Params["EnvironmentVariables"] = []string{} - spec.Params["WorkingDirectory"] = "" - - return spec, nil -} diff --git a/pkg/translation/translators/errors.go b/pkg/translation/translators/errors.go deleted file mode 100644 index fa50464ca7..0000000000 --- a/pkg/translation/translators/errors.go +++ /dev/null @@ -1,7 +0,0 @@ -package translators - -import "fmt" - -func ErrMissingParameters(trs string) error { - return fmt.Errorf("missing parameters in task for '%s' translator", trs) -} diff --git a/pkg/translation/translators/python.go b/pkg/translation/translators/python.go deleted file mode 100644 index 6ad4d27a52..0000000000 --- a/pkg/translation/translators/python.go +++ /dev/null @@ -1,103 +0,0 @@ -package translators - -import ( - "context" - "fmt" - - "github.com/bacalhau-project/bacalhau/pkg/models" - "github.com/bacalhau-project/bacalhau/pkg/util" - "golang.org/x/exp/maps" -) - -// PythonPackageDomains lists all of the domains that might be needed to install -// dependencies at runtime. -var PythonPackageDomains = []string{ - "pypi.python.org", - "pypi.org", - "pythonhosted.org", - "files.pythonhosted.org", - "repo.anaconda.com", - "repo.continuum.io", - "conda.anaconda.org", -} - -// SupportedPythonVersions maps the python version to the docker image that -// provides support for that version. -var SupportedPythonVersions = map[string]string{ - "3.11": "bacalhauproject/exec-python-3.11:0.5", -} - -type PythonTranslator struct{} - -func (p *PythonTranslator) IsInstalled(context.Context) (bool, error) { - return true, nil -} - -func (p *PythonTranslator) Translate(original *models.Task) (*models.Task, error) { - dkrSpec, err := p.dockerEngine(original.Engine) - if err != nil { - return nil, err - } - - builder := original. - ToBuilder(). - Meta(models.MetaTranslatedBy, "translators/python"). - Engine(dkrSpec) - - original.Network = &models.NetworkConfig{ - Type: models.NetworkHTTP, - Domains: PythonPackageDomains, - } - - return builder.BuildOrDie(), nil -} - -func (p *PythonTranslator) dockerEngine(origin *models.SpecConfig) (*models.SpecConfig, error) { - // It'd be nice to use pkg/executor/docker/types/EngineSpec here, but it - // would mean adding a dependency on yet another package. - cmd := origin.Params["Command"].(string) - args, err := util.InterfaceToStringArray(origin.Params["Arguments"]) - if err != nil { - return nil, err - } - - versionString := "3.11" // Default version - version := origin.Params["Version"] - if version != nil { - versionString = version.(string) - } - - image, err := getImageName(versionString) - if err != nil { - return nil, err - } - - params := []string{ - "/build/launcher.py", "--", - } - - params = append(params, cmd) - params = append(params, args...) - - spec := models.NewSpecConfig(models.EngineDocker) - spec.Params["Image"] = image - spec.Params["Entrypoint"] = []string{} - spec.Params["Parameters"] = params - spec.Params["EnvironmentVariables"] = []string{} - spec.Params["WorkingDirectory"] = "" - - return spec, nil -} - -func getImageName(version string) (string, error) { - image, found := SupportedPythonVersions[version] - if !found { - supported := "" - versions := maps.Keys(SupportedPythonVersions) - for i := range versions { - supported += fmt.Sprintf(" * %s\n", versions[i]) - } - return "", fmt.Errorf("unsupported python version: %s\nsupported versions are:\n%s", version, supported) - } - return image, nil -} diff --git a/pkg/util/conversion.go b/pkg/util/conversion.go deleted file mode 100644 index 216f30e440..0000000000 --- a/pkg/util/conversion.go +++ /dev/null @@ -1,30 +0,0 @@ -package util - -import "fmt" - -// InterfaceToStringArray converts an interface{} that we know is a []string -// to that []string via []interface{}. This is useful when we have a map[string]interface{} -// and we want to get the []string{} out of it. -func InterfaceToStringArray(source interface{}) ([]string, error) { - if source == nil { - return nil, nil - } - - // // If it is already a string array, then return it. - // strArray, ok := source.([]string) - // if ok { - // return strArray, nil - // } - - sourceArray, ok := source.([]interface{}) - if !ok { - return nil, fmt.Errorf("expected []interface{} but got %T", source) - } - - result := make([]string, len(sourceArray)) - for i, v := range sourceArray { - result[i] = fmt.Sprint(v) - } - - return result, nil -} diff --git a/pkg/util/conversion_test.go b/pkg/util/conversion_test.go deleted file mode 100644 index 1a3ec4f735..0000000000 --- a/pkg/util/conversion_test.go +++ /dev/null @@ -1,93 +0,0 @@ -//go:build unit || !integration - -package util_test - -import ( - "testing" - - "github.com/bacalhau-project/bacalhau/pkg/util" - "github.com/stretchr/testify/require" -) - -func TestInterfaceToStringArray(t *testing.T) { - testcases := []struct { - name string - source interface{} - expected []string - shouldError bool - }{ - { - name: "nil", - source: nil, - expected: nil, - shouldError: false, - }, - { - name: "empty", - source: []interface{}{}, - expected: []string{}, - shouldError: false, - }, - { - name: "string", - source: []interface{}{"foo"}, - expected: []string{"foo"}, - shouldError: false, - }, - { - name: "int", - source: []interface{}{1}, - expected: []string{"1"}, - shouldError: false, - }, - { - name: "float", - source: []interface{}{1.1}, - expected: []string{"1.1"}, - shouldError: false, - }, - { - name: "bool", - source: []interface{}{true}, - expected: []string{"true"}, - shouldError: false, - }, - { - name: "mixed", - source: []interface{}{"foo", 1, 1.1, true}, - expected: []string{"foo", "1", "1.1", "true"}, - shouldError: false, - }, - { - name: "map", - source: map[string]interface{}{"foo": "bar"}, - expected: nil, - shouldError: true, - }, - { - name: "string array", - source: []interface{}{"foo", "bar"}, - expected: []string{"foo", "bar"}, - shouldError: false, - }, - { - name: "int array", - source: []interface{}{1, 2}, - expected: []string{"1", "2"}, - shouldError: false, - }, - } - - for _, tc := range testcases { - t.Run(tc.name, func(t *testing.T) { - actual, err := util.InterfaceToStringArray(tc.source) - if tc.shouldError { - require.Error(t, err) - return - } - - require.NoError(t, err) - require.Equal(t, tc.expected, actual) - }) - } -}