From 349eab39661273d38ad8a35c646227b4c757f315 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Sun, 23 Aug 2020 00:08:15 +0200 Subject: [PATCH 01/23] redefine basic OCR-D HTTP API in OpenAPI 3.0.1 --- ocrd_api.swagger.yml | 292 ------------------------------------------- openapi.yml | 287 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 287 insertions(+), 292 deletions(-) delete mode 100644 ocrd_api.swagger.yml create mode 100644 openapi.yml diff --git a/ocrd_api.swagger.yml b/ocrd_api.swagger.yml deleted file mode 100644 index 3fe7fde..0000000 --- a/ocrd_api.swagger.yml +++ /dev/null @@ -1,292 +0,0 @@ -openapi: 3.0.0 -info: - description: This is a generic definition of the modules used for the OCR-D project. - - * Implementation path for the different processors (description) - - * /preprocessing/characterization (Determine specific characteristics of a digitalization.) - - * /preprocessing/optimization (Execute all steps for image optimization at once.) - - * /preprocessing/optimization/cropping (Crops the digitization to printspace.) - - * /preprocessing/optimization/deskewing (Deskews the digitization.) - - * /preprocessing/optimization/despeckling (Despeckles the digitization.) - - * /preprocessing/optimization/dewarping (Dewarps the digitization.) - - * /preprocessing/optimization/binarization (Binarize the digitization.) - - * /recognition/text-recognition (Estimate font families and languages used.) - - * /recognition/font-identification (Estimate font families and languages used.) - - * /layout/segmentation (Detect and classify all regions of a page.) - - * /layout/segmentation/page (Detect all regions of a page.) - - * /layout/segmentation/line (Detect all text line regions inside the text regions of a page.) - - * /layout/segmentation/classification | Fine-grained classification of all detected regions of a page.) - - * /layout/analysis (Analyse regions on document level.) - version: "1.1.0" - title: "OCR-D" - termsOfService: "http://swagger.io/terms/" - contact: - email: "volker.hartmann@kit.edu" - license: - name: "Apache 2.0" - url: "http://www.apache.org/licenses/LICENSE-2.0.html" -servers: - - url: "http://test.ocr-d.de/v1" -tags: - - name: "Image preprocessing" - description: "Methods that characterize and optimize the digital representations for layout and text recognition." - - name: "Layout analysis" - description: "Registration of the correct text flow, that means the reproduction of the logical-structural text flow and high text accuracy." - - name: "Text recognition and optimization" - description: "Combination of several OCR engines with subsequent corrections to improve text results." - - name: "Model training" - description: "Training and provision of models." - - name: "Long-term preservation" - description: "Ensuring the long-term availability and referenceability of the texts obtained." - - name: "Quality assurance" - description: "Determination/estimation of the potential of each component." -paths: - /ocrd/processor: - post: - tags: - - "Image preprocessing" - - "Layout analysis" - - "Text recognition and optimization" - summary: "Start job for processing one of the tasks defined by OCR-D." - description: "Start job for processing one of the tasks defined by OCR-D. May create new images and/or new PAGE XML files depending on algorithm. It has to create a provenance file and a new METS file." - operationId: start-processor - requestBody: - content: - multipart/form-data: - schema: - $ref: '#/components/schemas/processors' - responses: - '201': - description: Successful Operation. The created file(s) has/have to be referenced inside the given fileGrp sections. - content: - application/json: - schema: - $ref: '#/components/schemas/job-id' - headers: - Location: - schema: - type: string - format: url - description: URL of the generated job. - # ----------------------------------------------------- - # Link - # ----------------------------------------------------- - links: - GetFilesByJobId: # <---- arbitrary name for the link - operationId: get-created-files-of-processor - parameters: - jobID: '$response.body#/jobID' - - description: > - The `jobId` value returned in the response can be used as - the `jobID` parameter in `GET /ocrd/processor/jobid/{jobID}`. - '400': - description: "Invalid input" - '500': - description: "Internal error" - /ocrd/processor/jobid/{jobID}: - get: - tags: - - "Image preprocessing" - - "Layout analysis" - - "Text recognition and optimization" - summary: Get created files. - description: Fetch all file created by job with id 'jobID'. If more than one file is created per mimetype use mimetype 'application/zip' as accept header to fetch all files at once. - operationId: get-created-files-of-processor - parameters: - - in: path - name: jobID - schema: - type: string - required: true - description: job ID of the started process. - responses: - '200': - description: Successful Operation. - content: - application/vnd.ocrd+job-status: - schema: - $ref: '#/components/schemas/job-status' - application/mets+xml: - schema: - $ref: '#/components/schemas/mets-file' - image/*: - schema: - $ref: '#/components/schemas/image-file' - application/vnd.ocrd.page+xml: - schema: - $ref: '#/components/schemas/page-file' - application/vnd.ocrd.prov+json: - schema: - $ref: '#/components/schemas/provenance-file' - application/vnd.ocrd.log+json: - schema: - $ref: '#/components/schemas/logging' - application/zip: - schema: - $ref: '#/components/schemas/zip-file' - '400': - description: "Bad request. Maybe more than one file of at least one mimetype was created. Please use mimetype 'application/zip' as accept header to fetch all files at once." - '500': - description: "Internal error" - delete: - tags: - - "Image preprocessing" - summary: Delete all files created by job with ID 'jobID'. - parameters: - - in: path - name: jobID - schema: - type: string - required: true - description: job ID of the started process. - responses: - '204': - description: Successful Operation. All linked resources are removed successfully. - content: - application/vnd.ocrd+job-status: - schema: - $ref: '#/components/schemas/job-status' - '404': - description: "Not found." - '410': - description: "Resource is not available anymore." - '500': - description: "Internal error" -components: - schemas: - processors: - type: object - description: Object holding all parameters needed by most processors. - properties: - mets: - $ref: '#/components/schemas/mets-file' - input-file-grp: - type: array - items: - type: string - description: ID(s) of the fileGrps which have to be used as input. (fileGrp@USE). - output-file-grp: - type: array - items: - type: string - description: ID(s) of the fileGrps which have to be used as output. (fileGrp@USE). - group-id: - type: array - items: - type: string - description: All IDs of the image files which should be characterized. - parameter: - type: array - items: - type: string - description: Parameter file in JSON format. - log-level: - $ref: '#/components/schemas/log-level' - required: - - mets - - input-file-grp - - output-file-grp - mets-file: - type: string - description: XML holding all information of the digitized document. All references of the images and the PAGE XMLs are available via fileGrp section (see http://www.loc.gov/standards/mets/mets.xsd) - page-file: - type: string - description: XML holding all information of the digitized page. (see http://schema.primaresearch.org/PAGE/gts/pagecontent/2017-07-15/pagecontent.xsd) - image-file: - type: string - format: binary - description: Image created by processor. - zip-file: - type: string - format: binary - description: > - zip-file containing all files created by the processor. - Content of zip-file: - * METS file (mandatory) (default: mets.xml) - * Provenance (mandatory) (default: provenance.json) - * PAGE XML (optional) - * Images (optional) - * Output of (error) console. (optional) (default: output.json) - - All files have to be referenced inside the METS file. - provenance-file: - type: string - description: JSON file holding all information about the provenance. - message-with-timestamp: - type: object - properties: - timestamp: - type: string - description: 'timestamp' - format: date - message: - type: string - description: 'message' - logging: - additionalProperties: false - type: object - properties: - stdout: - type: array - description: Output of console. - items: - $ref: '#/components/schemas/message-with-timestamp' - stderr: - type: array - description: Output of error console. - items: - $ref: '#/components/schemas/message-with-timestamp' - description: JSON file holding the output of the job. - job-id: - type: object - properties: - status: - $ref: '#/components/schemas/status' - jobId: - type: string - description: ID of the created job. - description: Status and job ID of created job. - status: - type: string - enum: - - OK - job-status: - type: string - enum: - - RUNNING - - FINISHED - - ERROR - - STOPPED - description: > - Description of the status: - * `RUNNING` - Job is not finished yet. - * `FINISHED` - Job was executed successfully. - * `ERROR` - Job was cancelled due to an error. - * `STOPPED` - Job was stopped due to server restrictions. - log-level: - type: string - enum: - - OFF - - ERROR - - WARN - - INFO - - DEBUG - - TRACE - default: INFO - example: DEBUG - description: Minimum Log level. One of (OFF, ERROR, WARN, INFO (default), DEBUG, TRACE). Actual mechanism for filtering log messages must not be implemented by processors. diff --git a/openapi.yml b/openapi.yml new file mode 100644 index 0000000..3b3b985 --- /dev/null +++ b/openapi.yml @@ -0,0 +1,287 @@ +openapi: 3.0.1 +info: + title: OCR-D Web API + description: > + #

HTTP API for offering OCR-D processing

+ + > This document defines the [data model](#/components/schemas) and + various HTTP APIs related to OCR-D + + ## Media types + + ### `application/json` + + Content serialized as `application/json` is defined by the [data model](#/components/schema) + + ### `application/vnd.ocrd+zip` + + Defined in https://ocr-d.de/en/spec/ocrd_zip + + ### `text/vnd.ocrd+sh` + + Defined in https://ocr-d.de/en/spec/ocrdwf + contact: + email: info@ocr-d.de + license: + name: Apache 2.0 + url: 'http://www.apache.org/licenses/LICENSE-2.0.html' + version: 1.0.0 +externalDocs: + description: OCR-D Website + url: 'http://ocr-d.de' +servers: + - url: 'https://example.org/ocrd/v1' +tags: + - name: discovery + description: Discovery of capabilities of a server + - name: workspace + description: mets.xml-indexed BagIt container + - name: processing + description: OCR-D processing and processors + - name: training + description: Training of OCR engines + - name: acl + description: Authorization and authentication + +paths: + + '/processor': + get: + tags: ['processing', 'discovery'] + responses: + '200': + description: List all processors + content: + application/json: {schema: {$ref: '#/components/schemas/ProcessorList'}} + + '/processor/{executable}': + get: + tags: ['processing', 'discovery'] + parameters: + - name: executable + in: path + description: Name of the executable + schema: {$ref: '#/components/schemas/OcrdExecutable'} + required: true + responses: + '200': + description: List all processors + content: + application/json: {schema: {$ref: '#/components/schemas/Processor'}} + '404': + description: Processor not available + content: {} + post: + tags: ['processing'] + parameters: + - name: executable + in: path + description: Name of the executable + schema: {$ref: '#/components/schemas/OcrdExecutable'} + required: true + requestBody: + description: Execute this ProcessorCall + content: + application/json: {schema: {$ref: '#/components/schemas/ProcessorCall'}} + required: true + responses: + '200': + description: Return the ProcessorJob running this ProcessorCall + content: {application/json: {schema: {$ref: '#/components/schemas/ProcessorJob'}}} + + '/processor/{executable}/{job-id}': + get: + tags: ['processing'] + parameters: + - name: executable + in: path + description: Name of the executable + schema: {$ref: '#/components/schemas/OcrdExecutable'} + required: true + - name: job-id + in: path + description: ID of the ProcessorJob + schema: {type: string} + required: true + responses: + '200': + description: Return ProcessorJob + content: {application/json: {schema: {$ref: '#/components/schemas/ProcessorJob'}}} + '404': {content: {}, description: 'Processor not available'} + + '/workflow': + post: + tags: ['processing'] + requestBody: + description: 'Register a new workflow' + content: + 'text/vnd.ocrd+sh': {} + required: true + responses: + '200': + description: Created a new OCR-D workflow + content: {application/json: {schema: {$ref: '#/components/schemas/Workflow'}}} + + '/workflow/{workflow-id}': + put: + tags: ['processing'] + parameters: + - name: workflow-id + in: path + description: ID of the Workflow + schema: {type: string} + required: true + requestBody: + description: 'Replace existing or create new workflow' + content: + 'text/vnd.ocrd+sh': {} + required: true + responses: + '200': + description: Created a new OCR-D workflow + content: {application/json: {schema: {$ref: '#/components/schemas/Workflow'}}} + get: + tags: ['processing'] + parameters: + - name: workflow-id + in: path + description: ID of the Workflow + schema: {type: string} + required: true + responses: + '200': + description: Return ProcessorJob + content: + application/json: {schema: {$ref: '#/components/schemas/Workflow'}} + 'text/vnd.ocrd+sh': {} + '404': {content: {}, description: 'Workflow not available'} + + '/workspace': + get: + tags: ['workspace'] + responses: + '200': + description: successful operation + content: + application/json: + schema: + type: array + items: + $ref: '#/components/schemas/Workspace' + '400': + description: Invalid ID supplied + content: {} + '404': + description: Workspace not found + content: {} + '410': + description: Workspace deleted + content: {} + post: + tags: ['workspace'] + summary: Replace an existing workspace + operationId: createWorkspace + requestBody: + description: OCRD-ZIP of the updated new workspace + content: + application/vnd.ocrd+zip: {} + required: true + responses: + 201: + description: Workspace created + content: + application/json: {schema: {$ref: '/components/schemas/Workspace'}} + put: + tags: ['workspace'] + summary: Replace an existing workspace + operationId: replaceWorkspace + requestBody: + description: OCRD-ZIP of the updated workspace + content: + multipart/form-data: + schema: {$ref: '#/components/schemas/WorkspaceRequest'} + required: true + responses: + 200: + description: Successfully replaced workspace + content: + application/json: {schema: {$ref: '#/components/schemas/Workspace'}} + +components: + schemas: + Resource: + type: object + required: ['@id'] + properties: + '@id': + type: string + description: URL of this thing + description: + type: string + description: description of the thing + JobState: + type: string + pattern: '^(QUEUED|RUNNING|STOPPED)' + JobLog: + allOf: + - {$ref: '#/components/schemas/Resource'} + Workspace: + allOf: + - {$ref: '#/components/schemas/Resource'} + WorkspaceRequest: + type: object + properties: + json: + $ref: '#/components/schemas/Workspace' + workspace: + type: string + format: binary + Job: + allOf: + - {$ref: '#/components/schemas/Resource'} + - type: object + properties: + state: + $ref: '#/components/schemas/JobState' + log: + $ref: '#/components/schemas/JobLog' + OcrdExecutable: + type: string + pattern: '^ocrd-.*' + Processor: + description: The ocrd-tool.json for a specific tool + x-$ref: 'https://ocr-d.de/ocrd_tool.schema.json#/properties/tools/patternProperties/ocrd-.*' + ProcessorList: + description: Maps OCR-D executables to their resp. ocrd-tool.json + type: array + items: + type: object + items: {$ref: '#/components/schemas/Processor'} + ProcessorArgs: + description: The CLI arguments passed to an OCR-D processor + type: object + properties: + workspace: {$ref: '#/components/schemas/Workspace'} + input_file_grps: {type: string} + output_file_grps: {type: string} + page_id: {type: string} + ProcessorCall: + allOf: + - {$ref: '#/components/schemas/ProcessorArgs'} + - type: object + description: Full representation of a CLI call of a processor + required: ['executable'] + properties: + executable: + type: string + pattern: '^ocrd-.*' + ProcessorJob: + allOf: + - {$ref: '#/components/schemas/Job'} + - type: object + properties: + processor: {$ref: '#/components/schemas/Processor'} + workspace: {$ref: '#/components/schemas/Workspace'} + Workflow: + allOf: + - {$ref: '#/components/schemas/Resource'} From 2e909e88f0130de91cc089a68cbd40897d08628f Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Sun, 23 Aug 2020 00:30:49 +0200 Subject: [PATCH 02/23] openapi: explain tagging, add operationId --- openapi.yml | 46 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 39 insertions(+), 7 deletions(-) diff --git a/openapi.yml b/openapi.yml index 3b3b985..4a9c5b9 100644 --- a/openapi.yml +++ b/openapi.yml @@ -7,6 +7,13 @@ info: > This document defines the [data model](#/components/schemas) and various HTTP APIs related to OCR-D + ## OCR-D API compatibility + + An implementation may claim compatibility with a `OCR-D ${N} API v{$V}` iff + + * it implements all the methods tagged `${N}` + * at major version `${V}` of this API definition + ## Media types ### `application/json` @@ -48,6 +55,7 @@ paths: '/processor': get: tags: ['processing', 'discovery'] + operationId: listProcessors responses: '200': description: List all processors @@ -57,6 +65,7 @@ paths: '/processor/{executable}': get: tags: ['processing', 'discovery'] + operationId: gettProcessor parameters: - name: executable in: path @@ -65,14 +74,13 @@ paths: required: true responses: '200': - description: List all processors + description: Get this processor content: application/json: {schema: {$ref: '#/components/schemas/Processor'}} - '404': - description: Processor not available - content: {} + '404': {content: {}, description: 'Processor not available'} post: tags: ['processing'] + operationId: runProcessor parameters: - name: executable in: path @@ -92,6 +100,7 @@ paths: '/processor/{executable}/{job-id}': get: tags: ['processing'] + operationId: getProcessorJob parameters: - name: executable in: path @@ -112,6 +121,7 @@ paths: '/workflow': post: tags: ['processing'] + operationId: postWorkflow requestBody: description: 'Register a new workflow' content: @@ -125,6 +135,7 @@ paths: '/workflow/{workflow-id}': put: tags: ['processing'] + operationId: putWorkflow parameters: - name: workflow-id in: path @@ -142,6 +153,7 @@ paths: content: {application/json: {schema: {$ref: '#/components/schemas/Workflow'}}} get: tags: ['processing'] + operationId: getWorkflow parameters: - name: workflow-id in: path @@ -155,10 +167,25 @@ paths: application/json: {schema: {$ref: '#/components/schemas/Workflow'}} 'text/vnd.ocrd+sh': {} '404': {content: {}, description: 'Workflow not available'} + post: + tags: ['processing'] + operationId: runWorkflow + parameters: + - name: workflow-id + in: path + description: ID of the Workflow + schema: {type: string} + required: true + responses: + '200': + description: Return WorkflowJob + content: + application/json: {schema: {$ref: '#/components/schemas/WorkflowJob'}} '/workspace': get: tags: ['workspace'] + operationId: getWorkspaces responses: '200': description: successful operation @@ -272,9 +299,7 @@ components: description: Full representation of a CLI call of a processor required: ['executable'] properties: - executable: - type: string - pattern: '^ocrd-.*' + executable: {$ref: '#/components/schemas/OcrdExecutable'} ProcessorJob: allOf: - {$ref: '#/components/schemas/Job'} @@ -285,3 +310,10 @@ components: Workflow: allOf: - {$ref: '#/components/schemas/Resource'} + WorkflowJob: + allOf: + - {$ref: '#/components/schemas/Job'} + - type: object + properties: + workflow: {$ref: '#/components/schemas/Workflow'} + workspace: {$ref: '#/components/schemas/Workspace'} From c8d4435995b062a4aeb2128aaff92c54500f736d Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Sun, 23 Aug 2020 00:40:35 +0200 Subject: [PATCH 03/23] openapi: discovery --- openapi.yml | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/openapi.yml b/openapi.yml index 4a9c5b9..86c5376 100644 --- a/openapi.yml +++ b/openapi.yml @@ -90,7 +90,7 @@ paths: requestBody: description: Execute this ProcessorCall content: - application/json: {schema: {$ref: '#/components/schemas/ProcessorCall'}} + application/json: {schema: {$ref: '#/components/schemas/ProcessorArgs'}} required: true responses: '200': @@ -234,6 +234,16 @@ paths: content: application/json: {schema: {$ref: '#/components/schemas/Workspace'}} + '/discovery': + get: + tags: ['discovery'] + operationId: discover + responses: + '200': + description: Return DiscoveryResponse + content: + application/json: {schema: {$ref: '#/components/schemas/DiscoveryResponse'}} + components: schemas: Resource: @@ -317,3 +327,22 @@ components: properties: workflow: {$ref: '#/components/schemas/Workflow'} workspace: {$ref: '#/components/schemas/Workspace'} + DiscoveryResponse: + type: object + properties: + ram: + description: All available RAM in bytes + type: number + cpu_cores: + description: Number of available CPU cores + type: number + has_ocrd_all: + description: Whether deployment is based on ocrd_all + type: boolean + has_cuda: + description: Whether deployment supports NVIDIA's CUDA + type: boolean + ocrd_all_version: + description: Git tag of the ocrd_all repo + type: string + From b604fff4f43dafc28578c5a4cb172b34e11a4012 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Sun, 23 Aug 2020 00:56:29 +0200 Subject: [PATCH 04/23] openapi: workflow job --- openapi.yml | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/openapi.yml b/openapi.yml index 86c5376..49d5f85 100644 --- a/openapi.yml +++ b/openapi.yml @@ -182,6 +182,30 @@ paths: content: application/json: {schema: {$ref: '#/components/schemas/WorkflowJob'}} + '/workflow/{workflow-id}/{job-id}': + get: + tags: ['processing'] + parameters: + - name: workflow-id + in: path + description: ID of the Workflow + schema: {type: string} + required: true + - name: job-id + in: path + description: ID of the ProcessorJob + schema: {type: string} + required: true + operationId: getWorkflowJob + responses: + '200': + description: Found WorkflowJob + content: + application/json: {schema: {$ref: '#/components/schemas/WorkflowJob'}} + '404': + description: WorkflowJob not found + content: {} + '/workspace': get: tags: ['workspace'] From a9b1a7042e29cc4a81ca92b18467f7f35a7daa90 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Sun, 23 Aug 2020 01:03:30 +0200 Subject: [PATCH 05/23] openapi: new tag "workflow" --- openapi.yml | 152 ++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 124 insertions(+), 28 deletions(-) diff --git a/openapi.yml b/openapi.yml index 49d5f85..247f941 100644 --- a/openapi.yml +++ b/openapi.yml @@ -41,10 +41,12 @@ servers: tags: - name: discovery description: Discovery of capabilities of a server - - name: workspace - description: mets.xml-indexed BagIt container - name: processing description: OCR-D processing and processors + - name: workflow + description: Processing of OCRD-WF + - name: workspace + description: mets.xml-indexed BagIt container - name: training description: Training of OCR engines - name: acl @@ -116,11 +118,53 @@ paths: '200': description: Return ProcessorJob content: {application/json: {schema: {$ref: '#/components/schemas/ProcessorJob'}}} - '404': {content: {}, description: 'Processor not available'} + '404': {content: {}, description: 'ProcessorJob not found'} - '/workflow': + '/processor/{executable}/{job-id}/log': + get: + tags: ['processing'] + operationId: getProcessorLog + parameters: + - name: executable + in: path + description: Name of the executable + schema: {$ref: '#/components/schemas/OcrdExecutable'} + required: true + - name: job-id + in: path + description: ID of the ProcessorJob + schema: {type: string} + required: true + responses: + '200': + description: Return Log + content: + 'text/plain': {} + '404': {content: {}, description: 'ProcessorJob not found'} post: tags: ['processing'] + operationId: logProcessor + parameters: + - name: executable + in: path + description: LogEntry to log + schema: {$ref: '#/components/schemas/LogEntry'} + required: true + - name: job-id + in: path + description: ID of the ProcessorJob + schema: {type: string} + required: true + responses: + '200': + description: Return Log + content: + 'text/plain': {} + '404': {content: {}, description: 'ProcessorJob not found'} + + '/workflow': + post: + tags: ['workflow', 'discovery'] operationId: postWorkflow requestBody: description: 'Register a new workflow' @@ -134,7 +178,7 @@ paths: '/workflow/{workflow-id}': put: - tags: ['processing'] + tags: ['workflow'] operationId: putWorkflow parameters: - name: workflow-id @@ -152,7 +196,7 @@ paths: description: Created a new OCR-D workflow content: {application/json: {schema: {$ref: '#/components/schemas/Workflow'}}} get: - tags: ['processing'] + tags: ['workflow', 'discovery'] operationId: getWorkflow parameters: - name: workflow-id @@ -165,10 +209,10 @@ paths: description: Return ProcessorJob content: application/json: {schema: {$ref: '#/components/schemas/Workflow'}} - 'text/vnd.ocrd+sh': {} + application/vnd.ocrd+zip: {} '404': {content: {}, description: 'Workflow not available'} post: - tags: ['processing'] + tags: ['workflow'] operationId: runWorkflow parameters: - name: workflow-id @@ -184,7 +228,7 @@ paths: '/workflow/{workflow-id}/{job-id}': get: - tags: ['processing'] + tags: ['workflow'] parameters: - name: workflow-id in: path @@ -219,15 +263,6 @@ paths: type: array items: $ref: '#/components/schemas/Workspace' - '400': - description: Invalid ID supplied - content: {} - '404': - description: Workspace not found - content: {} - '410': - description: Workspace deleted - content: {} post: tags: ['workspace'] summary: Replace an existing workspace @@ -235,7 +270,7 @@ paths: requestBody: description: OCRD-ZIP of the updated new workspace content: - application/vnd.ocrd+zip: {} + 'application/vnd.ocrd+zip': {} required: true responses: 201: @@ -258,6 +293,47 @@ paths: content: application/json: {schema: {$ref: '#/components/schemas/Workspace'}} + '/workspace/{workspace-id}': + get: + tags: ['workspace'] + operationId: getWorkspace + parameters: + - name: workspace-id + in: path + description: ID of the workspace + schema: {type: string} + required: true + responses: + '200': + description: Workspace found + content: + application/json: {schema: {$ref: '#/components/schemas/Workspace'}} + '404': + description: Workspace not found + content: {} + '410': + description: Workspace deleted + content: {} + delete: + operationId: deleteWorkspace + parameters: + - name: workspace-id + in: path + description: ID of the workspace + schema: {type: string} + required: true + responses: + '200': + description: Workspace deleted + content: + application/json: {schema: {$ref: '#/components/schemas/Workspace'}} + '404': + description: Workspace not found + content: {} + '410': + description: Workspace deleted + content: {} + '/discovery': get: tags: ['discovery'] @@ -283,7 +359,7 @@ components: JobState: type: string pattern: '^(QUEUED|RUNNING|STOPPED)' - JobLog: + Log: allOf: - {$ref: '#/components/schemas/Resource'} Workspace: @@ -304,8 +380,6 @@ components: properties: state: $ref: '#/components/schemas/JobState' - log: - $ref: '#/components/schemas/JobLog' OcrdExecutable: type: string pattern: '^ocrd-.*' @@ -313,11 +387,12 @@ components: description: The ocrd-tool.json for a specific tool x-$ref: 'https://ocr-d.de/ocrd_tool.schema.json#/properties/tools/patternProperties/ocrd-.*' ProcessorList: - description: Maps OCR-D executables to their resp. ocrd-tool.json + description: List all available processors type: array items: - type: object - items: {$ref: '#/components/schemas/Processor'} + type: array + items: + $ref: '#/components/schemas/Processor' ProcessorArgs: description: The CLI arguments passed to an OCR-D processor type: object @@ -360,13 +435,34 @@ components: cpu_cores: description: Number of available CPU cores type: number - has_ocrd_all: - description: Whether deployment is based on ocrd_all - type: boolean has_cuda: description: Whether deployment supports NVIDIA's CUDA type: boolean + cuda_version: + description: Major/minor version of CUDA + type: string + has_ocrd_all: + description: Whether deployment is based on ocrd_all + type: boolean ocrd_all_version: description: Git tag of the ocrd_all repo type: string + has_docker: + description: Whether the OCR-D executables run in a docker container + type: boolean + LogEntry: + type: object + required: + - level + - message + properties: + level: + description: Log level + type: string + message: + description: Log message + type: string + time: + description: Log UTC time + type: string From fb1b2d8e3bcfd1aeb560cb4a6dba72b27b8c995f Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Mon, 14 Feb 2022 18:41:19 +0100 Subject: [PATCH 06/23] typo: get{t,}Processor --- openapi.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openapi.yml b/openapi.yml index 247f941..748da7b 100644 --- a/openapi.yml +++ b/openapi.yml @@ -67,7 +67,7 @@ paths: '/processor/{executable}': get: tags: ['processing', 'discovery'] - operationId: gettProcessor + operationId: getProcessor parameters: - name: executable in: path From ed8be886e5b2d194c9e04606267d102d18ac3798 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Mon, 14 Feb 2022 18:54:30 +0100 Subject: [PATCH 07/23] openapi: found -> return --- openapi.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openapi.yml b/openapi.yml index 748da7b..b914c29 100644 --- a/openapi.yml +++ b/openapi.yml @@ -243,7 +243,7 @@ paths: operationId: getWorkflowJob responses: '200': - description: Found WorkflowJob + description: Return WorkflowJob content: application/json: {schema: {$ref: '#/components/schemas/WorkflowJob'}} '404': From 9fb6eb37f41ef15b047452d3e6f1841c225f1042 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Mon, 14 Feb 2022 18:56:33 +0100 Subject: [PATCH 08/23] openapi: fix POST /workspace --- openapi.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/openapi.yml b/openapi.yml index b914c29..d5cfa5d 100644 --- a/openapi.yml +++ b/openapi.yml @@ -265,10 +265,10 @@ paths: $ref: '#/components/schemas/Workspace' post: tags: ['workspace'] - summary: Replace an existing workspace + summary: Post a new workspace operationId: createWorkspace requestBody: - description: OCRD-ZIP of the updated new workspace + description: OCRD-ZIP of the new workspace content: 'application/vnd.ocrd+zip': {} required: true @@ -289,7 +289,7 @@ paths: required: true responses: 200: - description: Successfully replaced workspace + description: Workspace replaced content: application/json: {schema: {$ref: '#/components/schemas/Workspace'}} From 24421392684f7ff95a2a7fcca2c86c6d955c8846 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Mon, 14 Feb 2022 18:59:00 +0100 Subject: [PATCH 09/23] openapi: PUT /workspace should be PUT /workspace/{workspace-id} --- openapi.yml | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/openapi.yml b/openapi.yml index d5cfa5d..e98c42e 100644 --- a/openapi.yml +++ b/openapi.yml @@ -277,10 +277,18 @@ paths: description: Workspace created content: application/json: {schema: {$ref: '/components/schemas/Workspace'}} + + '/workspace/{workspace-id}': put: tags: ['workspace'] summary: Replace an existing workspace operationId: replaceWorkspace + parameters: + - name: workspace-id + in: path + description: ID of the workspace + schema: {type: string} + required: true requestBody: description: OCRD-ZIP of the updated workspace content: @@ -292,8 +300,6 @@ paths: description: Workspace replaced content: application/json: {schema: {$ref: '#/components/schemas/Workspace'}} - - '/workspace/{workspace-id}': get: tags: ['workspace'] operationId: getWorkspace From a924db4f177a4c2b1fc50e08d389f7750bad0ff7 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Mon, 14 Feb 2022 19:04:09 +0100 Subject: [PATCH 10/23] openapi: enumerate log levels --- openapi.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/openapi.yml b/openapi.yml index e98c42e..ca2a736 100644 --- a/openapi.yml +++ b/openapi.yml @@ -465,6 +465,7 @@ components: level: description: Log level type: string + enum: ['debug', 'info', 'warning', 'error'] message: description: Log message type: string From 39187a0696440e3e0173141942b194f40a795c4e Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Tue, 15 Feb 2022 12:22:53 +0100 Subject: [PATCH 11/23] openapi: 1.0.0 -> 0.0.1 --- openapi.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openapi.yml b/openapi.yml index ca2a736..da11419 100644 --- a/openapi.yml +++ b/openapi.yml @@ -32,7 +32,7 @@ info: license: name: Apache 2.0 url: 'http://www.apache.org/licenses/LICENSE-2.0.html' - version: 1.0.0 + version: 0.0.1 externalDocs: description: OCR-D Website url: 'http://ocr-d.de' From 3153a627ceea33646039c5d31807ae2df810d411 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Fri, 4 Mar 2022 17:35:03 +0100 Subject: [PATCH 12/23] Apply suggestions from code review Co-authored-by: mweidling <13831557+mweidling@users.noreply.github.com> --- openapi.yml | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/openapi.yml b/openapi.yml index da11419..a8f635a 100644 --- a/openapi.yml +++ b/openapi.yml @@ -2,7 +2,7 @@ openapi: 3.0.1 info: title: OCR-D Web API description: > - #

HTTP API for offering OCR-D processing

+ # HTTP API for offering OCR-D processing > This document defines the [data model](#/components/schemas) and various HTTP APIs related to OCR-D @@ -22,7 +22,7 @@ info: ### `application/vnd.ocrd+zip` - Defined in https://ocr-d.de/en/spec/ocrd_zip + Defined in [https://ocr-d.de/en/spec/ocrd_zip](https://ocr-d.de/en/spec/ocrd_zip) ### `text/vnd.ocrd+sh` @@ -38,6 +38,7 @@ externalDocs: url: 'http://ocr-d.de' servers: - url: 'https://example.org/ocrd/v1' + description: The URL of your server offering the OCR-D API. tags: - name: discovery description: Discovery of capabilities of a server @@ -60,7 +61,7 @@ paths: operationId: listProcessors responses: '200': - description: List all processors + description: A list of all processors content: application/json: {schema: {$ref: '#/components/schemas/ProcessorList'}} @@ -123,7 +124,7 @@ paths: '/processor/{executable}/{job-id}/log': get: tags: ['processing'] - operationId: getProcessorLog + operationId: getProcessorJobLog parameters: - name: executable in: path @@ -137,7 +138,7 @@ paths: required: true responses: '200': - description: Return Log + description: Return log content: 'text/plain': {} '404': {content: {}, description: 'ProcessorJob not found'} @@ -147,7 +148,7 @@ paths: parameters: - name: executable in: path - description: LogEntry to log + description: LogEntry to be logged schema: {$ref: '#/components/schemas/LogEntry'} required: true - name: job-id @@ -193,7 +194,7 @@ paths: required: true responses: '200': - description: Created a new OCR-D workflow + description: Created/updated a new OCR-D workflow content: {application/json: {schema: {$ref: '#/components/schemas/Workflow'}}} get: tags: ['workflow', 'discovery'] @@ -256,7 +257,7 @@ paths: operationId: getWorkspaces responses: '200': - description: successful operation + description: Successful operation content: application/json: schema: @@ -274,9 +275,9 @@ paths: required: true responses: 201: - description: Workspace created + description: Created Workspace content: - application/json: {schema: {$ref: '/components/schemas/Workspace'}} + application/json: {schema: {$ref: '#/components/schemas/Workspace'}} '/workspace/{workspace-id}': put: @@ -361,7 +362,7 @@ components: description: URL of this thing description: type: string - description: description of the thing + description: Description of the thing JobState: type: string pattern: '^(QUEUED|RUNNING|STOPPED)' @@ -451,10 +452,10 @@ components: description: Whether deployment is based on ocrd_all type: boolean ocrd_all_version: - description: Git tag of the ocrd_all repo + description: Git tag of the ocrd_all version implemented type: string has_docker: - description: Whether the OCR-D executables run in a docker container + description: Whether the OCR-D executables run in a Docker container type: boolean LogEntry: type: object From b58e30bbab17453fd43c931ba76d31ff50cf4a5f Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Fri, 4 Mar 2022 17:40:11 +0100 Subject: [PATCH 13/23] reformat 404 response definitions --- openapi.yml | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/openapi.yml b/openapi.yml index da11419..f5ca201 100644 --- a/openapi.yml +++ b/openapi.yml @@ -79,7 +79,8 @@ paths: description: Get this processor content: application/json: {schema: {$ref: '#/components/schemas/Processor'}} - '404': {content: {}, description: 'Processor not available'} + '404': + description: 'Processor not available' post: tags: ['processing'] operationId: runProcessor @@ -118,7 +119,8 @@ paths: '200': description: Return ProcessorJob content: {application/json: {schema: {$ref: '#/components/schemas/ProcessorJob'}}} - '404': {content: {}, description: 'ProcessorJob not found'} + '404': + description: 'ProcessorJob not found' '/processor/{executable}/{job-id}/log': get: @@ -140,7 +142,8 @@ paths: description: Return Log content: 'text/plain': {} - '404': {content: {}, description: 'ProcessorJob not found'} + '404': + description: 'ProcessorJobLog not found' post: tags: ['processing'] operationId: logProcessor @@ -160,7 +163,8 @@ paths: description: Return Log content: 'text/plain': {} - '404': {content: {}, description: 'ProcessorJob not found'} + '404': + description: 'ProcessorJob not found' '/workflow': post: @@ -210,7 +214,8 @@ paths: content: application/json: {schema: {$ref: '#/components/schemas/Workflow'}} application/vnd.ocrd+zip: {} - '404': {content: {}, description: 'Workflow not available'} + '404': + description: 'Workflow not available' post: tags: ['workflow'] operationId: runWorkflow @@ -248,7 +253,6 @@ paths: application/json: {schema: {$ref: '#/components/schemas/WorkflowJob'}} '404': description: WorkflowJob not found - content: {} '/workspace': get: From fe53465cf88d7877e21265387daec2c6bdb68d12 Mon Sep 17 00:00:00 2001 From: mweidling <13831557+mweidling@users.noreply.github.com> Date: Mon, 7 Mar 2022 07:29:19 +0100 Subject: [PATCH 14/23] Update openapi.yml Co-authored-by: Konstantin Baierer --- openapi.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openapi.yml b/openapi.yml index 63786ad..9153334 100644 --- a/openapi.yml +++ b/openapi.yml @@ -147,7 +147,7 @@ paths: description: 'ProcessorJobLog not found' post: tags: ['processing'] - operationId: logProcessor + operationId: postProcessorJobLogEntry parameters: - name: executable in: path From b014f528b15e1c90f0c717f226a1845448e61450 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Mon, 4 Apr 2022 09:30:06 +0200 Subject: [PATCH 15/23] Update openapi.yml --- openapi.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openapi.yml b/openapi.yml index 9153334..556aec2 100644 --- a/openapi.yml +++ b/openapi.yml @@ -342,7 +342,7 @@ paths: description: Workspace not found content: {} '410': - description: Workspace deleted + description: Workspace deleted before content: {} '/discovery': From 0dcfb74721736cf50f617e80fab3dcf35b78b250 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Mon, 4 Apr 2022 12:02:01 +0200 Subject: [PATCH 16/23] openapi: replace OCRD-WF references --- openapi.yml | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/openapi.yml b/openapi.yml index 556aec2..67650f3 100644 --- a/openapi.yml +++ b/openapi.yml @@ -24,9 +24,10 @@ info: Defined in [https://ocr-d.de/en/spec/ocrd_zip](https://ocr-d.de/en/spec/ocrd_zip) - ### `text/vnd.ocrd+sh` + ### `text/vnd.ocrd.workflow` + + Workflow format, currently (April 2022) still to be determined. - Defined in https://ocr-d.de/en/spec/ocrdwf contact: email: info@ocr-d.de license: @@ -45,7 +46,7 @@ tags: - name: processing description: OCR-D processing and processors - name: workflow - description: Processing of OCRD-WF + description: Processing of workflows - name: workspace description: mets.xml-indexed BagIt container - name: training @@ -174,7 +175,7 @@ paths: requestBody: description: 'Register a new workflow' content: - 'text/vnd.ocrd+sh': {} + 'text/vnd.ocrd.workflow': {} required: true responses: '200': @@ -194,7 +195,7 @@ paths: requestBody: description: 'Replace existing or create new workflow' content: - 'text/vnd.ocrd+sh': {} + 'text/vnd.ocrd.workflow': {} required: true responses: '200': @@ -211,7 +212,7 @@ paths: required: true responses: '200': - description: Return ProcessorJob + description: Return Workflow content: application/json: {schema: {$ref: '#/components/schemas/Workflow'}} application/vnd.ocrd+zip: {} From b416666e108d76f255745a14cf6178f0b1461173 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Mon, 4 Apr 2022 14:44:17 +0200 Subject: [PATCH 17/23] openapi: add 400 responses where applicable --- openapi.yml | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/openapi.yml b/openapi.yml index 67650f3..0eed34e 100644 --- a/openapi.yml +++ b/openapi.yml @@ -181,6 +181,8 @@ paths: '200': description: Created a new OCR-D workflow content: {application/json: {schema: {$ref: '#/components/schemas/Workflow'}}} + '400': + description: Invalid workflow '/workflow/{workflow-id}': put: @@ -201,6 +203,8 @@ paths: '200': description: Created/updated a new OCR-D workflow content: {application/json: {schema: {$ref: '#/components/schemas/Workflow'}}} + '400': + description: Invalid workflow get: tags: ['workflow', 'discovery'] operationId: getWorkflow @@ -253,6 +257,8 @@ paths: description: Return WorkflowJob content: application/json: {schema: {$ref: '#/components/schemas/WorkflowJob'}} + '400': + description: Workflow failed '404': description: WorkflowJob not found @@ -279,10 +285,12 @@ paths: 'application/vnd.ocrd+zip': {} required: true responses: - 201: + '201': description: Created Workspace content: application/json: {schema: {$ref: '#/components/schemas/Workspace'}} + '400': + description: Invalid workspace '/workspace/{workspace-id}': put: @@ -302,10 +310,12 @@ paths: schema: {$ref: '#/components/schemas/WorkspaceRequest'} required: true responses: - 200: + '200': description: Workspace replaced content: application/json: {schema: {$ref: '#/components/schemas/Workspace'}} + '400': + description: Workspace invalid get: tags: ['workspace'] operationId: getWorkspace @@ -324,7 +334,7 @@ paths: description: Workspace not found content: {} '410': - description: Workspace deleted + description: Workspace deleted before content: {} delete: operationId: deleteWorkspace From ee7fec15c09fe7972d5d08cbe9df390594664ce1 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Mon, 4 Apr 2022 14:46:54 +0200 Subject: [PATCH 18/23] openapi: consistent order of properties --- openapi.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/openapi.yml b/openapi.yml index 0eed34e..e6654ed 100644 --- a/openapi.yml +++ b/openapi.yml @@ -240,6 +240,7 @@ paths: '/workflow/{workflow-id}/{job-id}': get: tags: ['workflow'] + operationId: getWorkflowJob parameters: - name: workflow-id in: path @@ -251,7 +252,6 @@ paths: description: ID of the ProcessorJob schema: {type: string} required: true - operationId: getWorkflowJob responses: '200': description: Return WorkflowJob @@ -277,8 +277,8 @@ paths: $ref: '#/components/schemas/Workspace' post: tags: ['workspace'] - summary: Post a new workspace operationId: createWorkspace + summary: Post a new workspace requestBody: description: OCRD-ZIP of the new workspace content: @@ -295,8 +295,8 @@ paths: '/workspace/{workspace-id}': put: tags: ['workspace'] - summary: Replace an existing workspace operationId: replaceWorkspace + summary: Replace an existing workspace parameters: - name: workspace-id in: path @@ -337,6 +337,7 @@ paths: description: Workspace deleted before content: {} delete: + tags: ['workspace'] operationId: deleteWorkspace parameters: - name: workspace-id From 6f6d0e4547a061d82f9abec5eb457f2c7234dbba Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Mon, 4 Apr 2022 14:47:56 +0200 Subject: [PATCH 19/23] Update openapi.yml Co-authored-by: mweidling <13831557+mweidling@users.noreply.github.com> --- openapi.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openapi.yml b/openapi.yml index e6654ed..434a626 100644 --- a/openapi.yml +++ b/openapi.yml @@ -311,7 +311,7 @@ paths: required: true responses: '200': - description: Workspace replaced + description: Workspace replaced or created content: application/json: {schema: {$ref: '#/components/schemas/Workspace'}} '400': From 50346bc06d03a81ea51513206c7a3e262d505ca0 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Mon, 4 Apr 2022 14:48:21 +0200 Subject: [PATCH 20/23] Update openapi.yml Co-authored-by: mweidling <13831557+mweidling@users.noreply.github.com> --- openapi.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/openapi.yml b/openapi.yml index 434a626..627ea0a 100644 --- a/openapi.yml +++ b/openapi.yml @@ -454,7 +454,8 @@ components: properties: ram: description: All available RAM in bytes - type: number + type: integer + format: int64 cpu_cores: description: Number of available CPU cores type: number From 10f45e35351766585d2cac455ba7ee741d752e22 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Mon, 4 Apr 2022 14:48:37 +0200 Subject: [PATCH 21/23] Update openapi.yml Co-authored-by: mweidling <13831557+mweidling@users.noreply.github.com> --- openapi.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/openapi.yml b/openapi.yml index 627ea0a..f812019 100644 --- a/openapi.yml +++ b/openapi.yml @@ -458,7 +458,8 @@ components: format: int64 cpu_cores: description: Number of available CPU cores - type: number + type: integer + format: int64 has_cuda: description: Whether deployment supports NVIDIA's CUDA type: boolean From 4b478fa514f15556f56b32e266c5eb7062bcf57a Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Mon, 4 Apr 2022 14:48:57 +0200 Subject: [PATCH 22/23] Update openapi.yml Co-authored-by: mweidling <13831557+mweidling@users.noreply.github.com> --- openapi.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/openapi.yml b/openapi.yml index f812019..886ccba 100644 --- a/openapi.yml +++ b/openapi.yml @@ -491,4 +491,5 @@ components: time: description: Log UTC time type: string + format: date-time From cab7c244136481cb36aa85fed55ffa81e8cc58d7 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Mon, 4 Apr 2022 14:51:29 +0200 Subject: [PATCH 23/23] openapi: add "parameters" to ProcessorArgs --- openapi.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/openapi.yml b/openapi.yml index 886ccba..e15b382 100644 --- a/openapi.yml +++ b/openapi.yml @@ -424,6 +424,9 @@ components: input_file_grps: {type: string} output_file_grps: {type: string} page_id: {type: string} + parameters: + type: object + default: {} ProcessorCall: allOf: - {$ref: '#/components/schemas/ProcessorArgs'}