Pipeline, Ksenia Sizikova - 22FPL1 #1825
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# "Check PR" workflow: triggered by pushes to `main` and by pull requests
# that target `main`.
name: Check PR

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

# Runs are grouped per repository/ref pair; starting a new run in a group
# cancels the run that is still in progress.
concurrency:
  group: '${{ github.repository }}-${{ github.ref }}-main'
  cancel-in-progress: true

# Workflow-level environment, visible to every job below.
env:
  REPOSITORY_TYPE: public

jobs:
# First job of the workflow: checks out the repository, runs the FIPL setup
# action and confirms that a `venv` directory is present afterwards.
install-dependencies:
  name: Installing dependencies
  timeout-minutes: 5
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4
    - name: Setup FIPL environment
      uses: fipl-hse/setup-env-action@v0.10
    - name: Confirming everything is OK
      # `ls` exits non-zero (failing the step) when `venv` does not exist.
      run: |
        ls -la venv
# Runs the PR-name check script with the pull-request title and the actor.
# Only executed for `pull_request` events.
pr-name-check:
  name: PR name check
  needs:
    - install-dependencies
  if: github.event_name == 'pull_request'
  timeout-minutes: 5
  runs-on: ubuntu-latest
  env:
    # Handed to the script through environment variables and quoted in the
    # shell command, rather than interpolated into the command text.
    PR_NAME: '${{ github.event.pull_request.title }}'
    PR_AUTHOR: '${{ github.actor }}'
  steps:
    - uses: actions/checkout@v4
    - name: Setup FIPL environment
      uses: fipl-hse/setup-env-action@v0.10
    - name: PR name check
      run: |
        bash config/stage_1_style_tests/_stage_pr_name_check.sh "$PR_NAME" "$PR_AUTHOR"
# Stage 1 lint job: invokes the lint stage script with the PR title and actor.
code-style:
  name: Code Style
  needs:
    - install-dependencies
  timeout-minutes: 5
  runs-on: ubuntu-latest
  env:
    # The title is empty when the run was not triggered by a pull request.
    PR_NAME: '${{ github.event.pull_request.title }}'
    PR_AUTHOR: '${{ github.actor }}'
  steps:
    - uses: actions/checkout@v4
    - name: Setup FIPL environment
      uses: fipl-hse/setup-env-action@v0.10
    - name: Code Style
      run: |
        bash config/stage_1_style_tests/_stage_run_lint.sh "$PR_NAME" "$PR_AUTHOR"
# Stage 1 typing job: runs the mypy stage script (no arguments).
mypy-checks:
  name: Mypy checks
  needs:
    - install-dependencies
  timeout-minutes: 5
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4
    - name: Setup FIPL environment
      uses: fipl-hse/setup-env-action@v0.10
    - name: MyPy check
      run: |
        bash config/stage_1_style_tests/_stage_run_mypy.sh
# Stage 1 flake8 job (shown in the UI as "Import style checks"): runs the
# flake8 stage script (no arguments).
flake8-checks:
  name: Import style checks
  needs:
    - install-dependencies
  timeout-minutes: 5
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4
    - name: Setup FIPL environment
      uses: fipl-hse/setup-env-action@v0.10
    - name: Flake8 check
      run: |
        bash config/stage_1_style_tests/_stage_run_flake8.sh
# Stage 1 requirements job: runs the requirements-check stage script.
# NOTE(review): no later job in this file lists this job in `needs`, so its
# result does not gate the crawler/pipeline stages - confirm that is intended.
requirements-check:
  name: Requirements check
  needs:
    - install-dependencies
  timeout-minutes: 5
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4
    - name: Setup FIPL environment
      uses: fipl-hse/setup-env-action@v0.10
    - name: Dependencies check
      run: |
        bash config/stage_1_style_tests/_stage_requirements_check.sh
# Stage 2. Crawler tests
# Stage 2.1: runs the pytest stage script for `lab_5_scrapper` with the
# `stage_2_1_crawler_config_check` marker. Starts once the lint, mypy and
# flake8 jobs have succeeded.
checking-crawler-config:
  name: Crawler checks config
  needs:
    - code-style
    - mypy-checks
    - flake8-checks
  runs-on: ubuntu-latest
  timeout-minutes: 3
  env:
    PR_NAME: '${{ github.event.pull_request.title }}'
    PR_AUTHOR: '${{ github.actor }}'
  steps:
    - uses: actions/checkout@v4
    - name: Setup FIPL environment
      uses: fipl-hse/setup-env-action@v0.10
    - name: Run crawler config checks
      run: |
        bash config/_stage_run_pytest.sh "$PR_NAME" "$PR_AUTHOR" -l lab_5_scrapper -m stage_2_1_crawler_config_check
# Stage 2.2: runs the pytest stage script for `lab_5_scrapper` with the
# `stage_2_2_crawler_check` marker. Starts once the lint, mypy and flake8
# jobs have succeeded.
checking-crawler:
  name: Crawler checks
  needs:
    - code-style
    - mypy-checks
    - flake8-checks
  runs-on: ubuntu-latest
  timeout-minutes: 4
  env:
    PR_NAME: '${{ github.event.pull_request.title }}'
    PR_AUTHOR: '${{ github.actor }}'
  steps:
    - uses: actions/checkout@v4
    - name: Setup FIPL environment
      uses: fipl-hse/setup-env-action@v0.10
    # Step label corrected: it used to read "Run crawler config checks",
    # copied from the stage 2.1 job, which made the run log misleading.
    - name: Run crawler checks
      run: |
        bash config/_stage_run_pytest.sh "$PR_NAME" "$PR_AUTHOR" -l lab_5_scrapper -m stage_2_2_crawler_check
# Stage 2.3: runs the pytest stage script for `lab_5_scrapper` with the
# `stage_2_3_HTML_parser_check` marker. Starts once the lint, mypy and
# flake8 jobs have succeeded.
checking-parser:
  name: Parser checks
  needs:
    - code-style
    - mypy-checks
    - flake8-checks
  runs-on: ubuntu-latest
  timeout-minutes: 10
  env:
    PR_NAME: '${{ github.event.pull_request.title }}'
    PR_AUTHOR: '${{ github.actor }}'
  steps:
    - uses: actions/checkout@v4
    - name: Setup FIPL environment
      uses: fipl-hse/setup-env-action@v0.10
    # Step label corrected: it used to read "Run crawler config checks",
    # copied from the stage 2.1 job, although this step runs parser tests.
    - name: Run parser checks
      run: |
        bash config/_stage_run_pytest.sh "$PR_NAME" "$PR_AUTHOR" -l lab_5_scrapper -m stage_2_3_HTML_parser_check
# Stage 2.4: runs the article-collection script, checks the dataset volume
# with pytest, then publishes `tmp/articles` as the `raw-dataset` artifact
# that later jobs download.
collecting-articles-from-internet:
  name: Download articles
  needs:
    - checking-crawler-config
    - checking-crawler
    - checking-parser
  runs-on: ubuntu-latest
  timeout-minutes: 10
  env:
    PR_NAME: '${{ github.event.pull_request.title }}'
    PR_AUTHOR: '${{ github.actor }}'
  steps:
    - uses: actions/checkout@v4
    - name: Setup FIPL environment
      uses: fipl-hse/setup-env-action@v0.10
    - name: Crawl a raw dataset from web
      run: |
        bash admin_utils/stage_2_crawler_tests/_stage_collect_articles.sh "$PR_NAME" "$PR_AUTHOR"
        bash config/_stage_run_pytest.sh "$PR_NAME" "$PR_AUTHOR" -l lab_5_scrapper -m stage_2_4_dataset_volume_check
    - name: Archive raw dataset
      uses: actions/upload-artifact@v4
      with:
        name: raw-dataset
        retention-days: 5
        path: |
          tmp/articles
# Stage 2.5: fetches the `raw-dataset` artifact, unpacks it for
# `lab_5_scrapper` and runs the dataset-validation pytest marker.
checking-articles-dataset:
  name: Validate dataset
  needs:
    - collecting-articles-from-internet
  runs-on: ubuntu-latest
  timeout-minutes: 5
  env:
    PR_NAME: '${{ github.event.pull_request.title }}'
    PR_AUTHOR: '${{ github.actor }}'
  steps:
    - uses: actions/checkout@v4
    - name: Setup FIPL environment
      uses: fipl-hse/setup-env-action@v0.10
    - name: Download previously collected dataset
      uses: actions/download-artifact@v4
      # Best effort: the job keeps going when the artifact is unavailable.
      continue-on-error: true
      with:
        name: raw-dataset
    - name: Run metadata validation
      # Activates the virtualenv and puts the repository root on PYTHONPATH
      # before calling the unpack helper.
      run: |
        source venv/bin/activate
        export PYTHONPATH=$(pwd):$PYTHONPATH
        python admin_utils/unpack_archived_dataset.py lab_5_scrapper
        bash config/_stage_run_pytest.sh "$PR_NAME" "$PR_AUTHOR" -l lab_5_scrapper -m stage_2_5_dataset_validation
# Milestone marker: succeeds only after the dataset-validation job, and just
# prints a message.
milestone-1-crawler-is-working:
  name: Crawler is accepted!
  needs:
    - checking-articles-dataset
  timeout-minutes: 2
  runs-on: ubuntu-latest
  steps:
    - name: Congratulations
      run: |
        echo "You have completed the crawler!"
# Stage 3. Pipeline tests
# Gate between the crawler milestone and the pipeline jobs: prints a message
# and gives the stage 3 jobs one job to list in `needs`.
milestone-2-pipeline:
  name: Starting pipeline checks!
  needs:
    - milestone-1-crawler-is-working
  timeout-minutes: 2
  runs-on: ubuntu-latest
  steps:
    - name: Congratulations
      run: |
        echo "Preparing pipeline checks"
# Stage 3.1: fetches the `raw-dataset` artifact and runs the
# `stage_3_1_dataset_sanity_checks` pytest marker for `lab_6_pipeline`.
checking-raw-dataset-before-running-pipeline:
  name: Pipe verifies dataset
  needs:
    - milestone-2-pipeline
  runs-on: ubuntu-latest
  timeout-minutes: 5
  env:
    # PR_NAME was missing here although the command below passes "$PR_NAME",
    # so the script always received an empty first argument. It is now
    # defined the same way as in the stage 2 jobs.
    PR_NAME: '${{ github.event.pull_request.title }}'
    PR_AUTHOR: '${{ github.actor }}'
  steps:
    - uses: actions/checkout@v4
    - name: Setup FIPL environment
      uses: fipl-hse/setup-env-action@v0.10
    - name: Download previously collected dataset
      # Best effort: the job keeps going when the artifact is unavailable.
      continue-on-error: true
      uses: actions/download-artifact@v4
      with:
        name: raw-dataset
    # Step label corrected: it used to read "Run crawler config checks",
    # copied from the crawler stage.
    - name: Run dataset sanity checks
      run: |
        bash config/_stage_run_pytest.sh "$PR_NAME" "$PR_AUTHOR" -l lab_6_pipeline -m stage_3_1_dataset_sanity_checks
# Stage 3.2: fetches the `raw-dataset` artifact, unpacks it for
# `lab_6_pipeline` and runs the `stage_3_2_corpus_manager_checks` marker.
checking-corpus-manager-creates-instances-correctly:
  name: CorpusManager detects articles
  needs:
    - milestone-2-pipeline
  runs-on: ubuntu-latest
  timeout-minutes: 2
  env:
    # PR_NAME was missing here although the command below passes "$PR_NAME",
    # so the script always received an empty first argument. It is now
    # defined the same way as in the stage 2 jobs.
    PR_NAME: '${{ github.event.pull_request.title }}'
    PR_AUTHOR: '${{ github.actor }}'
  steps:
    - uses: actions/checkout@v4
    - name: Setup FIPL environment
      uses: fipl-hse/setup-env-action@v0.10
    - name: Download previously collected dataset
      # Best effort: the job keeps going when the artifact is unavailable.
      continue-on-error: true
      uses: actions/download-artifact@v4
      with:
        name: raw-dataset
    - name: Run CorpusManager tests
      # Activates the virtualenv and puts the repository root on PYTHONPATH
      # before calling the unpack helper.
      run: |
        source venv/bin/activate
        export PYTHONPATH=$(pwd):$PYTHONPATH
        python admin_utils/unpack_archived_dataset.py lab_6_pipeline
        bash config/_stage_run_pytest.sh "$PR_NAME" "$PR_AUTHOR" -l lab_6_pipeline -m stage_3_2_corpus_manager_checks
# Stages 3.4 and 3.6: fetches the `raw-dataset` artifact, unpacks it for
# `lab_6_pipeline`, then runs the admin-data-processing and the advanced
# morphological-processing pytest markers one after the other.
checking-student-processing-works-for-admin-dataset:
  name: Pipe processed admin data
  needs:
    - milestone-2-pipeline
  runs-on: ubuntu-latest
  timeout-minutes: 5
  env:
    # PR_NAME was missing here although the commands below pass "$PR_NAME",
    # so the script always received an empty first argument. It is now
    # defined the same way as in the stage 2 jobs.
    PR_NAME: '${{ github.event.pull_request.title }}'
    PR_AUTHOR: '${{ github.actor }}'
  steps:
    - uses: actions/checkout@v4
    - name: Setup FIPL environment
      uses: fipl-hse/setup-env-action@v0.10
    - name: Download previously collected dataset
      # Best effort: the job keeps going when the artifact is unavailable.
      continue-on-error: true
      uses: actions/download-artifact@v4
      with:
        name: raw-dataset
    # Step label corrected: it used to read "Run metadata validation",
    # copied from the stage 2.5 job.
    - name: Run admin data processing checks
      run: |
        source venv/bin/activate
        export PYTHONPATH=$(pwd):$PYTHONPATH
        python admin_utils/unpack_archived_dataset.py lab_6_pipeline
        bash config/_stage_run_pytest.sh "$PR_NAME" "$PR_AUTHOR" -l lab_6_pipeline -m stage_3_4_admin_data_processing
        bash config/_stage_run_pytest.sh "$PR_NAME" "$PR_AUTHOR" -l lab_6_pipeline -m stage_3_6_advanced_morphological_processing
# Fetches the `raw-dataset` artifact, runs the student-dataset check script
# and publishes `tmp/articles` as the `processed-dataset` artifact.
run-student-processing:
  name: Pipe processed student data
  needs:
    - milestone-2-pipeline
  runs-on: ubuntu-latest
  timeout-minutes: 5
  env:
    # This job's script takes only the actor, so no PR_NAME is defined.
    PR_AUTHOR: '${{ github.actor }}'
  steps:
    - uses: actions/checkout@v4
    - name: Setup FIPL environment
      uses: fipl-hse/setup-env-action@v0.10
    - name: Download previously collected dataset
      uses: actions/download-artifact@v4
      # Best effort: the job keeps going when the artifact is unavailable.
      continue-on-error: true
      with:
        name: raw-dataset
    - name: Run validation of `_processed.txt` files
      run: |
        bash admin_utils/stage_3_pipeline_tests/_stage_check_on_student_dataset.sh "$PR_AUTHOR"
    - name: Archive processed dataset
      uses: actions/upload-artifact@v4
      # A failed upload does not fail the job.
      continue-on-error: true
      with:
        name: processed-dataset
        retention-days: 5
        path: |
          tmp/articles
# Stage 3.5: fetches the `processed-dataset` artifact, unpacks it for
# `lab_6_pipeline` and runs the `stage_3_5_student_dataset_validation`
# pytest marker.
checking-student-processing-works-for-student-dataset:
  name: Validate final dataset
  needs:
    - run-student-processing
  runs-on: ubuntu-latest
  timeout-minutes: 5
  env:
    # PR_NAME was missing here although the command below passes "$PR_NAME",
    # so the script always received an empty first argument. It is now
    # defined the same way as in the stage 2 jobs.
    PR_NAME: '${{ github.event.pull_request.title }}'
    PR_AUTHOR: '${{ github.actor }}'
  steps:
    - uses: actions/checkout@v4
    - name: Setup FIPL environment
      uses: fipl-hse/setup-env-action@v0.10
    - name: Download previously collected dataset
      # Best effort: the job keeps going when the artifact is unavailable.
      continue-on-error: true
      uses: actions/download-artifact@v4
      with:
        name: processed-dataset
    - name: Run validation of `_processed.txt` files
      # Activates the virtualenv and puts the repository root on PYTHONPATH
      # before calling the unpack helper.
      run: |
        source venv/bin/activate
        export PYTHONPATH=$(pwd):$PYTHONPATH
        python admin_utils/unpack_archived_dataset.py lab_6_pipeline
        bash config/_stage_run_pytest.sh "$PR_NAME" "$PR_AUTHOR" -l lab_6_pipeline -m stage_3_5_student_dataset_validation
# Stage 4: fetches the `processed-dataset` artifact, runs the
# `stage_4_pos_frequency_pipeline_checks` pytest marker and re-publishes
# `tmp/articles` under the same artifact name.
running-pos-pipeline-tests:
  name: POSFrequencyPipeline tests
  needs:
    - checking-raw-dataset-before-running-pipeline
    - checking-student-processing-works-for-admin-dataset
    - checking-student-processing-works-for-student-dataset
    - checking-corpus-manager-creates-instances-correctly
  runs-on: ubuntu-latest
  timeout-minutes: 7
  env:
    # PR_NAME was missing here although the command below passes "$PR_NAME",
    # so the script always received an empty first argument. It is now
    # defined the same way as in the stage 2 jobs.
    PR_NAME: '${{ github.event.pull_request.title }}'
    PR_AUTHOR: '${{ github.actor }}'
  steps:
    - uses: actions/checkout@v4
    - name: Setup FIPL environment
      uses: fipl-hse/setup-env-action@v0.10
    - name: Download previously collected dataset
      # Best effort: the job keeps going when the artifact is unavailable.
      continue-on-error: true
      uses: actions/download-artifact@v4
      with:
        name: processed-dataset
    # Step label corrected: it used to read "Congratulations", copied from a
    # milestone job, although this step runs the actual tests.
    - name: Run POSFrequencyPipeline checks
      run: |
        bash config/_stage_run_pytest.sh "$PR_NAME" "$PR_AUTHOR" -l lab_6_pipeline -m stage_4_pos_frequency_pipeline_checks
    - name: Archive processed dataset
      continue-on-error: true
      uses: actions/upload-artifact@v4
      with:
        name: processed-dataset
        path: |
          tmp/articles
        retention-days: 5
        # `run-student-processing` already uploaded an artifact with this
        # name in the same run. upload-artifact@v4 refuses a duplicate name
        # unless `overwrite` is set, and `continue-on-error` hid that failure.
        overwrite: true
# Final milestone marker: succeeds only after the POS pipeline tests job,
# and just prints a message.
milestone-2-pipeline-is-working:
  name: Pipeline is accepted!
  needs:
    - running-pos-pipeline-tests
  timeout-minutes: 2
  runs-on: ubuntu-latest
  steps:
    - name: Congratulations
      run: |
        echo "You have completed the assignment!"