diff --git a/.actrc b/.actrc new file mode 100644 index 0000000..97cbad5 --- /dev/null +++ b/.actrc @@ -0,0 +1,2 @@ +# Configuration file for nektos/act +--container-daemon-socket - \ No newline at end of file diff --git a/.github/workflows/article-validator.yml b/.github/workflows/article-validator.yml index 4cf2bea..8563f60 100644 --- a/.github/workflows/article-validator.yml +++ b/.github/workflows/article-validator.yml @@ -17,6 +17,7 @@ jobs: - name: Checkout Code uses: actions/checkout@v3 with: + ref: ${{ github.head_ref }} fetch-depth: 0 - run: echo "Checkout code" @@ -37,25 +38,19 @@ jobs: id: find_files run: | # Find new or modified markdown files in the 'articles' directory - MODIFIED_FILES=$(git diff --name-only --diff-filter=ACM origin/main HEAD -- "articles/*.md") + MODIFIED_FILES=$(git diff --name-only --diff-filter=ACM origin/main..HEAD -- "articles/*.md") if [ -z "$MODIFIED_FILES" ]; then echo "No new or modified articles found." echo "::set-output name=modified_files::" else - echo "New or modified markdown files found:" - echo "$MODIFIED_FILES" + echo "New or modified markdown file(s) found:" + echo "- $MODIFIED_FILES" echo "::set-output name=modified_files::$MODIFIED_FILES" fi # Run sanity check for modified files - name: Run sanity check + if: steps.find_files.outputs.modified_files run: | - if [ -z "${{ steps.find_files.outputs.modified_files }}" ]; then - echo "No articles to validate." - else - chmod +x sanity_check.sh - for file in ${{ steps.find_files.outputs.modified_files }}; do - echo "Validating $file..." 
- ./sanity_check.sh "$file" - done - fi \ No newline at end of file + chmod +x sanity_check.sh + ./sanity_check.sh \ No newline at end of file diff --git a/sanity_check.sh b/sanity_check.sh index 130ed53..7dc2e45 100755 --- a/sanity_check.sh +++ b/sanity_check.sh @@ -1,26 +1,70 @@ #!/bin/bash -echo "Loading the new and modified Markdown files in the articles folder" +echo "Searching new and modified Markdown files in the articles folder" # Find new or modified markdown files in the articles folder using git diff with ACM filter -FILES=$(git diff --name-only --diff-filter=ACM HEAD origin/main -- "articles/*.md") +FILES=$(git diff --name-only --diff-filter=ACM origin/main..HEAD -- "articles/*.md") + +echo # Check if any files were found if [ -z "$FILES" ]; then - echo "No new or modified markdown files found." + echo "No new or modified articles found" exit 0 + else + echo "Detected articles:" + for file in $FILES; do + echo "- $file" + done fi -# Loop through the found files and run validation +echo + +# Initialize arrays to store validation results +passed_articles=() +failed_articles=() +failed_messages=() + +# Loop through the found articles and run validation for x in $FILES; do - # Call the Python script to validate the file and store the result - is_valid_md=$(python validate_markdown_metadata.py $x) + # Call the Python script to validate the article and store the result + validation_result=$(python validate_markdown_metadata.py "$x") - # If the result is "True", the file is valid, continue processing - if [[ $is_valid_md == "True" ]]; then - echo "Validating file $x" + # Check validation and store in respective array + if [[ $validation_result == "True" ]]; then + passed_articles+=("$x") else - echo "File $x failed" - exit 1 + failed_articles+=("$x") + # Store the error message + failed_messages+=("$validation_result") fi -done \ No newline at end of file +done + +echo "Validation results" +echo + +# Display validation results +if [ ${#passed_articles[@]} 
-gt 0 ]; then + echo "Passed article(s):" + for file in "${passed_articles[@]}"; do + echo "- $file" + done + echo +fi + +if [ ${#failed_articles[@]} -gt 0 ]; then + echo "Failed article(s):" + for i in "${!failed_articles[@]}"; do + echo "- ${failed_articles[$i]}" + # Display each line of the error message + echo " - ${failed_messages[$i]}" | sed 's/^/ /' + echo + done +fi + +# Set the exit code based on whether there were any failed articles +if [ ${#failed_articles[@]} -gt 0 ]; then + exit 1 # Exit with error code if any article failed +else + echo "All articles passed validation." +fi \ No newline at end of file diff --git a/validate_markdown_metadata.py b/validate_markdown_metadata.py index 9291367..086b4ce 100644 --- a/validate_markdown_metadata.py +++ b/validate_markdown_metadata.py @@ -23,7 +23,12 @@ def extract_yaml_and_body(file_content): >>> extract_yaml_and_body("---\\nTest: Data\\nPart: Deux\\n---\\nSeparate this body part\\n") ({'Test': 'Data', 'Part': 'Deux'}, 'Separate this body part\\n') """ - assert has_yaml_header(file_content) # File does not have a YAML header + + # Slightly modified from Portmap method + if not has_yaml_header(file_content): + raise ValueError("The file does not have a valid YAML header.") + + # assert has_yaml_header(file_content) # File does not have a YAML header in_yaml_header = False in_body = False yaml_content = [] @@ -44,7 +49,7 @@ def extract_yaml_and_body(file_content): return yaml_content, body # Checks if a field is of the expected type -def is_field_valid_type(field_name, field_value, expected_type, file_path): +def is_field_valid_type(field_name, field_value, expected_type, file_path, errors): if not isinstance(field_value, expected_type): # If expected_type is a tuple (e.g., (str, list)), it means that multiple types are allowed expected_types = ( @@ -53,18 +58,31 @@ def is_field_valid_type(field_name, field_value, expected_type, file_path): if isinstance(expected_type, tuple) else expected_type.__name__ ) - 
raise ValueError(f"'{field_name}' must be a {expected_types} in {file_path}") + errors.append(f"'{field_name}' must be a {expected_types} in {file_path}") # Validates the required fields in the frontmatter -def validate_fields(frontmatter, file_path): +def validate_fields(frontmatter, file_path, errors): # 'title': Must be a string - is_field_valid_type('title', frontmatter['title'], str, file_path) + if 'title' not in frontmatter: + errors.append(f" - 'title' is missing in {file_path}") + else: + is_field_valid_type('title', frontmatter.get('title'), str, file_path, errors) # 'datatype': Must be a string (no lists allowed) - is_field_valid_type('datatype', frontmatter['datatype'], str, file_path) + if 'datatype' not in frontmatter: + errors.append(f" - 'datatype' is missing in {file_path}") + else: + is_field_valid_type('datatype', frontmatter.get('datatype'), str, file_path, errors) # 'sources': Must be a string or a list - is_field_valid_type('sources', frontmatter['sources'], (str, list), file_path) + if 'sources' not in frontmatter: + errors.append(f" - 'sources' is missing in {file_path}") + else: + is_field_valid_type('sources', frontmatter.get('sources'), (str, list), file_path, errors) # 'destinations': Must be a string or a list - is_field_valid_type('destinations', frontmatter['destinations'], (str, list), file_path) + if 'destinations' not in frontmatter: + errors.append(f" - 'destinations' is missing in {file_path}") + else: + is_field_valid_type('destinations', frontmatter.get('destinations'), (str, list), file_path, errors) + # Checks if a specified YAML field ends with a comma def does_field_end_with_comma(field, yaml_body): @@ -73,6 +91,7 @@ def does_field_end_with_comma(field, yaml_body): return match is not None def validate_frontmatter(file_path): + errors = [] try: # Open the markdown file and extract content with open(file_path, 'r') as f: @@ -80,32 +99,32 @@ def validate_frontmatter(file_path): content = f.read() # Extract frontmatter and 
body using Portmap method - frontmatter, _ = extract_yaml_and_body(content) + try: + frontmatter, _ = extract_yaml_and_body(content) + except ValueError as ve: + errors.append(str(ve)) + return "\n".join(errors) # Validate the extracted frontmatter - validate_fields(frontmatter, file_path) + validate_fields(frontmatter, file_path, errors) - # Check for trailing commas fields_to_check = ['title', 'datatype', 'sources', 'destinations'] # Iterate over fields to check and then check for trailing commas - errors = [field for field in fields_to_check if does_field_end_with_comma(field, content)] + comma_errors = [field for field in fields_to_check if does_field_end_with_comma(field, content)] + if comma_errors: + errors.append(f"Trailing comma found in fields: {', '.join(comma_errors)} in {file_path}") - # If there are any fields with trailing commas, raise an error if errors: - raise ValueError(f"Trailing comma found in the following fields: {', '.join(errors)} in {file_path}") - - # If all validations pass, return "True" + return "\n".join(errors) return "True" except yaml.YAMLError as e: # Catch any YAML syntax errors - print(f"YAML Error in {file_path}: {e}") - return "False" + return f"YAML Error in {file_path}: {e}" except Exception as e: - # Catch other errors (missing fields, invalid structure, trailing comma, etc.) - print(f"Error in {file_path}: {e}") - return "False" + # Catch other errors (missing fields, invalid structure, trailing comma, etc.) + return f"Error in {file_path}: {e}" if __name__ == "__main__": # The script takes the file path as an argument