Merge pull request #86 from dtinit/workflows-updates
Workflows updates
aaronjae22 authored Nov 5, 2024
2 parents 67f1cae + 07bd0c4 commit 5820933
Showing 4 changed files with 105 additions and 45 deletions.
2 changes: 2 additions & 0 deletions .actrc
@@ -0,0 +1,2 @@
# Configuration file for nektos/act
--container-daemon-socket -
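For local testing, nektos/act reads .actrc from the repository root automatically; the --container-daemon-socket - option tells it not to mount a Docker daemon socket into the job containers. A minimal local invocation might look like the sketch below (the event name and workflow path are assumptions based on this repository's layout):

# Hypothetical local run with nektos/act from the repository root; .actrc is picked up automatically
act pull_request -W .github/workflows/article-validator.yml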
19 changes: 7 additions & 12 deletions .github/workflows/article-validator.yml
@@ -17,6 +17,7 @@ jobs:
- name: Checkout Code
uses: actions/checkout@v3
with:
ref: ${{ github.head_ref }}
fetch-depth: 0
- run: echo "Checkout code"

@@ -37,25 +38,19 @@ jobs:
id: find_files
run: |
# Find new or modified markdown files in the 'articles' directory
MODIFIED_FILES=$(git diff --name-only --diff-filter=ACM origin/main HEAD -- "articles/*.md")
MODIFIED_FILES=$(git diff --name-only --diff-filter=ACM origin/main..HEAD -- "articles/*.md")
if [ -z "$MODIFIED_FILES" ]; then
echo "No new or modified articles found."
echo "::set-output name=modified_files::"
else
echo "New or modified markdown files found:"
echo "$MODIFIED_FILES"
echo "New or modified markdown file(s) found:"
echo "- $MODIFIED_FILES"
echo "::set-output name=modified_files::$MODIFIED_FILES"
fi
# Run sanity check for modified files
- name: Run sanity check
if: steps.find_files.outputs.modified_files
run: |
if [ -z "${{ steps.find_files.outputs.modified_files }}" ]; then
echo "No articles to validate."
else
chmod +x sanity_check.sh
for file in ${{ steps.find_files.outputs.modified_files }}; do
echo "Validating $file..."
./sanity_check.sh "$file"
done
fi
chmod +x sanity_check.sh
./sanity_check.sh
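One note on the find_files step above: GitHub Actions has deprecated the ::set-output workflow command, and under the current mechanism the same output assignment is written to the GITHUB_OUTPUT file. A sketch of the equivalent line (not part of this pull request):

# Equivalent step output using the GITHUB_OUTPUT file instead of ::set-output
echo "modified_files=$MODIFIED_FILES" >> "$GITHUB_OUTPUT"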
68 changes: 56 additions & 12 deletions sanity_check.sh
@@ -1,26 +1,70 @@
#!/bin/bash

echo "Loading the new and modified Markdown files in the articles folder"
echo "Searching new and modified Markdown files in the articles folder"

# Find new or modified markdown files in the articles folder using git diff with ACM filter
FILES=$(git diff --name-only --diff-filter=ACM HEAD origin/main -- "articles/*.md")
FILES=$(git diff --name-only --diff-filter=ACM origin/main..HEAD -- "articles/*.md")

echo

# Check if any files were found
if [ -z "$FILES" ]; then
echo "No new or modified markdown files found."
echo "No new or modified articles found"
exit 0
else
echo "Detected articles:"
for file in $FILES; do
echo "- $file"
done
fi

# Loop through the found files and run validation
echo

# Initialize arrays to store validation results
passed_articles=()
failed_articles=()
failed_messages=()

# Loop through the found articles and run validation
for x in $FILES; do
# Call the Python script to validate the file and store the result
is_valid_md=$(python validate_markdown_metadata.py $x)
# Call the Python script to validate the article and store the result
validation_result=$(python validate_markdown_metadata.py "$x")

# If the result is "True", the file is valid, continue processing
if [[ $is_valid_md == "True" ]]; then
echo "Validating file $x"
# Check validation and store in respective array
if [[ $validation_result == "True" ]]; then
passed_articles+=("$x")
else
echo "File $x failed"
exit 1
failed_articles+=("$x")
# Store the error message
failed_messages+=("$validation_result")
fi
done
done

echo "Validation results"
echo

# Display validation results
if [ ${#passed_articles[@]} -gt 0 ]; then
echo "Passed article(s):"
for file in "${passed_articles[@]}"; do
echo "- $file"
done
echo
fi

if [ ${#failed_articles[@]} -gt 0 ]; then
echo "Failed article(s):"
for i in "${!failed_articles[@]}"; do
echo "- ${failed_articles[$i]}"
# Display each line of the error message
echo " - ${failed_messages[$i]}" | sed 's/^/ /'
echo
done
fi

# Set the exit code based on whether there were any failed articles
if [ ${#failed_articles[@]} -gt 0 ]; then
exit 1 # Exit with error code if any article failed
else
echo "All articles passed validation."
fi
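The updated script can also be run locally; a minimal sketch, assuming the repository root as the working directory and that origin/main has been fetched so the diff range resolves:

# Run the same validation the workflow performs
git fetch origin main
chmod +x sanity_check.sh
./sanity_check.sh
echo "exit code: $?"   # non-zero if any article failed validation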
61 changes: 40 additions & 21 deletions validate_markdown_metadata.py
@@ -23,7 +23,12 @@ def extract_yaml_and_body(file_content):
>>> extract_yaml_and_body("---\\nTest: Data\\nPart: Deux\\n---\\nSeparate this body part\\n")
({'Test': 'Data', 'Part': 'Deux'}, 'Separate this body part\\n')
"""
assert has_yaml_header(file_content) # File does not have a YAML header

# Slightly modified from Portmap method
if not has_yaml_header(file_content):
raise ValueError("The file does not have a valid YAML header.")

# assert has_yaml_header(file_content) # File does not have a YAML header
in_yaml_header = False
in_body = False
yaml_content = []
@@ -44,7 +49,7 @@ def extract_yaml_and_body(file_content):
return yaml_content, body

# Checks if a field is of the expected type
def is_field_valid_type(field_name, field_value, expected_type, file_path):
def is_field_valid_type(field_name, field_value, expected_type, file_path, errors):
if not isinstance(field_value, expected_type):
# If expected_type is a tuple (e.g., (str, list)), it means that multiple types are allowed
expected_types = (
@@ -53,18 +58,31 @@ def is_field_valid_type(field_name, field_value, expected_type, file_path):
if isinstance(expected_type, tuple)
else expected_type.__name__
)
raise ValueError(f"'{field_name}' must be a {expected_types} in {file_path}")
errors.append(f"'{field_name}' must be a {expected_types} in {file_path}")

# Validates the required fields in the frontmatter
def validate_fields(frontmatter, file_path):
def validate_fields(frontmatter, file_path, errors):
# 'title': Must be a string
is_field_valid_type('title', frontmatter['title'], str, file_path)
if 'title' not in frontmatter:
errors.append(f" - 'title' is missing in {file_path}")
else:
is_field_valid_type('title', frontmatter.get('title'), str, file_path, errors)
# 'datatype': Must be a string (no lists allowed)
is_field_valid_type('datatype', frontmatter['datatype'], str, file_path)
if 'datatype' not in frontmatter:
errors.append(f" - 'datatype' is missing in {file_path}")
else:
is_field_valid_type('datatype', frontmatter.get('datatype'), str, file_path, errors)
# 'sources': Must be a string or a list
is_field_valid_type('sources', frontmatter['sources'], (str, list), file_path)
if 'sources' not in frontmatter:
errors.append(f" - 'sources' is missing in {file_path}")
else:
is_field_valid_type('sources', frontmatter.get('sources'), (str, list), file_path, errors)
# 'destinations': Must be a string or a list
is_field_valid_type('destinations', frontmatter['destinations'], (str, list), file_path)
if 'destinations' not in frontmatter:
errors.append(f" - 'destinations' is missing in {file_path}")
else:
is_field_valid_type('destinations', frontmatter.get('destinations'), (str, list), file_path, errors)


# Checks if a specified YAML field ends with a comma
def does_field_end_with_comma(field, yaml_body):
@@ -73,39 +91,40 @@ def does_field_end_with_comma(field, yaml_body):
return match is not None

def validate_frontmatter(file_path):
errors = []
try:
# Open the markdown file and extract content
with open(file_path, 'r') as f:
# Reads content and stores it
content = f.read()

# Extract frontmatter and body using Portmap method
frontmatter, _ = extract_yaml_and_body(content)
try:
frontmatter, _ = extract_yaml_and_body(content)
except ValueError as ve:
errors.append(str(ve))
return "\n".join(errors)

# Validate the extracted frontmatter
validate_fields(frontmatter, file_path)
validate_fields(frontmatter, file_path, errors)

# Check for trailing commas
fields_to_check = ['title', 'datatype', 'sources', 'destinations']
# Iterate over fields to check and then check for trailing commas
errors = [field for field in fields_to_check if does_field_end_with_comma(field, content)]
comma_errors = [field for field in fields_to_check if does_field_end_with_comma(field, content)]
if comma_errors:
errors.append(f"Trailing comma found in fields: {', '.join(comma_errors)} in {file_path}")

# If there are any fields with trailing commas, raise an error
if errors:
raise ValueError(f"Trailing comma found in the following fields: {', '.join(errors)} in {file_path}")

# If all validations pass, return "True"
return "\n".join(errors)
return "True"

except yaml.YAMLError as e:
# Catch any YAML syntax errors
print(f"YAML Error in {file_path}: {e}")
return "False"
return f"YAML Error in {file_path}: {e}"

except Exception as e:
# Catch other errors (missing fields, invalid structure, trailing comma, etc.)
print(f"Error in {file_path}: {e}")
return "False"
# Catch other errors (missing fields, invalid structure, trailing comma, etc.)
return f"Error in {file_path}: {e}"

if __name__ == "__main__":
# The script takes the file path as an argument
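For reference, sanity_check.sh invokes this validator one article at a time and treats the literal output "True" as a pass; a hypothetical standalone invocation (the article path here is made up) would look like:

python validate_markdown_metadata.py articles/example-article.md
# prints "True" when the frontmatter is valid, otherwise one error message per line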
