Add lexer for HercScript language highlight

This is a basic highlighter based on the Pygments C lexer. From my tests it gives a satisfactory result and should be good enough for our current docs. It may be improved later as we find places where it doesn't work very well. Docs can format Hercules scripts using "```HercScript" to get proper highlighting.
HerculesWS · Apr 25, 2024 · c2d5179 · c2d5179
1 parent d5571a0
commit c2d5179
Show file tree

Hide file tree

Showing 6 changed files with 157 additions and 2 deletions.
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
@@ -13,6 +13,7 @@ jobs:
       - uses: actions/checkout@v3
       - run: python -m pip install --upgrade pip
       - run: pip install mkdocs mkdocs-material
+      - run: pip install -e ./hercscript-lexer
       - run: cd docs
       - run: mkdocs build -f mkdocs.yml
       - name: Upload GitHub Pages artifact

diff --git a/.gitignore b/.gitignore
@@ -1,2 +1,4 @@
 /.venv
 /site/
+/hercscript-lexer/hercscript_lexers.egg-info
+/hercscript-lexer/hercscript_lexers/__pycache__
diff --git a/docs/contributing/editing-the-docs.md b/docs/contributing/editing-the-docs.md
@@ -15,9 +15,15 @@ submit a [Pull request](./creating-pull-requests.md) to the [Hercules-docs repos
 ## Setup
 Hercules docs uses mkdocs-material.
 
-You will need to have Python3 installed and install `mkdocs-material` package.
+You will need to have Python3 installed, and install `mkdocs-material` and our lexer packages.
 
-You can install it with `pip install mkdocs-material` or `pip3 install mkdocs-material`.
+You can install them with:
+```SH
+pip install mkdocs-material
+pip install -e ./hercscript-lexer # Optional, required for HercScript highlighting
+```
+
+or perform the same commands with `pip3`.
 
 For more information about installing mkdocs-material, and other alternatives,
 see [Mkdocs Material's getting started](https://squidfunk.github.io/mkdocs-material/getting-started/#installation)

diff --git a/hercscript-lexer/hercscript_lexers/HercScriptLexer.py b/hercscript-lexer/hercscript_lexers/HercScriptLexer.py
@@ -0,0 +1,118 @@
+from pygments.lexer import RegexLexer, include, bygroups, using, \
+    this, default, words
+from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
+    Number, Punctuation, Whitespace
+
+__all__ = ['HercScriptLexer']
+
+
+class HercScriptLexer(RegexLexer):
+    """
+    Hercules Script (a.k.a. Athena Script) lexer.
+
+    Based on Pygments official C grammar.
+
+    Lexing starts in the ``root`` state, which handles whitespace and
+    keywords and otherwise enters ``statement`` without consuming input.
+    ``statement`` returns to ``root`` on ``{`` or ``;``.
+    """
+
+    # Lexer metadata. ``url`` and ``version_added`` are left empty.
+    name = 'hercscript'
+    aliases = ['hercscript', 'athenascript']
+    url = ''
+    version_added = ''
+    # NOTE(review): presumably a low priority so other lexers win when
+    # Pygments has to guess between candidates -- confirm.
+    priority = 0.1
+
+    # Hexadecimal part in a hexadecimal integer literal.
+    # Also matches a single `_` separator between digits.
+    _hexpart = r'[0-9a-fA-F](_?[0-9a-fA-F])*'
+    # Decimal part in a decimal integer literal.
+    # Also matches a single `_` separator between digits.
+    _decpart = r'\d(\_?\d)*'
+
+    # Identifier regex with C and C++ Universal Character Name (UCN) support.
+    # It may contain `$` and must not start with a digit.
+    _ident = r'(?!\d)(?:[\w$]|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8})+'
+
+    # Single and multiline comment regexes
+    # Beware not to use *? for the inner content! When these regexes
+    # are embedded in larger regexes, that can cause the stuff*? to
+    # match more than it would have if the regex had been used in
+    # a standalone way ...
+    _comment_single = r'//(?:.|(?<=\\)\n)*\n'
+    _comment_multiline = r'/(?:\\\n)?[*](?:[^*]|[*](?!(?:\\\n)?/))*[*](?:\\\n)?/'
+
+    # State table. Within a state the rules are tried in the order listed,
+    # so the ordering below is significant.
+    tokens = {
+        # Labels, blank space and comments; included by the other states.
+        'whitespace': [
+            # Labels:
+            # Line start and possible indentation.
+            (r'(^[ \t]*)'
+                # Not followed by keywords which can be mistaken as labels.
+                r'(?!(?:default)\b)'
+                # Actual label, followed by a single colon.
+                r'(' + _ident + r')(\s*)(:)(?!:)',
+                bygroups(Whitespace, Name.Label, Whitespace, Punctuation)
+            ),
+            (r'\n', Whitespace),
+            (r'[^\S\n]+', Whitespace),
+            (_comment_single, Comment.Single),
+            (_comment_multiline, Comment.Multiline),
+            # Open until EOF, so no ending delimiter
+            (r'/(\\\n)?[*][\w\W]*', Comment.Multiline),
+        ],
+        # Expression-level tokens shared by 'statement' and 'case-value'.
+        'statements': [
+            include('keywords'),
+            # Numeric literals. An optional leading `-` is lexed as part of
+            # the literal. Prefixed forms (hex, binary, octal) come before
+            # the plain decimal rule so their leading `0` is not consumed
+            # as a decimal number.
+            (r'(-)?0[xX]' + _hexpart , Number.Hex),
+            (r'(-)?0[bB][01](_?[01])*', Number.Bin),
+            (r'(-)?0(_?[0-7])+', Number.Oct),
+            (r'(-)?' + _decpart, Number.Integer),
+            (r'[~!%^&*+=|?:<>/-]', Operator),
+            (r'[()\[\],.]', Punctuation),
+            (r'(true|false)\b', Name.Builtin),
+            # Opening quote: switch to the 'string' state until it closes.
+            ('"', String, 'string'),
+            # A word directly followed by `(`; the whitespace and the
+            # parenthesis are re-lexed with this same lexer.
+            (r'(\w+)(\s*\()', bygroups(Name.Function, using(this))),  # function call
+            # Word with optional leading `.`/`#`/`$`, `#`, `@` and an
+            # optional trailing `$`.
+            # NOTE(review): presumably the HercScript variable scope
+            # prefixes and string-variable suffix -- confirm.
+            (r'[\.#\$]?#?@?\w+\$?', Name.Variable),
+            # Only reached for text the rule above cannot match (e.g. an
+            # identifier beginning with a `\u`/`\U` escape).
+            (_ident, Name)
+        ],
+        'keywords': [
+            # `case` enters 'case-value' so the following identifier is
+            # marked as a constant.
+            (r'case\b', Keyword, 'case-value'),
+            (
+                words(
+                    ('break', 'continue', 'default',
+                        'do', 'else', 'for', 'goto', 'if',
+                        'return', 'switch', 'while',
+                        'end', 'function', 'script', 'trader'
+                    ),
+                    suffix=r'\b'), Keyword
+            )
+        ],
+        # Initial state: anything that is not whitespace or a keyword
+        # enters 'statement' without consuming input.
+        'root': [
+            include('whitespace'),
+            include('keywords'),
+            default('statement'),
+        ],
+        'statement': [
+            include('whitespace'),
+            include('statements'),
+            # `}` keeps the current state; `{` and `;` end the statement
+            # and pop back to the previous state.
+            (r'\}', Punctuation),
+            (r'[{;]', Punctuation, '#pop'),
+        ],
+        # Inside a double-quoted string.
+        'string': [
+            (r'"', String, '#pop'),
+            # Backslash escapes: single characters, hex, UCN and octal.
+            (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|'
+             r'u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8}|[0-7]{1,3})', String.Escape),
+            (r'[^\\"\n]+', String),  # all other characters
+            (r'\\\n', String),  # line continuation
+            (r'\\', String),  # stray backslash
+        ],
+        # Mark identifiers preceded by `case` keyword as constants.
+        'case-value': [
+            # A single `:` (not part of `::`) ends the case value.
+            (r'(?<!:)(:)(?!:)', Punctuation, '#pop'),
+            (_ident, Name.Constant),
+            include('whitespace'),
+            include('statements'),
+        ]
+    }
+
+    def __init__(self, **options):
+        """Forward all options to ``RegexLexer`` unchanged."""
+        RegexLexer.__init__(self, **options)
+
+    def get_tokens_unprocessed(self, text, stack=('root',)):
+        """Yield ``(index, token, value)`` tuples from ``RegexLexer`` as-is.
+
+        This is a pass-through: no token is filtered or rewritten.
+        """
+        for index, token, value in \
+                RegexLexer.get_tokens_unprocessed(self, text, stack):
+            yield index, token, value
diff --git a/hercscript-lexer/hercscript_lexers/__init__.py b/hercscript-lexer/hercscript_lexers/__init__.py
@@ -0,0 +1,3 @@
+from .HercScriptLexer import HercScriptLexer
+
+__all__ = ("HercScriptLexer")
diff --git a/hercscript-lexer/setup.py b/hercscript-lexer/setup.py
@@ -0,0 +1,25 @@
+#!/usr/bin/env python
+"""Setup hercscript-lexers."""
+from setuptools import setup, find_packages
+
+# Entry-point declaration in INI form: registers the name ``hercscript`` in
+# the ``pygments.lexers`` group, pointing at the ``HercScriptLexer`` object
+# of the ``hercscript_lexers`` package.
+entry_points = '''
+[pygments.lexers]
+hercscript=hercscript_lexers:HercScriptLexer
+'''
+
+setup(
+    name='hercscript-lexers',
+    version='1.0.0',
+    description='Pygments lexer package for hercscript.',
+    author='Hercules Team',
+    author_email='',
+    url='',
+    # Packages are discovered automatically from this directory.
+    packages=find_packages(),
+    entry_points=entry_points,
+    # Pygments is the only declared install requirement.
+    install_requires=[
+        'Pygments>=2.0.1'
+    ],
+    zip_safe=True,
+    license='MIT License',
+    classifiers=[]
+)
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		from .HercScriptLexer import HercScriptLexer

		__all__ = ("HercScriptLexer",)  # trailing comma: a tuple of names, not a bare string