Skip to content

Commit

Permalink
Merge pull request #152 from advanced-security/v2_9_1
Browse files Browse the repository at this point in the history
V2.9.1
  • Loading branch information
GeekMasher authored Jan 10, 2025
2 parents da84215 + aad03d0 commit ecab553
Show file tree
Hide file tree
Showing 15 changed files with 314 additions and 148 deletions.
12 changes: 8 additions & 4 deletions .release.yml
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
name: "policy-as-code"
version: "2.9.0"
repository: "advanced-security/policy-as-code"
version: "2.9.1"

ecosystems:
- Python

locations:
- name: "Update Docs"
paths:
- "*.md"
- "docs/*.md"
patterns:
- 'advanced-security/policy-as-code@v([0-9]\.[0-9]\.[0-9])'
- '--branch "v([0-9]\.[0-9]\.[0-9])"'

- "{repository}@v{version}"
- '--branch "v{version}"'
2 changes: 1 addition & 1 deletion Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ verify_ssl = true
[packages]
pyyaml = "*"
semantic-version = "*"
ghastoolkit = "==0.14.2"
ghastoolkit = "==0.15.1"

[dev-packages]
sphinx = "*"
Expand Down
205 changes: 110 additions & 95 deletions Pipfile.lock

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ Here is how you can quickly set up policy-as-code.
```yaml
# Policy as Code
- name: Advanced Security Policy as Code
uses: advanced-security/policy-as-code@v2.9.0
uses: advanced-security/policy-as-code@v2.9.1
```
> [!WARNING]
Expand All @@ -61,15 +61,15 @@ The Policy as Code project is a self-contained Python based CLI tool.
**Bash / Zsh:**

```bash
git clone --branch "v2.9.0" https://github.com/advanced-security/policy-as-code.git && cd ./policy-as-code
git clone --branch "v2.9.1" https://github.com/advanced-security/policy-as-code.git && cd ./policy-as-code
./policy-as-code --help
```

**Powershell:**

```Powershell
git clone --branch "v2.9.0" https://github.com/advanced-security/policy-as-code.git
git clone --branch "v2.9.1" https://github.com/advanced-security/policy-as-code.git
cd policy-as-code
.\policy-as-code.ps1 --help
Expand Down Expand Up @@ -128,7 +128,7 @@ Here is an example of a simple yet cross-organization setup using Policy as Code
```yaml
# Compliance
- name: Advanced Security Policy as Code
uses: advanced-security/policy-as-code@v2.9.0
uses: advanced-security/policy-as-code@v2.9.1
with:
# The owner/repo of where the policy is stored
policy: GeekMasher/security-queries
Expand Down
2 changes: 1 addition & 1 deletion ghascompliance/__version__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env python
__version__ = "2.9.0"
__version__ = "2.9.1"

__title__ = "GitHub Advanced Security Policy as Code"
__name__ = "ghascompliance"
Expand Down
78 changes: 60 additions & 18 deletions vendor/charset_normalizer/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,8 @@ def from_bytes(

results: CharsetMatches = CharsetMatches()

early_stop_results: CharsetMatches = CharsetMatches()

sig_encoding, sig_payload = identify_sig_or_bom(sequences)

if sig_encoding is not None:
Expand Down Expand Up @@ -221,16 +223,20 @@ def from_bytes(
try:
if is_too_large_sequence and is_multi_byte_decoder is False:
str(
sequences[: int(50e4)]
if strip_sig_or_bom is False
else sequences[len(sig_payload) : int(50e4)],
(
sequences[: int(50e4)]
if strip_sig_or_bom is False
else sequences[len(sig_payload) : int(50e4)]
),
encoding=encoding_iana,
)
else:
decoded_payload = str(
sequences
if strip_sig_or_bom is False
else sequences[len(sig_payload) :],
(
sequences
if strip_sig_or_bom is False
else sequences[len(sig_payload) :]
),
encoding=encoding_iana,
)
except (UnicodeDecodeError, LookupError) as e:
Expand Down Expand Up @@ -367,7 +373,13 @@ def from_bytes(
and not lazy_str_hard_failure
):
fallback_entry = CharsetMatch(
sequences, encoding_iana, threshold, False, [], decoded_payload
sequences,
encoding_iana,
threshold,
False,
[],
decoded_payload,
preemptive_declaration=specified_encoding,
)
if encoding_iana == specified_encoding:
fallback_specified = fallback_entry
Expand Down Expand Up @@ -421,28 +433,58 @@ def from_bytes(
),
)

results.append(
CharsetMatch(
sequences,
encoding_iana,
mean_mess_ratio,
bom_or_sig_available,
cd_ratios_merged,
decoded_payload,
)
current_match = CharsetMatch(
sequences,
encoding_iana,
mean_mess_ratio,
bom_or_sig_available,
cd_ratios_merged,
(
decoded_payload
if (
is_too_large_sequence is False
or encoding_iana in [specified_encoding, "ascii", "utf_8"]
)
else None
),
preemptive_declaration=specified_encoding,
)

results.append(current_match)

if (
encoding_iana in [specified_encoding, "ascii", "utf_8"]
and mean_mess_ratio < 0.1
):
# If md says nothing to worry about, then... stop immediately!
if mean_mess_ratio == 0.0:
logger.debug(
"Encoding detection: %s is most likely the one.",
current_match.encoding,
)
if explain:
logger.removeHandler(explain_handler)
logger.setLevel(previous_logger_level)
return CharsetMatches([current_match])

early_stop_results.append(current_match)

if (
len(early_stop_results)
and (specified_encoding is None or specified_encoding in tested)
and "ascii" in tested
and "utf_8" in tested
):
probable_result: CharsetMatch = early_stop_results.best() # type: ignore[assignment]
logger.debug(
"Encoding detection: %s is most likely the one.", encoding_iana
"Encoding detection: %s is most likely the one.",
probable_result.encoding,
)
if explain:
logger.removeHandler(explain_handler)
logger.setLevel(previous_logger_level)
return CharsetMatches([results[encoding_iana]])

return CharsetMatches([probable_result])

if encoding_iana == sig_encoding:
logger.debug(
Expand Down
36 changes: 30 additions & 6 deletions vendor/charset_normalizer/cli/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,14 @@ def cli_detect(argv: Optional[List[str]] = None) -> int:
dest="force",
help="Replace file without asking if you are sure, use this flag with caution.",
)
parser.add_argument(
"-i",
"--no-preemptive",
action="store_true",
default=False,
dest="no_preemptive",
help="Disable looking at a charset declaration to hint the detector.",
)
parser.add_argument(
"-t",
"--threshold",
Expand All @@ -133,31 +141,47 @@ def cli_detect(argv: Optional[List[str]] = None) -> int:
args = parser.parse_args(argv)

if args.replace is True and args.normalize is False:
if args.files:
for my_file in args.files:
my_file.close()
print("Use --replace in addition of --normalize only.", file=sys.stderr)
return 1

if args.force is True and args.replace is False:
if args.files:
for my_file in args.files:
my_file.close()
print("Use --force in addition of --replace only.", file=sys.stderr)
return 1

if args.threshold < 0.0 or args.threshold > 1.0:
if args.files:
for my_file in args.files:
my_file.close()
print("--threshold VALUE should be between 0. AND 1.", file=sys.stderr)
return 1

x_ = []

for my_file in args.files:
matches = from_fp(my_file, threshold=args.threshold, explain=args.verbose)
matches = from_fp(
my_file,
threshold=args.threshold,
explain=args.verbose,
preemptive_behaviour=args.no_preemptive is False,
)

best_guess = matches.best()

if best_guess is None:
print(
'Unable to identify originating encoding for "{}". {}'.format(
my_file.name,
"Maybe try increasing maximum amount of chaos."
if args.threshold < 1.0
else "",
(
"Maybe try increasing maximum amount of chaos."
if args.threshold < 1.0
else ""
),
),
file=sys.stderr,
)
Expand Down Expand Up @@ -258,8 +282,8 @@ def cli_detect(argv: Optional[List[str]] = None) -> int:
try:
x_[0].unicode_path = join(dir_path, ".".join(o_))

with open(x_[0].unicode_path, "w", encoding="utf-8") as fp:
fp.write(str(best_guess))
with open(x_[0].unicode_path, "wb") as fp:
fp.write(best_guess.output())
except IOError as e:
print(str(e), file=sys.stderr)
if my_file.closed is False:
Expand Down
2 changes: 2 additions & 0 deletions vendor/charset_normalizer/constant.py
Original file line number Diff line number Diff line change
Expand Up @@ -544,6 +544,8 @@
"|",
'"',
"-",
"(",
")",
}


Expand Down
15 changes: 13 additions & 2 deletions vendor/charset_normalizer/legacy.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,24 @@
from typing import Any, Dict, Optional, Union
from __future__ import annotations

from typing import TYPE_CHECKING, Any, Optional
from warnings import warn

from .api import from_bytes
from .constant import CHARDET_CORRESPONDENCE

# TODO: remove this check when dropping Python 3.7 support
if TYPE_CHECKING:
from typing_extensions import TypedDict

class ResultDict(TypedDict):
encoding: Optional[str]
language: str
confidence: Optional[float]


def detect(
byte_str: bytes, should_rename_legacy: bool = False, **kwargs: Any
) -> Dict[str, Optional[Union[str, float]]]:
) -> ResultDict:
"""
chardet legacy method
Detect the encoding of the given byte string. It should be mostly backward-compatible.
Expand Down
19 changes: 16 additions & 3 deletions vendor/charset_normalizer/md.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ def reset(self) -> None: # pragma: no cover

@property
def ratio(self) -> float:
if self._character_count <= 24:
if self._character_count <= 13:
return 0.0

ratio_of_suspicious_range_usage: float = (
Expand All @@ -260,6 +260,7 @@ def __init__(self) -> None:

self._buffer: str = ""
self._buffer_accent_count: int = 0
self._buffer_glyph_count: int = 0

def eligible(self, character: str) -> bool:
return True
Expand All @@ -279,6 +280,14 @@ def feed(self, character: str) -> None:
and is_thai(character) is False
):
self._foreign_long_watch = True
if (
is_cjk(character)
or is_hangul(character)
or is_katakana(character)
or is_hiragana(character)
or is_thai(character)
):
self._buffer_glyph_count += 1
return
if not self._buffer:
return
Expand All @@ -291,17 +300,20 @@ def feed(self, character: str) -> None:
self._character_count += buffer_length

if buffer_length >= 4:
if self._buffer_accent_count / buffer_length > 0.34:
if self._buffer_accent_count / buffer_length >= 0.5:
self._is_current_word_bad = True
# Words/buffers ending with an upper-case accentuated letter are so rare
# that we consider them all suspicious. Same weight as a foreign_long suspicion.
if (
elif (
is_accentuated(self._buffer[-1])
and self._buffer[-1].isupper()
and all(_.isupper() for _ in self._buffer) is False
):
self._foreign_long_count += 1
self._is_current_word_bad = True
elif self._buffer_glyph_count == 1:
self._is_current_word_bad = True
self._foreign_long_count += 1
if buffer_length >= 24 and self._foreign_long_watch:
camel_case_dst = [
i
Expand All @@ -325,6 +337,7 @@ def feed(self, character: str) -> None:
self._foreign_long_watch = False
self._buffer = ""
self._buffer_accent_count = 0
self._buffer_glyph_count = 0
elif (
character not in {"<", ">", "-", "=", "~", "|", "_"}
and character.isdigit() is False
Expand Down
Loading

0 comments on commit ecab553

Please sign in to comment.