Skip to content

Commit

Permalink
Replace unidecode by unicodedata, pinyin, romkan
Browse files Browse the repository at this point in the history
Potential licence issue
  • Loading branch information
sbrunner committed Jul 5, 2022
1 parent da41b35 commit 6d0736f
Show file tree
Hide file tree
Showing 6 changed files with 159 additions and 99 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ help: ## Display this help message

.poetry.timestamps: pyproject.toml poetry.lock
poetry --version || pip install --user --requirement=requirements.txt
poetry install --extras=tools
poetry install --extras=tools --extras=generate
touch $@

.PHONY: prospector
Expand Down
5 changes: 5 additions & 0 deletions ci/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@
checks:
versions:
rebuild: False
codespell:
arguments:
- --quiet-level=2
- --check-filenames
- --ignore-words-list=nd

publish:
docker:
Expand Down
190 changes: 112 additions & 78 deletions jsonschema_gentypes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,104 @@
"""

import textwrap
import unicodedata
from abc import abstractmethod
from typing import Callable, Dict, Iterator, List, Optional, Set, Tuple, Union, cast

import yaml
from jsonschema import RefResolver
from unidecode import unidecode

from jsonschema_gentypes import configuration, jsonschema

# Raise issues here.
ISSUE_URL = "https://github.com/camptcamp/jsonschema-gentypes"


def __convert_char(char: str) -> str:
    """
    Convert a single character into text usable to build a Python name.

    Arguments:
        char: one character, expected to come from an NFKD-normalized string

    Returns the replacement text: an empty string, a single space, the
    character itself, a Unicode character name surrounded by spaces, or a
    romanized form produced by ``romkan`` / ``pinyin``.
    """
    # Imported lazily, on every call, before any other work is done.
    import pinyin  # pylint: disable=import-outside-toplevel
    import romkan  # pylint: disable=import-outside-toplevel

    # Combining marks (the accents split off by the NFKD decomposition) are dropped.
    if unicodedata.combining(char):
        return ""
    # A dash is a word separator.
    if char == "-":
        return " "

    category = unicodedata.category(char)

    # Space separators and control characters become a plain space.
    if category in ("Zs", "Cc"):
        return " "
    # Decimal digits and lower/upper case letters are kept untouched.
    if category in ("Nd", "Ll", "Lu"):
        return char
    # Symbols are spelled out with their Unicode name, without the " SIGN" part.
    if category == "So":
        symbol_name = unicodedata.name(char).replace(" SIGN", "")
        return f" {symbol_name} "
    # Punctuation is spelled out with its full Unicode name.
    if category == "Po":
        return f" {unicodedata.name(char)} "

    # Everything else: try the Chinese transliteration first.
    transliterated = pinyin.get(char, delimiter=" ")
    if len(transliterated) == 1:
        # NOTE(review): a one character result presumably means pinyin left the
        # character unchanged; fall back to the Japanese romanization.
        return cast(str, romkan.to_roma(char))

    # Strip the tone marks from the pinyin and keep it separated by spaces.
    decomposed = unicodedata.normalize("NFKD", f" {transliterated} ")
    return "".join(c for c in decomposed if not unicodedata.combining(c))


def normalize(input_str: str) -> str:
    """
    Normalize the string to be a Python name.

    The string is NFKD-decomposed, every character is converted with
    ``__convert_char``, then ``num `` is prepended if the result starts with a
    digit and `` name`` is appended if the result is (case-insensitively) one
    of the listed Python keywords.

    Arguments:
        input_str: the raw name, e.g. a schema title

    Returns the normalized name; it is an empty string when nothing is left
    after the conversion (e.g. for an empty input).
    """

    # Unaccent, ...
    nfkd_form = unicodedata.normalize("NFKD", input_str)
    name = "".join([__convert_char(c) for c in nfkd_form])

    # No number at first position.
    # The emptiness guard avoids an IndexError on ``name[0]`` when the
    # conversion produced an empty string.
    if name and name[0] in list(char_range("0", "9")):
        name = f"num {name}"

    # No python keyword
    if name.lower() in [
        "and",
        "as",
        "assert",
        "break",
        "class",
        "continue",
        "def",
        "del",
        "elif",
        "else",
        "except",
        "false",
        "finally",
        "for",
        "from",
        "global",
        "if",
        "import",
        "in",
        "is",
        "lambda",
        "none",
        "nonlocal",
        "not",
        "or",
        "pass",
        "raise",
        "return",
        "true",
        "try",
        "while",
        "with",
        "yield",
    ]:
        name = f"{name} name"
    return name


class Type:
"""
The base Type object.
Expand Down Expand Up @@ -94,7 +179,7 @@ def set_name(self, name: str) -> None:

def unescape_name(self) -> str:
"""
Return the unscaped name.
Return the unescaped name.
"""
return self._name

Expand Down Expand Up @@ -158,7 +243,7 @@ def name(self) -> str:

class NativeType(Type):
"""
Native Type that will essencially generates a Python import.
Native Type that will essentially generate a Python import.
"""

def __init__(self, name: str, package: str = "typing") -> None:
Expand Down Expand Up @@ -229,7 +314,7 @@ def __init__(self, name: str, sub_type: Type, descriptions: Optional[List[str]]
Arguments:
name: the type name
sub_type: the type that should be aliazed
sub_type: the type that should be aliased
descriptions: the type description
"""
super().__init__(name)
Expand Down Expand Up @@ -292,8 +377,8 @@ def definition(self, line_length: Optional[int] = None) -> List[str]:
result += ["# The values for the enum"]
for value in self.values:
name = get_name({"title": f"{self._name} {value}"}, upper=True)
formated_value = f'"{value}"' if isinstance(value, str) else str(value)
result.append(f"{name}: {LiteralType(value).name()} = {formated_value}")
formatted_value = f'"{value}"' if isinstance(value, str) else str(value)
result.append(f"{name}: {LiteralType(value).name()} = {formatted_value}")
return result


Expand Down Expand Up @@ -388,58 +473,15 @@ def get_name(
Arguments:
schema: the concerned schema
proposed_name: a name that we will use it the scheema hasn't any title
upper: should we use an upper cass (For constants)
proposed_name: a name that we will use if the schema has no title
upper: should we use an upper case (For constants)
"""
# Get the base name
has_title = isinstance(schema, dict) and "title" in schema
name = schema["title"] if has_title else proposed_name # type: ignore
assert name is not None
# Unaccent, ...
name = unidecode(name)
# Remove unauthorised char
authorised_char = list(char_range("a", "z")) + list(char_range("A", "Z")) + list(char_range("0", "9"))
name = "".join([(c if c in authorised_char else " ") for c in name])
# No number at first position
if name[0] in list(char_range("0", "9")):
name = f"num {name}"
# No python keyword
if name.lower() in [
"and",
"as",
"assert",
"break",
"class",
"continue",
"def",
"del",
"elif",
"else",
"except",
"false",
"finally",
"for",
"from",
"global",
"if",
"import",
"in",
"is",
"lambda",
"none",
"nonlocal",
"not",
"or",
"pass",
"raise",
"return",
"true",
"try",
"while",
"with",
"yield",
]:
name = f"{name} name"
name = normalize(name)

prefix = "" if has_title else "_"
if upper:
# Upper case
Expand All @@ -460,6 +502,8 @@ def get_description(schema: jsonschema.JSONSchemaItem) -> List[str]:
Arguments:
schema: the concerned schema
"""
import yaml # pylint: disable=import-outside-toplevel

result: List[str] = []
for key in ("title", "description"):
if key in schema:
Expand Down Expand Up @@ -594,13 +638,13 @@ def _get_type_internal(self, schema: jsonschema.JSONSchemaItem, proposed_name: s
then_schema.update(self._resolve_ref(cast(jsonschema.JSONSchemaItem, schema.get("then", {}))))
if "properties" not in then_schema:
then_schema["properties"] = {}
then_propoerties = then_schema["properties"]
assert then_propoerties
then_properties = then_schema["properties"]
assert then_properties
if_properties = self._resolve_ref(cast(jsonschema.JSONSchemaItem, schema.get("if", {}))).get(
"properties", {}
)
assert if_properties
then_propoerties.update(if_properties)
then_properties.update(if_properties)
else_schema: jsonschema.JSONSchemaItem = {}
else_schema.update(base_schema)
else_schema.update(self._resolve_ref(cast(jsonschema.JSONSchemaItem, schema.get("else", {}))))
Expand Down Expand Up @@ -676,7 +720,7 @@ def _get_type_internal(self, schema: jsonschema.JSONSchemaItem, proposed_name: s

if schema_type is None:
type_ = BuiltinType("None")
type_.set_comments(["WARNING: we get an scheam without any type"])
type_.set_comments(["WARNING: we get an schema without any type"])
return type_
assert isinstance(schema_type, str), (
f"Expected to find a supported schema type, got {schema_type}" f"\nDuring parsing of {schema}"
Expand Down Expand Up @@ -722,7 +766,7 @@ def ref(self, schema: jsonschema.JSONSchemaItem, proposed_name: str) -> Type:
def any_of(
self,
schema: jsonschema.JSONSchemaItem,
subschema: List[jsonschema.JSONSchemaItem],
sub_schema: List[jsonschema.JSONSchemaItem],
proposed_name: str,
sub_name: str,
) -> Type:
Expand Down Expand Up @@ -779,9 +823,7 @@ def enum(self, schema: jsonschema.JSONSchemaItem, proposed_name: str) -> Type:
get_description(schema),
)

def boolean(
self, schema: jsonschema.JSONSchemaItem, proposed_name: str
) -> Type:
def boolean(self, schema: jsonschema.JSONSchemaItem, proposed_name: str) -> Type:
"""
Generate a ``bool`` annotation for a boolean object.
"""
Expand Down Expand Up @@ -819,9 +861,9 @@ def add_required(type_: Type, prop: str, required: Set[str]) -> Type:

struct = {
prop: add_required(
self.get_type(subschema, proposed_name + " " + prop, auto_alias=False), prop, required
self.get_type(sub_schema, proposed_name + " " + prop, auto_alias=False), prop, required
)
for prop, subschema in properties.items()
for prop, sub_schema in properties.items()
}

type_: Type = TypedDictType(
Expand Down Expand Up @@ -884,7 +926,7 @@ def array(self, schema: jsonschema.JSONSchemaItem, proposed_name: str) -> Type:
def any_of(
self,
schema: jsonschema.JSONSchemaItem,
subschema: List[jsonschema.JSONSchemaItem],
sub_schema: List[jsonschema.JSONSchemaItem],
proposed_name: str,
sub_name: str,
) -> Type:
Expand All @@ -896,7 +938,7 @@ def any_of(
lambda o: o is not None,
[
self.get_type(subs, f"{proposed_name} {sub_name}{index}")
for index, subs in enumerate(subschema)
for index, subs in enumerate(sub_schema)
],
)
)
Expand Down Expand Up @@ -951,36 +993,28 @@ def ref(self, schema: jsonschema.JSONSchemaItem, proposed_name: str) -> Type:
self.ref_type[ref] = type_
return type_

def string(
self, schema: jsonschema.JSONSchemaItem, proposed_name: str
) -> Type:
def string(self, schema: jsonschema.JSONSchemaItem, proposed_name: str) -> Type:
"""
Generate a ``str`` annotation.
"""
del schema, proposed_name
return BuiltinType("str")

def number(
self, schema: jsonschema.JSONSchemaItem, proposed_name: str
) -> Type:
def number(self, schema: jsonschema.JSONSchemaItem, proposed_name: str) -> Type:
"""
Generate a ``Union[int, float]`` annotation.
"""
del schema, proposed_name
return CombinedType(NativeType("Union"), [BuiltinType("int"), BuiltinType("float")])

def integer(
self, schema: jsonschema.JSONSchemaItem, proposed_name: str
) -> Type:
def integer(self, schema: jsonschema.JSONSchemaItem, proposed_name: str) -> Type:
"""
Generate an ``int`` annotation.
"""
del schema, proposed_name
return BuiltinType("int")

def null(
self, schema: jsonschema.JSONSchemaItem, proposed_name: str
) -> Type:
def null(self, schema: jsonschema.JSONSchemaItem, proposed_name: str) -> Type:
"""
Generate a ``None`` annotation.
"""
Expand Down
Loading

0 comments on commit 6d0736f

Please sign in to comment.