Skip to content

Commit

Permalink
Merge pull request #233 from NSoiffer/zh
Browse files Browse the repository at this point in the history
Zh
  • Loading branch information
NSoiffer authored Dec 13, 2023
2 parents dd44031 + 3666807 commit 8218fb2
Show file tree
Hide file tree
Showing 38 changed files with 15,076 additions and 2 deletions.
8 changes: 7 additions & 1 deletion PythonScripts/translate-rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ def collect_phrases_to_translate(file_to_translate: str) -> (list[str], list[str
word = WordToTranslate.search(line)
if word:
words.append(word.group(1))

print(f"#phrases={len(phrases)}, #words={len(words)}")
return (phrases, words)

# Break the words up into chunks to keep Google Translate happy (and to run faster), and return a dictionary of word: translation
Expand All @@ -68,9 +70,11 @@ def do_translation_chunk(phrases: list[str]):
# print("***Phrases to translate: {}\n".format(phrases))
translated_phrases_str = GoogleTranslate.translate(phrases_string, src='en', dest=lang).text.lower()
translated_phrases_str = translated_phrases_str.replace('。', '.') # happens for Chinese

translated_phrases_str = translated_phrases_str.replace('"', "'").replace("“", "'").replace("”", "'") # google occasionally changes quotes
translated_phrases_str = translated_phrases_str.replace("«", "'").replace("»", "'") # google occasionally changes quotes to this form
translated_phrases_str = translated_phrases_str.replace("、", ",") # Chinese comma

translated_phrases_list = translated_phrases_str.split('.\n')
if len(translated_phrases_list) != len(phrases):
print("\n!!!Problem in translation: size of translations ({}) differs from phrases to translate ({})\n".format(len(translated_phrases_list), len(phrases)))
Expand Down Expand Up @@ -101,7 +105,7 @@ def do_translation_chunk(phrases: list[str]):
time.sleep(TIMEOUT) # try to avoid google banning us
return translations + do_translation_chunk(phrases_chunks_to_translate)

# Matches a single-quoted span and captures the text between the quotes
# (one or more characters, none of which is a single quote).
TargetWord = re.compile(r"'([^']+)'")
# Matches a double-quoted text entry together with its leading context.
# Group 1: a space, '[' or '{', then an optional 'o' or 'c', then 't: '.
# Group 2: the text between the double quotes (one or more non-'"' characters).
TextString = re.compile(r'([ \[{][oc]?t: )"([^"]+)"')
def substitute_in_translated_phrase(line, translated_phrase, translated_word) -> str:
has_phrase = PhraseToTranslate.search(line)
Expand All @@ -120,6 +124,7 @@ def substitute_in_translated_phrase(line, translated_phrase, translated_word) ->
elif text_words:
print(f"Failed to find quoted part in translation \"{translated_phrase}\", \
using '{translated_word}\n original line: {line}")

replacement = text_words.group(1) + '"' + translated_word + '"' # add the surrounding context back
new_line = TextString.sub(replacement, line)
return new_line
Expand Down Expand Up @@ -148,6 +153,7 @@ def build_new_translation(path_to_mathcat: str, lang: str, rule_file_name: str)
(phrases_to_translate, words_to_translate) = collect_phrases_to_translate(file_to_translate)
phrase_translations = translate_phrases(phrases_to_translate, lang)
word_translations = translate_phrases(words_to_translate, lang)

print(f"file:{rule_file_name}: #phrases={len(phrase_translations)}, #words={len(word_translations)}")
create_new_file(file_to_translate, os.path.join(lang, rule_file_name), phrase_translations, word_translations)
print("done\n")
Expand Down
4 changes: 3 additions & 1 deletion PythonScripts/translate-unicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,8 @@ def do_translate_char(match_obj):
result['original'] = match_obj.group(1)
result['translation'] = translation
result['alternatives'] = alternatives
if line.find('divided by') != -1:
print(f" divided by translation: {translation}")
return 't: "{}"'.format(translation)
else:
return line
Expand Down Expand Up @@ -426,4 +428,4 @@ def translate_definition(start: int, lines: list[str], translated_lines: list[st
build_new_translation("..", language, "unicode-full")

# See the comments on translate_definitions -- you need to manually copy the file to Google Translate.
translate_definitions("..", language)
# translate_definitions("..", language)
Loading

0 comments on commit 8218fb2

Please sign in to comment.