Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions tools/translations/Readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
### Extract translation text and use AI to perform the translation.
- Modify the `files_to_process` list in qgc_translate.py.
- Run qgc_translate.py; for each processed file it generates a `qgc_xxx.ts_missing.txt` file in the translations folder.
- Every entry in that file is text that still needs to be translated; copy the entries and have the AI translate them.
- Copy the result into the `get_dict` function of dict_xxx.py, then run qgc_translate.py again.
- Bingo! You will find that xxx.ts has now been translated.

### Notice:
- Do not send too much text to the AI at once, as it may exceed the limit; around 500 at a time is OK.
- The AI translation may contain mistakes, so it is best to have it double-checked manually.
- Long multi-line text is not supported.
- ***This method can be used to translate any Qt project.***

### Translation prompt (for zh_CN, used with https://chat.deepseek.com/)
```
以下文本都是 "英文":"TODO", 的形式, 这些英文是无人机相关的英文, 将他们翻译成中文, 并替换成 "英文": """中文""", 的格式.
注意:
1.不要更改原始输入英文的格式,在输出的中文中也保留对应的格式字符,只要 txt 的纯文本格式, 不要转换成其他格式.
2.不需要流式给我推送结果, 整体处理完以后一起给我翻译后的结果.

输入示例:
"Forward": "TODO",
"Frame Class": "TODO",
"Currently set to frame class '%1'": "TODO",
"All Files (*)": "TODO",
"Receiving signal. Perform range test & confirm.": "TODO",

输出示例:
"Forward": """前进""",
"Frame Class": """机架类别""",
"Currently set to frame class '%1'": """当前设置为机架类别 '%1'""",
"All Files (*)": """所有文件 (*)""",
"Receiving signal. Perform range test & confirm.": """正在接收信号。执行距离测试 & 确认""",

以下是对应要翻译部分的列表:

```
66 changes: 66 additions & 0 deletions tools/translations/dict_base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from abc import ABC, abstractmethod

class DictBase(ABC):
    """Base class for a dictionary that translates Qt ``.ts`` source strings.

    Subclasses provide the raw ``{english: translation}`` mapping through
    :meth:`get_dict`. The mapping is stripped of surrounding whitespace and
    cached the first time :meth:`translate_text` is called.
    """

    # Entity spellings accepted when normalising text for lookup. Both the
    # named and the numeric forms are listed because either may appear in the
    # text being matched.
    _ENTITY_TO_CHAR = {
        '&apos;': "'",
        '&#39;': "'",
        '&quot;': '"',
        '&#34;': '"',
        '&amp;': '&',
        '&#38;': '&',
        '&lt;': '<',
        '&gt;': '>',
    }

    # Characters escaped in a translation before it is returned.
    # '&' is deliberately absent: it is not encoded.
    _CHAR_TO_ENTITY = {
        "'": '&apos;',
        '"': '&quot;',
        '<': '&lt;',
        '>': '&gt;',
    }

    def __init__(self):
        self.name = "base"
        # Normalised lookup table; None until the first translate_text() call.
        self.dictionary = None

    def translate_text(self, english_text: str, keep_space: bool = False):
        """Translate one source string.

        Args:
            english_text: Source text, possibly containing character entity
                references and surrounding whitespace.
            keep_space: When true, the result is padded with as many space
                characters as ``english_text`` had leading and trailing
                whitespace characters.

        Returns:
            The entity-encoded translation, or ``None`` when the text is not
            in the dictionary.
        """
        # Compare against None so that an empty dictionary is still cached
        # instead of being rebuilt on every call.
        if self.dictionary is None:
            self.dictionary = {
                key.strip(): value.strip()
                for key, value in self.get_dict().items()
            }

        # Match on the stripped text with entities converted to plain characters.
        compare_text = self.decode_html_entities(english_text.strip())

        translation = self.dictionary.get(compare_text)
        if translation is None:
            # None tells the caller the text could not be translated.
            return None

        if keep_space:
            leading_spaces = len(english_text) - len(english_text.lstrip())
            trailing_spaces = len(english_text) - len(english_text.rstrip())
            translation = ' ' * leading_spaces + translation + ' ' * trailing_spaces

        return self.encode_html_entities(translation)

    def decode_html_entities(self, text):
        """Return ``text`` with the known entity references replaced by characters.

        The replacement is done in a single pass, so an escaped entity such
        as ``&amp;lt;`` becomes the literal text ``&lt;`` and is not decoded
        a second time into ``<``.
        """
        # Local import: the module header only imports from ``abc``.
        import re

        return re.sub(
            r'&(?:apos|quot|amp|lt|gt|#39|#34|#38);',
            lambda match: self._ENTITY_TO_CHAR[match.group(0)],
            text,
        )

    def encode_html_entities(self, text):
        """Return ``text`` with quotes and angle brackets replaced by entities."""
        result = text
        for char, entity in self._CHAR_TO_ENTITY.items():
            result = result.replace(char, entity)
        return result

    @abstractmethod
    def get_dict(self) -> dict[str, str]:
        """Return the raw ``{english: translation}`` mapping for the language."""
        return None

14 changes: 14 additions & 0 deletions tools/translations/dict_zh_CN.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from dict_base import DictBase

class DictZhCN(DictBase):
    """Dictionary of English source strings and their zh_CN translations."""

    def get_dict(self) -> dict[str, str]:
        """Return the translation items, keyed by the English source text."""
        # Entries keep the triple-quoted layout described in the Readme so
        # translated items can be pasted in unchanged.
        items = {
            """Help""": """帮助""",
        }
        return items
161 changes: 161 additions & 0 deletions tools/translations/qgc_translate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import logging
import os
import sys
import re

from dict_zh_CN import DictZhCN

# Console line layout: time with milliseconds, level name, then the message.
LOG_FORMAT_CONSOLE = '%(asctime)s.%(msecs)03d|%(levelname)s|%(message)s'

# Build the stdout handler first, then attach it to the module logger.
console_formatter = logging.Formatter(fmt=LOG_FORMAT_CONSOLE, datefmt='%H:%M:%S')
console_handler = logging.StreamHandler(stream=sys.stdout)
console_handler.setFormatter(console_formatter)

logger = logging.getLogger(__name__)
logger.addHandler(console_handler)
logger.setLevel(logging.INFO)

class QgcTranslater:
    """Fill in unfinished translations of Qt ``.ts`` files from a dictionary.

    Source strings that have no dictionary entry are collected and written
    to a companion ``*_missing.txt`` file so they can be translated later.
    """

    def __init__(self):
        super().__init__()
        self.dictionary = DictZhCN()

        # Per-file statistics. They are reset by process_qt_file(), but are
        # created here so process_message() also works on a fresh instance.
        self.processed_count = 0
        self.translated_count = 0
        self.missings = set()

    def translate_text(self, english_text):
        """Return the dictionary translation of ``english_text`` or ``None``."""
        return self.dictionary.translate_text(english_text)

    def process_qt_file(self, source_path, target_path):
        """Translate the unfinished messages of one ``.ts`` file.

        Args:
            source_path: Path of the ``.ts`` file to read.
            target_path: Path the translated content is written to (may be
                the same as ``source_path``). Untranslated source strings
                are written to ``target_path + "_missing.txt"``.
        """
        logger.info(f"dict include {len(self.dictionary.get_dict())} items")

        logger.info(f"load qt file: {source_path}")

        with open(source_path, 'r', encoding='utf-8') as f:
            content = f.read()

        # Reset the statistics for this file.
        self.processed_count = 0
        self.translated_count = 0

        # Source strings for which no translation was found; saved so they
        # can conveniently be handed to an AI for translation.
        self.missings = set()

        # Only <message> elements without attributes are matched.
        message_pattern = r'(<message>.*?</message>)'
        new_content = re.sub(message_pattern, self.process_message, content, flags=re.DOTALL)

        # Write the translation result.
        with open(target_path, 'w', encoding='utf-8') as f:
            f.write(new_content)

        missing_file = target_path + "_missing.txt"
        if self.missings:
            with open(missing_file, 'wt', encoding='utf-8') as f:
                # Sorted so the file is stable between runs (set order is not).
                f.writelines(f'"{m}": "TODO",\n' for m in sorted(self.missings))

        logger.info(f"handle {source_path} complete")
        logger.info(f" handle {self.processed_count} messages")
        logger.info(f" translate {self.translated_count} items")
        logger.info(f" found {len(self.missings)} missing, saved in {missing_file}")

    def process_message(self, match):
        """Return the replacement text for one matched ``<message>`` element.

        Used as the replacement callback of ``re.sub``. The message is
        returned unchanged when it has no ``<source>``, is not marked
        ``type="unfinished"``, or has no dictionary entry (in which case the
        source text is added to ``self.missings``).
        """
        self.processed_count += 1

        message_content = match.group(1)

        # Extract the source text.
        source_match = re.search(r'<source>(.*?)</source>', message_content, re.DOTALL)
        if not source_match:
            return message_content

        source_text = source_match.group(1).strip()

        if 'type="unfinished"' not in message_content:
            return message_content  # already translated

        translate_result = self.translate_text(source_text)
        if translate_result is None:
            # Cannot translate: remember the original text instead.
            self.missings.add(source_text)
            return message_content

        self.translated_count += 1

        # Log only the first 50 characters to avoid overly long output.
        source_preview = source_text[:50] + '...' if len(source_text) > 50 else source_text
        translation_preview = translate_result[:50] + '...' if len(translate_result) > 50 else translate_result
        logger.info(f"[{self.translated_count}]: '{source_preview}' => '{translation_preview}'")

        # Replace the translation content and drop type="unfinished".
        # A callable replacement is used so that backslashes in the
        # translation are inserted literally instead of being processed as
        # escapes or group references by re.sub.
        replacement = f'<translation>{translate_result}</translation>'
        return re.sub(
            r'<translation type="unfinished">.*?</translation>',
            lambda _match: replacement,
            message_content,
            flags=re.DOTALL
        )

    def do_demo(self):
        """Log the dictionary size and up to ten sample entries; return 0."""
        # Fetch the dictionary once instead of rebuilding it per entry.
        entries = self.dictionary.get_dict()
        logger.info(f"there are {len(entries)} translate items")

        sample_keys = list(entries)[:10]
        logger.info("translate items demos:")
        for key in sample_keys:
            logger.info(f"'{key}' -> '{entries[key]}'")

        return 0

def main():
    """Translate every file listed in ``files_to_process`` in place.

    The translations folder is located relative to this script, so the
    script can be started from any working directory. The process exits
    with status 0 after all files were handled (files that do not exist
    are only reported) and with status 1 on interruption or error.
    """
    # Anchor on the script location rather than the current working
    # directory: "../../translations" is relative to this file.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    base_path = os.path.normpath(os.path.join(script_dir, "..", "..", "translations"))
    files_to_process = [
        'qgc_json_zh_CN.ts',
        'qgc_source_zh_CN.ts',
        # 'qgc_zh_CN.ts',
        # 'qgc-json.ts'
    ]
    try:
        logger.info("start translate")
        translater = QgcTranslater()
        for file_name in files_to_process:
            source_path = os.path.join(base_path, file_name)
            target_path = source_path  # + ".new"

            logger.info(f"source_path={source_path}, target_path={target_path}")
            if os.path.exists(source_path):
                translater.process_qt_file(source_path, target_path)
            else:
                logger.warning(f"file not exist:{source_path}")
        # SystemExit is not an Exception subclass, so it passes through the
        # handlers below.
        sys.exit(0)
    except KeyboardInterrupt:
        logger.warning("user break")
        sys.exit(1)
    except Exception as e:
        logger.error(f"there is error while translate: {e}")
        logger.exception("error details:")
        sys.exit(1)

def check_dict_duplicate(dict_path: str) -> int:
    """Report duplicated keys in a dictionary source file.

    The file is scanned line by line. For every line containing a colon,
    the stripped text before the first colon is treated as the key; a key
    that was already seen on an earlier line is logged as a duplicate.

    Args:
        dict_path: Path of the dictionary file to check.

    Returns:
        The number of duplicate lines found.
    """
    seen_keys = set()
    duplicate_count = 0
    with open(dict_path, 'r', encoding='utf-8') as f:
        # idx is the zero-based line index, as reported in the warning.
        for idx, line in enumerate(f):
            key, separator, _value = line.partition(":")
            if not separator:
                continue  # no colon on this line, so it is not an entry
            key = key.strip()
            if key not in seen_keys:
                seen_keys.add(key)
            else:
                logger.warning(f"duplicate: idx={idx}, key={key}")
                duplicate_count += 1
    logger.info(f"duplicate_count={duplicate_count}")
    return duplicate_count

# Script entry point: run the translation only when executed directly.
if __name__ == "__main__":
    main()
    # check_dict_duplicate("dict_zh_CN.py")
Loading
Loading