Python小工具

谷歌多语言翻译

环境配置

1
2
3
pip uninstall googletrans==4.0.0-rc1

pip install googletrans==3.1.0a0
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
from googletrans import Translator


# One shared client, and the sample text that is translated into every target.
translator = Translator()
data = '国家'

# googletrans destination codes, in the order their translations are printed.
dest_codes = (
    "zh-CN",  # Chinese (Simplified)
    "zh-tw",  # Chinese (Traditional)
    "en",     # English
    "ru",     # Russian (RU)
    "th",     # Thai (TH)
    "pt",     # Portuguese (PT)
    "tr",     # Turkish (TR)
    "es",     # Spanish (ES)
    "id",     # Indonesian (ID)
    "pl",     # Polish (PL)
    "fa",     # Persian (FA)
    "fr",     # French (FR)
    "it",     # Italian (IT)
    "vi",     # Vietnamese (VN)
    "he",     # Hebrew (HE)
    "de",     # German (DE)
    "ko",     # Korean (KR)
    "cs",     # Czech (CZ)
    "el",     # Greek (EL)
    "nl",     # Dutch (NL)
    "sv",     # Swedish (SV)
)

for dest_code in dest_codes:
    print(translator.translate(data, dest_code).text)
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
import re
import html
from urllib import parse
import requests

# Mobile Google Translate page; placeholders are (quoted text, target code, source code).
GOOGLE_TRANSLATE_URL = 'http://translate.google.com/m?q=%s&tl=%s&sl=%s'


def translate(text, to_language="auto", text_language="auto", timeout=10):
    """Translate *text* by scraping the Google Translate mobile web page.

    The page at GOOGLE_TRANSLATE_URL is requested with the URL-quoted text and
    the two language codes, and the translation is read from the first element
    whose class attribute is ``t0`` or ``result-container``.

    Args:
        text: Text to translate.
        to_language: Target language code (sent as ``tl``).
        text_language: Source language code (sent as ``sl``).
        timeout: Seconds to wait for the HTTP response. requests applies no
            timeout by default, so without it a stalled connection would
            block the caller indefinitely.

    Returns:
        The HTML-unescaped translation, or "" when the page contains no
        result element.

    Raises:
        requests.RequestException: the request fails or times out.
    """
    url = GOOGLE_TRANSLATE_URL % (parse.quote(text), to_language, text_language)
    response = requests.get(url, timeout=timeout)

    # (?s) lets "." span newlines; the lazy group stops at the next tag.
    match = re.search(r'(?s)class="(?:t0|result-container)">(.*?)<', response.text)
    if match is None:
        return ""

    return html.unescape(match.group(1))

# Demo calls: Chinese -> English, Chinese -> Japanese, English -> Chinese.
for sample_text, target_code in (
    ("你吃饭了么?", "en"),
    ("你吃饭了么?", "ja"),
    ("about your situation", "zh-CN"),
):
    print(translate(sample_text, target_code))

googletrans 库支持的所有语言及其缩写:

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
# Language code -> language name, as listed for the translation library.
# Keys are lower-case codes; values are English language names.
LANGUAGES = {
    'af': 'afrikaans',
    'sq': 'albanian',
    'am': 'amharic',
    'ar': 'arabic',
    'hy': 'armenian',
    'az': 'azerbaijani',
    'eu': 'basque',
    'be': 'belarusian',
    'bn': 'bengali',
    'bs': 'bosnian',
    'bg': 'bulgarian',
    'ca': 'catalan',
    'ceb': 'cebuano',
    'ny': 'chichewa',
    'zh-cn': 'chinese (simplified)',
    'zh-tw': 'chinese (traditional)',
    'co': 'corsican',
    'hr': 'croatian',
    'cs': 'czech',
    'da': 'danish',
    'nl': 'dutch',
    'en': 'english',
    'eo': 'esperanto',
    'et': 'estonian',
    'tl': 'filipino',
    'fi': 'finnish',
    'fr': 'french',
    'fy': 'frisian',
    'gl': 'galician',
    'ka': 'georgian',
    'de': 'german',
    'el': 'greek',
    'gu': 'gujarati',
    'ht': 'haitian creole',
    'ha': 'hausa',
    'haw': 'hawaiian',
    'iw': 'hebrew',
    'hi': 'hindi',
    'hmn': 'hmong',
    'hu': 'hungarian',
    'is': 'icelandic',
    'ig': 'igbo',
    'id': 'indonesian',
    'ga': 'irish',
    'it': 'italian',
    'ja': 'japanese',
    'jw': 'javanese',
    'kn': 'kannada',
    'kk': 'kazakh',
    'km': 'khmer',
    'ko': 'korean',
    'ku': 'kurdish (kurmanji)',
    'ky': 'kyrgyz',
    'lo': 'lao',
    'la': 'latin',
    'lv': 'latvian',
    'lt': 'lithuanian',
    'lb': 'luxembourgish',
    'mk': 'macedonian',
    'mg': 'malagasy',
    'ms': 'malay',
    'ml': 'malayalam',
    'mt': 'maltese',
    'mi': 'maori',
    'mr': 'marathi',
    'mn': 'mongolian',
    'my': 'myanmar (burmese)',
    'ne': 'nepali',
    'no': 'norwegian',
    'ps': 'pashto',
    'fa': 'persian',
    'pl': 'polish',
    'pt': 'portuguese',
    'pa': 'punjabi',
    'ro': 'romanian',
    'ru': 'russian',
    'sm': 'samoan',
    'gd': 'scots gaelic',
    'sr': 'serbian',
    'st': 'sesotho',
    'sn': 'shona',
    'sd': 'sindhi',
    'si': 'sinhala',
    'sk': 'slovak',
    'sl': 'slovenian',
    'so': 'somali',
    'es': 'spanish',
    'su': 'sundanese',
    'sw': 'swahili',
    'sv': 'swedish',
    'tg': 'tajik',
    'ta': 'tamil',
    'te': 'telugu',
    'th': 'thai',
    'tr': 'turkish',
    'uk': 'ukrainian',
    'ur': 'urdu',
    'uz': 'uzbek',
    'vi': 'vietnamese',
    'cy': 'welsh',
    'xh': 'xhosa',
    'yi': 'yiddish',
    'yo': 'yoruba',
    'zu': 'zulu',
    # The last two codes repeat languages already listed above under 'tl' and
    # 'iw'; note that their names are capitalised, unlike every other value.
    'fil': 'Filipino',
    'he': 'Hebrew'
}

通过对语言缩写的字典对应查询,配合detect函数可以实现对语言名称的查询输出。

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
from googletrans import Translator

# Language code -> lower-case English language name. Used below to turn the
# code reported by detect() into a readable language name.
LANGUAGES = {
    'af': 'afrikaans',
    'sq': 'albanian',
    'am': 'amharic',
    'ar': 'arabic',
    'hy': 'armenian',
    'az': 'azerbaijani',
    'eu': 'basque',
    'be': 'belarusian',
    'bn': 'bengali',
    'bs': 'bosnian',
    'bg': 'bulgarian',
    'ca': 'catalan',
    'ceb': 'cebuano',
    'ny': 'chichewa',
    'zh-cn': 'chinese (simplified)',
    'zh-tw': 'chinese (traditional)',
    'co': 'corsican',
    'hr': 'croatian',
    'cs': 'czech',
    'da': 'danish',
    'nl': 'dutch',
    'en': 'english',
    'eo': 'esperanto',
    'et': 'estonian',
    'tl': 'filipino',
    'fi': 'finnish',
    'fr': 'french',
    'fy': 'frisian',
    'gl': 'galician',
    'ka': 'georgian',
    'de': 'german',
    'el': 'greek',
    'gu': 'gujarati',
    'ht': 'haitian creole',
    'ha': 'hausa',
    'haw': 'hawaiian',
    # Hebrew is reachable through two codes: 'iw' and 'he'.
    'iw': 'hebrew',
    'he': 'hebrew',
    'hi': 'hindi',
    'hmn': 'hmong',
    'hu': 'hungarian',
    'is': 'icelandic',
    'ig': 'igbo',
    'id': 'indonesian',
    'ga': 'irish',
    'it': 'italian',
    'ja': 'japanese',
    'jw': 'javanese',
    'kn': 'kannada',
    'kk': 'kazakh',
    'km': 'khmer',
    'ko': 'korean',
    'ku': 'kurdish (kurmanji)',
    'ky': 'kyrgyz',
    'lo': 'lao',
    'la': 'latin',
    'lv': 'latvian',
    'lt': 'lithuanian',
    'lb': 'luxembourgish',
    'mk': 'macedonian',
    'mg': 'malagasy',
    'ms': 'malay',
    'ml': 'malayalam',
    'mt': 'maltese',
    'mi': 'maori',
    'mr': 'marathi',
    'mn': 'mongolian',
    'my': 'myanmar (burmese)',
    'ne': 'nepali',
    'no': 'norwegian',
    'or': 'odia',
    'ps': 'pashto',
    'fa': 'persian',
    'pl': 'polish',
    'pt': 'portuguese',
    'pa': 'punjabi',
    'ro': 'romanian',
    'ru': 'russian',
    'sm': 'samoan',
    'gd': 'scots gaelic',
    'sr': 'serbian',
    'st': 'sesotho',
    'sn': 'shona',
    'sd': 'sindhi',
    'si': 'sinhala',
    'sk': 'slovak',
    'sl': 'slovenian',
    'so': 'somali',
    'es': 'spanish',
    'su': 'sundanese',
    'sw': 'swahili',
    'sv': 'swedish',
    'tg': 'tajik',
    'ta': 'tamil',
    'te': 'telugu',
    'th': 'thai',
    'tr': 'turkish',
    'uk': 'ukrainian',
    'ur': 'urdu',
    'ug': 'uyghur',
    'uz': 'uzbek',
    'vi': 'vietnamese',
    'cy': 'welsh',
    'xh': 'xhosa',
    'yi': 'yiddish',
    'yo': 'yoruba',
    'zu': 'zulu',
}

# This snippet only imports Translator, so create the client here; otherwise
# running it on its own fails with a NameError on `translator`.
translator = Translator()

# detect() reports a language code. Every LANGUAGES key is lower-case, while
# the service may report mixed-case codes (e.g. "zh-CN"), so normalise first.
# Unknown codes are printed as-is instead of raising KeyError.
detected_code = translator.detect("안녕하세요.").lang
print(LANGUAGES.get(detected_code.lower(), detected_code))

实例:html文本翻译(多语言)

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
<!DOCTYPE html>
<html>

<head>
    <title>Peugeot_213_1081-1086_mechanical</title>
    <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
    <link rel="stylesheet" href="../style.css">
</head>

<body>
    <table>
        <tr>
            <td>PinCode Required</td>
            <td></td>
        </tr>
        <tr>
            <td>PinCode Source</td>
            <td></td>
        </tr>
        <tr>
            <td>Maximum number of keys</td>
            <td>5</td>
        </tr>
        <tr>
            <td>Key sensing position</td>
            <td></td>
        </tr>
        <tr>
            <td>Key Type</td>
            <td>Key Head Remote</td>
        </tr>
        <tr>
            <td>Pre-code Key</td>
            <td></td>
        </tr>
        <tr>
            <td>Transponder Type</td>
            <td>46</td>
        </tr>
        <tr>
            <td>Remote Frequency</td>
            <td>433</td>
        </tr>
        <tr>
            <td>Number of key blank</td>
            <td></td>
        </tr>
        <tr>
            <td>Are the door lock and ignition faulty</td>
            <td></td>
        </tr>
        <tr>
            <td>Is the transponder copyable</td>
            <td></td>
        </tr>
        <tr>
            <td>Program Remotes</td>
            <td></td>
        </tr>
        <tr>
            <td>Working key Required</td>
            <td></td>
        </tr>
    </table>
</body>

</html>

translateByGoogle.py

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
from googletrans import Translator
from lxml import etree
from datetime import datetime
import os
import re
import shutil
import time

translator = Translator()
error_cnt = 0


def translate_language_google_old(tds, dest):
    """Translate table cells one request per cell (legacy variant, per its name).

    Args:
        tds: Iterable of objects exposing a ``text`` attribute (the script
            passes the ``<td>`` elements selected with lxml).
        dest: Destination language code handed to ``translator.translate``.

    Returns:
        A list with one entry per cell, in order: the translated text, or ""
        for cells whose text is missing or blank.

    A failed request is retried every 10 seconds until it succeeds.
    """
    translated = []

    for td in tds:
        text = str(td.text).strip()
        # str(None) is 'None', so a cell without text shows up as that literal.
        if not text or text == 'None':
            translated.append("")
            continue

        while True:
            try:
                translated.append(translator.translate(text, dest).text)
                break
            except Exception:
                # Only ordinary errors are retried; a bare `except:` would also
                # swallow KeyboardInterrupt and make the loop impossible to stop.
                time.sleep(10)

    return translated


def _translate_with_retry(text, dest):
    """Translate *text* into *dest*, retrying every 60 s until a request succeeds."""
    global error_cnt
    while True:
        try:
            translated = translator.translate(text, dest).text
        except Exception:
            # Only ordinary errors are retried; a bare `except:` would also
            # swallow KeyboardInterrupt and make the loop impossible to stop.
            error_cnt += 1
            print(datetime.now(), "translate error:", error_cnt)
            time.sleep(60)
        else:
            # Short pause after every successful request before the next one.
            time.sleep(1)
            return translated


def translate_language_google(tds, dest):
    """Translate all table cells with a single batched request.

    The non-empty cell texts are sent as one newline-separated string and the
    reply is split on newlines again to map each line back onto its cell.

    Args:
        tds: Iterable of objects exposing a ``text`` attribute (the script
            passes the ``<td>`` elements selected with lxml).
        dest: Destination language code handed to ``translator.translate``.

    Returns:
        A list with one entry per cell, in order: the translated text, or ""
        for cells whose text is missing or blank.
    """
    texts = []
    for td in tds:
        text = str(td.text).strip()
        # str(None) is 'None', so a cell without text shows up as that literal.
        texts.append(text if text and text != 'None' else "")

    pending = [text for text in texts if text]
    if not pending:
        # Nothing to translate: do not send an empty request.
        return texts

    reply = _translate_with_retry("".join(text + "\n" for text in pending), dest)
    lines = reply.split("\n")
    # The request ends with a newline, so the reply may end with empty lines.
    while lines and not lines[-1]:
        lines.pop()

    if len(lines) != len(pending):
        # The reply cannot be mapped back line-by-line (e.g. a cell contained a
        # newline, or lines were merged); translate each cell on its own
        # rather than crash or shift translations into the wrong cells.
        lines = [_translate_with_retry(text, dest) for text in pending]

    remaining = iter(lines)
    return [next(remaining) if text else "" for text in texts]


def writeHtml(tds, file_name, output_html):
    """Write *tds* as a two-column HTML table page to *output_html*.

    Args:
        tds: Flat list of cell texts; consecutive pairs form one table row.
            With an odd number of entries the last row gets an empty second
            cell. Empty or None entries produce an empty ``<td>``.
        file_name: Output file name; with ".html" removed it becomes the
            page ``<title>``.
        output_html: Writable text stream (anything with ``write``).

    Cell texts and the title are HTML-escaped, so characters such as ``&`` or
    ``<`` in a translation cannot break the generated markup.
    """
    # Imported locally so it cannot clash with a module-level variable named `html`.
    from html import escape

    title = escape(file_name.replace(".html", ""), quote=False)
    parts = [
        "<!DOCTYPE html>\n",
        "<html>\n",
        "<head>\n",
        "<title>" + title + "</title>\n",
        "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />\n",
        # Alternative stylesheet that was left disabled: ../style.min.css
        "<link rel=\"stylesheet\" href=\"../style.css\">\n",
        "</head>\n",
        "<body>\n",
        "<table>\n",
    ]

    cell_count = len(tds)
    for i in range(0, cell_count, 2):
        first = tds[i] or ""
        # A trailing unpaired cell is completed with an empty second column.
        second = (tds[i + 1] or "") if i + 1 < cell_count else ""
        parts.append("<tr>\n")
        parts.append("<td>" + escape(first, quote=False) + "</td>\n")
        parts.append("<td>" + escape(second, quote=False) + "</td>\n")
        parts.append("</tr>\n")

    parts.append("</table>\n")
    parts.append("</body>\n")
    parts.append("</html>\n")
    output_html.write("".join(parts))


# Folder scanned for source pages, and folder that receives the translated pages.
folder_path = './Htmls/'
output_path = './translate/'

# The output folder is created if missing and otherwise reused as-is
# (wiping it first with shutil.rmtree is left disabled).
os.makedirs(os.path.dirname(output_path), exist_ok=True)

# File-name prefix -> destination language code for every EN_ source page.
# 19 targets are active; the two Chinese entries are disabled here, and TW_
# pages are produced from CN_ sources further down.
languages = {
    # "CN": "zh-CN",
    # "TW": "zh-tw",
    "CZ": "cs",
    "DE": "de",
    "EL": "el",
    "EN": "en",
    "ES": "es",
    "FA": "fa",
    "FR": "fr",
    "HE": "he",
    "ID": "id",
    "IT": "it",
    "KR": "ko",
    "PL": "pl",
    "PT": "pt",
    "RU": "ru",
    "TH": "th",
    "TR": "tr",
    "VN": "vi",
    "NL": "nl",
    "SV": "sv"
}

# Source pages are recognised by their file-name prefix.
pattern_en = re.compile(r'^EN_.*')
pattern_cn = re.compile(r'^CN_.*')


def _translate_page(source_name, target_name, dest):
    """Translate every table cell of one source page and write the result page."""
    doc = etree.parse(folder_path + source_name, etree.HTMLParser())
    # Collect all <td> cells of the page's table.
    tds = doc.xpath('/html/body/table/tr/td')
    print(datetime.now(), source_name, " to ", target_name)
    translated = translate_language_google(tds, dest)
    # Open the output only once the translation exists, and close it even if
    # writing fails, so no empty or dangling file is left behind.
    with open(output_path + target_name, 'w', encoding='utf-8') as output_html:
        writeHtml(translated, target_name, output_html)


# Walk every file in the source folder and dispatch on its prefix.
for file_name in os.listdir(folder_path):
    if pattern_en.match(file_name):
        for key, val in languages.items():
            # count=1: only the leading prefix is replaced, never a later
            # "EN_" inside the name (e.g. "EN_OPEN_1.html").
            _translate_page(file_name, file_name.replace("EN_", key + "_", 1), val)
        print("")
        # The English source is deleted once all of its translations are written.
        os.remove(folder_path + file_name)
    elif pattern_cn.match(file_name):
        _translate_page(file_name, file_name.replace("CN_", "TW_", 1), 'zh-tw')
        # The Chinese source is kept: it is moved into the output folder.
        shutil.move(folder_path + file_name, output_path)
0%