Make lyx_pot.py python3 compatible

The script now produces the same output whether it runs under Python 3 (tested
with 3.4.2) or Python 2 (tested with 2.7.9). Python 3 always uses unicode
strings internally, so the file encoding has to be specified when opening a
file; strings can then be converted from and to the file encoding on reading
and writing. Using the io module for file I/O ensures that the behaviour is the
same for Python 2 and Python 3. For Python 2 we also have to mark string
literals as unicode strings by using the u prefix (which is a no-op in Python 3).

Many thanks to José for review and pointing out all the details.
This commit is contained in:
Georg Baum 2016-03-25 10:24:46 +01:00
parent 0aaa930a5c
commit 262ae8264a

View File

@ -19,6 +19,7 @@
from __future__ import print_function from __future__ import print_function
import sys, os, re, getopt import sys, os, re, getopt
import io
def relativePath(path, base): def relativePath(path, base):
'''return relative path from top source dir''' '''return relative path from top source dir'''
@ -37,13 +38,13 @@ def writeString(outfile, infile, basefile, lineno, string):
string = string.replace('\\', '\\\\').replace('"', '') string = string.replace('\\', '\\\\').replace('"', '')
if string == "": if string == "":
return return
print('#: %s:%d\nmsgid "%s"\nmsgstr ""\n' % \ print(u'#: %s:%d\nmsgid "%s"\nmsgstr ""\n' % \
(relativePath(infile, basefile), lineno, string), file=outfile) (relativePath(infile, basefile), lineno, string), file=outfile)
def ui_l10n(input_files, output, base): def ui_l10n(input_files, output, base):
'''Generate pot file from lib/ui/*''' '''Generate pot file from lib/ui/*'''
output = open(output, 'w') output = io.open(output, 'w', encoding='utf_8')
Submenu = re.compile(r'^[^#]*Submenu\s+"([^"]*)"', re.IGNORECASE) Submenu = re.compile(r'^[^#]*Submenu\s+"([^"]*)"', re.IGNORECASE)
Popupmenu = re.compile(r'^[^#]*PopupMenu\s+"[^"]+"\s+"([^"]*)"', re.IGNORECASE) Popupmenu = re.compile(r'^[^#]*PopupMenu\s+"[^"]+"\s+"([^"]*)"', re.IGNORECASE)
IconPalette = re.compile(r'^[^#]*IconPalette\s+"[^"]+"\s+"([^"]*)"', re.IGNORECASE) IconPalette = re.compile(r'^[^#]*IconPalette\s+"[^"]+"\s+"([^"]*)"', re.IGNORECASE)
@ -51,7 +52,7 @@ def ui_l10n(input_files, output, base):
Item = re.compile(r'[^#]*Item\s+"([^"]*)"', re.IGNORECASE) Item = re.compile(r'[^#]*Item\s+"([^"]*)"', re.IGNORECASE)
TableInsert = re.compile(r'[^#]*TableInsert\s+"([^"]*)"', re.IGNORECASE) TableInsert = re.compile(r'[^#]*TableInsert\s+"([^"]*)"', re.IGNORECASE)
for src in input_files: for src in input_files:
input = open(src) input = io.open(src, encoding='utf_8')
for lineno, line in enumerate(input.readlines()): for lineno, line in enumerate(input.readlines()):
if Submenu.match(line): if Submenu.match(line):
(string,) = Submenu.match(line).groups() (string,) = Submenu.match(line).groups()
@ -70,7 +71,7 @@ def ui_l10n(input_files, output, base):
continue continue
string = string.replace('"', '') string = string.replace('"', '')
if string != "": if string != "":
print('#: %s:%d\nmsgid "%s"\nmsgstr ""\n' % \ print(u'#: %s:%d\nmsgid "%s"\nmsgstr ""\n' % \
(relativePath(src, base), lineno+1, string), file=output) (relativePath(src, base), lineno+1, string), file=output)
input.close() input.close()
output.close() output.close()
@ -125,7 +126,7 @@ def layouts_l10n(input_files, output, base, layouttranslations):
# read old translations if available # read old translations if available
try: try:
input = open(output) input = io.open(output, encoding='utf_8')
lang = '' lang = ''
for line in input.readlines(): for line in input.readlines():
res = Comment.search(line) res = Comment.search(line)
@ -147,8 +148,8 @@ def layouts_l10n(input_files, output, base, layouttranslations):
continue continue
res = KeyValPair.search(line) res = KeyValPair.search(line)
if res and lang != '': if res and lang != '':
key = res.group(1).decode('utf-8') key = res.group(1)
val = res.group(2).decode('utf-8') val = res.group(2)
key = key.replace('\\"', '"').replace('\\\\', '\\') key = key.replace('\\"', '"').replace('\\\\', '\\')
val = val.replace('\\"', '"').replace('\\\\', '\\') val = val.replace('\\"', '"').replace('\\\\', '\\')
oldtrans[lang][key] = val oldtrans[lang][key] = val
@ -165,7 +166,7 @@ def layouts_l10n(input_files, output, base, layouttranslations):
if 'wa' in languages: if 'wa' in languages:
languages.remove('wa') languages.remove('wa')
out = open(output, 'w') out = io.open(output, 'w', encoding='utf_8')
for src in input_files: for src in input_files:
readingDescription = False readingDescription = False
readingI18nPreamble = False readingI18nPreamble = False
@ -178,7 +179,7 @@ def layouts_l10n(input_files, output, base, layouttranslations):
descStartLine = -1 descStartLine = -1
descLines = [] descLines = []
lineno = 0 lineno = 0
for line in open(src).readlines(): for line in io.open(src, encoding='utf_8').readlines():
lineno += 1 lineno += 1
res = ClassDescription.search(line) res = ClassDescription.search(line)
if res != None: if res != None:
@ -381,7 +382,7 @@ def layouts_l10n(input_files, output, base, layouttranslations):
ContextRe = re.compile(r'(.*)(\[\[.*\]\])') ContextRe = re.compile(r'(.*)(\[\[.*\]\])')
print('''# This file has been automatically generated by po/lyx_pot.py. print(u'''# This file has been automatically generated by po/lyx_pot.py.
# PLEASE MODIFY ONLY THE LAGUAGES HAVING NO .po FILE! If you want to regenerate # PLEASE MODIFY ONLY THE LAGUAGES HAVING NO .po FILE! If you want to regenerate
# this file from the translations, run `make ../lib/layouttranslations' in po. # this file from the translations, run `make ../lib/layouttranslations' in po.
# Python polib library is needed for building the output file. # Python polib library is needed for building the output file.
@ -389,7 +390,7 @@ def layouts_l10n(input_files, output, base, layouttranslations):
# This file should remain fixed during minor LyX releases. # This file should remain fixed during minor LyX releases.
# For more comments see README.localization file.''', file=out) # For more comments see README.localization file.''', file=out)
for lang in languages: for lang in languages:
print('\nTranslation %s' % lang, file=out) print(u'\nTranslation %s' % lang, file=out)
if lang in list(oldtrans.keys()): if lang in list(oldtrans.keys()):
trans = oldtrans[lang] trans = oldtrans[lang]
else: else:
@ -415,8 +416,7 @@ def layouts_l10n(input_files, output, base, layouttranslations):
if res != None: if res != None:
val = res.group(1) val = res.group(1)
key = key.replace('\\', '\\\\').replace('"', '\\"') key = key.replace('\\', '\\\\').replace('"', '\\"')
print('\t"%s" "%s"' % \ print(u'\t"%s" "%s"' % (key, val), file=out)
(key.encode('utf-8'), val.encode('utf-8')), file=out)
# also print untranslated entries to help translators # also print untranslated entries to help translators
elif not lang in oldlanguages: elif not lang in oldlanguages:
key = key.replace('\\', '\\\\').replace('"', '\\"') key = key.replace('\\', '\\\\').replace('"', '\\"')
@ -425,20 +425,19 @@ def layouts_l10n(input_files, output, base, layouttranslations):
val = res.group(1) val = res.group(1)
else: else:
val = key val = key
print('\t"%s" "%s"' % \ print(u'\t"%s" "%s"' % (key, val), file=out)
(key.encode('utf-8'), val.encode('utf-8')), file=out) print(u'End', file=out)
print('End', file=out)
out.close() out.close()
def qt4_l10n(input_files, output, base): def qt4_l10n(input_files, output, base):
'''Generate pot file from src/frontends/qt4/ui/*.ui''' '''Generate pot file from src/frontends/qt4/ui/*.ui'''
output = open(output, 'w') output = io.open(output, 'w', encoding='utf_8')
pat = re.compile(r'\s*<string>(.*)</string>') pat = re.compile(r'\s*<string>(.*)</string>')
prop = re.compile(r'\s*<property.*name.*=.*shortcut') prop = re.compile(r'\s*<property.*name.*=.*shortcut')
for src in input_files: for src in input_files:
input = open(src) input = io.open(src, encoding='utf_8')
skipNextLine = False skipNextLine = False
for lineno, line in enumerate(input.readlines()): for lineno, line in enumerate(input.readlines()):
# skip the line after <property name=shortcut> # skip the line after <property name=shortcut>
@ -455,7 +454,7 @@ def qt4_l10n(input_files, output, base):
string = string.replace('&lt;', '<').replace('&gt;', '>') string = string.replace('&lt;', '<').replace('&gt;', '>')
string = string.replace('\\', '\\\\').replace('"', r'\"') string = string.replace('\\', '\\\\').replace('"', r'\"')
string = string.replace('&#x0a;', r'\n') string = string.replace('&#x0a;', r'\n')
print('#: %s:%d\nmsgid "%s"\nmsgstr ""\n' % \ print(u'#: %s:%d\nmsgid "%s"\nmsgstr ""\n' % \
(relativePath(src, base), lineno+1, string), file=output) (relativePath(src, base), lineno+1, string), file=output)
input.close() input.close()
output.close() output.close()
@ -463,14 +462,14 @@ def qt4_l10n(input_files, output, base):
def languages_l10n(input_files, output, base): def languages_l10n(input_files, output, base):
'''Generate pot file from lib/languages''' '''Generate pot file from lib/languages'''
out = open(output, 'w') out = io.open(output, 'w', encoding='utf_8')
GuiName = re.compile(r'^[^#]*GuiName\s+(.*)', re.IGNORECASE) GuiName = re.compile(r'^[^#]*GuiName\s+(.*)', re.IGNORECASE)
for src in input_files: for src in input_files:
descStartLine = -1 descStartLine = -1
descLines = [] descLines = []
lineno = 0 lineno = 0
for line in open(src).readlines(): for line in io.open(src, encoding='utf_8').readlines():
lineno += 1 lineno += 1
res = GuiName.search(line) res = GuiName.search(line)
if res != None: if res != None:
@ -483,14 +482,14 @@ def languages_l10n(input_files, output, base):
def latexfonts_l10n(input_files, output, base): def latexfonts_l10n(input_files, output, base):
'''Generate pot file from lib/latexfonts''' '''Generate pot file from lib/latexfonts'''
out = open(output, 'w') out = io.open(output, 'w', encoding='utf_8')
GuiName = re.compile(r'^[^#]*GuiName\s+(.*)', re.IGNORECASE) GuiName = re.compile(r'^[^#]*GuiName\s+(.*)', re.IGNORECASE)
for src in input_files: for src in input_files:
descStartLine = -1 descStartLine = -1
descLines = [] descLines = []
lineno = 0 lineno = 0
for line in open(src).readlines(): for line in io.open(src, encoding='utf_8').readlines():
lineno += 1 lineno += 1
res = GuiName.search(line) res = GuiName.search(line)
if res != None: if res != None:
@ -503,7 +502,7 @@ def latexfonts_l10n(input_files, output, base):
def external_l10n(input_files, output, base): def external_l10n(input_files, output, base):
'''Generate pot file from lib/external_templates''' '''Generate pot file from lib/external_templates'''
output = open(output, 'w') output = io.open(output, 'w', encoding='utf_8')
Template = re.compile(r'^Template\s+(.*)', re.IGNORECASE) Template = re.compile(r'^Template\s+(.*)', re.IGNORECASE)
GuiName = re.compile(r'\s*GuiName\s+(.*)', re.IGNORECASE) GuiName = re.compile(r'\s*GuiName\s+(.*)', re.IGNORECASE)
HelpTextStart = re.compile(r'\s*HelpText\s', re.IGNORECASE) HelpTextStart = re.compile(r'\s*HelpText\s', re.IGNORECASE)
@ -511,7 +510,7 @@ def external_l10n(input_files, output, base):
HelpTextEnd = re.compile(r'\s*HelpTextEnd\s', re.IGNORECASE) HelpTextEnd = re.compile(r'\s*HelpTextEnd\s', re.IGNORECASE)
i = -1 i = -1
for src in input_files: for src in input_files:
input = open(src) input = io.open(src, encoding='utf_8')
inHelp = False inHelp = False
hadHelp = False hadHelp = False
prev_help_string = '' prev_help_string = ''
@ -523,7 +522,7 @@ def external_l10n(input_files, output, base):
elif inHelp: elif inHelp:
if HelpTextEnd.match(line): if HelpTextEnd.match(line):
if hadHelp: if hadHelp:
print('\nmsgstr ""\n', file=output) print(u'\nmsgstr ""\n', file=output)
inHelp = False inHelp = False
hadHelp = False hadHelp = False
prev_help_string = '' prev_help_string = ''
@ -531,11 +530,11 @@ def external_l10n(input_files, output, base):
(help_string,) = HelpTextSection.match(line).groups() (help_string,) = HelpTextSection.match(line).groups()
help_string = help_string.replace('"', '') help_string = help_string.replace('"', '')
if help_string != "" and prev_help_string == '': if help_string != "" and prev_help_string == '':
print('#: %s:%d\nmsgid ""\n"%s\\n"' % \ print(u'#: %s:%d\nmsgid ""\n"%s\\n"' % \
(relativePath(src, base), lineno+1, help_string), file=output) (relativePath(src, base), lineno+1, help_string), file=output)
hadHelp = True hadHelp = True
elif help_string != "": elif help_string != "":
print('"%s\\n"' % help_string, file=output) print(u'"%s\\n"' % help_string, file=output)
prev_help_string = help_string prev_help_string = help_string
elif HelpTextStart.match(line): elif HelpTextStart.match(line):
inHelp = True inHelp = True
@ -544,7 +543,7 @@ def external_l10n(input_files, output, base):
continue continue
string = string.replace('"', '') string = string.replace('"', '')
if string != "" and not inHelp: if string != "" and not inHelp:
print('#: %s:%d\nmsgid "%s"\nmsgstr ""\n' % \ print(u'#: %s:%d\nmsgid "%s"\nmsgstr ""\n' % \
(relativePath(src, base), lineno+1, string), file=output) (relativePath(src, base), lineno+1, string), file=output)
input.close() input.close()
output.close() output.close()
@ -552,10 +551,10 @@ def external_l10n(input_files, output, base):
def formats_l10n(input_files, output, base): def formats_l10n(input_files, output, base):
'''Generate pot file from configure.py''' '''Generate pot file from configure.py'''
output = open(output, 'w') output = io.open(output, 'w', encoding='utf_8')
GuiName = re.compile(r'.*\\Format\s+\S+\s+\S+\s+"([^"]*)"\s+(\S*)\s+.*', re.IGNORECASE) GuiName = re.compile(r'.*\\Format\s+\S+\s+\S+\s+"([^"]*)"\s+(\S*)\s+.*', re.IGNORECASE)
GuiName2 = re.compile(r'.*\\Format\s+\S+\s+\S+\s+([^"]\S+)\s+(\S*)\s+.*', re.IGNORECASE) GuiName2 = re.compile(r'.*\\Format\s+\S+\s+\S+\s+([^"]\S+)\s+(\S*)\s+.*', re.IGNORECASE)
input = open(input_files[0]) input = io.open(input_files[0], encoding='utf_8')
for lineno, line in enumerate(input.readlines()): for lineno, line in enumerate(input.readlines()):
label = "" label = ""
labelsc = "" labelsc = ""
@ -571,10 +570,10 @@ def formats_l10n(input_files, output, base):
if shortcut != "": if shortcut != "":
labelsc = label + "|" + shortcut labelsc = label + "|" + shortcut
if label != "": if label != "":
print('#: %s:%d\nmsgid "%s"\nmsgstr ""\n' % \ print(u'#: %s:%d\nmsgid "%s"\nmsgstr ""\n' % \
(relativePath(input_files[0], base), lineno+1, label), file=output) (relativePath(input_files[0], base), lineno+1, label), file=output)
if labelsc != "": if labelsc != "":
print('#: %s:%d\nmsgid "%s"\nmsgstr ""\n' % \ print(u'#: %s:%d\nmsgid "%s"\nmsgstr ""\n' % \
(relativePath(input_files[0], base), lineno+1, labelsc), file=output) (relativePath(input_files[0], base), lineno+1, labelsc), file=output)
input.close() input.close()
output.close() output.close()
@ -582,16 +581,16 @@ def formats_l10n(input_files, output, base):
def encodings_l10n(input_files, output, base): def encodings_l10n(input_files, output, base):
'''Generate pot file from lib/encodings''' '''Generate pot file from lib/encodings'''
output = open(output, 'w') output = io.open(output, 'w', encoding='utf_8')
# assuming only one encodings file # assuming only one encodings file
# Encoding utf8 utf8 "Unicode (utf8)" UTF-8 variable inputenc # Encoding utf8 utf8 "Unicode (utf8)" UTF-8 variable inputenc
reg = re.compile('Encoding [\w-]+\s+[\w-]+\s+"([\w \-\(\)]+)"\s+[\w-]+\s+(fixed|variable|variableunsafe)\s+\w+.*') reg = re.compile('Encoding [\w-]+\s+[\w-]+\s+"([\w \-\(\)]+)"\s+[\w-]+\s+(fixed|variable|variableunsafe)\s+\w+.*')
input = open(input_files[0]) input = io.open(input_files[0], encoding='utf_8')
for lineno, line in enumerate(input.readlines()): for lineno, line in enumerate(input.readlines()):
if not line.startswith('Encoding'): if not line.startswith('Encoding'):
continue continue
if reg.match(line): if reg.match(line):
print('#: %s:%d\nmsgid "%s"\nmsgstr ""\n' % \ print(u'#: %s:%d\nmsgid "%s"\nmsgstr ""\n' % \
(relativePath(input_files[0], base), lineno+1, reg.match(line).groups()[0]), file=output) (relativePath(input_files[0], base), lineno+1, reg.match(line).groups()[0]), file=output)
else: else:
print("Error: Unable to handle line:") print("Error: Unable to handle line:")
@ -644,7 +643,7 @@ if __name__ == '__main__':
elif opt in ['-t', '--type']: elif opt in ['-t', '--type']:
input_type = value input_type = value
elif opt in ['-s', '--src_file']: elif opt in ['-s', '--src_file']:
input_files = [f.strip() for f in open(value)] input_files = [f.strip() for f in io.open(value, encoding='utf_8')]
if input_type not in ['ui', 'layouts', 'layouttranslations', 'qt4', 'languages', 'latexfonts', 'encodings', 'external', 'formats'] or output is None: if input_type not in ['ui', 'layouts', 'layouttranslations', 'qt4', 'languages', 'latexfonts', 'encodings', 'external', 'formats'] or output is None:
print('Wrong input type or output filename.') print('Wrong input type or output filename.')