Fix a couple of issues with filename encodings.

* Take into account the filesystem encoding for the zip export on *nix such that the representation of filenames in the zip archive is not mangled, when possible. This only concerns the way filenames are displayed as their creation in the filesystem was nevertheless correct. * On Windows, try to obtain the command line parameters from the wide char representation by directly accessing the platform APIs through ctypes. This allows to also deal with filenames not exactly representable using the current code page and corrects a bug resulting in silently dropping those kind of filenames.
2025-01-21 23:09:40 +00:00 · 2015-07-05 15:45:40 +02:00 · 2015-07-05 15:45:40 +02:00 · 2ea3e07a23
commit 2ea3e07a23
parent ca0ba7c6ad
1 changed files with 61 additions and 15 deletions
--- a/lib/scripts/lyxpak.py
+++ b/lib/scripts/lyxpak.py
@ -21,6 +21,12 @@ if sys.version_info < (2, 4, 0):
 from getopt import getopt
 from cStringIO import StringIO

+running_on_windows = (os.name == 'nt')
+
+if running_on_windows:
+    from shutil import copyfile
+    from tempfile import NamedTemporaryFile
+
 # Pre-compiled regular expressions.
 re_lyxfile = re.compile("\.lyx$")
 re_input = re.compile(r'^(.*)\\(input|include){(\s*)(.+)(\s*)}.*$')
@ -55,12 +61,12 @@ def error(message):


 def gzopen(file, mode):
-    input = open(file, 'rb')
+    input = open(unicode(file, 'utf-8'), 'rb')
    magicnum = input.read(2)
    input.close()
    if magicnum == "\x1f\x8b":
-        return gzip.open(file, mode)
-    return open(file, mode)
+        return gzip.open(unicode(file, 'utf-8'), mode)
+    return open(unicode(file, 'utf-8'), mode)


 def run_cmd(cmd):
@ -83,7 +89,7 @@ def find_exe(candidates, extlist, path):
 def abspath(name):
    " Resolve symlinks and returns the absolute normalized name."
    newname = os.path.normpath(os.path.abspath(name))
-    if os.name != 'nt':
+    if not running_on_windows:
        newname = os.path.realpath(newname)
    return newname

@ -93,8 +99,19 @@ def gather_files(curfile, incfiles, lyx2lyx):
    curdir = os.path.dirname(abspath(curfile))
    is_lyxfile = re_lyxfile.search(curfile)
    if is_lyxfile:
-        lyx2lyx_cmd = 'python "%s" "%s"' % (lyx2lyx, curfile)
-        l2l_status, l2l_stdout = run_cmd(lyx2lyx_cmd)
+        if running_on_windows:
+            # os.popen cannot cope with unicode arguments and we cannot be
+            # sure that curfile can be correctly converted to the current
+            # code page. So, we resort to running lyx2lyx on a copy.
+            tmp = NamedTemporaryFile(delete=False)
+            tmp.close()
+            copyfile(unicode(curfile, 'utf-8'), tmp.name)
+            lyx2lyx_cmd = 'python "%s" "%s"' % (lyx2lyx, tmp.name)
+            l2l_status, l2l_stdout = run_cmd(lyx2lyx_cmd)
+            os.unlink(tmp.name)
+        else:
+            lyx2lyx_cmd = 'python "%s" "%s"' % (lyx2lyx, curfile)
+            l2l_status, l2l_stdout = run_cmd(lyx2lyx_cmd)
        if l2l_status != None:
            error('%s failed to convert "%s"' % (lyx2lyx, curfile))
        if l2l_stdout.startswith("\x1f\x8b"):
@ -136,9 +153,9 @@ def gather_files(curfile, incfiles, lyx2lyx):
            if not os.path.isabs(file):
                file = os.path.join(curdir, file)
            file_exists = False
-            if not os.path.isdir(file):
+            if not os.path.isdir(unicode(file, 'utf-8')):
                for ext in extlist:
-                    if os.path.exists(file + ext):
+                    if os.path.exists(unicode(file + ext, 'utf-8')):
                        file = file + ext
                        file_exists = True
                        break
@ -161,7 +178,7 @@ def gather_files(curfile, incfiles, lyx2lyx):
                file = file[9:]
            if not os.path.isabs(file):
                file = os.path.join(curdir, file + '.bst')
-            if os.path.exists(file):
+            if os.path.exists(unicode(file, 'utf-8')):
                incfiles.append(abspath(file))
            i += 1
            continue
@ -176,7 +193,7 @@ def gather_files(curfile, incfiles, lyx2lyx):
                    file = bibfiles[j] + '.bib'
                else:
                    file = os.path.join(curdir, bibfiles[j] + '.bib')
-                if os.path.exists(file):
+                if os.path.exists(unicode(file, 'utf-8')):
                    incfiles.append(abspath(file))
                j += 1
            i += 1
@ -242,7 +259,7 @@ def main(args):
    if len(argv) != 1:
        error(usage(ourprog))

-    makezip = (os.name == 'nt')
+    makezip = running_on_windows
    outdir = ""
    lyx2lyx = None

@ -258,11 +275,13 @@ def main(args):
        lyx2lyx = param
      elif opt == "-o":
        outdir = param
-        if not os.path.isdir(outdir):
+        if not os.path.isdir(unicode(outdir, 'utf-8')):
          error('Error: "%s" is not a directory.' % outdir)

    lyxfile = argv[0]
-    if not os.path.exists(lyxfile):
+    if not running_on_windows:
+        lyxfile = unicode(lyxfile, sys.getfilesystemencoding()).encode('utf-8')
+    if not os.path.exists(unicode(lyxfile, 'utf-8')):
        error('File "%s" not found.' % lyxfile)

    # Check that it actually is a LyX document
@ -315,14 +334,14 @@ def main(args):
    incfiles.sort()

    if topdir != '':
-        os.chdir(topdir)
+        os.chdir(unicode(topdir, 'utf-8'))

    # Create the archive
    try:
        if makezip:
            zip = zipfile.ZipFile(ar_name, "w", zipfile.ZIP_DEFLATED)
            for file in incfiles:
-                zip.write(file)
+                zip.write(file.decode('utf-8'), unicode(file, 'utf-8'))
            zip.close()
        else:
            tar = tarfile.open(ar_name, "w:gz")
@ -337,4 +356,31 @@ def main(args):


 if __name__ == "__main__":
+    if running_on_windows:
+        # This works around <http://bugs.python.org/issue2128> for Python 2.
+        # All arguments are retrieved in unicode format and converted to utf-8.
+        # In this way, when launched from the command line, lyxpak.py can deal
+        # with any non-ascii names. Unfortunately, this is not the case when
+        # launched by LyX, because LyX converts the arguments of the converters
+        # to the filesystem encoding. On Windows this corresponds to the current
+        # code page and not to the UTF-16 encoding used by NTFS, such that they
+        # are transliterated if not exactly encodable. As an example, α may
+        # become a, β may become ß, and so on. However, this is a problem only
+        # if the full path of the LyX document contains an unencodable character
+        # as all other paths are extracted from the document in utf-8 format.
+        from ctypes import WINFUNCTYPE, windll, POINTER, byref, c_int
+        from ctypes.wintypes import LPWSTR, LPCWSTR
+        GetCommandLineW = WINFUNCTYPE(LPWSTR)(("GetCommandLineW", windll.kernel32))
+        CommandLineToArgvW = WINFUNCTYPE(POINTER(LPWSTR), LPCWSTR, POINTER(c_int))(("CommandLineToArgvW", windll.shell32))
+        argc = c_int(0)
+        argv_unicode = CommandLineToArgvW(GetCommandLineW(), byref(argc))
+        # unicode_argv[0] is the Python interpreter, so skip that.
+        argv = [argv_unicode[i].encode('utf-8') for i in xrange(1, argc.value)]
+        # Also skip option arguments to the Python interpreter.
+        while len(argv) > 0:
+            if not argv[0].startswith("-"):
+                break
+            argv = argv[1:]
+        sys.argv = argv
+
    main(sys.argv)