Fix lyx2lyx unicodesymbols escaping

The escaping of symbols read from unicodesymbols was inconsistent, which led to wrong replacements (bug 9559). Now the escaping is consistent: unicode_reps contains unescaped LaTeX commands, the needed escaping for LyX files is applied in put_cmd_in_ert() and lyx2latex().
2024-11-25 10:58:52 +00:00 · 2015-06-14 12:44:44 +02:00 · 2015-06-14 12:44:44 +02:00 · a2f127f8c3
commit a2f127f8c3
parent aa4d41d282
4 changed files with 18 additions and 10 deletions
--- a/lib/lyx2lyx/lyx2lyx_tools.py
+++ b/lib/lyx2lyx/lyx2lyx_tools.py
@ -125,7 +125,7 @@ def put_cmd_in_ert(arg):
    else:
      s = arg
    for rep in unicode_reps:
-      s = s.replace(rep[1], rep[0].replace('\\\\', '\\'))
+      s = s.replace(rep[1], rep[0])
    s = s.replace('\\', "\\backslash\n")
    ret += s.splitlines()
    ret += ["\\end_layout", "", "\\end_inset"]
@ -254,7 +254,7 @@ def lyx2latex(document, lines):

          # Do the LyX text --> LaTeX conversion
          for rep in unicode_reps:
-            line = line.replace(rep[1], rep[0] + "{}")
+              line = line.replace(rep[1], rep[0])
          line = line.replace(r'\backslash', r'\textbackslash{}')
          line = line.replace(r'\series bold', r'\bfseries{}').replace(r'\series default', r'\mdseries{}')
          line = line.replace(r'\shape italic', r'\itshape{}').replace(r'\shape smallcaps', r'\scshape{}')
--- a/lib/lyx2lyx/lyx_1_5.py
+++ b/lib/lyx2lyx/lyx_1_5.py
@ -343,6 +343,7 @@ def revert_utf8(document):
    convert_multiencoding(document, False)


+# FIXME: Use the version in unicode_symbols.py which has some bug fixes
 def read_unicodesymbols():
    " Read the unicodesymbols list of unicode characters and corresponding commands."
    pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
--- a/lib/lyx2lyx/lyx_1_6.py
+++ b/lib/lyx2lyx/lyx_1_6.py
@ -145,6 +145,7 @@ def set_option(document, m, option, value):
    return l


+# FIXME: Use the version in unicode_symbols.py which has some bug fixes
 def read_unicodesymbols():
    " Read the unicodesymbols list of unicode characters and corresponding commands."
    pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
--- a/lib/lyx2lyx/unicode_symbols.py
+++ b/lib/lyx2lyx/unicode_symbols.py
@ -31,30 +31,36 @@ def read_unicodesymbols():
    pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
    fp = open(os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols'))
    spec_chars = []
-    # Two backslashes, followed by some non-word character, and then a character
+    # A backslash, followed by some non-word character, and then a character
    # in brackets. The idea is to check for constructs like: \"{u}, which is how
    # they are written in the unicodesymbols file; but they can also be written
    # as: \"u or even \" u.
-    r = re.compile(r'\\\\(\W)\{(\w)\}')
+    # The two backslashes in the string literal are needed to specify a literal
+    # backslash in the regex. Without r prefix, these would be four backslashes.
+    r = re.compile(r'\\(\W)\{(\w)\}')
    for line in fp.readlines():
        if line[0] != '#' and line.strip() != "":
+            # Note: backslashes in the string literals with r prefix are not escaped,
+            #       so one backslash in the source file equals one backslash in memory.
+            #       Without r prefix backslashes are escaped, so two backslashes in the
+            #       source file equal one backslash in memory.
            line=line.replace(' "',' ') # remove all quotation marks with spaces before
            line=line.replace('" ',' ') # remove all quotation marks with spaces after
-            line=line.replace(r'\"','"') # replace \" by " (for characters with diaeresis)
+            line=line.replace(r'\"','"') # unescape "
+            line=line.replace(r'\\','\\') # unescape \
            try:
                [ucs4,command,dead] = line.split(None,2)
                if command[0:1] != "\\":
                    continue
+                if (line.find("notermination=text") < 0 and
+                    line.find("notermination=both") < 0 and command[-1] != "}"):
+                    command = command + "{}"
                spec_chars.append([command, unichr(eval(ucs4))])
            except:
                continue
            m = r.match(command)
            if m != None:
-                command = "\\\\"
-                # If the character is a double-quote, then we need to escape it, too,
-                # since it is done that way in the LyX file.
-                if m.group(1) == "\"":
-                    command += "\\"
+                command = "\\"
                commandbl = command
                command += m.group(1) + m.group(2)
                commandbl += m.group(1) + ' ' + m.group(2)