lyx2lyx refactoring.

* use unicode.transform() instead of loop over replacements
* telling variable names
* remove trailing whitespace
* documentation update
* don't set use_ligature_dashes if both dash types are found
* remove spurious warning, normalize indentation, and use
  Python idioms in revert_baselineskip()
This commit is contained in:
Günter Milde 2018-01-31 15:09:32 +01:00
parent e4c0c6afdf
commit 2fce4d49ee
8 changed files with 251 additions and 200 deletions

View File

@ -94,7 +94,7 @@ Test reversion/conversion between 2.3 and 2.2 formats with lyx2lyx.
\end_layout \end_layout
\begin_layout Description \begin_layout Description
Allowbreak: allowbreak:
\bar under \bar under
\begin_inset Box Boxed \begin_inset Box Boxed
@ -169,5 +169,69 @@ without spaces.
\end_layout \end_layout
\begin_layout Description
baselineskip%:
\begin_inset Box Boxed
position "t"
hor_pos "c"
has_inner_box 1
inner_pos "t"
use_parbox 1
use_makebox 0
width "250baselineskip%"
special "none"
height "50baselineskip%"
height_special "none"
thickness "4baselineskip%"
separation "9baselineskip%"
shadowsize "4pt"
framecolor "black"
backgroundcolor "none"
status open
\begin_layout Plain Layout
test
\end_layout
\end_inset
\begin_inset CommandInset line
LatexCommand rule
offset "40baselineskip%"
width "800baselineskip%"
height "5.3baselineskip%"
\end_inset
\end_layout
\begin_deeper
\begin_layout Standard
\begin_inset VSpace 200baselineskip%
\end_inset
\end_layout
\begin_layout Standard
Vertical space above this paragraph is 2·baselineskip.
\end_layout
\begin_layout Standard
\begin_inset space \hspace*{}
\length 75.2baselineskip%
\end_inset
Paragraph with
\begin_inset space \hspace{}
\length 135baselineskip%
\end_inset
horizontal space insets using baselineskip.
\end_layout
\end_deeper
\end_body \end_body
\end_document \end_document

View File

@ -17,8 +17,8 @@
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
''' '''
This module offers several free functions to help with lyx2lyx'ing. This module offers several free functions to help with lyx2lyx'ing.
More documentation is below, but here is a quick guide to what More documentation is below, but here is a quick guide to what
they do. Optional arguments are marked by brackets. they do. Optional arguments are marked by brackets.
add_to_preamble(document, text): add_to_preamble(document, text):
@ -37,8 +37,8 @@ insert_to_preamble(document, text[, index]):
default index is 0, so the material is inserted at the beginning. default index is 0, so the material is inserted at the beginning.
Prepends a comment "% Added by lyx2lyx" to text. Prepends a comment "% Added by lyx2lyx" to text.
put_cmd_in_ert(arg): put_cmd_in_ert(cmd):
Here arg should be a list of strings (lines), which we want to Here cmd should be a list of strings (lines), which we want to
wrap in ERT. Returns a list of strings so wrapped. wrap in ERT. Returns a list of strings so wrapped.
A call to this routine will often go something like this: A call to this routine will often go something like this:
i = find_token('\\begin_inset FunkyInset', ...) i = find_token('\\begin_inset FunkyInset', ...)
@ -81,7 +81,6 @@ import string
from parser_tools import find_token, find_end_of_inset from parser_tools import find_token, find_end_of_inset
from unicode_symbols import unicode_reps from unicode_symbols import unicode_reps
# This will accept either a list of lines or a single line. # This will accept either a list of lines or a single line.
# It is bad practice to pass something with embedded newlines, # It is bad practice to pass something with embedded newlines,
# though we will handle that. # though we will handle that.
@ -118,34 +117,37 @@ def add_to_preamble(document, text):
# It should really be a list. # It should really be a list.
def insert_to_preamble(document, text, index = 0): def insert_to_preamble(document, text, index = 0):
""" Insert text to the preamble at a given line""" """ Insert text to the preamble at a given line"""
if not type(text) is list: if not type(text) is list:
# split on \n just in case # split on \n just in case
# it'll give us the one element list we want # it'll give us the one element list we want
# if there's no \n, too # if there's no \n, too
text = text.split('\n') text = text.split('\n')
text.insert(0, "% Added by lyx2lyx") text.insert(0, "% Added by lyx2lyx")
document.preamble[index:index] = text document.preamble[index:index] = text
def put_cmd_in_ert(arg): # A dictionary of Unicode->LICR mappings for use in a Unicode string's translate() method
''' # Created from the reversed list to keep the first of alternative definitions.
arg should be a list of lines we want to wrap in ERT. licr_table = dict((ord(ch), cmd) for cmd, ch in unicode_reps[::-1])
Returns a list of strings, with the lines so wrapped.
''' def put_cmd_in_ert(cmd):
"""
Return ERT inset wrapping `cmd` as a list of strings.
`cmd` can be a string or list of lines. Non-ASCII characters are converted
to the respective LICR macros if defined in unicodesymbols.
"""
ret = ["\\begin_inset ERT", "status collapsed", "", "\\begin_layout Plain Layout", ""] ret = ["\\begin_inset ERT", "status collapsed", "", "\\begin_layout Plain Layout", ""]
# It will be faster for us to work with a single string internally. # It will be faster to work with a single string internally.
# That way, we only go through the unicode_reps loop once. if isinstance(cmd, list):
if type(arg) is list: cmd = u"\n".join(cmd)
s = "\n".join(arg)
else: else:
s = arg cmd = u"%s" % cmd # ensure it is an unicode instance
for rep in unicode_reps: cmd = cmd.translate(licr_table)
s = s.replace(rep[1], rep[0]) cmd = cmd.replace("\\", "\\backslash\n")
s = s.replace('\\', "\\backslash\n") ret += cmd.splitlines()
ret += s.splitlines()
ret += ["\\end_layout", "", "\\end_inset"] ret += ["\\end_layout", "", "\\end_inset"]
return ret return ret
@ -300,7 +302,7 @@ def lyx2verbatim(document, lines):
def latex_length(slen): def latex_length(slen):
''' '''
Convert lengths to their LaTeX representation. Returns (bool, length), Convert lengths to their LaTeX representation. Returns (bool, length),
where the bool tells us if it was a percentage, and the length is the where the bool tells us if it was a percentage, and the length is the
LaTeX representation. LaTeX representation.
@ -314,9 +316,14 @@ def latex_length(slen):
# the + always precedes the - # the + always precedes the -
# Convert relative lengths to LaTeX units # Convert relative lengths to LaTeX units
units = {"text%":"\\textwidth", "col%":"\\columnwidth", units = {"col%": "\\columnwidth",
"page%":"\\paperwidth", "line%":"\\linewidth", "text%": "\\textwidth",
"theight%":"\\textheight", "pheight%":"\\paperheight"} "page%": "\\paperwidth",
"line%": "\\linewidth",
"theight%": "\\textheight",
"pheight%": "\\paperheight",
"baselineskip%": "\\baselineskip"
}
for unit in list(units.keys()): for unit in list(units.keys()):
i = slen.find(unit) i = slen.find(unit)
if i == -1: if i == -1:

View File

@ -23,7 +23,7 @@ import unicodedata
import sys, os import sys, os
from parser_tools import find_token, find_end_of, find_tokens, get_value from parser_tools import find_token, find_end_of, find_tokens, get_value
from unicode_symbols import read_unicodesymbols from unicode_symbols import unicode_reps
#################################################################### ####################################################################
# Private helper functions # Private helper functions
@ -146,54 +146,6 @@ def set_option(document, m, option, value):
return l return l
# FIXME: Remove this function if the version imported from unicode_symbols works.
# This function was the predecessor from that function, that in the meanwhile got
# new fixes.
def read_unicodesymbols2():
" Read the unicodesymbols list of unicode characters and corresponding commands."
# Provide support for both python 2 and 3
PY2 = sys.version_info[0] == 2
if not PY2:
unichr = chr
# End of code to support for both python 2 and 3
pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
fp = open(os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols'))
spec_chars = []
# Two backslashes, followed by some non-word character, and then a character
# in brackets. The idea is to check for constructs like: \"{u}, which is how
# they are written in the unicodesymbols file; but they can also be written
# as: \"u or even \" u.
r = re.compile(r'\\\\(\W)\{(\w)\}')
for line in fp.readlines():
if line[0] != '#' and line.strip() != "":
line=line.replace(' "',' ') # remove all quotation marks with spaces before
line=line.replace('" ',' ') # remove all quotation marks with spaces after
line=line.replace(r'\"','"') # replace \" by " (for characters with diaeresis)
try:
[ucs4,command,dead] = line.split(None,2)
if command[0:1] != "\\":
continue
spec_chars.append([command, unichr(eval(ucs4))])
except:
continue
m = r.match(command)
if m != None:
command = "\\\\"
# If the character is a double-quote, then we need to escape it, too,
# since it is done that way in the LyX file.
if m.group(1) == "\"":
command += "\\"
commandbl = command
command += m.group(1) + m.group(2)
commandbl += m.group(1) + ' ' + m.group(2)
spec_chars.append([command, unichr(eval(ucs4))])
spec_chars.append([commandbl, unichr(eval(ucs4))])
fp.close()
return spec_chars
def extract_argument(line): def extract_argument(line):
'Extracts a LaTeX argument from the start of line. Returns (arg, rest).' 'Extracts a LaTeX argument from the start of line. Returns (arg, rest).'
@ -280,8 +232,6 @@ def latex2ert(line, isindex):
return retval return retval
unicode_reps = read_unicodesymbols()
#Bug 5022.... #Bug 5022....
#Might should do latex2ert first, then deal with stuff that DOESN'T #Might should do latex2ert first, then deal with stuff that DOESN'T
#end up inside ERT. That routine could be modified so that it returned #end up inside ERT. That routine could be modified so that it returned

View File

@ -746,10 +746,10 @@ def convert_phrases(document):
if len(words) > 1 and words[0] == "\\begin_inset" and \ if len(words) > 1 and words[0] == "\\begin_inset" and \
words[1] in ["CommandInset", "External", "Formula", "Graphics", "listings"]: words[1] in ["CommandInset", "External", "Formula", "Graphics", "listings"]:
# must not replace anything in insets that store LaTeX contents in .lyx files # must not replace anything in insets that store LaTeX contents in .lyx files
# (math and command insets withut overridden read() and write() methods # (math and command insets without overridden read() and write() methods)
j = find_end_of_inset(document.body, i) j = find_end_of_inset(document.body, i)
if j == -1: if j == -1:
document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(i)) document.warning("Malformed LyX document: Can't find end of inset at line " + str(i))
i += 1 i += 1
else: else:
i = j i = j

View File

@ -27,15 +27,14 @@ import sys, os
from parser_tools import (del_token, del_value, del_complete_lines, from parser_tools import (del_token, del_value, del_complete_lines,
find_complete_lines, find_end_of, find_end_of_layout, find_end_of_inset, find_complete_lines, find_end_of, find_end_of_layout, find_end_of_inset,
find_re, find_token, find_token_backwards, get_containing_inset, find_re, find_token, find_token_backwards, get_containing_inset,
get_containing_layout, get_bool_value, get_value, get_quoted_value) get_containing_layout, get_bool_value, get_value, get_quoted_value,
# find_tokens, find_token_exact, is_in_inset, is_in_inset)
# check_token, get_option_value # find_tokens, find_token_exact, check_token, get_option_value
from lyx2lyx_tools import add_to_preamble, put_cmd_in_ert, revert_font_attrs, \ from lyx2lyx_tools import (add_to_preamble, put_cmd_in_ert, revert_font_attrs,
insert_to_preamble insert_to_preamble, latex_length)
# get_ert, lyx2latex, \ # get_ert, lyx2latex, lyx2verbatim, length_in_bp, convert_info_insets
# lyx2verbatim, length_in_bp, convert_info_insets # revert_flex_inset, hex2ratio, str2bool
# latex_length, revert_flex_inset, hex2ratio, str2bool
#################################################################### ####################################################################
# Private helper functions # Private helper functions
@ -88,13 +87,12 @@ def convert_dateinset(document):
continue continue
if get_value(document.body, 'template', i, j) == "Date": if get_value(document.body, 'template', i, j) == "Date":
document.body[i : j + 1] = put_cmd_in_ert("\\today ") document.body[i : j + 1] = put_cmd_in_ert("\\today ")
i += 1 i = j+1 # skip inset
continue
def convert_inputenc(document): def convert_inputenc(document):
" Replace no longer supported input encoding settings. " " Replace no longer supported input encoding settings. "
i = find_token(document.header, "\\inputenc", 0) i = find_token(document.header, "\\inputenc")
if i == -1: if i == -1:
return return
if get_value(document.header, "\\inputencoding", i) == "pt254": if get_value(document.header, "\\inputencoding", i) == "pt254":
@ -1859,7 +1857,7 @@ def convert_dashligatures(document):
while i+1 < len(lines): while i+1 < len(lines):
i += 1 i += 1
line = lines[i] line = lines[i]
# skip lines without any dashes: # skip lines without dashes:
if not re.search(u"[\u2013\u2014]|\\twohyphens|\\threehyphens", line): if not re.search(u"[\u2013\u2014]|\\twohyphens|\\threehyphens", line):
continue continue
# skip label width string (see bug 10243): # skip label width string (see bug 10243):
@ -1867,28 +1865,28 @@ def convert_dashligatures(document):
continue continue
# do not touch hyphens in some insets (cf. lyx_2_2.convert_dashes): # do not touch hyphens in some insets (cf. lyx_2_2.convert_dashes):
try: try:
value, start, end = get_containing_inset(lines, i) inset_type, start, end = get_containing_inset(lines, i)
except TypeError: # no containing inset except TypeError: # no containing inset
value, start, end = "no inset", -1, -1 inset_type, start, end = "no inset", -1, -1
if (value.split()[0] in if (inset_type.split()[0] in
["CommandInset", "ERT", "External", "Formula", ["CommandInset", "ERT", "External", "Formula",
"FormulaMacro", "Graphics", "IPA", "listings"] "FormulaMacro", "Graphics", "IPA", "listings"]
or value == "Flex Code"): or inset_type == "Flex Code"):
i = end i = end
continue continue
try: try:
layout, start, end, j = get_containing_layout(lines, i) layoutname, start, end, j = get_containing_layout(lines, i)
except TypeError: # no (or malformed) containing layout except TypeError: # no (or malformed) containing layout
document.warning("Malformed LyX document: " document.warning("Malformed LyX document: "
"Can't find layout at line %d" % i) "Can't find layout at line %d" % i)
continue continue
if layout == "LyX-Code": if layoutname == "LyX-Code":
i = end i = end
continue continue
# literal dash followed by a word or no-break space: # literal dash followed by a word or no-break space:
if re.search(u"[\u2013\u2014]([\w\u00A0]|$)", line, if re.search(u"[\u2013\u2014]([\w\u00A0]|$)",
flags=re.UNICODE): line, flags=re.UNICODE):
has_literal_dashes = True has_literal_dashes = True
# ligature dash followed by word or no-break space on next line: # ligature dash followed by word or no-break space on next line:
if (re.search(r"(\\twohyphens|\\threehyphens)", line) and if (re.search(r"(\\twohyphens|\\threehyphens)", line) and
@ -1900,14 +1898,15 @@ def convert_dashligatures(document):
'"ligature" dashes.\n Line breaks may have changed. ' '"ligature" dashes.\n Line breaks may have changed. '
'See UserGuide chapter 3.9.1 for details.') 'See UserGuide chapter 3.9.1 for details.')
break break
if has_literal_dashes:
if has_literal_dashes and not has_ligature_dashes:
use_dash_ligatures = False use_dash_ligatures = False
elif has_ligature_dashes: elif has_ligature_dashes and not has_literal_dashes:
use_dash_ligatures = True use_dash_ligatures = True
# insert the setting if there is a preferred value # insert the setting if there is a preferred value
if use_dash_ligatures is not None: if use_dash_ligatures is not None:
i = find_token(document.header, "\\graphics") document.header.insert(-1, "\\use_dash_ligatures %s"
document.header.insert(i, "\\use_dash_ligatures %s"
% str(use_dash_ligatures).lower()) % str(use_dash_ligatures).lower())
@ -2020,64 +2019,37 @@ def revert_mathindent(document):
def revert_baselineskip(document): def revert_baselineskip(document):
" Revert baselineskips to TeX code " " Revert baselineskips to TeX code "
i = 0 i = 0
vspaceLine = 0 regexp = re.compile(r'.*baselineskip%.*')
hspaceLine = 0 while True:
while True: i = i + 1
regexp = re.compile(r'^.*baselineskip%.*$') i = find_re(document.body, regexp, i)
i = find_re(document.body, regexp, i) if i == -1:
if i == -1: return
return if document.body[i].startswith("\\begin_inset VSpace"):
vspaceLine = find_token(document.body, "\\begin_inset VSpace", i) # output VSpace inset as TeX code
if vspaceLine == i: end = find_end_of_inset(document.body, i)
# output VSpace inset as TeX code if end == -1:
# first read out the values document.warning("Malformed LyX document: "
beg = document.body[i].rfind("VSpace "); "Can't find end of VSpace inset at line %d." % i)
end = document.body[i].rfind("baselineskip%"); continue
baselineskip = float(document.body[i][beg + 7:end]); # read out the value
# we store the value in percent, thus divide by 100 baselineskip = document.body[i].split()[-1]
baselineskip = baselineskip/100; # check if it is the starred version
baselineskip = str(baselineskip); star = '*' if '*' in document.body[i] else ''
# check if it is the starred version # now output TeX code
if document.body[i].find('*') != -1: cmd = "\\vspace%s{%s}" %(star, latex_length(baselineskip)[1])
star = '*' document.body[i:end+1] = put_cmd_in_ert(cmd)
else: i += 8
star = '' continue
# now output TeX code begin, end = is_in_inset(document.body, i, "\\begin_inset space \\hspace")
endInset = find_end_of_inset(document.body, i) if begin != - 1:
if endInset == -1: # output space inset as TeX code
document.warning("Malformed LyX document: Missing '\\end_inset' of VSpace inset.") baselineskip = document.body[i].split()[-1]
return star = '*' if '*' in document.body[i-1] else ''
else: cmd = "\\hspace%s{%s}" %(star, latex_length(baselineskip)[1])
document.body[vspaceLine: endInset + 1] = put_cmd_in_ert("\\vspace" + star + '{' + baselineskip + "\\baselineskip}") document.body[begin:end+1] = put_cmd_in_ert(cmd)
hspaceLine = find_token(document.body, "\\begin_inset space \\hspace", i - 1)
document.warning("hspaceLine: " + str(hspaceLine))
document.warning("i: " + str(i))
if hspaceLine == i - 1:
# output space inset as TeX code
# first read out the values
beg = document.body[i].rfind("\\length ");
end = document.body[i].rfind("baselineskip%");
baselineskip = float(document.body[i][beg + 7:end]);
document.warning("baselineskip: " + str(baselineskip))
# we store the value in percent, thus divide by 100
baselineskip = baselineskip/100;
baselineskip = str(baselineskip);
# check if it is the starred version
if document.body[i-1].find('*') != -1:
star = '*'
else:
star = ''
# now output TeX code
endInset = find_end_of_inset(document.body, i)
if endInset == -1:
document.warning("Malformed LyX document: Missing '\\end_inset' of space inset.")
return
else:
document.body[hspaceLine: endInset + 1] = put_cmd_in_ert("\\hspace" + star + '{' + baselineskip + "\\baselineskip}")
i = i + 1
def revert_rotfloat(document): def revert_rotfloat(document):

View File

@ -23,7 +23,7 @@ This module offers several free functions to help parse lines.
More documentation is below, but here is a quick guide to what More documentation is below, but here is a quick guide to what
they do. Optional arguments are marked by brackets. they do. Optional arguments are marked by brackets.
find_token(lines, token, start[, end[, ignorews]]): find_token(lines, token[, start[, end[, ignorews]]]):
Returns the first line i, start <= i < end, on which Returns the first line i, start <= i < end, on which
token is found at the beginning. Returns -1 if not token is found at the beginning. Returns -1 if not
found. found.
@ -31,10 +31,10 @@ find_token(lines, token, start[, end[, ignorews]]):
in whitespace do not count, except that there must be no in whitespace do not count, except that there must be no
extra whitespace following token itself. extra whitespace following token itself.
find_token_exact(lines, token, start[, end]): find_token_exact(lines, token[, start[, end]]]):
As find_token, but with ignorews set to True. As find_token, but with ignorews set to True.
find_tokens(lines, tokens, start[, end[, ignorews]]): find_tokens(lines, tokens[, start[, end[, ignorews]]]):
Returns the first line i, start <= i < end, on which Returns the first line i, start <= i < end, on which
one of the tokens in tokens is found at the beginning. one of the tokens in tokens is found at the beginning.
Returns -1 if not found. Returns -1 if not found.
@ -42,7 +42,7 @@ find_tokens(lines, tokens, start[, end[, ignorews]]):
in whitespace do not count, except that there must be no in whitespace do not count, except that there must be no
extra whitespace following token itself. extra whitespace following token itself.
find_tokens_exact(lines, token, start[, end]): find_tokens_exact(lines, token[, start[, end]]):
As find_tokens, but with ignorews True. As find_tokens, but with ignorews True.
find_token_backwards(lines, token, start): find_token_backwards(lines, token, start):
@ -543,8 +543,9 @@ def is_in_inset(lines, i, inset, default=(-1,-1)):
is_in_inset(document.body, i, "\\begin_inset Tabular") is_in_inset(document.body, i, "\\begin_inset Tabular")
returns (-1,-1) if `i` is not within a "Tabular" inset (i.e. a table). returns (-1,-1) if `i` is not within a "Tabular" inset (i.e. a table).
If it is, then it returns the line on which the table begins and the one If it is, then it returns the line on which the table begins and the one
on which it ends. Note that this pair will evaluate to on which it ends.
boolean True, so Note that this pair will evaluate to boolean True, so (with the optional
default value set to False)
if is_in_inset(..., default=False): if is_in_inset(..., default=False):
will do what you expect. will do what you expect.
""" """

View File

@ -0,0 +1,52 @@
# This file is part of lyx2lyx
# -*- coding: utf-8 -*-
# Copyright (C) 2018 The LyX team
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
" This modules tests the auxiliary functions for lyx2lyx."
from lyx2lyx_tools import *
import unittest
class TestParserTools(unittest.TestCase):
    """Unit tests for the helper functions in lyx2lyx_tools."""

    def test_put_cmd_in_ert(self):
        """put_cmd_in_ert() wraps a command in an ERT inset.

        Non-ASCII characters are translated to their LICR macros, and
        backslashes become '\\backslash' followed by a line break.
        """
        expected = ['\\begin_inset ERT',
                    'status collapsed',
                    '',
                    '\\begin_layout Plain Layout',
                    '',
                    u'\\backslash',
                    u'texttt{Gr\\backslash',
                    u'"{u}\\backslash',
                    u'ss{}e}',
                    '\\end_layout',
                    '',
                    '\\end_inset']
        # A bare string and a one-element list of lines must produce
        # the same wrapped inset.
        self.assertEqual(put_cmd_in_ert(u"\\texttt{Grüße}"), expected)
        self.assertEqual(put_cmd_in_ert([u"\\texttt{Grüße}"]), expected)

    def test_latex_length(self):
        """latex_length() returns (was_percentage, latex_representation)."""
        # Relative lengths: the percentage is divided by 100 and the
        # unit becomes the corresponding LaTeX length macro.
        self.assertEqual(latex_length("-30.5col%"),
                         (True, "-0.305\\columnwidth"))
        self.assertEqual(latex_length("35baselineskip%"),
                         (True, "0.35\\baselineskip"))
        # Absolute lengths pass through unchanged.
        self.assertEqual(latex_length("11em"), (False, "11em"))
        self.assertEqual(latex_length("-0.4pt"), (False, "-0.4pt"))
# Allow running this test module directly (e.g. `python <this file>`).
if __name__ == '__main__':
    unittest.main()

View File

@ -18,7 +18,7 @@
" Import unicode_reps from this module for access to the unicode<->LaTeX mapping. " " Import unicode_reps from this module for access to the unicode<->LaTeX mapping. "
import sys, os, re import sys, os, re, codecs
# Provide support for both python 2 and 3 # Provide support for both python 2 and 3
PY2 = sys.version_info[0] == 2 PY2 = sys.version_info[0] == 2
@ -28,14 +28,13 @@ if not PY2:
def read_unicodesymbols(): def read_unicodesymbols():
" Read the unicodesymbols list of unicode characters and corresponding commands." " Read the unicodesymbols list of unicode characters and corresponding commands."
pathname = os.path.abspath(os.path.dirname(sys.argv[0])) pathname = os.path.abspath(os.path.dirname(__file__))
filename = os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols') filename = os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols')
# For python 3+ we have to specify the encoding for those systems # Read as Unicode strings in both, Python 2 and 3
# where the default is not UTF-8 # Specify the encoding for those systems where the default is not UTF-8
fp = open(filename, encoding="utf8") if (not PY2) else open(filename) fp = codecs.open(filename, encoding="utf8")
spec_chars = []
# A backslash, followed by some non-word character, and then a character # A backslash, followed by some non-word character, and then a character
# in brackets. The idea is to check for constructs like: \"{u}, which is how # in brackets. The idea is to check for constructs like: \"{u}, which is how
# they are written in the unicodesymbols file; but they can also be written # they are written in the unicodesymbols file; but they can also be written
@ -43,36 +42,42 @@ def read_unicodesymbols():
# The two backslashes in the string literal are needed to specify a literal # The two backslashes in the string literal are needed to specify a literal
# backslash in the regex. Without r prefix, these would be four backslashes. # backslash in the regex. Without r prefix, these would be four backslashes.
r = re.compile(r'\\(\W)\{(\w)\}') r = re.compile(r'\\(\W)\{(\w)\}')
spec_chars = []
for line in fp.readlines(): for line in fp.readlines():
if line[0] != '#' and line.strip() != "": if not line.strip() or line.startswith('#'):
# Note: backslashes in the string literals with r prefix are not escaped, # skip empty lines and comments
# so one backslash in the source file equals one backslash in memory. continue
# Without r prefix backslashes are escaped, so two backslashes in the # Note: backslashes in the string literals with r prefix are not escaped,
# source file equal one backslash in memory. # so one backslash in the source file equals one backslash in memory.
line=line.replace(' "',' ') # remove all quotation marks with spaces before # Without r prefix backslashes are escaped, so two backslashes in the
line=line.replace('" ',' ') # remove all quotation marks with spaces after # source file equal one backslash in memory.
line=line.replace(r'\"','"') # unescape " line=line.replace(' "',' ') # remove all quotation marks with spaces before
line=line.replace(r'\\','\\') # unescape \ line=line.replace('" ',' ') # remove all quotation marks with spaces after
try: line=line.replace(r'\"','"') # unescape "
[ucs4,command,dead] = line.split(None,2) line=line.replace(r'\\','\\') # unescape \
if command[0:1] != "\\": try:
continue [ucs4,command,dead] = line.split(None,2)
if (line.find("notermination=text") < 0 and if command[0:1] != "\\":
line.find("notermination=both") < 0 and command[-1] != "}"):
command = command + "{}"
spec_chars.append([command, unichr(eval(ucs4))])
except:
continue continue
m = r.match(command) literal_char = unichr(int(ucs4, 16))
if m != None: if (line.find("notermination=text") < 0 and
command = "\\" line.find("notermination=both") < 0 and command[-1] != "}"):
commandbl = command command = command + "{}"
command += m.group(1) + m.group(2) spec_chars.append([command, literal_char])
commandbl += m.group(1) + ' ' + m.group(2) except:
spec_chars.append([command, unichr(eval(ucs4))]) continue
spec_chars.append([commandbl, unichr(eval(ucs4))]) m = r.match(command)
if m != None:
command = "\\"
commandbl = command
command += m.group(1) + m.group(2)
commandbl += m.group(1) + ' ' + m.group(2)
spec_chars.append([command, literal_char])
spec_chars.append([commandbl, literal_char])
fp.close() fp.close()
return spec_chars return spec_chars
unicode_reps = read_unicodesymbols() unicode_reps = read_unicodesymbols()