Add lyx2lyx tools from 2.4.x
This commit is contained in:
parent ee25620931
commit a5c328b304
@@ -1,6 +1,6 @@
 # This file is part of lyx2lyx
 # -*- coding: utf-8 -*-
-# Copyright (C) 2002-2015 The LyX Team
+# Copyright (C) 2002-2018 The LyX Team
 # Copyright (C) 2002-2004 Dekel Tsur <dekel@lyx.org>
 # Copyright (C) 2002-2006 José Matos <jamatos@lyx.org>
 #
@@ -20,8 +20,8 @@

 " The LyX module has all the rules related with different lyx file formats."

-from parser_tools import get_value, check_token, find_token, \
-     find_tokens, find_end_of
+from parser_tools import (get_value, check_token, find_token, find_tokens,
+                          find_end_of, find_complete_lines)
 import os.path
 import gzip
 import locale
@@ -34,8 +34,10 @@ import codecs
 try:
     import lyx2lyx_version
     version__ = lyx2lyx_version.version
     stable_version = True
 except: # we are running from build directory so assume the last version
-    version__ = '2.3'
+    version__ = '2.4'
     stable_version = False

 default_debug__ = 2

@@ -69,8 +71,8 @@ def minor_versions(major, last_minor_version):
 # Regular expressions used
 format_re = re.compile(r"(\d)[\.,]?(\d\d)")
 fileformat = re.compile(r"\\lyxformat\s*(\S*)")
-original_version = re.compile(r".*?LyX ([\d.]*)")
-original_tex2lyx_version = re.compile(r".*?tex2lyx ([\d.]*)")
+original_version = re.compile(b".*?LyX ([\\d.]*)")
+original_tex2lyx_version = re.compile(b".*?tex2lyx ([\\d.]*)")

 ##
 # file format information:
@ -92,8 +94,9 @@ format_relation = [("0_06", [200], minor_versions("0.6" , 4)),
|
||||
("1_6", list(range(277,346)), minor_versions("1.6" , 10)),
|
||||
("2_0", list(range(346,414)), minor_versions("2.0" , 8)),
|
||||
("2_1", list(range(414,475)), minor_versions("2.1" , 5)),
|
||||
("2_2", list(range(475,509)), minor_versions("2.2" , 0)),
|
||||
("2_3", (), minor_versions("2.3" , 0))
|
||||
("2_2", list(range(475,509)), minor_versions("2.2" , 4)),
|
||||
("2_3", list(range(509,545)), minor_versions("2.3" , 0)),
|
||||
("2_4", (), minor_versions("2.4" , 0))
|
||||
]
|
||||
|
||||
####################################################################
|
||||
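For orientation, each format_relation entry ties a conversion step to the file-format numbers it owns, so resolving a \lyxformat value to a step is a linear scan. A minimal standalone sketch (not code from this commit; the trimmed table below is illustrative):

# Sketch only: a trimmed stand-in for the real format_relation table.
format_relation = [
    ("2_2", list(range(475, 509))),
    ("2_3", list(range(509, 545))),
    ("2_4", ()),               # development step: no file formats assigned yet
]

def step_for_format(fmt):
    """Return the conversion step that owns file format `fmt`, or None."""
    for step, formats in format_relation:
        if fmt in formats:
            return step
    return None

print(step_for_format(544))   # -> '2_3' (the last 2.3.x file format)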
@@ -119,19 +122,29 @@ def formats_list():


 def format_info():
-    " Returns a list with supported file formats."
-    out = """Major version:
-        minor versions
-        formats
+    " Returns a list with the supported file formats."
+    template = """
+%s\tstable format:       %s
+  \tstable versions:     %s
+  \tdevelopment formats: %s
 """

+    out = "version: formats and versions"
     for version in format_relation:
         major = str(version[2][0])
         versions = str(version[2][1:])
         if len(version[1]) == 1:
             formats = str(version[1][0])
+            stable_format = str(version[1][0])
+        elif not stable_version and major == version__:
+            stable_format = "-- not yet --"
+            versions = "-- not yet --"
+            formats = "%s - %s" % (version[1][0], version[1][-1])
         else:
-            formats = "%s - %s" % (version[1][-1], version[1][0])
-        out += "%s\n\t%s\n\t%s\n\n" % (major, versions, formats)
+            formats = "%s - %s" % (version[1][0], version[1][-2])
+            stable_format = str(version[1][-1])
+
+        out += template % (major, stable_format, versions, formats)
     return out + '\n'
@@ -281,7 +294,7 @@ class LyX_base:
         """ Emits warning to self.error, if the debug_level is less
         than the self.debug."""
         if debug_level <= self.debug:
-            self.err.write("Warning: " + message + "\n")
+            self.err.write("lyx2lyx warning: " + message + "\n")


     def error(self, message):
@@ -434,8 +447,8 @@ class LyX_base:
         else:
             header = self.header

-        for line in header + [''] + self.body:
-            self.output.write(line+u"\n")
+        for line in header + [u''] + self.body:
+            self.output.write(line+u'\n')


     def choose_output(self, output):
@@ -506,10 +519,10 @@ class LyX_base:
         file, returns the most likely value, or None otherwise."""

         for line in self.header:
-            if line[0] != "#":
+            if line[0:1] != b"#":
                 return None

-            line = line.replace("fix",".")
+            line = line.replace(b"fix",b".")
             # need to test original_tex2lyx_version first because tex2lyx
             # writes "#LyX file created by tex2lyx 2.2"
             result = original_tex2lyx_version.match(line)
@@ -517,14 +530,14 @@ class LyX_base:
             result = original_version.match(line)
             if result:
                 # Special know cases: reLyX and KLyX
-                if line.find("reLyX") != -1 or line.find("KLyX") != -1:
+                if line.find(b"reLyX") != -1 or line.find(b"KLyX") != -1:
                     return "0.12"
             if result:
                 res = result.group(1)
                 if not res:
                     self.warning(line)
                 #self.warning("Version %s" % result.group(1))
-                return res
+                return res.decode('ascii') if not PY2 else res
         self.warning(str(self.header[:2]))
         return None

@@ -533,7 +546,7 @@ class LyX_base:
         " Set the header with the version used."

         initial_comment = " ".join(["#LyX %s created this file." % version__,
-                                    "For more info see http://www.lyx.org/"])
+                                    "For more info see https://www.lyx.org/"])

         # Simple heuristic to determine the comment that always starts
         # a lyx file
@@ -582,6 +595,7 @@ class LyX_base:

+    #Note that the module will be added at the END of the extant ones
     def add_module(self, module):
         " Append module to the modules list."
         i = find_token(self.header, "\\begin_modules", 0)
         if i == -1:
             #No modules yet included
@@ -602,7 +616,16 @@ class LyX_base:
             self.header.insert(j, module)


+    def del_module(self, module):
+        " Delete `module` from module list, return success."
+        modlist = self.get_module_list()
+        if module not in modlist:
+            return False
+        self.set_module_list([line for line in modlist if line != module])
+        return True
+
     def get_module_list(self):
         " Return list of modules."
         i = find_token(self.header, "\\begin_modules", 0)
         if (i == -1):
             return []
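The new del_module leans on get_module_list/set_module_list, which treat the \begin_modules/\end_modules block as a plain slice of header lines. A self-contained sketch of that round trip on an assumed header list (not the class itself):

header = ["\\textclass article",
          "\\begin_modules",
          "logicalmkup",
          "theorems-std",
          "\\end_modules"]

begin = header.index("\\begin_modules")
end = header.index("\\end_modules")
modules = header[begin + 1:end]          # ['logicalmkup', 'theorems-std']

modules = [m for m in modules if m != "theorems-std"]   # what del_module does
header[begin:end + 1] = ["\\begin_modules"] + modules + ["\\end_modules"]
print(header)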
@@ -611,23 +634,23 @@ class LyX_base:


     def set_module_list(self, mlist):
-        modbegin = find_token(self.header, "\\begin_modules", 0)
-        newmodlist = ['\\begin_modules'] + mlist + ['\\end_modules']
-        if (modbegin == -1):
+        i = find_token(self.header, "\\begin_modules", 0)
+        if (i == -1):
             #No modules yet included
             tclass = find_token(self.header, "\\textclass", 0)
             if tclass == -1:
                 self.warning("Malformed LyX document: No \\textclass!!")
                 return
-            modbegin = tclass + 1
-            self.header[modbegin:modbegin] = newmodlist
-            return
-        modend = find_token(self.header, "\\end_modules", modbegin)
-        if modend == -1:
-            self.warning("(set_module_list)Malformed LyX document: No \\end_modules.")
-            return
-        newmodlist = ['\\begin_modules'] + mlist + ['\\end_modules']
-        self.header[modbegin:modend + 1] = newmodlist
+            i = j = tclass + 1
+        else:
+            j = find_token(self.header, "\\end_modules", i)
+            if j == -1:
+                self.warning("(set_module_list) Malformed LyX document: No \\end_modules.")
+                return
+            j += 1
+        if mlist:
+            mlist = ['\\begin_modules'] + mlist + ['\\end_modules']
+        self.header[i:j] = mlist


     def set_parameter(self, param, value):
@@ -678,7 +701,7 @@ class LyX_base:
             try:
                 conv(self)
             except:
-                self.warning("An error ocurred in %s, %s" %
+                self.warning("An error occurred in %s, %s" %
                              (version, str(conv)),
                              default_debug__)
                 if not self.try_hard:
@@ -760,6 +783,53 @@ class LyX_base:
         return mode, steps


+    def append_local_layout(self, new_layout):
+        " Append `new_layout` to the local layouts."
+        # new_layout may be a string or a list of strings (lines)
+        try:
+            new_layout = new_layout.splitlines()
+        except AttributeError:
+            pass
+        i = find_token(self.header, "\\begin_local_layout", 0)
+        if i == -1:
+            k = find_token(self.header, "\\language", 0)
+            if k == -1:
+                # this should not happen
+                self.warning("Malformed LyX document! No \\language header found!")
+                return
+            self.header[k : k] = ["\\begin_local_layout", "\\end_local_layout"]
+            i = k
+
+        j = find_end_of(self.header, i, "\\begin_local_layout", "\\end_local_layout")
+        if j == -1:
+            # this should not happen
+            self.warning("Malformed LyX document: Can't find end of local layout!")
+            return
+
+        self.header[i+1 : i+1] = new_layout
+
+    def del_local_layout(self, layout_def):
+        " Delete `layout_def` from local layouts, return success."
+        i = find_complete_lines(self.header, layout_def)
+        if i == -1:
+            return False
+        j = i+len(layout_def)
+        if (self.header[i-1] == "\\begin_local_layout" and
+            self.header[j] == "\\end_local_layout"):
+            i -=1
+            j +=1
+        self.header[i:j] = []
+        return True
+
+    def del_from_header(self, lines):
+        " Delete `lines` from the document header, return success."
+        i = find_complete_lines(self.header, lines)
+        if i == -1:
+            return False
+        j = i + len(lines)
+        self.header[i:j] = []
+        return True
+
     # Part of an unfinished attempt to make lyx2lyx gave a more
     # structured view of the document.
     # def get_toc(self, depth = 4):
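Both del_local_layout and del_from_header depend on find_complete_lines locating a contiguous block of lines. A rough stand-in for its contract (the real helper lives in parser_tools and may differ in details):

def find_complete_lines(lines, sublines, start=0):
    """Return the index where `sublines` occurs as a contiguous block, else -1."""
    n = len(sublines)
    for i in range(start, len(lines) - n + 1):
        if lines[i:i + n] == sublines:
            return i
    return -1

header = ["\\begin_local_layout", "Format 66", "\\end_local_layout"]
print(find_complete_lines(header, ["Format 66"]))  # -> 1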
@@ -34,6 +34,7 @@ dist_lyx2lyx_PYTHON = \
 	lyx_2_1.py \
 	lyx_2_2.py \
 	lyx_2_3.py \
+	lyx_2_4.py \
 	profiling.py \
 	test_parser_tools.py

@@ -1,4 +1,4 @@
-#! /usr/bin/env python
+#! /usr/bin/python3
 # -*- coding: utf-8 -*-
 # Copyright (C) 2002-2011 The LyX Team
 # Copyright (C) 2002-2007 José Matos <jamatos@lyx.org>
@@ -56,26 +56,27 @@ def main():
     parser.add_argument("--noisy",
                 action="store_const", const=10, dest="debug")
     parser.add_argument("-c", "--encoding", type=cmd_arg, dest="cjk_encoding",
-                help="files in format 413 and lower are read and"
+                help="Files in format 413 and lower are read and"
                     " written in the format of CJK-LyX."
-                    "If encoding is not given or 'auto' the encoding"
-                    "is determined from the locale.")
+                    " If encoding is not given or 'auto' the encoding"
+                    " is determined from the locale.")
     parser.add_argument("-e", "--err", type=cmd_arg, dest="error",
-                help= "file name of the error file else goes to stderr")
+                help= "File name of the error file else goes to stderr.")
     parser.add_argument("-o", "--output", type=cmd_arg, dest="output",
-                help= "name of the output file else goes to stdout")
+                help= "Name of the output file else goes to stdout.")
     parser.add_argument("-t", "--to", type=cmd_arg, dest= "end_format",
-                help= "destination file format, default (latest)")
+                help= "Destination file format, default <latest>.")
     parser.add_argument("-V", "--final_version", type=cmd_arg, dest= "final_version",
-                help= "destination version, default (latest)")
+                help= "Destination version, default <latest>.")
     parser.add_argument("-l", "--list", action="store_true",
-                help = "list all available formats and supported versions")
+                help = "List all available formats and supported versions.")
     parser.add_argument("-n", "--try-hard", action="store_true",
-                help = "try hard (ignore any convertion errors)")
+                help = "Try hard (ignore any conversion errors).")
     parser.add_argument("-s", "--systemlyxdir", type=cmd_arg, dest= "systemlyxdir",
-                help= "LyX system directory for conversion from version 489 or older")
+                help= "LyX system directory for conversion from"
+                      " version 489 or older.")
     parser.add_argument('--version', action='version', version="""lyx2lyx, version %s
-Copyright (C) 2011 The LyX Team, José Matos and Dekel Tsur""" % LyX.version__)
+Copyright (C) 2011 The LyX Team, José Matos and Dekel Tsur""" % LyX.version__)
     parser.add_argument("input", nargs='?', type=cmd_arg, default=None)

     options = parser.parse_args()
@@ -17,8 +17,8 @@
 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

 '''
-This module offers several free functions to help with lyx2lyx'ing.
-More documentaton is below, but here is a quick guide to what
+This module offers several free functions to help with lyx2lyx'ing.
+More documentaton is below, but here is a quick guide to what
 they do. Optional arguments are marked by brackets.

 add_to_preamble(document, text):
@@ -37,8 +37,8 @@ insert_to_preamble(document, text[, index]):
   default index is 0, so the material is inserted at the beginning.
   Prepends a comment "% Added by lyx2lyx" to text.

-put_cmd_in_ert(arg):
-  Here arg should be a list of strings (lines), which we want to
+put_cmd_in_ert(cmd):
+  Here cmd should be a list of strings (lines), which we want to
   wrap in ERT. Returns a list of strings so wrapped.
   A call to this routine will often go something like this:
     i = find_token('\\begin_inset FunkyInset', ...)
@@ -65,23 +65,38 @@ lyx2verbatim(document, lines):
   can and return a string containing the translated material.

 latex_length(slen):
-  Convert lengths (in LyX form) to their LaTeX representation. Returns
-  (bool, length), where the bool tells us if it was a percentage, and
-  the length is the LaTeX representation.
+  Convert lengths (in LyX form) to their LaTeX representation. Returns
+  (bool, length), where the bool tells us if it was a percentage, and
+  the length is the LaTeX representation.

 convert_info_insets(document, type, func):
-  Applies func to the argument of all info insets matching certain types
-  type : the type to match. This can be a regular expression.
-  func : function from string to string to apply to the "arg" field of
-         the info insets.
+  Applies func to the argument of all info insets matching certain types
+  type : the type to match. This can be a regular expression.
+  func : function from string to string to apply to the "arg" field of
+         the info insets.
+
+is_document_option(document, option):
+  Find if _option_ is a document option (\\options in the header).
+
+insert_document_option(document, option):
+  Insert _option_ as a document option.
+
+remove_document_option(document, option):
+  Remove _option_ as a document option.
+
+revert_language(document, lyxname, babelname="", polyglossianame=""):
+  Reverts native language support to ERT
+  If babelname or polyglossianame is empty, it is assumed
+  this language package is not supported for the given language.
 '''

 from __future__ import print_function
 import re
 import string
-from parser_tools import find_token, find_end_of_inset
+import sys
+from parser_tools import (find_token, find_end_of_inset, get_containing_layout,
+                          get_containing_inset, get_value, get_bool_value)
 from unicode_symbols import unicode_reps


 # This will accept either a list of lines or a single line.
 # It is bad practice to pass something with embedded newlines,
 # though we will handle that.
@@ -118,36 +133,53 @@ def add_to_preamble(document, text):
 # It should really be a list.
 def insert_to_preamble(document, text, index = 0):
     """ Insert text to the preamble at a given line"""

     if not type(text) is list:
         # split on \n just in case
         # it'll give us the one element list we want
        # if there's no \n, too
         text = text.split('\n')

     text.insert(0, "% Added by lyx2lyx")
     document.preamble[index:index] = text


-def put_cmd_in_ert(arg):
-    '''
-    arg should be a list of lines we want to wrap in ERT.
-    Returns a list of strings, with the lines so wrapped.
-    '''
-
-    ret = ["\\begin_inset ERT", "status collapsed", "", "\\begin_layout Plain Layout", ""]
-    # It will be faster for us to work with a single string internally.
-    # That way, we only go through the unicode_reps loop once.
-    if type(arg) is list:
-        s = "\n".join(arg)
-    else:
-        s = arg
-    for rep in unicode_reps:
-        s = s.replace(rep[1], rep[0])
-    s = s.replace('\\', "\\backslash\n")
-    ret += s.splitlines()
-    ret += ["\\end_layout", "", "\\end_inset"]
-    return ret
+# A dictionary of Unicode->LICR mappings for use in a Unicode string's translate() method
+# Created from the reversed list to keep the first of alternative definitions.
+licr_table = {ord(ch): cmd for cmd, ch in unicode_reps[::-1]}
+
+def put_cmd_in_ert(cmd, is_open=False, as_paragraph=False):
+    """
+    Return ERT inset wrapping `cmd` as a list of strings.
+
+    `cmd` can be a string or list of lines. Non-ASCII characters are converted
+    to the respective LICR macros if defined in unicodesymbols,
+    `is_open` is a boolean setting the inset status to "open",
+    `as_paragraph` wraps the ERT inset in a Standard paragraph.
+    """
+
+    status = {False:"collapsed", True:"open"}
+    ert_inset = ["\\begin_inset ERT", "status %s"%status[is_open], "",
+                 "\\begin_layout Plain Layout", "",
+                 # content here ([5:5])
+                 "\\end_layout", "", "\\end_inset"]
+
+    paragraph = ["\\begin_layout Standard",
+                 # content here ([1:1])
+                 "", "", "\\end_layout", ""]
+    # ensure cmd is an unicode instance and make it "LyX safe".
+    if isinstance(cmd, list):
+        cmd = u"\n".join(cmd)
+    elif sys.version_info[0] == 2 and isinstance(cmd, str):
+        cmd = cmd.decode('utf8')
+    cmd = cmd.translate(licr_table)
+    cmd = cmd.replace("\\", "\n\\backslash\n")
+
+    ert_inset[5:5] = cmd.splitlines()
+    if not as_paragraph:
+        return ert_inset
+    paragraph[1:1] = ert_inset
+    return paragraph


 def get_ert(lines, i, verbatim = False):
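Hand-tracing the new put_cmd_in_ert above: a single command is escaped (the backslash becomes a \backslash line) and spliced into the inset skeleton at slot [5:5]. Expected shape, stated as an assumption to check rather than verified output:

from lyx2lyx_tools import put_cmd_in_ert  # assuming lyx2lyx is on sys.path

lines = put_cmd_in_ert("\\pagebreak")
# hand-traced expectation:
# ['\\begin_inset ERT', 'status collapsed', '',
#  '\\begin_layout Plain Layout', '',
#  '', '\\backslash', 'pagebreak',
#  '\\end_layout', '', '\\end_inset']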
@@ -300,7 +332,7 @@ def lyx2verbatim(document, lines):


 def latex_length(slen):
-    '''
+    '''
     Convert lengths to their LaTeX representation. Returns (bool, length),
     where the bool tells us if it was a percentage, and the length is the
     LaTeX representation.
@@ -314,9 +346,14 @@ def latex_length(slen):
     # the + always precedes the -

     # Convert relative lengths to LaTeX units
-    units = {"text%":"\\textwidth", "col%":"\\columnwidth",
-             "page%":"\\paperwidth", "line%":"\\linewidth",
-             "theight%":"\\textheight", "pheight%":"\\paperheight"}
+    units = {"col%": "\\columnwidth",
+             "text%": "\\textwidth",
+             "page%": "\\paperwidth",
+             "line%": "\\linewidth",
+             "theight%": "\\textheight",
+             "pheight%": "\\paperheight",
+             "baselineskip%": "\\baselineskip"
+            }
     for unit in list(units.keys()):
         i = slen.find(unit)
         if i == -1:
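With the extended units table, relative lengths resolve against the matching LaTeX macro. Per the docstring, latex_length should behave roughly like this (illustrative values, assuming the usual percent-to-decimal scaling):

from lyx2lyx_tools import latex_length  # assuming lyx2lyx is on sys.path

print(latex_length("30col%"))           # expected: (True, '0.3\\columnwidth')
print(latex_length("2cm"))              # expected: (False, '2cm')
print(latex_length("10baselineskip%"))  # now resolvable via the new unit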
@@ -536,3 +573,220 @@ def convert_info_insets(document, type, func):
         new_arg = func(arg.group(1))
         document.body[i + 2] = 'arg "%s"' % new_arg
         i += 3
+
+
+def insert_document_option(document, option):
+    "Insert _option_ as a document option."
+
+    # Find \options in the header
+    i = find_token(document.header, "\\options", 0)
+    # if the options does not exists add it after the textclass
+    if i == -1:
+        i = find_token(document.header, "\\textclass", 0) + 1
+        document.header.insert(i, r"\options %s" % option)
+        return
+    # otherwise append to options
+    if not is_document_option(document, option):
+        document.header[i] += ",%s" % option
+
+
+def remove_document_option(document, option):
+    """ Remove _option_ as a document option."""
+
+    i = find_token(document.header, "\\options")
+    options = get_value(document.header, "\\options", i)
+    options = [op.strip() for op in options.split(',')]
+
+    # Remove `option` from \options
+    options = [op for op in options if op != option]
+
+    if options:
+        document.header[i] = "\\options " + ','.join(options)
+    else:
+        del document.header[i]
+
+
+def is_document_option(document, option):
+    "Find if _option_ is a document option"
+
+    options = get_value(document.header, "\\options")
+    options = [op.strip() for op in options.split(',')]
+    return option in options
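The three option helpers above treat \options as a single comma-separated header line. The same bookkeeping on a bare list, as a self-contained sketch (assumed header layout):

header = ["\\textclass article", "\\options a4paper,twocolumn"]

def options(header):
    for line in header:
        if line.startswith("\\options"):
            return [op.strip() for op in line[len("\\options"):].split(',')]
    return []

print("twocolumn" in options(header))   # is_document_option -> True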
+
+
+singlepar_insets = [s.strip() for s in
+    u"Argument, Caption Above, Caption Below, Caption Bicaption,"
+    u"Caption Centered, Caption FigCaption, Caption Standard, Caption Table,"
+    u"Flex Chemistry, Flex Fixme_Note, Flex Latin, Flex ListOfSlides,"
+    u"Flex Missing_Figure, Flex PDF-Annotation, Flex PDF-Comment-Setup,"
+    u"Flex Reflectbox, Flex S/R expression, Flex Sweave Input File,"
+    u"Flex Sweave Options, Flex Thanks_Reference, Flex URL, Foot InTitle,"
+    u"IPADeco, Index, Info, Phantom, Script".split(',')]
+# print(singlepar_insets)
+
+
+def revert_language(document, lyxname, babelname="", polyglossianame=""):
+    " Revert native language support "
+
+    # Does the document use polyglossia?
+    use_polyglossia = False
+    if get_bool_value(document.header, "\\use_non_tex_fonts"):
+        i = find_token(document.header, "\\language_package")
+        if i == -1:
+            document.warning("Malformed document! Missing \\language_package")
+        else:
+            pack = get_value(document.header, "\\language_package", i)
+            if pack in ("default", "auto"):
+                use_polyglossia = True
+
+    # Do we use this language with polyglossia?
+    with_polyglossia = use_polyglossia and polyglossianame != ""
+    # Do we use this language with babel?
+    with_babel = with_polyglossia == False and babelname != ""
+
+    # Are we dealing with a primary or secondary language?
+    primary = document.language == lyxname
+    secondary = False
+
+    # Main language first
+    orig_doc_language = document.language
+    if primary:
+        # Change LyX document language to English (we will tell LaTeX
+        # to use the original language at the end of this function):
+        document.language = "english"
+        i = find_token(document.header, "\\language %s" % lyxname, 0)
+        if i != -1:
+            document.header[i] = "\\language english"
+
+    # Now look for occurences in the body
+    i = 0
+    while True:
+        i = find_token(document.body, "\\lang", i+1)
+        if i == -1:
+            break
+        if document.body[i].startswith("\\lang %s" % lyxname):
+            secondary = True
+            texname = use_polyglossia and polyglossianame or babelname
+        elif primary and document.body[i].startswith("\\lang english"):
+            # Since we switched the main language manually, English parts need to be marked
+            texname = "english"
+        else:
+            continue
+
+        parent = get_containing_layout(document.body, i)
+        i_e = parent[2] # end line no,
+        # print(i, texname, parent, document.body[i+1], file=sys.stderr)
+
+        # Move leading space to the previous line:
+        if document.body[i+1].startswith(" "):
+            document.body[i+1] = document.body[i+1][1:]
+            document.body.insert(i, " ")
+            continue
+
+        # TODO: handle nesting issues with font attributes, e.g.
+        # \begin_layout Standard
+        #
+        # \emph on
+        # \lang macedonian
+        # Македонски јазик
+        # \emph default
+        #  — јужнословенски јазик, дел од групата на словенски јазици од јазичното
+        # семејство на индоевропски јазици.
+        # Македонскиот е службен и национален јазик во Македонија.
+        # \end_layout
+
+        # Ensure correct handling of list labels
+        if (parent[0] in ["Labeling", "Description"]
+            and not " " in "\n".join(document.body[parent[3]:i])):
+            # line `i+1` is first line of a list item,
+            # part before a space character is the label
+            # TODO: insets or language change before first space character
+            labelline = document.body[i+1].split(' ', 1)
+            if len(labelline) > 1:
+                # Insert a space in the (original) document language
+                # between label and remainder.
+                # print("  Label:", labelline, file=sys.stderr)
+                lines = [labelline[0],
+                         "\\lang %s" % orig_doc_language,
+                         " ",
+                         "\\lang %s" % (primary and "english" or lyxname),
+                         labelline[1]]
+                document.body[i+1:i+2] = lines
+                i_e += 4
+
+        # Find out where to end the language change.
+        langswitch = i
+        while True:
+            langswitch = find_token(document.body, "\\lang", langswitch+1, i_e)
+            if langswitch == -1:
+                break
+            # print("  ", langswitch, document.body[langswitch], file=sys.stderr)
+            # skip insets
+            i_a = parent[3] # paragraph start line
+            container = get_containing_inset(document.body[i_a:i_e], langswitch-i_a)
+            if container and container[1] < langswitch-i_a and container[2] > langswitch-i_a:
+                # print("  inset", container, file=sys.stderr)
+                continue
+            i_e = langswitch
+            break
+
+        # use function or environment?
+        singlepar = i_e - i < 3
+        if not singlepar and parent[0] == "Plain Layout":
+            # environment not allowed in some insets
+            container = get_containing_inset(document.body, i)
+            singlepar = container[0] in singlepar_insets
+
+        # Delete empty language switches:
+        if not "".join(document.body[i+1:i_e]):
+            del document.body[i:i_e]
+            i -= 1
+            continue
+
+        if singlepar:
+            if with_polyglossia:
+                begin_cmd = "\\text%s{"%texname
+            elif with_babel:
+                begin_cmd = "\\foreignlanguage{%s}{" % texname
+            end_cmd = "}"
+        else:
+            if with_polyglossia:
+                begin_cmd = "\\begin{%s}"%texname
+                end_cmd = "\\end{%s}"%texname
+            elif with_babel:
+                begin_cmd = "\\begin{otherlanguage}{%s}" % texname
+                end_cmd = "\\end{otherlanguage}"
+
+        if (not primary or texname == "english"):
+            try:
+                document.body[i_e:i_e] = put_cmd_in_ert(end_cmd)
+                document.body[i+1:i+1] = put_cmd_in_ert(begin_cmd)
+            except UnboundLocalError:
+                pass
+        del document.body[i]
+
+    if not (primary or secondary):
+        return
+
+    # Make the language known to Babel/Polyglossia and ensure the correct
+    # document language:
+    doc_lang_switch = ""
+    if with_babel:
+        # add as global option
+        insert_document_option(document, babelname)
+        # Since user options are appended to the document options,
+        # Babel will treat `babelname` as primary language.
+        if not primary:
+            doc_lang_switch = "\\selectlanguage{%s}" % orig_doc_language
+    if with_polyglossia:
+        # Define language in the user preamble
+        # (don't use \AtBeginDocument, this fails with some languages).
+        add_to_preamble(document, ["\\usepackage{polyglossia}",
+                                   "\\setotherlanguage{%s}" % polyglossianame])
+        if primary:
+            # Changing the main language must be done in the document body.
+            doc_lang_switch = "\\resetdefaultlanguage{%s}" % polyglossianame
+
+    # Reset LaTeX main language if required and not already done
+    if doc_lang_switch and doc_lang_switch[1:] not in document.body[8:20]:
+        document.body[2:2] = put_cmd_in_ert(doc_lang_switch,
+                                            is_open=True, as_paragraph=True)
@@ -32,18 +32,17 @@ def regularise_header(document):

 def find_next_space(line, j):
     """ Return position of next space or backslash, which one comes
-    first, starting from position k, if not existing return last
-    position in line."""
-    l = line.find(' ', j)
-    if l == -1:
-        l = len(line)
-    k = line.find('\\', j)
-    if k == -1:
-        k = len(line)
+    first, starting from position j, if none exists returns last
+    position in line (+1)."""
+    space_pos = line.find(' ', j)
+    if space_pos == -1:
+        space_pos = len(line)

-    if k < l:
-        return k
-    return l
+    bksl_pos = line.find('\\', j)
+    if bksl_pos == -1:
+        bksl_pos = len(line)
+
+    return min(space_pos, bksl_pos)


 def regularise_body(document):
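The rewrite keeps the old behaviour but states it directly: return the nearer of the next space and the next backslash. A quick standalone check of the new logic (a copy of the function, plus a hypothetical input):

def find_next_space(line, j):
    space_pos = line.find(' ', j)
    if space_pos == -1:
        space_pos = len(line)
    bksl_pos = line.find('\\', j)
    if bksl_pos == -1:
        bksl_pos = len(line)
    return min(space_pos, bksl_pos)

print(find_next_space("\\layout Standard", 1))   # -> 7, the space after 'layout'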
@@ -65,36 +64,38 @@ def regularise_body(document):
     while i < len(document.body):
         line = document.body[i]
         j = 0
-        tmp = []
+        new_block = []
        while j < len(line):
             k = line.find('\\', j)

             if k == -1:
-                tmp += [line[j:]]
+                new_block += [line[j:]]
                 break

             if k != j:
-                tmp += [line[j: k]]
+                #document.warning("j=%d\tk=%d\t#%s#%s#" % (j,k,line,line[j: k]))
+                new_block += [line[j: k]]
                 j = k

             k = find_next_space(line, j+1)

-            # These tokens take the rest of the line
             token = line[j+1:k]
+            # These tokens take the rest of the line
             if token in getline_tokens:
-                tmp += [line[j:]]
+                #document.warning("getline_token:%s\tj=%d\t\t#%s#%s#" % (token,j,line,line[j:]))
+                new_block += [line[j:]]
                 break

             # These tokens take no arguments
             if token in noargs_tokens:
-                tmp += [line[j:k]]
+                new_block += [line[j:k]]
                 j = k
                 continue

             # These tokens take one argument
             if token in onearg_tokens:
                 k = find_next_space(line, k + 1)
-                tmp += [line[j:k]]
+                new_block += [line[j:k]]
                 j = k
                 continue

@@ -104,29 +105,30 @@ def regularise_body(document):
                 inset = line[k+1: l]

                 if inset == "Latex":
-                    tmp += [line[j:l]]
+                    new_block += [line[j:l]]
                     j = l
                     continue

-                if inset in ["LatexCommand", "LatexDel"]:
-                    tmp += [line[j:]]
+                if inset in ["LatexCommand", "LatexDel", "Label", "Figure",
+                             "Formula"]:
+                    new_block += [line[j:]]
                     break

                 if inset == "Quotes":
                     l = find_next_space(line, l + 1)
-                    tmp += [line[j:l]]
+                    new_block += [line[j:l]]
                     j = l
                     continue

-                document.warning("unkown inset %s" % line)
+                document.warning("unkown inset %s" % inset)
                 assert(False)

             # We are inside a latex inset, pass the text verbatim
-            tmp += [line[j:]]
+            new_block += [line[j:]]
             break

-        document.body[i: i+1] = tmp
-        i += len(tmp)
+        document.body[i: i+1] = new_block
+        i += len(new_block)


 supported_versions = ["0.10.%d" % i for i in range(8)] + ["0.10"]
@@ -112,7 +112,7 @@ def update_inset_label(document):
         i = find_token(lines, '\\begin_inset Label', i)
         if i == -1:
             return
-        lines[i] = '\\begin_inset LatexCommand \label{' + lines[i][19:] + '}'
+        lines[i] = '\\begin_inset LatexCommand \\label{' + lines[i][19:] + '}'
         i = i + 1


@@ -69,7 +69,7 @@ def find_beginning_of_inset(lines, i):


 def find_end_of_inset(lines, i):
-    " Finds the matching \end_inset"
+    r" Finds the matching \end_inset"
     return find_end_of(lines, i, "\\begin_inset", "\\end_inset")


@@ -143,7 +143,7 @@ def get_width(mo):


 def remove_oldfloat(document):
-    " Change \begin_float .. \end_float into \begin_inset Float .. \end_inset"
+    r" Change \begin_float .. \end_float into \begin_inset Float .. \end_inset"
     lines = document.body
     i = 0
     while True:
@@ -250,7 +250,7 @@ def remove_pextra(document):
         if flag:
             flag = 0
             if hfill:
-                start = ["","\hfill",""]+start
+                start = ["",r"\hfill",""]+start
             else:
                 start = ['\\layout %s' % document.default_layout,''] + start

@@ -324,7 +324,7 @@ def remove_oldert(document):
         new = []
         new2 = []
         if check_token(lines[i], "\\layout LaTeX"):
-            new = ['\layout %s' % document.default_layout, "", ""]
+            new = [r'\layout %s' % document.default_layout, "", ""]

         k = i+1
         while True:
@@ -745,8 +745,8 @@ def update_longtables(document):
     for j in range(rows):
         i = find_token(body, '<row', i)

-        self.endfoot = false # footer row
-        self.endlastfoot = false # last footer row
+        row_info[i].endfoot = false # footer row
+        row_info[i].endlastfoot = false # last footer row
         if row_info[j].endhead:
             insert_attribute(body, i, 'endhead="true"')

@@ -808,7 +808,7 @@ def change_infoinset(document):
         note_lines = [txt]+note_lines

         for line in note_lines:
-            new = new + ['\layout %s' % document.default_layout, ""]
+            new = new + [r'\layout %s' % document.default_layout, ""]
             tmp = line.split('\\')
             new = new + [tmp[0]]
             for x in tmp[1:]:
@@ -27,7 +27,7 @@ from parser_tools import find_token, find_end_of, get_value,\
 # Private helper functions

 def find_end_of_inset(lines, i):
-    "Finds the matching \end_inset"
+    r"Finds the matching \end_inset"
     return find_end_of(lines, i, "\\begin_inset", "\\end_inset")


@@ -81,7 +81,7 @@ def get_next_paragraph(lines, i, format):


 def find_end_of_inset(lines, i):
-    "Finds the matching \end_inset"
+    r"Finds the matching \end_inset"
     return find_end_of(lines, i, "\\begin_inset", "\\end_inset")

 def del_token(lines, token, start, end):
@@ -103,7 +103,7 @@ def del_token(lines, token, start, end):
 ####################################################################

 def remove_color_default(document):
-    " Remove \color default"
+    r" Remove \color default"
     i = 0
     while True:
         i = find_token(document.body, "\\color default", i)
@@ -114,12 +114,12 @@ def remove_color_default(document):


 def add_end_header(document):
-    " Add \end_header"
+    r" Add \end_header"
     document.header.append("\\end_header");


 def rm_end_header(document):
-    " Remove \end_header"
+    r" Remove \end_header"
     i = find_token(document.header, "\\end_header", 0)
     if i == -1:
         return
@@ -169,14 +169,14 @@ def revert_amsmath(document):


 def convert_spaces(document):
-    " \SpecialChar ~ -> \InsetSpace ~"
+    r" \SpecialChar ~ -> \InsetSpace ~"
     for i in range(len(document.body)):
         document.body[i] = document.body[i].replace("\\SpecialChar ~",
                                                     "\\InsetSpace ~")


 def revert_spaces(document):
-    " \InsetSpace ~ -> \SpecialChar ~"
+    r" \InsetSpace ~ -> \SpecialChar ~"
     regexp = re.compile(r'(.*)(\\InsetSpace\s+)(\S+)')
     i = 0
     while True:
@@ -197,18 +197,18 @@ def revert_spaces(document):


 def rename_spaces(document):
-    """ \InsetSpace \, -> \InsetSpace \thinspace{}
-        \InsetSpace \space -> \InsetSpace \space{}"""
+    """ \\InsetSpace \\, -> \\InsetSpace \thinspace{}
+        \\InsetSpace \\space -> \\InsetSpace \\space{}"""
     for i in range(len(document.body)):
         document.body[i] = document.body[i].replace("\\InsetSpace \\space",
                                                     "\\InsetSpace \\space{}")
-        document.body[i] = document.body[i].replace("\\InsetSpace \,",
+        document.body[i] = document.body[i].replace("\\InsetSpace \\,",
                                                     "\\InsetSpace \\thinspace{}")


 def revert_space_names(document):
-    """ \InsetSpace \thinspace{} -> \InsetSpace \,
-        \InsetSpace \space{} -> \InsetSpace \space"""
+    """ \\InsetSpace \thinspace{} -> \\InsetSpace \\,
+        \\InsetSpace \\space{} -> \\InsetSpace \\space"""
     for i in range(len(document.body)):
         document.body[i] = document.body[i].replace("\\InsetSpace \\space{}",
                                                     "\\InsetSpace \\space")
@@ -262,7 +262,7 @@ def revert_bibtex(document):


 def remove_insetparent(document):
-    " Remove \lyxparent"
+    r" Remove \lyxparent"
     i = 0
     while True:
         i = find_token(document.body, "\\begin_inset LatexCommand \\lyxparent", i)
@@ -428,7 +428,7 @@ def revert_comment(document):


 def add_end_layout(document):
-    " Add \end_layout"
+    r" Add \end_layout"
     i = find_token(document.body, '\\layout', 0)

     if i == -1:
@@ -502,7 +502,7 @@ def add_end_layout(document):


 def rm_end_layout(document):
-    " Remove \end_layout"
+    r" Remove \end_layout"
     i = 0
     while True:
         i = find_token(document.body, '\\end_layout', i)
@@ -544,7 +544,7 @@ def rm_body_changes(document):


 def layout2begin_layout(document):
-    " \layout -> \begin_layout "
+    r" \layout -> \begin_layout "
     i = 0
     while True:
         i = find_token(document.body, '\\layout', i)
@@ -556,7 +556,7 @@ def layout2begin_layout(document):


 def begin_layout2layout(document):
-    " \begin_layout -> \layout "
+    r" \begin_layout -> \layout "
     i = 0
     while True:
         i = find_token(document.body, '\\begin_layout', i)
@@ -1051,7 +1051,7 @@ def convert_minipage(document):
         # convert the inner_position
         if document.body[i][:14] == "inner_position":
             innerpos = inner_pos[int(document.body[i][15])]
-            del document.body[i]
+            del document.body[i]
         else:
             innerpos = inner_pos[0]

@@ -1972,9 +1972,9 @@ def convert_names(document):
                 '\\begin_layout %s' % document.default_layout,
                 "",
                 "%s" % firstname,
-                "\end_layout",
+                r"\end_layout",
                 "",
-                "\end_inset",
+                r"\end_inset",
                 "",
                 "",
                 "\\begin_inset CharStyle Surname",
@@ -2229,7 +2229,7 @@ def use_x_binary(document):
 def normalize_paragraph_params(document):
     " Place all the paragraph parameters in their own line. "
     body = document.body
-
+
     allowed_parameters = '\\paragraph_spacing', '\\noindent', \
         '\\align', '\\labelwidthstring', "\\start_of_appendix", \
         "\\leftindent"
@@ -24,6 +24,7 @@ import unicodedata
 import sys, os

 from parser_tools import find_re, find_token, find_token_backwards, find_token_exact, find_tokens, find_end_of, get_value, find_beginning_of, find_nonempty_line
+from lyx2lyx_tools import insert_document_option
 from LyX import get_encoding

 # Provide support for both python 2 and 3
@@ -420,7 +421,7 @@ def revert_unicode_line(document, i, insets, spec_chars, replacement_character =
     else:
         if insets and insets[-1] == "Formula":
             # avoid putting an ERT in a math; instead put command as text
-            command = command.replace('\\\\', '\mathrm{')
+            command = command.replace('\\\\', r'\mathrm{')
             command = command + '}'
         elif not insets or insets[-1] != "ERT":
             # add an ERT inset with the replacement character
@@ -490,7 +491,7 @@ def revert_cs_label(document):


 def convert_bibitem(document):
-    """ Convert
+    r""" Convert
     \bibitem [option]{argument}

     to
@@ -575,16 +576,16 @@ commandparams_info = {
 def convert_commandparams(document):
     """ Convert

-    \begin_inset LatexCommand \cmdname[opt1][opt2]{arg}
-    \end_inset
+    \\begin_inset LatexCommand \\cmdname[opt1][opt2]{arg}
+    \\end_inset

     to

-    \begin_inset LatexCommand cmdname
+    \\begin_inset LatexCommand cmdname
     name1 "opt1"
     name2 "opt2"
     name3 "arg"
-    \end_inset
+    \\end_inset

     name1, name2 and name3 can be different for each command.
     """
@@ -895,7 +896,7 @@ def revert_cleardoublepage(document):


 def convert_lyxline(document):
-    " remove fontsize commands for \lyxline "
+    r" remove fontsize commands for \lyxline "
     # The problematic is: The old \lyxline definition doesn't handle the fontsize
     # to change the line thickness. The new definiton does this so that imported
     # \lyxlines would have a different line thickness. The eventual fontsize command
@@ -1686,7 +1687,7 @@ def revert_CJK(document):


 def revert_preamble_listings_params(document):
-    " Revert preamble option \listings_params "
+    r" Revert preamble option \listings_params "
     i = find_token(document.header, "\\listings_params", 0)
     if i != -1:
         document.preamble.append('\\usepackage{listings}')
@@ -1907,13 +1908,7 @@ def revert_ext_font_sizes(document):

     i = find_token(document.header, '\\paperfontsize', 0)
     document.header[i] = '\\paperfontsize default'

-    i = find_token(document.header, '\\options', 0)
-    if i == -1:
-        i = find_token(document.header, '\\textclass', 0) + 1
-        document.header[i:i] = ['\\options %s' % fontsize]
-    else:
-        document.header[i] += ',%s' % fontsize
+    insert_document_option(document, fontsize)


 def convert_ext_font_sizes(document):
@@ -2010,10 +2005,10 @@ def convert_arabic (document):
         document.header[i] = "\\language arabic_arabtex"
     i = 0
     while i < len(document.body):
-        h = document.body[i].find("\lang arabic", 0, len(document.body[i]))
+        h = document.body[i].find(r"\lang arabic", 0, len(document.body[i]))
         if (h != -1):
             # change the language name
-            document.body[i] = '\lang arabic_arabtex'
+            document.body[i] = r'\lang arabic_arabtex'
         i = i + 1


@@ -2025,10 +2020,10 @@ def revert_arabic (document):
         document.header[i] = "\\language arabic"
     i = 0
     while i < len(document.body):
-        h = document.body[i].find("\lang arabic_arabtex", 0, len(document.body[i]))
+        h = document.body[i].find(r"\lang arabic_arabtex", 0, len(document.body[i]))
         if (h != -1):
             # change the language name
-            document.body[i] = '\lang arabic'
+            document.body[i] = r'\lang arabic'
         i = i + 1

@@ -23,7 +23,7 @@ import unicodedata
 import sys, os

 from parser_tools import find_token, find_end_of, find_tokens, get_value
-from unicode_symbols import read_unicodesymbols
+from unicode_symbols import unicode_reps

 ####################################################################
 # Private helper functions
@@ -146,61 +146,13 @@ def set_option(document, m, option, value):
     return l


-# FIXME: Remove this function if the version imported from unicode_symbols works.
-# This function was the predecessor from that function, that in the meanwhile got
-# new fixes.
-def read_unicodesymbols2():
-    " Read the unicodesymbols list of unicode characters and corresponding commands."
-
-    # Provide support for both python 2 and 3
-    PY2 = sys.version_info[0] == 2
-    if not PY2:
-        unichr = chr
-    # End of code to support for both python 2 and 3
-
-    pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
-    fp = open(os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols'))
-    spec_chars = []
-    # Two backslashes, followed by some non-word character, and then a character
-    # in brackets. The idea is to check for constructs like: \"{u}, which is how
-    # they are written in the unicodesymbols file; but they can also be written
-    # as: \"u or even \" u.
-    r = re.compile(r'\\\\(\W)\{(\w)\}')
-    for line in fp.readlines():
-        if line[0] != '#' and line.strip() != "":
-            line=line.replace(' "',' ') # remove all quotation marks with spaces before
-            line=line.replace('" ',' ') # remove all quotation marks with spaces after
-            line=line.replace(r'\"','"') # replace \" by " (for characters with diaeresis)
-            try:
-                [ucs4,command,dead] = line.split(None,2)
-                if command[0:1] != "\\":
-                    continue
-                spec_chars.append([command, unichr(eval(ucs4))])
-            except:
-                continue
-            m = r.match(command)
-            if m != None:
-                command = "\\\\"
-                # If the character is a double-quote, then we need to escape it, too,
-                # since it is done that way in the LyX file.
-                if m.group(1) == "\"":
-                    command += "\\"
-                commandbl = command
-                command += m.group(1) + m.group(2)
-                commandbl += m.group(1) + ' ' + m.group(2)
-                spec_chars.append([command, unichr(eval(ucs4))])
-                spec_chars.append([commandbl, unichr(eval(ucs4))])
-    fp.close()
-    return spec_chars
-
-
 def extract_argument(line):
     'Extracts a LaTeX argument from the start of line. Returns (arg, rest).'

     if not line:
         return (None, "")

-    bracere = re.compile("(\s*)(.*)")
+    bracere = re.compile(r"(\s*)(.*)")
     n = bracere.match(line)
     whitespace = n.group(1)
     stuff = n.group(2)
@@ -280,8 +232,6 @@ def latex2ert(line, isindex):
     return retval


-unicode_reps = read_unicodesymbols()
-
 #Bug 5022....
 #Might should do latex2ert first, then deal with stuff that DOESN'T
 #end up inside ERT. That routine could be modified so that it returned
@@ -327,7 +277,7 @@ def latex2lyx(data, isindex):
     data = data.replace('\\\\', '\\')

     # Math:
-    mathre = re.compile('^(.*?)(\$.*?\$)(.*)')
+    mathre = re.compile(r'^(.*?)(\$.*?\$)(.*)')
     lines = data.split('\n')
     for line in lines:
         #document.warning("LINE: " + line)
@@ -996,7 +946,7 @@ def remove_inzip_options(document):


 def convert_inset_command(document):
-    """
+    r"""
     Convert:
         \begin_inset LatexCommand cmd
     to
@@ -1033,7 +983,7 @@ def convert_inset_command(document):


 def revert_inset_command(document):
-    """
+    r"""
     Convert:
         \begin_inset CommandInset InsetType
         LatexCommand cmd
@@ -1608,7 +1558,7 @@ def convert_usorbian(document):


 def convert_macro_global(document):
-    "Remove TeX code command \global when it is in front of a macro"
+    r"Remove TeX code command \global when it is in front of a macro"
     # math macros are nowadays already defined \global, so that an additional
     # \global would make the document uncompilable, see
     # http://www.lyx.org/trac/ticket/5371
@@ -2389,7 +2339,7 @@ def revert_wrapplacement(document):


 def remove_extra_embedded_files(document):
-    " Remove \extra_embedded_files from buffer params "
+    r" Remove \extra_embedded_files from buffer params "
     i = find_token(document.header, '\\extra_embedded_files', 0)
     if i == -1:
         return
@ -22,14 +22,15 @@ import re, string
|
||||
import unicodedata
|
||||
import sys, os
|
||||
|
||||
from parser_tools import find_token, find_end_of, find_tokens, \
|
||||
from parser_tools import del_complete_lines, \
|
||||
find_token, find_end_of, find_tokens, \
|
||||
find_token_exact, find_end_of_inset, find_end_of_layout, \
|
||||
find_token_backwards, is_in_inset, get_value, get_quoted_value, \
|
||||
del_token, check_token, get_option_value
|
||||
|
||||
from lyx2lyx_tools import add_to_preamble, insert_to_preamble, \
|
||||
put_cmd_in_ert, lyx2latex, latex_length, revert_flex_inset, \
|
||||
revert_font_attrs, hex2ratio, str2bool
|
||||
revert_font_attrs, hex2ratio, str2bool, revert_language
|
||||
|
||||
####################################################################
|
||||
# Private helper functions
|
||||
@ -377,7 +378,7 @@ def revert_splitindex(document):
|
||||
l = re.compile(r'\\begin_inset Index (.*)$')
|
||||
m = l.match(line)
|
||||
itype = m.group(1)
|
||||
if itype == "idx" or indices == "false":
|
||||
if itype == "idx" or useindices == "false":
|
||||
document.body[i] = "\\begin_inset Index"
|
||||
else:
|
||||
k = find_end_of_inset(document.body, i)
|
||||
@ -484,6 +485,15 @@ def revert_printindexall(document):
|
||||
document.body[i:k + 1] = subst
|
||||
i = i + 1
|
||||
|
||||
strikeout_preamble = ['% for proper underlining',
|
||||
r'\PassOptionsToPackage{normalem}{ulem}',
|
||||
r'\usepackage{ulem}']
|
||||
|
||||
def convert_strikeout(document):
|
||||
" Remove preamble code loading 'ulem' package. "
|
||||
del_complete_lines(document.preamble,
|
||||
['% Added by lyx2lyx']+strikeout_preamble)
|
||||
|
||||
|
||||
def revert_strikeout(document):
|
||||
" Reverts \\strikeout font attribute "
|
||||
@ -491,25 +501,30 @@ def revert_strikeout(document):
|
||||
changed = revert_font_attrs(document.body, "\\uwave", "\\uwave") or changed
|
||||
changed = revert_font_attrs(document.body, "\\strikeout", "\\sout") or changed
|
||||
if changed == True:
|
||||
insert_to_preamble(document, \
|
||||
['% for proper underlining',
|
||||
'\\PassOptionsToPackage{normalem}{ulem}',
|
||||
'\\usepackage{ulem}'])
|
||||
insert_to_preamble(document, strikeout_preamble)
|
||||
|
||||
|
||||
ulinelatex_preamble = ['% fix underbar in citations',
|
||||
r'\let\cite@rig\cite',
|
||||
r'\newcommand{\b@xcite}[2][\%]{\def\def@pt{\%}\def\pas@pt{#1}',
|
||||
r' \mbox{\ifx\def@pt\pas@pt\cite@rig{#2}\else\cite@rig[#1]{#2}\fi}}',
|
||||
r'\renewcommand{\underbar}[1]{{\let\cite\b@xcite\uline{#1}}}']
|
||||
|
||||
def convert_ulinelatex(document):
|
||||
" Remove preamble code for \\uline font attribute. "
|
||||
del_complete_lines(document.preamble,
|
||||
['% Added by lyx2lyx']+ulinelatex_preamble)
|
||||
|
||||
def revert_ulinelatex(document):
|
||||
" Reverts \\uline font attribute "
|
||||
" Add preamble code for \\uline font attribute in citations. "
|
||||
i = find_token(document.body, '\\bar under', 0)
|
||||
if i == -1:
|
||||
return
|
||||
insert_to_preamble(document,\
|
||||
['% for proper underlining',
|
||||
'\\PassOptionsToPackage{normalem}{ulem}',
|
||||
'\\usepackage{ulem}',
|
||||
'\\let\\cite@rig\\cite',
|
||||
'\\newcommand{\\b@xcite}[2][\\%]{\\def\\def@pt{\\%}\\def\\pas@pt{#1}',
|
||||
' \\mbox{\\ifx\\def@pt\\pas@pt\\cite@rig{#2}\\else\\cite@rig[#1]{#2}\\fi}}',
|
||||
'\\renewcommand{\\underbar}[1]{{\\let\\cite\\b@xcite\\uline{#1}}}'])
|
||||
try:
|
||||
document.preamble.index(r'\usepackage{ulem}')
|
||||
except ValueError:
|
||||
insert_to_preamble(document, strikeout_preamble)
|
||||
insert_to_preamble(document, ulinelatex_preamble)
|
||||
|
||||
|
||||
def revert_custom_processors(document):
|
||||
@ -818,6 +833,9 @@ def revert_suppress_date(document):
|
||||
del document.header[i]
|
||||
|
||||
|
||||
mhchem_preamble = [r"\PassOptionsToPackage{version=3}{mhchem}",
|
||||
r"\usepackage{mhchem}"]
|
||||
|
||||
def convert_mhchem(document):
|
||||
"Set mhchem to off for versions older than 1.6.x"
|
||||
if document.initial_format < 277:
|
||||
@ -835,47 +853,44 @@ def convert_mhchem(document):
|
||||
# pre-1.5.x document
|
||||
i = find_token(document.header, "\\use_amsmath", 0)
|
||||
if i == -1:
|
||||
document.warning("Malformed LyX document: Could not find amsmath os esint setting.")
|
||||
document.warning("Malformed LyX document: "
|
||||
"Could not find amsmath or esint setting.")
|
||||
return
|
||||
document.header.insert(i + 1, "\\use_mhchem %d" % mhchem)
|
||||
# remove LyX-inserted preamble
|
||||
if mhchem != 0:
|
||||
del_complete_lines(document.preamble,
|
||||
['% Added by lyx2lyx']+mhchem_preamble)
|
||||
|
||||
|
||||
def revert_mhchem(document):
|
||||
"Revert mhchem loading to preamble code"
|
||||
"Revert mhchem loading to preamble code."
|
||||
|
||||
mhchem = "off"
|
||||
i = find_token(document.header, "\\use_mhchem", 0)
|
||||
if i == -1:
|
||||
document.warning("Malformed LyX document: Could not find mhchem setting.")
|
||||
mhchem = "auto"
|
||||
else:
|
||||
val = get_value(document.header, "\\use_mhchem", i)
|
||||
if val == "1":
|
||||
mhchem = "auto"
|
||||
elif val == "2":
|
||||
mhchem = "on"
|
||||
del document.header[i]
|
||||
mhchem = get_value(document.header, "\\use_mhchem", delete=True)
|
||||
try:
|
||||
mhchem = int(mhchem)
|
||||
except ValueError:
|
||||
document.warning("Malformed LyX document: "
|
||||
"Could not find mhchem setting.")
|
||||
mhchem = 1 # "auto"
|
||||
# mhchem in {0: "off", 1: "auto", 2: "on"}
|
||||
|
||||
if mhchem == "off":
|
||||
# don't load case
|
||||
return
|
||||
|
||||
if mhchem == "auto":
|
||||
if mhchem == 1: # "auto"
|
||||
i = 0
|
||||
while True:
|
||||
while i != 1 and mhchem == 1:
|
||||
i = find_token(document.body, "\\begin_inset Formula", i)
|
||||
if i == -1:
|
||||
break
|
||||
line = document.body[i]
|
||||
if line.find("\\ce{") != -1 or line.find("\\cf{") != -1:
|
||||
mhchem = "on"
|
||||
break
|
||||
j = find_end_of_inset(document.body, i)
|
||||
if j == -1:
|
||||
break
|
||||
if (True for line in document.body[i:j]
|
||||
if r"\ce{" in line or r"\cf{" in line):
|
||||
mhchem = 2
|
||||
break
|
||||
i += 1
|
||||
|
||||
if mhchem == "on":
|
||||
pre = ["\\PassOptionsToPackage{version=3}{mhchem}",
|
||||
"\\usepackage{mhchem}"]
|
||||
insert_to_preamble(document, pre)
|
||||
if (mhchem == 2 # on
|
||||
and find_token(document.preamble, r"\usepackage{mhchem}") == -1):
|
||||
insert_to_preamble(document, mhchem_preamble)
|
||||
|
||||
|
||||
def revert_fontenc(document):
|
||||
@ -956,6 +971,20 @@ def revert_includeonly(document):
|
||||
document.header[i : j + 1] = []
|
||||
|
||||
|
||||
def convert_includeall(document):
|
||||
" Add maintain_unincluded_children param "
|
||||
|
||||
i = 0
|
||||
i = find_token(document.header, "\\maintain_unincluded_children", 0)
|
||||
if i == -1:
|
||||
i = find_token(document.header, "\\textclass", 0)
|
||||
if i == -1:
|
||||
document.warning("Malformed LyX document! Missing \\textclass header.")
|
||||
return
|
||||
document.header.insert(i, "\\maintain_unincluded_children false")
|
||||
return
|
||||
|
||||
|
||||
def revert_includeall(document):
|
||||
" Remove maintain_unincluded_children param "
|
||||
del_token(document.header, '\\maintain_unincluded_children', 0)
|
||||
@ -1090,7 +1119,7 @@ def revert_multirow(document):
|
||||
|
||||
|
||||
def convert_math_output(document):
|
||||
" Convert \html_use_mathml to \html_math_output "
|
||||
r" Convert \html_use_mathml to \html_math_output "
|
||||
i = find_token(document.header, "\\html_use_mathml", 0)
|
||||
if i == -1:
|
||||
return
|
||||
@ -1107,7 +1136,7 @@ def convert_math_output(document):
|
||||
|
||||
|
||||
def revert_math_output(document):
|
||||
" Revert \html_math_output to \html_use_mathml "
|
||||
r" Revert \html_math_output to \html_use_mathml "
|
||||
i = find_token(document.header, "\\html_math_output", 0)
|
||||
if i == -1:
|
||||
return
|
||||
@ -1266,19 +1295,7 @@ def revert_notefontcolor(document):
|
||||
def revert_turkmen(document):
|
||||
"Set language Turkmen to English"
|
||||
|
||||
if document.language == "turkmen":
|
||||
document.language = "english"
|
||||
i = find_token(document.header, "\\language", 0)
|
||||
if i != -1:
|
||||
document.header[i] = "\\language english"
|
||||
|
||||
j = 0
|
||||
while True:
|
||||
j = find_token(document.body, "\\lang turkmen", j)
|
||||
if j == -1:
|
||||
return
|
||||
document.body[j] = document.body[j].replace("\\lang turkmen", "\\lang english")
|
||||
j += 1
|
||||
revert_language(document, "turkmen", "turkmen", "turkmen")
|
||||
|
||||
|
||||
def revert_fontcolor(document):
|
||||
@@ -1602,8 +1619,8 @@ def revert_IEEEtran(document):
 
 def convert_prettyref(document):
     " Converts prettyref references to neutral formatted refs "
-    re_ref = re.compile("^\s*reference\s+\"(\w+):(\S+)\"")
-    nm_ref = re.compile("^\s*name\s+\"(\w+):(\S+)\"")
+    re_ref = re.compile("^\\s*reference\\s+\"(\\w+):(\\S+)\"")
+    nm_ref = re.compile("^\\s*name\\s+\"(\\w+):(\\S+)\"")
 
     i = 0
     while True:
@@ -1624,8 +1641,8 @@ def convert_prettyref(document):
 
 def revert_refstyle(document):
     " Reverts neutral formatted refs to prettyref "
-    re_ref = re.compile("^reference\s+\"(\w+):(\S+)\"")
-    nm_ref = re.compile("^\s*name\s+\"(\w+):(\S+)\"")
+    re_ref = re.compile("^reference\\s+\"(\\w+):(\\S+)\"")
+    nm_ref = re.compile("^\\s*name\\s+\"(\\w+):(\\S+)\"")
 
     i = 0
     while True:
@@ -1664,12 +1681,10 @@ def revert_nameref(document):
         i += 1
         # Make sure it is actually in an inset!
         # A normal line could begin with "LatexCommand nameref"!
-        val = is_in_inset(document.body, cmdloc, \
-            "\\begin_inset CommandInset ref")
-        if not val:
+        stins, endins = is_in_inset(document.body, cmdloc,
+                                    "\\begin_inset CommandInset ref")
+        if endins == -1:
             continue
-        stins, endins = val
 
         # ok, so it is in an InsetRef
         refline = find_token(document.body, "reference", stins, endins)
         if refline == -1:
@@ -1699,17 +1714,16 @@ def remove_Nameref(document):
             break
         cmdloc = i
         i += 1
 
         # Make sure it is actually in an inset!
-        val = is_in_inset(document.body, cmdloc, \
-            "\\begin_inset CommandInset ref")
+        val = is_in_inset(document.body, cmdloc,
+                          "\\begin_inset CommandInset ref", default=False)
         if not val:
             continue
         document.body[cmdloc] = "LatexCommand nameref"
 
 
 def revert_mathrsfs(document):
-    " Load mathrsfs if \mathrsfs us use in the document "
+    r" Load mathrsfs if \mathrsfs is used in the document "
     i = 0
     for line in document.body:
         if line.find("\\mathscr{") != -1:
@@ -2145,7 +2159,7 @@ def convert_passthru(document):
     if not check_passthru:
         return
 
-    rx = re.compile("\\\\begin_layout \s*(\w+)")
+    rx = re.compile("\\\\begin_layout \\s*(\\w+)")
     beg = 0
     for lay in ["Chunk", "Scrap"]:
         while True:
@@ -2175,7 +2189,7 @@ def convert_passthru(document):
                     break
                 ne = find_end_of_inset(document.body, ns)
                 if ne == -1 or ne > end:
-                    document.warning("Can't find end of inset at line " + str(nb))
+                    document.warning("Can't find end of inset at line " + str(ne))
                     ns += 1
                     continue
                 if document.body[ne + 1] == "":
@@ -2209,7 +2223,7 @@ def revert_passthru(document):
     " http://www.mail-archive.com/lyx-devel@lists.lyx.org/msg161298.html "
     if not check_passthru:
         return
-    rx = re.compile("\\\\begin_layout \s*(\w+)")
+    rx = re.compile("\\\\begin_layout \\s*(\\w+)")
     beg = 0
     for lay in ["Chunk", "Scrap"]:
         while True:
@@ -2501,7 +2515,7 @@ def revert_langpack(document):
 
 def convert_langpack(document):
     " Add \\language_package parameter "
-    i = find_token(document.header, "\language" , 0)
+    i = find_token(document.header, r"\language" , 0)
     if i == -1:
         document.warning("Malformed document. No \\language defined!")
         return
@@ -2548,9 +2562,9 @@ convert = [[346, []],
            [352, [convert_splitindex]],
            [353, []],
            [354, []],
-           [355, []],
+           [355, [convert_strikeout]],
            [356, []],
-           [357, []],
+           [357, [convert_ulinelatex]],
            [358, []],
            [359, [convert_nomencl_width]],
            [360, []],
@@ -2569,7 +2583,7 @@ convert = [[346, []],
            [373, [merge_gbrief]],
            [374, []],
            [375, []],
-           [376, []],
+           [376, [convert_includeall]],
            [377, []],
            [378, []],
            [379, [convert_math_output]],
@@ -24,7 +24,8 @@ import sys, os
 
 # Uncomment only what you need to import, please.
 
-from parser_tools import count_pars_in_inset, del_token, find_token, find_token_exact, \
+from parser_tools import count_pars_in_inset, del_complete_lines, del_token, \
+    find_token, find_token_exact, \
     find_token_backwards, find_end_of, find_end_of_inset, find_end_of_layout, \
     find_end_of_sequence, find_re, get_option_value, get_containing_layout, \
     get_containing_inset, get_value, get_quoted_value, set_option_value
@@ -33,7 +34,7 @@ from parser_tools import count_pars_in_inset, del_token, find_token, find_token_
 #find_end_of_inset, find_end_of_layout, \
 #is_in_inset, del_token, check_token
 
-from lyx2lyx_tools import add_to_preamble, put_cmd_in_ert, get_ert
+from lyx2lyx_tools import add_to_preamble, put_cmd_in_ert, get_ert, revert_language
 
 #from lyx2lyx_tools import insert_to_preamble, \
 #  lyx2latex, latex_length, revert_flex_inset, \
@@ -58,7 +59,7 @@ def revert_Argument_to_TeX_brace(document, line, endline, n, nmax, environment,
     usage:
     revert_Argument_to_TeX_brace(document, LineOfBegin, LineOfEnd, StartArgument, EndArgument, isEnvironment, isOpt)
     LineOfBegin is the line of the \\begin_layout or \\begin_inset statement
-    LineOfEnd is the line of the \end_layout or \end_inset statement, if "0" is given, the end of the file is used instead
+    LineOfEnd is the line of the \\end_layout or \\end_inset statement; if "0" is given, the end of the file is used instead
     StartArgument is the number of the first argument that needs to be converted
     EndArgument is the number of the last argument that needs to be converted or the last defined one
     isEnvironment must be true, if the layout is for a LaTeX environment
@@ -352,7 +353,7 @@ def revert_undertilde(document):
 
 
 def revert_negative_space(document):
-    "Revert InsetSpace negmedspace and negthickspace into its TeX-code counterpart"
+    "Revert InsetSpace negmedspace and negthickspace into their TeX-code counterparts"
     i = 0
     j = 0
     reverted = False
@@ -365,7 +366,7 @@ def revert_negative_space(document):
     if reverted == True:
         i = find_token(document.header, "\\use_amsmath 2", 0)
         if i == -1:
-            add_to_preamble(document, ["\\@ifundefined{negthickspace}{\\usepackage{amsmath}}"])
+            add_to_preamble(document, ["\\@ifundefined{negthickspace}{\\usepackage{amsmath}}{}"])
             return
     if i == -1:
         return
@@ -555,8 +556,6 @@ def handle_longtable_captions(document, forward):
                     get_option_value(document.body[begin_row], 'endlastfoot') != 'true'):
                 document.body[begin_row] = set_option_value(document.body[begin_row], 'caption', 'true", endfirsthead="true')
         elif get_option_value(document.body[begin_row], 'caption') == 'true':
-            if get_option_value(document.body[begin_row], 'endfirsthead') == 'true':
-                document.body[begin_row] = set_option_value(document.body[begin_row], 'endfirsthead', 'false')
             if get_option_value(document.body[begin_row], 'endhead') == 'true':
                 document.body[begin_row] = set_option_value(document.body[begin_row], 'endhead', 'false')
             if get_option_value(document.body[begin_row], 'endfoot') == 'true':
@@ -618,15 +617,16 @@ def convert_use_package(document, pkg, commands, oldauto):
     # oldauto defines how the version we are converting from behaves:
     # if it is true, the old version uses the package automatically.
     # if it is false, the old version never uses the package.
-    i = find_token(document.header, "\\use_package", 0)
+    i = find_token(document.header, "\\use_package")
     if i == -1:
         document.warning("Malformed LyX document: Can't find \\use_package.")
         return;
-    j = find_token(document.preamble, "\\usepackage{" + pkg + "}", 0)
-    if j != -1:
-        # package was loaded in the preamble, convert this to header setting for round trip
+    packageline = "\\usepackage{%s}" % pkg
+    if (del_complete_lines(document.preamble,
+                           ['% Added by lyx2lyx', packageline]) or
+        del_complete_lines(document.preamble, [packageline])):
+        # package was loaded in the preamble, convert this to header setting
         document.header.insert(i + 1, "\\use_package " + pkg + " 2") # on
-        del document.preamble[j]
     # If oldauto is true we have two options:
     # We can either set the package to auto - this is correct for files in
     # format 425 to 463, and may create a conflict for older files which use
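The rewritten branch above leans on del_complete_lines() from parser_tools (added further down in this commit): it first tries to remove the two-line stanza that lyx2lyx itself writes, and only then a bare package line. A small sketch with assumed preamble content:

    from parser_tools import del_complete_lines

    preamble = ["% Added by lyx2lyx", "\\usepackage{mhchem}", "\\usepackage{xcolor}"]
    # removes the marker comment together with the package line:
    del_complete_lines(preamble, ["% Added by lyx2lyx", "\\usepackage{mhchem}"])  # -> True
    # preamble is now ["\\usepackage{xcolor}"]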
@@ -1060,7 +1060,7 @@ def convert_table_rotation(document):
 
 
 def convert_listoflistings(document):
-    'Convert ERT \lstlistoflistings to TOC lstlistoflistings inset'
+    r'Convert ERT \lstlistoflistings to TOC lstlistoflistings inset'
     # We can support roundtrip because the command is so simple
     i = 0
     while True:
@@ -1169,24 +1169,16 @@ def revert_ancientgreek(document):
 def revert_languages(document):
     "Set the document language for new supported languages to English"
-
-    languages = [
-        "coptic", "divehi", "hindi", "kurmanji", "lao", "marathi", "occitan", "sanskrit",
-        "syriac", "tamil", "telugu", "urdu"
-    ]
-    for n in range(len(languages)):
-        if document.language == languages[n]:
-            document.language = "english"
-            i = find_token(document.header, "\\language", 0)
-            if i != -1:
-                document.header[i] = "\\language english"
-        j = 0
-        while j < len(document.body):
-            j = find_token(document.body, "\\lang " + languages[n], j)
-            if j != -1:
-                document.body[j] = document.body[j].replace("\\lang " + languages[n], "\\lang english")
-                j += 1
-            else:
-                j = len(document.body)
+    # polyglossia-only
+    polyglossia_languages = ["coptic", "divehi", "hindi", "lao", "marathi",
+                             "occitan", "sanskrit", "syriac", "tamil",
+                             "telugu", "urdu"]
+    # babel-only
+    babel_languages = ["kurmanji"]
+    for lang in polyglossia_languages:
+        revert_language(document, lang, "", lang)
+    for lang in babel_languages:
+        revert_language(document, lang, lang, "")
 
 
 def convert_armenian(document):
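Judging from the call sites in this commit, revert_language(document, lyxname, babelname, polyglossianame) takes the LyX language name plus its babel and polyglossia names, with an empty string meaning "not supported by that package". A hedged sketch of the two patterns used above:

    from lyx2lyx_tools import revert_language

    revert_language(document, "kurmanji", "kurmanji", "")  # babel-only language
    revert_language(document, "coptic", "", "coptic")      # polyglossia-only language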
@@ -1555,10 +1547,11 @@ def convert_latexargs(document):
                     "theorems-chap-bytype", "theorems-chap", "theorems-named", "theorems-sec-bytype",
                     "theorems-sec", "theorems-starred", "theorems-std", "todonotes"]
     # Modules we need to take care of
-    caveat_modules = ["initials"]
+    caveat_modules = ["initials"] # TODO: , "graphicboxes", "bicaption"]
     # information about the relevant styles in caveat_modules (number of opt and req args)
     # use this if we get more caveat_modules. For now, use hard coding (see below).
     # initials = [{'Layout' : 'Initial', 'opt' : 1, 'req' : 1}]
+    # graphicboxes = { ... }
 
     # Is this a known safe layout?
     safe_layout = document.textclass in safe_layouts
@@ -4560,19 +4553,7 @@ def revert_aa2(document):
 
 def revert_tibetan(document):
     "Set the document language for Tibetan to English"
-
-    if document.language == "tibetan":
-        document.language = "english"
-        i = find_token(document.header, "\\language", 0)
-        if i != -1:
-            document.header[i] = "\\language english"
-    j = 0
-    while j < len(document.body):
-        j = find_token(document.body, "\\lang tibetan", j)
-        if j != -1:
-            document.body[j] = document.body[j].replace("\\lang tibetan", "\\lang english")
-            j += 1
-        else:
-            j = len(document.body)
+    revert_language(document, "tibetan", "", "tibetan")
 
 
 #############
File diff suppressed because it is too large
File diff suppressed because it is too large
lib/lyx2lyx/lyx_2_4.py (new file, 5848 lines): diff suppressed because it is too large
@@ -1,7 +1,7 @@
 # This file is part of lyx2lyx
 # -*- coding: utf-8 -*-
 # Copyright (C) 2002-2011 Dekel Tsur <dekel@lyx.org>,
-# José Matos <jamatos@lyx.org>, Richard Heck <rgheck@comcast.net>
+# José Matos <jamatos@lyx.org>, Richard Kimberly Heck <rikiheck@lyx.org>
 #
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
@@ -18,53 +18,57 @@
 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 
 
-'''
+"""
 This module offers several free functions to help parse lines.
 More documentation is below, but here is a quick guide to what
 they do. Optional arguments are marked by brackets.
 
-find_token(lines, token, start[, end[, ignorews]]):
+find_token(lines, token[, start[, end[, ignorews]]]):
   Returns the first line i, start <= i < end, on which
   token is found at the beginning. Returns -1 if not
   found.
   If ignorews is (given and) True, then differences
   in whitespace do not count, except that there must be no
   extra whitespace following token itself.
 
-find_token_exact(lines, token, start[, end]):
+find_token_exact(lines, token[, start[, end]]):
   As find_token, but with ignorews set to True.
 
-find_tokens(lines, tokens, start[, end[, ignorews]]):
+find_tokens(lines, tokens[, start[, end[, ignorews]]]):
   Returns the first line i, start <= i < end, on which
   one of the tokens in tokens is found at the beginning.
   Returns -1 if not found.
   If ignorews is (given and) True, then differences
   in whitespace do not count, except that there must be no
   extra whitespace following token itself.
 
-find_tokens_exact(lines, token, start[, end]):
+find_tokens_exact(lines, token[, start[, end]]):
   As find_tokens, but with ignorews True.
 
 find_token_backwards(lines, token, start):
 find_tokens_backwards(lines, tokens, start):
   As before, but look backwards.
 
+find_substring(lines, sub[, start[, end]]) -> int
+  As find_token, but sub may be anywhere in the line.
+
 find_re(lines, rexp, start[, end]):
   As find_token, but rexp is a regular expression object,
   so it has to be passed as e.g.: re.compile(r'...').
 
-get_value(lines, token, start[, end[, default]]):
+get_value(lines, token[, start[, end[, default[, delete]]]]):
   Similar to find_token, but it returns what follows the
   token on the found line. Example:
     get_value(document.header, "\\use_xetex", 0)
   will find a line like:
     \\use_xetex true
   and, in that case, return "true". (Note that whitespace
   is stripped.) The final argument, default, defaults to "",
   and is what is returned if we do not find anything. So you
   can use that to set a default.
+  If delete is True, then delete the line if found.
 
-get_quoted_value(lines, token, start[, end[, default]]):
+get_quoted_value(lines, token[, start[, end[, default[, delete]]]]):
   Similar to get_value, but it will strip quotes off the
   value, if they are present. So use this one for cases
   where the value is normally quoted.
 
@@ -74,20 +78,27 @@ get_option_value(line, option):
     option="value"
   and returns value. Returns "" if not found.
 
-get_bool_value(lines, token, start[, end[, default]]):
+get_bool_value(lines, token[, start[, end[, default[, delete]]]]):
   Like get_value, but returns a boolean.
 
-del_token(lines, token, start[, end]):
+set_bool_value(lines, token, value[, start[, end]]):
+  Find `token` in `lines[start:end]` and set to boolean value bool(`value`).
+  Return old value. Raise ValueError if token is not in lines.
+
+del_token(lines, token[, start[, end]]):
   Like find_token, but deletes the line if it finds one.
   Returns True if a line got deleted, otherwise False.
 
+  Use get_* with the optional argument "delete=True", if you want to
+  get and delete a token.
+
 find_beginning_of(lines, i, start_token, end_token):
   Here, start_token and end_token are meant to be a matching
   pair, like "\\begin_layout" and "\\end_layout". We look for
   the start_token that pairs with the end_token that occurs
   on or after line i. Returns -1 if not found.
   So, in the layout case, this would find the \\begin_layout
   for the layout line i is in.
   Example:
     ec = find_token(document.body, "</cell", i)
     bc = find_beginning_of(document.body, ec, \
@@ -95,7 +106,7 @@ find_beginning_of(lines, i, start_token, end_token):
   Now, assuming no -1s, bc-ec wraps the cell for line i.
 
 find_end_of(lines, i, start_token, end_token):
   Like find_beginning_of, but looking for the matching
   end_token. This might look like:
     bc = find_token(document.body, "<cell", i)
     ec = find_end_of(document.body, bc, "<cell", "</cell")
@@ -110,25 +121,25 @@ find_end_of_layout(lines, i):
 find_end_of_sequence(lines, i):
   Find the end of the sequence of layouts of the same kind.
   Considers nesting. If the last paragraph in sequence is nested,
-  the position of the last \end_deeper is returned, else
-  the position of the last \end_layout.
+  the position of the last \\end_deeper is returned, else
+  the position of the last \\end_layout.
 
-is_in_inset(lines, i, inset):
-  Checks if line i is in an inset of the given type.
-  If so, returns starting and ending lines. Otherwise,
-  returns False.
+is_in_inset(lines, i, inset, default=(-1,-1)):
+  Check if line i is in an inset of the given type.
+  If so, returns starting and ending lines. Otherwise,
+  return default.
   Example:
     is_in_inset(document.body, i, "\\begin_inset Tabular")
-  returns False unless i is within a table. If it is, then
+  returns (-1,-1) unless i is within a table. If it is, then
   it returns the line on which the table begins and the one
   on which it ends. Note that this pair will evaluate to
   boolean True, so
-    if is_in_inset(...):
+    if is_in_inset(..., default=False):
   will do what you expect.
 
 get_containing_inset(lines, i):
   Finds out what kind of inset line i is within. Returns a
-  list containing what follows \begin_inset on the line
+  list containing what follows \\begin_inset on the line
   on which the inset begins, plus the starting and ending line.
   Returns False on any kind of error or if it isn't in an inset.
   So get_containing_inset(document.body, i) might return:
@@ -152,7 +163,7 @@ is_nonempty_line(line):
 count_pars_in_inset(lines, i):
   Counts the paragraphs inside an inset.
 
-'''
+"""
 
 import re
@@ -161,9 +172,11 @@ def check_token(line, token):
     """ check_token(line, token) -> bool
 
     Return True if token is present in line and is the first element
-    else returns False."""
+    else returns False.
 
-    return line[:len(token)] == token
+    Deprecated. Use line.startswith(token).
+    """
+    return line.startswith(token)
 
 
 def is_nonempty_line(line):
@@ -171,50 +184,53 @@ def is_nonempty_line(line):
 
     Return False if line is either empty or it has only whitespaces,
     else return True."""
-    return line != " "*len(line)
+    return bool(line.strip())
 
 
 # Utilities for a list of lines
-def find_token(lines, token, start, end = 0, ignorews = False):
+def find_token(lines, token, start=0, end=0, ignorews=False):
     """ find_token(lines, token, start[[, end], ignorews]) -> int
 
     Return the lowest line where token is found, and is the first
     element, in lines[start, end].
 
     If ignorews is True (default is False), then differences in
-    whitespace are ignored, except that there must be no extra
-    whitespace following token itself.
+    whitespace are ignored, but there must be whitespace following
+    token itself.
+
+    Use find_substring(lines, sub) to find a substring anywhere in `lines`.
 
     Return -1 on failure."""
 
     if end == 0 or end > len(lines):
        end = len(lines)
-    m = len(token)
+    if ignorews:
+        y = token.split()
    for i in range(start, end):
        if ignorews:
            x = lines[i].split()
-            y = token.split()
            if len(x) < len(y):
                continue
            if x[:len(y)] == y:
                return i
        else:
-            if lines[i][:m] == token:
+            if lines[i].startswith(token):
                return i
    return -1
 
 
-def find_token_exact(lines, token, start, end = 0):
+def find_token_exact(lines, token, start=0, end=0):
     return find_token(lines, token, start, end, True)
 
 
-def find_tokens(lines, tokens, start, end = 0, ignorews = False):
+def find_tokens(lines, tokens, start=0, end=0, ignorews=False):
     """ find_tokens(lines, tokens, start[[, end], ignorews]) -> int
 
     Return the lowest line where one token in tokens is found, and is
     the first element, in lines[start, end].
 
     Return -1 on failure."""
 
     if end == 0 or end > len(lines):
         end = len(lines)
@@ -228,23 +244,41 @@ def find_tokens(lines, tokens, start, end = 0, ignorews = False):
                 if x[:len(y)] == y:
                     return i
             else:
-                if lines[i][:len(token)] == token:
+                if lines[i].startswith(token):
                     return i
     return -1
 
 
-def find_tokens_exact(lines, tokens, start, end = 0):
+def find_tokens_exact(lines, tokens, start=0, end=0):
     return find_tokens(lines, tokens, start, end, True)
 
 
-def find_re(lines, rexp, start, end = 0):
-    """ find_token_re(lines, rexp, start[, end]) -> int
+def find_substring(lines, sub, start=0, end=0):
+    """ find_substring(lines, sub[, start[, end]]) -> int
 
-    Return the lowest line where rexp, a regular expression, is found
-    in lines[start, end].
+    Return the lowest line number `i` in [start, end] where
+    `sub` is a substring of line[i].
 
     Return -1 on failure."""
 
+    if end == 0 or end > len(lines):
+        end = len(lines)
+    for i in range(start, end):
+        if sub in lines[i]:
+            return i
+    return -1
+
+
+def find_re(lines, rexp, start=0, end=0):
+    """ find_re(lines, rexp[, start[, end]]) -> int
+
+    Return the lowest line number `i` in [start, end] where the regular
+    expression object `rexp` matches at the beginning of line[i].
+    Return -1 on failure.
+
+    Start your pattern with the wildcard ".*" to find a match anywhere in a
+    line. Use find_substring() to find a substring anywhere in the lines.
+    """
     if end == 0 or end > len(lines):
         end = len(lines)
     for i in range(start, end):
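The three search flavours defined above differ only in how they match a line. A compact illustration (the `body` list is a made-up example):

    import re
    from parser_tools import find_token, find_substring, find_re

    body = ["\\begin_layout Standard", "some text \\emph on"]
    find_token(body, "\\begin_layout")       # 0: token starts the line
    find_token(body, "\\emph")               # -1: not at a line start
    find_substring(body, "\\emph")           # 1: substring anywhere in the line
    find_re(body, re.compile(r".*\\emph"))   # 1: regexp must match at the line
                                             #    start, hence the leading ".*"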
@@ -260,10 +294,8 @@ def find_token_backwards(lines, token, start):
     element, in lines[start, end].
 
     Return -1 on failure."""
-    m = len(token)
     for i in range(start, -1, -1):
-        line = lines[i]
-        if line[:m] == token:
+        if lines[i].startswith(token):
             return i
     return -1
 
@@ -278,30 +310,111 @@ def find_tokens_backwards(lines, tokens, start):
     for i in range(start, -1, -1):
         line = lines[i]
         for token in tokens:
-            if line[:len(token)] == token:
+            if line.startswith(token):
                 return i
     return -1
-def get_value(lines, token, start, end = 0, default = ""):
-    """ get_value(lines, token, start[[, end], default]) -> string
+def find_complete_lines(lines, sublines, start=0, end=0):
+    """Find first occurrence of sequence `sublines` in list `lines`.
+    Return index of first line or -1 on failure.
+
+    Efficient search for a sub-list in a large list. Works for any values.
+
+    >>> find_complete_lines([1, 2, 3, 1, 1, 2], [1, 2])
+    0
+
+    The `start` and `end` arguments work similar to list.index()
+
+    >>> find_complete_lines([1, 2, 3, 1, 1, 2], [1, 2], start=1)
+    4
+    >>> find_complete_lines([1, 2, 3, 1, 1, 2], [1, 2], start=1, end=4)
+    -1
+
+    The return value can be used to substitute the sub-list.
+    Take care to check before use:
+
+    >>> l = [1, 1, 2]
+    >>> s = find_complete_lines(l, [1, 2])
+    >>> if s != -1:
+    ...     l[s:s+2] = [3]; l
+    [1, 3]
+
+    See also del_complete_lines().
+    """
+    if not sublines:
+        return start
+    end = end or len(lines)
+    N = len(sublines)
+    try:
+        while True:
+            for j, value in enumerate(sublines):
+                i = lines.index(value, start, end)
+                if j and i != start:
+                    start = i - j
+                    break
+                start = i + 1
+            else:
+                return i + 1 - N
+    except ValueError:  # `sublines` not found
+        return -1
+
+
+def find_across_lines(lines, sub, start=0, end=0):
+    sublines = sub.splitlines()
+    if len(sublines) > 2:
+        # at least 3 lines: the middle one(s) are complete -> use index search
+        i = find_complete_lines(lines, sublines[1:-1], start+1, end-1)
+        if i < start+1:
+            return -1
+        try:
+            if (lines[i-1].endswith(sublines[0]) and
+                lines[i+len(sublines)-2].startswith(sublines[-1])):
+                return i-1
+        except IndexError:
+            pass
+    elif len(sublines) > 1:
+        # last subline must start a line
+        i = find_token(lines, sublines[-1], start, end)
+        if i < start + 1:
+            return -1
+        if lines[i-1].endswith(sublines[0]):
+            return i-1
+    else:  # no line-break, may be in the middle of a line
+        if end == 0 or end > len(lines):
+            end = len(lines)
+        for i in range(start, end):
+            if sub in lines[i]:
+                return i
+    return -1
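find_across_lines() stitches a multi-line search string onto consecutive list entries: the first subline must end lines[i], any middle sublines must match complete lines, and the last subline must start the line that follows. An illustrative example:

    lines_ = ["text \\begin_inset Quotes eld", "\\end_inset", "more text"]
    find_across_lines(lines_, "Quotes eld\n\\end_inset\nmore")   # -> 0
    find_across_lines(lines_, "Quotes erd\n\\end_inset\nmore")   # -> -1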
+def get_value(lines, token, start=0, end=0, default="", delete=False):
+    """Find `token` in `lines` and return part of line that follows it.
 
     Find the next line that looks like:
       token followed by other stuff
-    Returns "followed by other stuff" with leading and trailing
+
+    If `delete` is True, delete the line (if found).
+
+    Return "followed by other stuff" with leading and trailing
     whitespace removed.
     """
     i = find_token_exact(lines, token, start, end)
     if i == -1:
         return default
+    # TODO: establish desired behaviour, eventually change to
+    #  return lines.pop(i)[len(token):].strip() # or default
+    #  see test_parser_tools.py
     l = lines[i].split(None, 1)
+    if delete:
+        del(lines[i])
     if len(l) > 1:
         return l[1].strip()
     return default
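With the new delete flag, reading and removing a header setting becomes a single call. For instance (assumed input):

    header_ = ["\\use_xetex true", "\\language english"]
    get_value(header_, "\\use_xetex", delete=True)   # -> "true"
    # header_ is now ["\\language english"]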
-def get_quoted_value(lines, token, start, end = 0, default = ""):
+def get_quoted_value(lines, token, start=0, end=0, default="", delete=False):
     """ get_quoted_value(lines, token, start[[, end], default]) -> string
 
     Find the next line that looks like:
@@ -312,33 +425,52 @@ def get_quoted_value(lines, token, start, end = 0, default = ""):
     if they are there.
     Note that we will NOT strip quotes from default!
     """
-    val = get_value(lines, token, start, end, "")
+    val = get_value(lines, token, start, end, "", delete)
     if not val:
         return default
     return val.strip('"')
 
 
-def get_bool_value(lines, token, start, end = 0, default = None):
-    """ get_value(lines, token, start[[, end], default]) -> string
+bool_values = {"true": True, "1": True,
+               "false": False, "0": False}
+
+def get_bool_value(lines, token, start=0, end=0, default=None, delete=False):
+    """ get_bool_value(lines, token, start[[, end], default]) -> string
 
     Find the next line that looks like:
-      token bool_value
+      `token` <bool_value>
 
-    Returns True if bool_value is 1 or true and
-    False if bool_value is 0 or false
+    Return True if <bool_value> is 1 or "true", False if <bool_value>
+    is 0 or "false", else `default`.
     """
+    val = get_quoted_value(lines, token, start, end, default, delete)
+    return bool_values.get(val, default)
 
-    val = get_quoted_value(lines, token, start, end, "")
-
-    if val == "1" or val == "true":
-        return True
-    if val == "0" or val == "false":
-        return False
-    return default
 
+def set_bool_value(lines, token, value, start=0, end=0):
+    """Find `token` in `lines` and set to bool(`value`).
+
+    Return previous value. Raise `ValueError` if `token` is not in lines.
+
+    Cf. find_token(), get_bool_value().
+    """
+    i = find_token(lines, token, start, end)
+    if i == -1:
+        raise ValueError
+    oldvalue = get_bool_value(lines, token, i, i+1)
+    if oldvalue is value:
+        return oldvalue
+    # set to new value
+    if get_quoted_value(lines, token, i, i+1) in ('0', '1'):
+        lines[i] = "%s %d" % (token, value)
+    else:
+        lines[i] = "%s %s" % (token, str(value).lower())
+
+    return oldvalue
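set_bool_value() preserves the spelling of the stored value: a 0/1 setting stays numeric and a true/false setting stays textual. A short sketch:

    header_ = ["\\output_changes false", "\\html_math_output 0"]
    set_bool_value(header_, "\\output_changes", True)     # -> False (the old value)
    set_bool_value(header_, "\\html_math_output", True)   # -> False
    # header_ is now ["\\output_changes true", "\\html_math_output 1"]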
 def get_option_value(line, option):
-    rx = option + '\s*=\s*"([^"]+)"'
+    rx = option + r'\s*=\s*"([^"]+)"'
     rx = re.compile(rx)
     m = rx.search(line)
     if not m:
@@ -347,18 +479,18 @@ def get_option_value(line, option):
 
 
 def set_option_value(line, option, value):
-    rx = '(' + option + '\s*=\s*")[^"]+"'
+    rx = '(' + option + r'\s*=\s*")[^"]+"'
     rx = re.compile(rx)
     m = rx.search(line)
     if not m:
         return line
-    return re.sub(rx, '\g<1>' + value + '"', line)
+    return re.sub(rx, r'\g<1>' + value + '"', line)
 
 
-def del_token(lines, token, start, end = 0):
+def del_token(lines, token, start=0, end=0):
     """ del_token(lines, token, start, end) -> int
 
     Find the first line in lines where token is the first element
     and delete that line. Returns True if we deleted a line, False
     if we did not."""
 
@@ -368,6 +500,41 @@ def del_token(lines, token, start, end = 0):
         del lines[k]
         return True
 
+
+def del_complete_lines(lines, sublines, start=0, end=0):
+    """Delete first occurrence of `sublines` in list `lines`.
+
+    Efficient deletion of a sub-list in a list. Works for any values.
+    The `start` and `end` arguments work similar to list.index()
+
+    Returns True if a deletion was done and False if not.
+
+    >>> l = [1, 0, 1, 1, 1, 2]
+    >>> del_complete_lines(l, [0, 1, 1])
+    True
+    >>> l
+    [1, 1, 2]
+    """
+    i = find_complete_lines(lines, sublines, start, end)
+    if i == -1:
+        return False
+    del(lines[i:i+len(sublines)])
+    return True
+
+
+def del_value(lines, token, start=0, end=0, default=None):
+    """
+    Find the next line that looks like:
+      token followed by other stuff
+    Delete that line and return "followed by other stuff"
+    with leading and trailing whitespace removed.
+
+    If token is not found, return `default`.
+    """
+    i = find_token_exact(lines, token, start, end)
+    if i == -1:
+        return default
+    return lines.pop(i)[len(token):].strip()
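del_value() combines lookup, deletion, and value extraction in one step; unlike del_token() it also hands back what followed the token. Sketch with assumed input:

    lines_ = ["\\begin_inset Quotes eld", "text"]
    del_value(lines_, "\\begin_inset")   # -> "Quotes eld"
    # lines_ is now ["text"]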
 
 
 def find_beginning_of(lines, i, start_token, end_token):
     count = 1
@@ -375,7 +542,7 @@ def find_beginning_of(lines, i, start_token, end_token):
         i = find_tokens_backwards(lines, [start_token, end_token], i-1)
         if i == -1:
             return -1
-        if check_token(lines[i], end_token):
+        if lines[i].startswith(end_token):
             count = count+1
         else:
             count = count-1
@@ -391,7 +558,7 @@ def find_end_of(lines, i, start_token, end_token):
         i = find_tokens(lines, [end_token, start_token], i+1)
         if i == -1:
             return -1
-        if check_token(lines[i], start_token):
+        if lines[i].startswith(start_token):
             count = count+1
         else:
             count = count-1
@@ -400,11 +567,11 @@ def find_end_of(lines, i, start_token, end_token):
     return -1
 
 
-def find_nonempty_line(lines, start, end = 0):
+def find_nonempty_line(lines, start=0, end=0):
     if end == 0:
         end = len(lines)
     for i in range(start, end):
-        if is_nonempty_line(lines[i]):
+        if lines[i].strip():
             return i
     return -1
 
@@ -419,35 +586,33 @@ def find_end_of_layout(lines, i):
     return find_end_of(lines, i, "\\begin_layout", "\\end_layout")
 
 
-def is_in_inset(lines, i, inset):
-    '''
-    Checks if line i is in an inset of the given type.
-    If so, returns starting and ending lines.
-    Otherwise, returns False.
+def is_in_inset(lines, i, inset, default=(-1,-1)):
+    """
+    Check if line i is in an inset of the given type.
+    If so, return starting and ending lines, otherwise `default`.
     Example:
       is_in_inset(document.body, i, "\\begin_inset Tabular")
-    returns False unless i is within a table. If it is, then
-    it returns the line on which the table begins and the one
-    on which it ends. Note that this pair will evaluate to
-    boolean True, so
-      if is_in_inset(...):
+    returns (-1,-1) if `i` is not within a "Tabular" inset (i.e. a table).
+    If it is, then it returns the line on which the table begins and the one
+    on which it ends.
+    Note that this pair will evaluate to boolean True, so (with the optional
+    default value set to False)
+      if is_in_inset(..., default=False):
     will do what you expect.
-    '''
-    defval = (-1, -1)
-    stins = find_token_backwards(lines, inset, i)
-    if stins == -1:
-        return defval
-    endins = find_end_of_inset(lines, stins)
-    # note that this includes the notfound case.
-    if endins < i:
-        return defval
-    return (stins, endins)
+    """
+    start = find_token_backwards(lines, inset, i)
+    if start == -1:
+        return default
+    end = find_end_of_inset(lines, start)
+    if end < i: # this includes the notfound case.
+        return default
+    return (start, end)
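Because the failure value is now a parameter, callers can choose a sentinel that either unpacks cleanly or tests as False (sketch; document.body and i are assumed from the calling context):

    # unpack directly and test the end line:
    start, end = is_in_inset(document.body, i, "\\begin_inset CommandInset ref")
    if end == -1:
        pass   # not inside such an inset
    # or keep the old truth-value style:
    if not is_in_inset(document.body, i, "\\begin_inset Note", default=False):
        pass   # not inside a note inset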
 def get_containing_inset(lines, i):
     '''
     Finds out what kind of inset line i is within. Returns a
-    list containing (i) what follows \begin_inset on the line
+    list containing (i) what follows \\begin_inset on the line
     on which the inset begins, plus the starting and ending line.
     Returns False on any kind of error or if it isn't in an inset.
     So get_containing_inset(document.body, i) might return:
@@ -472,13 +637,16 @@ def get_containing_inset(lines, i):
 
 
 def get_containing_layout(lines, i):
     '''
-    Finds out what kind of layout line i is within. Returns a
-    list containing what follows \begin_layout on the line
-    on which the layout begins, plus the starting and ending line
-    and the start of the paragraph (after all params). I.e, returns:
+    Find out what kind of layout line `i` is within.
+    Return a tuple
         (layoutname, layoutstart, layoutend, startofcontent)
-    Returns False on any kind of error.
+    containing
+      * layout style/name,
+      * start line number,
+      * end line number, and
+      * number of first paragraph line (after all params).
+    Return `False` on any kind of error.
     '''
     j = i
     while True:
@@ -493,10 +661,13 @@ def get_containing_layout(lines, i):
         if endlay < i:
             return False
 
-    lay = get_value(lines, "\\begin_layout", stlay)
-    if lay == "":
-        # shouldn't happen
-        return False
+    layoutname = get_value(lines, "\\begin_layout", stlay)
+    if layoutname == "": # layout style missing
+        # TODO: What shall we do in this case?
+        pass
+        # layoutname == "Standard" # use same fallback as the LyX parser:
+        # raise ValueError("Missing layout name on line %d"%stlay) # diagnosis
+        # return False # generic error response
     par_params = ["\\noindent", "\\indent", "\\indent-toggle", "\\leftindent",
                   "\\start_of_appendix", "\\paragraph_spacing", "\\align",
                   "\\labelwidthstring"]
@@ -505,7 +676,7 @@ def get_containing_layout(lines, i):
         stpar += 1
         if lines[stpar].split(' ', 1)[0] not in par_params:
             break
-    return (lay, stlay, endlay, stpar)
+    return (layoutname, stlay, endlay, stpar)
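A typical consumer of get_containing_layout() unpacks the tuple and starts at the first content line (sketch; document.body and i assumed from context):

    ret = get_containing_layout(document.body, i)
    if ret:
        layoutname, start, end, first_par_line = ret
        for j in range(first_par_line, end):
            pass   # operate on the paragraph content only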
 def count_pars_in_inset(lines, i):
@@ -518,7 +689,8 @@ def count_pars_in_inset(lines, i):
     pars = 0
     for j in range(ins[1], ins[2]):
         m = re.match(r'\\begin_layout (.*)', lines[j])
-        if m and get_containing_inset(lines, j)[0] == ins[0]:
+        found_inset = get_containing_inset(lines, j)
+        if m and found_inset and found_inset[1] == ins[1]:
             pars += 1
 
     return pars
@@ -553,4 +725,3 @@ def find_end_of_sequence(lines, i):
         i = i + 1
 
     return endlay
-
@@ -1,4 +1,4 @@
-#! /usr/bin/env python
+#! /usr/bin/python3
 # -*- coding: utf-8 -*-
 # Copyright (C) 2004 José Matos <jamatos@lyx.org>
 #
lib/lyx2lyx/test_lyx2lyx_tools.py (new file, 79 lines)
@@ -0,0 +1,79 @@
+# This file is part of lyx2lyx
+# -*- coding: utf-8 -*-
+# Copyright (C) 2018 The LyX team
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+" This module tests the auxiliary functions for lyx2lyx."
+
+from lyx2lyx_tools import *
+
+import unittest
+
+class TestParserTools(unittest.TestCase):
+
+    def test_put_cmd_in_ert(self):
+        ert = [u'\\begin_inset ERT',
+               u'status collapsed',
+               u'',
+               u'\\begin_layout Plain Layout',
+               u'',
+               u'',
+               u'\\backslash',
+               u'texttt{Gr',
+               u'\\backslash',
+               u'"{u}',
+               u'\\backslash',
+               u'ss{}e}',
+               u'\\end_layout',
+               u'',
+               u'\\end_inset']
+        ert_open = ert[:]
+        ert_open[1] = u'status open'
+        ert_paragraph = ["\\begin_layout Standard",
+                         u'\\begin_inset ERT',
+                         u'status collapsed',
+                         u'',
+                         u'\\begin_layout Plain Layout',
+                         u'',
+                         u'',
+                         u'\\backslash',
+                         u'texttt{Gr',
+                         u'\\backslash',
+                         u'"{u}',
+                         u'\\backslash',
+                         u'ss{}e}',
+                         u'\\end_layout',
+                         u'',
+                         u'\\end_inset',
+                         u'',
+                         u'',
+                         u'\\end_layout',
+                         u'']
+        self.assertEqual(put_cmd_in_ert("\\texttt{Grüße}"), ert)
+        self.assertEqual(put_cmd_in_ert([u"\\texttt{Grüße}"]), ert)
+        self.assertEqual(put_cmd_in_ert(u"\\texttt{Grüße}", is_open=True), ert_open)
+        self.assertEqual(put_cmd_in_ert(u"\\texttt{Grüße}", as_paragraph=True), ert_paragraph)
+
+    def test_latex_length(self):
+        self.assertEqual(latex_length("-30.5col%"), (True, "-0.305\\columnwidth"))
+        self.assertEqual(latex_length("35baselineskip%"), (True, "0.35\\baselineskip"))
+        self.assertEqual(latex_length("11em"), (False, "11em"))
+        self.assertEqual(latex_length("-0.4pt"), (False, "-0.4pt"))
+
+
+if __name__ == '__main__':
+    unittest.main()
@@ -22,7 +22,7 @@ from parser_tools import *
 
 import unittest
 
-ug = r"""
+lines = r"""
 \begin_layout Standard
 The
 \begin_inset Quotes eld
@@ -56,9 +56,26 @@ Introduction
 describes that, too.
 \end_layout
 
-"""
+""".splitlines()
+
+header = r"""\begin_header
+\origin unavailable
+\paperpagestyle default
+\output_changes false
+\html_math_output 0
+\html_css_as_file 0
+\html_be_strict fallse
+\end_header""".splitlines()
+
+newheader = r"""\begin_header
+\origin unavailable
+\paperpagestyle default
+\output_changes true
+\html_math_output 0
+\html_css_as_file 1
+\html_be_strict false
+\end_header""".splitlines()
 
-lines = ug.splitlines()
 
 class TestParserTools(unittest.TestCase):
@@ -77,10 +94,21 @@ class TestParserTools(unittest.TestCase):
 
     def test_find_token(self):
         self.assertEqual(find_token(lines, '\\emph', 0), 7)
-        self.assertEqual(find_token(lines, '\\emph', 0, 5), -1)
-        self.assertEqual(find_token(lines, '\\emp', 0, 0, True), -1)
-        self.assertEqual(find_token(lines, '\\emp', 0, 0, False), 7)
+        # no line starts with "emph" (without backslash):
+        self.assertEqual(find_token(lines, 'emph', 0), -1)
+        # token on line[start] is found:
+        self.assertEqual(find_token(lines, '\\emph', 7), 7)
+        self.assertEqual(find_token(lines, '\\emph', 8), 9)
+        # token on line[end] is not found:
+        self.assertEqual(find_token(lines, '\\emph', 0, 7), -1)
+        # `ignorews` looks for whitespace-separated tokens:
+        self.assertEqual(find_token(lines, '\\emp', 0, ignorews=True), -1)
+        self.assertEqual(find_token(lines, '\\emph', 0, ignorews=True), 7)
+        self.assertEqual(find_token(lines, '\\emph', 7, ignorews=True), 7)
+        self.assertEqual(find_token(lines, '\\emph', 0, 7, True), -1)
+        # only first token is found:
+        self.assertEqual(find_token(lines, 'Quotes', 0), -1)
+        self.assertEqual(find_token(lines, 'Quotes', 0, ignorews=True), -1)
 
 
     def test_find_tokens(self):
@@ -89,5 +117,128 @@ class TestParserTools(unittest.TestCase):
         self.assertEqual(find_tokens(lines, tokens, 0, 4), -1)
 
 
-if __name__ == '__main__':
-    unittest.main()
+    def test_find_substring(self):
+        # Quotes is not a "token" (substring at the start of any line):
+        self.assertEqual(find_token(lines, "Quotes", 0), -1)
+        self.assertEqual(find_substring(lines, "Quotes", 0), 3)
+        # return -1 on failure:
+        self.assertEqual(find_substring(lines, "Qualen", 0), -1)
+
+    def test_find_re(self):
+        regexp_object = re.compile(r'\\begin.*Quote')
+        # matching starts with line[start] (default: start=0)
+        self.assertEqual(find_re(lines, regexp_object), 3)
+        self.assertEqual(find_re(lines, regexp_object, start=3), 3)
+        # matching ends one line *before* line[end]:
+        self.assertEqual(find_re(lines, regexp_object, start=4), 11)
+        self.assertEqual(find_re(lines, regexp_object, start=4, end=11), -1)
+
+    def test_find_complete_lines(self):
+        sublines = ["\\begin_inset Quotes eld",
+                    "\\end_inset"]
+        # return index of first line of sublines:
+        self.assertEqual(find_complete_lines(lines, sublines), 3)
+        self.assertEqual(find_complete_lines(lines, ["\\end_inset"]), 4)
+        # return -1 if sublines is not found:
+        self.assertEqual(find_complete_lines(lines, ['x']), -1)
+        # search includes line `start`:
+        self.assertEqual(find_complete_lines(lines, sublines, 3), 3)
+        self.assertEqual(find_complete_lines(lines, sublines, 4), 20)
+        self.assertEqual(find_complete_lines(lines, sublines, 21), -1)
+        # search excludes line `end`:
+        self.assertEqual(find_complete_lines(lines, sublines, 4, 20), -1)
+        # an empty list is always found:
+        self.assertEqual(find_complete_lines(lines, []), 0)
+
+    def test_find_across_lines(self):
+        # sub with at least 2 line-breaks (uses find_complete_lines):
+        sub = "Quotes eld\n\\end_inset\n\n\n"
+        self.assertEqual(find_across_lines(lines, sub), 3)
+        # return -1 if not found:
+        self.assertEqual(find_across_lines(lines, sub, 4), -1)
+        self.assertEqual(find_across_lines(lines, sub, 0, 6), -1)
+        sub = "Quotes eld\n\\end_inset\nx\n"
+        self.assertEqual(find_across_lines(lines, sub), -1)
+        sub = "Quotes X\n\\end_inset\n\n"
+        self.assertEqual(find_across_lines(lines, sub), -1)
+        sub = "Quotes eld\n\\end_insert\n\n"
+        self.assertEqual(find_across_lines(lines, sub), -1)
+        # sub with up to 1 line-break:
+        sub = "Quotes eld\n\\end_inset"
+        self.assertEqual(find_across_lines(lines, sub), 3)
+        self.assertEqual(find_across_lines(lines, sub, 4), -1)
+        self.assertEqual(find_across_lines(lines, sub, 0, 4), -1)
+        self.assertEqual(find_across_lines(lines, sub, 4, 3), -1)
+        sub = "Quotes X eld\n\\end_inset\n"
+        self.assertEqual(find_across_lines(lines, sub), -1)
+        sub = "Quotes eld\n\\end_insert\n"
+        self.assertEqual(find_across_lines(lines, sub), -1)
+        # sub without line-break:
+        sub = "end_"
+        self.assertEqual(find_across_lines(lines, sub), 4)
+        self.assertEqual(find_across_lines(lines, sub, 5), 12)
+        self.assertEqual(find_across_lines(lines, sub, 0, 4), -1)
+        self.assertEqual(find_across_lines(lines, sub, 2, 1), -1)
+        self.assertEqual(find_across_lines(lines, "XXX"), -1)
+
+    def test_get_value(self):
+        self.assertEqual(get_value(lines, "\\begin_inset"), "Quotes eld")
+        # TODO: do we want this:
+        self.assertEqual(get_value(lines, "\\begin_inset Quotes"), "Quotes eld")
+        # or only the part after "token":
+        # self.assertEqual(get_value(lines, "\\begin_inset Quotes"), "eld")
+        # return default if not found:
+        self.assertEqual(get_value(lines, "\\begin_insert", default=42), 42)
+        # TODO: do we want this:
+        self.assertEqual(get_value(lines, "\\end_inset", default=None), None)
+        # or empty string if token is found but has no value:
+        # self.assertEqual(get_value(lines, "\\end_inset", default=None), "")
+
+    def test_get_bool_value(self):
+        self.assertEqual(get_bool_value(header, "\\output_changes"), False)
+        self.assertEqual(get_bool_value(newheader, "\\output_changes"), True)
+        self.assertEqual(get_bool_value(header, "\\html_css_as_file"), False)
+        self.assertEqual(get_bool_value(newheader, "\\html_css_as_file"), True)
+        self.assertEqual(get_bool_value(header, "\\something"), None)
+        self.assertEqual(get_bool_value(header, "\\output_changes", 4), None)
+
+    def test_set_bool_value(self):
+        # set to new value, return old value:
+        self.assertEqual(set_bool_value(header, "\\output_changes", True), False)
+        self.assertEqual(set_bool_value(header, "\\html_css_as_file", True), False)
+        # return default if misspelled:
+        self.assertEqual(set_bool_value(header, "\\html_be_strict", False), None)
+        # catch error and insert new setting:
+        self.assertRaises(ValueError, set_bool_value, header, "\\something", 0)
+        self.assertEqual(header, newheader)
+
+    def test_del_complete_lines(self):
+        l = lines[:]
+        sublines = ["\\begin_inset Quotes eld",
+                    "\\end_inset"]
+        # normal operation: remove the first occurrence of sublines:
+        self.assertEqual(del_complete_lines(l, sublines), True)
+        self.assertEqual(l[3], "")
+        self.assertEqual(len(l), len(lines)-len(sublines))
+        # special cases:
+        l = lines[:]
+        self.assertEqual(del_complete_lines(l, sublines, 21), False)
+        self.assertEqual(l, lines)
+        # deleting an empty sublist returns success but does not change the list:
+        self.assertEqual(del_complete_lines(l, [], 21), True)
+        self.assertEqual(l, lines)
+
+    def test_del_value(self):
+        l = lines[:]
+        self.assertEqual(del_value(l, "\\begin_inset"), "Quotes eld")
+        self.assertEqual(del_value(l, "\\begin_inset Quotes"), "erd")
+        # return default if not found:
+        self.assertEqual(del_value(l, "\\begin_insert", default=42), 42)
+        self.assertEqual(del_value(l, "\\end_inset", default=None), "")
+
+
+if __name__ == '__main__':
+    unittest.main()
@@ -18,7 +18,7 @@
 
 " Import unicode_reps from this module for access to the unicode<->LaTeX mapping. "
 
-import sys, os, re
+import sys, os, re, codecs
 
 # Provide support for both python 2 and 3
 PY2 = sys.version_info[0] == 2
@@ -28,14 +28,13 @@ if not PY2:
 
 def read_unicodesymbols():
     " Read the unicodesymbols list of unicode characters and corresponding commands."
-    pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
+    pathname = os.path.abspath(os.path.dirname(__file__))
     filename = os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols')
 
-    # For python 3+ we have to specify the encoding for those systems
-    # where the default is not UTF-8
-    fp = open(filename, encoding="utf8") if (not PY2) else open(filename)
+    # Read as Unicode strings in both, Python 2 and 3
+    # Specify the encoding for those systems where the default is not UTF-8
+    fp = codecs.open(filename, encoding="utf8")
 
-    spec_chars = []
     # A backslash, followed by some non-word character, and then a character
     # in brackets. The idea is to check for constructs like: \"{u}, which is how
     # they are written in the unicodesymbols file; but they can also be written
@@ -43,36 +42,42 @@ def read_unicodesymbols():
     # The two backslashes in the string literal are needed to specify a literal
     # backslash in the regex. Without r prefix, these would be four backslashes.
     r = re.compile(r'\\(\W)\{(\w)\}')
+
+    spec_chars = []
     for line in fp.readlines():
-        if line[0] != '#' and line.strip() != "":
-            # Note: backslashes in the string literals with r prefix are not escaped,
-            # so one backslash in the source file equals one backslash in memory.
-            # Without r prefix backslashes are escaped, so two backslashes in the
-            # source file equal one backslash in memory.
-            line=line.replace(' "',' ') # remove all quotation marks with spaces before
-            line=line.replace('" ',' ') # remove all quotation marks with spaces after
-            line=line.replace(r'\"','"') # unescape "
-            line=line.replace(r'\\','\\') # unescape \
-            try:
-                [ucs4,command,dead] = line.split(None,2)
-                if command[0:1] != "\\":
-                    continue
-                if (line.find("notermination=text") < 0 and
-                    line.find("notermination=both") < 0 and command[-1] != "}"):
-                    command = command + "{}"
-                spec_chars.append([command, unichr(eval(ucs4))])
-            except:
-                continue
-            m = r.match(command)
-            if m != None:
-                command = "\\"
-                commandbl = command
-                command += m.group(1) + m.group(2)
-                commandbl += m.group(1) + ' ' + m.group(2)
-                spec_chars.append([command, unichr(eval(ucs4))])
-                spec_chars.append([commandbl, unichr(eval(ucs4))])
+        if not line.strip() or line.startswith('#'):
+            # skip empty lines and comments
+            continue
+        # Note: backslashes in the string literals with r prefix are not escaped,
+        # so one backslash in the source file equals one backslash in memory.
+        # Without r prefix backslashes are escaped, so two backslashes in the
+        # source file equal one backslash in memory.
+        line=line.replace(' "',' ') # remove all quotation marks with spaces before
+        line=line.replace('" ',' ') # remove all quotation marks with spaces after
+        line=line.replace(r'\"','"') # unescape "
+        line=line.replace(r'\\','\\') # unescape \
+        try:
+            [ucs4,command,dead] = line.split(None,2)
+            if command[0:1] != "\\":
+                continue
+            literal_char = unichr(int(ucs4, 16))
+            if (line.find("notermination=text") < 0 and
+                line.find("notermination=both") < 0 and command[-1] != "}"):
+                command = command + "{}"
+            spec_chars.append([command, literal_char])
+        except:
+            continue
+        m = r.match(command)
+        if m != None:
+            command = "\\"
+            commandbl = command
+            command += m.group(1) + m.group(2)
+            commandbl += m.group(1) + ' ' + m.group(2)
+            spec_chars.append([command, literal_char])
+            spec_chars.append([commandbl, literal_char])
     fp.close()
     return spec_chars
 
 
 unicode_reps = read_unicodesymbols()
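As the loop above shows, unicode_reps ends up as a list of [command, character] pairs, and accent commands are registered in several spellings. For the letter ü, for example, the entries would look roughly like this (illustrative):

    ['\\"{u}', u'ü']   # braced form, as written in the unicodesymbols file
    ['\\"u', u'ü']     # unbraced form
    ['\\" u', u'ü']    # blank-separated form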