New lyx2lyx tools.

New lyx2lyx parser tools find_complete_lines() (replaces find_slice), del_complete_lines(), and find_across_lines(). Default value 0 for start argument in utility functions. Rework the implementation of dash-conversion.
2025-01-18 13:40:19 +00:00 · 2018-01-23 08:45:19 +01:00 · 2018-01-23 08:45:19 +01:00 · 8da6cdcf23
commit 8da6cdcf23
parent 40e32d4d27
4 changed files with 345 additions and 214 deletions
--- a/lib/lyx2lyx/lyx_2_2.py
+++ b/lib/lyx2lyx/lyx_2_2.py
@ -34,9 +34,10 @@ from lyx2lyx_tools import (add_to_preamble, put_cmd_in_ert, get_ert,
 #   insert_to_preamble, latex_length, revert_flex_inset,
 #   revert_font_attrs, hex2ratio, str2bool
-from parser_tools import (find_end_of_inset, find_end_of_layout,
+from parser_tools import (del_complete_lines,
-    find_nonempty_line, find_re, find_slice, find_token, find_token_backwards,
+    find_end_of_inset, find_end_of_layout, find_nonempty_line, find_re,
-    get_containing_layout, get_value, check_token)
+    find_token, find_token_backwards, get_containing_layout,
    get_value, check_token)
 ####################################################################
 # Private helper functions
@ -615,130 +616,105 @@ def convert_dashes(document):
    if document.backend != "latex":
        return
    lines = document.body
    i = 0
-    while i < len(document.body):
+    while i+1 < len(lines):
-        words = document.body[i].split()
+        i += 1
        line = lines[i]
        words = line.split()
        if (len(words) > 1 and words[0] == "\\begin_inset"
            and (words[1] in ["CommandInset", "ERT", "External", "Formula",
                              "FormulaMacro", "Graphics", "IPA", "listings"]
-                 or ' '.join(words[1:]) == "Flex Code")):
+                 or line.endswith("Flex Code"))):
            # must not replace anything in insets that store LaTeX contents in .lyx files
            # (math and command insets without overridden read() and write() methods
            # filtering out IPA makes Text::readParToken() more simple
            # skip ERT as well since it is not needed there
            # Flex Code is logical markup, typically rendered as typewriter
-            j = find_end_of_inset(document.body, i)
+            j = find_end_of_inset(lines, i)
            if j == -1:
-                document.warning("Malformed LyX document: Can't find end of " + words[1] + " inset at line " + str(i))
+                document.warning("Malformed LyX document: Can't find end of " +
-                i += 1
+                                 words[1] + " inset at line " + str(i))
            else:
                i = j
            continue
-        if document.body[i] == "\\begin_layout LyX-Code":
+        if lines[i] == "\\begin_layout LyX-Code":
-            j = find_end_of_layout(document.body, i)
+            j = find_end_of_layout(lines, i)
            if j == -1:
                document.warning("Malformed LyX document: "
                    "Can't find end of %s layout at line %d" % (words[1],i))
                i += 1
            else:
                i = j
            continue
-
+        if line.startswith("\\labelwidthstring"):
-        if len(words) > 0 and words[0] in ["\\leftindent", "\\paragraph_spacing", "\\align", "\\labelwidthstring"]:
+            # skip label width string (bug 10243)
            # skip paragraph parameters (bug 10243)
            i += 1
            continue
-        while True:
+
-            j = document.body[i].find("--")
+        if "--" in line:
            if j == -1:
                break
            front = document.body[i][:j]
            back = document.body[i][j+2:]
            # We can have an arbitrary number of consecutive hyphens.
-            # These must be split into the corresponding number of two and three hyphens
+            # Replace as LaTeX does: First try emdash, then endash
-            # We must match what LaTeX does: First try emdash, then endash, then single hyphen
+            line = line.replace("---", "\\threehyphens\n")
-            if back.find("-") == 0:
+            line = line.replace("--", "\\twohyphens\n")
-                back = back[1:]
+            lines[i:i+1] = line.splitlines()
                if len(back) > 0:
                    document.body.insert(i+1, back)
                document.body[i] = front + "\\threehyphens"
            else:
                if len(back) > 0:
                    document.body.insert(i+1, back)
                document.body[i] = front + "\\twohyphens"
        i += 1
-    i = 0
+    # remove ligature breaks between dashes
-    while i < len(document.body):
+    i = 1
-        line = document.body[i]
+    while i < len(lines):
-        while (line.endswith(r"-\SpecialChar \textcompwordmark{}") and
+        line = lines[i]
-               document.body[i+1].startswith("-")):
+        if (line.endswith(r"-\SpecialChar \textcompwordmark{}") and
-            line = line.replace(r"\SpecialChar \textcompwordmark{}",
+            lines[i+1].startswith("-")):
-                                document.body.pop(i+1))
+            lines[i] = line.replace(r"\SpecialChar \textcompwordmark{}",
-            document.body[i] = line
+                                    lines.pop(i+1))
-        i += 1
+        else:
            i += 1
 # Return number of the next line to check for dashes.
 def _dashes_next_line(document, i):
    """Return the index of the next body line to check for dashes.
    Advance from line `i` by one, step over one paragraph parameter line
    (bug 10243) and jump behind insets whose content must not be touched.
    If the end of such an inset cannot be found, a warning is emitted and
    the index of the line starting the inset is returned.
    """
    para_params = ("\\leftindent", "\\paragraph_spacing",
                   "\\align", "\\labelwidthstring")
    # some insets should be skipped in revert_dashes (cf. convert_dashes)
    skipped_insets = ("CommandInset", "ERT", "External", "Formula",
                      "FormulaMacro", "Graphics", "IPA", "listings")
    i += 1
    words = document.body[i].split()
    if words and words[0] in para_params:
        i += 1
        words = document.body[i].split()
    starts_skipped_inset = (len(words) > 1
                            and words[0] == "\\begin_inset"
                            and words[1] in skipped_insets)
    if not starts_skipped_inset:
        return i
    inset_end = find_end_of_inset(document.body, i)
    if inset_end != -1:
        return inset_end + 1
    document.warning("Malformed LyX document: Can't find end of "
                     + words[1] + " inset at line " + str(i))
    return i
 def revert_dashes(document):
    """
    Prevent ligatures of existing --- and --.
-    Convert \\twohyphens and \\threehyphens to -- and ---.
+    Revert \\twohyphens and \\threehyphens to -- and ---.
    Remove preamble code from 2.3->2.2 conversion.
    """
-    # Remove preamble code from 2.3->2.2 conversion:
+    del_complete_lines(document.preamble,
-    dash_renew_lines = find_slice(document.preamble,
+                       ['% Added by lyx2lyx',
-                                  ['% Added by lyx2lyx',
+                        r'\renewcommand{\textendash}{--}',
-                                   r'\renewcommand{\textendash}{--}',
+                        r'\renewcommand{\textemdash}{---}'])
-                                   r'\renewcommand{\textemdash}{---}'])
+    # Insert ligature breaks to prevent ligation of hyphens to dashes:
-    del(document.preamble[dash_renew_lines])
+    lines = document.body
    # Prevent ligation of hyphens:
    i = 0
-    while i < len(document.body)-1:
+    while i+1 < len(lines):
-        # increment i, skip some insets (cf. convert_dashes)
+        i += 1
-        i = _dashes_next_line(document, i)
+        line = lines[i]
-        line = document.body[i]
+        # skip label width string (bug 10243):
        if line.startswith("\\labelwidthstring"):
            continue
        # do not touch hyphens in some insets (cf. convert_dashes):
        if line.startswith("\\begin_inset"):
            try:
                if line.split()[1] in ["CommandInset", "ERT", "External",
                                       "Formula", "FormulaMacro", "Graphics",
                                       "IPA", "listings"]:
                    j = find_end_of_inset(lines, i)
                    if j == -1:
                        document.warning("Malformed LyX document: Can't find "
                                    "end of %s inset at line %d." % (itype, i))
                        continue
                    i = j
            except IndexError:
                continue
        if "--" in line:
            line = line.replace("--", "-\\SpecialChar \\textcompwordmark{}\n-")
            document.body[i:i+1] = line.split('\n')
-    # Convert \twohyphens and \threehyphens:
+    # Revert \twohyphens and \threehyphens:
-    i = 0
+    i = 1
-    while i < len(document.body):
+    while i < len(lines):
-        # skip some insets (see convert_dashes())
+        line = lines[i]
-        i = _dashes_next_line(document, i-1)
+        if not line.endswith("hyphens"):
-        replaced = False
+            i +=1
-        if document.body[i].find("\\twohyphens") >= 0:
+        elif line.endswith("\\twohyphens") or line.endswith("\\threehyphens"):
-            document.body[i] = document.body[i].replace("\\twohyphens", "--")
+            line = line.replace("\\twohyphens", "--")
-            replaced = True
+            line = line.replace("\\threehyphens", "---")
-        if document.body[i].find("\\threehyphens") >= 0:
+            lines[i] = line + lines.pop(i+1)
            document.body[i] = document.body[i].replace("\\threehyphens", "---")
            replaced = True
        if replaced and i+1 < len(document.body) and \
           (document.body[i+1].find("\\") != 0 or \
            document.body[i+1].find("\\twohyphens") == 0 or
            document.body[i+1].find("\\threehyphens") == 0) and \
           len(document.body[i]) + len(document.body[i+1]) <= 80:
            document.body[i] = document.body[i] + document.body[i+1]
            document.body[i+1:i+2] = []
        else:
            i += 1
@ -879,16 +855,16 @@ def revert_georgian(document):
        document.language = "english"
        i = find_token(document.header, "\\language georgian", 0)
        if i != -1:
-    	    document.header[i] = "\\language english"
+            document.header[i] = "\\language english"
        j = find_token(document.header, "\\language_package default", 0)
        if j != -1:
-    	    document.header[j] = "\\language_package babel"
+            document.header[j] = "\\language_package babel"
        k = find_token(document.header, "\\options", 0)
        if k != -1:
-    	    document.header[k] = document.header[k].replace("\\options", "\\options georgian,")
+            document.header[k] = document.header[k].replace("\\options", "\\options georgian,")
        else:
-    	    l = find_token(document.header, "\\use_default_options", 0)
+            l = find_token(document.header, "\\use_default_options", 0)
-    	    document.header.insert(l + 1, "\\options georgian")
+            document.header.insert(l + 1, "\\options georgian")
 def revert_sigplan_doi(document):
--- a/lib/lyx2lyx/lyx_2_3.py
+++ b/lib/lyx2lyx/lyx_2_3.py
@ -24,9 +24,9 @@ import sys, os
 # Uncomment only what you need to import, please.
-from parser_tools import del_token, find_end_of, find_end_of_layout, \
+from parser_tools import del_token, del_value, del_complete_lines, \
-    find_end_of_inset, find_re, find_slice, find_token, \
+    find_end_of, find_end_of_layout, find_end_of_inset, find_re, \
-    find_token_backwards, get_containing_layout, \
+    find_token, find_token_backwards, get_containing_layout, \
    get_bool_value, get_value, get_quoted_value
 #  find_tokens, find_token_exact, is_in_inset, \
 #  check_token, get_option_value
@ -1303,7 +1303,7 @@ def revert_biblatex(document):
                      "Citealt*", "Citealp*", "Citeauthor*", "fullcite", "footcite",\
                      "footcitet", "footcitep", "footcitealt", "footcitealp",\
                      "footciteauthor", "footciteyear", "footciteyearpar",\
-		      "citefield", "citetitle", "cite*" ]
+                      "citefield", "citetitle", "cite*" ]
    i = 0
    while (True):
@ -1843,19 +1843,16 @@ def revert_chapterbib(document):
 def convert_dashligatures(document):
    "Set 'use_dash_ligatures' according to content."
-    use_dash_ligatures = None
+    # Look for and remove dashligatures workaround from 2.3->2.2 reversion,
-    # Eventually remove preamble code from 2.3->2.2 conversion:
+    # set use_dash_ligatures to True if found, to None else.
-    dash_renew_lines = find_slice(document.preamble,
+    use_dash_ligatures = del_complete_lines(document.preamble,
-                                  ['% Added by lyx2lyx',
+                                ['% Added by lyx2lyx',
-                                   r'\renewcommand{\textendash}{--}',
+                                 r'\renewcommand{\textendash}{--}',
-                                   r'\renewcommand{\textemdash}{---}'])
+                                 r'\renewcommand{\textemdash}{---}']) or None
    del(document.preamble[dash_renew_lines])
    use_dash_ligatures = bool(dash_renew_lines.stop)
    if use_dash_ligatures is None:
-        # Look for dashes:
+        # Look for dashes (Documents by LyX 2.1 or older have "\twohyphens\n"
-        # (Documents by LyX 2.1 or older have "\twohyphens\n" or "\threehyphens\n"
+        # or "\threehyphens\n" as interim representation for -- and ---.)
        # as interim representation for dash ligatures)
        has_literal_dashes = False
        has_ligature_dashes = False
        j = 0
@ -1863,16 +1860,19 @@ def convert_dashligatures(document):
            # Skip some document parts where dashes are not converted
            if (i < j) or line.startswith("\\labelwidthstring"):
                continue
-            words = line.split()
+            if line.startswith("\\begin_inset"):
-            if (len(words) > 1 and words[0] == "\\begin_inset"
+                try:
-                and (words[1] in ["CommandInset", "ERT", "External", "Formula",
+                    it = line.split()[1]
-                                 "FormulaMacro", "Graphics", "IPA", "listings"]
+                except IndexError:
-                     or ' '.join(words[1:]) == "Flex Code")):
+                    continue
-                j = find_end_of_inset(document.body, i)
+                if (it in ["CommandInset", "ERT", "External", "Formula",
-                if j == -1:
+                           "FormulaMacro", "Graphics", "IPA", "listings"]
-                    document.warning("Malformed LyX document: "
+                    or line.endswith("Flex Code")):
-                        "Can't find end of %s inset at line %d" % (words[1],i))
+                    j = find_end_of_inset(document.body, i)
-                continue
+                    if j == -1:
                        document.warning("Malformed LyX document: Can't "
                            "find end of %s inset at line %d." % (itype, i))
                        continue
            if line == "\\begin_layout LyX-Code":
                j = find_end_of_layout(document.body, i)
                if j == -1:
@ -1898,22 +1898,16 @@ def convert_dashligatures(document):
            use_dash_ligatures = True
    # insert the setting if there is a preferred value
    if use_dash_ligatures is not None:
-        i = find_token(document.header, "\\use_microtype", 0)
+        i = find_token(document.header, "\\graphics")
-        if i != -1:
+        document.header.insert(i, "\\use_dash_ligatures %s"
-            document.header.insert(i+1, "\\use_dash_ligatures %s"
+                               % str(use_dash_ligatures).lower())
                                % str(use_dash_ligatures).lower())
 def revert_dashligatures(document):
    """Remove font ligature settings for en- and em-dashes.
    Revert conversion of \twodashes or \threedashes to literal dashes."""
-    i = find_token(document.header, "\\use_dash_ligatures", 0)
+    use_dash_ligatures = del_value(document.header, "\\use_dash_ligatures")
-    if i == -1:
+    if use_dash_ligatures != "true" or document.backend != "latex":
        return
    use_dash_ligatures = get_bool_value(document.header, "\\use_dash_ligatures", i)
    del document.header[i]
    if not use_dash_ligatures or document.backend != "latex":
        return
    j = 0
    new_body = []
    for i, line in enumerate(document.body):
@ -2018,8 +2012,8 @@ def revert_mathindent(document):
    else:
        k = find_token(document.header, "\\options", 0)
        if k != -1:
-    	    document.header[k] = document.header[k].replace("\\options", "\\options fleqn,")
+            document.header[k] = document.header[k].replace("\\options", "\\options fleqn,")
-    	    del document.header[i]
+            del document.header[i]
        else:
            l = find_token(document.header, "\\use_default_options", 0)
            document.header.insert(l, "\\options fleqn")
--- a/lib/lyx2lyx/parser_tools.py
+++ b/lib/lyx2lyx/parser_tools.py
@ -156,53 +156,6 @@ count_pars_in_inset(lines, i):
 import re
 # Fast search in lists
 def find_slice(l, sl, start = 0, stop = None):
    """Return position of first occurrence of sequence `sl` in list `l`
    as a `slice` object.

    `start` and `stop` restrict the search like the corresponding
    arguments of list.index(); a false `stop` means "up to the end of `l`".

    >>> find_slice([1, 2, 3, 1, 1, 2], (1, 2))
    slice(0, 2, None)

    The return value can be used to delete or substitute the sub-list:

    >>> l = [1, 0, 1, 1, 1, 2]
    >>> s = find_slice(l, [0, 1, 1])
    >>> del(l[s]); l
    [1, 1, 2]
    >>> s = find_slice(l, (1, 2))
    >>> l[s] = [3]; l
    [1, 3]

    The start argument works similar to list.index()

    >>> find_slice([1, 2, 3, 1, 1 ,2], (1, 2), start = 1)
    slice(4, 6, None)

    Use the `stop` attribute of the returned `slice` to test for success
    (`slice(0, 0)` is returned if `sl` is not found):

    >>> s1 = find_slice([2, 3, 1], (3, 1))
    >>> s2 = find_slice([2, 3, 1], (2, 1))
    >>> if s1.stop and not s2.stop:
    ...     print("wow")
    wow

    An empty `sl` matches at `start` and yields an empty slice there
    (with `start` == 0 this cannot be told apart from "not found"):

    >>> find_slice([1, 2, 3], [], start = 2)
    slice(2, 2, None)
    """
    N = len(sl) # length of sub-list
    if not N:
        # Nothing to search for. Without this guard the loop below never
        # assigns `i` and its `else` branch fails with an unbound local.
        return slice(start, start)
    stop = stop or len(l)
    try:
        while True:
            for j, value in enumerate(sl):
                i = l.index(value, start, stop)
                if j and i != start:
                    # `value` does not directly follow the previous hit:
                    # restart at the earliest position that can still match.
                    start = i-j
                    break
                start = i +1
            else:
                return slice(i+1-N, i+1)
    except ValueError: # sub list `sl` not found
        return slice(0, 0)
 # Utilities for one line
 def check_token(line, token):
    """ check_token(line, token) -> bool
@ -212,7 +165,6 @@ def check_token(line, token):
    Deprecated. Use line.startswith(token).
    """
    return line.startswith(token)
@ -225,40 +177,40 @@ def is_nonempty_line(line):
 # Utilities for a list of lines
-def find_token(lines, token, start, end = 0, ignorews = False):
+def find_token(lines, token, start=0, end=0, ignorews=False):
    """ find_token(lines, token, start[[, end], ignorews]) -> int
    Return the lowest line where token is found, and is the first
    element, in lines[start, end].
    If ignorews is True (default is False), then differences in
-    whitespace are ignored, except that there must be no extra
+    whitespace are ignored, but there must be whitespace following
-    whitespace following token itself.
+    token itself.
    Return -1 on failure."""
    if end == 0 or end > len(lines):
        end = len(lines)
-    m = len(token)
+    if ignorews:
        y = token.split()
    for i in range(start, end):
        if ignorews:
            x = lines[i].split()
            y = token.split()
            if len(x) < len(y):
                continue
            if x[:len(y)] == y:
                return i
        else:
-            if lines[i][:m] == token:
+            if lines[i].startswith(token):
                return i
    return -1
-def find_token_exact(lines, token, start, end = 0):
+def find_token_exact(lines, token, start=0, end=0):
    return find_token(lines, token, start, end, True)
-def find_tokens(lines, tokens, start, end = 0, ignorews = False):
+def find_tokens(lines, tokens, start=0, end=0, ignorews=False):
    """ find_tokens(lines, tokens, start[[, end], ignorews]) -> int
    Return the lowest line where one token in tokens is found, and is
@ -278,17 +230,17 @@ def find_tokens(lines, tokens, start, end = 0, ignorews = False):
                if x[:len(y)] == y:
                    return i
            else:
-                if lines[i][:len(token)] == token:
+                if lines[i].startswith(token):
                    return i
    return -1
-def find_tokens_exact(lines, tokens, start, end = 0):
+def find_tokens_exact(lines, tokens, start=0, end=0):
    return find_tokens(lines, tokens, start, end, True)
-def find_re(lines, rexp, start, end = 0):
+def find_re(lines, rexp, start=0, end=0):
-    """ find_token_re(lines, rexp, start[, end]) -> int
+    """ find_re(lines, rexp, start[, end]) -> int
    Return the lowest line where rexp, a regular expression, is found
    in lines[start, end].
@ -310,10 +262,8 @@ def find_token_backwards(lines, token, start):
    element, in lines[start, end].
    Return -1 on failure."""
    m = len(token)
    for i in range(start, -1, -1):
-        line = lines[i]
+        if lines[i].startswith(token):
        if line[:m] == token:
            return i
    return -1
@ -328,12 +278,86 @@ def find_tokens_backwards(lines, tokens, start):
    for i in range(start, -1, -1):
        line = lines[i]
        for token in tokens:
-            if line[:len(token)] == token:
+            if line.startswith(token):
                return i
    return -1
-def get_value(lines, token, start, end = 0, default = ""):
+def find_complete_lines(lines, sublines, start=0, end=0):
    """Find first occurrence of sequence `sublines` in list `lines`.
    Return index of first line or -1 on failure.
    Efficient search for a sub-list in a large list. Works for any values.
    >>> find_complete_lines([1, 2, 3, 1, 1, 2], [1, 2])
    0
    The `start` and `end` arguments work similar to list.index()
    >>> find_complete_lines([1, 2, 3, 1, 1 ,2], [1, 2], start=1)
    4
    >>> find_complete_lines([1, 2, 3, 1, 1 ,2], [1, 2], start=1, end=4)
    -1
    The return value can be used to substitute the sub-list.
    Take care to check before use:
    >>> l = [1, 1, 2]
    >>> s = find_complete_lines(l, [1, 2])
    >>> if s != -1:
    ...     l[s:s+2] = [3]; l
    [1, 3]
    See also del_complete_lines().
    """
    if not sublines:
        return start
    end = end or len(lines)
    N = len(sublines)
    try:
        while True:
            for j, value in enumerate(sublines):
                i = lines.index(value, start, end)
                if j and i != start:
                    start = i-j
                    break
                start = i + 1
            else:
                return i +1 - N
    except ValueError: # `sublines` not found
        return -1
 def find_across_lines(lines, sub, start=0, end=0):
    """Find the string `sub`, which may span several lines, in `lines`.

    `sub` is split at its line breaks. The first part must end a line,
    the last part must start a line and all parts in between must equal
    complete lines. A `sub` without line break may be anywhere in a line.

    `start` and `end` restrict the search; `end` == 0 means "up to the
    end of `lines`".
    Return the index of the line where the match starts or -1 on failure.
    """
    sublines = sub.splitlines()
    if len(sublines) > 2:
        # at least 3 lines: the middle one(s) are complete -> use index search
        if end == 1:
            # lines[start:1] cannot hold a multi-line match. Bail out here,
            # because `end-1` would be 0, which the helper reads as "no limit".
            return -1
        middle = sublines[1:-1]
        i = find_complete_lines(lines, middle, start+1, end-1)
        if i < start+1:
            return -1
        # NOTE(review): only the first occurrence of the middle lines is
        # examined; a later occurrence with matching context is not found.
        try:
            # the last part starts the line right behind the middle lines
            if (lines[i-1].endswith(sublines[0]) and
                lines[i+len(middle)].startswith(sublines[-1])):
                return i-1
        except IndexError:
            pass
    elif len(sublines) > 1:
        # last subline must start a line
        i = find_token(lines, sublines[-1], start, end)
        if i < start + 1:
            return -1
        if lines[i-1].endswith(sublines[0]):
            return i-1
    else: # no line-break, may be in the middle of a line
        if end == 0 or end > len(lines):
            end = len(lines)
        for i in range(start, end):
            if sub in lines[i]:
                return i
    return -1
 def get_value(lines, token, start=0, end=0, default=""):
    """ get_value(lines, token, start[[, end], default]) -> string
    Find the next line that looks like:
@ -341,17 +365,19 @@ def get_value(lines, token, start, end = 0, default = ""):
    Returns "followed by other stuff" with leading and trailing
    whitespace removed.
    """
    i = find_token_exact(lines, token, start, end)
    if i == -1:
        return default
    # TODO: establish desired behaviour, eventually change to
    #  return lines.pop(i)[len(token):].strip() # or default
    # see test_parser_tools.py
    l = lines[i].split(None, 1)
    if len(l) > 1:
        return l[1].strip()
    return default
-def get_quoted_value(lines, token, start, end = 0, default = ""):
+def get_quoted_value(lines, token, start=0, end=0, default=""):
    """ get_quoted_value(lines, token, start[[, end], default]) -> string
    Find the next line that looks like:
@ -368,8 +394,8 @@ def get_quoted_value(lines, token, start, end = 0, default = ""):
    return val.strip('"')
-def get_bool_value(lines, token, start, end = 0, default = None):
+def get_bool_value(lines, token, start=0, end=0, default=None):
-    """ get_value(lines, token, start[[, end], default]) -> string
+    """ get_bool_value(lines, token, start[[, end], default]) -> string
    Find the next line that looks like:
      token bool_value
@ -405,7 +431,7 @@ def set_option_value(line, option, value):
    return re.sub(rx, '\g<1>' + value + '"', line)
-def del_token(lines, token, start, end = 0):
+def del_token(lines, token, start=0, end=0):
    """ del_token(lines, token, start, end) -> int
    Find the first line in lines where token is the first element
@ -418,6 +444,41 @@ def del_token(lines, token, start, end = 0):
    del lines[k]
    return True
 def del_complete_lines(lines, sublines, start=0, end=0):
    """Remove the first run of `sublines` from the list `lines` (in place).

    `start` and `end` are passed on to find_complete_lines(), which locates
    the run. Return True if the run was found and removed, False otherwise.

    >>> l = [1, 0, 1, 1, 1, 2]
    >>> del_complete_lines(l, [0, 1, 1])
    True
    >>> l
    [1, 1, 2]
    """
    first = find_complete_lines(lines, sublines, start, end)
    found = first != -1
    if found:
        lines[first:first+len(sublines)] = []
    return found
 def del_value(lines, token, start=0, end=0, default=None):
    """Remove the line holding `token` and return the value behind it.

    The line is located with find_token_exact(lines, token, start, end).
    It is removed from `lines`, and the text after the first len(token)
    characters is returned with surrounding whitespace stripped.
    If no such line is found, `lines` is left alone and `default` is
    returned.
    """
    pos = find_token_exact(lines, token, start, end)
    if pos != -1:
        removed = lines.pop(pos)
        return removed[len(token):].strip()
    return default
 def find_beginning_of(lines, i, start_token, end_token):
    count = 1
@ -425,7 +486,7 @@ def find_beginning_of(lines, i, start_token, end_token):
        i = find_tokens_backwards(lines, [start_token, end_token], i-1)
        if i == -1:
            return -1
-        if check_token(lines[i], end_token):
+        if lines[i].startswith(end_token):
            count = count+1
        else:
            count = count-1
@ -441,7 +502,7 @@ def find_end_of(lines, i, start_token, end_token):
        i = find_tokens(lines, [end_token, start_token], i+1)
        if i == -1:
            return -1
-        if check_token(lines[i], start_token):
+        if lines[i].startswith(start_token):
            count = count+1
        else:
            count = count-1
@ -450,11 +511,11 @@ def find_end_of(lines, i, start_token, end_token):
    return -1
-def find_nonempty_line(lines, start, end = 0):
+def find_nonempty_line(lines, start=0, end=0):
    if end == 0:
        end = len(lines)
    for i in range(start, end):
-        if is_nonempty_line(lines[i]):
+        if lines[i].strip():
            return i
    return -1
--- a/lib/lyx2lyx/test_parser_tools.py
+++ b/lib/lyx2lyx/test_parser_tools.py
@ -77,10 +77,21 @@ class TestParserTools(unittest.TestCase):
    def test_find_token(self):
        self.assertEqual(find_token(lines, '\\emph', 0), 7)
-        self.assertEqual(find_token(lines, '\\emph', 0, 5), -1)
+        # no line starts with "emph" (without backslash):
        self.assertEqual(find_token(lines, '\\emp', 0, 0, True), -1)
        self.assertEqual(find_token(lines, '\\emp', 0, 0, False), 7)
        self.assertEqual(find_token(lines, 'emph', 0), -1)
        # token on line[start] is found:
        self.assertEqual(find_token(lines, '\\emph', 7), 7)
        self.assertEqual(find_token(lines, '\\emph', 8), 9)
        # token on line[end] is not found:
        self.assertEqual(find_token(lines, '\\emph', 0, 7), -1)
        # `ignorews` looks for whitespace-separated tokens:
        self.assertEqual(find_token(lines, '\\emp', 0, ignorews=True), -1)
        self.assertEqual(find_token(lines, '\\emph',0, ignorews=True), 7)
        self.assertEqual(find_token(lines, '\\emph', 7, ignorews=True), 7)
        self.assertEqual(find_token(lines, '\\emph', 0, 7, True), -1)
        # only first token is found:
        self.assertEqual(find_token(lines, 'Quotes', 0), -1)
        self.assertEqual(find_token(lines, 'Quotes', 0, ignorews=True), -1)
    def test_find_tokens(self):
@ -89,5 +100,94 @@ class TestParserTools(unittest.TestCase):
        self.assertEqual(find_tokens(lines, tokens, 0, 4), -1)
    def test_find_complete_lines(self):
        # Check find_complete_lines() against the module-level sample `lines`.
        sublines = ["\\begin_inset Quotes eld",
                    "\\end_inset"]
        # return index of first line of sublines:
        self.assertEqual(find_complete_lines(lines, sublines), 3)
        self.assertEqual(find_complete_lines(lines, ["\\end_inset"]), 4)
        # return -1 if sublines is not found:
        self.assertEqual(find_complete_lines(lines, ['x']), -1)
        # search includes line `start`:
        self.assertEqual(find_complete_lines(lines, sublines, 3), 3)
        self.assertEqual(find_complete_lines(lines, sublines, 4), 20)
        self.assertEqual(find_complete_lines(lines, sublines, 21), -1)
        # search excludes line `end`:
        self.assertEqual(find_complete_lines(lines, sublines, 4, 20), -1)
        # an empty list is always found (at `start`, here the default 0):
        self.assertEqual(find_complete_lines(lines, []), 0)
    def test_find_across_lines(self):
        # Check find_across_lines() against the module-level sample `lines`.
        # sub with at least 2 line-breaks (uses find_complete_lines):
        sub = "Quotes eld\n\\end_inset\n\n\n"
        self.assertEqual(find_across_lines(lines, sub), 3)
        # Return -1 if not found
        self.assertEqual(find_across_lines(lines, sub, 4), -1)
        self.assertEqual(find_across_lines(lines, sub, 0, 6), -1)
        # ... because the last part differs:
        sub = "Quotes eld\n\\end_inset\nx\n"
        self.assertEqual(find_across_lines(lines, sub), -1)
        # ... because the first part differs:
        sub = "Quotes X\n\\end_inset\n\n"
        self.assertEqual(find_across_lines(lines, sub), -1)
        # ... because the complete middle line differs:
        sub = "Quotes eld\n\\end_insert\n\n"
        self.assertEqual(find_across_lines(lines, sub), -1)
        # sub with up to 1 line-break:
        sub = "Quotes eld\n\\end_inset"
        self.assertEqual(find_across_lines(lines, sub), 3)
        # `start` and `end` restrict the search:
        self.assertEqual(find_across_lines(lines, sub, 4), -1)
        self.assertEqual(find_across_lines(lines, sub, 0, 4), -1)
        self.assertEqual(find_across_lines(lines, sub, 4, 3), -1)
        # first part differs:
        sub = "Quotes X eld\n\\end_inset\n"
        self.assertEqual(find_across_lines(lines, sub), -1)
        # last part differs:
        sub = "Quotes eld\n\\end_insert\n"
        self.assertEqual(find_across_lines(lines, sub), -1)
        # sub without line-break
        sub = "end_"
        self.assertEqual(find_across_lines(lines, sub), 4)
        self.assertEqual(find_across_lines(lines, sub, 5), 12)
        self.assertEqual(find_across_lines(lines, sub, 0, 4), -1)
        self.assertEqual(find_across_lines(lines, sub, 2, 1), -1)
        self.assertEqual(find_across_lines(lines, "XXX"), -1)
    def test_get_value(self):
        # Pin the current behaviour of get_value(); the TODO items below mark
        # cases where the desired behaviour is still to be decided.
        self.assertEqual(get_value(lines, "\\begin_inset"), "Quotes eld")
        # TODO: do we want this:
        self.assertEqual(get_value(lines, "\\begin_inset Quotes"), "Quotes eld")
        # or only the part after "token":
        #  self.assertEqual(get_value(lines, "\\begin_inset Quotes"), "eld")
        # return default if not found
        self.assertEqual(get_value(lines, "\\begin_insert", default=42), 42)
        # TODO: do we want this:
        self.assertEqual(get_value(lines, "\\end_inset", default=None), None)
        # or empty string if token is found but has no value:
        #  self.assertEqual(get_value(lines, "\\end_inset", default=None), "")
    def test_del_complete_lines(self):
        # Work on a copy, del_complete_lines() changes its argument in place.
        l = lines[:]
        sublines = ["\\begin_inset Quotes eld",
                    "\\end_inset"]
        # normal operation: remove the first occurrence of sublines:
        self.assertEqual(del_complete_lines(l, sublines), True)
        self.assertEqual(l[3], "")
        self.assertEqual(len(l), len(lines)-len(sublines))
        # special cases:
        # nothing is found from line 21 on -> list is unchanged:
        l = lines[:]
        self.assertEqual(del_complete_lines(l, sublines, 21), False)
        self.assertEqual(l, lines)
        # deleting empty sublist returns success but does not change the list:
        self.assertEqual(del_complete_lines(l, [], 21), True)
        self.assertEqual(l, lines)
    def test_del_value(self):
        # Work on a copy, del_value() removes the matching line from the list.
        l = lines[:]
        # each call removes the line it found, so the next call finds
        # the next matching line:
        self.assertEqual(del_value(l, "\\begin_inset"), "Quotes eld")
        self.assertEqual(del_value(l, "\\begin_inset Quotes"), "erd")
        # return default if not found
        self.assertEqual(del_value(l, "\\begin_insert", default=42), 42)
        # a token without value returns the empty string, not `default`:
        self.assertEqual(del_value(l, "\\end_inset", default=None), "")
 if __name__ == '__main__':
    unittest.main()