Update lyx2lyx from trunk in preparation for 1.6.10.

git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/branches/BRANCH_1_6_X@38487 a592a061-630c-0410-9148-cb99ea01b6c8
This commit is contained in:
Richard Heck 2011-04-25 01:50:05 +00:00
parent 1b329e3187
commit afac0c997a
8 changed files with 3125 additions and 200 deletions

View File

@ -32,7 +32,7 @@ try:
import lyx2lyx_version
version__ = lyx2lyx_version.version
except: # we are running from build directory so assume the last version
version__ = '1.6.0svn'
version__ = '2.0.0svn'
default_debug__ = 2
@ -73,14 +73,15 @@ format_relation = [("0_06", [200], minor_versions("0.6" , 4)),
("0_12", [215], minor_versions("0.12", 1) + ["0.11"]),
("1_0", [215], minor_versions("1.0" , 4)),
("1_1", [215], minor_versions("1.1" , 4)),
("1_1_5", [216], ["1.1.5","1.1.5.1","1.1.5.2","1.1"]),
("1_1_6_0", [217], ["1.1.6","1.1.6.1","1.1.6.2","1.1"]),
("1_1_6_3", [218], ["1.1.6.3","1.1.6.4","1.1"]),
("1_1_5", [216], ["1.1", "1.1.5","1.1.5.1","1.1.5.2"]),
("1_1_6_0", [217], ["1.1", "1.1.6","1.1.6.1","1.1.6.2"]),
("1_1_6_3", [218], ["1.1", "1.1.6.3","1.1.6.4"]),
("1_2", [220], minor_versions("1.2" , 4)),
("1_3", [221], minor_versions("1.3" , 7)),
("1_4", range(222,246), minor_versions("1.4" , 5)),
("1_5", range(246,277), minor_versions("1.5" , 6)),
("1_6", range(277,346), minor_versions("1.6" , 0))]
("1_5", range(246,277), minor_versions("1.5" , 7)),
("1_6", range(277,346), minor_versions("1.6" , 0)),
("2_0", [], minor_versions("2.0", 0))]
####################################################################
# This is useful just for development versions #
@ -104,6 +105,23 @@ def formats_list():
return formats
def format_info():
    " Returns a list with supported file formats."
    out = """Major version:
        minor versions
        formats
"""
    for entry in format_relation:
        versions = entry[2]
        formats = entry[1]
        # First entry of the versions list is the major version, the
        # rest are the minor releases belonging to it.
        major = str(versions[0])
        minors = str(versions[1:])
        # A single-format step prints just that number, otherwise the
        # inclusive range newest - oldest.
        if len(formats) == 1:
            span = str(formats[0])
        else:
            span = "%s - %s" % (formats[-1], formats[0])
        out += "%s\n\t%s\n\t%s\n\n" % (major, minors, span)
    return out + '\n'
def get_end_format():
    " Returns the newest file format number known to this converter."
    # format_relation is ordered oldest to newest; the last entry's
    # last format number is the most recent one.
    return format_relation[-1][1][-1]
@ -158,7 +176,7 @@ class LyX_base:
def __init__(self, end_format = 0, input = "", output = "", error = "",
debug = default_debug__, try_hard = 0, cjk_encoding = '',
language = "english", encoding = "auto"):
final_version = "", language = "english", encoding = "auto"):
"""Arguments:
end_format: final format that the file should be converted. (integer)
@ -180,9 +198,37 @@ class LyX_base:
if end_format:
self.end_format = self.lyxformat(end_format)
# In case the target version and format are both specified
# verify that they are compatible. If not send a warning
# and ignore the version.
if final_version:
message = "Incompatible version %s for specified format %d" % (
final_version, self.end_format)
for version in format_relation:
if self.end_format in version[1]:
if final_version not in version[2]:
self.warning(message)
final_version = ""
elif final_version:
for version in format_relation:
if final_version in version[2]:
# set the last format for that version
self.end_format = version[1][-1]
break
else:
final_version = ""
else:
self.end_format = get_end_format()
if not final_version:
for step in format_relation:
if self.end_format in step[1]:
final_version = step[2][1]
self.final_version = final_version
self.warning("Final version: %s" % self.final_version, 10)
self.warning("Final format: %d" % self.end_format, 10)
self.backend = "latex"
self.textclass = "article"
# This is a hack: We use '' since we don't know the default
@ -208,7 +254,7 @@ class LyX_base:
" Emits a warning and exits if not in try_hard mode."
self.warning(message)
if not self.try_hard:
self.warning("Quiting.")
self.warning("Quitting.")
sys.exit(1)
self.status = 2
@ -475,16 +521,16 @@ class LyX_base:
def convert(self):
"Convert from current (self.format) to self.end_format."
mode, convertion_chain = self.chain()
self.warning("convertion chain: " + str(convertion_chain), 3)
mode, conversion_chain = self.chain()
self.warning("conversion chain: " + str(conversion_chain), 3)
for step in convertion_chain:
for step in conversion_chain:
steps = getattr(__import__("lyx_" + step), mode)
self.warning("Convertion step: %s - %s" % (step, mode),
default_debug__ + 1)
if not steps:
self.error("The convertion to an older "
self.error("The conversion to an older "
"format (%s) is not implemented." % self.format)
multi_conv = len(steps) != 1
@ -517,7 +563,7 @@ class LyX_base:
def chain(self):
""" This is where all the decisions related with the
convertion are taken. It returns a list of modules needed to
conversion are taken. It returns a list of modules needed to
convert the LyX file from self.format to self.end_format"""
self.start = self.format
@ -556,13 +602,14 @@ class LyX_base:
steps = []
if (initial_step, self.start) < (final_step, self.end_format):
mode = "convert"
first_step = 1
full_steps = []
for step in format_relation:
if initial_step <= step[0] <= final_step:
if first_step and len(step[1]) == 1:
first_step = 0
continue
steps.append(step[0])
if initial_step <= step[0] <= final_step and step[2][0] <= self.final_version:
full_steps.append(step)
if full_steps[0][1][-1] == self.format:
full_steps = full_steps[1:]
for step in full_steps:
steps.append(step[0])
else:
mode = "revert"
relation_format = format_relation[:]
@ -581,162 +628,167 @@ class LyX_base:
return mode, steps
def get_toc(self, depth = 4):
" Returns the TOC of this LyX document."
paragraphs_filter = {'Title' : 0,'Chapter' : 1, 'Section' : 2,
'Subsection' : 3, 'Subsubsection': 4}
allowed_insets = ['Quotes']
allowed_parameters = ('\\paragraph_spacing', '\\noindent',
'\\align', '\\labelwidthstring',
"\\start_of_appendix", "\\leftindent")
sections = []
for section in paragraphs_filter.keys():
sections.append('\\begin_layout %s' % section)
# Part of an unfinished attempt to make lyx2lyx gave a more
# structured view of the document.
# def get_toc(self, depth = 4):
# " Returns the TOC of this LyX document."
# paragraphs_filter = {'Title' : 0,'Chapter' : 1, 'Section' : 2,
# 'Subsection' : 3, 'Subsubsection': 4}
# allowed_insets = ['Quotes']
# allowed_parameters = ('\\paragraph_spacing', '\\noindent',
# '\\align', '\\labelwidthstring',
# "\\start_of_appendix", "\\leftindent")
# sections = []
# for section in paragraphs_filter.keys():
# sections.append('\\begin_layout %s' % section)
toc_par = []
i = 0
while 1:
i = find_tokens(self.body, sections, i)
if i == -1:
break
# toc_par = []
# i = 0
# while 1:
# i = find_tokens(self.body, sections, i)
# if i == -1:
# break
j = find_end_of(self.body, i + 1, '\\begin_layout', '\\end_layout')
if j == -1:
self.warning('Incomplete file.', 0)
break
# j = find_end_of(self.body, i + 1, '\\begin_layout', '\\end_layout')
# if j == -1:
# self.warning('Incomplete file.', 0)
# break
section = self.body[i].split()[1]
if section[-1] == '*':
section = section[:-1]
# section = self.body[i].split()[1]
# if section[-1] == '*':
# section = section[:-1]
par = []
# par = []
k = i + 1
# skip paragraph parameters
while not self.body[k].strip() or self.body[k].split()[0] \
in allowed_parameters:
k += 1
# k = i + 1
# # skip paragraph parameters
# while not self.body[k].strip() or self.body[k].split()[0] \
# in allowed_parameters:
# k += 1
while k < j:
if check_token(self.body[k], '\\begin_inset'):
inset = self.body[k].split()[1]
end = find_end_of_inset(self.body, k)
if end == -1 or end > j:
self.warning('Malformed file.', 0)
# while k < j:
# if check_token(self.body[k], '\\begin_inset'):
# inset = self.body[k].split()[1]
# end = find_end_of_inset(self.body, k)
# if end == -1 or end > j:
# self.warning('Malformed file.', 0)
if inset in allowed_insets:
par.extend(self.body[k: end+1])
k = end + 1
else:
par.append(self.body[k])
k += 1
# if inset in allowed_insets:
# par.extend(self.body[k: end+1])
# k = end + 1
# else:
# par.append(self.body[k])
# k += 1
# trim empty lines in the end.
while par and par[-1].strip() == '':
par.pop()
# # trim empty lines in the end.
# while par and par[-1].strip() == '':
# par.pop()
toc_par.append(Paragraph(section, par))
# toc_par.append(Paragraph(section, par))
i = j + 1
# i = j + 1
return toc_par
# return toc_par
class File(LyX_base):
" This class reads existing LyX files."
def __init__(self, end_format = 0, input = "", output = "", error = "",
debug = default_debug__, try_hard = 0, cjk_encoding = ''):
debug = default_debug__, try_hard = 0, cjk_encoding = '',
final_version = ''):
LyX_base.__init__(self, end_format, input, output, error,
debug, try_hard, cjk_encoding)
debug, try_hard, cjk_encoding, final_version)
self.read()
class NewFile(LyX_base):
" This class is to create new LyX files."
def set_header(self, **params):
# set default values
self.header.extend([
"#LyX xxxx created this file."
"For more info see http://www.lyx.org/",
"\\lyxformat xxx",
"\\begin_document",
"\\begin_header",
"\\textclass article",
"\\language english",
"\\inputencoding auto",
"\\font_roman default",
"\\font_sans default",
"\\font_typewriter default",
"\\font_default_family default",
"\\font_sc false",
"\\font_osf false",
"\\font_sf_scale 100",
"\\font_tt_scale 100",
"\\graphics default",
"\\paperfontsize default",
"\\papersize default",
"\\use_geometry false",
"\\use_amsmath 1",
"\\cite_engine basic",
"\\use_bibtopic false",
"\\paperorientation portrait",
"\\secnumdepth 3",
"\\tocdepth 3",
"\\paragraph_separation indent",
"\\defskip medskip",
"\\quotes_language english",
"\\papercolumns 1",
"\\papersides 1",
"\\paperpagestyle default",
"\\tracking_changes false",
"\\end_header"])
#class NewFile(LyX_base):
# " This class is to create new LyX files."
# def set_header(self, **params):
# # set default values
# self.header.extend([
# "#LyX xxxx created this file."
# "For more info see http://www.lyx.org/",
# "\\lyxformat xxx",
# "\\begin_document",
# "\\begin_header",
# "\\textclass article",
# "\\language english",
# "\\inputencoding auto",
# "\\font_roman default",
# "\\font_sans default",
# "\\font_typewriter default",
# "\\font_default_family default",
# "\\font_sc false",
# "\\font_osf false",
# "\\font_sf_scale 100",
# "\\font_tt_scale 100",
# "\\graphics default",
# "\\paperfontsize default",
# "\\papersize default",
# "\\use_geometry false",
# "\\use_amsmath 1",
# "\\cite_engine basic",
# "\\use_bibtopic false",
# "\\paperorientation portrait",
# "\\secnumdepth 3",
# "\\tocdepth 3",
# "\\paragraph_separation indent",
# "\\defskip medskip",
# "\\quotes_language english",
# "\\papercolumns 1",
# "\\papersides 1",
# "\\paperpagestyle default",
# "\\tracking_changes false",
# "\\end_header"])
self.format = get_end_format()
for param in params:
self.set_parameter(param, params[param])
# self.format = get_end_format()
# for param in params:
# self.set_parameter(param, params[param])
def set_body(self, paragraphs):
self.body.extend(['\\begin_body',''])
# def set_body(self, paragraphs):
# self.body.extend(['\\begin_body',''])
for par in paragraphs:
self.body.extend(par.asLines())
# for par in paragraphs:
# self.body.extend(par.asLines())
self.body.extend(['','\\end_body', '\\end_document'])
# self.body.extend(['','\\end_body', '\\end_document'])
class Paragraph:
# unfinished implementation, it is missing the Text and Insets
# representation.
" This class represents the LyX paragraphs."
def __init__(self, name, body=[], settings = [], child = []):
""" Parameters:
name: paragraph name.
body: list of lines of body text.
child: list of paragraphs that descend from this paragraph.
"""
self.name = name
self.body = body
self.settings = settings
self.child = child
# Part of an unfinished attempt to make lyx2lyx gave a more
# structured view of the document.
#class Paragraph:
# # unfinished implementation, it is missing the Text and Insets
# # representation.
# " This class represents the LyX paragraphs."
# def __init__(self, name, body=[], settings = [], child = []):
# """ Parameters:
# name: paragraph name.
# body: list of lines of body text.
# child: list of paragraphs that descend from this paragraph.
# """
# self.name = name
# self.body = body
# self.settings = settings
# self.child = child
def asLines(self):
""" Converts the paragraph to a list of strings, representing
it in the LyX file."""
# def asLines(self):
# """ Converts the paragraph to a list of strings, representing
# it in the LyX file."""
result = ['','\\begin_layout %s' % self.name]
result.extend(self.settings)
result.append('')
result.extend(self.body)
result.append('\\end_layout')
# result = ['','\\begin_layout %s' % self.name]
# result.extend(self.settings)
# result.append('')
# result.extend(self.body)
# result.append('\\end_layout')
if not self.child:
return result
# if not self.child:
# return result
result.append('\\begin_deeper')
for node in self.child:
result.extend(node.asLines())
result.append('\\end_deeper')
# result.append('\\begin_deeper')
# for node in self.child:
# result.extend(node.asLines())
# result.append('\\end_deeper')
return result
# return result

View File

@ -13,6 +13,8 @@ dist_lyx2lyx_PYTHON = \
lyx2lyx_lang.py \
generate_encoding_info.py \
parser_tools.py \
lyx2lyx_tools.py \
unicode_symbols.py \
LyX.py \
lyx_0_06.py \
lyx_0_08.py \
@ -28,6 +30,7 @@ dist_lyx2lyx_PYTHON = \
lyx_1_4.py \
lyx_1_5.py \
lyx_1_6.py \
lyx_2_0.py \
profiling.py \
test_parser_tools.py

View File

@ -38,13 +38,13 @@ Copyright (C) 2007 José Matos and Dekel Tsur""" % LyX.version__
parser.set_defaults(debug=LyX.default_debug__, cjk_encoding = '')
parser.add_option("-d", "--debug", type="int",
help="level=0..2 (O_ quiet, 2_verbose) default: 1")
help="level=0..2 (O_ quiet, 10_verbose) default: 2")
parser.add_option("-q", "--quiet",
action="store_const", const=0, dest="debug")
parser.add_option("-v", "--verbose",
action="store_const", const=1, dest="debug")
parser.add_option("--noisy",
action="store_const", const=2, dest="debug")
action="store_const", const=10, dest="debug")
parser.add_option("-c", "--encoding", dest="cjk_encoding",
help="files in format 248 and lower are read and"
" written in the format of CJK-LyX."
@ -56,8 +56,10 @@ Copyright (C) 2007 José Matos and Dekel Tsur""" % LyX.version__
help= "name of the output file else goes to stdout")
parser.add_option("-t", "--to", dest= "end_format",
help= "destination file format, default (latest)")
parser.add_option("-V", "--final_version", dest= "final_version",
help= "destination version, default (latest)")
parser.add_option("-l", "--list", action="store_true",
help = "list all available formats")
help = "list all available formats and supported versions")
parser.add_option("-n", "--try-hard", action="store_true",
help = "try hard (ignore any convertion errors)")
@ -68,7 +70,7 @@ Copyright (C) 2007 José Matos and Dekel Tsur""" % LyX.version__
options.input = None
if options.list:
print LyX.formats_list()
sys.stderr.write(LyX.format_info())
sys.exit()
else:
del options.list

View File

@ -21,7 +21,7 @@
import re
from parser_tools import find_token, find_end_of, get_value,\
find_token_exact, del_token
find_token_exact
####################################################################
# Private helper functions
@ -30,6 +30,22 @@ def find_end_of_inset(lines, i):
"Finds the matching \end_inset"
return find_end_of(lines, i, "\\begin_inset", "\\end_inset")
def del_token(lines, token, start, end):
    """ Delete the first line in lines[start:end] that begins with token.

    Returns the updated end bound: end if no matching line was found,
    end - 1 otherwise (the list shrank by one line)."""
    pos = find_token_exact(lines, token, start, end)
    if pos == -1:
        return end
    del lines[pos]
    return end - 1
# End of helper functions
####################################################################

View File

@ -24,7 +24,7 @@ import re
from os import access, F_OK
import os.path
from parser_tools import check_token, find_token, \
get_value, del_token, is_nonempty_line, \
get_value, is_nonempty_line, \
find_tokens, find_end_of, find_beginning_of, find_token_exact, find_tokens_exact, \
find_re, find_tokens_backwards
from sys import stdin
@ -84,6 +84,21 @@ def find_end_of_inset(lines, i):
"Finds the matching \end_inset"
return find_end_of(lines, i, "\\begin_inset", "\\end_inset")
def del_token(lines, token, start, end):
    """ Remove the first line in lines[start:end] whose first element
    is token.

    Returns end unchanged when nothing matched, otherwise end - 1 to
    account for the deleted line."""
    where = find_token_exact(lines, token, start, end)
    if where != -1:
        del lines[where]
        return end - 1
    return end
# End of helper functions
####################################################################

View File

@ -22,11 +22,27 @@ import re
import unicodedata
import sys, os
from parser_tools import find_token, find_end_of, find_tokens, get_value, get_value_string
from parser_tools import find_token, find_end_of, find_tokens, get_value
####################################################################
# Private helper functions
def get_value_string(lines, token, start, end = 0, trim = False, default = ""):
""" get_value_string(lines, token, start[[, end], trim, default]) -> string
Return tokens after token as string, in lines, where
token is the first element. When trim is used, the first and last character
of the string is trimmed."""
val = get_value(lines, token, start, end, "")
if not val:
return default
if trim:
return val[1:-1]
return val
def find_end_of_inset(lines, i):
" Find end of inset, where lines[i] is included."
return find_end_of(lines, i, "\\begin_inset", "\\end_inset")
@ -472,6 +488,7 @@ def revert_ltcaption(document):
j = find_end_of_inset(document.body, i + 1)
if j == -1:
document.warning("Malformed LyX document: Could not find end of tabular.")
i += 1
continue
m = i + 1
@ -571,9 +588,10 @@ def revert_tablines(document):
i = find_token(document.body, "\\begin_inset Tabular", i)
if i == -1:
return
j = find_end_of_inset(document.body, i + 1)
j = find_end_of_inset(document.body, i)
if j == -1:
document.warning("Malformed LyX document: Could not find end of tabular.")
i += 1
continue
m = i + 1
@ -781,7 +799,6 @@ def revert_flex(document):
document.body[i] = document.body[i].replace('\\begin_inset Flex', '\\begin_inset CharStyle')
# Discard PDF options for hyperref
def revert_pdf_options(document):
"Revert PDF options for hyperref."
# store the PDF options and delete the entries from the Lyx file
@ -1760,7 +1777,7 @@ def revert_module_names(document):
return
newmodlist = []
for mod in modlist:
if modulemap.has_key(mod):
if mod in modulemap:
newmodlist.append(modulemap[mod])
else:
document.warning("Can't find module %s in the module map!" % mod)

2609
lib/lyx2lyx/lyx_2_0.py Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,7 @@
# This file is part of lyx2lyx
# -*- coding: utf-8 -*-
# Copyright (C) 2002-2004 Dekel Tsur <dekel@lyx.org>, José Matos <jamatos@lyx.org>
# Copyright (C) 2002-2011 Dekel Tsur <dekel@lyx.org>,
# José Matos <jamatos@lyx.org>, Richard Heck <rgheck@comcast.net>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
@ -16,7 +17,132 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
" This modules offer several free functions to help parse lines."
'''
This module offers several free functions to help parse lines.
More documentation is below, but here is a quick guide to what
they do. Optional arguments are marked by brackets.
find_token(lines, token, start[, end[, ignorews]]):
Returns the first line i, start <= i < end, on which
token is found at the beginning. Returns -1 if not
found.
If ignorews is (given and) True, then differences
in whitespace do not count, except that there must be no
extra whitespace following token itself.
find_token_exact(lines, token, start[, end]):
As find_token, but with ignorews True.
find_tokens(lines, tokens, start[, end[, ignorews]]):
Returns the first line i, start <= i < end, on which
one of the tokens in tokens is found at the beginning.
Returns -1 if not found.
If ignorews is (given and) True, then differences
in whitespace do not count, except that there must be no
extra whitespace following token itself.
find_tokens_exact(lines, token, start[, end]):
As find_tokens, but with ignorews True.
find_token_backwards(lines, token, start):
find_tokens_backwards(lines, tokens, start):
As before, but look backwards.
find_re(lines, rexp, start[, end]):
As find_token, but rexp is a regular expression object,
so it has to be passed as e.g.: re.compile(r'...').
get_value(lines, token, start[, end[, default]):
Similar to find_token, but it returns what follows the
token on the found line. Example:
get_value(document.header, "\use_xetex", 0)
will find a line like:
\use_xetex true
and, in that case, return "true". (Note that whitespace
is stripped.) The final argument, default, defaults to "",
and is what is returned if we do not find anything. So you
can use that to set a default.
get_quoted_value(lines, token, start[, end[, default]):
Similar to get_value, but it will strip quotes off the
value, if they are present. So use this one for cases
where the value is normally quoted.
get_option_value(line, option):
This assumes we have a line with something like:
option="value"
and returns value. Returns "" if not found.
del_token(lines, token, start[, end]):
Like find_token, but deletes the line if it finds one.
Returns True if a line got deleted, otherwise False.
find_beginning_of(lines, i, start_token, end_token):
Here, start_token and end_token are meant to be a matching
pair, like "\begin_layout" and "\end_layout". We look for
the start_token that pairs with the end_token that occurs
on or after line i. Returns -1 if not found.
So, in the layout case, this would find the \begin_layout
for the layout line i is in.
Example:
ec = find_token(document.body, "</cell", i)
bc = find_beginning_of(document.body, ec, \
"<cell", "</cell")
Now, assuming no -1s, bc-ec wraps the cell for line i.
find_end_of(lines, i, start_token, end_token):
Like find_beginning_of, but looking for the matching
end_token. This might look like:
bc = find_token(document.body, "<cell", i)
ec = find_end_of(document.body, bc, "<cell", "</cell")
Now, assuming no -1s, bc-ec wrap the next cell.
find_end_of_inset(lines, i):
Specialization of find_end_of for insets.
find_end_of_layout(lines, i):
Specialization of find_end_of for layouts.
is_in_inset(lines, i, inset):
Checks if line i is in an inset of the given type.
If so, returns starting and ending lines. Otherwise,
returns False.
Example:
is_in_inset(document.body, i, "\\begin_inset Tabular")
returns False unless i is within a table. If it is, then
it returns the line on which the table begins and the one
on which it ends. Note that this pair will evaulate to
boolean True, so
if is_in_inset(...):
will do what you expect.
get_containing_inset(lines, i):
Finds out what kind of inset line i is within. Returns a
list containing what follows \begin_inset on the line
on which the inset begins, plus the starting and ending line.
Returns False on any kind of error or if it isn't in an inset.
So get_containing_inset(document.body, i) might return:
("CommandInset ref", 300, 306)
if i is within an InsetRef beginning on line 300 and ending
on line 306.
get_containing_layout(lines, i):
As get_containing_inset, but for layout.
find_nonempty_line(lines, start[, end):
Finds the next non-empty line.
check_token(line, token):
Does line begin with token?
is_nonempty_line(line):
Does line contain something besides whitespace?
'''
import re
# Utilities for one line
def check_token(line, token):
@ -37,19 +163,23 @@ def is_nonempty_line(line):
# Utilities for a list of lines
def find_token(lines, token, start, end = 0, exact = False):
""" find_token(lines, token, start[[, end], exact]) -> int
def find_token(lines, token, start, end = 0, ignorews = False):
""" find_token(lines, token, start[[, end], ignorews]) -> int
Return the lowest line where token is found, and is the first
element, in lines[start, end].
If ignorews is True (default is False), then differences in
whitespace are ignored, except that there must be no extra
whitespace following token itself.
Return -1 on failure."""
if end == 0:
if end == 0 or end > len(lines):
end = len(lines)
m = len(token)
for i in xrange(start, end):
if exact:
if ignorews:
x = lines[i].split()
y = token.split()
if len(x) < len(y):
@ -66,19 +196,19 @@ def find_token_exact(lines, token, start, end = 0):
return find_token(lines, token, start, end, True)
def find_tokens(lines, tokens, start, end = 0, exact = False):
""" find_tokens(lines, tokens, start[[, end], exact]) -> int
def find_tokens(lines, tokens, start, end = 0, ignorews = False):
""" find_tokens(lines, tokens, start[[, end], ignorews]) -> int
Return the lowest line where one token in tokens is found, and is
the first element, in lines[start, end].
Return -1 on failure."""
if end == 0:
if end == 0 or end > len(lines):
end = len(lines)
for i in xrange(start, end):
for token in tokens:
if exact:
if ignorews:
x = lines[i].split()
y = token.split()
if len(x) < len(y):
@ -103,7 +233,7 @@ def find_re(lines, rexp, start, end = 0):
Return -1 on failure."""
if end == 0:
if end == 0 or end > len(lines):
end = len(lines)
for i in xrange(start, end):
if rexp.match(lines[i]):
@ -142,55 +272,61 @@ def find_tokens_backwards(lines, tokens, start):
def get_value(lines, token, start, end = 0, default = ""):
""" get_value(lines, token, start[[, end], default]) -> list of strings
""" get_value(lines, token, start[[, end], default]) -> string
Return tokens after token for the first line, in lines, where
token is the first element."""
Find the next line that looks like:
token followed by other stuff
Returns "followed by other stuff" with leading and trailing
whitespace removed.
"""
i = find_token_exact(lines, token, start, end)
if i == -1:
return default
if len(lines[i].split()) > 1:
return lines[i].split()[1]
else:
return default
l = lines[i].split(None, 1)
if len(l) > 1:
return l[1].strip()
return default
def get_value_string(lines, token, start, end = 0, trim = False, default = ""):
""" get_value_string(lines, token, start[[, end], trim, default]) -> string
def get_quoted_value(lines, token, start, end = 0, default = ""):
""" get_quoted_value(lines, token, start[[, end], default]) -> string
Return tokens after token as string, in lines, where
token is the first element. When trim is used, the first and last character
of the string is trimmed."""
i = find_token_exact(lines, token, start, end)
if i == -1:
return default
if len(lines[i].split()) > 1:
for k in range (0, len(lines[i])):
if lines[i][k] == ' ':
if trim ==False:
return lines[i][k+1:len(lines[i])]
else:
return lines[i][k+2:len(lines[i])-1]
else:
return default
Find the next line that looks like:
token "followed by other stuff"
Returns "followed by other stuff" with leading and trailing
whitespace and quotes removed. If there are no quotes, that is OK too.
So use get_value to preserve possible quotes, this one to remove them,
if they are there.
Note that we will NOT strip quotes from default!
"""
val = get_value(lines, token, start, end, "")
if not val:
return default
return val.strip('"')
def del_token(lines, token, start, end):
def get_option_value(line, option):
    """ Return the value of option="value" found on line, or "".

    Matches the first occurrence of: option = "value" (whitespace
    allowed around '='), and returns the quoted text without quotes.
    Returns "" when the option is absent or its value is empty.
    """
    # Bug fix: the original pattern used the class '([^"+])', which
    # matches exactly ONE character that is neither '"' nor '+'.
    # '([^"]+)' captures the whole run of characters up to the closing
    # quote, which is what callers expect.
    rx = re.compile(option + r'\s*=\s*"([^"]+)"')
    m = rx.search(line)
    if not m:
        return ""
    return m.group(1)
def del_token(lines, token, start, end = 0):
""" del_token(lines, token, start, end) -> int
Find the lower line in lines where token is the first element and
delete that line.
Returns the number of lines remaining."""
Find the first line in lines where token is the first element
and delete that line. Returns True if we deleted a line, False
if we did not."""
k = find_token_exact(lines, token, start, end)
if k == -1:
return end
else:
del lines[k]
return end - 1
return False
del lines[k]
return True
def find_beginning_of(lines, i, start_token, end_token):
@ -231,3 +367,78 @@ def find_nonempty_line(lines, start, end = 0):
if is_nonempty_line(lines[i]):
return i
return -1
def find_end_of_inset(lines, i):
    " Return the line index of the \\end_inset closing the inset at line i."
    # Thin specialization of find_end_of for the inset token pair.
    return find_end_of(lines, i, "\\begin_inset", "\\end_inset")
def find_end_of_layout(lines, i):
    " Return the line index of the \\end_layout closing the layout at line i."
    # Thin specialization of find_end_of for the layout token pair.
    return find_end_of(lines, i, "\\begin_layout", "\\end_layout")
def is_in_inset(lines, i, inset):
    '''
    Check whether line i lies inside an inset of the given type.

    If it does, return the (start, end) line numbers of that inset.
    Otherwise return (-1, -1).

    NOTE(review): the module docstring promises False on failure, but
    the failure value here is the tuple (-1, -1), which is truthy --
    so `if is_in_inset(...)` does NOT distinguish found from not-found.
    Callers must compare against (-1, -1) explicitly. Returning False
    would match the documented contract but might break existing
    callers -- confirm before changing.

    Example:
      is_in_inset(document.body, i, "\\begin_inset Tabular")
    returns (-1, -1) unless i is within a table; if it is, it returns
    the lines on which the table begins and ends.
    '''
    defval = (-1, -1)
    # Nearest inset opening at or before line i.
    stins = find_token_backwards(lines, inset, i)
    if stins == -1:
        return defval
    endins = find_end_of_inset(lines, stins)
    # endins == -1 (no matching \end_inset) is also caught by this
    # comparison, since -1 < i for any valid line index.
    if endins < i:
        return defval
    return (stins, endins)
def get_containing_inset(lines, i):
    '''
    Find out what kind of inset line i is within.

    Returns a tuple (inset_type, start, end): whatever follows
    \\begin_inset on the line where the inset begins, plus the inset's
    starting and ending line numbers. Returns False on any kind of
    error or if line i is not inside an inset.
    '''
    # Bug fix: find_token_backwards takes (lines, token, start); the
    # token and the start index were passed in the wrong order, so the
    # backwards search could never match.
    stins = find_token_backwards(lines, "\\begin_inset", i)
    if stins == -1:
        return False
    endins = find_end_of_inset(lines, stins)
    # Also covers endins == -1 (matching \end_inset not found).
    if endins < i:
        return False
    inset = get_value(lines, "\\begin_inset", stins)
    if inset == "":
        # shouldn't happen
        return False
    return (inset, stins, endins)
def get_containing_layout(lines, i):
    '''
    Find out what kind of layout line i is within.

    Returns a tuple (layout_name, start, end): whatever follows
    \\begin_layout on the line where the layout begins, plus the
    layout's starting and ending line numbers. Returns False on any
    kind of error.
    '''
    # Bug fix: find_token_backwards takes (lines, token, start); the
    # token and the start index were passed in the wrong order, so the
    # backwards search could never match.
    stins = find_token_backwards(lines, "\\begin_layout", i)
    if stins == -1:
        return False
    endins = find_end_of_layout(lines, stins)
    # Also covers endins == -1 (matching \end_layout not found).
    if endins < i:
        return False
    lay = get_value(lines, "\\begin_layout", stins)
    if lay == "":
        # shouldn't happen
        return False
    return (lay, stins, endins)