Allow 2.0.8 to read and write 2.1.x files. Also incorporates some
bugfixes in lyx2lyx that made it into 2.1.0.
2024-11-22 10:00:33 +00:00 · 2014-04-11 11:41:56 -04:00 · 2014-04-11 11:41:56 -04:00 · 8c3d6f7178
commit 8c3d6f7178
parent 7267d2f5c5
9 changed files with 4723 additions and 53 deletions
--- a/lib/configure.py
+++ b/lib/configure.py
@@ -672,6 +672,7 @@ def checkFormatEntries(dtl_tools):
 \Format lyx14x     14.lyx  "LyX 1.4.x"             "" ""	""	"document"
 \Format lyx15x     15.lyx  "LyX 1.5.x"             "" ""	""	"document"
 \Format lyx16x     16.lyx  "LyX 1.6.x"             "" ""	""	"document,menu=export"
+\Format lyx21x     21.lyx  "LyX 2.1.x"             "" ""	""	"document,menu=export"
 \Format clyx       cjklyx "CJK LyX 1.4.x (big5)"  "" ""	""	"document"
 \Format jlyx       cjklyx "CJK LyX 1.4.x (euc-jp)" "" ""	""	"document"
 \Format klyx       cjklyx "CJK LyX 1.4.x (euc-kr)" "" ""	""	"document"
@@ -1008,6 +1009,7 @@ def checkConverterEntries():
 \converter lyx        lyx14x     "python -tt $$s/lyx2lyx/lyx2lyx -t 245 $$i > $$o"	""
 \converter lyx        lyx15x     "python -tt $$s/lyx2lyx/lyx2lyx -t 276 $$i > $$o"	""
 \converter lyx        lyx16x     "python -tt $$s/lyx2lyx/lyx2lyx -t 345 $$i > $$o"	""
+\converter lyx        lyx21x     "python -tt $$s/lyx2lyx/lyx2lyx -t 474 $$i > $$o"	""
 \converter lyx        clyx       "python -tt $$s/lyx2lyx/lyx2lyx -c big5 -t 245 $$i > $$o"	""
 \converter lyx        jlyx       "python -tt $$s/lyx2lyx/lyx2lyx -c euc_jp -t 245 $$i > $$o"	""
 \converter lyx        klyx       "python -tt $$s/lyx2lyx/lyx2lyx -c euc_kr -t 245 $$i > $$o"	""
--- a/lib/lyx2lyx/LyX.py
+++ b/lib/lyx2lyx/LyX.py
@@ -82,7 +82,9 @@ format_relation = [("0_06",    [200], minor_versions("0.6" , 4)),
                   ("1_4", range(222,246), minor_versions("1.4" , 5)),
                   ("1_5", range(246,277), minor_versions("1.5" , 7)),
                   ("1_6", range(277,346), minor_versions("1.6" , 10)),
-                   ("2_0", range(346,414), minor_versions("2.0" , 0))]
+                   ("2_0", range(346,414), minor_versions("2.0", 8)),
+                   ("2_1", range(414,475), minor_versions("2.1", 0))
+                  ]

 ####################################################################
 # This is useful just for development versions                     #
@@ -125,7 +127,11 @@ def format_info():

 def get_end_format():
    " Returns the more recent file format available."
+    # The newest entry of format_relation has an empty format list only
+    # when a new version was added and there is no format change yet.
+    if format_relation[-1][1]:
      return format_relation[-1][1][-1]
+    # No format registered for the newest entry: use the last format
+    # of the entry before it instead.
+    return format_relation[-2][1][-1]


 def get_backend(textclass):
@@ -146,7 +152,7 @@ def trim_eol(line):


 def get_encoding(language, inputencoding, format, cjk_encoding):
-    " Returns enconding of the LyX file"
+    " Returns enconding of the lyx file"
    if format > 248:
        return "utf8"
    # CJK-LyX encodes files using the current locale encoding.
--- a/lib/lyx2lyx/Makefile.am
+++ b/lib/lyx2lyx/Makefile.am
@@ -31,6 +31,7 @@ dist_lyx2lyx_PYTHON = \
 	lyx_1_5.py \
 	lyx_1_6.py \
 	lyx_2_0.py \
+	lyx_2_1.py \
 	profiling.py \
 	test_parser_tools.py

--- a/lib/lyx2lyx/lyx2lyx_tools.py
+++ b/lib/lyx2lyx/lyx2lyx_tools.py
@@ -14,10 +14,10 @@
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA

 '''
-This modules offer several free functions to help with lyx2lyx'ing. 
+This module offers several free functions to help with lyx2lyx'ing. 
 More documentaton is below, but here is a quick guide to what 
 they do. Optional arguments are marked by brackets.

@@ -60,7 +60,7 @@ latex_length(slen):
 '''

 import string
-from parser_tools import find_token
+from parser_tools import find_token, find_end_of_inset
 from unicode_symbols import unicode_reps


@@ -117,7 +117,7 @@ def put_cmd_in_ert(arg):
    Returns a list of strings, with the lines so wrapped.
    '''
    
-    ret = ["\\begin_inset ERT", "status collapsed", "\\begin_layout Plain Layout", ""]
+    ret = ["\\begin_inset ERT", "status collapsed", "", "\\begin_layout Plain Layout", ""]
    # It will be faster for us to work with a single string internally. 
    # That way, we only go through the unicode_reps loop once.
    if type(arg) is list:
@@ -128,7 +128,38 @@ def put_cmd_in_ert(arg):
      s = s.replace(rep[1], rep[0].replace('\\\\', '\\'))
    s = s.replace('\\', "\\backslash\n")
    ret += s.splitlines()
-    ret += ["\\end_layout", "\\end_inset"]
+    ret += ["\\end_layout", "", "\\end_inset"]
+    return ret
+
+
+def get_ert(lines, i):
+    'Convert an ERT inset into LaTeX.'
+    # lines is the list of document lines, i the index of the line that
+    # starts the inset. Returns the collected text as a single string,
+    # or "" if line i does not start an ERT inset or if no end of the
+    # inset is found.
+    if not lines[i].startswith("\\begin_inset ERT"):
+        return ""
+    j = find_end_of_inset(lines, i)
+    if j == -1:
+        return ""
+    # Skip the inset header up to and including the "status ..." line.
+    while i < j and not lines[i].startswith("status"):
+        i = i + 1
+    i = i + 1
+    ret = ""
+    first = True
+    while i < j:
+        if lines[i] == "\\begin_layout Plain Layout":
+            # Every paragraph after the first one starts a new output line.
+            if first:
+                first = False
+            else:
+                ret = ret + "\n"
+            # Drop the empty lines that follow the layout marker.
+            while i + 1 < j and lines[i+1] == "":
+                i = i + 1
+        elif lines[i] == "\\end_layout":
+            # Drop the empty lines that follow the end of the paragraph.
+            while i + 1 < j and lines[i+1] == "":
+                i = i + 1
+        elif lines[i] == "\\backslash":
+            # A line holding only "\backslash" stands for one backslash.
+            ret = ret + "\\"
+        else:
+            # Any other line is copied verbatim, without a line break.
+            ret = ret + lines[i]
+        i = i + 1
    return ret


--- a/lib/lyx2lyx/lyx2lyx_version.py.in
+++ b/lib/lyx2lyx/lyx2lyx_version.py.in
@@ -16,7 +16,9 @@
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA

-version = "@PACKAGE_VERSION@"
+# Important: Keep the version formatting in sync with LyX and
+#            tex2lyx (bug 7951)
+version = "@LYX_MAJOR_VERSION@.@LYX_MINOR_VERSION@"

 if __name__ == "__main__":
    pass
--- a/lib/lyx2lyx/lyx_1_5.py
+++ b/lib/lyx2lyx/lyx_1_5.py
@@ -356,7 +356,7 @@ def read_unicodesymbols():

 def revert_unicode_line(document, i, insets, spec_chars, replacement_character = '???'):
    # Define strings to start and end ERT and math insets
-    ert_intro='\n\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout %s\n\\backslash\n' % document.default_layout
+    ert_intro='\n\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout %s' % document.default_layout
    ert_outro='\n\\end_layout\n\n\\end_inset\n'
    math_intro='\n\\begin_inset Formula $'
    math_outro='$\n\\end_inset'
@@ -415,8 +415,8 @@ def revert_unicode_line(document, i, insets, spec_chars, replacement_character =
                            command = command + '}'
                        elif not insets or insets[-1] != "ERT":
                            # add an ERT inset with the replacement character
-                            command = command.replace('\\\\', ert_intro)
-                            command = command + ert_outro
+                            command = command.replace('\\\\', '\n\\backslash\n')
+                            command = ert_intro + command + ert_outro
                        else:
                            command = command.replace('\\\\', '\n\\backslash\n')
                    last_char = '' # indicate that the character should not be removed
--- a/lib/lyx2lyx/lyx_2_0.py
+++ b/lib/lyx2lyx/lyx_2_0.py
@@ -45,17 +45,6 @@ def remove_option(lines, m, option):
    return True


-# DO NOT USE THIS ROUTINE ANY MORE. Better yet, replace the uses that
-# have been made of it with uses of put_cmd_in_ert.
-def old_put_cmd_in_ert(string):
-    for rep in unicode_reps:
-        string = string.replace(rep[1], rep[0].replace('\\\\', '\\'))
-    string = string.replace('\\', "\\backslash\n")
-    string = "\\begin_inset ERT\nstatus collapsed\n\\begin_layout Plain Layout\n" \
-      + string + "\n\\end_layout\n\\end_inset"
-    return string
-
-
 ###############################################################################
 ###
 ### Conversion and reversion routines
@@ -994,7 +983,6 @@ def revert_multirow(document):
          numrows = int(numrows)
          numcols = int(numcols)
        except:
-          document.warning(numrows)
          document.warning("Unable to determine rows and columns!")
          begin_table = end_table
          continue
@@ -1830,8 +1818,15 @@ def convert_mathdots(document):
    i = find_token(document.header, "\\use_mhchem" , 0)
    if i == -1:
        i = find_token(document.header, "\\use_esint" , 0)
-    if i != -1:
-      document.header.insert(i + 1, "\\use_mathdots 1")
+    if i == -1:
+        document.warning("Malformed LyX document: Can't find \\use_mhchem.")
+        return;
+    j = find_token(document.preamble, "\\usepackage{mathdots}", 0)
+    if j == -1:
+        document.header.insert(i + 1, "\\use_mathdots 0")
+    else:
+        document.header.insert(i + 1, "\\use_mathdots 2")
+        del document.preamble[j]


 def revert_mathdots(document):
@@ -2233,7 +2228,6 @@ def revert_multirowOffset(document):
          numrows = int(numrows)
          numcols = int(numcols)
        except:
-          document.warning(numrows)
          document.warning("Unable to determine rows and columns!")
          begin_table = end_table
          continue
--- a/lib/lyx2lyx/lyx_2_1.py
+++ b/lib/lyx2lyx/lyx_2_1.py
--- a/lib/lyx2lyx/parser_tools.py
+++ b/lib/lyx2lyx/parser_tools.py
@@ -19,7 +19,7 @@


 ''' 
-This modules offer several free functions to help parse lines.
+This module offers several free functions to help parse lines.
 More documentaton is below, but here is a quick guide to what 
 they do. Optional arguments are marked by brackets.

@@ -32,11 +32,11 @@ find_token(lines, token, start[, end[, ignorews]]):
  extra whitespace following token itself.

 find_token_exact(lines, token, start[, end]):
-  As find_token, but with ignorews True.
+  As find_token, but with ignorews set to True.

 find_tokens(lines, tokens, start[, end[, ignorews]]):
  Returns the first line i, start <= i < end, on which
-  oen of the tokens in tokens is found at the beginning. 
+  one of the tokens in tokens is found at the beginning.
  Returns -1 if not found. 
  If ignorews is (given and) True, then differences
  in whitespace do not count, except that there must be no 
@@ -104,6 +104,12 @@ find_end_of_inset(lines, i):
 find_end_of_layout(lines, i):
  Specialization of find_end_of for layouts.

+find_end_of_sequence(lines, i):
+  Find the end of the sequence of layouts of the same kind.
+  Considers nesting. If the last paragraph in sequence is nested,
+  the position of the last \end_deeper is returned, else
+  the position of the last \end_layout.
+
 is_in_inset(lines, i, inset):
  Checks if line i is in an inset of the given type.
  If so, returns starting and ending lines. Otherwise, 
@@ -119,7 +125,7 @@ is_in_inset(lines, i, inset):

 get_containing_inset(lines, i):
  Finds out what kind of inset line i is within. Returns a 
-  list containing what follows \begin_inset on the the line 
+  list containing what follows \begin_inset on the line
  on which the inset begins, plus the starting and ending line.
  Returns False on any kind of error or if it isn't in an inset.
  So get_containing_inset(document.body, i) might return:
@@ -128,8 +134,8 @@ get_containing_inset(lines, i):
  on line 306.

 get_containing_layout(lines, i):
-  As get_containing_inset, but for layout.
-
+  As get_containing_inset, but for layout. Additionally returns the
+  position of real paragraph start (after par params) as 4th value.

 find_nonempty_line(lines, start[, end):
  Finds the next non-empty line.
@@ -140,6 +146,9 @@ check_token(line, token):
 is_nonempty_line(line):
  Does line contain something besides whitespace?

+count_pars_in_inset(lines, i):
+  Counts the paragraphs inside an inset.
+
 '''

 import re
@@ -307,7 +316,7 @@ def get_quoted_value(lines, token, start, end = 0, default = ""):


 def get_option_value(line, option):
-    rx = option + '\s*=\s*"([^"+])"'
+    rx = option + '\s*=\s*"([^"]+)"'
    rx = re.compile(rx)
    m = rx.search(line)
    if not m:
@@ -315,6 +324,15 @@ def get_option_value(line, option):
    return m.group(1)


+def set_option_value(line, option, value):
+    '''
+    Replaces the (non-empty) quoted value of option in line, i.e.
+    turns option="old" into option="value", and returns the result.
+    The line is returned unchanged if option="..." does not occur.
+    option is used as a regular expression, as in get_option_value;
+    value is inserted literally.
+    '''
+    rx = re.compile('(' + option + r'\s*=\s*")[^"]+"')
+    # A replacement function (rather than a template string) keeps
+    # backslashes in value from being interpreted as escapes.
+    return rx.sub(lambda m: m.group(1) + value + '"', line)
+
+
 def del_token(lines, token, start, end = 0):
    """ del_token(lines, token, start, end) -> int

@@ -407,16 +425,20 @@ def is_in_inset(lines, i, inset):
 def get_containing_inset(lines, i):
  ''' 
  Finds out what kind of inset line i is within. Returns a 
-  list containing (i) what follows \begin_inset on the the line 
+  list containing (i) what follows \begin_inset on the line
  on which the inset begins, plus the starting and ending line.
  Returns False on any kind of error or if it isn't in an inset.
  '''
-  stins = find_token_backwards(lines, i, "\\begin_inset")
+  j = i
+  while True:
+      stins = find_token_backwards(lines, "\\begin_inset", j)
      if stins == -1:
          return False
      endins = find_end_of_inset(lines, stins)
-  if endins < i:
-      return False
+      if endins > j:
+          break
+      j = stins - 1
+
  inset = get_value(lines, "\\begin_inset", stins)
  if inset == "":
      # shouldn't happen
@@ -427,18 +449,81 @@ def get_containing_inset(lines, i):
 def get_containing_layout(lines, i):
  ''' 
  Finds out what kind of layout line i is within. Returns a 
-  list containing (i) what follows \begin_layout on the the line 
-  on which the layout begins, plus the starting and ending line.
+  list containing what follows \begin_layout on the line
+  on which the layout begins, plus the starting and ending line
+  and the start of the paragraph (after all params). I.e, returns:
+    (layoutname, layoutstart, layoutend, startofcontent)
  Returns False on any kind of error.
  '''
-  stins = find_token_backwards(lines, i, "\\begin_layout")
-  if stins == -1:
+  j = i
+  while True:
+      stlay = find_token_backwards(lines, "\\begin_layout", j)
+      if stlay == -1:
          return False
-  endins = find_end_of_layout(lines, stins)
-  if endins < i:
-      return False
-  lay = get_value(lines, "\\begin_layout", stins)
+      endlay = find_end_of_layout(lines, stlay)
+      if endlay > i:
+          break
+      j = stlay - 1
+
+  lay = get_value(lines, "\\begin_layout", stlay)
  if lay == "":
      # shouldn't happen
      return False
-  return (lay, stins, endins)
+  # Paragraph parameters that may follow the \begin_layout line. Several
+  # of them are written with an argument on the same line (for instance
+  # "\paragraph_spacing single"), so only the first word of a line is
+  # compared against this list.
+  par_params = ["\\noindent", "\\indent", "\\indent-toggle", "\\leftindent",
+                "\\start_of_appendix", "\\paragraph_spacing", "\\align",
+                "\\labelwidthstring"]
+  stpar = stlay + 1
+  # Skip the parameter lines, but never scan past the end of the layout.
+  while stpar < endlay and lines[stpar].split(' ', 1)[0] in par_params:
+      stpar += 1
+  return (lay, stlay, endlay, stpar)
+
+
+def count_pars_in_inset(lines, i):
+  '''
+  Counts the paragraphs of the inset that contains line i.
+  Only layouts that begin directly in that inset are counted;
+  paragraphs of insets nested in it are skipped.
+  Returns -1 if line i is not within an inset.
+  '''
+  ins = get_containing_inset(lines, i)
+  # get_containing_inset reports failure with False, not with -1
+  if not ins:
+      return -1
+  pars = 0
+  for j in range(ins[1], ins[2]):
+      if not lines[j].startswith("\\begin_layout "):
+          continue
+      # Compare the start lines rather than the inset names: otherwise
+      # the paragraphs of a nested inset of the same kind are counted too.
+      found = get_containing_inset(lines, j)
+      if found and found[1] == ins[1]:
+          pars += 1
+
+  return pars
+
+
+def find_end_of_sequence(lines, i):
+  '''
+  Returns the end of a sequence of identical layouts.
+  '''
+  # i is a line inside the layout at which the sequence is examined.
+  # The result is the index of the last \end_layout or \end_deeper line
+  # reached, or -1 if no layout containing line i is found.
+  lay = get_containing_layout(lines, i)
+  if lay == False:
+      return -1
+  layout = lay[0]
+  endlay = lay[2]
+  # Scan forward, starting at the end of the current layout.
+  i = endlay
+  while True:
+      m = re.match(r'\\begin_layout (.*)', lines[i])
+      if m and m.group(1) != layout:
+          # A different layout starts here: the sequence ended before it.
+          return endlay
+      elif lines[i] == "\\begin_deeper":
+          # Nested material belongs to the sequence; jump to its end.
+          j = find_end_of(lines, i, "\\begin_deeper", "\\end_deeper")
+          if j != -1:
+              i = j
+              endlay = j
+              continue
+      if m and m.group(1) == layout:
+          # Another paragraph of the same layout extends the sequence.
+          endlay = find_end_of_layout(lines, i)
+          i = endlay
+          continue
+      if i == len(lines) - 1:
+          # Last line reached without meeting a different layout.
+          break
+      i = i + 1
+
+  return endlay
+