Forgot to svn add these.

git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@36087 a592a061-630c-0410-9148-cb99ea01b6c8
2024-11-29 05:01:49 +00:00 · 2010-11-04 19:07:30 +00:00 · 2010-11-04 19:07:30 +00:00 · 16be7a8b26
commit 16be7a8b26
parent d3fc1c21d1
2 changed files with 420 additions and 0 deletions
--- a/lib/lyx2lyx/lyx2lyx_tools.py
+++ b/lib/lyx2lyx/lyx2lyx_tools.py
@ -0,0 +1,359 @@
+# This file is part of lyx2lyx
+# -*- coding: utf-8 -*-
+# Copyright (C) 2010 The LyX team
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+
+" This modules offer several free functions to help with lyx2lyx'ing. "
+
+import string
+from parser_tools import find_token
+from unicode_symbols import unicode_reps
+
+# Note that text can be either a list of lines or a single line.
+def add_to_preamble(document, text):
+    """ Add text to the preamble if it is not already there.
+    Only the first line is checked!"""
+
+    if not type(text) is list:
+      # split on \n just in case
+      # it'll give us the one element list we want
+      # if there's no \n, too
+      text = text.split('\n')
+
+    if find_token(document.preamble, text[0], 0) != -1:
+        return
+
+    document.preamble.extend(text)
+
+
+# Note that text can be either a list of lines or a single line.
+# It should really be a list.
+def insert_to_preamble(index, document, text):
+    """ Insert text to the preamble at a given line"""
+    
+    if not type(text) is list:
+      # split on \n just in case
+      # it'll give us the one element list we want
+      # if there's no \n, too
+      text = text.split('\n')
+
+    document.preamble[index:index] = text
+
+
+# This routine wraps some content in an ERT inset. 
+#
+# NOTE: The function accepts either a single string or a LIST of strings as
+# argument. But it returns a LIST of strings, split on \n, so that it does 
+# not have embedded newlines.
+# 
+# This is how lyx2lyx represents a LyX document: as a list of strings, 
+# each representing a line of a LyX file. Embedded newlines confuse 
+# lyx2lyx very much.
+#
+# A call to this routine will often go something like this:
+#   i = find_token('\\begin_inset FunkyInset', ...)
+#   ...
+#   j = find_end_of_inset(document.body, i)
+#   content = ...extract content from insets
+#   # that could be as simple as: 
+#   # content = lyx2latex(document[i:j + 1])
+#   ert = put_cmd_in_ert(content)
+#   document.body[i:j] = ert
+# Now, before we continue, we need to reset i appropriately. Normally,
+# this would be: 
+#   i += len(ert)
+# That puts us right after the ERT we just inserted.
+#
+def put_cmd_in_ert(arg):
+    ret = ["\\begin_inset ERT", "status collapsed", "\\begin_layout Plain Layout", ""]
+    # Despite the warnings just given, it will be faster for us to work
+    # with a single string internally. That way, we only go through the
+    # unicode_reps loop once.
+    if type(arg) is list:
+      s = "\n".join(arg)
+    else:
+      s = arg
+    for rep in unicode_reps:
+      s = s.replace(rep[1], rep[0].replace('\\\\', '\\'))
+    s = s.replace('\\', "\\backslash\n")
+    ret += s.splitlines()
+    ret += ["\\end_layout", "\\end_inset"]
+    return ret
+
+            
+def lyx2latex(document, lines):
+    'Convert some LyX stuff into corresponding LaTeX stuff, as best we can.'
+    # clean up multiline stuff
+    content = ""
+    ert_end = 0
+    note_end = 0
+    hspace = ""
+
+    for curline in range(len(lines)):
+      line = lines[curline]
+      if line.startswith("\\begin_inset Note Note"):
+          # We want to skip LyX notes, so remember where the inset ends
+          note_end = find_end_of_inset(lines, curline + 1)
+          continue
+      elif note_end >= curline:
+          # Skip LyX notes
+          continue
+      elif line.startswith("\\begin_inset ERT"):
+          # We don't want to replace things inside ERT, so figure out
+          # where the end of the inset is.
+          ert_end = find_end_of_inset(lines, curline + 1)
+          continue
+      elif line.startswith("\\begin_inset Formula"):
+          line = line[20:]
+      elif line.startswith("\\begin_inset Quotes"):
+          # For now, we do a very basic reversion. Someone who understands
+          # quotes is welcome to fix it up.
+          qtype = line[20:].strip()
+          # lang = qtype[0]
+          side = qtype[1]
+          dbls = qtype[2]
+          if side == "l":
+              if dbls == "d":
+                  line = "``"
+              else:
+                  line = "`"
+          else:
+              if dbls == "d":
+                  line = "''"
+              else:
+                  line = "'"
+      elif line.startswith("\\begin_inset space"):
+          line = line[18:].strip()
+          if line.startswith("\\hspace"):
+              # Account for both \hspace and \hspace*
+              hspace = line[:-2]
+              continue
+          elif line == "\\space{}":
+              line = "\\ "
+          elif line == "\\thinspace{}":
+              line = "\\,"
+      elif hspace != "":
+          # The LyX length is in line[8:], after the \length keyword
+          length = latex_length(line[8:])[1]
+          line = hspace + "{" + length + "}"
+          hspace = ""
+      elif line.isspace() or \
+            line.startswith("\\begin_layout") or \
+            line.startswith("\\end_layout") or \
+            line.startswith("\\begin_inset") or \
+            line.startswith("\\end_inset") or \
+            line.startswith("\\lang") or \
+            line.strip() == "status collapsed" or \
+            line.strip() == "status open":
+          #skip all that stuff
+          continue
+
+      # this needs to be added to the preamble because of cases like
+      # \textmu, \textbackslash, etc.
+      add_to_preamble(document, ['% added by lyx2lyx for converted index entries',
+                                 '\\@ifundefined{textmu}',
+                                 ' {\\usepackage{textcomp}}{}'])
+      # a lossless reversion is not possible
+      # try at least to handle some common insets and settings
+      if ert_end >= curline:
+          line = line.replace(r'\backslash', '\\')
+      else:
+          # No need to add "{}" after single-nonletter macros
+          line = line.replace('&', '\\&')
+          line = line.replace('#', '\\#')
+          line = line.replace('^', '\\textasciicircum{}')
+          line = line.replace('%', '\\%')
+          line = line.replace('_', '\\_')
+          line = line.replace('$', '\\$')
+
+          # Do the LyX text --> LaTeX conversion
+          for rep in unicode_reps:
+            line = line.replace(rep[1], rep[0] + "{}")
+          line = line.replace(r'\backslash', r'\textbackslash{}')
+          line = line.replace(r'\series bold', r'\bfseries{}').replace(r'\series default', r'\mdseries{}')
+          line = line.replace(r'\shape italic', r'\itshape{}').replace(r'\shape smallcaps', r'\scshape{}')
+          line = line.replace(r'\shape slanted', r'\slshape{}').replace(r'\shape default', r'\upshape{}')
+          line = line.replace(r'\emph on', r'\em{}').replace(r'\emph default', r'\em{}')
+          line = line.replace(r'\noun on', r'\scshape{}').replace(r'\noun default', r'\upshape{}')
+          line = line.replace(r'\bar under', r'\underbar{').replace(r'\bar default', r'}')
+          line = line.replace(r'\family sans', r'\sffamily{}').replace(r'\family default', r'\normalfont{}')
+          line = line.replace(r'\family typewriter', r'\ttfamily{}').replace(r'\family roman', r'\rmfamily{}')
+          line = line.replace(r'\InsetSpace ', r'').replace(r'\SpecialChar ', r'')
+      content += line
+    return content
+
+
+def latex_length(slen):
+    ''' 
+    Convert lengths to their LaTeX representation. Returns (bool, length),
+    where the bool tells us if it was a percentage, and the length is the
+    LaTeX representation.
+    '''
+    i = 0
+    percent = False
+    # the slen has the form
+    # ValueUnit+ValueUnit-ValueUnit or
+    # ValueUnit+-ValueUnit
+    # the + and - (glue lengths) are optional
+    # the + always precedes the -
+
+    # Convert relative lengths to LaTeX units
+    units = {"text%":"\\textwidth", "col%":"\\columnwidth",
+             "page%":"\\paperwidth", "line%":"\\linewidth",
+             "theight%":"\\textheight", "pheight%":"\\paperheight"}
+    for unit in units.keys():
+        i = slen.find(unit)
+        if i == -1:
+            continue
+        percent = True
+        minus = slen.rfind("-", 1, i)
+        plus = slen.rfind("+", 0, i)
+        latex_unit = units[unit]
+        if plus == -1 and minus == -1:
+            value = slen[:i]
+            value = str(float(value)/100)
+            end = slen[i + len(unit):]
+            slen = value + latex_unit + end
+        if plus > minus:
+            value = slen[plus + 1:i]
+            value = str(float(value)/100)
+            begin = slen[:plus + 1]
+            end = slen[i+len(unit):]
+            slen = begin + value + latex_unit + end
+        if plus < minus:
+            value = slen[minus + 1:i]
+            value = str(float(value)/100)
+            begin = slen[:minus + 1]
+            slen = begin + value + latex_unit
+
+    # replace + and -, but only if the - is not the first character
+    slen = slen[0] + slen[1:].replace("+", " plus ").replace("-", " minus ")
+    # handle the case where "+-1mm" was used, because LaTeX only understands
+    # "plus 1mm minus 1mm"
+    if slen.find("plus  minus"):
+        lastvaluepos = slen.rfind(" ")
+        lastvalue = slen[lastvaluepos:]
+        slen = slen.replace("  ", lastvalue + " ")
+    return (percent, slen)
+
+
+def revert_flex_inset(document, name, LaTeXname, position):
+  " Convert flex insets to TeX code "
+  i = position
+  while True:
+    i = find_token(document.body, '\\begin_inset Flex ' + name, i)
+    if i == -1:
+      return
+    z = find_end_of_inset(document.body, i)
+    if z == -1:
+      document.warning("Malformed LyX document: Can't find end of Flex " + name + " inset.")
+      return
+    # remove the \end_inset
+    document.body[z - 2:z + 1] = put_cmd_in_ert("}")
+    # we need to reset character layouts if necessary
+    j = find_token(document.body, '\\emph on', i, z)
+    k = find_token(document.body, '\\noun on', i, z)
+    l = find_token(document.body, '\\series', i, z)
+    m = find_token(document.body, '\\family', i, z)
+    n = find_token(document.body, '\\shape', i, z)
+    o = find_token(document.body, '\\color', i, z)
+    p = find_token(document.body, '\\size', i, z)
+    q = find_token(document.body, '\\bar under', i, z)
+    r = find_token(document.body, '\\uuline on', i, z)
+    s = find_token(document.body, '\\uwave on', i, z)
+    t = find_token(document.body, '\\strikeout on', i, z)
+    if j != -1:
+      document.body.insert(z - 2, "\\emph default")
+    if k != -1:
+      document.body.insert(z - 2, "\\noun default")
+    if l != -1:
+      document.body.insert(z - 2, "\\series default")
+    if m != -1:
+      document.body.insert(z - 2, "\\family default")
+    if n != -1:
+      document.body.insert(z - 2, "\\shape default")
+    if o != -1:
+      document.body.insert(z - 2, "\\color inherit")
+    if p != -1:
+      document.body.insert(z - 2, "\\size default")
+    if q != -1:
+      document.body.insert(z - 2, "\\bar default")
+    if r != -1:
+      document.body.insert(z - 2, "\\uuline default")
+    if s != -1:
+      document.body.insert(z - 2, "\\uwave default")
+    if t != -1:
+      document.body.insert(z - 2, "\\strikeout default")
+    document.body[i:i + 4] = put_cmd_in_ert(LaTeXname + "{")
+    i += 1
+
+
+def revert_font_attrs(document, name, LaTeXname):
+  " Reverts font changes to TeX code "
+  i = 0
+  changed = False
+  while True:
+    i = find_token(document.body, name + ' on', i)
+    if i == -1:
+      return changed
+    j = find_token(document.body, name + ' default', i)
+    k = find_token(document.body, name + ' on', i + 1)
+    # if there is no default set, the style ends with the layout
+    # assure hereby that we found the correct layout end
+    if j != -1 and (j < k or k == -1):
+      document.body[j:j + 1] = put_cmd_in_ert("}")
+    else:
+      j = find_token(document.body, '\\end_layout', i)
+      document.body[j:j] = put_cmd_in_ert("}")
+    document.body[i:i + 1] = put_cmd_in_ert(LaTeXname + "{")
+    changed = True
+    i += 1
+
+
+def revert_layout_command(document, name, LaTeXname, position):
+  " Reverts a command from a layout to TeX code "
+  i = position
+  while True:
+    i = find_token(document.body, '\\begin_layout ' + name, i)
+    if i == -1:
+      return
+    k = -1
+    # find the next layout
+    j = i + 1
+    while k == -1:
+      j = find_token(document.body, '\\begin_layout', j)
+      l = len(document.body)
+      # if nothing was found it was the last layout of the document
+      if j == -1:
+        document.body[l - 4:l - 4] = put_cmd_in_ert("}")
+        k = 0
+      # exclude plain layout because this can be TeX code or another inset
+      elif document.body[j] != '\\begin_layout Plain Layout':
+        document.body[j - 2:j - 2] = put_cmd_in_ert("}")
+        k = 0
+      else:
+        j += 1
+    document.body[i] = '\\begin_layout Standard'
+    document.body[i + 1:i + 1] = put_cmd_in_ert(LaTeXname + "{")
+    i += 1
+
+
+def hex2ratio(s):
+    val = string.atoi(s, 16)
+    if val != 0:
+      val += 1
+    return str(val / 256.0)
+
--- a/lib/lyx2lyx/unicode_symbols.py
+++ b/lib/lyx2lyx/unicode_symbols.py
@ -0,0 +1,61 @@
+# This file is part of lyx2lyx
+# -*- coding: utf-8 -*-
+# Copyright (C) 2010 The LyX team
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+
+" Import unicode_reps from this module for access to the unicode<->LaTeX mapping. "
+
+import sys, os, re
+
+def read_unicodesymbols():
+    " Read the unicodesymbols list of unicode characters and corresponding commands."
+    pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
+    fp = open(os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols'))
+    spec_chars = []
+    # Two backslashes, followed by some non-word character, and then a character
+    # in brackets. The idea is to check for constructs like: \"{u}, which is how
+    # they are written in the unicodesymbols file; but they can also be written
+    # as: \"u or even \" u.
+    r = re.compile(r'\\\\(\W)\{(\w)\}')
+    for line in fp.readlines():
+        if line[0] != '#' and line.strip() != "":
+            line=line.replace(' "',' ') # remove all quotation marks with spaces before
+            line=line.replace('" ',' ') # remove all quotation marks with spaces after
+            line=line.replace(r'\"','"') # replace \" by " (for characters with diaeresis)
+            try:
+                [ucs4,command,dead] = line.split(None,2)
+                if command[0:1] != "\\":
+                    continue
+                spec_chars.append([command, unichr(eval(ucs4))])
+            except:
+                continue
+            m = r.match(command)
+            if m != None:
+                command = "\\\\"
+                # If the character is a double-quote, then we need to escape it, too,
+                # since it is done that way in the LyX file.
+                if m.group(1) == "\"":
+                    command += "\\"
+                commandbl = command
+                command += m.group(1) + m.group(2)
+                commandbl += m.group(1) + ' ' + m.group(2)
+                spec_chars.append([command, unichr(eval(ucs4))])
+                spec_chars.append([commandbl, unichr(eval(ucs4))])
+    fp.close()
+    return spec_chars
+
+
+unicode_reps = read_unicodesymbols()