# lyx_mirror/lib/lyx2lyx/lyx_1_5.py

# This file is part of lyx2lyx
# -*- coding: utf-8 -*-
# Copyright (C) 2006 José Matos <jamatos@lyx.org>
# Copyright (C) 2004-2006 Georg Baum <Georg.Baum@post.rwth-aachen.de>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

""" Convert files to the file format generated by lyx 1.5"""

import re
from parser_tools import find_token, find_token_exact, find_tokens, find_end_of, get_value
from LyX import get_encoding


####################################################################
# Private helper functions

def find_end_of_inset(lines, i):
    """ Find the end of an inset, where lines[i] is included in the search.

    Delegates to find_end_of() with the "\\begin_inset" / "\\end_inset"
    token pair and returns its result unchanged (callers in this file
    treat -1 as "not found").
    """
    opening, closing = "\\begin_inset", "\\end_inset"
    return find_end_of(lines, i, opening, closing)

# End of helper functions
####################################################################


##
#  Notes: Framed/Shaded
#

def revert_framed(document):
    " Turn framed and shaded notes back into plain notes. "
    note_tokens = ["\\begin_inset Note Framed", "\\begin_inset Note Shaded"]
    pos = find_tokens(document.body, note_tokens, 0)
    while pos != -1:
        # Drop the Framed/Shaded qualifier, keep the inset itself.
        document.body[pos] = "\\begin_inset Note"
        pos = find_tokens(document.body, note_tokens, pos + 1)


##
#  Fonts
#

# One row per old-style \fontscheme value:
#   (scheme, roman family, sans family, typewriter family)
_font_scheme_table = (
    ('default',  'default',  'default', 'default'),
    ('ae',       'ae',       'default', 'default'),
    ('times',    'times',    'default', 'default'),
    ('palatino', 'palatino', 'default', 'default'),
    ('helvet',   'default',  'helvet',  'default'),
    ('avant',    'default',  'avant',   'default'),
    ('newcent',  'newcent',  'default', 'default'),
    ('bookman',  'bookman',  'default', 'default'),
    ('pslatex',  'times',    'helvet',  'courier'),
)

# Per-family lookup tables keyed by font scheme.
roman_fonts      = dict((row[0], row[1]) for row in _font_scheme_table)
sans_fonts       = dict((row[0], row[2]) for row in _font_scheme_table)
typewriter_fonts = dict((row[0], row[3]) for row in _font_scheme_table)

def convert_font_settings(document):
    " Replace the \\fontscheme header line by the separate \\font_* settings. "
    pos = find_token_exact(document.header, "\\fontscheme", 0)
    if pos == -1:
        document.warning("Malformed LyX document: Missing `\\fontscheme'.")
        return

    scheme = get_value(document.header, "\\fontscheme", pos, pos + 1)
    if scheme == '':
        document.warning("Malformed LyX document: Empty `\\fontscheme'.")
        scheme = 'default'
    if scheme not in roman_fonts:
        document.warning("Malformed LyX document: Unknown `\\fontscheme' `%s'." % scheme)
        scheme = 'default'

    # The single \fontscheme line expands to eight header lines.
    replacement = [
        '\\font_roman %s' % roman_fonts[scheme],
        '\\font_sans %s' % sans_fonts[scheme],
        '\\font_typewriter %s' % typewriter_fonts[scheme],
        '\\font_default_family default',
        '\\font_sc false',
        '\\font_osf false',
        '\\font_sf_scale 100',
        '\\font_tt_scale 100',
    ]
    document.header[pos:pos + 1] = replacement


def _pop_header_setting(document, token, start, default):
    """ Remove the header line for `token` and return (value, index).

    document.header is searched for `token` beginning at index `start`.
    If the line is missing, a "Malformed LyX document" warning is issued
    and (default, -1) is returned.  Otherwise the line is deleted from the
    header and (value, index) is returned, where index is the position the
    line was found at.
    """
    pos = find_token_exact(document.header, token, start)
    if pos == -1:
        document.warning("Malformed LyX document: Missing `%s'." % token)
        return default, -1
    value = get_value(document.header, token, pos, pos + 1)
    del document.header[pos]
    return value, pos


def revert_font_settings(document):
    """ Revert font settings.

    Removes the \\font_* header lines and puts a single \\fontscheme line
    where the first removed font family line was.  If the three font
    families correspond to a known font scheme that scheme is used;
    otherwise the scheme is 'default' and the fonts are selected through
    preamble code.  Settings that cannot be expressed produce a warning.
    """
    i = 0
    insert_line = -1
    fonts = {'roman' : 'default', 'sans' : 'default', 'typewriter' : 'default'}
    for family in ('roman', 'sans', 'typewriter'):
        value, pos = _pop_header_setting(document, '\\font_%s' % family, i, 'default')
        if pos == -1:
            # Restart from the top for the next setting.
            i = 0
        else:
            i = pos
            if insert_line < 0:
                insert_line = pos
            fonts[family] = value

    # After a failed lookup the next search starts again at 0 (the index -1
    # returned for "missing" is never used as a start position).
    font_default_family, pos = _pop_header_setting(document, '\\font_default_family', i, 'default')
    i = max(pos, 0)

    font_sc, pos = _pop_header_setting(document, '\\font_sc', i, 'false')
    i = max(pos, 0)
    if font_sc != 'false':
        document.warning("Conversion of '\\font_sc' not yet implemented.")

    font_osf, pos = _pop_header_setting(document, '\\font_osf', i, 'false')
    i = max(pos, 0)

    font_sf_scale, pos = _pop_header_setting(document, '\\font_sf_scale', i, '100')
    i = max(pos, 0)
    if font_sf_scale != '100':
        document.warning("Conversion of '\\font_sf_scale' not yet implemented.")

    font_tt_scale, pos = _pop_header_setting(document, '\\font_tt_scale', i, '100')
    if font_tt_scale != '100':
        document.warning("Conversion of '\\font_tt_scale' not yet implemented.")

    # NOTE(review): if none of the three family lines was found insert_line
    # is still -1 here, so the \fontscheme line lands before the last header
    # line -- kept as in the original; confirm that position is acceptable.

    # First choice: the three families correspond to an old font scheme.
    for font_scheme in roman_fonts:
        if (roman_fonts[font_scheme] == fonts['roman'] and
            sans_fonts[font_scheme] == fonts['sans'] and
            typewriter_fonts[font_scheme] == fonts['typewriter']):
            document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
            if font_default_family != 'default':
                document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
            if font_osf == 'true':
                document.warning("Ignoring `\\font_osf = true'")
            return

    # No matching scheme: use the default one and load the fonts by hand.
    font_scheme = 'default'
    document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
    if fonts['roman'] == 'cmr':
        document.preamble.append('\\renewcommand{\\rmdefault}{cmr}')
        if font_osf == 'true':
            document.preamble.append('\\usepackage{eco}')
            font_osf = 'false'
    if fonts['roman'] in ('lmodern', 'charter', 'utopia', 'beraserif', 'ccfonts', 'chancery'):
        document.preamble.append('\\usepackage{%s}' % fonts['roman'])
    if fonts['sans'] in ('cmss', 'lmss', 'cmbr'):
        document.preamble.append('\\renewcommand{\\sfdefault}{%s}' % fonts['sans'])
    # This used to be `for font in 'berasans'`, which loops over the single
    # characters of the string and therefore never matched.
    if fonts['sans'] == 'berasans':
        document.preamble.append('\\usepackage{%s}' % fonts['sans'])
    if fonts['typewriter'] in ('cmtt', 'lmtt', 'cmtl'):
        document.preamble.append('\\renewcommand{\\ttdefault}{%s}' % fonts['typewriter'])
    if fonts['typewriter'] in ('courier', 'beramono', 'luximono'):
        document.preamble.append('\\usepackage{%s}' % fonts['typewriter'])
    if font_default_family != 'default':
        document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
    if font_osf == 'true':
        document.warning("Ignoring `\\font_osf = true'")


def revert_booktabs(document):
    """ We remove the booktabs flag or everything else will become a mess.

    For every tabular inset the booktabs="true" attribute is removed from
    the <features ...> line, and the topspace, bottomspace and
    interlinespace attributes are removed from <row ...> lines.  A warning
    is issued for each line that gets changed.
    """
    re_features = re.compile(r'^<features.* booktabs="true".*>$')
    re_row = re.compile(r'^<row.*space="[^"]+".*>$')
    re_tspace = re.compile(r'\s+topspace="[^"]+"')
    re_bspace = re.compile(r'\s+bottomspace="[^"]+"')
    re_ispace = re.compile(r'\s+interlinespace="[^"]+"')
    i = 0
    while 1:
        i = find_token(document.body, "\\begin_inset Tabular", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of tabular.")
            # Step past this inset; without this the same line would be
            # found again and the loop would never terminate.
            i = i + 1
            continue
        for k in range(i, j):
            line = document.body[k]
            if re_features.search(line):
                document.warning("Converting 'booktabs' table to normal table.")
                line = line.replace(' booktabs="true"', '')
            if re_row.search(line):
                document.warning("Removing extra row space.")
                line = re_tspace.sub('', line)
                line = re_bspace.sub('', line)
                line = re_ispace.sub('', line)
            document.body[k] = line
        i = i + 1


def convert_utf8(document):
    " Set the encoding attribute of the document to utf8. "
    setattr(document, "encoding", "utf8")


def revert_utf8(document):
    """ Replace a utf8 (or missing) \\inputencoding setting by 'auto'.

    Afterwards document.inputencoding is re-read from the header and
    document.encoding is recomputed with get_encoding().
    """
    token = "\\inputencoding"
    auto_line = "\\inputencoding auto"
    pos = find_token(document.header, token, 0)
    if pos == -1:
        # No setting at all: add one.
        document.header.append(auto_line)
    else:
        if get_value(document.header, token, pos) == "utf8":
            document.header[pos] = auto_line
    document.inputencoding = get_value(document.header, token, 0)
    document.encoding = get_encoding(document.language, document.inputencoding, 248)


def revert_cs_label(document):
    """ Remove status flag of charstyle label.

    For each CharStyle inset the first following line starting with
    'show_label' is deleted.  If a \\begin_layout line (or the end of the
    body) is reached first, a warning is issued instead.
    """
    i = 0
    while 1:
        i = find_token(document.body, "\\begin_inset CharStyle", i)
        if i == -1:
            return
        # Search for a line starting with 'show_label'.
        # If it is not there, stop with a warning message.
        i = i + 1
        while i < len(document.body):
            if document.body[i].startswith("show_label"):
                del document.body[i]
                break
            if document.body[i].startswith("\\begin_layout"):
                document.warning("Malformed LyX document: Missing 'show_label'.")
                break
            i = i + 1
        else:
            # Ran off the end of the body: nothing left to look at.  (The
            # unbounded scan used to raise an IndexError here.)
            document.warning("Malformed LyX document: Missing 'show_label'.")
            return

        i = i + 1


def convert_bibitem(document):
    r""" Convert
\bibitem [option]{argument}

to

\begin_inset LatexCommand bibitem
label "option"
key "argument"

\end_inset

This must be called after convert_commandparams.

Lines starting with \bibitem that do not have the expected form are left
untouched and reported with a warning.
"""
    # (This docstring is a raw string: "\b" would otherwise be read as a
    # backspace character.)
    regex = re.compile(r'\S+\s*(\[[^\[\{]*\])?(\{[^}]*\})')
    i = 0
    while 1:
        i = find_token(document.body, "\\bibitem", i)
        if i == -1:
            break
        match = regex.match(document.body[i])
        if match is None:
            # No {argument} found; there is nothing sensible to convert.
            document.warning("Malformed LyX document: Can't parse bibitem `%s'." % document.body[i])
            i = i + 1
            continue
        option = match.group(1)
        argument = match.group(2)
        lines = ['\\begin_inset LatexCommand bibitem']
        if option is not None:
            # Strip the surrounding [] and escape embedded double quotes.
            lines.append('label "%s"' % option[1:-1].replace('"', '\\"'))
        # Strip the surrounding {} and escape embedded double quotes.
        lines.append('key "%s"' % argument[1:-1].replace('"', '\\"'))
        lines.append('')
        lines.append('\\end_inset')
        document.body[i:i+1] = lines
        i = i + 1


def _make_commandparams_info():
    " Build the table of parameter names for each LatexCommand. "
    citation_commands = [
        "cite", "citet", "citep", "citealt", "citealp",
        "citeauthor", "citeyear", "citeyearpar",
        "citet*", "citep*", "citealt*", "citealp*", "citeauthor*",
        "Citet", "Citep", "Citealt", "Citealp", "Citeauthor",
        "Citet*", "Citep*", "Citealt*", "Citealp*", "Citeauthor*",
        "citefield", "citetitle", "cite*",
    ]
    reference_commands = [
        "eqref", "pageref", "prettyref", "ref", "vpageref", "vref",
    ]

    table = {}
    table["bibitem"] = ["label", "", "key"]
    table["bibtex"] = ["options", "btprint", "bibfiles"]
    # Every command gets its own list object, as in a plain dict literal.
    for command in citation_commands:
        table[command] = ["after", "before", "key"]
    table["hfill"] = ["", "", ""]
    for command in ("index", "printindex", "label"):
        table[command] = ["", "", "name"]
    for command in reference_commands:
        table[command] = ["name", "", "reference"]
    table["tableofcontents"] = ["", "", "type"]
    for command in ("htmlurl", "url"):
        table[command] = ["name", "", "target"]
    return table


# command : [option1, option2, argument]
# An empty string means that the command does not take that parameter.
commandparams_info = _make_commandparams_info()


def convert_commandparams(document):
    r""" Convert

 \begin_inset LatexCommand \cmdname[opt1][opt2]{arg}
 \end_inset

 to

 \begin_inset LatexCommand cmdname
 name1 "opt1"
 name2 "opt2"
 name3 "arg"
 \end_inset

 name1, name2 and name3 can be different for each command.

 Parameters a command does not take (see commandparams_info) are dropped
 with a warning.  A command that is not listed in commandparams_info is
 treated as taking no parameters at all.
"""
    # (The docstring is a raw string: "\b" would otherwise be read as a
    # backspace character.)

    # \begin_inset LatexCommand bibitem was not the official version (see
    # convert_bibitem()), but could be read in, so we convert it here, too.

    i = 0
    while 1:
        i = find_token(document.body, "\\begin_inset LatexCommand", i)
        if i == -1:
            break
        # Everything after "\begin_inset LatexCommand " (26 characters).
        command = document.body[i][26:].strip()
        if command == "":
            document.warning("Malformed LyX document: Missing LatexCommand name.")
            i = i + 1
            continue

        # The following parser is taken from the original InsetCommandParams::scanCommand
        name = ""
        option1 = ""
        option2 = ""
        argument = ""
        state = "WS"
        # Used to handle things like \command[foo[bar]]{foo{bar}}
        nestdepth = 0
        # Previous character; "[" right after "]" starts the second option.
        b = 0
        for c in command:
            if ((state == "CMDNAME" and c == ' ') or
                (state == "CMDNAME" and c == '[') or
                (state == "CMDNAME" and c == '{')):
                state = "WS"
            if ((state == "OPTION" and c == ']') or
                (state == "SECOPTION" and c == ']') or
                (state == "CONTENT" and c == '}')):
                if nestdepth == 0:
                    state = "WS"
                else:
                    nestdepth = nestdepth - 1
            if ((state == "OPTION" and c == '[') or
                (state == "SECOPTION" and c == '[') or
                (state == "CONTENT" and c == '{')):
                nestdepth = nestdepth + 1
            if state == "CMDNAME":
                name += c
            elif state == "OPTION":
                option1 += c
            elif state == "SECOPTION":
                option2 += c
            elif state == "CONTENT":
                argument += c
            elif state == "WS":
                if c == '\\':
                    state = "CMDNAME"
                elif c == '[' and b != ']':
                    state = "OPTION"
                    nestdepth = 0 # Just to be sure
                elif c == '[' and b == ']':
                    state = "SECOPTION"
                    nestdepth = 0 # Just to be sure
                elif c == '{':
                    state = "CONTENT"
                    nestdepth = 0 # Just to be sure
            b = c

        # Now we have parsed the command, output the parameters
        param_names = commandparams_info.get(name)
        if param_names is None:
            # Unknown commands used to raise a KeyError as soon as they had
            # an option or argument; treat them as parameterless instead.
            document.warning("Unknown LatexCommand `%s'." % name)
            param_names = ["", "", ""]
        lines = ["\\begin_inset LatexCommand %s" % name]
        parsed = [("option", option1, param_names[0]),
                  ("second option", option2, param_names[1]),
                  ("argument", argument, param_names[2])]
        for kind, value, param_name in parsed:
            if value == "":
                continue
            if param_name == "":
                document.warning("Ignoring invalid %s `%s' of command `%s'." % (kind, value, name))
            else:
                lines.append('%s "%s"' % (param_name, value.replace('"', '\\"')))
        document.body[i:i+1] = lines
        i = i + 1


def revert_commandparams(document):
    r""" Revert

 \begin_inset LatexCommand cmdname
 name1 "opt1"
 name2 "opt2"
 name3 "arg"
 \end_inset

 to

 \begin_inset LatexCommand \cmdname[opt1][opt2]{arg}
 \end_inset

 A bibitem inset becomes a plain "\bibitem [opt1]{arg}" line instead.
 A "preview" line of the inset is kept; other lines that are not
 "name value" pairs are dropped with a warning.
"""
    regex = re.compile(r'(\S+)\s+(.+)')
    i = 0
    while 1:
        i = find_token(document.body, "\\begin_inset LatexCommand", i)
        if i == -1:
            break
        tokens = document.body[i].split()
        if len(tokens) < 3:
            document.warning("Malformed LyX document: Missing LatexCommand name.")
            i = i + 1
            continue
        name = tokens[2]
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            # Without the end of the inset the slice assignment below would
            # insert lines instead of replacing them.
            document.warning("Malformed LyX document: Could not find end of command inset %s." % name)
            i = i + 1
            continue
        param_names = commandparams_info.get(name)
        if param_names is None:
            # Used to raise a KeyError at the first parameter line.
            document.warning("Unknown LatexCommand `%s'." % name)
            param_names = ["", "", ""]
        preview_line = ""
        option1 = ""
        option2 = ""
        argument = ""
        for k in range(i + 1, j):
            match = regex.match(document.body[k])
            if match:
                pname = match.group(1)
                pvalue = match.group(2)
                if pname == "preview":
                    preview_line = document.body[k]
                elif param_names[0] != "" and pname == param_names[0]:
                    option1 = pvalue.strip('"').replace('\\"', '"')
                elif param_names[1] != "" and pname == param_names[1]:
                    option2 = pvalue.strip('"').replace('\\"', '"')
                elif param_names[2] != "" and pname == param_names[2]:
                    argument = pvalue.strip('"').replace('\\"', '"')
            elif document.body[k].strip() != "":
                document.warning("Ignoring unknown contents `%s' in command inset %s." % (document.body[k], name))
        if name == "bibitem":
            # bibitems are not insets in the old format.
            if option1 == "":
                lines = ["\\bibitem {%s}" % argument]
            else:
                lines = ["\\bibitem [%s]{%s}" % (option1, argument)]
        else:
            if option1 == "":
                if option2 == "":
                    lines = ["\\begin_inset LatexCommand \\%s{%s}" % (name, argument)]
                else:
                    # The empty first option is needed as a placeholder.
                    lines = ["\\begin_inset LatexCommand \\%s[][%s]{%s}" % (name, option2, argument)]
            else:
                if option2 == "":
                    lines = ["\\begin_inset LatexCommand \\%s[%s]{%s}" % (name, option1, argument)]
                else:
                    lines = ["\\begin_inset LatexCommand \\%s[%s][%s]{%s}" % (name, option1, option2, argument)]
            if preview_line != "":
                lines.append(preview_line)
            lines.append('')
            lines.append('\\end_inset')
        document.body[i:j+1] = lines
        i = j + 1


def revert_nomenclature(document):
    """ Convert nomenclature entry to ERT.

    Each nomenclature inset is replaced by an ERT inset containing
    \\nomenclature[prefix]{symbol}{description} (the [prefix] part only if
    a prefix is set).  If at least one inset was converted and the
    preamble does not load nomencl yet, the package is added to it.
    """
    regex = re.compile(r'(\S+)\s+(.+)')
    i = 0
    use_nomencl = 0
    while 1:
        i = find_token(document.body, "\\begin_inset LatexCommand nomenclature", i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            # Replacing body[i:j+1] with j == -1 would insert the ERT in
            # front of the inset, which would then be found again forever.
            document.warning("Malformed LyX document: Could not find end of nomenclature inset.")
            i = i + 1
            continue
        use_nomencl = 1
        symbol = ""
        description = ""
        prefix = ""
        for k in range(i + 1, j):
            match = regex.match(document.body[k])
            if match:
                name = match.group(1)
                value = match.group(2)
                # Other "name value" lines (e.g. preview) are dropped silently.
                if name == "symbol":
                    symbol = value.strip('"').replace('\\"', '"')
                elif name == "description":
                    description = value.strip('"').replace('\\"', '"')
                elif name == "prefix":
                    prefix = value.strip('"').replace('\\"', '"')
            elif document.body[k].strip() != "":
                document.warning("Ignoring unknown contents `%s' in nomenclature inset." % document.body[k])
        if prefix == "":
            command = 'nomenclature{%s}{%s}' % (symbol, description)
        else:
            command = 'nomenclature[%s]{%s}{%s}' % (prefix, symbol, description)
        ert = ['\\begin_inset ERT',
               'status collapsed',
               '',
               '\\begin_layout %s' % document.default_layout,
               '',
               '',
               '\\backslash',
               command,
               '\\end_layout',
               '',
               '\\end_inset']
        document.body[i:j+1] = ert
        # Continue after the ERT inset we just wrote.
        i = i + len(ert)
    if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
        document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
        document.preamble.append('\\makenomenclature')


def revert_printnomenclature(document):
    """ Convert printnomenclature to ERT.

    Each printnomenclature inset is replaced by an ERT inset containing
    \\printnomenclature, with the label width as optional argument if one
    is set.  If at least one inset was converted and the preamble does not
    load nomencl yet, the package is added to it.
    """
    regex = re.compile(r'(\S+)\s+(.+)')
    i = 0
    use_nomencl = 0
    while 1:
        i = find_token(document.body, "\\begin_inset LatexCommand printnomenclature", i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            # Replacing body[i:j+1] with j == -1 would insert the ERT in
            # front of the inset, which would then be found again forever.
            document.warning("Malformed LyX document: Could not find end of printnomenclature inset.")
            i = i + 1
            continue
        use_nomencl = 1
        labelwidth = ""
        for k in range(i + 1, j):
            match = regex.match(document.body[k])
            if match:
                name = match.group(1)
                value = match.group(2)
                # Other "name value" lines (e.g. preview) are dropped silently.
                if name == "labelwidth":
                    labelwidth = value.strip('"').replace('\\"', '"')
            elif document.body[k].strip() != "":
                document.warning("Ignoring unknown contents `%s' in printnomenclature inset." % document.body[k])
        # This used to write \nomenclature, which is the command for a
        # single entry, not the one that prints the list.
        if labelwidth == "":
            command = 'printnomenclature{}'
        else:
            command = 'printnomenclature[%s]' % labelwidth
        ert = ['\\begin_inset ERT',
               'status collapsed',
               '',
               '\\begin_layout %s' % document.default_layout,
               '',
               '',
               '\\backslash',
               command,
               '\\end_layout',
               '',
               '\\end_inset']
        document.body[i:j+1] = ert
        # Continue after the ERT inset we just wrote.
        i = i + len(ert)
    if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
        document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
        document.preamble.append('\\makenomenclature')


def convert_esint(document):
    " Insert '\\use_esint 0' into the header, in front of \\cite_engine. "
    pos = find_token(document.header, "\\cite_engine", 0)
    if pos != -1:
        # 0 is off, 1 is auto, 2 is on.
        document.header.insert(pos, '\\use_esint 0')
        return
    document.warning("Malformed LyX document: Missing `\\cite_engine'.")


def revert_esint(document):
    """ Remove \\use_esint setting from header.

    If the setting was 2 (on), esint is loaded in the preamble instead.
    """
    i = find_token(document.header, "\\use_esint", 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing `\\use_esint'.")
        return
    fields = document.header[i].split()
    del document.header[i]
    # 0 is off, 1 is auto, 2 is on.
    # The value is a string taken from the header line, so it has to be
    # compared with '2'; comparing with the number 2 is never true.
    if len(fields) > 1 and fields[1] == '2':
        document.preamble.append('\\usepackage{esint}')


def revert_clearpage(document):
    " clearpage -> ERT"
    i = 0
    while 1:
        i = find_token(document.body, "\\clearpage", i)
        if i == -1:
            break
        ert = ['\\begin_inset ERT',
               'status collapsed',
               '',
               '\\begin_layout %s' % document.default_layout,
               '',
               '',
               '\\backslash',
               'clearpage',
               '\\end_layout',
               '',
               '\\end_inset']
        document.body[i:i+1] = ert
        # Continue after the ERT inset.  (The increment used to sit after
        # the loop, where it had no effect.)
        i = i + len(ert)


def revert_cleardoublepage(document):
    " cleardoublepage -> ERT"
    i = 0
    while 1:
        i = find_token(document.body, "\\cleardoublepage", i)
        if i == -1:
            break
        ert = ['\\begin_inset ERT',
               'status collapsed',
               '',
               '\\begin_layout %s' % document.default_layout,
               '',
               '',
               '\\backslash',
               'cleardoublepage',
               '\\end_layout',
               '',
               '\\end_inset']
        document.body[i:i+1] = ert
        # Continue after the ERT inset.  (The increment used to sit after
        # the loop, where it had no effect.)
        i = i + len(ert)


##
# Conversion hub
#

supported_versions = ["1.5.0", "1.5"]

# Each entry is [number, [functions defined above]].
# NOTE(review): the number is presumably the file format the listed
# functions convert to -- confirm against the code that reads these tables.
convert = [
    [246, []],
    [247, [convert_font_settings]],
    [248, []],
    [249, [convert_utf8]],
    [250, []],
    [251, []],
    [252, [convert_commandparams, convert_bibitem]],
    [253, []],
    [254, [convert_esint]],
    [255, []],
]

# Same layout as `convert`, listed from the highest number down.
revert = [
    [254, [revert_clearpage, revert_cleardoublepage]],
    [253, [revert_esint]],
    [252, [revert_nomenclature, revert_printnomenclature]],
    [251, [revert_commandparams]],
    [250, [revert_cs_label]],
    [249, []],
    [248, [revert_utf8]],
    [247, [revert_booktabs]],
    [246, [revert_font_settings]],
    [245, [revert_framed]],
]


if __name__ == "__main__":
    # Nothing to do when run as a script.
    pass