mirror of
https://git.lyx.org/repos/lyx.git
synced 2024-11-27 11:52:25 +00:00
c2ac6440f7
Because of extensive changes in the support files, it seems to me that we actually do need to copy over everything.
2109 lines
81 KiB
Python
2109 lines
81 KiB
Python
# This file is part of lyx2lyx
|
|
# -*- coding: utf-8 -*-
|
|
# Copyright (C) 2006 José Matos <jamatos@lyx.org>
|
|
# Copyright (C) 2004-2006 Georg Baum <Georg.Baum@post.rwth-aachen.de>
|
|
#
|
|
# This program is free software; you can redistribute it and/or
|
|
# modify it under the terms of the GNU General Public License
|
|
# as published by the Free Software Foundation; either version 2
|
|
# of the License, or (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with this program; if not, write to the Free Software
|
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
""" Convert files to the file format generated by lyx 1.5"""
|
|
|
|
import re
|
|
import unicodedata
|
|
import sys, os
|
|
|
|
from parser_tools import find_re, find_token, find_token_backwards, find_token_exact, find_tokens, find_end_of, get_value, find_beginning_of, find_nonempty_line
|
|
from LyX import get_encoding
|
|
|
|
# Python 2/3 compatibility shim: a common name for the text type and unichr()
PY2 = sys.version_info[0] == 2
if PY2:
    # Python 2 already has a unichr() builtin, only the text type is aliased
    text_type = unicode
else:
    text_type = str
    unichr = chr
# End of the Python 2/3 compatibility shim
|
|
|
|
####################################################################
|
|
# Private helper functions
|
|
|
|
def find_end_of_inset(lines, i):
    " Thin wrapper around find_end_of() for insets; the search includes lines[i]."
    begin_tag = "\\begin_inset"
    end_tag = "\\end_inset"
    return find_end_of(lines, i, begin_tag, end_tag)
|
|
|
|
def find_end_of_layout(lines, i):
    " Thin wrapper around find_end_of() for layouts; the search includes lines[i]."
    begin_tag = "\\begin_layout"
    end_tag = "\\end_layout"
    return find_end_of(lines, i, begin_tag, end_tag)
|
|
|
|
def find_beginning_of_layout(lines, i):
    " Thin wrapper around find_beginning_of() for layouts; the search includes lines[i]."
    begin_tag = "\\begin_layout"
    end_tag = "\\end_layout"
    return find_beginning_of(lines, i, begin_tag, end_tag)
|
|
|
|
# End of helper functions
|
|
####################################################################
|
|
|
|
|
|
##
|
|
# Notes: Framed/Shaded
|
|
#
|
|
|
|
def revert_framed(document):
    " Turn every framed or shaded note inset into a plain note inset. "
    note_tokens = ["\\begin_inset Note Framed", "\\begin_inset Note Shaded"]
    pos = find_tokens(document.body, note_tokens, 0)
    while pos != -1:
        document.body[pos] = "\\begin_inset Note"
        # resume the search on the line after the one just rewritten
        pos = find_tokens(document.body, note_tokens, pos + 1)
|
|
|
|
|
|
##
|
|
# Fonts
|
|
#
|
|
|
|
# Translation tables from the old \fontscheme value (the key) to the value of
# the per-family settings \font_roman, \font_sans and \font_typewriter.
# All three tables share the same keys, in the same order.
roman_fonts = {
    'default': 'default',
    'ae': 'ae',
    'times': 'times',
    'palatino': 'palatino',
    'helvet': 'default',
    'avant': 'default',
    'newcent': 'newcent',
    'bookman': 'bookman',
    'pslatex': 'times',
}
sans_fonts = {
    'default': 'default',
    'ae': 'default',
    'times': 'default',
    'palatino': 'default',
    'helvet': 'helvet',
    'avant': 'avant',
    'newcent': 'default',
    'bookman': 'default',
    'pslatex': 'helvet',
}
typewriter_fonts = {
    'default': 'default',
    'ae': 'default',
    'times': 'default',
    'palatino': 'default',
    'helvet': 'default',
    'avant': 'default',
    'newcent': 'default',
    'bookman': 'default',
    'pslatex': 'courier',
}
|
|
|
|
def convert_font_settings(document):
    " Replace the \\fontscheme header line by the individual font settings. "
    header = document.header
    pos = find_token_exact(header, "\\fontscheme", 0)
    if pos == -1:
        document.warning("Malformed LyX document: Missing `\\fontscheme'.")
        return
    scheme = get_value(header, "\\fontscheme", pos, pos + 1)
    if scheme == '':
        document.warning("Malformed LyX document: Empty `\\fontscheme'.")
        scheme = 'default'
    if scheme not in roman_fonts:
        document.warning("Malformed LyX document: Unknown `\\fontscheme' `%s'." % scheme)
        scheme = 'default'
    # The single \fontscheme line is replaced by eight new header lines
    new_settings = [
        '\\font_roman %s' % roman_fonts[scheme],
        '\\font_sans %s' % sans_fonts[scheme],
        '\\font_typewriter %s' % typewriter_fonts[scheme],
        '\\font_default_family default',
        '\\font_sc false',
        '\\font_osf false',
        '\\font_sf_scale 100',
        '\\font_tt_scale 100',
    ]
    header[pos:pos + 1] = new_settings
|
|
|
|
|
|
def _pop_font_header(document, token, start, default):
    """ Remove the header line starting with `token` and return its value.

    The header is searched from index `start`. Returns a (value, index) pair,
    where index is the position the line was found at. If the token is
    missing a warning is issued and (default, -1) is returned.
    """
    pos = find_token_exact(document.header, token, start)
    if pos == -1:
        document.warning("Malformed LyX document: Missing `%s'." % token)
        return default, -1
    value = get_value(document.header, token, pos, pos + 1)
    del document.header[pos]
    return value, pos


def revert_font_settings(document):
    """ Revert font settings.

    The \\font_* header lines are removed and replaced by a single
    \\fontscheme line. Settings that cannot be expressed by a font scheme
    are written as LaTeX commands to the preamble where a translation
    exists; otherwise a warning is issued.
    """
    i = 0
    insert_line = -1
    fonts = {'roman' : 'default', 'sans' : 'default', 'typewriter' : 'default'}
    for family in 'roman', 'sans', 'typewriter':
        value, pos = _pop_font_header(document, '\\font_%s' % family, i, 'default')
        if pos == -1:
            i = 0
            continue
        if insert_line < 0:
            # \fontscheme goes where the first font setting was found
            insert_line = pos
        fonts[family] = value
        i = pos

    # For the remaining settings the search index is reset to 0 when a token
    # is missing (as the loop above does), so that a failed search never
    # leaves -1 as the start index of the next search.
    font_default_family, pos = _pop_font_header(document, '\\font_default_family', i, 'default')
    i = max(pos, 0)

    font_sc, pos = _pop_font_header(document, '\\font_sc', i, 'false')
    i = max(pos, 0)
    if font_sc != 'false':
        document.warning("Conversion of '\\font_sc' not yet implemented.")

    font_osf, pos = _pop_font_header(document, '\\font_osf', i, 'false')
    i = max(pos, 0)

    font_sf_scale, pos = _pop_font_header(document, '\\font_sf_scale', i, '100')
    i = max(pos, 0)
    if font_sf_scale != '100':
        document.warning("Conversion of '\\font_sf_scale' not yet implemented.")

    font_tt_scale, pos = _pop_font_header(document, '\\font_tt_scale', i, '100')
    i = max(pos, 0)
    if font_tt_scale != '100':
        document.warning("Conversion of '\\font_tt_scale' not yet implemented.")

    # If the three families correspond to a known font scheme, use it
    for font_scheme in list(roman_fonts.keys()):
        if (roman_fonts[font_scheme] == fonts['roman'] and
            sans_fonts[font_scheme] == fonts['sans'] and
            typewriter_fonts[font_scheme] == fonts['typewriter']):
            document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
            if font_default_family != 'default':
                document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
            if font_osf == 'true':
                document.warning("Ignoring `\\font_osf = true'")
            return

    # No scheme matches: use the default scheme and select the fonts in the
    # preamble instead
    font_scheme = 'default'
    document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
    if fonts['roman'] == 'cmr':
        document.preamble.append('\\renewcommand{\\rmdefault}{cmr}')
        if font_osf == 'true':
            document.preamble.append('\\usepackage{eco}')
            font_osf = 'false'
    if fonts['roman'] in ('lmodern', 'charter', 'utopia', 'beraserif', 'ccfonts', 'chancery'):
        document.preamble.append('\\usepackage{%s}' % fonts['roman'])
    if fonts['sans'] in ('cmss', 'lmss', 'cmbr'):
        document.preamble.append('\\renewcommand{\\sfdefault}{%s}' % fonts['sans'])
    # This used to be "for font in 'berasans':", which iterates over the
    # single characters of the string and therefore never matched.
    if fonts['sans'] == 'berasans':
        document.preamble.append('\\usepackage{%s}' % fonts['sans'])
    if fonts['typewriter'] in ('cmtt', 'lmtt', 'cmtl'):
        document.preamble.append('\\renewcommand{\\ttdefault}{%s}' % fonts['typewriter'])
    if fonts['typewriter'] in ('courier', 'beramono', 'luximono'):
        document.preamble.append('\\usepackage{%s}' % fonts['typewriter'])
    if font_default_family != 'default':
        document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
    if font_osf == 'true':
        document.warning("Ignoring `\\font_osf = true'")
|
|
|
|
|
|
def revert_booktabs(document):
    """ We remove the booktabs flag or everything else will become a mess.

    In every Tabular inset the booktabs="true" attribute is removed from the
    <features> line, and the topspace, bottomspace and interlinespace
    attributes are removed from the <row> lines.
    """
    re_features = re.compile(r'^<features.* booktabs="true".*>$')
    re_row = re.compile(r'^<row.*space="[^"]+".*>$')
    re_tspace = re.compile(r'\s+topspace="[^"]+"')
    re_bspace = re.compile(r'\s+bottomspace="[^"]+"')
    re_ispace = re.compile(r'\s+interlinespace="[^"]+"')
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Tabular", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of tabular.")
            # Step over this inset: without advancing, the same line would
            # be found again and the loop would never terminate.
            i = i + 1
            continue
        for k in range(i, j):
            if re_features.search(document.body[k]):
                document.warning("Converting 'booktabs' table to normal table.")
                document.body[k] = document.body[k].replace(' booktabs="true"', '')
            if re_row.search(document.body[k]):
                document.warning("Removing extra row space.")
                document.body[k] = re_tspace.sub('', document.body[k])
                document.body[k] = re_bspace.sub('', document.body[k])
                document.body[k] = re_ispace.sub('', document.body[k])
        i = i + 1
|
|
|
|
|
|
def convert_multiencoding(document, forward):
    """ Fix files with multiple encodings.
    Files with an inputencoding of "auto" or "default" and multiple languages
    where at least two languages have different default encodings are encoded
    in multiple encodings for file formats < 249. These files are incorrectly
    read and written (as if the whole file was in the encoding of the main
    language).
    This is not true for files written by CJK-LyX, they are always in the locale
    encoding.

    This function
    - converts from fake unicode values to true unicode if forward is true, and
    - converts from true unicode values to fake unicode if forward is false.
    document.encoding must be set to the old value (format 248) in both cases.

    We do this here and not in LyX.py because it is far easier to do the
    necessary parsing in modern formats than in ancient ones.

    Nothing is done if document.cjk_encoding is set, or if
    document.inputencoding is neither "auto" nor "default".
    document.body is modified in place; nothing is returned.
    """
    # Layouts directly inside these insets start with the encoding of the
    # document language instead of inheriting the current one
    inset_types = ["Foot", "Note"]
    if document.cjk_encoding != '':
        return
    # Stack of encodings, one entry per open layout on top of the document
    # encoding; the last entry is the encoding of the current line
    encoding_stack = [document.encoding]
    # Stack of the type names of the currently open insets
    insets = []
    lang_re = re.compile(r"^\\lang\s(\S+)")
    inset_re = re.compile(r"^\\begin_inset\s(\S+)")
    if not forward: # no need to read file unless we are reverting
        spec_chars = read_unicodesymbols()

    if document.inputencoding == "auto" or document.inputencoding == "default":
        i = 0
        while i < len(document.body):
            result = lang_re.match(document.body[i])
            if result:
                # A language switch changes the encoding of the current layout
                language = result.group(1)
                if language == "default":
                    document.warning("Resetting encoding from %s to %s." % (encoding_stack[-1], document.encoding), 3)
                    encoding_stack[-1] = document.encoding
                else:
                    from lyx2lyx_lang import lang
                    document.warning("Setting encoding from %s to %s." % (encoding_stack[-1], lang[language][3]), 3)
                    encoding_stack[-1] = lang[language][3]
            elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
                document.warning("Adding nested encoding %s." % encoding_stack[-1], 3)
                if len(insets) > 0 and insets[-1] in inset_types:
                    from lyx2lyx_lang import lang
                    encoding_stack.append(lang[document.language][3])
                else:
                    encoding_stack.append(encoding_stack[-1])
            elif find_token(document.body, "\\end_layout", i, i + 1) == i:
                document.warning("Removing nested encoding %s." % encoding_stack[-1], 3)
                if len(encoding_stack) == 1:
                    # Don't remove the document encoding from the stack
                    document.warning("Malformed LyX document: Unexpected `\\end_layout'.")
                else:
                    del encoding_stack[-1]
            elif find_token(document.body, "\\begin_inset", i, i + 1) == i:
                inset_result = inset_re.match(document.body[i])
                if inset_result:
                    insets.append(inset_result.group(1))
                else:
                    insets.append("")
            elif find_token(document.body, "\\end_inset", i, i + 1) == i:
                # NOTE(review): raises IndexError on an unbalanced \end_inset
                del insets[-1]
            # Only lines whose encoding differs from the document encoding
            # need to be recoded
            if encoding_stack[-1] != document.encoding:
                if forward:
                    # This line has been incorrectly interpreted as if it was
                    # encoded in 'encoding'.
                    # Convert back to the 8bit string that was in the file.
                    orig = document.body[i].encode(document.encoding)
                    # Convert the 8bit string that was in the file to unicode
                    # with the correct encoding.
                    document.body[i] = orig.decode(encoding_stack[-1])
                else:
                    try:
                        # Convert unicode to the 8bit string that will be written
                        # to the file with the correct encoding.
                        orig = document.body[i].encode(encoding_stack[-1])
                        # Convert the 8bit string that will be written to the
                        # file to fake unicode with the encoding that will later
                        # be used when writing to the file.
                        document.body[i] = orig.decode(document.encoding)
                    except:
                        # The line cannot be recoded as a whole: replace the
                        # offending characters. The replacement may span
                        # several lines, which are skipped below.
                        mod_line = revert_unicode_line(document, i, insets, spec_chars)
                        document.body[i:i+1] = mod_line.split('\n')
                        i += len(mod_line.split('\n')) - 1
            i += 1
|
|
|
|
|
|
def convert_utf8(document):
    " Fix multi-encoding lines, then mark the document as UTF-8 encoded. "
    # The recoding must run while document.encoding still holds the old value
    convert_multiencoding(document, forward=True)
    document.encoding = "utf8"
|
|
|
|
|
|
def revert_utf8(document):
    " Go back from UTF-8 to the encoding that corresponds to inputencoding. "
    header = document.header
    pos = find_token(header, "\\inputencoding", 0)
    if pos == -1:
        header.append("\\inputencoding auto")
    elif get_value(header, "\\inputencoding", pos) == "utf8":
        # utf8 is not available as inputencoding in the old format
        header[pos] = "\\inputencoding auto"
    document.inputencoding = get_value(header, "\\inputencoding", 0)
    document.encoding = get_encoding(document.language,
                                     document.inputencoding,
                                     248,
                                     document.cjk_encoding)
    convert_multiencoding(document, forward=False)
|
|
|
|
|
|
# FIXME: Use the version in unicode_symbols.py which has some bug fixes
|
|
def read_unicodesymbols():
    """ Read the unicodesymbols list of unicode characters and corresponding commands.

    Returns a dictionary that maps each unicode character to the list
    [command, flag1, flag2] read from its line of the file. Comment lines
    and lines that cannot be parsed are skipped.
    """
    pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
    # NOTE(review): str.strip() removes any of the characters 'l', 'y', 'x'
    # and '2' from both ends of the path, it does not remove the directory
    # name 'lyx2lyx'. Kept unchanged because the file location depends on it.
    filename = os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols')
    spec_chars = {}
    # The with statement closes the file even if reading fails
    with open(filename) as fp:
        for line in fp:
            if line[0] == '#':
                continue
            line = line.replace(' "', ' ') # remove all quotation marks with spaces before
            line = line.replace('" ', ' ') # remove all quotation marks with spaces after
            line = line.replace(r'\"', '"') # replace \" by " (for characters with diaeresis)
            try:
                # flag1 and flag2 are preamble and other flags
                ucs4, command, flag1, flag2 = line.split(None, 3)
                # NOTE(review): eval() executes text read from the file; this
                # is only acceptable as long as unicodesymbols is a trusted
                # file shipped with LyX.
                spec_chars[unichr(eval(ucs4))] = [command, flag1, flag2]
            except Exception:
                # Deliberately best effort: skip lines that do not have the
                # expected four fields or a valid code point
                continue
    return spec_chars
|
|
|
|
|
|
def revert_unicode_line(document, i, insets, spec_chars, replacement_character = '???'):
    """ Return line i of the body with all characters replaced that can not
    be encoded in document.encoding.

    A character that is a key of spec_chars is replaced by its command,
    wrapped in an ERT or math inset depending on the innermost entry of
    insets. All other characters are replaced by replacement_character.
    The result may contain newlines; the caller has to split it into lines.
    If a combining character is the first character of the line, the last
    character of document.body[i-1] is removed, since it becomes the argument
    of the command.
    """
    # Define strings to start and end ERT and math insets
    ert_intro='\n\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout %s' % document.default_layout
    ert_outro='\n\\end_layout\n\n\\end_inset\n'
    math_intro='\n\\begin_inset Formula $'
    math_outro='$\n\\end_inset'

    mod_line = u''
    if i and not is_inset_line(document, i-1):
        last_char = document.body[i - 1][-1:]
    else:
        last_char = ''

    line = document.body[i]
    for character in line:
        try:
            # Try to write the character
            character.encode(document.encoding)
            mod_line += character
            last_char = character
        except Exception:
            # Try to replace with ERT/math inset
            if character in spec_chars:
                command = spec_chars[character][0] # the command to replace unicode
                flag1 = spec_chars[character][1]
                flag2 = spec_chars[character][2]
                if flag1.find('combining') > -1 or flag2.find('combining') > -1:
                    # We have a character that should be combined with the previous
                    command += '{' + last_char + '}'
                    # Remove the last character. Ignore if it is whitespace
                    if len(last_char.rstrip()):
                        # last_char was found and is not whitespace
                        if mod_line:
                            mod_line = mod_line[:-1]
                        else: # last_char belongs to the last line
                            document.body[i-1] = document.body[i-1][:-1]
                    # Otherwise the last character was replaced by a command.
                    # For now it is ignored. This could be handled better.
                if command[0:2] == '\\\\':
                    if command[2:12]=='ensuremath':
                        if insets and insets[-1] == "ERT":
                            # math in ERT
                            command = command.replace('\\\\ensuremath{\\\\', '$\n\\backslash\n')
                            command = command.replace('}', '$\n')
                        elif not insets or insets[-1] != "Formula":
                            # add a math inset with the replacement character
                            command = command.replace('\\\\ensuremath{\\', math_intro)
                            command = command.replace('}', math_outro)
                        else:
                            # we are already in a math inset
                            command = command.replace('\\\\ensuremath{\\', '')
                            command = command.replace('}', '')
                    else:
                        if insets and insets[-1] == "Formula":
                            # avoid putting an ERT in a math; instead put command as text
                            # (the backslash is now escaped: '\m' is an
                            # invalid escape sequence)
                            command = command.replace('\\\\', '\\mathrm{')
                            command = command + '}'
                        elif not insets or insets[-1] != "ERT":
                            # add an ERT inset with the replacement character
                            command = command.replace('\\\\', '\n\\backslash\n')
                            command = ert_intro + command + ert_outro
                        else:
                            command = command.replace('\\\\', '\n\\backslash\n')
                    last_char = '' # indicate that the character should not be removed
                mod_line += command
            else:
                # Replace with replacement string
                mod_line += replacement_character
    return mod_line
|
|
|
|
|
|
def revert_unicode(document):
    '''Replace the characters of the body that the document encoding can
    not represent. They become the commands listed in the unicodesymbols
    file, or a replacement string if no command is known. Flags other
    than 'combined' are currently not implemented.'''
    spec_chars = read_unicodesymbols()
    open_insets = [] # types of the insets that are currently open

    pos = 0
    while pos < len(document.body):
        text = document.body[pos]
        # Keep track of the inset nesting
        if '\\begin_inset' in text:
            open_insets.append(text[13:].split()[0])
        if '\\end_inset' in text:
            del open_insets[-1]

        # A line that can be encoded as a whole is left alone
        try:
            text.encode(document.encoding)
        except:
            # Some character(s) of the line need to be replaced; the
            # replacement may consist of several lines
            new_lines = revert_unicode_line(document, pos, open_insets, spec_chars).split('\n')
            document.body[pos:pos+1] = new_lines
            pos += len(new_lines)
        else:
            pos += 1
|
|
|
|
|
|
def revert_cs_label(document):
    """ Remove status flag of charstyle label.

    For every CharStyle inset the first following line starting with
    'show_label' is deleted. A warning is issued if a \\begin_layout line,
    or the end of the document, is reached before such a line is found.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CharStyle", i)
        if i == -1:
            return
        # Search for a line starting 'show_label'
        # If it is not there, break with a warning message
        i = i + 1
        # The scan is bounded by the length of the body, so that a truncated
        # document produces a warning instead of an IndexError
        while i < len(document.body):
            if document.body[i].startswith("show_label"):
                del document.body[i]
                break
            elif document.body[i].startswith("\\begin_layout"):
                document.warning("Malformed LyX document: Missing 'show_label'.")
                break
            i = i + 1
        else:
            # End of document reached without finding either line
            document.warning("Malformed LyX document: Missing 'show_label'.")
            return

        i = i + 1
|
|
|
|
|
|
def convert_bibitem(document):
    r""" Turn every line of the form

    \bibitem [option]{argument}

    into the command inset

    \begin_inset LatexCommand bibitem
    label "option"
    key "argument"

    \end_inset

    The label line is only written if an optional argument is present.
    This must be called after convert_commandparams.
    """
    pos = find_token(document.body, "\\bibitem", 0)
    while pos != -1:
        text = document.body[pos]
        bracket = text.find('[')
        if bracket == -1:
            # No optional argument found
            label = None
        else:
            label = text[bracket + 1:text.rfind(']')]
        key = text[text.rfind('{') + 1:text.rfind('}')]
        inset = ['\\begin_inset LatexCommand bibitem']
        if label is not None:
            inset.append('label "%s"' % label.replace('"', '\\"'))
        inset.extend(['key "%s"' % key.replace('"', '\\"'),
                      '',
                      '\\end_inset'])
        document.body[pos:pos+1] = inset
        pos = find_token(document.body, "\\bibitem", pos + 1)
|
|
|
|
|
|
# Parameter names of the LatexCommand insets:
# command : [option1, option2, argument]
commandparams_info = {
    "bibitem": ["label", "", "key"],
    "bibtex": ["options", "btprint", "bibfiles"],
}
# All citation commands have the same parameters. Every command gets its
# own list object.
commandparams_info.update(
    (_citecmd, ["after", "before", "key"]) for _citecmd in (
        "cite", "citet", "citep", "citealt", "citealp",
        "citeauthor", "citeyear", "citeyearpar",
        "citet*", "citep*", "citealt*", "citealp*", "citeauthor*",
        "Citet", "Citep", "Citealt", "Citealp", "Citeauthor",
        "Citet*", "Citep*", "Citealt*", "Citealp*", "Citeauthor*",
        "citefield", "citetitle", "cite*"))
commandparams_info.update([
    ("hfill", ["", "", ""]),
    ("index", ["", "", "name"]),
    ("printindex", ["", "", "name"]),
    ("label", ["", "", "name"]),
    ("eqref", ["name", "", "reference"]),
    ("pageref", ["name", "", "reference"]),
    ("prettyref", ["name", "", "reference"]),
    ("ref", ["name", "", "reference"]),
    ("vpageref", ["name", "", "reference"]),
    ("vref", ["name", "", "reference"]),
    ("tableofcontents", ["", "", "type"]),
    ("htmlurl", ["name", "", "target"]),
    ("url", ["name", "", "target"]),
])
|
|
|
|
|
|
def convert_commandparams(document):
    r""" Convert

    \begin_inset LatexCommand \cmdname[opt1][opt2]{arg}
    \end_inset

    to

    \begin_inset LatexCommand cmdname
    name1 "opt1"
    name2 "opt2"
    name3 "arg"
    \end_inset

    name1, name2 and name3 can be different for each command.
    They are looked up in commandparams_info; a parameter whose name is the
    empty string there is dropped with a warning. Backslashes and double
    quotes in the values are escaped with a backslash.
    """
    # \begin_inset LatexCommand bibitem was not the official version (see
    # convert_bibitem()), but could be read in, so we convert it here, too.

    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset LatexCommand", i)
        if i == -1:
            break
        # Everything after the 26 characters of "\begin_inset LatexCommand "
        command = document.body[i][26:].strip()
        if command == "":
            document.warning("Malformed LyX document: Missing LatexCommand name.")
            i = i + 1
            continue

        j = find_token(document.body, "\\end_inset", i + 1)
        if j == -1:
            document.warning("Malformed document")
        else:
            # The command may continue on the lines up to \end_inset: join
            # them and remove them from the body
            command += "".join(document.body[i+1:j])
            document.body[i+1:j] = []

        # The following parser is taken from the original InsetCommandParams::scanCommand
        name = ""
        option1 = ""
        option2 = ""
        argument = ""
        # Parser state, one of "WS", "CMDNAME", "OPTION", "SECOPTION", "CONTENT"
        state = "WS"
        # Used to handle things like \command[foo[bar]]{foo{bar}}
        nestdepth = 0
        # The previous character; a '[' directly after a ']' starts the
        # second option
        b = 0
        for c in command:
            if ((state == "CMDNAME" and c == ' ') or
                (state == "CMDNAME" and c == '[') or
                (state == "CMDNAME" and c == '{')):
                state = "WS"
            if ((state == "OPTION" and c == ']') or
                (state == "SECOPTION" and c == ']') or
                (state == "CONTENT" and c == '}')):
                if nestdepth == 0:
                    state = "WS"
                else:
                    nestdepth = nestdepth - 1
            if ((state == "OPTION" and c == '[') or
                (state == "SECOPTION" and c == '[') or
                (state == "CONTENT" and c == '{')):
                nestdepth = nestdepth + 1
            if state == "CMDNAME":
                name += c
            elif state == "OPTION":
                option1 += c
            elif state == "SECOPTION":
                option2 += c
            elif state == "CONTENT":
                argument += c
            elif state == "WS":
                if c == '\\':
                    state = "CMDNAME"
                elif c == '[' and b != ']':
                    state = "OPTION"
                    nestdepth = 0 # Just to be sure
                elif c == '[' and b == ']':
                    state = "SECOPTION"
                    nestdepth = 0 # Just to be sure
                elif c == '{':
                    state = "CONTENT"
                    nestdepth = 0 # Just to be sure
            b = c

        # Now we have parsed the command, output the parameters
        # NOTE(review): commandparams_info[name] raises KeyError if a
        # parameter is present and name is not a known command
        lines = ["\\begin_inset LatexCommand %s" % name]
        if option1 != "":
            if commandparams_info[name][0] == "":
                document.warning("Ignoring invalid option `%s' of command `%s'." % (option1, name))
            else:
                lines.append('%s "%s"' % (commandparams_info[name][0], option1.replace('\\', '\\\\').replace('"', '\\"')))
        if option2 != "":
            if commandparams_info[name][1] == "":
                document.warning("Ignoring invalid second option `%s' of command `%s'." % (option2, name))
            else:
                lines.append('%s "%s"' % (commandparams_info[name][1], option2.replace('\\', '\\\\').replace('"', '\\"')))
        if argument != "":
            if commandparams_info[name][2] == "":
                document.warning("Ignoring invalid argument `%s' of command `%s'." % (argument, name))
            else:
                lines.append('%s "%s"' % (commandparams_info[name][2], argument.replace('\\', '\\\\').replace('"', '\\"')))
        document.body[i:i+1] = lines
        i = i + 1
|
|
|
|
|
|
def revert_commandparams(document):
    r""" Revert

    \begin_inset LatexCommand cmdname
    name1 "opt1"
    name2 "opt2"
    name3 "arg"
    \end_inset

    to

    \begin_inset LatexCommand \cmdname[opt1][opt2]{arg}

    \end_inset

    The parameter names of each command are taken from commandparams_info.
    A preview line is kept. A bibitem inset is reverted to a single
    \bibitem [opt1]{arg} line without any inset.
    """
    regex = re.compile(r'(\S+)\s+(.+)')
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset LatexCommand", i)
        if i == -1:
            break
        name = document.body[i].split()[2]
        j = find_end_of_inset(document.body, i)
        if j == -1:
            # Without this check the slice assignment below would become
            # body[i:0] = lines, which inserts a second, empty inset in
            # front of the unterminated one.
            document.warning("Malformed LyX document: Could not find end of command inset.")
            i += 1
            continue
        preview_line = ""
        option1 = ""
        option2 = ""
        argument = ""
        for k in range(i + 1, j):
            match = regex.match(document.body[k])
            if match:
                pname = match.group(1)
                pvalue = match.group(2)
                if pname == "preview":
                    preview_line = document.body[k]
                elif (commandparams_info[name][0] != "" and
                      pname == commandparams_info[name][0]):
                    option1 = pvalue.strip('"').replace('\\"', '"').replace('\\\\', '\\')
                elif (commandparams_info[name][1] != "" and
                      pname == commandparams_info[name][1]):
                    option2 = pvalue.strip('"').replace('\\"', '"').replace('\\\\', '\\')
                elif (commandparams_info[name][2] != "" and
                      pname == commandparams_info[name][2]):
                    argument = pvalue.strip('"').replace('\\"', '"').replace('\\\\', '\\')
            elif document.body[k].strip() != "":
                document.warning("Ignoring unknown contents `%s' in command inset %s." % (document.body[k], name))
        if name == "bibitem":
            if option1 == "":
                lines = ["\\bibitem {%s}" % argument]
            else:
                lines = ["\\bibitem [%s]{%s}" % (option1, argument)]
        else:
            if option1 == "":
                if option2 == "":
                    lines = ["\\begin_inset LatexCommand \\%s{%s}" % (name, argument)]
                else:
                    # An empty first option is needed to mark the second one
                    lines = ["\\begin_inset LatexCommand \\%s[][%s]{%s}" % (name, option2, argument)]
            else:
                if option2 == "":
                    lines = ["\\begin_inset LatexCommand \\%s[%s]{%s}" % (name, option1, argument)]
                else:
                    lines = ["\\begin_inset LatexCommand \\%s[%s][%s]{%s}" % (name, option1, option2, argument)]
        if name != "bibitem":
            if preview_line != "":
                lines.append(preview_line)
            lines.append('')
            lines.append('\\end_inset')
        document.body[i:j+1] = lines
        i += len(lines) + 1
|
|
|
|
|
|
def revert_nomenclature(document):
    " Replace every nomenclature inset by an ERT inset with the LaTeX command. "
    param_re = re.compile(r'(\S+)\s+(.+)')
    found_inset = False
    pos = 0
    while True:
        pos = find_token(document.body, "\\begin_inset LatexCommand nomenclature", pos)
        if pos == -1:
            break
        found_inset = True
        end = find_end_of_inset(document.body, pos + 1)
        # Collect the parameters of the inset; a preview line is dropped
        params = {"symbol": "", "description": "", "prefix": ""}
        for k in range(pos + 1, end):
            text = document.body[k]
            found = param_re.match(text)
            if found:
                key = found.group(1)
                if key in params:
                    params[key] = found.group(2).strip('"').replace('\\"', '"')
            elif text.strip() != "":
                document.warning("Ignoring unknown contents `%s' in nomenclature inset." % text)
        if params["prefix"] == "":
            command = 'nomenclature{%s}{%s}' % (params["symbol"], params["description"])
        else:
            command = 'nomenclature[%s]{%s}{%s}' % (params["prefix"], params["symbol"], params["description"])
        ert = ['\\begin_inset ERT',
               'status collapsed',
               '',
               '\\begin_layout %s' % document.default_layout,
               '',
               '',
               '\\backslash',
               command,
               '\\end_layout',
               '',
               '\\end_inset']
        document.body[pos:end+1] = ert
        # continue behind the 11 lines of the new ERT inset
        pos += len(ert)
    # The nomencl package is needed as soon as one inset has been reverted
    if found_inset and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
        document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
        document.preamble.append('\\makenomenclature')
|
|
|
|
|
|
def revert_printnomenclature(document):
    """ Convert printnomenclature to ERT.

    Every printnomenclature inset is replaced by an ERT inset that contains
    \\printnomenclature, with the labelwidth parameter (if any) as optional
    argument. The nomencl package is added to the preamble if an inset has
    been reverted and the package is not loaded yet.
    """
    regex = re.compile(r'(\S+)\s+(.+)')
    i = 0
    use_nomencl = 0
    while True:
        i = find_token(document.body, "\\begin_inset LatexCommand printnomenclature", i)
        if i == -1:
            break
        use_nomencl = 1
        j = find_end_of_inset(document.body, i + 1)
        labelwidth = ""
        for k in range(i + 1, j):
            match = regex.match(document.body[k])
            if match:
                name = match.group(1)
                value = match.group(2)
                # A preview line matches the regex as well; it is dropped
                if name == "labelwidth":
                    labelwidth = value.strip('"').replace('\\"', '"')
            elif document.body[k].strip() != "":
                document.warning("Ignoring unknown contents `%s' in printnomenclature inset." % document.body[k])
        # The command that prints the list is \printnomenclature. The code
        # used to write \nomenclature here, which is the command for a
        # single entry and takes two mandatory arguments.
        if labelwidth == "":
            command = 'printnomenclature{}'
        else:
            command = 'printnomenclature[%s]' % labelwidth
        document.body[i:j+1] = ['\\begin_inset ERT',
                                'status collapsed',
                                '',
                                '\\begin_layout %s' % document.default_layout,
                                '',
                                '',
                                '\\backslash',
                                command,
                                '\\end_layout',
                                '',
                                '\\end_inset']
        # continue behind the 11 lines of the new ERT inset
        i = i + 11
    if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
        document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
        document.preamble.append('\\makenomenclature')
|
|
|
|
|
|
def convert_esint(document):
    " Insert the \\use_esint setting in front of \\cite_engine in the header. "
    pos = find_token(document.header, "\\cite_engine", 0)
    if pos != -1:
        # 0 is off, 1 is auto, 2 is on.
        document.header.insert(pos, '\\use_esint 0')
        return
    document.warning("Malformed LyX document: Missing `\\cite_engine'.")
|
|
|
|
|
|
def revert_esint(document):
    """ Remove \\use_esint setting from header.

    If the setting was 2 (on), the esint package is loaded in the preamble
    instead.
    """
    i = find_token(document.header, "\\use_esint", 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing `\\use_esint'.")
        return
    use_esint = document.header[i].split()[1]
    del document.header[i]
    # 0 is off, 1 is auto, 2 is on.
    # The value is a string: the former comparison with the integer 2 was
    # never true, so the package was never added.
    if use_esint == '2':
        document.preamble.append('\\usepackage{esint}')
|
|
|
|
|
|
def revert_clearpage(document):
    " Replace every \\clearpage line by an ERT inset with the LaTeX command. "
    pos = find_token(document.body, "\\clearpage", 0)
    while pos != -1:
        ert = ['\\begin_inset ERT',
               'status collapsed',
               '',
               '\\begin_layout %s' % document.default_layout,
               '',
               '',
               '\\backslash',
               'clearpage',
               '\\end_layout',
               '',
               '\\end_inset']
        document.body[pos:pos+1] = ert
        pos = find_token(document.body, "\\clearpage", pos + 1)
|
|
|
|
|
|
def revert_cleardoublepage(document):
    r"""Replace every ``\cleardoublepage`` body line by an ERT inset.

    The inset contains the raw LaTeX command, written as ``\backslash``
    followed by ``cleardoublepage``.
    """
    pos = find_token(document.body, "\\cleardoublepage", 0)
    while pos != -1:
        ert_inset = [
            '\\begin_inset ERT',
            'status collapsed',
            '',
            '\\begin_layout %s' % document.default_layout,
            '',
            '',
            '\\backslash',
            'cleardoublepage',
            '\\end_layout',
            '',
            '\\end_inset',
        ]
        document.body[pos:pos+1] = ert_inset
        # Resume the search on the line after the inset start.
        pos = find_token(document.body, "\\cleardoublepage", pos + 1)
|
|
|
|
|
|
def convert_lyxline(document):
    r"""Remove font size commands that directly precede a ``\lyxline``.

    The old ``\lyxline`` definition ignored the font size when choosing the
    line thickness, the new definition honours it.  To keep the output of
    converted documents unchanged, a ``\size`` command located two lines
    before a ``\lyxline`` is deleted.

    Every ``\size`` line of the document is examined.  Size commands that
    do not belong to a ``\lyxline`` are skipped, so they no longer stop the
    conversion of later occurrences.
    """
    fontsizes = ["tiny", "scriptsize", "footnotesize", "small", "normalsize",
                 "large", "Large", "LARGE", "huge", "Huge"]
    body = document.body
    for size in fontsizes:
        token = "\\size " + size
        i = find_token(body, token, 0)
        while i != -1:
            # the corresponding fontsize command is always 2 lines before
            # the \lyxline
            if find_token(body, "\\lyxline", i) == i + 2:
                # After the deletion index i already refers to the next
                # line, so the search continues from the same index.
                del body[i]
            else:
                i = i + 1
            i = find_token(body, token, i)
|
|
|
|
|
|
def revert_encodings(document):
    r"""Set the input encoding to ``auto`` if it is one of the new encodings.

    A missing ``\inputencoding`` header line is added with value ``auto``.
    Afterwards ``document.inputencoding`` is refreshed from the header.
    """
    new_encodings = ("8859-6", "8859-8", "cp437", "cp437de", "cp850", "cp852",
                     "cp855", "cp858", "cp862", "cp865", "cp866", "cp1250",
                     "cp1252", "cp1256", "cp1257", "latin10", "pt254", "tis620-0")
    header = document.header
    pos = find_token(header, "\\inputencoding", 0)
    if pos != -1:
        if get_value(header, "\\inputencoding", pos) in new_encodings:
            header[pos] = "\\inputencoding auto"
    else:
        header.append("\\inputencoding auto")
    document.inputencoding = get_value(header, "\\inputencoding", 0)
|
|
|
|
|
|
def convert_caption(document):
    """Turn Caption layouts into Caption insets.

    Each ``Caption`` paragraph becomes a paragraph of the default layout
    that contains a Caption inset wrapping the former paragraph contents.
    """
    body = document.body
    start = find_token(body, "\\begin_layout Caption", 0)
    while start != -1:
        end = find_end_of_layout(body, start)
        if end == -1:
            document.warning("Malformed LyX document: Missing `\\end_layout'.")
            return

        # Close the inner layout and the inset first: inserting at the end
        # does not shift the start index used below.
        body[end:end] = ["\\end_layout", "", "\\end_inset", "", ""]
        opening = ["\\begin_layout %s" % document.default_layout,
                   "\\begin_inset Caption", "",
                   "\\begin_layout %s" % document.default_layout]
        body[start:start+1] = opening
        start = find_token(body, "\\begin_layout Caption", start + 1)
|
|
|
|
|
|
def revert_caption(document):
    """Convert caption insets back to caption layouts.

    For every Caption inset the surrounding paragraph is either dropped
    (when the inset is the only content before/after it) or split, and the
    inset itself is rewritten as a ``Caption`` paragraph.  The function
    stops with a warning as soon as an expected begin/end token is missing.
    """
    " This assumes that the text class has a caption style. "
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Caption", i)
        if i == -1:
            return

        # We either need to delete the previous \begin_layout line, or we
        # need to end the previous layout if this inset is not in the first
        # position of the paragraph.
        layout_before = find_token_backwards(document.body, "\\begin_layout", i)
        if layout_before == -1:
            document.warning("Malformed LyX document: Missing `\\begin_layout'.")
            return
        # Remembered so the paragraph can be reopened after the caption.
        layout_line = document.body[layout_before]
        del_layout_before = True
        l = layout_before + 1
        # Any non-empty line between \begin_layout and the inset means the
        # paragraph has content in front of the caption.
        while l < i:
            if document.body[l] != "":
                del_layout_before = False
                break
            l = l + 1
        if del_layout_before:
            del document.body[layout_before:i]
            # The inset start moved up to where \begin_layout used to be.
            i = layout_before
        else:
            document.body[i:i] = ["\\end_layout", ""]
            # Two lines were inserted in front of the inset start.
            i = i + 2

        # Find start of layout in the inset and end of inset
        j = find_token(document.body, "\\begin_layout", i)
        if j == -1:
            document.warning("Malformed LyX document: Missing `\\begin_layout'.")
            return
        k = find_end_of_inset(document.body, i)
        if k == -1:
            document.warning("Malformed LyX document: Missing `\\end_inset'.")
            return

        # We either need to delete the following \end_layout line, or we need
        # to restart the old layout if this inset is not at the paragraph end.
        layout_after = find_token(document.body, "\\end_layout", k)
        if layout_after == -1:
            document.warning("Malformed LyX document: Missing `\\end_layout'.")
            return
        del_layout_after = True
        l = k + 1
        # Any non-empty line between \end_inset and \end_layout means the
        # paragraph continues after the caption.
        while l < layout_after:
            if document.body[l] != "":
                del_layout_after = False
                break
            l = l + 1
        if del_layout_after:
            del document.body[k+1:layout_after+1]
        else:
            document.body[k+1:k+1] = [layout_line, ""]

        # delete \begin_layout and \end_inset and replace \begin_inset with
        # "\begin_layout Caption". This works because we can only have one
        # paragraph in the caption inset: The old \end_layout will be recycled.
        # The deletions run from the highest index (k) to the lowest (j) so
        # that the lower indices stay valid.
        del document.body[k]
        if document.body[k] == "":
            del document.body[k]
        del document.body[j]
        if document.body[j] == "":
            del document.body[j]
        document.body[i] = "\\begin_layout Caption"
        if document.body[i+1] == "":
            del document.body[i+1]
        i = i + 1
|
|
|
|
|
|
# Accents of InsetLaTeXAccent
|
|
# Maps the accent name used by InsetLaTeXAccent (the LaTeX accent command
# without the backslash) to the corresponding Unicode combining character.
accent_map = {
    "`": u'\u0300',  # COMBINING GRAVE ACCENT
    "'": u'\u0301',  # COMBINING ACUTE ACCENT
    "^": u'\u0302',  # COMBINING CIRCUMFLEX ACCENT
    "~": u'\u0303',  # COMBINING TILDE
    "=": u'\u0304',  # COMBINING MACRON
    "u": u'\u0306',  # COMBINING BREVE
    ".": u'\u0307',  # COMBINING DOT ABOVE
    '"': u'\u0308',  # COMBINING DIAERESIS
    "r": u'\u030a',  # COMBINING RING ABOVE
    "H": u'\u030b',  # COMBINING DOUBLE ACUTE ACCENT
    "v": u'\u030c',  # COMBINING CARON
    "b": u'\u0320',  # COMBINING MINUS SIGN BELOW
    "d": u'\u0323',  # COMBINING DOT BELOW
    "c": u'\u0327',  # COMBINING CEDILLA
    "k": u'\u0328',  # COMBINING OGONEK
    # The tie is special: it spans two characters, but only one is given
    # as argument, so it does not need to be treated differently.
    "t": u'\u0361',  # COMBINING DOUBLE INVERTED BREVE
}
|
|
|
|
|
|
# special accents of InsetLaTeXAccent without argument
|
|
# Maps InsetLaTeXAccent commands that take no argument to the single
# Unicode character they produce.
special_accent_map = {
    'i': u'\u0131',  # LATIN SMALL LETTER DOTLESS I
    'j': u'\u0237',  # LATIN SMALL LETTER DOTLESS J
    'l': u'\u0142',  # LATIN SMALL LETTER L WITH STROKE
    'L': u'\u0141',  # LATIN CAPITAL LETTER L WITH STROKE
}
|
|
|
|
|
|
# special accent arguments of InsetLaTeXAccent
|
|
# Maps LaTeX commands that may appear as the argument of an accent to the
# Unicode character that receives the accent.
accented_map = {
    '\\i': u'\u0131',  # LATIN SMALL LETTER DOTLESS I
    '\\j': u'\u0237',  # LATIN SMALL LETTER DOTLESS J
}
|
|
|
|
|
|
def _convert_accent(accent, accented_char):
    """Return the NFC-normalized Unicode text for an accent/character pair.

    accent is the accent name (a key of accent_map or special_accent_map),
    accented_char the text the accent is applied to.  An empty string is
    returned when the combination cannot be converted.
    """
    kind = accent
    base = accented_char
    if base == '':
        if kind in special_accent_map:
            return special_accent_map[kind]
        # a missing char is treated as space by LyX
        base = ' '
    elif kind == 'q' and base in ('t', 'd', 'l', 'L'):
        # Special caron, only used with t, d, l and L.
        # It is not in the map because it is converted to the same unicode
        # character as the normal caron: \q{} is only defined if babel with
        # the czech or slovak language is used, and the normal caron
        # produces the correct output if the T1 font encoding is used.
        # For the same reason \q{} is never produced in the other direction.
        kind = 'v'
    elif base in accented_map:
        base = accented_map[base]
    elif len(base) > 1:
        # Only accents on a single char can be converted
        return ''
    combining = accent_map.get(kind)
    if not combining:
        return ''
    return unicodedata.normalize("NFC", "%s%s" % (base, combining))
|
|
|
|
|
|
def convert_ertbackslash(body, i, ert, default_layout):
    r"""Append the text ert to body[i] in a form that is valid inside ERT.

    A backslash is written as '\backslash ' and is followed by a line
    break; a '\n' closes the current paragraph and opens a new one of
    layout default_layout.  All other characters are appended unchanged.
    Returns the (possibly incremented) index of the line that was written
    last.
    """
    for ch in ert:
        if ch == '\n':
            # End the running paragraph and start a fresh one.
            body[i+1:i+1] = ['\\end_layout', '', '\\begin_layout %s' % default_layout, '']
            i += 4
        elif ch == '\\':
            body[i] += '\\backslash '
            # Whatever follows the backslash goes onto a new line.
            i += 1
            body.insert(i, '')
        else:
            body[i] += ch
    return i
|
|
|
|
|
|
def convert_accent(document):
    r"""Convert InsetLaTeXAccent lines to Unicode characters.

    Accents that cannot be expressed as a Unicode character are converted
    to an ERT inset containing the original LaTeX code instead, and a
    warning is issued.
    """
    # The following forms are supported by LyX:
    # '\i \"{a}' (standard form, as written by LyX)
    # '\i \"{}' (standard form, as written by LyX if the accented char is a space)
    # '\i \"{ }' (also accepted if the accented char is a space)
    # '\i \" a' (also accepted)
    # '\i \"' (also accepted)
    re_wholeinset = re.compile(r'^(.*)(\\i\s+)(.*)$')
    re_contents = re.compile(r'^([^\s{]+)(.*)$')
    re_accentedcontents = re.compile(r'^\s*{?([^{}]*)}?\s*$')
    i = 0
    while True:
        i = find_re(document.body, re_wholeinset, i)
        if i == -1:
            return
        match = re_wholeinset.match(document.body[i])
        prefix = match.group(1)
        contents = match.group(3).strip()
        match = re_contents.match(contents)
        if match:
            # Strip first char (always \)
            accent = match.group(1)[1:]
            accented_contents = match.group(2).strip()
            match = re_accentedcontents.match(accented_contents)
            if match:
                accented_char = match.group(1)
                converted = _convert_accent(accent, accented_char)
                if converted != '':
                    document.body[i] = '%s%s' % (prefix, converted)
                    i += 1
                    continue
                # Normalize contents (a plain string, not a 1-tuple)
                contents = '%s{%s}' % (accent, accented_char)
            else:
                # The argument does not have one of the supported forms
                # (e.g. nested braces).  Keep it verbatim for the ERT
                # fallback instead of failing on the missing match.
                contents = '%s%s' % (accent, accented_contents)
        document.warning("Converting unknown InsetLaTeXAccent `\\i %s' to ERT." % contents)
        document.body[i] = prefix
        document.body[i+1:i+1] = ['\\begin_inset ERT',
                                  'status collapsed',
                                  '',
                                  '\\begin_layout %s' % document.default_layout,
                                  '',
                                  '',
                                  '']
        # i + 7 is the last (empty) line inserted above; the ERT text is
        # appended there.
        i = convert_ertbackslash(document.body, i + 7,
                                 '\\%s' % contents,
                                 document.default_layout)
        document.body[i+1:i+1] = ['\\end_layout',
                                  '',
                                  '\\end_inset']
        i += 3
|
|
|
|
|
|
def is_inset_line(document, i):
    """Tell whether line i of the document body has an inset.

    That is the case if the line starts with a backslash or if one of its
    last two whitespace-separated tokens contains a backslash.
    """
    line = document.body[i]
    if line.startswith('\\'):
        return True
    tail = "".join(line.split()[-2:])
    return '\\' in tail
|
|
|
|
|
|
# A wrapper around normalize that handles special cases (cf. bug 3313)
|
|
def normalize(form, text):
    """Normalize text like unicodedata.normalize, keeping OHM and ANGSTROM.

    The characters U+2126 (OHM SIGN) and U+212B (ANGSTROM SIGN) are copied
    unchanged; the runs of text between them are normalized to form.
    """
    # do not normalize OHM, ANGSTROM
    protected = (0x2126, 0x212b)
    pieces = []
    pending = []
    for ch in text:
        if ord(ch) not in protected:
            pending.append(ch)
            continue
        # Flush the run collected so far, then copy the protected char.
        if pending:
            pieces.append(unicodedata.normalize(form, ''.join(pending)))
            pending = []
        pieces.append(ch)
    if pending:
        pieces.append(unicodedata.normalize(form, ''.join(pending)))
    return ''.join(pieces)
|
|
|
|
|
|
def revert_accent(document):
    r"""Replace accented characters by InsetLaTeXAccent lines.

    Only characters that cannot be represented in the encoding of the
    surrounding text are replaced.  The body is decomposed (NFD) for the
    scan and recomposed (NFC) afterwards.
    """
    inverse_accent_map = dict((v, k) for k, v in accent_map.items())
    inverse_special_accent_map = dict((v, k) for k, v in special_accent_map.items())
    inverse_accented_map = dict((v, k) for k, v in accented_map.items())

    # Since LyX may insert a line break within a word we must combine all
    # words before unicode normalization.
    # We do this only if the next line starts with an accent, otherwise we
    # would create things like '\begin_inset ERTstatus'.
    for i in range(len(document.body) - 1):
        if document.body[i] == '' or document.body[i+1] == '' or document.body[i][-1] == ' ':
            continue
        if (document.body[i+1][0] in inverse_accent_map and not is_inset_line(document, i)):
            # the last character of this line and the first of the next line
            # probably form a base character + combining accent pair; inline
            # insets are excluded (second part of the test).
            # Move the first word of the next line to the end of this one.
            while (len(document.body[i+1]) > 0 and document.body[i+1][0] != ' '):
                document.body[i] += document.body[i+1][0]
                document.body[i+1] = document.body[i+1][1:]

    # Normalize to "Normal form D" (NFD, also known as canonical decomposition).
    # This is needed to catch all accented characters.
    for i in range(len(document.body)):
        # Unfortunately we have a mixture of unicode strings and plain strings,
        # because we never use u'xxx' for string literals, but 'xxx'.
        # Therefore we may have to try two times to normalize the data.
        try:
            document.body[i] = normalize("NFD", document.body[i])
        except TypeError:
            document.body[i] = normalize("NFD", text_type(document.body[i], 'utf-8'))

    # Replace accented characters with InsetLaTeXAccent
    # Do not convert characters that can be represented in the chosen
    # encoding.
    encoding_stack = [get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)]
    lang_re = re.compile(r"^\\lang\s(\S+)")

    i = 0
    while i < len(document.body):
        if (document.inputencoding == "auto" or document.inputencoding == "default") and document.cjk_encoding != '':
            # Track the encoding of the current line.
            # Each branch must advance i before continuing, otherwise the
            # same line would be examined forever.
            result = lang_re.match(document.body[i])
            if result:
                language = result.group(1)
                if language == "default":
                    encoding_stack[-1] = document.encoding
                else:
                    from lyx2lyx_lang import lang
                    encoding_stack[-1] = lang[language][3]
                i = i + 1
                continue
            elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
                encoding_stack.append(encoding_stack[-1])
                i = i + 1
                continue
            elif find_token(document.body, "\\end_layout", i, i + 1) == i:
                del encoding_stack[-1]
                i = i + 1
                continue

        for j in range(len(document.body[i])):
            # dotless i and dotless j are both in special_accent_map and can
            # occur as an accented character, so we need to test that the
            # following character is no accent
            if (document.body[i][j] in inverse_special_accent_map and
                (j == len(document.body[i]) - 1 or document.body[i][j+1] not in inverse_accent_map)):
                accent = document.body[i][j]
                try:
                    # Only the ability to encode matters, not the result.
                    accent.encode(encoding_stack[-1])
                except UnicodeEncodeError:
                    # Insert the rest of the line as new line
                    if j < len(document.body[i]) - 1:
                        document.body.insert(i+1, document.body[i][j+1:])
                    # Delete the accented character
                    document.body[i] = document.body[i][:j]
                    # Finally add the InsetLaTeXAccent
                    document.body[i] += "\\i \\%s{}" % inverse_special_accent_map[accent]
                    # The rest of the line is handled as line i+1.
                    break
            elif j > 0 and document.body[i][j] in inverse_accent_map:
                accented_char = document.body[i][j-1]
                if accented_char == ' ':
                    # Conform to LyX output
                    accented_char = ''
                elif accented_char in inverse_accented_map:
                    accented_char = inverse_accented_map[accented_char]
                accent = document.body[i][j]
                try:
                    # Only the ability to encode matters, not the result.
                    normalize("NFC", accented_char + accent).encode(encoding_stack[-1])
                except UnicodeEncodeError:
                    # Insert the rest of the line as new line
                    if j < len(document.body[i]) - 1:
                        document.body.insert(i+1, document.body[i][j+1:])
                    # Delete the accented characters
                    document.body[i] = document.body[i][:j-1]
                    # Finally add the InsetLaTeXAccent
                    document.body[i] += "\\i \\%s{%s}" % (inverse_accent_map[accent], accented_char)
                    # The rest of the line is handled as line i+1.
                    break
        i = i + 1

    # Normalize to "Normal form C" (NFC, pre-composed characters) again
    for i in range(len(document.body)):
        document.body[i] = normalize("NFC", document.body[i])
|
|
|
|
|
|
def normalize_font_whitespace_259(document):
    """Move whitespace out of font changes for all non-language properties.

    Before format 259 a font change was ignored if a whitespace was the
    first or last character of the sequence.  The work is delegated to
    normalize_font_whitespace with the font properties and their default
    values listed here.
    """
    defaults = dict.fromkeys(
        ("\\series", "\\emph", "\\shape", "\\bar", "\\family"), "default")
    defaults["\\color"] = "none"
    return normalize_font_whitespace(document, defaults)
|
|
|
|
def normalize_font_whitespace_274(document):
    """Move whitespace out of font language changes.

    The whitespace handling of font changes was corrected for most font
    properties in format 259, but the language was forgotten at that time.
    This applies the same conversion (via normalize_font_whitespace) to
    language changes.
    """
    language_only = {"\\lang": "default"}
    return normalize_font_whitespace(document, language_only)
|
|
|
|
def get_paragraph_language(document, i):
    r"""Return the language of the paragraph that contains body line i.

    If the first non-empty line of the paragraph is a \lang command, its
    argument is the paragraph's language; otherwise the document language
    is returned.
    """
    body = document.body
    paragraph_start = find_beginning_of_layout(body, i)
    first_line = find_nonempty_line(body, paragraph_start + 1)
    tokens = body[first_line].split()
    if len(tokens) > 1 and tokens[0] == "\\lang":
        return tokens[1]
    return document.language
|
|
|
|
def _font_reset_lines(changes, defaults, skip):
    """Build the lines that put a single space outside all font changes.

    The result resets every changed property except skip to its default,
    then holds the space, then restores the changed values.
    """
    # Later changes come first, matching the order in which the font
    # properties were written by the original front-insertion code.
    others = [k for k in changes if k != skip]
    others.reverse()
    resets = ["%s %s" % (k, defaults[k]) for k in others]
    restores = ["%s %s" % (k, changes[k]) for k in others]
    return resets + [" "] + restores


def normalize_font_whitespace(document, char_properties):
    r"""Move whitespace at the edge of a font change outside of the change.

    Before format 259 the font changes were ignored if a whitespace was the
    first or last character in the sequence.  Only a change in one of the
    properties in the provided char_properties (a mapping from property
    token to its default value) is handled.  Nothing is done unless the
    document backend is "latex".

    The mapping passed by the caller is not modified.  Font changes on the
    first or last body line are handled without looking at lines outside
    the body.
    """

    if document.backend != "latex":
        return

    lines = document.body
    # Work on a copy: the default of \lang is updated per paragraph below.
    defaults = dict(char_properties)

    changes = {}

    i = 0
    while i < len(lines):
        words = lines[i].split()

        if len(words) > 0 and words[0] == "\\begin_layout":
            # a new paragraph resets all font changes
            changes.clear()
            # also reset the default language to be the paragraph's language
            if "\\lang" in defaults:
                defaults["\\lang"] = get_paragraph_language(document, i + 1)

        elif len(words) > 1 and words[0] in defaults:
            # we have a font change
            prop = words[0]
            if defaults[prop] == words[1]:
                # property gets reset
                changes.pop(prop, None)
                defaultproperty = True
            else:
                # property gets set
                changes[prop] = words[1]
                defaultproperty = False

            # We need to explicitly reset all changed properties if we find
            # a space below, because LyX 1.4 would output the space after
            # closing the previous change and before starting the new one,
            # and closing a font change means to close all properties, not
            # just the changed one.

            # Neighbouring lines; empty when the font change sits on the
            # first or last line of the body.
            prev_line = lines[i-1] if i > 0 else ""
            next_line = lines[i+1] if i + 1 < len(lines) else ""

            if prev_line and prev_line[-1] == " ":
                lines[i-1] = prev_line[:-1]
                # a space before the font change
                added_lines = _font_reset_lines(changes, defaults, prop)
                if defaultproperty:
                    # Property is reset in lines[i], so add the new stuff afterwards
                    lines[i+1:i+1] = added_lines
                else:
                    # Reset property for the space
                    added_lines[0:0] = ["%s %s" % (prop, defaults[prop])]
                    lines[i:i] = added_lines
                i = i + len(added_lines)

            elif next_line and next_line[0] == " " and (len(changes) > 0 or not defaultproperty):
                # a space after the font change
                if next_line == " " and i + 2 < len(lines) and lines[i+2]:
                    next_words = lines[i+2].split()
                    if len(next_words) > 0 and next_words[0] == prop:
                        # a single blank with a property different from the
                        # previous and the next line must not be changed
                        i = i + 2
                        continue
                lines[i+1] = next_line[1:]
                added_lines = _font_reset_lines(changes, defaults, prop)
                # Reset property for the space
                added_lines[0:0] = ["%s %s" % (prop, defaults[prop])]
                lines[i:i] = added_lines
                i = i + len(added_lines)

        i = i + 1
|
|
|
|
|
|
def revert_utf8x(document):
    r"""Replace the input encoding ``utf8x`` by ``utf8``.

    A missing ``\inputencoding`` header line is added with value ``auto``.
    Afterwards ``document.inputencoding`` is refreshed from the header.
    """
    header = document.header
    pos = find_token(header, "\\inputencoding", 0)
    if pos != -1:
        if get_value(header, "\\inputencoding", pos) == "utf8x":
            header[pos] = "\\inputencoding utf8"
    else:
        header.append("\\inputencoding auto")
    document.inputencoding = get_value(header, "\\inputencoding", 0)
|
|
|
|
|
|
def revert_utf8plain(document):
    r"""Replace the input encoding ``utf8-plain`` by ``utf8``.

    A missing ``\inputencoding`` header line is added with value ``auto``.
    Afterwards ``document.inputencoding`` is refreshed from the header.
    """
    header = document.header
    pos = find_token(header, "\\inputencoding", 0)
    if pos != -1:
        if get_value(header, "\\inputencoding", pos) == "utf8-plain":
            header[pos] = "\\inputencoding utf8"
    else:
        header.append("\\inputencoding auto")
    document.inputencoding = get_value(header, "\\inputencoding", 0)
|
|
|
|
|
|
def revert_beamer_alert(document):
    r"""Revert beamer's Alert character style inset to an ERT inset.

    The line following the first ``\begin_layout`` of the inset is wrapped
    in ``\alert{...}``.
    """
    body = document.body
    pos = 0
    while True:
        pos = find_token(body, "\\begin_inset CharStyle Alert", pos)
        if pos == -1:
            return
        body[pos] = "\\begin_inset ERT"
        pos += 1
        # Advance to the paragraph start inside the inset.
        while body[pos][:13] != "\\begin_layout":
            pos += 1
        # Insert the \alert command
        body[pos + 1] = "\\alert{" + body[pos + 1] + '}'

        pos += 1
|
|
|
|
|
|
def revert_beamer_structure(document):
    r"""Revert beamer's Structure character style inset to an ERT inset.

    The line following the first ``\begin_layout`` of the inset is wrapped
    in ``\structure{...}``.
    """
    body = document.body
    pos = 0
    while True:
        pos = find_token(body, "\\begin_inset CharStyle Structure", pos)
        if pos == -1:
            return
        body[pos] = "\\begin_inset ERT"
        pos += 1
        # Advance to the paragraph start inside the inset.
        while body[pos][:13] != "\\begin_layout":
            pos += 1
        # Insert the \structure command
        body[pos + 1] = "\\structure{" + body[pos + 1] + '}'

        pos += 1
|
|
|
|
|
|
def convert_changes(document):
    r"""Switch ``\output_changes`` off if ``\tracking_changes`` is off.

    A warning is issued and nothing is changed when one of the two header
    settings is missing.
    """
    header = document.header
    tracking_pos = find_token(header, '\\tracking_changes', 0)
    if tracking_pos == -1:
        document.warning("Malformed lyx document: Missing '\\tracking_changes'.")
        return
    output_pos = find_token(header, '\\output_changes', 0)
    if output_pos == -1:
        document.warning("Malformed lyx document: Missing '\\output_changes'.")
        return
    tracking = get_value(header, "\\tracking_changes", tracking_pos)
    output = get_value(header, "\\output_changes", output_pos)
    if (tracking, output) == ("false", "true"):
        header[output_pos] = "\\output_changes false"
|
|
|
|
|
|
def revert_ascii(document):
    r"""Replace the input encoding ``ascii`` by ``auto``.

    A missing ``\inputencoding`` header line is added with value ``auto``.
    Afterwards ``document.inputencoding`` is refreshed from the header.
    """
    header = document.header
    pos = find_token(header, "\\inputencoding", 0)
    if pos != -1:
        if get_value(header, "\\inputencoding", pos) == "ascii":
            header[pos] = "\\inputencoding auto"
    else:
        header.append("\\inputencoding auto")
    document.inputencoding = get_value(header, "\\inputencoding", 0)
|
|
|
|
|
|
def normalize_language_name(document):
    r"""Rename the document languages ``brazil`` and ``portuges``.

    They become ``brazilian`` and ``portuguese``; both
    ``document.language`` and the ``\language`` header line are updated.
    """
    lang = { "brazil": "brazilian",
             "portuges": "portuguese"}

    if document.language in lang:
        document.language = lang[document.language]
        i = find_token(document.header, "\\language", 0)
        # Without this check a missing header line (i == -1) would
        # overwrite the last header line.
        if i != -1:
            document.header[i] = "\\language %s" % document.language
|
|
|
|
|
|
def revert_language_name(document):
    r"""Rename the document languages ``brazilian`` and ``portuguese``.

    They become ``brazil`` and ``portuges``; both ``document.language``
    and the ``\language`` header line are updated.
    """
    lang = { "brazilian": "brazil",
             "portuguese": "portuges"}

    if document.language in lang:
        document.language = lang[document.language]
        i = find_token(document.header, "\\language", 0)
        # Without this check a missing header line (i == -1) would
        # overwrite the last header line.
        if i != -1:
            document.header[i] = "\\language %s" % document.language
|
|
|
|
#
|
|
# \textclass cv -> \textclass simplecv
|
|
def convert_cv_textclass(document):
    """Rename the text class "cv" to "simplecv"."""
    if document.textclass != "cv":
        return
    document.textclass = "simplecv"
|
|
|
|
|
|
def revert_cv_textclass(document):
    """Rename the text class "simplecv" back to "cv"."""
    if document.textclass != "simplecv":
        return
    document.textclass = "cv"
|
|
|
|
|
|
#
|
|
# add scaleBeforeRotation graphics param
|
|
def convert_graphics_rotation(document):
    """Add the scaleBeforeRotation parameter to graphics insets.

    The parameter is inserted in front of the end of every Graphics inset
    that has a rotateAngle together with a width, height or scale.
    """
    body = document.body
    size_tokens = ["\twidth", "\theight", "\tscale"]
    start = 0
    while True:
        start = find_token(body, "\\begin_inset Graphics", start)
        if start == -1:
            return
        end = find_end_of_inset(body, start+1)
        if end == -1:
            # should not happen
            document.warning("Malformed LyX document: Could not find end of graphics inset.")
        # Search for rotateAngle and width or height or scale
        # If these params are not there, nothing needs to be done.
        rotation = find_token(body, "\trotateAngle", start + 1, end)
        size = find_tokens(body, size_tokens, start + 1, end)
        if rotation != -1 and size != -1:
            body.insert(end, 'scaleBeforeRotation')
        start += 1
|
|
|
|
|
|
#
|
|
# remove scaleBeforeRotation graphics param
|
|
def revert_graphics_rotation(document):
    """Remove the scaleBeforeRotation parameter from graphics insets.

    If an inset has no scaleBeforeRotation but has a rotateAngle together
    with a width, height or scale, the angle is moved into the "special"
    parameter and the rotateAngle line is removed.  Insets that need no
    change are skipped, and processing continues with the next inset.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Graphics", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            # should not happen
            document.warning("Malformed LyX document: Could not find end of graphics inset.")
            # The extent of this inset is unknown, so leave it alone.
            i = i + 1
            continue
        # If there's a scaleBeforeRotation param, just remove that
        k = find_token(document.body, "\tscaleBeforeRotation", i + 1, j)
        if k != -1:
            del document.body[k]
        else:
            # if not, and if we have rotateAngle and width or height or scale,
            # we have to put the rotateAngle value to special
            rotateAngle = get_value(document.body, 'rotateAngle', i + 1, j)
            special = get_value(document.body, 'special', i + 1, j)
            # An inset without width, height and scale needs no change.
            # It must not stop the conversion of the following insets.
            if rotateAngle != "" and \
               find_tokens(document.body, ["\twidth", "\theight", "\tscale"], i + 1, j) != -1:
                if special == "":
                    document.body.insert(j-1, '\tspecial angle=%s' % rotateAngle)
                else:
                    l = find_token(document.body, "\tspecial", i + 1, j)
                    document.body[l] = document.body[l].replace(special, 'angle=%s,%s' % (rotateAngle, special))
                k = find_token(document.body, "\trotateAngle", i + 1, j)
                if k != -1:
                    del document.body[k]
        i = i + 1
|
|
|
|
|
|
|
|
def convert_tableborder(document):
    """Remove the "|" in front of ">{" from table columns with a left line.

    LyX doubled the cell border because it ignored the "|" character in
    the cell arguments.  A fix takes care of this, so the "|" has to be
    removed.  Only lines that contain both tokens are changed.
    """
    for idx, line in enumerate(document.body):
        bar = line.find("|>{")
        # the two tokens have to be in one line
        if bar != -1 and line.find("leftline=\"true\"") != -1:
            # delete the "|"
            document.body[idx] = line[:bar] + line[bar+1:]
|
|
|
|
|
|
def revert_tableborder(document):
    """Add a "|" in front of ">{" in table columns with a left line.

    Only lines that contain both the left line attribute and ">{" are
    changed; the "|" is placed before the first ">{" of the line.
    """
    for idx, line in enumerate(document.body):
        spec = line.find(">{")
        # the two tokens have to be in one line
        if spec != -1 and line.find("leftline=\"true\"") != -1:
            # add the "|"
            document.body[idx] = line[:spec] + '|' + line[spec:]
|
|
|
|
|
|
def revert_armenian(document):
    r"""Revert armenian support.

    The header input encoding ``armscii8`` becomes ``auto``.  For armenian
    documents ``\usepackage{armtex}`` is added to the preamble and the
    document language is set to english.
    """
    # set inputencoding from armscii8 to auto
    if document.inputencoding == "armscii8":
        pos = find_token(document.header, "\\inputencoding", 0)
        if pos != -1:
            document.header[pos] = "\\inputencoding auto"

    # The preamble counts as existing if any of its lines contains a
    # backslash or a percent sign.
    has_preamble = False
    for line in document.preamble:
        if "\\" in line or "%" in line:
            has_preamble = True

    if document.language != "armenian":
        return

    # add the entry \usepackage{armtex} to the document preamble
    if has_preamble:
        # set the armtex entry as the first preamble line
        document.preamble[0:0] = ["\\usepackage{armtex}"]
    else:
        # create the preamble when it doesn't exist
        document.preamble.append('\\usepackage{armtex}')

    # Set document language from armenian to english
    document.language = "english"
    pos = find_token(document.header, "\\language", 0)
    if pos != -1:
        document.header[pos] = "\\language english"
|
|
|
|
|
|
def revert_CJK(document):
    r"""Revert CJK encodings and languages.

    A CJK input encoding is set to ``default`` (a missing
    ``\inputencoding`` header line is added with value ``auto``), and the
    document languages chinese, japanese and korean are set to english.
    """
    cjk_encodings = ("Bg5", "Bg5+", "GB", "GBt", "GBK", "JIS",
                     "KS", "SJIS", "UTF8", "EUC-TW", "EUC-JP")
    cjk_languages = ("chinese-simplified", "chinese-traditional",
                     "japanese", "korean")
    header = document.header

    pos = find_token(header, "\\inputencoding", 0)
    if pos != -1:
        if get_value(header, "\\inputencoding", pos) in cjk_encodings:
            header[pos] = "\\inputencoding default"
    else:
        header.append("\\inputencoding auto")
    document.inputencoding = get_value(header, "\\inputencoding", 0)

    if document.language in cjk_languages:
        document.language = "english"
        pos = find_token(header, "\\language", 0)
        if pos != -1:
            header[pos] = "\\language english"
|
|
|
|
|
|
def revert_preamble_listings_params(document):
    r"""Revert the header option ``\listings_params``.

    The header line is removed; the listings package and an ``\lstset``
    call with the parameters are appended to the preamble.  The whole rest
    of the header line is used as the parameter string, so parameter lists
    that contain spaces are kept complete.
    """
    i = find_token(document.header, "\\listings_params", 0)
    if i != -1:
        # Split off the token only; the value may itself contain spaces.
        fields = document.header[i].split(None, 1)
        params = fields[1].strip().strip('"') if len(fields) > 1 else ''
        document.preamble.append('\\usepackage{listings}')
        document.preamble.append('\\lstset{%s}' % params)
        del document.header[i]
|
|
|
|
|
|
def revert_listings_inset(document):
    r''' Revert listings insets to ERT insets.

    An inline listing becomes \lstinline, any other listing becomes a
    \begin{lstlisting} ... \end{lstlisting} pair.  The listings package is
    added to the preamble if it is not loaded there yet.  Translate

    FROM

    \begin_inset
    lstparams "language=Delphi"
    inline true
    status open

    \begin_layout Standard
    var i = 10;
    \end_layout

    \end_inset

    TO

    \begin_inset ERT
    status open
    \begin_layout Standard


    \backslash
    lstinline[language=Delphi]{var i = 10;}
    \end_layout

    \end_inset

    There can be a caption inset in this inset; its text and label are
    turned into the caption= and label= parameters:

    \begin_layout Standard
    \begin_inset Caption

    \begin_layout Standard
    before label
    \begin_inset LatexCommand label
    name "lst:caption"

    \end_inset

    after label
    \end_layout

    \end_inset


    \end_layout

    '''
    i = 0
    while True:
        i = find_token(document.body, '\\begin_inset listings', i)
        if i == -1:
            break
        else:
            if not '\\usepackage{listings}' in document.preamble:
                document.preamble.append('\\usepackage{listings}')
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            # this should not happen
            break
        # Defaults used when the inset header omits a setting.
        inline = 'false'
        params = ''
        status = 'open'
        # first three lines
        for line in range(i + 1, i + 4):
            if document.body[line].startswith('inline'):
                inline = document.body[line].split()[1]
            if document.body[line].startswith('lstparams'):
                # NOTE(review): split()[1] keeps only the text up to the
                # first whitespace, so a parameter string containing
                # spaces looks like it gets truncated here -- confirm.
                params = document.body[line].split()[1].strip('"')
            if document.body[line].startswith('status'):
                status = document.body[line].split()[1].strip()
                # k marks the first line of the listing contents.
                # NOTE(review): k is only assigned here or in the caption
                # branch below; without a status line and without a
                # caption it appears to be undefined when used -- confirm.
                k = line + 1
        # caption?
        caption = ''
        label = ''
        # NOTE(review): this search is not limited to the end of the
        # current inset (j), so it can find a caption that follows later
        # in the body -- confirm whether that is intended.
        cap = find_token(document.body, '\\begin_inset Caption', i)
        if cap != -1:
            cap_end = find_end_of_inset(document.body, cap + 1)
            if cap_end == -1:
                # this should not happen
                break
            # label?
            lbl = find_token(document.body, '\\begin_inset LatexCommand label', cap + 1)
            if lbl != -1:
                lbl_end = find_end_of_inset(document.body, lbl + 1)
                if lbl_end == -1:
                    # this should not happen
                    break
            else:
                # No label: use an empty range at the end of the caption.
                lbl = cap_end
                lbl_end = cap_end
            for line in document.body[lbl : lbl_end + 1]:
                if line.startswith('name '):
                    label = line.split()[1].strip('"')
                    break
            # The caption text is every line around the label inset that
            # does not start with a backslash.
            for line in document.body[cap : lbl ] + document.body[lbl_end + 1 : cap_end + 1]:
                if not line.startswith('\\'):
                    caption += line.strip()
            # The listing contents start after the caption inset.
            k = cap_end + 1
        inlinecode = ''
        # looking for the oneline code for lstinline
        # (the line in front of the end of the first default-layout
        # paragraph found after the inset start)
        inlinecode = document.body[find_end_of_layout(document.body,
            find_token(document.body, '\\begin_layout %s' % document.default_layout, i + 1) +1 ) - 1]
        if len(caption) > 0:
            if len(params) == 0:
                params = 'caption={%s}' % caption
            else:
                params += ',caption={%s}' % caption
        if len(label) > 0:
            if len(params) == 0:
                params = 'label={%s}' % label
            else:
                params += ',label={%s}' % label
        if len(params) > 0:
            params = '[%s]' % params
            # Backslashes must be written as \backslash inside ERT.
            params = params.replace('\\', '\\backslash\n')
        if inline == 'true':
            document.body[i:(j+1)] = [r'\begin_inset ERT',
                                      'status %s' % status,
                                      r'\begin_layout %s' % document.default_layout,
                                      '',
                                      '',
                                      r'\backslash',
                                      'lstinline%s{%s}' % (params, inlinecode),
                                      r'\end_layout',
                                      '',
                                      r'\end_inset']
        else:
            # The original paragraphs body[k:j-1] are kept between the
            # begin{lstlisting} and end{lstlisting} paragraphs.
            document.body[i: j+1] = [r'\begin_inset ERT',
                                      'status %s' % status,
                                      '',
                                      r'\begin_layout %s' % document.default_layout,
                                      '',
                                      '',
                                      r'\backslash',
                                      r'begin{lstlisting}%s' % params,
                                      r'\end_layout',
                                      '',
                                      r'\begin_layout %s' % document.default_layout,
                                    ] + document.body[k : j - 1] + \
                                     ['',
                                      r'\begin_layout %s' % document.default_layout,
                                      '',
                                      r'\backslash',
                                      'end{lstlisting}',
                                      r'\end_layout',
                                      '',
                                      r'\end_inset']
|
|
|
|
|
|
def revert_include_listings(document):
    r''' Replace every lstinputlisting Include inset by an ERT inset.

    An inset of the form

        \begin_inset Include \lstinputlisting{file}[opt]
        preview false

        \end_inset

    is rewritten as (the layout name is document.default_layout)

        \begin_inset ERT
        status open

        \begin_layout Standard


        \backslash
        lstinputlisting[opt]{file}
        \end_layout

        \end_inset

    The listings package is added to the preamble as soon as one such
    inset is found.  The file name is only recognised when it consists
    of word characters and dots; otherwise command, file and option are
    all left empty.
    '''
    inset_start = r'\begin_inset Include \lstinputlisting'
    package_line = '\\usepackage{listings}'
    command_re = re.compile(r'\\(lstinputlisting){([.\w]*)}(.*)')

    body = document.body
    begin = 0
    while True:
        begin = find_token(body, inset_start, begin)
        if begin == -1:
            break
        if package_line not in document.preamble:
            document.preamble.append(package_line)
        end = find_end_of_inset(body, begin + 1)
        if end == -1:
            # this should not happen
            break
        # The third whitespace-separated word of the opening line holds
        # the command itself: \lstinputlisting{file}[options]
        found = command_re.match(body[begin].split()[2])
        if found:
            cmd, filename, option = found.groups()
        else:
            cmd, filename, option = '', '', ''
        # Backslashes inside the options have to be spelled the ERT way.
        option = option.replace('\\', '\\backslash\n')
        ert = [
            r'\begin_inset ERT',
            'status open',
            '',
            r'\begin_layout %s' % document.default_layout,
            '',
            '',
            r'\backslash',
            '%s%s{%s}' % (cmd, option, filename),
            r'\end_layout',
            '',
            r'\end_inset',
        ]
        # The rewritten opening line no longer matches inset_start, so the
        # next search may safely restart from the same index.
        body[begin:end + 1] = ert
|
|
|
|
|
|
def revert_ext_font_sizes(document):
    r''' Move a 10/11/12 \paperfontsize into the class options.

    Only acts when the backend is "latex" and the text class name starts
    with "ext".  If \paperfontsize is 10, 11 or 12, it is reset to
    "default" and the size (with a "pt" suffix) is appended to the
    existing \options line, or put on a new \options line inserted
    right after the \textclass line when there is none.
    '''
    if document.backend != "latex" or not document.textclass.startswith("ext"):
        return

    size = get_value(document.header, '\\paperfontsize', 0)
    if size not in ('10', '11', '12'):
        return
    size_option = size + 'pt'

    header = document.header
    size_line = find_token(header, '\\paperfontsize', 0)
    header[size_line] = '\\paperfontsize default'

    options_line = find_token(header, '\\options', 0)
    if options_line != -1:
        header[options_line] += ',%s' % size_option
        return

    # No \options line yet: create one just below \textclass.
    insert_at = find_token(header, '\\textclass', 0) + 1
    header.insert(insert_at, '\\options %s' % size_option)
|
|
|
|
|
|
def convert_ext_font_sizes(document):
    r''' Move a 10pt/11pt/12pt class option into \paperfontsize.

    Only acts when the backend is "latex", the text class name starts
    with "ext", \paperfontsize is "default" and an \options line exists.
    The first comma-separated option that is exactly 10pt, 11pt or 12pt
    is removed from \options and stored (without the "pt" suffix) as the
    new \paperfontsize.  The \options line is deleted when no option is
    left.
    '''
    if document.backend != "latex" or not document.textclass.startswith("ext"):
        return

    header = document.header
    if get_value(header, '\\paperfontsize', 0) != 'default':
        return

    options_line = find_token(header, '\\options', 0)
    if options_line == -1:
        return

    raw_options = get_value(header, '\\options', options_line)

    known_sizes = ('10pt', '11pt', '12pt')
    # Cheap substring pre-check before splitting the option string.
    if not any(raw_options.find(size) != -1 for size in known_sizes):
        return

    remaining = raw_options.split(',')
    hits = [pos for pos, opt in enumerate(remaining) if opt in known_sizes]
    if not hits:
        return
    # Take the first matching option out of the list and drop its "pt".
    fontsize = remaining.pop(hits[0])[:-2]

    size_line = find_token(header, '\\paperfontsize', 0)
    header[size_line] = '\\paperfontsize %s' % fontsize

    if remaining:
        header[options_line] = '\\options %s' % ','.join(remaining)
    else:
        del header[options_line]
|
|
|
|
|
|
def revert_separator_layout(document):
    r'''Replace each --Separator-- layout by a default layout led by a note.

    A paragraph such as

        \begin_layout --Separator--
        something
        \end_layout

    becomes (the layout name is document.default_layout)

        \begin_layout Standard
        \begin_inset Note Note
        status open

        \begin_layout Standard
        Separate Environment
        \end_layout

        \end_inset
        something

        \end_layout

    '''
    separator = r'\begin_layout --Separator--'
    body = document.body

    begin = find_token(body, separator, 0)
    while begin != -1:
        end = find_end_of_layout(body, begin + 1)
        if end == -1:
            # this should not happen
            break
        opening = r'\begin_layout %s' % document.default_layout
        note = [
            opening,
            r'\begin_inset Note Note',
            'status open',
            '',
            opening,
            'Separate Environment',
            r'\end_layout',
            '',
            r'\end_inset',
        ]
        # Original paragraph content, without its begin/end layout lines.
        content = body[begin + 1:end]
        body[begin:end + 1] = note + content + ['', r'\end_layout']
        # The rewritten paragraph no longer matches, so searching again
        # from the same index finds the next separator.
        begin = find_token(body, separator, begin)
|
|
|
|
|
|
def convert_arabic (document):
    r''' Rename the language "arabic" to "arabic_arabtex".

    If the document language is "arabic", both document.language and the
    \language header line (when present) are updated.  Independently of
    the document language, every body line containing "\lang arabic" is
    replaced as a whole by "\lang arabic_arabtex".
    '''
    if document.language == "arabic":
        document.language = "arabic_arabtex"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language arabic_arabtex"
    # Raw strings: "\l" is not a valid escape sequence, and newer Python
    # versions warn about it in ordinary string literals.
    old_switch = r'\lang arabic'
    new_switch = r'\lang arabic_arabtex'
    for pos, line in enumerate(document.body):
        if old_switch in line:
            # change the language name
            document.body[pos] = new_switch
|
|
|
|
|
|
def revert_arabic (document):
    r''' Rename the language "arabic_arabtex" back to "arabic".

    If the document language is "arabic_arabtex", both document.language
    and the \language header line (when present) are updated.
    Independently of the document language, every body line containing
    "\lang arabic_arabtex" is replaced as a whole by "\lang arabic".
    '''
    if document.language == "arabic_arabtex":
        document.language = "arabic"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language arabic"
    # Raw strings: "\l" is not a valid escape sequence, and newer Python
    # versions warn about it in ordinary string literals.
    old_switch = r'\lang arabic_arabtex'
    new_switch = r'\lang arabic'
    for pos, line in enumerate(document.body):
        if old_switch in line:
            # change the language name
            document.body[pos] = new_switch
|
|
|
|
|
|
##
# Conversion hub
#
# Each table entry is a pair [file format number, list of functions].
# NOTE(review): presumably the lyx2lyx driver runs the listed functions
# to reach (convert) or go back to (revert) that format -- confirm
# against the caller.
#

supported_versions = ["1.5.0", "1.5"]

convert = [
    [246, []],
    [247, [convert_font_settings]],
    [248, []],
    [249, [convert_utf8]],
    [250, []],
    [251, []],
    [252, [convert_commandparams, convert_bibitem]],
    [253, []],
    [254, [convert_esint]],
    [255, []],
    [256, []],
    [257, [convert_caption]],
    [258, [convert_lyxline]],
    [259, [convert_accent, normalize_font_whitespace_259]],
    [260, []],
    [261, [convert_changes]],
    [262, []],
    [263, [normalize_language_name]],
    [264, [convert_cv_textclass]],
    [265, [convert_tableborder]],
    [266, []],
    [267, []],
    [268, []],
    [269, []],
    [270, []],
    [271, [convert_ext_font_sizes]],
    [272, []],
    [273, []],
    [274, [normalize_font_whitespace_274]],
    [275, [convert_graphics_rotation]],
    [276, [convert_arabic]],
]

revert = [
    [275, [revert_arabic]],
    [274, [revert_graphics_rotation]],
    [273, []],
    [272, [revert_separator_layout]],
    [271, [revert_preamble_listings_params, revert_listings_inset,
           revert_include_listings]],
    [270, [revert_ext_font_sizes]],
    [269, [revert_beamer_alert, revert_beamer_structure]],
    [268, [revert_preamble_listings_params, revert_listings_inset,
           revert_include_listings]],
    [267, [revert_CJK]],
    [266, [revert_utf8plain]],
    [265, [revert_armenian]],
    [264, [revert_tableborder]],
    [263, [revert_cv_textclass]],
    [262, [revert_language_name]],
    [261, [revert_ascii]],
    [260, []],
    [259, [revert_utf8x]],
    [258, []],
    [257, []],
    [256, [revert_caption]],
    [255, [revert_encodings]],
    [254, [revert_clearpage, revert_cleardoublepage]],
    [253, [revert_esint]],
    [252, [revert_nomenclature, revert_printnomenclature]],
    [251, [revert_commandparams]],
    [250, [revert_cs_label]],
    [249, []],
    [248, [revert_accent, revert_utf8, revert_unicode]],
    [247, [revert_booktabs]],
    [246, [revert_font_settings]],
    [245, [revert_framed]],
]


if __name__ == "__main__":
    pass
|