mirror of
https://git.lyx.org/repos/lyx.git
synced 2024-11-22 01:59:02 +00:00
lyx2lyx refactoring.
* use unicode.translate() instead of loop over replacements * telling variable names * remove trailing whitespace * documentation update * don't set use_dash_ligatures if both dash types are found * remove spurious warning, normalize indentation, and use Python idioms in revert_baselineskip()
This commit is contained in:
parent
e4c0c6afdf
commit
2fce4d49ee
@ -94,7 +94,7 @@ Test reversion/conversion between 2.3 and 2.2 formats with lyx2lyx.
|
|||||||
\end_layout
|
\end_layout
|
||||||
|
|
||||||
\begin_layout Description
|
\begin_layout Description
|
||||||
Allowbreak:
|
allowbreak:
|
||||||
\bar under
|
\bar under
|
||||||
|
|
||||||
\begin_inset Box Boxed
|
\begin_inset Box Boxed
|
||||||
@ -169,5 +169,69 @@ without spaces.
|
|||||||
|
|
||||||
\end_layout
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Description
|
||||||
|
baselineskip%:
|
||||||
|
\begin_inset Box Boxed
|
||||||
|
position "t"
|
||||||
|
hor_pos "c"
|
||||||
|
has_inner_box 1
|
||||||
|
inner_pos "t"
|
||||||
|
use_parbox 1
|
||||||
|
use_makebox 0
|
||||||
|
width "250baselineskip%"
|
||||||
|
special "none"
|
||||||
|
height "50baselineskip%"
|
||||||
|
height_special "none"
|
||||||
|
thickness "4baselineskip%"
|
||||||
|
separation "9baselineskip%"
|
||||||
|
shadowsize "4pt"
|
||||||
|
framecolor "black"
|
||||||
|
backgroundcolor "none"
|
||||||
|
status open
|
||||||
|
|
||||||
|
\begin_layout Plain Layout
|
||||||
|
test
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\end_inset
|
||||||
|
|
||||||
|
|
||||||
|
\begin_inset CommandInset line
|
||||||
|
LatexCommand rule
|
||||||
|
offset "40baselineskip%"
|
||||||
|
width "800baselineskip%"
|
||||||
|
height "5.3baselineskip%"
|
||||||
|
|
||||||
|
\end_inset
|
||||||
|
|
||||||
|
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_deeper
|
||||||
|
\begin_layout Standard
|
||||||
|
\begin_inset VSpace 200baselineskip%
|
||||||
|
\end_inset
|
||||||
|
|
||||||
|
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Standard
|
||||||
|
Vertical space above this paragraph is 2·baselineskip.
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\begin_layout Standard
|
||||||
|
\begin_inset space \hspace*{}
|
||||||
|
\length 75.2baselineskip%
|
||||||
|
\end_inset
|
||||||
|
|
||||||
|
Paragraph with
|
||||||
|
\begin_inset space \hspace{}
|
||||||
|
\length 135baselineskip%
|
||||||
|
\end_inset
|
||||||
|
|
||||||
|
horizontal space insets using baselineskip.
|
||||||
|
\end_layout
|
||||||
|
|
||||||
|
\end_deeper
|
||||||
\end_body
|
\end_body
|
||||||
\end_document
|
\end_document
|
||||||
|
@ -17,8 +17,8 @@
|
|||||||
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
|
||||||
'''
|
'''
|
||||||
This module offers several free functions to help with lyx2lyx'ing.
|
This module offers several free functions to help with lyx2lyx'ing.
|
||||||
More documentation is below, but here is a quick guide to what
|
More documentation is below, but here is a quick guide to what
|
||||||
they do. Optional arguments are marked by brackets.
|
they do. Optional arguments are marked by brackets.
|
||||||
|
|
||||||
add_to_preamble(document, text):
|
add_to_preamble(document, text):
|
||||||
@ -37,8 +37,8 @@ insert_to_preamble(document, text[, index]):
|
|||||||
default index is 0, so the material is inserted at the beginning.
|
default index is 0, so the material is inserted at the beginning.
|
||||||
Prepends a comment "% Added by lyx2lyx" to text.
|
Prepends a comment "% Added by lyx2lyx" to text.
|
||||||
|
|
||||||
put_cmd_in_ert(arg):
|
put_cmd_in_ert(cmd):
|
||||||
Here arg should be a list of strings (lines), which we want to
|
Here cmd should be a list of strings (lines), which we want to
|
||||||
wrap in ERT. Returns a list of strings so wrapped.
|
wrap in ERT. Returns a list of strings so wrapped.
|
||||||
A call to this routine will often go something like this:
|
A call to this routine will often go something like this:
|
||||||
i = find_token('\\begin_inset FunkyInset', ...)
|
i = find_token('\\begin_inset FunkyInset', ...)
|
||||||
@ -81,7 +81,6 @@ import string
|
|||||||
from parser_tools import find_token, find_end_of_inset
|
from parser_tools import find_token, find_end_of_inset
|
||||||
from unicode_symbols import unicode_reps
|
from unicode_symbols import unicode_reps
|
||||||
|
|
||||||
|
|
||||||
# This will accept either a list of lines or a single line.
|
# This will accept either a list of lines or a single line.
|
||||||
# It is bad practice to pass something with embedded newlines,
|
# It is bad practice to pass something with embedded newlines,
|
||||||
# though we will handle that.
|
# though we will handle that.
|
||||||
@ -118,34 +117,37 @@ def add_to_preamble(document, text):
|
|||||||
# It should really be a list.
|
# It should really be a list.
|
||||||
def insert_to_preamble(document, text, index = 0):
|
def insert_to_preamble(document, text, index = 0):
|
||||||
""" Insert text to the preamble at a given line"""
|
""" Insert text to the preamble at a given line"""
|
||||||
|
|
||||||
if not type(text) is list:
|
if not type(text) is list:
|
||||||
# split on \n just in case
|
# split on \n just in case
|
||||||
# it'll give us the one element list we want
|
# it'll give us the one element list we want
|
||||||
# if there's no \n, too
|
# if there's no \n, too
|
||||||
text = text.split('\n')
|
text = text.split('\n')
|
||||||
|
|
||||||
text.insert(0, "% Added by lyx2lyx")
|
text.insert(0, "% Added by lyx2lyx")
|
||||||
document.preamble[index:index] = text
|
document.preamble[index:index] = text
|
||||||
|
|
||||||
|
|
||||||
def put_cmd_in_ert(arg):
|
# A dictionary of Unicode->LICR mappings for use in a Unicode string's translate() method
|
||||||
'''
|
# Created from the reversed list to keep the first of alternative definitions.
|
||||||
arg should be a list of lines we want to wrap in ERT.
|
licr_table = dict((ord(ch), cmd) for cmd, ch in unicode_reps[::-1])
|
||||||
Returns a list of strings, with the lines so wrapped.
|
|
||||||
'''
|
def put_cmd_in_ert(cmd):
|
||||||
|
"""
|
||||||
|
Return ERT inset wrapping `cmd` as a list of strings.
|
||||||
|
|
||||||
|
`cmd` can be a string or list of lines. Non-ASCII characters are converted
|
||||||
|
to the respective LICR macros if defined in unicodesymbols.
|
||||||
|
"""
|
||||||
ret = ["\\begin_inset ERT", "status collapsed", "", "\\begin_layout Plain Layout", ""]
|
ret = ["\\begin_inset ERT", "status collapsed", "", "\\begin_layout Plain Layout", ""]
|
||||||
# It will be faster for us to work with a single string internally.
|
# It will be faster to work with a single string internally.
|
||||||
# That way, we only go through the unicode_reps loop once.
|
if isinstance(cmd, list):
|
||||||
if type(arg) is list:
|
cmd = u"\n".join(cmd)
|
||||||
s = "\n".join(arg)
|
|
||||||
else:
|
else:
|
||||||
s = arg
|
cmd = u"%s" % cmd # ensure it is an unicode instance
|
||||||
for rep in unicode_reps:
|
cmd = cmd.translate(licr_table)
|
||||||
s = s.replace(rep[1], rep[0])
|
cmd = cmd.replace("\\", "\\backslash\n")
|
||||||
s = s.replace('\\', "\\backslash\n")
|
ret += cmd.splitlines()
|
||||||
ret += s.splitlines()
|
|
||||||
ret += ["\\end_layout", "", "\\end_inset"]
|
ret += ["\\end_layout", "", "\\end_inset"]
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
@ -300,7 +302,7 @@ def lyx2verbatim(document, lines):
|
|||||||
|
|
||||||
|
|
||||||
def latex_length(slen):
|
def latex_length(slen):
|
||||||
'''
|
'''
|
||||||
Convert lengths to their LaTeX representation. Returns (bool, length),
|
Convert lengths to their LaTeX representation. Returns (bool, length),
|
||||||
where the bool tells us if it was a percentage, and the length is the
|
where the bool tells us if it was a percentage, and the length is the
|
||||||
LaTeX representation.
|
LaTeX representation.
|
||||||
@ -314,9 +316,14 @@ def latex_length(slen):
|
|||||||
# the + always precedes the -
|
# the + always precedes the -
|
||||||
|
|
||||||
# Convert relative lengths to LaTeX units
|
# Convert relative lengths to LaTeX units
|
||||||
units = {"text%":"\\textwidth", "col%":"\\columnwidth",
|
units = {"col%": "\\columnwidth",
|
||||||
"page%":"\\paperwidth", "line%":"\\linewidth",
|
"text%": "\\textwidth",
|
||||||
"theight%":"\\textheight", "pheight%":"\\paperheight"}
|
"page%": "\\paperwidth",
|
||||||
|
"line%": "\\linewidth",
|
||||||
|
"theight%": "\\textheight",
|
||||||
|
"pheight%": "\\paperheight",
|
||||||
|
"baselineskip%": "\\baselineskip"
|
||||||
|
}
|
||||||
for unit in list(units.keys()):
|
for unit in list(units.keys()):
|
||||||
i = slen.find(unit)
|
i = slen.find(unit)
|
||||||
if i == -1:
|
if i == -1:
|
||||||
|
@ -23,7 +23,7 @@ import unicodedata
|
|||||||
import sys, os
|
import sys, os
|
||||||
|
|
||||||
from parser_tools import find_token, find_end_of, find_tokens, get_value
|
from parser_tools import find_token, find_end_of, find_tokens, get_value
|
||||||
from unicode_symbols import read_unicodesymbols
|
from unicode_symbols import unicode_reps
|
||||||
|
|
||||||
####################################################################
|
####################################################################
|
||||||
# Private helper functions
|
# Private helper functions
|
||||||
@ -146,54 +146,6 @@ def set_option(document, m, option, value):
|
|||||||
return l
|
return l
|
||||||
|
|
||||||
|
|
||||||
# FIXME: Remove this function if the version imported from unicode_symbols works.
|
|
||||||
# This function was the predecessor from that function, that in the meanwhile got
|
|
||||||
# new fixes.
|
|
||||||
def read_unicodesymbols2():
|
|
||||||
" Read the unicodesymbols list of unicode characters and corresponding commands."
|
|
||||||
|
|
||||||
# Provide support for both python 2 and 3
|
|
||||||
PY2 = sys.version_info[0] == 2
|
|
||||||
if not PY2:
|
|
||||||
unichr = chr
|
|
||||||
# End of code to support for both python 2 and 3
|
|
||||||
|
|
||||||
pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
|
|
||||||
fp = open(os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols'))
|
|
||||||
spec_chars = []
|
|
||||||
# Two backslashes, followed by some non-word character, and then a character
|
|
||||||
# in brackets. The idea is to check for constructs like: \"{u}, which is how
|
|
||||||
# they are written in the unicodesymbols file; but they can also be written
|
|
||||||
# as: \"u or even \" u.
|
|
||||||
r = re.compile(r'\\\\(\W)\{(\w)\}')
|
|
||||||
for line in fp.readlines():
|
|
||||||
if line[0] != '#' and line.strip() != "":
|
|
||||||
line=line.replace(' "',' ') # remove all quotation marks with spaces before
|
|
||||||
line=line.replace('" ',' ') # remove all quotation marks with spaces after
|
|
||||||
line=line.replace(r'\"','"') # replace \" by " (for characters with diaeresis)
|
|
||||||
try:
|
|
||||||
[ucs4,command,dead] = line.split(None,2)
|
|
||||||
if command[0:1] != "\\":
|
|
||||||
continue
|
|
||||||
spec_chars.append([command, unichr(eval(ucs4))])
|
|
||||||
except:
|
|
||||||
continue
|
|
||||||
m = r.match(command)
|
|
||||||
if m != None:
|
|
||||||
command = "\\\\"
|
|
||||||
# If the character is a double-quote, then we need to escape it, too,
|
|
||||||
# since it is done that way in the LyX file.
|
|
||||||
if m.group(1) == "\"":
|
|
||||||
command += "\\"
|
|
||||||
commandbl = command
|
|
||||||
command += m.group(1) + m.group(2)
|
|
||||||
commandbl += m.group(1) + ' ' + m.group(2)
|
|
||||||
spec_chars.append([command, unichr(eval(ucs4))])
|
|
||||||
spec_chars.append([commandbl, unichr(eval(ucs4))])
|
|
||||||
fp.close()
|
|
||||||
return spec_chars
|
|
||||||
|
|
||||||
|
|
||||||
def extract_argument(line):
|
def extract_argument(line):
|
||||||
'Extracts a LaTeX argument from the start of line. Returns (arg, rest).'
|
'Extracts a LaTeX argument from the start of line. Returns (arg, rest).'
|
||||||
|
|
||||||
@ -280,8 +232,6 @@ def latex2ert(line, isindex):
|
|||||||
return retval
|
return retval
|
||||||
|
|
||||||
|
|
||||||
unicode_reps = read_unicodesymbols()
|
|
||||||
|
|
||||||
#Bug 5022....
|
#Bug 5022....
|
||||||
#Might should do latex2ert first, then deal with stuff that DOESN'T
|
#Might should do latex2ert first, then deal with stuff that DOESN'T
|
||||||
#end up inside ERT. That routine could be modified so that it returned
|
#end up inside ERT. That routine could be modified so that it returned
|
||||||
|
@ -746,10 +746,10 @@ def convert_phrases(document):
|
|||||||
if len(words) > 1 and words[0] == "\\begin_inset" and \
|
if len(words) > 1 and words[0] == "\\begin_inset" and \
|
||||||
words[1] in ["CommandInset", "External", "Formula", "Graphics", "listings"]:
|
words[1] in ["CommandInset", "External", "Formula", "Graphics", "listings"]:
|
||||||
# must not replace anything in insets that store LaTeX contents in .lyx files
|
# must not replace anything in insets that store LaTeX contents in .lyx files
|
||||||
# (math and command insets withut overridden read() and write() methods
|
# (math and command insets without overridden read() and write() methods)
|
||||||
j = find_end_of_inset(document.body, i)
|
j = find_end_of_inset(document.body, i)
|
||||||
if j == -1:
|
if j == -1:
|
||||||
document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(i))
|
document.warning("Malformed LyX document: Can't find end of inset at line " + str(i))
|
||||||
i += 1
|
i += 1
|
||||||
else:
|
else:
|
||||||
i = j
|
i = j
|
||||||
|
@ -27,15 +27,14 @@ import sys, os
|
|||||||
from parser_tools import (del_token, del_value, del_complete_lines,
|
from parser_tools import (del_token, del_value, del_complete_lines,
|
||||||
find_complete_lines, find_end_of, find_end_of_layout, find_end_of_inset,
|
find_complete_lines, find_end_of, find_end_of_layout, find_end_of_inset,
|
||||||
find_re, find_token, find_token_backwards, get_containing_inset,
|
find_re, find_token, find_token_backwards, get_containing_inset,
|
||||||
get_containing_layout, get_bool_value, get_value, get_quoted_value)
|
get_containing_layout, get_bool_value, get_value, get_quoted_value,
|
||||||
# find_tokens, find_token_exact, is_in_inset,
|
is_in_inset)
|
||||||
# check_token, get_option_value
|
# find_tokens, find_token_exact, check_token, get_option_value
|
||||||
|
|
||||||
from lyx2lyx_tools import add_to_preamble, put_cmd_in_ert, revert_font_attrs, \
|
from lyx2lyx_tools import (add_to_preamble, put_cmd_in_ert, revert_font_attrs,
|
||||||
insert_to_preamble
|
insert_to_preamble, latex_length)
|
||||||
# get_ert, lyx2latex, \
|
# get_ert, lyx2latex, lyx2verbatim, length_in_bp, convert_info_insets
|
||||||
# lyx2verbatim, length_in_bp, convert_info_insets
|
# revert_flex_inset, hex2ratio, str2bool
|
||||||
# latex_length, revert_flex_inset, hex2ratio, str2bool
|
|
||||||
|
|
||||||
####################################################################
|
####################################################################
|
||||||
# Private helper functions
|
# Private helper functions
|
||||||
@ -88,13 +87,12 @@ def convert_dateinset(document):
|
|||||||
continue
|
continue
|
||||||
if get_value(document.body, 'template', i, j) == "Date":
|
if get_value(document.body, 'template', i, j) == "Date":
|
||||||
document.body[i : j + 1] = put_cmd_in_ert("\\today ")
|
document.body[i : j + 1] = put_cmd_in_ert("\\today ")
|
||||||
i += 1
|
i = j+1 # skip inset
|
||||||
continue
|
|
||||||
|
|
||||||
|
|
||||||
def convert_inputenc(document):
|
def convert_inputenc(document):
|
||||||
" Replace no longer supported input encoding settings. "
|
" Replace no longer supported input encoding settings. "
|
||||||
i = find_token(document.header, "\\inputenc", 0)
|
i = find_token(document.header, "\\inputenc")
|
||||||
if i == -1:
|
if i == -1:
|
||||||
return
|
return
|
||||||
if get_value(document.header, "\\inputencoding", i) == "pt254":
|
if get_value(document.header, "\\inputencoding", i) == "pt254":
|
||||||
@ -1859,7 +1857,7 @@ def convert_dashligatures(document):
|
|||||||
while i+1 < len(lines):
|
while i+1 < len(lines):
|
||||||
i += 1
|
i += 1
|
||||||
line = lines[i]
|
line = lines[i]
|
||||||
# skip lines without any dashes:
|
# skip lines without dashes:
|
||||||
if not re.search(u"[\u2013\u2014]|\\twohyphens|\\threehyphens", line):
|
if not re.search(u"[\u2013\u2014]|\\twohyphens|\\threehyphens", line):
|
||||||
continue
|
continue
|
||||||
# skip label width string (see bug 10243):
|
# skip label width string (see bug 10243):
|
||||||
@ -1867,28 +1865,28 @@ def convert_dashligatures(document):
|
|||||||
continue
|
continue
|
||||||
# do not touch hyphens in some insets (cf. lyx_2_2.convert_dashes):
|
# do not touch hyphens in some insets (cf. lyx_2_2.convert_dashes):
|
||||||
try:
|
try:
|
||||||
value, start, end = get_containing_inset(lines, i)
|
inset_type, start, end = get_containing_inset(lines, i)
|
||||||
except TypeError: # no containing inset
|
except TypeError: # no containing inset
|
||||||
value, start, end = "no inset", -1, -1
|
inset_type, start, end = "no inset", -1, -1
|
||||||
if (value.split()[0] in
|
if (inset_type.split()[0] in
|
||||||
["CommandInset", "ERT", "External", "Formula",
|
["CommandInset", "ERT", "External", "Formula",
|
||||||
"FormulaMacro", "Graphics", "IPA", "listings"]
|
"FormulaMacro", "Graphics", "IPA", "listings"]
|
||||||
or value == "Flex Code"):
|
or inset_type == "Flex Code"):
|
||||||
i = end
|
i = end
|
||||||
continue
|
continue
|
||||||
try:
|
try:
|
||||||
layout, start, end, j = get_containing_layout(lines, i)
|
layoutname, start, end, j = get_containing_layout(lines, i)
|
||||||
except TypeError: # no (or malformed) containing layout
|
except TypeError: # no (or malformed) containing layout
|
||||||
document.warning("Malformed LyX document: "
|
document.warning("Malformed LyX document: "
|
||||||
"Can't find layout at line %d" % i)
|
"Can't find layout at line %d" % i)
|
||||||
continue
|
continue
|
||||||
if layout == "LyX-Code":
|
if layoutname == "LyX-Code":
|
||||||
i = end
|
i = end
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# literal dash followed by a word or no-break space:
|
# literal dash followed by a word or no-break space:
|
||||||
if re.search(u"[\u2013\u2014]([\w\u00A0]|$)", line,
|
if re.search(u"[\u2013\u2014]([\w\u00A0]|$)",
|
||||||
flags=re.UNICODE):
|
line, flags=re.UNICODE):
|
||||||
has_literal_dashes = True
|
has_literal_dashes = True
|
||||||
# ligature dash followed by word or no-break space on next line:
|
# ligature dash followed by word or no-break space on next line:
|
||||||
if (re.search(r"(\\twohyphens|\\threehyphens)", line) and
|
if (re.search(r"(\\twohyphens|\\threehyphens)", line) and
|
||||||
@ -1900,14 +1898,15 @@ def convert_dashligatures(document):
|
|||||||
'"ligature" dashes.\n Line breaks may have changed. '
|
'"ligature" dashes.\n Line breaks may have changed. '
|
||||||
'See UserGuide chapter 3.9.1 for details.')
|
'See UserGuide chapter 3.9.1 for details.')
|
||||||
break
|
break
|
||||||
if has_literal_dashes:
|
|
||||||
|
if has_literal_dashes and not has_ligature_dashes:
|
||||||
use_dash_ligatures = False
|
use_dash_ligatures = False
|
||||||
elif has_ligature_dashes:
|
elif has_ligature_dashes and not has_literal_dashes:
|
||||||
use_dash_ligatures = True
|
use_dash_ligatures = True
|
||||||
|
|
||||||
# insert the setting if there is a preferred value
|
# insert the setting if there is a preferred value
|
||||||
if use_dash_ligatures is not None:
|
if use_dash_ligatures is not None:
|
||||||
i = find_token(document.header, "\\graphics")
|
document.header.insert(-1, "\\use_dash_ligatures %s"
|
||||||
document.header.insert(i, "\\use_dash_ligatures %s"
|
|
||||||
% str(use_dash_ligatures).lower())
|
% str(use_dash_ligatures).lower())
|
||||||
|
|
||||||
|
|
||||||
@ -2020,64 +2019,37 @@ def revert_mathindent(document):
|
|||||||
|
|
||||||
|
|
||||||
def revert_baselineskip(document):
|
def revert_baselineskip(document):
|
||||||
" Revert baselineskips to TeX code "
|
" Revert baselineskips to TeX code "
|
||||||
i = 0
|
i = 0
|
||||||
vspaceLine = 0
|
regexp = re.compile(r'.*baselineskip%.*')
|
||||||
hspaceLine = 0
|
while True:
|
||||||
while True:
|
i = i + 1
|
||||||
regexp = re.compile(r'^.*baselineskip%.*$')
|
i = find_re(document.body, regexp, i)
|
||||||
i = find_re(document.body, regexp, i)
|
if i == -1:
|
||||||
if i == -1:
|
return
|
||||||
return
|
if document.body[i].startswith("\\begin_inset VSpace"):
|
||||||
vspaceLine = find_token(document.body, "\\begin_inset VSpace", i)
|
# output VSpace inset as TeX code
|
||||||
if vspaceLine == i:
|
end = find_end_of_inset(document.body, i)
|
||||||
# output VSpace inset as TeX code
|
if end == -1:
|
||||||
# first read out the values
|
document.warning("Malformed LyX document: "
|
||||||
beg = document.body[i].rfind("VSpace ");
|
"Can't find end of VSpace inset at line %d." % i)
|
||||||
end = document.body[i].rfind("baselineskip%");
|
continue
|
||||||
baselineskip = float(document.body[i][beg + 7:end]);
|
# read out the value
|
||||||
# we store the value in percent, thus divide by 100
|
baselineskip = document.body[i].split()[-1]
|
||||||
baselineskip = baselineskip/100;
|
# check if it is the starred version
|
||||||
baselineskip = str(baselineskip);
|
star = '*' if '*' in document.body[i] else ''
|
||||||
# check if it is the starred version
|
# now output TeX code
|
||||||
if document.body[i].find('*') != -1:
|
cmd = "\\vspace%s{%s}" %(star, latex_length(baselineskip)[1])
|
||||||
star = '*'
|
document.body[i:end+1] = put_cmd_in_ert(cmd)
|
||||||
else:
|
i += 8
|
||||||
star = ''
|
continue
|
||||||
# now output TeX code
|
begin, end = is_in_inset(document.body, i, "\\begin_inset space \\hspace")
|
||||||
endInset = find_end_of_inset(document.body, i)
|
if begin != - 1:
|
||||||
if endInset == -1:
|
# output space inset as TeX code
|
||||||
document.warning("Malformed LyX document: Missing '\\end_inset' of VSpace inset.")
|
baselineskip = document.body[i].split()[-1]
|
||||||
return
|
star = '*' if '*' in document.body[i-1] else ''
|
||||||
else:
|
cmd = "\\hspace%s{%s}" %(star, latex_length(baselineskip)[1])
|
||||||
document.body[vspaceLine: endInset + 1] = put_cmd_in_ert("\\vspace" + star + '{' + baselineskip + "\\baselineskip}")
|
document.body[begin:end+1] = put_cmd_in_ert(cmd)
|
||||||
hspaceLine = find_token(document.body, "\\begin_inset space \\hspace", i - 1)
|
|
||||||
document.warning("hspaceLine: " + str(hspaceLine))
|
|
||||||
document.warning("i: " + str(i))
|
|
||||||
if hspaceLine == i - 1:
|
|
||||||
# output space inset as TeX code
|
|
||||||
# first read out the values
|
|
||||||
beg = document.body[i].rfind("\\length ");
|
|
||||||
end = document.body[i].rfind("baselineskip%");
|
|
||||||
baselineskip = float(document.body[i][beg + 7:end]);
|
|
||||||
document.warning("baselineskip: " + str(baselineskip))
|
|
||||||
# we store the value in percent, thus divide by 100
|
|
||||||
baselineskip = baselineskip/100;
|
|
||||||
baselineskip = str(baselineskip);
|
|
||||||
# check if it is the starred version
|
|
||||||
if document.body[i-1].find('*') != -1:
|
|
||||||
star = '*'
|
|
||||||
else:
|
|
||||||
star = ''
|
|
||||||
# now output TeX code
|
|
||||||
endInset = find_end_of_inset(document.body, i)
|
|
||||||
if endInset == -1:
|
|
||||||
document.warning("Malformed LyX document: Missing '\\end_inset' of space inset.")
|
|
||||||
return
|
|
||||||
else:
|
|
||||||
document.body[hspaceLine: endInset + 1] = put_cmd_in_ert("\\hspace" + star + '{' + baselineskip + "\\baselineskip}")
|
|
||||||
|
|
||||||
i = i + 1
|
|
||||||
|
|
||||||
|
|
||||||
def revert_rotfloat(document):
|
def revert_rotfloat(document):
|
||||||
|
@ -23,7 +23,7 @@ This module offers several free functions to help parse lines.
|
|||||||
More documentation is below, but here is a quick guide to what
|
More documentation is below, but here is a quick guide to what
|
||||||
they do. Optional arguments are marked by brackets.
|
they do. Optional arguments are marked by brackets.
|
||||||
|
|
||||||
find_token(lines, token, start[, end[, ignorews]]):
|
find_token(lines, token[, start[, end[, ignorews]]]):
|
||||||
Returns the first line i, start <= i < end, on which
|
Returns the first line i, start <= i < end, on which
|
||||||
token is found at the beginning. Returns -1 if not
|
token is found at the beginning. Returns -1 if not
|
||||||
found.
|
found.
|
||||||
@ -31,10 +31,10 @@ find_token(lines, token, start[, end[, ignorews]]):
|
|||||||
in whitespace do not count, except that there must be no
|
in whitespace do not count, except that there must be no
|
||||||
extra whitespace following token itself.
|
extra whitespace following token itself.
|
||||||
|
|
||||||
find_token_exact(lines, token, start[, end]):
|
find_token_exact(lines, token[, start[, end]]):
|
||||||
As find_token, but with ignorews set to True.
|
As find_token, but with ignorews set to True.
|
||||||
|
|
||||||
find_tokens(lines, tokens, start[, end[, ignorews]]):
|
find_tokens(lines, tokens[, start[, end[, ignorews]]]):
|
||||||
Returns the first line i, start <= i < end, on which
|
Returns the first line i, start <= i < end, on which
|
||||||
one of the tokens in tokens is found at the beginning.
|
one of the tokens in tokens is found at the beginning.
|
||||||
Returns -1 if not found.
|
Returns -1 if not found.
|
||||||
@ -42,7 +42,7 @@ find_tokens(lines, tokens, start[, end[, ignorews]]):
|
|||||||
in whitespace do not count, except that there must be no
|
in whitespace do not count, except that there must be no
|
||||||
extra whitespace following token itself.
|
extra whitespace following token itself.
|
||||||
|
|
||||||
find_tokens_exact(lines, token, start[, end]):
|
find_tokens_exact(lines, token[, start[, end]]):
|
||||||
As find_tokens, but with ignorews True.
|
As find_tokens, but with ignorews True.
|
||||||
|
|
||||||
find_token_backwards(lines, token, start):
|
find_token_backwards(lines, token, start):
|
||||||
@ -543,8 +543,9 @@ def is_in_inset(lines, i, inset, default=(-1,-1)):
|
|||||||
is_in_inset(document.body, i, "\\begin_inset Tabular")
|
is_in_inset(document.body, i, "\\begin_inset Tabular")
|
||||||
returns (-1,-1) if `i` is not within a "Tabular" inset (i.e. a table).
|
returns (-1,-1) if `i` is not within a "Tabular" inset (i.e. a table).
|
||||||
If it is, then it returns the line on which the table begins and the one
|
If it is, then it returns the line on which the table begins and the one
|
||||||
on which it ends. Note that this pair will evaluate to
|
on which it ends.
|
||||||
boolean True, so
|
Note that this pair will evaluate to boolean True, so (with the optional
|
||||||
|
default value set to False)
|
||||||
if is_in_inset(..., default=False):
|
if is_in_inset(..., default=False):
|
||||||
will do what you expect.
|
will do what you expect.
|
||||||
"""
|
"""
|
||||||
|
52
lib/lyx2lyx/test_lyx2lyx_tools.py
Normal file
52
lib/lyx2lyx/test_lyx2lyx_tools.py
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
# This file is part of lyx2lyx
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# Copyright (C) 2018 The LyX team
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or
|
||||||
|
# modify it under the terms of the GNU General Public License
|
||||||
|
# as published by the Free Software Foundation; either version 2
|
||||||
|
# of the License, or (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the Free Software
|
||||||
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
|
||||||
|
" This module tests the auxiliary functions for lyx2lyx."
|
||||||
|
|
||||||
|
from lyx2lyx_tools import *
|
||||||
|
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
class TestParserTools(unittest.TestCase):
|
||||||
|
|
||||||
|
def test_put_cmd_in_ert(self):
|
||||||
|
ert = ['\\begin_inset ERT',
|
||||||
|
'status collapsed',
|
||||||
|
'',
|
||||||
|
'\\begin_layout Plain Layout',
|
||||||
|
'',
|
||||||
|
u'\\backslash',
|
||||||
|
u'texttt{Gr\\backslash',
|
||||||
|
u'"{u}\\backslash',
|
||||||
|
u'ss{}e}',
|
||||||
|
'\\end_layout',
|
||||||
|
'',
|
||||||
|
'\\end_inset']
|
||||||
|
self.assertEqual(put_cmd_in_ert(u"\\texttt{Grüße}"), ert)
|
||||||
|
self.assertEqual(put_cmd_in_ert([u"\\texttt{Grüße}"]), ert)
|
||||||
|
|
||||||
|
def test_latex_length(self):
|
||||||
|
self.assertEqual(latex_length("-30.5col%"), (True, "-0.305\\columnwidth"))
|
||||||
|
self.assertEqual(latex_length("35baselineskip%"), (True, "0.35\\baselineskip"))
|
||||||
|
self.assertEqual(latex_length("11em"), (False, "11em"))
|
||||||
|
self.assertEqual(latex_length("-0.4pt"), (False, "-0.4pt"))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
unittest.main()
|
@ -18,7 +18,7 @@
|
|||||||
|
|
||||||
" Import unicode_reps from this module for access to the unicode<->LaTeX mapping. "
|
" Import unicode_reps from this module for access to the unicode<->LaTeX mapping. "
|
||||||
|
|
||||||
import sys, os, re
|
import sys, os, re, codecs
|
||||||
|
|
||||||
# Provide support for both python 2 and 3
|
# Provide support for both python 2 and 3
|
||||||
PY2 = sys.version_info[0] == 2
|
PY2 = sys.version_info[0] == 2
|
||||||
@ -28,14 +28,13 @@ if not PY2:
|
|||||||
|
|
||||||
def read_unicodesymbols():
|
def read_unicodesymbols():
|
||||||
" Read the unicodesymbols list of unicode characters and corresponding commands."
|
" Read the unicodesymbols list of unicode characters and corresponding commands."
|
||||||
pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
|
pathname = os.path.abspath(os.path.dirname(__file__))
|
||||||
filename = os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols')
|
filename = os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols')
|
||||||
|
|
||||||
# For python 3+ we have to specify the encoding for those systems
|
# Read as Unicode strings in both, Python 2 and 3
|
||||||
# where the default is not UTF-8
|
# Specify the encoding for those systems where the default is not UTF-8
|
||||||
fp = open(filename, encoding="utf8") if (not PY2) else open(filename)
|
fp = codecs.open(filename, encoding="utf8")
|
||||||
|
|
||||||
spec_chars = []
|
|
||||||
# A backslash, followed by some non-word character, and then a character
|
# A backslash, followed by some non-word character, and then a character
|
||||||
# in brackets. The idea is to check for constructs like: \"{u}, which is how
|
# in brackets. The idea is to check for constructs like: \"{u}, which is how
|
||||||
# they are written in the unicodesymbols file; but they can also be written
|
# they are written in the unicodesymbols file; but they can also be written
|
||||||
@ -43,36 +42,42 @@ def read_unicodesymbols():
|
|||||||
# The two backslashes in the string literal are needed to specify a literal
|
# The two backslashes in the string literal are needed to specify a literal
|
||||||
# backslash in the regex. Without r prefix, these would be four backslashes.
|
# backslash in the regex. Without r prefix, these would be four backslashes.
|
||||||
r = re.compile(r'\\(\W)\{(\w)\}')
|
r = re.compile(r'\\(\W)\{(\w)\}')
|
||||||
|
|
||||||
|
spec_chars = []
|
||||||
for line in fp.readlines():
|
for line in fp.readlines():
|
||||||
if line[0] != '#' and line.strip() != "":
|
if not line.strip() or line.startswith('#'):
|
||||||
# Note: backslashes in the string literals with r prefix are not escaped,
|
# skip empty lines and comments
|
||||||
# so one backslash in the source file equals one backslash in memory.
|
continue
|
||||||
# Without r prefix backslashes are escaped, so two backslashes in the
|
# Note: backslashes in the string literals with r prefix are not escaped,
|
||||||
# source file equal one backslash in memory.
|
# so one backslash in the source file equals one backslash in memory.
|
||||||
line=line.replace(' "',' ') # remove all quotation marks with spaces before
|
# Without r prefix backslashes are escaped, so two backslashes in the
|
||||||
line=line.replace('" ',' ') # remove all quotation marks with spaces after
|
# source file equal one backslash in memory.
|
||||||
line=line.replace(r'\"','"') # unescape "
|
line=line.replace(' "',' ') # remove all quotation marks with spaces before
|
||||||
line=line.replace(r'\\','\\') # unescape \
|
line=line.replace('" ',' ') # remove all quotation marks with spaces after
|
||||||
try:
|
line=line.replace(r'\"','"') # unescape "
|
||||||
[ucs4,command,dead] = line.split(None,2)
|
line=line.replace(r'\\','\\') # unescape \
|
||||||
if command[0:1] != "\\":
|
try:
|
||||||
continue
|
[ucs4,command,dead] = line.split(None,2)
|
||||||
if (line.find("notermination=text") < 0 and
|
if command[0:1] != "\\":
|
||||||
line.find("notermination=both") < 0 and command[-1] != "}"):
|
|
||||||
command = command + "{}"
|
|
||||||
spec_chars.append([command, unichr(eval(ucs4))])
|
|
||||||
except:
|
|
||||||
continue
|
continue
|
||||||
m = r.match(command)
|
literal_char = unichr(int(ucs4, 16))
|
||||||
if m != None:
|
if (line.find("notermination=text") < 0 and
|
||||||
command = "\\"
|
line.find("notermination=both") < 0 and command[-1] != "}"):
|
||||||
commandbl = command
|
command = command + "{}"
|
||||||
command += m.group(1) + m.group(2)
|
spec_chars.append([command, literal_char])
|
||||||
commandbl += m.group(1) + ' ' + m.group(2)
|
except:
|
||||||
spec_chars.append([command, unichr(eval(ucs4))])
|
continue
|
||||||
spec_chars.append([commandbl, unichr(eval(ucs4))])
|
m = r.match(command)
|
||||||
|
if m != None:
|
||||||
|
command = "\\"
|
||||||
|
commandbl = command
|
||||||
|
command += m.group(1) + m.group(2)
|
||||||
|
commandbl += m.group(1) + ' ' + m.group(2)
|
||||||
|
spec_chars.append([command, literal_char])
|
||||||
|
spec_chars.append([commandbl, literal_char])
|
||||||
fp.close()
|
fp.close()
|
||||||
return spec_chars
|
return spec_chars
|
||||||
|
|
||||||
|
|
||||||
unicode_reps = read_unicodesymbols()
|
unicode_reps = read_unicodesymbols()
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user