mirror of
https://git.lyx.org/repos/lyx.git
synced 2024-12-22 05:16:21 +00:00
lyx2lyx refactoring.
* use unicode.translate() instead of loop over replacements * telling variable names * remove trailing whitespace * documentation update * don't set use_dash_ligatures if both dash types are found * remove spurious warning, normalize indentation, and use Python idioms in revert_baselineskip()
This commit is contained in:
parent
e4c0c6afdf
commit
2fce4d49ee
@ -94,7 +94,7 @@ Test reversion/conversion between 2.3 and 2.2 formats with lyx2lyx.
|
||||
\end_layout
|
||||
|
||||
\begin_layout Description
|
||||
Allowbreak:
|
||||
allowbreak:
|
||||
\bar under
|
||||
|
||||
\begin_inset Box Boxed
|
||||
@ -169,5 +169,69 @@ without spaces.
|
||||
|
||||
\end_layout
|
||||
|
||||
\begin_layout Description
|
||||
baselineskip%:
|
||||
\begin_inset Box Boxed
|
||||
position "t"
|
||||
hor_pos "c"
|
||||
has_inner_box 1
|
||||
inner_pos "t"
|
||||
use_parbox 1
|
||||
use_makebox 0
|
||||
width "250baselineskip%"
|
||||
special "none"
|
||||
height "50baselineskip%"
|
||||
height_special "none"
|
||||
thickness "4baselineskip%"
|
||||
separation "9baselineskip%"
|
||||
shadowsize "4pt"
|
||||
framecolor "black"
|
||||
backgroundcolor "none"
|
||||
status open
|
||||
|
||||
\begin_layout Plain Layout
|
||||
test
|
||||
\end_layout
|
||||
|
||||
\end_inset
|
||||
|
||||
|
||||
\begin_inset CommandInset line
|
||||
LatexCommand rule
|
||||
offset "40baselineskip%"
|
||||
width "800baselineskip%"
|
||||
height "5.3baselineskip%"
|
||||
|
||||
\end_inset
|
||||
|
||||
|
||||
\end_layout
|
||||
|
||||
\begin_deeper
|
||||
\begin_layout Standard
|
||||
\begin_inset VSpace 200baselineskip%
|
||||
\end_inset
|
||||
|
||||
|
||||
\end_layout
|
||||
|
||||
\begin_layout Standard
|
||||
Vertical space above this paragraph is 2·baselineskip.
|
||||
\end_layout
|
||||
|
||||
\begin_layout Standard
|
||||
\begin_inset space \hspace*{}
|
||||
\length 75.2baselineskip%
|
||||
\end_inset
|
||||
|
||||
Paragraph with
|
||||
\begin_inset space \hspace{}
|
||||
\length 135baselineskip%
|
||||
\end_inset
|
||||
|
||||
horizontal space insets using baselineskip.
|
||||
\end_layout
|
||||
|
||||
\end_deeper
|
||||
\end_body
|
||||
\end_document
|
||||
|
@ -17,8 +17,8 @@
|
||||
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
'''
|
||||
This module offers several free functions to help with lyx2lyx'ing.
|
||||
More documentation is below, but here is a quick guide to what
|
||||
This module offers several free functions to help with lyx2lyx'ing.
|
||||
More documentation is below, but here is a quick guide to what
|
||||
they do. Optional arguments are marked by brackets.
|
||||
|
||||
add_to_preamble(document, text):
|
||||
@ -37,8 +37,8 @@ insert_to_preamble(document, text[, index]):
|
||||
default index is 0, so the material is inserted at the beginning.
|
||||
Prepends a comment "% Added by lyx2lyx" to text.
|
||||
|
||||
put_cmd_in_ert(arg):
|
||||
Here arg should be a list of strings (lines), which we want to
|
||||
put_cmd_in_ert(cmd):
|
||||
Here cmd should be a list of strings (lines), which we want to
|
||||
wrap in ERT. Returns a list of strings so wrapped.
|
||||
A call to this routine will often go something like this:
|
||||
i = find_token('\\begin_inset FunkyInset', ...)
|
||||
@ -81,7 +81,6 @@ import string
|
||||
from parser_tools import find_token, find_end_of_inset
|
||||
from unicode_symbols import unicode_reps
|
||||
|
||||
|
||||
# This will accept either a list of lines or a single line.
|
||||
# It is bad practice to pass something with embedded newlines,
|
||||
# though we will handle that.
|
||||
@ -118,34 +117,37 @@ def add_to_preamble(document, text):
|
||||
# It should really be a list.
|
||||
def insert_to_preamble(document, text, index = 0):
|
||||
""" Insert text to the preamble at a given line"""
|
||||
|
||||
|
||||
if not type(text) is list:
|
||||
# split on \n just in case
|
||||
# it'll give us the one element list we want
|
||||
# if there's no \n, too
|
||||
text = text.split('\n')
|
||||
|
||||
|
||||
text.insert(0, "% Added by lyx2lyx")
|
||||
document.preamble[index:index] = text
|
||||
|
||||
|
||||
def put_cmd_in_ert(arg):
|
||||
'''
|
||||
arg should be a list of lines we want to wrap in ERT.
|
||||
Returns a list of strings, with the lines so wrapped.
|
||||
'''
|
||||
|
||||
# A dictionary of Unicode->LICR mappings for use in a Unicode string's translate() method
|
||||
# Created from the reversed list to keep the first of alternative definitions.
|
||||
licr_table = dict((ord(ch), cmd) for cmd, ch in unicode_reps[::-1])
|
||||
|
||||
def put_cmd_in_ert(cmd):
|
||||
"""
|
||||
Return ERT inset wrapping `cmd` as a list of strings.
|
||||
|
||||
`cmd` can be a string or list of lines. Non-ASCII characters are converted
|
||||
to the respective LICR macros if defined in unicodesymbols.
|
||||
"""
|
||||
ret = ["\\begin_inset ERT", "status collapsed", "", "\\begin_layout Plain Layout", ""]
|
||||
# It will be faster for us to work with a single string internally.
|
||||
# That way, we only go through the unicode_reps loop once.
|
||||
if type(arg) is list:
|
||||
s = "\n".join(arg)
|
||||
# It will be faster to work with a single string internally.
|
||||
if isinstance(cmd, list):
|
||||
cmd = u"\n".join(cmd)
|
||||
else:
|
||||
s = arg
|
||||
for rep in unicode_reps:
|
||||
s = s.replace(rep[1], rep[0])
|
||||
s = s.replace('\\', "\\backslash\n")
|
||||
ret += s.splitlines()
|
||||
cmd = u"%s" % cmd # ensure it is an unicode instance
|
||||
cmd = cmd.translate(licr_table)
|
||||
cmd = cmd.replace("\\", "\\backslash\n")
|
||||
ret += cmd.splitlines()
|
||||
ret += ["\\end_layout", "", "\\end_inset"]
|
||||
return ret
|
||||
|
||||
@ -300,7 +302,7 @@ def lyx2verbatim(document, lines):
|
||||
|
||||
|
||||
def latex_length(slen):
|
||||
'''
|
||||
'''
|
||||
Convert lengths to their LaTeX representation. Returns (bool, length),
|
||||
where the bool tells us if it was a percentage, and the length is the
|
||||
LaTeX representation.
|
||||
@ -314,9 +316,14 @@ def latex_length(slen):
|
||||
# the + always precedes the -
|
||||
|
||||
# Convert relative lengths to LaTeX units
|
||||
units = {"text%":"\\textwidth", "col%":"\\columnwidth",
|
||||
"page%":"\\paperwidth", "line%":"\\linewidth",
|
||||
"theight%":"\\textheight", "pheight%":"\\paperheight"}
|
||||
units = {"col%": "\\columnwidth",
|
||||
"text%": "\\textwidth",
|
||||
"page%": "\\paperwidth",
|
||||
"line%": "\\linewidth",
|
||||
"theight%": "\\textheight",
|
||||
"pheight%": "\\paperheight",
|
||||
"baselineskip%": "\\baselineskip"
|
||||
}
|
||||
for unit in list(units.keys()):
|
||||
i = slen.find(unit)
|
||||
if i == -1:
|
||||
|
@ -23,7 +23,7 @@ import unicodedata
|
||||
import sys, os
|
||||
|
||||
from parser_tools import find_token, find_end_of, find_tokens, get_value
|
||||
from unicode_symbols import read_unicodesymbols
|
||||
from unicode_symbols import unicode_reps
|
||||
|
||||
####################################################################
|
||||
# Private helper functions
|
||||
@ -146,54 +146,6 @@ def set_option(document, m, option, value):
|
||||
return l
|
||||
|
||||
|
||||
# FIXME: Remove this function if the version imported from unicode_symbols works.
|
||||
# This function was the predecessor from that function, that in the meanwhile got
|
||||
# new fixes.
|
||||
def read_unicodesymbols2():
|
||||
" Read the unicodesymbols list of unicode characters and corresponding commands."
|
||||
|
||||
# Provide support for both python 2 and 3
|
||||
PY2 = sys.version_info[0] == 2
|
||||
if not PY2:
|
||||
unichr = chr
|
||||
# End of code to support for both python 2 and 3
|
||||
|
||||
pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
|
||||
fp = open(os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols'))
|
||||
spec_chars = []
|
||||
# Two backslashes, followed by some non-word character, and then a character
|
||||
# in brackets. The idea is to check for constructs like: \"{u}, which is how
|
||||
# they are written in the unicodesymbols file; but they can also be written
|
||||
# as: \"u or even \" u.
|
||||
r = re.compile(r'\\\\(\W)\{(\w)\}')
|
||||
for line in fp.readlines():
|
||||
if line[0] != '#' and line.strip() != "":
|
||||
line=line.replace(' "',' ') # remove all quotation marks with spaces before
|
||||
line=line.replace('" ',' ') # remove all quotation marks with spaces after
|
||||
line=line.replace(r'\"','"') # replace \" by " (for characters with diaeresis)
|
||||
try:
|
||||
[ucs4,command,dead] = line.split(None,2)
|
||||
if command[0:1] != "\\":
|
||||
continue
|
||||
spec_chars.append([command, unichr(eval(ucs4))])
|
||||
except:
|
||||
continue
|
||||
m = r.match(command)
|
||||
if m != None:
|
||||
command = "\\\\"
|
||||
# If the character is a double-quote, then we need to escape it, too,
|
||||
# since it is done that way in the LyX file.
|
||||
if m.group(1) == "\"":
|
||||
command += "\\"
|
||||
commandbl = command
|
||||
command += m.group(1) + m.group(2)
|
||||
commandbl += m.group(1) + ' ' + m.group(2)
|
||||
spec_chars.append([command, unichr(eval(ucs4))])
|
||||
spec_chars.append([commandbl, unichr(eval(ucs4))])
|
||||
fp.close()
|
||||
return spec_chars
|
||||
|
||||
|
||||
def extract_argument(line):
|
||||
'Extracts a LaTeX argument from the start of line. Returns (arg, rest).'
|
||||
|
||||
@ -280,8 +232,6 @@ def latex2ert(line, isindex):
|
||||
return retval
|
||||
|
||||
|
||||
unicode_reps = read_unicodesymbols()
|
||||
|
||||
#Bug 5022....
|
||||
#Might should do latex2ert first, then deal with stuff that DOESN'T
|
||||
#end up inside ERT. That routine could be modified so that it returned
|
||||
|
@ -746,10 +746,10 @@ def convert_phrases(document):
|
||||
if len(words) > 1 and words[0] == "\\begin_inset" and \
|
||||
words[1] in ["CommandInset", "External", "Formula", "Graphics", "listings"]:
|
||||
# must not replace anything in insets that store LaTeX contents in .lyx files
|
||||
# (math and command insets withut overridden read() and write() methods
|
||||
# (math and command insets without overridden read() and write() methods)
|
||||
j = find_end_of_inset(document.body, i)
|
||||
if j == -1:
|
||||
document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(i))
|
||||
document.warning("Malformed LyX document: Can't find end of inset at line " + str(i))
|
||||
i += 1
|
||||
else:
|
||||
i = j
|
||||
|
@ -27,15 +27,14 @@ import sys, os
|
||||
from parser_tools import (del_token, del_value, del_complete_lines,
|
||||
find_complete_lines, find_end_of, find_end_of_layout, find_end_of_inset,
|
||||
find_re, find_token, find_token_backwards, get_containing_inset,
|
||||
get_containing_layout, get_bool_value, get_value, get_quoted_value)
|
||||
# find_tokens, find_token_exact, is_in_inset,
|
||||
# check_token, get_option_value
|
||||
get_containing_layout, get_bool_value, get_value, get_quoted_value,
|
||||
is_in_inset)
|
||||
# find_tokens, find_token_exact, check_token, get_option_value
|
||||
|
||||
from lyx2lyx_tools import add_to_preamble, put_cmd_in_ert, revert_font_attrs, \
|
||||
insert_to_preamble
|
||||
# get_ert, lyx2latex, \
|
||||
# lyx2verbatim, length_in_bp, convert_info_insets
|
||||
# latex_length, revert_flex_inset, hex2ratio, str2bool
|
||||
from lyx2lyx_tools import (add_to_preamble, put_cmd_in_ert, revert_font_attrs,
|
||||
insert_to_preamble, latex_length)
|
||||
# get_ert, lyx2latex, lyx2verbatim, length_in_bp, convert_info_insets
|
||||
# revert_flex_inset, hex2ratio, str2bool
|
||||
|
||||
####################################################################
|
||||
# Private helper functions
|
||||
@ -88,13 +87,12 @@ def convert_dateinset(document):
|
||||
continue
|
||||
if get_value(document.body, 'template', i, j) == "Date":
|
||||
document.body[i : j + 1] = put_cmd_in_ert("\\today ")
|
||||
i += 1
|
||||
continue
|
||||
i = j+1 # skip inset
|
||||
|
||||
|
||||
def convert_inputenc(document):
|
||||
" Replace no longer supported input encoding settings. "
|
||||
i = find_token(document.header, "\\inputenc", 0)
|
||||
i = find_token(document.header, "\\inputenc")
|
||||
if i == -1:
|
||||
return
|
||||
if get_value(document.header, "\\inputencoding", i) == "pt254":
|
||||
@ -1859,7 +1857,7 @@ def convert_dashligatures(document):
|
||||
while i+1 < len(lines):
|
||||
i += 1
|
||||
line = lines[i]
|
||||
# skip lines without any dashes:
|
||||
# skip lines without dashes:
|
||||
if not re.search(u"[\u2013\u2014]|\\twohyphens|\\threehyphens", line):
|
||||
continue
|
||||
# skip label width string (see bug 10243):
|
||||
@ -1867,28 +1865,28 @@ def convert_dashligatures(document):
|
||||
continue
|
||||
# do not touch hyphens in some insets (cf. lyx_2_2.convert_dashes):
|
||||
try:
|
||||
value, start, end = get_containing_inset(lines, i)
|
||||
inset_type, start, end = get_containing_inset(lines, i)
|
||||
except TypeError: # no containing inset
|
||||
value, start, end = "no inset", -1, -1
|
||||
if (value.split()[0] in
|
||||
inset_type, start, end = "no inset", -1, -1
|
||||
if (inset_type.split()[0] in
|
||||
["CommandInset", "ERT", "External", "Formula",
|
||||
"FormulaMacro", "Graphics", "IPA", "listings"]
|
||||
or value == "Flex Code"):
|
||||
or inset_type == "Flex Code"):
|
||||
i = end
|
||||
continue
|
||||
try:
|
||||
layout, start, end, j = get_containing_layout(lines, i)
|
||||
layoutname, start, end, j = get_containing_layout(lines, i)
|
||||
except TypeError: # no (or malformed) containing layout
|
||||
document.warning("Malformed LyX document: "
|
||||
"Can't find layout at line %d" % i)
|
||||
continue
|
||||
if layout == "LyX-Code":
|
||||
if layoutname == "LyX-Code":
|
||||
i = end
|
||||
continue
|
||||
|
||||
# literal dash followed by a word or no-break space:
|
||||
if re.search(u"[\u2013\u2014]([\w\u00A0]|$)", line,
|
||||
flags=re.UNICODE):
|
||||
if re.search(u"[\u2013\u2014]([\w\u00A0]|$)",
|
||||
line, flags=re.UNICODE):
|
||||
has_literal_dashes = True
|
||||
# ligature dash followed by word or no-break space on next line:
|
||||
if (re.search(r"(\\twohyphens|\\threehyphens)", line) and
|
||||
@ -1900,14 +1898,15 @@ def convert_dashligatures(document):
|
||||
'"ligature" dashes.\n Line breaks may have changed. '
|
||||
'See UserGuide chapter 3.9.1 for details.')
|
||||
break
|
||||
if has_literal_dashes:
|
||||
|
||||
if has_literal_dashes and not has_ligature_dashes:
|
||||
use_dash_ligatures = False
|
||||
elif has_ligature_dashes:
|
||||
elif has_ligature_dashes and not has_literal_dashes:
|
||||
use_dash_ligatures = True
|
||||
|
||||
# insert the setting if there is a preferred value
|
||||
if use_dash_ligatures is not None:
|
||||
i = find_token(document.header, "\\graphics")
|
||||
document.header.insert(i, "\\use_dash_ligatures %s"
|
||||
document.header.insert(-1, "\\use_dash_ligatures %s"
|
||||
% str(use_dash_ligatures).lower())
|
||||
|
||||
|
||||
@ -2020,64 +2019,37 @@ def revert_mathindent(document):
|
||||
|
||||
|
||||
def revert_baselineskip(document):
|
||||
" Revert baselineskips to TeX code "
|
||||
i = 0
|
||||
vspaceLine = 0
|
||||
hspaceLine = 0
|
||||
while True:
|
||||
regexp = re.compile(r'^.*baselineskip%.*$')
|
||||
i = find_re(document.body, regexp, i)
|
||||
if i == -1:
|
||||
return
|
||||
vspaceLine = find_token(document.body, "\\begin_inset VSpace", i)
|
||||
if vspaceLine == i:
|
||||
# output VSpace inset as TeX code
|
||||
# first read out the values
|
||||
beg = document.body[i].rfind("VSpace ");
|
||||
end = document.body[i].rfind("baselineskip%");
|
||||
baselineskip = float(document.body[i][beg + 7:end]);
|
||||
# we store the value in percent, thus divide by 100
|
||||
baselineskip = baselineskip/100;
|
||||
baselineskip = str(baselineskip);
|
||||
# check if it is the starred version
|
||||
if document.body[i].find('*') != -1:
|
||||
star = '*'
|
||||
else:
|
||||
star = ''
|
||||
# now output TeX code
|
||||
endInset = find_end_of_inset(document.body, i)
|
||||
if endInset == -1:
|
||||
document.warning("Malformed LyX document: Missing '\\end_inset' of VSpace inset.")
|
||||
return
|
||||
else:
|
||||
document.body[vspaceLine: endInset + 1] = put_cmd_in_ert("\\vspace" + star + '{' + baselineskip + "\\baselineskip}")
|
||||
hspaceLine = find_token(document.body, "\\begin_inset space \\hspace", i - 1)
|
||||
document.warning("hspaceLine: " + str(hspaceLine))
|
||||
document.warning("i: " + str(i))
|
||||
if hspaceLine == i - 1:
|
||||
# output space inset as TeX code
|
||||
# first read out the values
|
||||
beg = document.body[i].rfind("\\length ");
|
||||
end = document.body[i].rfind("baselineskip%");
|
||||
baselineskip = float(document.body[i][beg + 7:end]);
|
||||
document.warning("baselineskip: " + str(baselineskip))
|
||||
# we store the value in percent, thus divide by 100
|
||||
baselineskip = baselineskip/100;
|
||||
baselineskip = str(baselineskip);
|
||||
# check if it is the starred version
|
||||
if document.body[i-1].find('*') != -1:
|
||||
star = '*'
|
||||
else:
|
||||
star = ''
|
||||
# now output TeX code
|
||||
endInset = find_end_of_inset(document.body, i)
|
||||
if endInset == -1:
|
||||
document.warning("Malformed LyX document: Missing '\\end_inset' of space inset.")
|
||||
return
|
||||
else:
|
||||
document.body[hspaceLine: endInset + 1] = put_cmd_in_ert("\\hspace" + star + '{' + baselineskip + "\\baselineskip}")
|
||||
|
||||
i = i + 1
|
||||
" Revert baselineskips to TeX code "
|
||||
i = 0
|
||||
regexp = re.compile(r'.*baselineskip%.*')
|
||||
while True:
|
||||
i = i + 1
|
||||
i = find_re(document.body, regexp, i)
|
||||
if i == -1:
|
||||
return
|
||||
if document.body[i].startswith("\\begin_inset VSpace"):
|
||||
# output VSpace inset as TeX code
|
||||
end = find_end_of_inset(document.body, i)
|
||||
if end == -1:
|
||||
document.warning("Malformed LyX document: "
|
||||
"Can't find end of VSpace inset at line %d." % i)
|
||||
continue
|
||||
# read out the value
|
||||
baselineskip = document.body[i].split()[-1]
|
||||
# check if it is the starred version
|
||||
star = '*' if '*' in document.body[i] else ''
|
||||
# now output TeX code
|
||||
cmd = "\\vspace%s{%s}" %(star, latex_length(baselineskip)[1])
|
||||
document.body[i:end+1] = put_cmd_in_ert(cmd)
|
||||
i += 8
|
||||
continue
|
||||
begin, end = is_in_inset(document.body, i, "\\begin_inset space \\hspace")
|
||||
if begin != - 1:
|
||||
# output space inset as TeX code
|
||||
baselineskip = document.body[i].split()[-1]
|
||||
star = '*' if '*' in document.body[i-1] else ''
|
||||
cmd = "\\hspace%s{%s}" %(star, latex_length(baselineskip)[1])
|
||||
document.body[begin:end+1] = put_cmd_in_ert(cmd)
|
||||
|
||||
|
||||
def revert_rotfloat(document):
|
||||
|
@ -23,7 +23,7 @@ This module offers several free functions to help parse lines.
|
||||
More documentation is below, but here is a quick guide to what
|
||||
they do. Optional arguments are marked by brackets.
|
||||
|
||||
find_token(lines, token, start[, end[, ignorews]]):
|
||||
find_token(lines, token[, start[, end[, ignorews]]]):
|
||||
Returns the first line i, start <= i < end, on which
|
||||
token is found at the beginning. Returns -1 if not
|
||||
found.
|
||||
@ -31,10 +31,10 @@ find_token(lines, token, start[, end[, ignorews]]):
|
||||
in whitespace do not count, except that there must be no
|
||||
extra whitespace following token itself.
|
||||
|
||||
find_token_exact(lines, token, start[, end]):
|
||||
find_token_exact(lines, token[, start[, end]]):
|
||||
As find_token, but with ignorews set to True.
|
||||
|
||||
find_tokens(lines, tokens, start[, end[, ignorews]]):
|
||||
find_tokens(lines, tokens[, start[, end[, ignorews]]]):
|
||||
Returns the first line i, start <= i < end, on which
|
||||
one of the tokens in tokens is found at the beginning.
|
||||
Returns -1 if not found.
|
||||
@ -42,7 +42,7 @@ find_tokens(lines, tokens, start[, end[, ignorews]]):
|
||||
in whitespace do not count, except that there must be no
|
||||
extra whitespace following token itself.
|
||||
|
||||
find_tokens_exact(lines, token, start[, end]):
|
||||
find_tokens_exact(lines, token[, start[, end]]):
|
||||
As find_tokens, but with ignorews True.
|
||||
|
||||
find_token_backwards(lines, token, start):
|
||||
@ -543,8 +543,9 @@ def is_in_inset(lines, i, inset, default=(-1,-1)):
|
||||
is_in_inset(document.body, i, "\\begin_inset Tabular")
|
||||
returns (-1,-1) if `i` is not within a "Tabular" inset (i.e. a table).
|
||||
If it is, then it returns the line on which the table begins and the one
|
||||
on which it ends. Note that this pair will evaluate to
|
||||
boolean True, so
|
||||
on which it ends.
|
||||
Note that this pair will evaluate to boolean True, so (with the optional
|
||||
default value set to False)
|
||||
if is_in_inset(..., default=False):
|
||||
will do what you expect.
|
||||
"""
|
||||
|
52
lib/lyx2lyx/test_lyx2lyx_tools.py
Normal file
52
lib/lyx2lyx/test_lyx2lyx_tools.py
Normal file
@ -0,0 +1,52 @@
|
||||
# This file is part of lyx2lyx
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2018 The LyX team
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License
|
||||
# as published by the Free Software Foundation; either version 2
|
||||
# of the License, or (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
" This module tests the auxiliary functions for lyx2lyx."
|
||||
|
||||
from lyx2lyx_tools import *
|
||||
|
||||
import unittest
|
||||
|
||||
class TestParserTools(unittest.TestCase):
|
||||
|
||||
def test_put_cmd_in_ert(self):
|
||||
ert = ['\\begin_inset ERT',
|
||||
'status collapsed',
|
||||
'',
|
||||
'\\begin_layout Plain Layout',
|
||||
'',
|
||||
u'\\backslash',
|
||||
u'texttt{Gr\\backslash',
|
||||
u'"{u}\\backslash',
|
||||
u'ss{}e}',
|
||||
'\\end_layout',
|
||||
'',
|
||||
'\\end_inset']
|
||||
self.assertEqual(put_cmd_in_ert(u"\\texttt{Grüße}"), ert)
|
||||
self.assertEqual(put_cmd_in_ert([u"\\texttt{Grüße}"]), ert)
|
||||
|
||||
def test_latex_length(self):
|
||||
self.assertEqual(latex_length("-30.5col%"), (True, "-0.305\\columnwidth"))
|
||||
self.assertEqual(latex_length("35baselineskip%"), (True, "0.35\\baselineskip"))
|
||||
self.assertEqual(latex_length("11em"), (False, "11em"))
|
||||
self.assertEqual(latex_length("-0.4pt"), (False, "-0.4pt"))
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
@ -18,7 +18,7 @@
|
||||
|
||||
" Import unicode_reps from this module for access to the unicode<->LaTeX mapping. "
|
||||
|
||||
import sys, os, re
|
||||
import sys, os, re, codecs
|
||||
|
||||
# Provide support for both python 2 and 3
|
||||
PY2 = sys.version_info[0] == 2
|
||||
@ -28,14 +28,13 @@ if not PY2:
|
||||
|
||||
def read_unicodesymbols():
|
||||
" Read the unicodesymbols list of unicode characters and corresponding commands."
|
||||
pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
|
||||
pathname = os.path.abspath(os.path.dirname(__file__))
|
||||
filename = os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols')
|
||||
|
||||
# For python 3+ we have to specify the encoding for those systems
|
||||
# where the default is not UTF-8
|
||||
fp = open(filename, encoding="utf8") if (not PY2) else open(filename)
|
||||
# Read as Unicode strings in both, Python 2 and 3
|
||||
# Specify the encoding for those systems where the default is not UTF-8
|
||||
fp = codecs.open(filename, encoding="utf8")
|
||||
|
||||
spec_chars = []
|
||||
# A backslash, followed by some non-word character, and then a character
|
||||
# in brackets. The idea is to check for constructs like: \"{u}, which is how
|
||||
# they are written in the unicodesymbols file; but they can also be written
|
||||
@ -43,36 +42,42 @@ def read_unicodesymbols():
|
||||
# The two backslashes in the string literal are needed to specify a literal
|
||||
# backslash in the regex. Without r prefix, these would be four backslashes.
|
||||
r = re.compile(r'\\(\W)\{(\w)\}')
|
||||
|
||||
spec_chars = []
|
||||
for line in fp.readlines():
|
||||
if line[0] != '#' and line.strip() != "":
|
||||
# Note: backslashes in the string literals with r prefix are not escaped,
|
||||
# so one backslash in the source file equals one backslash in memory.
|
||||
# Without r prefix backslashes are escaped, so two backslashes in the
|
||||
# source file equal one backslash in memory.
|
||||
line=line.replace(' "',' ') # remove all quotation marks with spaces before
|
||||
line=line.replace('" ',' ') # remove all quotation marks with spaces after
|
||||
line=line.replace(r'\"','"') # unescape "
|
||||
line=line.replace(r'\\','\\') # unescape \
|
||||
try:
|
||||
[ucs4,command,dead] = line.split(None,2)
|
||||
if command[0:1] != "\\":
|
||||
continue
|
||||
if (line.find("notermination=text") < 0 and
|
||||
line.find("notermination=both") < 0 and command[-1] != "}"):
|
||||
command = command + "{}"
|
||||
spec_chars.append([command, unichr(eval(ucs4))])
|
||||
except:
|
||||
if not line.strip() or line.startswith('#'):
|
||||
# skip empty lines and comments
|
||||
continue
|
||||
# Note: backslashes in the string literals with r prefix are not escaped,
|
||||
# so one backslash in the source file equals one backslash in memory.
|
||||
# Without r prefix backslashes are escaped, so two backslashes in the
|
||||
# source file equal one backslash in memory.
|
||||
line=line.replace(' "',' ') # remove all quotation marks with spaces before
|
||||
line=line.replace('" ',' ') # remove all quotation marks with spaces after
|
||||
line=line.replace(r'\"','"') # unescape "
|
||||
line=line.replace(r'\\','\\') # unescape \
|
||||
try:
|
||||
[ucs4,command,dead] = line.split(None,2)
|
||||
if command[0:1] != "\\":
|
||||
continue
|
||||
m = r.match(command)
|
||||
if m != None:
|
||||
command = "\\"
|
||||
commandbl = command
|
||||
command += m.group(1) + m.group(2)
|
||||
commandbl += m.group(1) + ' ' + m.group(2)
|
||||
spec_chars.append([command, unichr(eval(ucs4))])
|
||||
spec_chars.append([commandbl, unichr(eval(ucs4))])
|
||||
literal_char = unichr(int(ucs4, 16))
|
||||
if (line.find("notermination=text") < 0 and
|
||||
line.find("notermination=both") < 0 and command[-1] != "}"):
|
||||
command = command + "{}"
|
||||
spec_chars.append([command, literal_char])
|
||||
except:
|
||||
continue
|
||||
m = r.match(command)
|
||||
if m != None:
|
||||
command = "\\"
|
||||
commandbl = command
|
||||
command += m.group(1) + m.group(2)
|
||||
commandbl += m.group(1) + ' ' + m.group(2)
|
||||
spec_chars.append([command, literal_char])
|
||||
spec_chars.append([commandbl, literal_char])
|
||||
fp.close()
|
||||
return spec_chars
|
||||
|
||||
|
||||
unicode_reps = read_unicodesymbols()
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user