Fix preamble-code removal in lyx2lyx. Do some optimizations.

Fix failure of revert_dashes() found by lyx2lyx ctests
using an efficient function to find a given sequence of lines
in a list of lines.

Some optimizations using Python idioms instead of C-like code.
This commit is contained in:
Günter Milde 2018-01-21 19:55:27 +01:00
parent 5fce313ffc
commit 8e825de4b2
3 changed files with 89 additions and 43 deletions

View File

@ -29,14 +29,14 @@ import sys, os
# find_token_backwards, is_in_inset, get_value, get_quoted_value, \ # find_token_backwards, is_in_inset, get_value, get_quoted_value, \
# del_token, check_token, get_option_value # del_token, check_token, get_option_value
from lyx2lyx_tools import add_to_preamble, put_cmd_in_ert, get_ert, lyx2latex, \ from lyx2lyx_tools import (add_to_preamble, put_cmd_in_ert, get_ert,
lyx2verbatim, length_in_bp, convert_info_insets lyx2latex, lyx2verbatim, length_in_bp, convert_info_insets)
# insert_to_preamble, latex_length, revert_flex_inset, \ # insert_to_preamble, latex_length, revert_flex_inset,
# revert_font_attrs, hex2ratio, str2bool # revert_font_attrs, hex2ratio, str2bool
from parser_tools import find_token, find_token_backwards, find_re, \ from parser_tools import (find_end_of_inset, find_end_of_layout,
find_end_of_inset, find_end_of_layout, find_nonempty_line, \ find_nonempty_line, find_re, find_slice, find_token, find_token_backwards,
get_containing_layout, get_value, check_token get_containing_layout, get_value, check_token)
#################################################################### ####################################################################
# Private helper functions # Private helper functions
@ -706,24 +706,20 @@ def revert_dashes(document):
Remove preamble code from 2.3->2.2 conversion. Remove preamble code from 2.3->2.2 conversion.
""" """
# Remove preamble code from 2.3->2.2 conversion: # Remove preamble code from 2.3->2.2 conversion:
for i, line in enumerate(document.preamble): dash_renew_lines = find_slice(document.preamble,
if (line == '% Added by lyx2lyx' and ['% Added by lyx2lyx',
document.preamble[i+1] == r'\renewcommand{\textendash}{--}' and r'\renewcommand{\textendash}{--}',
document.preamble[i+2] == r'\renewcommand{\textemdash}{---}'): r'\renewcommand{\textemdash}{---}'])
del document.preamble[i:i+3] del(document.preamble[dash_renew_lines])
break
# Prevent ligation of hyphens: # Prevent ligation of hyphens:
i = 0 i = 0
while i < len(document.body)-1: while i < len(document.body)-1:
# increment i, skip some insets (cf. convert_dashes) # increment i, skip some insets (cf. convert_dashes)
i = _dashes_next_line(document, i) i = _dashes_next_line(document, i)
line = document.body[i] line = document.body[i]
while "--" in line: if "--" in line:
line = line.replace("--", "-\\SpecialChar \\textcompwordmark{}\n-") line = line.replace("--", "-\\SpecialChar \\textcompwordmark{}\n-")
parts = line.split('\n') document.body[i:i+1] = line.split('\n')
if len(parts) > 1:
document.body[i:i+1] = parts
i += len(parts)-1
# Convert \twohyphens and \threehyphens: # Convert \twohyphens and \threehyphens:
i = 0 i = 0
while i < len(document.body): while i < len(document.body):

View File

@ -24,9 +24,10 @@ import sys, os
# Uncomment only what you need to import, please. # Uncomment only what you need to import, please.
from parser_tools import find_end_of, find_token_backwards, find_end_of_layout, \ from parser_tools import del_token, find_end_of, find_end_of_layout, \
find_token, find_end_of_inset, get_value, get_bool_value, \ find_end_of_inset, find_re, find_slice, find_token, \
get_containing_layout, get_quoted_value, del_token, find_re find_token_backwards, get_containing_layout, \
get_bool_value, get_value, get_quoted_value
# find_tokens, find_token_exact, is_in_inset, \ # find_tokens, find_token_exact, is_in_inset, \
# check_token, get_option_value # check_token, get_option_value
@ -1843,17 +1844,18 @@ def revert_chapterbib(document):
def convert_dashligatures(document): def convert_dashligatures(document):
"Set 'use_dash_ligatures' according to content." "Set 'use_dash_ligatures' according to content."
use_dash_ligatures = None use_dash_ligatures = None
# eventually remove preamble code from 2.3->2.2 conversion: # Eventually remove preamble code from 2.3->2.2 conversion:
for i, line in enumerate(document.preamble): dash_renew_lines = find_slice(document.preamble,
if i > 1 and line == r'\renewcommand{\textemdash}{---}': ['% Added by lyx2lyx',
if (document.preamble[i-1] == r'\renewcommand{\textendash}{--}' r'\renewcommand{\textendash}{--}',
and document.preamble[i-2] == '% Added by lyx2lyx'): r'\renewcommand{\textemdash}{---}'])
del document.preamble[i-2:i+1] del(document.preamble[dash_renew_lines])
use_dash_ligatures = True use_dash_ligatures = bool(dash_renew_lines.stop)
if use_dash_ligatures is None: if use_dash_ligatures is None:
# Look for dashes: # Look for dashes:
# (Documents by LyX 2.1 or older have "\twohyphens\n" or "\threehyphens\n" # (Documents by LyX 2.1 or older have "\twohyphens\n" or "\threehyphens\n"
# as interim representation for dash ligatures in 2.2.) # as interim representation for dash ligatures)
has_literal_dashes = False has_literal_dashes = False
has_ligature_dashes = False has_ligature_dashes = False
j = 0 j = 0
@ -1882,9 +1884,8 @@ def convert_dashligatures(document):
flags=re.UNICODE): flags=re.UNICODE):
has_literal_dashes = True has_literal_dashes = True
# ligature dash followed by word or no-break space on next line: # ligature dash followed by word or no-break space on next line:
if re.search(u"(\\\\twohyphens|\\\\threehyphens)", line, if (re.search(r"(\\twohyphens|\\threehyphens)", line) and
flags=re.UNICODE) and re.match(u"[\w\u00A0]", re.match(u"[\w\u00A0]", document.body[i+1], flags=re.UNICODE)):
document.body[i+1], flags=re.UNICODE):
has_ligature_dashes = True has_ligature_dashes = True
if has_literal_dashes and has_ligature_dashes: if has_literal_dashes and has_ligature_dashes:
# TODO: insert a warning note in the document? # TODO: insert a warning note in the document?
@ -1920,11 +1921,10 @@ def revert_dashligatures(document):
if (i < j) or line.startswith("\\labelwidthstring"): if (i < j) or line.startswith("\\labelwidthstring"):
new_body.append(line) new_body.append(line)
continue continue
words = line.split() if (line.startswith("\\begin_inset ") and
if (len(words) > 1 and words[0] == "\\begin_inset" line[13:].split()[0] in ["CommandInset", "ERT", "External",
and (words[1] in ["CommandInset", "ERT", "External", "Formula", "Formula", "FormulaMacro", "Graphics", "IPA", "listings"]
"FormulaMacro", "Graphics", "IPA", "listings"] or line == "\\begin_inset Flex Code"):
or ' '.join(words[1:]) == "Flex Code")):
j = find_end_of_inset(document.body, i) j = find_end_of_inset(document.body, i)
if j == -1: if j == -1:
document.warning("Malformed LyX document: Can't find end of " document.warning("Malformed LyX document: Can't find end of "

View File

@ -18,7 +18,7 @@
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
''' """
This module offers several free functions to help parse lines. This module offers several free functions to help parse lines.
More documentation is below, but here is a quick guide to what More documentation is below, but here is a quick guide to what
they do. Optional arguments are marked by brackets. they do. Optional arguments are marked by brackets.
@ -152,18 +152,68 @@ is_nonempty_line(line):
count_pars_in_inset(lines, i): count_pars_in_inset(lines, i):
Counts the paragraphs inside an inset. Counts the paragraphs inside an inset.
''' """
import re import re
# Fast search in lists
def find_slice(l, sl, start = 0, stop = None):
    """Return position of first occurrence of sequence `sl` in list `l`
    as a `slice` object.

    >>> find_slice([1, 2, 3, 1, 1, 2], (1, 2))
    slice(0, 2, None)

    The return value can be used to delete or substitute the sub-list:

    >>> l = [1, 0, 1, 1, 1, 2]
    >>> s = find_slice(l, [0, 1, 1])
    >>> del(l[s]); l
    [1, 1, 2]
    >>> s = find_slice(l, (1, 2))
    >>> l[s] = [3]; l
    [1, 3]

    The start argument works similar to list.index()

    >>> find_slice([1, 2, 3, 1, 1 ,2], (1, 2), start = 1)
    slice(4, 6, None)

    `stop` limits the search like the third argument of list.index();
    a false value (None or 0) means "search up to the end of `l`".

    Use the `stop` attribute of the returned `slice` to test for success:

    >>> s1 = find_slice([2, 3, 1], (3, 1))
    >>> s2 = find_slice([2, 3, 1], (2, 1))
    >>> if s1.stop and not s2.stop:
    ...     print("wow")
    wow

    If `sl` is not found, the empty slice ``slice(0, 0)`` is returned, so
    that deleting or replacing with the result is harmless. An empty `sl`
    is treated the same way (there is nothing to find):

    >>> find_slice([1, 2, 3], [])
    slice(0, 0, None)
    """
    N = len(sl) # length of sub-list
    if not N:
        # Without this guard the for-loop below would not run at all and
        # the `else` branch would use the still unbound index `i`.
        return slice(0, 0)
    stop = stop or len(l)
    try:
        while True:
            for j, value in enumerate(sl):
                # list.index() raises ValueError if `value` is not in
                # l[start:stop]; this ends the search (see `except`).
                i = l.index(value, start, stop)
                if j and i != start:
                    # Gap in the candidate match: `value` occurs later
                    # than required. Restart so that a match beginning at
                    # i-j (where `value` would be in place) is tried next.
                    start = i-j
                    break
                start = i + 1
            else:
                # all elements of `sl` found in consecutive positions
                return slice(i+1-N, i+1)
    except ValueError: # sub list `sl` not found
        return slice(0, 0)
# Utilities for one line # Utilities for one line
def check_token(line, token): def check_token(line, token):
""" check_token(line, token) -> bool """ check_token(line, token) -> bool
Return True if token is present in line and is the first element Return True if token is present in line and is the first element
else returns False.""" else returns False.
return line[:len(token)] == token Deprecated. Use line.startswith(token).
"""
return line.startswith(token)
def is_nonempty_line(line): def is_nonempty_line(line):
@ -171,7 +221,7 @@ def is_nonempty_line(line):
Return False if line is either empty or it has only whitespaces, Return False if line is either empty or it has only whitespaces,
else return True.""" else return True."""
return line != " "*len(line) return bool(line.strip())
# Utilities for a list of lines # Utilities for a list of lines