New lyx2lyx tools.

New lyx2lyx parser tools find_complete_lines() (replaces find_slice),
del_complete_lines(), and find_across_lines(). Default value 0
for start argument in utility functions.

Rework the implementation of dash-conversion.
This commit is contained in:
Günter Milde 2018-01-23 08:45:19 +01:00
parent 40e32d4d27
commit 8da6cdcf23
4 changed files with 345 additions and 214 deletions

View File

@ -34,9 +34,10 @@ from lyx2lyx_tools import (add_to_preamble, put_cmd_in_ert, get_ert,
# insert_to_preamble, latex_length, revert_flex_inset, # insert_to_preamble, latex_length, revert_flex_inset,
# revert_font_attrs, hex2ratio, str2bool # revert_font_attrs, hex2ratio, str2bool
from parser_tools import (find_end_of_inset, find_end_of_layout, from parser_tools import (del_complete_lines,
find_nonempty_line, find_re, find_slice, find_token, find_token_backwards, find_end_of_inset, find_end_of_layout, find_nonempty_line, find_re,
get_containing_layout, get_value, check_token) find_token, find_token_backwards, get_containing_layout,
get_value, check_token)
#################################################################### ####################################################################
# Private helper functions # Private helper functions
@ -615,130 +616,105 @@ def convert_dashes(document):
if document.backend != "latex": if document.backend != "latex":
return return
lines = document.body
i = 0 i = 0
while i < len(document.body): while i+1 < len(lines):
words = document.body[i].split() i += 1
line = lines[i]
words = line.split()
if (len(words) > 1 and words[0] == "\\begin_inset" if (len(words) > 1 and words[0] == "\\begin_inset"
and (words[1] in ["CommandInset", "ERT", "External", "Formula", and (words[1] in ["CommandInset", "ERT", "External", "Formula",
"FormulaMacro", "Graphics", "IPA", "listings"] "FormulaMacro", "Graphics", "IPA", "listings"]
or ' '.join(words[1:]) == "Flex Code")): or line.endswith("Flex Code"))):
# must not replace anything in insets that store LaTeX contents in .lyx files # must not replace anything in insets that store LaTeX contents in .lyx files
# (math and command insets without overridden read() and write() methods # (math and command insets without overridden read() and write() methods
# filtering out IPA makes Text::readParToken() more simple # filtering out IPA makes Text::readParToken() more simple
# skip ERT as well since it is not needed there # skip ERT as well since it is not needed there
# Flex Code is logical markup, typically rendered as typewriter # Flex Code is logical markup, typically rendered as typewriter
j = find_end_of_inset(document.body, i) j = find_end_of_inset(lines, i)
if j == -1: if j == -1:
document.warning("Malformed LyX document: Can't find end of " + words[1] + " inset at line " + str(i)) document.warning("Malformed LyX document: Can't find end of " +
i += 1 words[1] + " inset at line " + str(i))
else: else:
i = j i = j
continue continue
if document.body[i] == "\\begin_layout LyX-Code": if lines[i] == "\\begin_layout LyX-Code":
j = find_end_of_layout(document.body, i) j = find_end_of_layout(lines, i)
if j == -1: if j == -1:
document.warning("Malformed LyX document: " document.warning("Malformed LyX document: "
"Can't find end of %s layout at line %d" % (words[1],i)) "Can't find end of %s layout at line %d" % (words[1],i))
i += 1
else: else:
i = j i = j
continue continue
if line.startswith("\\labelwidthstring"):
if len(words) > 0 and words[0] in ["\\leftindent", "\\paragraph_spacing", "\\align", "\\labelwidthstring"]: # skip label width string (bug 10243)
# skip paragraph parameters (bug 10243)
i += 1
continue continue
while True:
j = document.body[i].find("--") if "--" in line:
if j == -1:
break
front = document.body[i][:j]
back = document.body[i][j+2:]
# We can have an arbitrary number of consecutive hyphens. # We can have an arbitrary number of consecutive hyphens.
# These must be split into the corresponding number of two and three hyphens # Replace as LaTeX does: First try emdash, then endash
# We must match what LaTeX does: First try emdash, then endash, then single hyphen line = line.replace("---", "\\threehyphens\n")
if back.find("-") == 0: line = line.replace("--", "\\twohyphens\n")
back = back[1:] lines[i:i+1] = line.splitlines()
if len(back) > 0:
document.body.insert(i+1, back)
document.body[i] = front + "\\threehyphens"
else:
if len(back) > 0:
document.body.insert(i+1, back)
document.body[i] = front + "\\twohyphens"
i += 1
i = 0 # remove ligature breaks between dashes
while i < len(document.body): i = 1
line = document.body[i] while i < len(lines):
while (line.endswith(r"-\SpecialChar \textcompwordmark{}") and line = lines[i]
document.body[i+1].startswith("-")): if (line.endswith(r"-\SpecialChar \textcompwordmark{}") and
line = line.replace(r"\SpecialChar \textcompwordmark{}", lines[i+1].startswith("-")):
document.body.pop(i+1)) lines[i] = line.replace(r"\SpecialChar \textcompwordmark{}",
document.body[i] = line lines.pop(i+1))
i += 1 else:
i += 1
# Return number of the next line to check for dashes.
def _dashes_next_line(document, i):
i +=1
words = document.body[i].split()
# skip paragraph parameters (bug 10243):
if words and words[0] in ["\\leftindent", "\\paragraph_spacing",
"\\align", "\\labelwidthstring"]:
i += 1
words = document.body[i].split()
# some insets should be skipped in revert_dashes (cf. convert_dashes)
if (len(words) > 1 and words[0] == "\\begin_inset" and
words[1] in ["CommandInset", "ERT", "External", "Formula",
"FormulaMacro", "Graphics", "IPA", "listings"]):
j = find_end_of_inset(document.body, i)
if j == -1:
document.warning("Malformed LyX document: Can't find end of "
+ words[1] + " inset at line " + str(i))
return i
return j+1
return i
def revert_dashes(document): def revert_dashes(document):
""" """
Prevent ligatures of existing --- and --. Prevent ligatures of existing --- and --.
Convert \\twohyphens and \\threehyphens to -- and ---. Revert \\twohyphens and \\threehyphens to -- and ---.
Remove preamble code from 2.3->2.2 conversion. Remove preamble code from 2.3->2.2 conversion.
""" """
# Remove preamble code from 2.3->2.2 conversion: del_complete_lines(document.preamble,
dash_renew_lines = find_slice(document.preamble, ['% Added by lyx2lyx',
['% Added by lyx2lyx', r'\renewcommand{\textendash}{--}',
r'\renewcommand{\textendash}{--}', r'\renewcommand{\textemdash}{---}'])
r'\renewcommand{\textemdash}{---}']) # Insert ligature breaks to prevent ligation of hyphens to dashes:
del(document.preamble[dash_renew_lines]) lines = document.body
# Prevent ligation of hyphens:
i = 0 i = 0
while i < len(document.body)-1: while i+1 < len(lines):
# increment i, skip some insets (cf. convert_dashes) i += 1
i = _dashes_next_line(document, i) line = lines[i]
line = document.body[i] # skip label width string (bug 10243):
if line.startswith("\\labelwidthstring"):
continue
# do not touch hyphens in some insets (cf. convert_dashes):
if line.startswith("\\begin_inset"):
try:
if line.split()[1] in ["CommandInset", "ERT", "External",
"Formula", "FormulaMacro", "Graphics",
"IPA", "listings"]:
j = find_end_of_inset(lines, i)
if j == -1:
document.warning("Malformed LyX document: Can't find "
"end of %s inset at line %d." % (itype, i))
continue
i = j
except IndexError:
continue
if "--" in line: if "--" in line:
line = line.replace("--", "-\\SpecialChar \\textcompwordmark{}\n-") line = line.replace("--", "-\\SpecialChar \\textcompwordmark{}\n-")
document.body[i:i+1] = line.split('\n') document.body[i:i+1] = line.split('\n')
# Convert \twohyphens and \threehyphens: # Revert \twohyphens and \threehyphens:
i = 0 i = 1
while i < len(document.body): while i < len(lines):
# skip some insets (see convert_dashes()) line = lines[i]
i = _dashes_next_line(document, i-1) if not line.endswith("hyphens"):
replaced = False i +=1
if document.body[i].find("\\twohyphens") >= 0: elif line.endswith("\\twohyphens") or line.endswith("\\threehyphens"):
document.body[i] = document.body[i].replace("\\twohyphens", "--") line = line.replace("\\twohyphens", "--")
replaced = True line = line.replace("\\threehyphens", "---")
if document.body[i].find("\\threehyphens") >= 0: lines[i] = line + lines.pop(i+1)
document.body[i] = document.body[i].replace("\\threehyphens", "---")
replaced = True
if replaced and i+1 < len(document.body) and \
(document.body[i+1].find("\\") != 0 or \
document.body[i+1].find("\\twohyphens") == 0 or
document.body[i+1].find("\\threehyphens") == 0) and \
len(document.body[i]) + len(document.body[i+1]) <= 80:
document.body[i] = document.body[i] + document.body[i+1]
document.body[i+1:i+2] = []
else: else:
i += 1 i += 1
@ -879,16 +855,16 @@ def revert_georgian(document):
document.language = "english" document.language = "english"
i = find_token(document.header, "\\language georgian", 0) i = find_token(document.header, "\\language georgian", 0)
if i != -1: if i != -1:
document.header[i] = "\\language english" document.header[i] = "\\language english"
j = find_token(document.header, "\\language_package default", 0) j = find_token(document.header, "\\language_package default", 0)
if j != -1: if j != -1:
document.header[j] = "\\language_package babel" document.header[j] = "\\language_package babel"
k = find_token(document.header, "\\options", 0) k = find_token(document.header, "\\options", 0)
if k != -1: if k != -1:
document.header[k] = document.header[k].replace("\\options", "\\options georgian,") document.header[k] = document.header[k].replace("\\options", "\\options georgian,")
else: else:
l = find_token(document.header, "\\use_default_options", 0) l = find_token(document.header, "\\use_default_options", 0)
document.header.insert(l + 1, "\\options georgian") document.header.insert(l + 1, "\\options georgian")
def revert_sigplan_doi(document): def revert_sigplan_doi(document):

View File

@ -24,9 +24,9 @@ import sys, os
# Uncomment only what you need to import, please. # Uncomment only what you need to import, please.
from parser_tools import del_token, find_end_of, find_end_of_layout, \ from parser_tools import del_token, del_value, del_complete_lines, \
find_end_of_inset, find_re, find_slice, find_token, \ find_end_of, find_end_of_layout, find_end_of_inset, find_re, \
find_token_backwards, get_containing_layout, \ find_token, find_token_backwards, get_containing_layout, \
get_bool_value, get_value, get_quoted_value get_bool_value, get_value, get_quoted_value
# find_tokens, find_token_exact, is_in_inset, \ # find_tokens, find_token_exact, is_in_inset, \
# check_token, get_option_value # check_token, get_option_value
@ -1303,7 +1303,7 @@ def revert_biblatex(document):
"Citealt*", "Citealp*", "Citeauthor*", "fullcite", "footcite",\ "Citealt*", "Citealp*", "Citeauthor*", "fullcite", "footcite",\
"footcitet", "footcitep", "footcitealt", "footcitealp",\ "footcitet", "footcitep", "footcitealt", "footcitealp",\
"footciteauthor", "footciteyear", "footciteyearpar",\ "footciteauthor", "footciteyear", "footciteyearpar",\
"citefield", "citetitle", "cite*" ] "citefield", "citetitle", "cite*" ]
i = 0 i = 0
while (True): while (True):
@ -1843,19 +1843,16 @@ def revert_chapterbib(document):
def convert_dashligatures(document): def convert_dashligatures(document):
"Set 'use_dash_ligatures' according to content." "Set 'use_dash_ligatures' according to content."
use_dash_ligatures = None # Look for and remove dashligatures workaround from 2.3->2.2 reversion,
# Eventually remove preamble code from 2.3->2.2 conversion: # set use_dash_ligatures to True if found, to None else.
dash_renew_lines = find_slice(document.preamble, use_dash_ligatures = del_complete_lines(document.preamble,
['% Added by lyx2lyx', ['% Added by lyx2lyx',
r'\renewcommand{\textendash}{--}', r'\renewcommand{\textendash}{--}',
r'\renewcommand{\textemdash}{---}']) r'\renewcommand{\textemdash}{---}']) or None
del(document.preamble[dash_renew_lines])
use_dash_ligatures = bool(dash_renew_lines.stop)
if use_dash_ligatures is None: if use_dash_ligatures is None:
# Look for dashes: # Look for dashes (Documents by LyX 2.1 or older have "\twohyphens\n"
# (Documents by LyX 2.1 or older have "\twohyphens\n" or "\threehyphens\n" # or "\threehyphens\n" as interim representation for -- and ---.)
# as interim representation for dash ligatures)
has_literal_dashes = False has_literal_dashes = False
has_ligature_dashes = False has_ligature_dashes = False
j = 0 j = 0
@ -1863,16 +1860,19 @@ def convert_dashligatures(document):
# Skip some document parts where dashes are not converted # Skip some document parts where dashes are not converted
if (i < j) or line.startswith("\\labelwidthstring"): if (i < j) or line.startswith("\\labelwidthstring"):
continue continue
words = line.split() if line.startswith("\\begin_inset"):
if (len(words) > 1 and words[0] == "\\begin_inset" try:
and (words[1] in ["CommandInset", "ERT", "External", "Formula", it = line.split()[1]
"FormulaMacro", "Graphics", "IPA", "listings"] except IndexError:
or ' '.join(words[1:]) == "Flex Code")): continue
j = find_end_of_inset(document.body, i) if (it in ["CommandInset", "ERT", "External", "Formula",
if j == -1: "FormulaMacro", "Graphics", "IPA", "listings"]
document.warning("Malformed LyX document: " or line.endswith("Flex Code")):
"Can't find end of %s inset at line %d" % (words[1],i)) j = find_end_of_inset(document.body, i)
continue if j == -1:
document.warning("Malformed LyX document: Can't "
"find end of %s inset at line %d." % (itype, i))
continue
if line == "\\begin_layout LyX-Code": if line == "\\begin_layout LyX-Code":
j = find_end_of_layout(document.body, i) j = find_end_of_layout(document.body, i)
if j == -1: if j == -1:
@ -1898,22 +1898,16 @@ def convert_dashligatures(document):
use_dash_ligatures = True use_dash_ligatures = True
# insert the setting if there is a preferred value # insert the setting if there is a preferred value
if use_dash_ligatures is not None: if use_dash_ligatures is not None:
i = find_token(document.header, "\\use_microtype", 0) i = find_token(document.header, "\\graphics")
if i != -1: document.header.insert(i, "\\use_dash_ligatures %s"
document.header.insert(i+1, "\\use_dash_ligatures %s" % str(use_dash_ligatures).lower())
% str(use_dash_ligatures).lower())
def revert_dashligatures(document): def revert_dashligatures(document):
"""Remove font ligature settings for en- and em-dashes. """Remove font ligature settings for en- and em-dashes.
Revert conversion of \twodashes or \threedashes to literal dashes.""" Revert conversion of \twodashes or \threedashes to literal dashes."""
i = find_token(document.header, "\\use_dash_ligatures", 0) use_dash_ligatures = del_value(document.header, "\\use_dash_ligatures")
if i == -1: if use_dash_ligatures != "true" or document.backend != "latex":
return return
use_dash_ligatures = get_bool_value(document.header, "\\use_dash_ligatures", i)
del document.header[i]
if not use_dash_ligatures or document.backend != "latex":
return
j = 0 j = 0
new_body = [] new_body = []
for i, line in enumerate(document.body): for i, line in enumerate(document.body):
@ -2018,8 +2012,8 @@ def revert_mathindent(document):
else: else:
k = find_token(document.header, "\\options", 0) k = find_token(document.header, "\\options", 0)
if k != -1: if k != -1:
document.header[k] = document.header[k].replace("\\options", "\\options fleqn,") document.header[k] = document.header[k].replace("\\options", "\\options fleqn,")
del document.header[i] del document.header[i]
else: else:
l = find_token(document.header, "\\use_default_options", 0) l = find_token(document.header, "\\use_default_options", 0)
document.header.insert(l, "\\options fleqn") document.header.insert(l, "\\options fleqn")

View File

@ -156,53 +156,6 @@ count_pars_in_inset(lines, i):
import re import re
# Fast search in lists
def find_slice(l, sl, start = 0, stop = None):
"""Return position of first occurence of sequence `sl` in list `l`
as a `slice` object.
>>> find_slice([1, 2, 3, 1, 1, 2], (1, 2))
slice(0, 2, None)
The return value can be used to delete or substitute the sub-list:
>>> l = [1, 0, 1, 1, 1, 2]
>>> s = find_slice(l, [0, 1, 1])
>>> del(l[s]); l
[1, 1, 2]
>>> s = find_slice(l, (1, 2))
>>> l[s] = [3]; l
[1, 3]
The start argument works similar to list.index()
>>> find_slice([1, 2, 3, 1, 1 ,2], (1, 2), start = 1)
slice(4, 6, None)
Use the `stop` attribute of the returned `slice` to test for success:
>>> s1 = find_slice([2, 3, 1], (3, 1))
>>> s2 = find_slice([2, 3, 1], (2, 1))
>>> if s1.stop and not s2.stop:
... print "wow"
wow
"""
stop = stop or len(l)
N = len(sl) # lenght of sub-list
try:
while True:
for j, value in enumerate(sl):
i = l.index(value, start, stop)
if j and i != start:
start = i-j
break
start = i +1
else:
return slice(i+1-N, i+1)
except ValueError: # sub list `sl` not found
return slice(0, 0)
# Utilities for one line # Utilities for one line
def check_token(line, token): def check_token(line, token):
""" check_token(line, token) -> bool """ check_token(line, token) -> bool
@ -212,7 +165,6 @@ def check_token(line, token):
Deprecated. Use line.startswith(token). Deprecated. Use line.startswith(token).
""" """
return line.startswith(token) return line.startswith(token)
@ -225,40 +177,40 @@ def is_nonempty_line(line):
# Utilities for a list of lines # Utilities for a list of lines
def find_token(lines, token, start, end = 0, ignorews = False): def find_token(lines, token, start=0, end=0, ignorews=False):
""" find_token(lines, token, start[[, end], ignorews]) -> int """ find_token(lines, token, start[[, end], ignorews]) -> int
Return the lowest line where token is found, and is the first Return the lowest line where token is found, and is the first
element, in lines[start, end]. element, in lines[start, end].
If ignorews is True (default is False), then differences in If ignorews is True (default is False), then differences in
whitespace are ignored, except that there must be no extra whitespace are ignored, but there must be whitespace following
whitespace following token itself. token itself.
Return -1 on failure.""" Return -1 on failure."""
if end == 0 or end > len(lines): if end == 0 or end > len(lines):
end = len(lines) end = len(lines)
m = len(token) if ignorews:
y = token.split()
for i in range(start, end): for i in range(start, end):
if ignorews: if ignorews:
x = lines[i].split() x = lines[i].split()
y = token.split()
if len(x) < len(y): if len(x) < len(y):
continue continue
if x[:len(y)] == y: if x[:len(y)] == y:
return i return i
else: else:
if lines[i][:m] == token: if lines[i].startswith(token):
return i return i
return -1 return -1
def find_token_exact(lines, token, start, end = 0): def find_token_exact(lines, token, start=0, end=0):
return find_token(lines, token, start, end, True) return find_token(lines, token, start, end, True)
def find_tokens(lines, tokens, start, end = 0, ignorews = False): def find_tokens(lines, tokens, start=0, end=0, ignorews=False):
""" find_tokens(lines, tokens, start[[, end], ignorews]) -> int """ find_tokens(lines, tokens, start[[, end], ignorews]) -> int
Return the lowest line where one token in tokens is found, and is Return the lowest line where one token in tokens is found, and is
@ -278,17 +230,17 @@ def find_tokens(lines, tokens, start, end = 0, ignorews = False):
if x[:len(y)] == y: if x[:len(y)] == y:
return i return i
else: else:
if lines[i][:len(token)] == token: if lines[i].startswith(token):
return i return i
return -1 return -1
def find_tokens_exact(lines, tokens, start, end = 0): def find_tokens_exact(lines, tokens, start=0, end=0):
return find_tokens(lines, tokens, start, end, True) return find_tokens(lines, tokens, start, end, True)
def find_re(lines, rexp, start, end = 0): def find_re(lines, rexp, start=0, end=0):
""" find_token_re(lines, rexp, start[, end]) -> int """ find_re(lines, rexp, start[, end]) -> int
Return the lowest line where rexp, a regular expression, is found Return the lowest line where rexp, a regular expression, is found
in lines[start, end]. in lines[start, end].
@ -310,10 +262,8 @@ def find_token_backwards(lines, token, start):
element, in lines[start, end]. element, in lines[start, end].
Return -1 on failure.""" Return -1 on failure."""
m = len(token)
for i in range(start, -1, -1): for i in range(start, -1, -1):
line = lines[i] if lines[i].startswith(token):
if line[:m] == token:
return i return i
return -1 return -1
@ -328,12 +278,86 @@ def find_tokens_backwards(lines, tokens, start):
for i in range(start, -1, -1): for i in range(start, -1, -1):
line = lines[i] line = lines[i]
for token in tokens: for token in tokens:
if line[:len(token)] == token: if line.startswith(token):
return i return i
return -1 return -1
def get_value(lines, token, start, end = 0, default = ""): def find_complete_lines(lines, sublines, start=0, end=0):
"""Find first occurrence of sequence `sublines` in list `lines`.
Return index of first line or -1 on failure.
Efficient search for a sub-list in a large list. Works for any values.
The elements are located with list.index(), i.e. compared with `==`.
>>> find_complete_lines([1, 2, 3, 1, 1, 2], [1, 2])
0
The `start` and `end` arguments work similar to list.index()
(`end` == 0 stands for "up to the end of `lines`"):
>>> find_complete_lines([1, 2, 3, 1, 1 ,2], [1, 2], start=1)
4
>>> find_complete_lines([1, 2, 3, 1, 1 ,2], [1, 2], start=1, end=4)
-1
The return value can be used to substitute the sub-list.
Take care to check before use:
>>> l = [1, 1, 2]
>>> s = find_complete_lines(l, [1, 2])
>>> if s != -1:
... l[s:s+2] = [3]; l
[1, 3]
An empty `sublines` is reported as found at `start`.
See also del_complete_lines().
"""
# nothing to look for: report a hit right at the start position
if not sublines:
return start
# a false `end` (0, the default) means: search up to the end of the list
end = end or len(lines)
N = len(sublines)
try:
while True:
# look up the values of `sublines` one after the other
for j, value in enumerate(sublines):
i = lines.index(value, start, end)
# a follow-up value (j > 0) must sit directly behind its
# predecessor, i.e. at `start`; if it was found further on,
# retry with the sub-list aligned to this hit
if j and i != start:
start = i-j
break
start = i + 1
else:
# all values found in a row, `i` is the index of the last one
return i +1 - N
# list.index() raises ValueError if a value is not in lines[start:end]
except ValueError: # `sublines` not found
return -1
def find_across_lines(lines, sub, start=0, end=0):
    """Find the string `sub` in `lines`, where `sub` may span several lines.

    `sub` is split at its line breaks:

    * without line break, `sub` may be anywhere inside one line,
    * with line breaks, the first part must be the end of a line, the
      last part must be the start of a later line and all parts in
      between must be complete lines.

    Only lines[start:end] are searched. `end` == 0 (the default) or an
    `end` beyond the list stands for "up to the end of `lines`".

    Return the index of the line where the match begins or -1 on failure.

    NOTE(review): if `sub` contains a line break, only the first candidate
    position is examined (first occurrence of the middle lines, or first
    line starting with the last part). A match further down is not
    reported if that candidate fails. test_find_across_lines appears to
    rely on this (search from line 4 expects -1), so it is kept as is.
    """
    sublines = sub.splitlines()
    # normalise `end` once, so that `end-1` below can never turn into 0,
    # which find_complete_lines() would read as "search up to the end"
    if end == 0 or end > len(lines):
        end = len(lines)
    if len(sublines) > 2:
        # at least 3 lines: the middle one(s) are complete -> use index search
        # (they need a line before and a line after them inside the range)
        if end - 1 <= start + 1:
            return -1
        i = find_complete_lines(lines, sublines[1:-1], start+1, end-1)
        if i < start+1:
            return -1
        # the middle part covers len(sublines)-2 lines beginning at `i`,
        # so the last part belongs to the line directly behind them;
        # this index is < end because the middle part ends before end-1
        last = i + len(sublines) - 2
        if (lines[i-1].endswith(sublines[0]) and
            lines[last].startswith(sublines[-1])):
            return i-1
    elif len(sublines) > 1:
        # last subline must start a line
        i = find_token(lines, sublines[-1], start, end)
        # not found (-1) or found on line `start` (no room for the first part)
        if i < start + 1:
            return -1
        if lines[i-1].endswith(sublines[0]):
            return i-1
    else: # no line-break, may be in the middle of a line
        for i in range(start, end):
            if sub in lines[i]:
                return i
    return -1
def get_value(lines, token, start=0, end=0, default=""):
""" get_value(lines, token, start[[, end], default]) -> string """ get_value(lines, token, start[[, end], default]) -> string
Find the next line that looks like: Find the next line that looks like:
@ -341,17 +365,19 @@ def get_value(lines, token, start, end = 0, default = ""):
Returns "followed by other stuff" with leading and trailing Returns "followed by other stuff" with leading and trailing
whitespace removed. whitespace removed.
""" """
i = find_token_exact(lines, token, start, end) i = find_token_exact(lines, token, start, end)
if i == -1: if i == -1:
return default return default
# TODO: establish desired behaviour, eventually change to
# return lines.pop(i)[len(token):].strip() # or default
# see test_parser_tools.py
l = lines[i].split(None, 1) l = lines[i].split(None, 1)
if len(l) > 1: if len(l) > 1:
return l[1].strip() return l[1].strip()
return default return default
def get_quoted_value(lines, token, start, end = 0, default = ""): def get_quoted_value(lines, token, start=0, end=0, default=""):
""" get_quoted_value(lines, token, start[[, end], default]) -> string """ get_quoted_value(lines, token, start[[, end], default]) -> string
Find the next line that looks like: Find the next line that looks like:
@ -368,8 +394,8 @@ def get_quoted_value(lines, token, start, end = 0, default = ""):
return val.strip('"') return val.strip('"')
def get_bool_value(lines, token, start, end = 0, default = None): def get_bool_value(lines, token, start=0, end=0, default=None):
""" get_value(lines, token, start[[, end], default]) -> string """ get_bool_value(lines, token, start[[, end], default]) -> string
Find the next line that looks like: Find the next line that looks like:
token bool_value token bool_value
@ -405,7 +431,7 @@ def set_option_value(line, option, value):
return re.sub(rx, '\g<1>' + value + '"', line) return re.sub(rx, '\g<1>' + value + '"', line)
def del_token(lines, token, start, end = 0): def del_token(lines, token, start=0, end=0):
""" del_token(lines, token, start, end) -> int """ del_token(lines, token, start, end) -> int
Find the first line in lines where token is the first element Find the first line in lines where token is the first element
@ -418,6 +444,41 @@ def del_token(lines, token, start, end = 0):
del lines[k] del lines[k]
return True return True
def del_complete_lines(lines, sublines, start=0, end=0):
    """Remove the first occurrence of `sublines` from the list `lines`.

    The list is modified in place. The position is determined with
    find_complete_lines(); `start` and `end` are passed on unchanged and
    work similar to list.index(). Works for any values.

    Return True if a deletion was done and False if not.

    >>> l = [1, 0, 1, 1, 1, 2]
    >>> del_complete_lines(l, [0, 1, 1])
    True
    >>> l
    [1, 1, 2]
    """
    first = find_complete_lines(lines, sublines, start, end)
    found = first != -1
    if found:
        # cut out exactly the matched run of lines
        lines[first:first+len(sublines)] = []
    return found
def del_value(lines, token, start=0, end=0, default=None):
    """Delete the first `token` line and return its value.

    Find the next line in lines[start:end] that looks like:
      token followed by other stuff

    Delete that line and return "followed by other stuff"
    with leading and trailing whitespace removed (an empty string if
    nothing follows the token).

    If token is not found, return `default` and leave `lines` untouched.
    """
    i = find_token_exact(lines, token, start, end)
    if i == -1:
        return default
    # find_token_exact() compares whitespace-separated words, so the line
    # need not literally start with `token` (leading or repeated
    # whitespace). Split off as many words as `token` has instead of
    # cutting at len(token).
    n_words = len(token.split())
    parts = lines.pop(i).split(None, n_words)
    if len(parts) > n_words:
        return parts[n_words].strip()
    return ""
def find_beginning_of(lines, i, start_token, end_token): def find_beginning_of(lines, i, start_token, end_token):
count = 1 count = 1
@ -425,7 +486,7 @@ def find_beginning_of(lines, i, start_token, end_token):
i = find_tokens_backwards(lines, [start_token, end_token], i-1) i = find_tokens_backwards(lines, [start_token, end_token], i-1)
if i == -1: if i == -1:
return -1 return -1
if check_token(lines[i], end_token): if lines[i].startswith(end_token):
count = count+1 count = count+1
else: else:
count = count-1 count = count-1
@ -441,7 +502,7 @@ def find_end_of(lines, i, start_token, end_token):
i = find_tokens(lines, [end_token, start_token], i+1) i = find_tokens(lines, [end_token, start_token], i+1)
if i == -1: if i == -1:
return -1 return -1
if check_token(lines[i], start_token): if lines[i].startswith(start_token):
count = count+1 count = count+1
else: else:
count = count-1 count = count-1
@ -450,11 +511,11 @@ def find_end_of(lines, i, start_token, end_token):
return -1 return -1
def find_nonempty_line(lines, start, end = 0): def find_nonempty_line(lines, start=0, end=0):
if end == 0: if end == 0:
end = len(lines) end = len(lines)
for i in range(start, end): for i in range(start, end):
if is_nonempty_line(lines[i]): if lines[i].strip():
return i return i
return -1 return -1

View File

@ -77,10 +77,21 @@ class TestParserTools(unittest.TestCase):
def test_find_token(self): def test_find_token(self):
self.assertEqual(find_token(lines, '\\emph', 0), 7) self.assertEqual(find_token(lines, '\\emph', 0), 7)
self.assertEqual(find_token(lines, '\\emph', 0, 5), -1) # no line starts with "emph" (without backslash):
self.assertEqual(find_token(lines, '\\emp', 0, 0, True), -1)
self.assertEqual(find_token(lines, '\\emp', 0, 0, False), 7)
self.assertEqual(find_token(lines, 'emph', 0), -1) self.assertEqual(find_token(lines, 'emph', 0), -1)
# token on line[start] is found:
self.assertEqual(find_token(lines, '\\emph', 7), 7)
self.assertEqual(find_token(lines, '\\emph', 8), 9)
# token on line[end] is not found:
self.assertEqual(find_token(lines, '\\emph', 0, 7), -1)
# `ignorews` looks for whitespace-separated tokens:
self.assertEqual(find_token(lines, '\\emp', 0, ignorews=True), -1)
self.assertEqual(find_token(lines, '\\emph',0, ignorews=True), 7)
self.assertEqual(find_token(lines, '\\emph', 7, ignorews=True), 7)
self.assertEqual(find_token(lines, '\\emph', 0, 7, True), -1)
# only first token is found:
self.assertEqual(find_token(lines, 'Quotes', 0), -1)
self.assertEqual(find_token(lines, 'Quotes', 0, ignorews=True), -1)
def test_find_tokens(self): def test_find_tokens(self):
@ -89,5 +100,94 @@ class TestParserTools(unittest.TestCase):
self.assertEqual(find_tokens(lines, tokens, 0, 4), -1) self.assertEqual(find_tokens(lines, tokens, 0, 4), -1)
def test_find_complete_lines(self):
# Check find_complete_lines() on the module-level sample `lines`.
sublines = ["\\begin_inset Quotes eld",
"\\end_inset"]
# return index of first line of sublines:
self.assertEqual(find_complete_lines(lines, sublines), 3)
self.assertEqual(find_complete_lines(lines, ["\\end_inset"]), 4)
# return -1 if sublines is not found:
self.assertEqual(find_complete_lines(lines, ['x']), -1)
# search includes line `start`:
self.assertEqual(find_complete_lines(lines, sublines, 3), 3)
self.assertEqual(find_complete_lines(lines, sublines, 4), 20)
self.assertEqual(find_complete_lines(lines, sublines, 21), -1)
# search excludes line `end`
self.assertEqual(find_complete_lines(lines, sublines, 4, 20), -1)
# an empty list is always found (at `start`, which defaults to 0)
self.assertEqual(find_complete_lines(lines, []), 0)
def test_find_across_lines(self):
# Check find_across_lines() on the module-level sample `lines`
# for search strings with two or more, one, and no line break.
# sub with at least 2 line-breaks (uses find_complete_lines):
sub = "Quotes eld\n\\end_inset\n\n\n"
self.assertEqual(find_across_lines(lines, sub), 3)
# Return -1 if not found
self.assertEqual(find_across_lines(lines, sub, 4), -1)
self.assertEqual(find_across_lines(lines, sub, 0, 6), -1)
# a mismatch in the last, the first, or a middle part prevents a match:
sub = "Quotes eld\n\\end_inset\nx\n"
self.assertEqual(find_across_lines(lines, sub), -1)
sub = "Quotes X\n\\end_inset\n\n"
self.assertEqual(find_across_lines(lines, sub), -1)
sub = "Quotes eld\n\\end_insert\n\n"
self.assertEqual(find_across_lines(lines, sub), -1)
# sub with up to 1 line-break:
sub = "Quotes eld\n\\end_inset"
self.assertEqual(find_across_lines(lines, sub), 3)
# NOTE(review): with start=4 the first line starting with "\end_inset"
# is line 4 itself, whose predecessor is outside the range -> -1;
# later candidates are presumably not examined -- confirm this is intended
self.assertEqual(find_across_lines(lines, sub, 4), -1)
self.assertEqual(find_across_lines(lines, sub, 0, 4), -1)
self.assertEqual(find_across_lines(lines, sub, 4, 3), -1)
sub = "Quotes X eld\n\\end_inset\n"
self.assertEqual(find_across_lines(lines, sub), -1)
sub = "Quotes eld\n\\end_insert\n"
self.assertEqual(find_across_lines(lines, sub), -1)
# sub without line-break
sub = "end_"
self.assertEqual(find_across_lines(lines, sub), 4)
self.assertEqual(find_across_lines(lines, sub, 5), 12)
self.assertEqual(find_across_lines(lines, sub, 0, 4), -1)
self.assertEqual(find_across_lines(lines, sub, 2, 1), -1)
self.assertEqual(find_across_lines(lines, "XXX"), -1)
def test_get_value(self):
# Pin the current behaviour of get_value(); the commented-out
# assertions show the alternative behaviour still under discussion.
self.assertEqual(get_value(lines, "\\begin_inset"), "Quotes eld")
# TODO: do we want this:
self.assertEqual(get_value(lines, "\\begin_inset Quotes"), "Quotes eld")
# or only the part after "token":
# self.assertEqual(get_value(lines, "\\begin_inset Quotes"), "eld")
# return default if not found
self.assertEqual(get_value(lines, "\\begin_insert", default=42), 42)
# TODO: do we want this:
self.assertEqual(get_value(lines, "\\end_inset", default=None), None)
# or empty string if token is found but has no value:
# self.assertEqual(get_value(lines, "\\end_inset", default=None), "")
def test_del_complete_lines(self):
# work on a copy, the sample `lines` is shared with the other tests
l = lines[:]
sublines = ["\\begin_inset Quotes eld",
"\\end_inset"]
# normal operation: remove the first occurrence of sublines:
self.assertEqual(del_complete_lines(l, sublines), True)
self.assertEqual(l[3], "")
self.assertEqual(len(l), len(lines)-len(sublines))
# special cases:
# nothing is found from line 21 on -> no deletion, list unchanged
l = lines[:]
self.assertEqual(del_complete_lines(l, sublines, 21), False)
self.assertEqual(l, lines)
# deleting empty sublist returns success but does not change the list:
self.assertEqual(del_complete_lines(l, [], 21), True)
self.assertEqual(l, lines)
def test_del_value(self):
# work on a copy, del_value() removes the matching line from the list
l = lines[:]
self.assertEqual(del_value(l, "\\begin_inset"), "Quotes eld")
# the "Quotes eld" line was deleted by the call above, so the next
# "\begin_inset Quotes" line is found; only the part after the token
# is returned
self.assertEqual(del_value(l, "\\begin_inset Quotes"), "erd")
# return default if not found
self.assertEqual(del_value(l, "\\begin_insert", default=42), 42)
# token found without a value: empty string, not the default
self.assertEqual(del_value(l, "\\end_inset", default=None), "")
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()