New lyx2lyx tools.

New lyx2lyx parser tools find_complete_lines() (replaces find_slice),
del_complete_lines(), and find_across_lines(). Default value 0
for start argument in utility functions.

Rework the implementation of dash-conversion.
This commit is contained in:
Günter Milde 2018-01-23 08:45:19 +01:00
parent 40e32d4d27
commit 8da6cdcf23
4 changed files with 345 additions and 214 deletions

View File

@ -34,9 +34,10 @@ from lyx2lyx_tools import (add_to_preamble, put_cmd_in_ert, get_ert,
# insert_to_preamble, latex_length, revert_flex_inset,
# revert_font_attrs, hex2ratio, str2bool
from parser_tools import (find_end_of_inset, find_end_of_layout,
find_nonempty_line, find_re, find_slice, find_token, find_token_backwards,
get_containing_layout, get_value, check_token)
from parser_tools import (del_complete_lines,
find_end_of_inset, find_end_of_layout, find_nonempty_line, find_re,
find_token, find_token_backwards, get_containing_layout,
get_value, check_token)
####################################################################
# Private helper functions
@ -615,130 +616,105 @@ def convert_dashes(document):
if document.backend != "latex":
return
lines = document.body
i = 0
while i < len(document.body):
words = document.body[i].split()
while i+1 < len(lines):
i += 1
line = lines[i]
words = line.split()
if (len(words) > 1 and words[0] == "\\begin_inset"
and (words[1] in ["CommandInset", "ERT", "External", "Formula",
"FormulaMacro", "Graphics", "IPA", "listings"]
or ' '.join(words[1:]) == "Flex Code")):
or line.endswith("Flex Code"))):
# must not replace anything in insets that store LaTeX contents in .lyx files
# (math and command insets without overridden read() and write() methods
# filtering out IPA makes Text::readParToken() more simple
# skip ERT as well since it is not needed there
# Flex Code is logical markup, typically rendered as typewriter
j = find_end_of_inset(document.body, i)
j = find_end_of_inset(lines, i)
if j == -1:
document.warning("Malformed LyX document: Can't find end of " + words[1] + " inset at line " + str(i))
i += 1
document.warning("Malformed LyX document: Can't find end of " +
words[1] + " inset at line " + str(i))
else:
i = j
continue
if document.body[i] == "\\begin_layout LyX-Code":
j = find_end_of_layout(document.body, i)
if lines[i] == "\\begin_layout LyX-Code":
j = find_end_of_layout(lines, i)
if j == -1:
document.warning("Malformed LyX document: "
"Can't find end of %s layout at line %d" % (words[1],i))
i += 1
else:
i = j
continue
if len(words) > 0 and words[0] in ["\\leftindent", "\\paragraph_spacing", "\\align", "\\labelwidthstring"]:
# skip paragraph parameters (bug 10243)
i += 1
if line.startswith("\\labelwidthstring"):
# skip label width string (bug 10243)
continue
while True:
j = document.body[i].find("--")
if j == -1:
break
front = document.body[i][:j]
back = document.body[i][j+2:]
if "--" in line:
# We can have an arbitrary number of consecutive hyphens.
# These must be split into the corresponding number of two and three hyphens
# We must match what LaTeX does: First try emdash, then endash, then single hyphen
if back.find("-") == 0:
back = back[1:]
if len(back) > 0:
document.body.insert(i+1, back)
document.body[i] = front + "\\threehyphens"
else:
if len(back) > 0:
document.body.insert(i+1, back)
document.body[i] = front + "\\twohyphens"
i += 1
# Replace as LaTeX does: First try emdash, then endash
line = line.replace("---", "\\threehyphens\n")
line = line.replace("--", "\\twohyphens\n")
lines[i:i+1] = line.splitlines()
i = 0
while i < len(document.body):
line = document.body[i]
while (line.endswith(r"-\SpecialChar \textcompwordmark{}") and
document.body[i+1].startswith("-")):
line = line.replace(r"\SpecialChar \textcompwordmark{}",
document.body.pop(i+1))
document.body[i] = line
i += 1
# remove ligature breaks between dashes
i = 1
while i < len(lines):
line = lines[i]
if (line.endswith(r"-\SpecialChar \textcompwordmark{}") and
lines[i+1].startswith("-")):
lines[i] = line.replace(r"\SpecialChar \textcompwordmark{}",
lines.pop(i+1))
else:
i += 1
# Return number of the next line to check for dashes.
def _dashes_next_line(document, i):
i +=1
words = document.body[i].split()
# skip paragraph parameters (bug 10243):
if words and words[0] in ["\\leftindent", "\\paragraph_spacing",
"\\align", "\\labelwidthstring"]:
i += 1
words = document.body[i].split()
# some insets should be skipped in revert_dashes (cf. convert_dashes)
if (len(words) > 1 and words[0] == "\\begin_inset" and
words[1] in ["CommandInset", "ERT", "External", "Formula",
"FormulaMacro", "Graphics", "IPA", "listings"]):
j = find_end_of_inset(document.body, i)
if j == -1:
document.warning("Malformed LyX document: Can't find end of "
+ words[1] + " inset at line " + str(i))
return i
return j+1
return i
def revert_dashes(document):
"""
Prevent ligatures of existing --- and --.
Convert \\twohyphens and \\threehyphens to -- and ---.
Revert \\twohyphens and \\threehyphens to -- and ---.
Remove preamble code from 2.3->2.2 conversion.
"""
# Remove preamble code from 2.3->2.2 conversion:
dash_renew_lines = find_slice(document.preamble,
['% Added by lyx2lyx',
r'\renewcommand{\textendash}{--}',
r'\renewcommand{\textemdash}{---}'])
del(document.preamble[dash_renew_lines])
# Prevent ligation of hyphens:
del_complete_lines(document.preamble,
['% Added by lyx2lyx',
r'\renewcommand{\textendash}{--}',
r'\renewcommand{\textemdash}{---}'])
# Insert ligature breaks to prevent ligation of hyphens to dashes:
lines = document.body
i = 0
while i < len(document.body)-1:
# increment i, skip some insets (cf. convert_dashes)
i = _dashes_next_line(document, i)
line = document.body[i]
while i+1 < len(lines):
i += 1
line = lines[i]
# skip label width string (bug 10243):
if line.startswith("\\labelwidthstring"):
continue
# do not touch hyphens in some insets (cf. convert_dashes):
if line.startswith("\\begin_inset"):
try:
if line.split()[1] in ["CommandInset", "ERT", "External",
"Formula", "FormulaMacro", "Graphics",
"IPA", "listings"]:
j = find_end_of_inset(lines, i)
if j == -1:
document.warning("Malformed LyX document: Can't find "
"end of %s inset at line %d." % (itype, i))
continue
i = j
except IndexError:
continue
if "--" in line:
line = line.replace("--", "-\\SpecialChar \\textcompwordmark{}\n-")
document.body[i:i+1] = line.split('\n')
# Convert \twohyphens and \threehyphens:
i = 0
while i < len(document.body):
# skip some insets (see convert_dashes())
i = _dashes_next_line(document, i-1)
replaced = False
if document.body[i].find("\\twohyphens") >= 0:
document.body[i] = document.body[i].replace("\\twohyphens", "--")
replaced = True
if document.body[i].find("\\threehyphens") >= 0:
document.body[i] = document.body[i].replace("\\threehyphens", "---")
replaced = True
if replaced and i+1 < len(document.body) and \
(document.body[i+1].find("\\") != 0 or \
document.body[i+1].find("\\twohyphens") == 0 or
document.body[i+1].find("\\threehyphens") == 0) and \
len(document.body[i]) + len(document.body[i+1]) <= 80:
document.body[i] = document.body[i] + document.body[i+1]
document.body[i+1:i+2] = []
# Revert \twohyphens and \threehyphens:
i = 1
while i < len(lines):
line = lines[i]
if not line.endswith("hyphens"):
i +=1
elif line.endswith("\\twohyphens") or line.endswith("\\threehyphens"):
line = line.replace("\\twohyphens", "--")
line = line.replace("\\threehyphens", "---")
lines[i] = line + lines.pop(i+1)
else:
i += 1
@ -879,16 +855,16 @@ def revert_georgian(document):
document.language = "english"
i = find_token(document.header, "\\language georgian", 0)
if i != -1:
document.header[i] = "\\language english"
document.header[i] = "\\language english"
j = find_token(document.header, "\\language_package default", 0)
if j != -1:
document.header[j] = "\\language_package babel"
document.header[j] = "\\language_package babel"
k = find_token(document.header, "\\options", 0)
if k != -1:
document.header[k] = document.header[k].replace("\\options", "\\options georgian,")
document.header[k] = document.header[k].replace("\\options", "\\options georgian,")
else:
l = find_token(document.header, "\\use_default_options", 0)
document.header.insert(l + 1, "\\options georgian")
l = find_token(document.header, "\\use_default_options", 0)
document.header.insert(l + 1, "\\options georgian")
def revert_sigplan_doi(document):

View File

@ -24,9 +24,9 @@ import sys, os
# Uncomment only what you need to import, please.
from parser_tools import del_token, find_end_of, find_end_of_layout, \
find_end_of_inset, find_re, find_slice, find_token, \
find_token_backwards, get_containing_layout, \
from parser_tools import del_token, del_value, del_complete_lines, \
find_end_of, find_end_of_layout, find_end_of_inset, find_re, \
find_token, find_token_backwards, get_containing_layout, \
get_bool_value, get_value, get_quoted_value
# find_tokens, find_token_exact, is_in_inset, \
# check_token, get_option_value
@ -1303,7 +1303,7 @@ def revert_biblatex(document):
"Citealt*", "Citealp*", "Citeauthor*", "fullcite", "footcite",\
"footcitet", "footcitep", "footcitealt", "footcitealp",\
"footciteauthor", "footciteyear", "footciteyearpar",\
"citefield", "citetitle", "cite*" ]
"citefield", "citetitle", "cite*" ]
i = 0
while (True):
@ -1843,19 +1843,16 @@ def revert_chapterbib(document):
def convert_dashligatures(document):
"Set 'use_dash_ligatures' according to content."
use_dash_ligatures = None
# Eventually remove preamble code from 2.3->2.2 conversion:
dash_renew_lines = find_slice(document.preamble,
['% Added by lyx2lyx',
r'\renewcommand{\textendash}{--}',
r'\renewcommand{\textemdash}{---}'])
del(document.preamble[dash_renew_lines])
use_dash_ligatures = bool(dash_renew_lines.stop)
# Look for and remove dashligatures workaround from 2.3->2.2 reversion,
# set use_dash_ligatures to True if found, to None else.
use_dash_ligatures = del_complete_lines(document.preamble,
['% Added by lyx2lyx',
r'\renewcommand{\textendash}{--}',
r'\renewcommand{\textemdash}{---}']) or None
if use_dash_ligatures is None:
# Look for dashes:
# (Documents by LyX 2.1 or older have "\twohyphens\n" or "\threehyphens\n"
# as interim representation for dash ligatures)
# Look for dashes (Documents by LyX 2.1 or older have "\twohyphens\n"
# or "\threehyphens\n" as interim representation for -- and ---.)
has_literal_dashes = False
has_ligature_dashes = False
j = 0
@ -1863,16 +1860,19 @@ def convert_dashligatures(document):
# Skip some document parts where dashes are not converted
if (i < j) or line.startswith("\\labelwidthstring"):
continue
words = line.split()
if (len(words) > 1 and words[0] == "\\begin_inset"
and (words[1] in ["CommandInset", "ERT", "External", "Formula",
"FormulaMacro", "Graphics", "IPA", "listings"]
or ' '.join(words[1:]) == "Flex Code")):
j = find_end_of_inset(document.body, i)
if j == -1:
document.warning("Malformed LyX document: "
"Can't find end of %s inset at line %d" % (words[1],i))
continue
if line.startswith("\\begin_inset"):
try:
it = line.split()[1]
except IndexError:
continue
if (it in ["CommandInset", "ERT", "External", "Formula",
"FormulaMacro", "Graphics", "IPA", "listings"]
or line.endswith("Flex Code")):
j = find_end_of_inset(document.body, i)
if j == -1:
document.warning("Malformed LyX document: Can't "
"find end of %s inset at line %d." % (itype, i))
continue
if line == "\\begin_layout LyX-Code":
j = find_end_of_layout(document.body, i)
if j == -1:
@ -1898,22 +1898,16 @@ def convert_dashligatures(document):
use_dash_ligatures = True
# insert the setting if there is a preferred value
if use_dash_ligatures is not None:
i = find_token(document.header, "\\use_microtype", 0)
if i != -1:
document.header.insert(i+1, "\\use_dash_ligatures %s"
% str(use_dash_ligatures).lower())
i = find_token(document.header, "\\graphics")
document.header.insert(i, "\\use_dash_ligatures %s"
% str(use_dash_ligatures).lower())
def revert_dashligatures(document):
"""Remove font ligature settings for en- and em-dashes.
Revert conversion of \twodashes or \threedashes to literal dashes."""
i = find_token(document.header, "\\use_dash_ligatures", 0)
if i == -1:
use_dash_ligatures = del_value(document.header, "\\use_dash_ligatures")
if use_dash_ligatures != "true" or document.backend != "latex":
return
use_dash_ligatures = get_bool_value(document.header, "\\use_dash_ligatures", i)
del document.header[i]
if not use_dash_ligatures or document.backend != "latex":
return
j = 0
new_body = []
for i, line in enumerate(document.body):
@ -2018,8 +2012,8 @@ def revert_mathindent(document):
else:
k = find_token(document.header, "\\options", 0)
if k != -1:
document.header[k] = document.header[k].replace("\\options", "\\options fleqn,")
del document.header[i]
document.header[k] = document.header[k].replace("\\options", "\\options fleqn,")
del document.header[i]
else:
l = find_token(document.header, "\\use_default_options", 0)
document.header.insert(l, "\\options fleqn")

View File

@ -156,53 +156,6 @@ count_pars_in_inset(lines, i):
import re
# Fast search in lists
def find_slice(l, sl, start = 0, stop = None):
    """Return position of first occurence of sequence `sl` in list `l`
    as a `slice` object.

    >>> find_slice([1, 2, 3, 1, 1, 2], (1, 2))
    slice(0, 2, None)

    The return value can be used to delete or substitute the sub-list:

    >>> l = [1, 0, 1, 1, 1, 2]
    >>> s = find_slice(l, [0, 1, 1])
    >>> del(l[s]); l
    [1, 1, 2]
    >>> s = find_slice(l, (1, 2))
    >>> l[s] = [3]; l
    [1, 3]

    The start argument works similar to list.index()

    >>> find_slice([1, 2, 3, 1, 1 ,2], (1, 2), start = 1)
    slice(4, 6, None)

    Use the `stop` attribute of the returned `slice` to test for success:

    >>> s1 = find_slice([2, 3, 1], (3, 1))
    >>> s2 = find_slice([2, 3, 1], (2, 1))
    >>> if s1.stop and not s2.stop:
    ...     print "wow"
    wow
    """
    # NOTE(review): the last doctest uses the Python-2 ``print`` statement;
    # it fails if this docstring is run under Python 3's doctest.
    # NOTE(review): an empty `sl` reaches the for-else with `i` unbound and
    # raises NameError — confirm no caller passes an empty sequence.
    # A `stop` of 0 (falsy) is also silently replaced by len(l).
    stop = stop or len(l)
    N = len(sl) # length of sub-list
    try:
        while True:
            # Let list.index() scan for each element at C speed; the
            # for-else completes only when all elements matched in a row.
            for j, value in enumerate(sl):
                i = l.index(value, start, stop)
                if j and i != start:
                    # element j found, but not adjacent to the previous
                    # ones: restart with it as candidate for position j.
                    start = i-j
                    break
                # element matched at the expected position; the next one
                # must follow immediately:
                start = i +1
            else:
                # all N elements matched consecutively, ending at index i:
                return slice(i+1-N, i+1)
    except ValueError: # sub list `sl` not found
        return slice(0, 0)
# Utilities for one line
def check_token(line, token):
""" check_token(line, token) -> bool
@ -212,7 +165,6 @@ def check_token(line, token):
Deprecated. Use line.startswith(token).
"""
return line.startswith(token)
@ -225,40 +177,40 @@ def is_nonempty_line(line):
# Utilities for a list of lines
def find_token(lines, token, start, end = 0, ignorews = False):
def find_token(lines, token, start=0, end=0, ignorews=False):
""" find_token(lines, token, start[[, end], ignorews]) -> int
Return the lowest line where token is found, and is the first
element, in lines[start, end].
If ignorews is True (default is False), then differences in
whitespace are ignored, except that there must be no extra
whitespace following token itself.
whitespace are ignored, but there must be whitespace following
token itself.
Return -1 on failure."""
if end == 0 or end > len(lines):
end = len(lines)
m = len(token)
if ignorews:
y = token.split()
for i in range(start, end):
if ignorews:
x = lines[i].split()
y = token.split()
if len(x) < len(y):
continue
if x[:len(y)] == y:
return i
else:
if lines[i][:m] == token:
if lines[i].startswith(token):
return i
return -1
def find_token_exact(lines, token, start, end = 0):
def find_token_exact(lines, token, start=0, end=0):
return find_token(lines, token, start, end, True)
def find_tokens(lines, tokens, start, end = 0, ignorews = False):
def find_tokens(lines, tokens, start=0, end=0, ignorews=False):
""" find_tokens(lines, tokens, start[[, end], ignorews]) -> int
Return the lowest line where one token in tokens is found, and is
@ -278,17 +230,17 @@ def find_tokens(lines, tokens, start, end = 0, ignorews = False):
if x[:len(y)] == y:
return i
else:
if lines[i][:len(token)] == token:
if lines[i].startswith(token):
return i
return -1
def find_tokens_exact(lines, tokens, start, end = 0):
def find_tokens_exact(lines, tokens, start=0, end=0):
return find_tokens(lines, tokens, start, end, True)
def find_re(lines, rexp, start, end = 0):
""" find_token_re(lines, rexp, start[, end]) -> int
def find_re(lines, rexp, start=0, end=0):
""" find_re(lines, rexp, start[, end]) -> int
Return the lowest line where rexp, a regular expression, is found
in lines[start, end].
@ -310,10 +262,8 @@ def find_token_backwards(lines, token, start):
element, in lines[start, end].
Return -1 on failure."""
m = len(token)
for i in range(start, -1, -1):
line = lines[i]
if line[:m] == token:
if lines[i].startswith(token):
return i
return -1
@ -328,12 +278,86 @@ def find_tokens_backwards(lines, tokens, start):
for i in range(start, -1, -1):
line = lines[i]
for token in tokens:
if line[:len(token)] == token:
if line.startswith(token):
return i
return -1
def get_value(lines, token, start, end = 0, default = ""):
def find_complete_lines(lines, sublines, start=0, end=0):
    """Find the first occurrence of the sequence `sublines` in `lines`.

    Return the index of its first line, or -1 on failure.
    Efficient search for a sub-list in a large list. Works for any values.

    >>> find_complete_lines([1, 2, 3, 1, 1, 2], [1, 2])
    0

    The `start` and `end` arguments work similar to list.index()

    >>> find_complete_lines([1, 2, 3, 1, 1 ,2], [1, 2], start=1)
    4
    >>> find_complete_lines([1, 2, 3, 1, 1 ,2], [1, 2], start=1, end=4)
    -1

    The return value can be used to substitute the sub-list.
    Take care to check before use:

    >>> l = [1, 1, 2]
    >>> s = find_complete_lines(l, [1, 2])
    >>> if s != -1:
    ...     l[s:s+2] = [3]; l
    [1, 3]

    See also del_complete_lines().
    """
    # An empty sequence matches trivially at `start`:
    if not sublines:
        return start
    end = end or len(lines)
    width = len(sublines)
    anchor = sublines[0]
    pos = start
    # Let list.index() scan for the first element at C speed, then verify
    # the remaining elements with a single slice comparison.
    while True:
        try:
            pos = lines.index(anchor, pos, end)
        except ValueError:  # `anchor` (hence `sublines`) not found
            return -1
        if pos + width <= end and lines[pos:pos + width] == sublines:
            return pos
        pos += 1
def find_across_lines(lines, sub, start=0, end=0):
    """Find the multi-line string `sub` in `lines[start:end]`.

    Line-breaks in `sub` must coincide with line-breaks in `lines`:
    the first part must end a line, any middle parts must match
    complete lines, and the last part must start a line.
    Return the index of the first matching line, or -1 on failure.
    """
    sublines = sub.splitlines()
    if len(sublines) > 2:
        # At least 3 parts: the middle one(s) are complete lines
        # -> use the efficient index-based search for them.
        i = find_complete_lines(lines, sublines[1:-1], start+1, end-1)
        if i < start+1:
            return -1
        try:
            # The first part must end the line before the middle block.
            # The middle block spans lines i .. i+len(sublines)-3, so the
            # last part must start line i+len(sublines)-2.
            # (BUGFIX: previously indexed lines[i+len(sublines)], two
            # lines past the actual position of the final part.)
            if (lines[i-1].endswith(sublines[0]) and
                lines[i+len(sublines)-2].startswith(sublines[-1])):
                return i-1
        except IndexError:
            pass
    elif len(sublines) > 1:
        # Two parts: the last one must start a line ...
        i = find_token(lines, sublines[-1], start, end)
        if i < start + 1:
            return -1
        # ... and the first one must end the preceding line.
        if lines[i-1].endswith(sublines[0]):
            return i-1
    else:
        # No line-break: `sub` may be anywhere inside a single line.
        if end == 0 or end > len(lines):
            end = len(lines)
        for i in range(start, end):
            if sub in lines[i]:
                return i
    return -1
def get_value(lines, token, start=0, end=0, default=""):
    """ get_value(lines, token, start[[, end], default]) -> string

    Find the next line that looks like:
      token followed by other stuff

    Return "followed by other stuff" with leading and trailing
    whitespace removed, or `default` when the token is not found
    or nothing follows it on the line.
    """
    match = find_token_exact(lines, token, start, end)
    if match == -1:
        return default
    # TODO: establish desired behaviour, eventually change to
    #   return lines.pop(match)[len(token):].strip() # or default
    # see test_parser_tools.py
    parts = lines[match].split(None, 1)
    return parts[1].strip() if len(parts) > 1 else default
def get_quoted_value(lines, token, start, end = 0, default = ""):
def get_quoted_value(lines, token, start=0, end=0, default=""):
""" get_quoted_value(lines, token, start[[, end], default]) -> string
Find the next line that looks like:
@ -368,8 +394,8 @@ def get_quoted_value(lines, token, start, end = 0, default = ""):
return val.strip('"')
def get_bool_value(lines, token, start, end = 0, default = None):
""" get_value(lines, token, start[[, end], default]) -> string
def get_bool_value(lines, token, start=0, end=0, default=None):
""" get_bool_value(lines, token, start[[, end], default]) -> string
Find the next line that looks like:
token bool_value
@ -405,7 +431,7 @@ def set_option_value(line, option, value):
return re.sub(rx, '\g<1>' + value + '"', line)
def del_token(lines, token, start, end = 0):
def del_token(lines, token, start=0, end=0):
""" del_token(lines, token, start, end) -> int
Find the first line in lines where token is the first element
@ -418,6 +444,41 @@ def del_token(lines, token, start, end = 0):
del lines[k]
return True
def del_complete_lines(lines, sublines, start=0, end=0):
    """Delete the first occurrence of `sublines` in the list `lines`.

    Efficient deletion of a sub-list in a list. Works for any values.
    The `start` and `end` arguments work similar to list.index().
    Return True if a deletion was done and False if not.

    >>> l = [1, 0, 1, 1, 1, 2]
    >>> del_complete_lines(l, [0, 1, 1])
    True
    >>> l
    [1, 1, 2]
    """
    first = find_complete_lines(lines, sublines, start, end)
    if first == -1:
        return False
    # Remove the matched span in place:
    lines[first:first + len(sublines)] = []
    return True
def del_value(lines, token, start=0, end=0, default=None):
    """Delete a token line and return its value.

    Find the next line that looks like:
      token followed by other stuff
    Delete that line and return "followed by other stuff" with
    leading and trailing whitespace removed.
    If token is not found, return `default`.
    """
    match = find_token_exact(lines, token, start, end)
    if match == -1:
        return default
    line = lines.pop(match)
    return line[len(token):].strip()
def find_beginning_of(lines, i, start_token, end_token):
count = 1
@ -425,7 +486,7 @@ def find_beginning_of(lines, i, start_token, end_token):
i = find_tokens_backwards(lines, [start_token, end_token], i-1)
if i == -1:
return -1
if check_token(lines[i], end_token):
if lines[i].startswith(end_token):
count = count+1
else:
count = count-1
@ -441,7 +502,7 @@ def find_end_of(lines, i, start_token, end_token):
i = find_tokens(lines, [end_token, start_token], i+1)
if i == -1:
return -1
if check_token(lines[i], start_token):
if lines[i].startswith(start_token):
count = count+1
else:
count = count-1
@ -450,11 +511,11 @@ def find_end_of(lines, i, start_token, end_token):
return -1
def find_nonempty_line(lines, start, end = 0):
def find_nonempty_line(lines, start=0, end=0):
if end == 0:
end = len(lines)
for i in range(start, end):
if is_nonempty_line(lines[i]):
if lines[i].strip():
return i
return -1

View File

@ -77,10 +77,21 @@ class TestParserTools(unittest.TestCase):
def test_find_token(self):
self.assertEqual(find_token(lines, '\\emph', 0), 7)
self.assertEqual(find_token(lines, '\\emph', 0, 5), -1)
self.assertEqual(find_token(lines, '\\emp', 0, 0, True), -1)
self.assertEqual(find_token(lines, '\\emp', 0, 0, False), 7)
# no line starts with "emph" (without backspace):
self.assertEqual(find_token(lines, 'emph', 0), -1)
# token on line[start] is found:
self.assertEqual(find_token(lines, '\\emph', 7), 7)
self.assertEqual(find_token(lines, '\\emph', 8), 9)
# token on line[end] is not found:
self.assertEqual(find_token(lines, '\\emph', 0, 7), -1)
# `ignorews` looks for whitespace-separated tokens:
self.assertEqual(find_token(lines, '\\emp', 0, ignorews=True), -1)
self.assertEqual(find_token(lines, '\\emph',0, ignorews=True), 7)
self.assertEqual(find_token(lines, '\\emph', 7, ignorews=True), 7)
self.assertEqual(find_token(lines, '\\emph', 0, 7, True), -1)
# only first token is found:
self.assertEqual(find_token(lines, 'Quotes', 0), -1)
self.assertEqual(find_token(lines, 'Quotes', 0, ignorews=True), -1)
def test_find_tokens(self):
@ -89,5 +100,94 @@ class TestParserTools(unittest.TestCase):
self.assertEqual(find_tokens(lines, tokens, 0, 4), -1)
def test_find_complete_lines(self):
    # Search for a sequence of consecutive, complete lines.
    sublines = ["\\begin_inset Quotes eld",
                "\\end_inset"]
    # return index of first line of sublines:
    self.assertEqual(find_complete_lines(lines, sublines), 3)
    self.assertEqual(find_complete_lines(lines, ["\\end_inset"]), 4)
    # return -1 if sublines is not found:
    self.assertEqual(find_complete_lines(lines, ['x']), -1)
    # search includes line `start`:
    self.assertEqual(find_complete_lines(lines, sublines, 3), 3)
    self.assertEqual(find_complete_lines(lines, sublines, 4), 20)
    self.assertEqual(find_complete_lines(lines, sublines, 21), -1)
    # search excludes line `end`:
    self.assertEqual(find_complete_lines(lines, sublines, 4, 20), -1)
    # an empty list is always found (at `start`):
    self.assertEqual(find_complete_lines(lines, []), 0)
def test_find_across_lines(self):
    # Line-breaks in `sub` must coincide with line-breaks in `lines`.
    # sub with at least 2 line-breaks (uses find_complete_lines):
    sub = "Quotes eld\n\\end_inset\n\n\n"
    self.assertEqual(find_across_lines(lines, sub), 3)
    # Return -1 if not found:
    self.assertEqual(find_across_lines(lines, sub, 4), -1)
    self.assertEqual(find_across_lines(lines, sub, 0, 6), -1)
    # mismatch in the last part:
    sub = "Quotes eld\n\\end_inset\nx\n"
    self.assertEqual(find_across_lines(lines, sub), -1)
    # mismatch in the first part:
    sub = "Quotes X\n\\end_inset\n\n"
    self.assertEqual(find_across_lines(lines, sub), -1)
    # mismatch in a middle (complete-line) part:
    sub = "Quotes eld\n\\end_insert\n\n"
    self.assertEqual(find_across_lines(lines, sub), -1)
    # sub with up to 1 line-break:
    sub = "Quotes eld\n\\end_inset"
    self.assertEqual(find_across_lines(lines, sub), 3)
    self.assertEqual(find_across_lines(lines, sub, 4), -1)
    self.assertEqual(find_across_lines(lines, sub, 0, 4), -1)
    # `end` before `start` can never match:
    self.assertEqual(find_across_lines(lines, sub, 4, 3), -1)
    sub = "Quotes X eld\n\\end_inset\n"
    self.assertEqual(find_across_lines(lines, sub), -1)
    sub = "Quotes eld\n\\end_insert\n"
    self.assertEqual(find_across_lines(lines, sub), -1)
    # sub without line-break may be anywhere inside a line:
    sub = "end_"
    self.assertEqual(find_across_lines(lines, sub), 4)
    self.assertEqual(find_across_lines(lines, sub, 5), 12)
    self.assertEqual(find_across_lines(lines, sub, 0, 4), -1)
    self.assertEqual(find_across_lines(lines, sub, 2, 1), -1)
    self.assertEqual(find_across_lines(lines, "XXX"), -1)
def test_get_value(self):
    self.assertEqual(get_value(lines, "\\begin_inset"), "Quotes eld")
    # TODO: do we want this (value = everything after the first word):
    self.assertEqual(get_value(lines, "\\begin_inset Quotes"), "Quotes eld")
    # or only the part after "token":
    # self.assertEqual(get_value(lines, "\\begin_inset Quotes"), "eld")
    # return default if not found:
    self.assertEqual(get_value(lines, "\\begin_insert", default=42), 42)
    # TODO: do we want this (default also when token has no value):
    self.assertEqual(get_value(lines, "\\end_inset", default=None), None)
    # or empty string if token is found but has no value:
    # self.assertEqual(get_value(lines, "\\end_inset", default=None), "")
def test_del_complete_lines(self):
    l = lines[:]
    sublines = ["\\begin_inset Quotes eld",
                "\\end_inset"]
    # normal operation: remove the first occurrence of sublines:
    self.assertEqual(del_complete_lines(l, sublines), True)
    self.assertEqual(l[3], "")
    self.assertEqual(len(l), len(lines)-len(sublines))
    # special cases:
    l = lines[:]
    # not found after line 21 -> report failure, leave `l` unchanged:
    self.assertEqual(del_complete_lines(l, sublines, 21), False)
    self.assertEqual(l, lines)
    # deleting an empty sublist returns success but does not change the list:
    self.assertEqual(del_complete_lines(l, [], 21), True)
    self.assertEqual(l, lines)
def test_del_value(self):
    l = lines[:]
    # return the value and delete the matching line:
    self.assertEqual(del_value(l, "\\begin_inset"), "Quotes eld")
    # previous match is gone, so the next matching line is found:
    self.assertEqual(del_value(l, "\\begin_inset Quotes"), "erd")
    # return default if not found:
    self.assertEqual(del_value(l, "\\begin_insert", default=42), 42)
    # a found token without value yields "" rather than `default`:
    self.assertEqual(del_value(l, "\\end_inset", default=None), "")
if __name__ == '__main__':
unittest.main()