mirror of
https://git.lyx.org/repos/lyx.git
synced 2025-01-21 23:09:40 +00:00
New lyx2lyx tools.
New lyx2lyx parser tools find_complete_lines() (replaces find_slice), del_complete_lines(), and find_across_lines(). Default value 0 for start argument in utility functions. Rework the implementation of dash-conversion.
This commit is contained in:
parent
40e32d4d27
commit
8da6cdcf23
@ -34,9 +34,10 @@ from lyx2lyx_tools import (add_to_preamble, put_cmd_in_ert, get_ert,
|
||||
# insert_to_preamble, latex_length, revert_flex_inset,
|
||||
# revert_font_attrs, hex2ratio, str2bool
|
||||
|
||||
from parser_tools import (find_end_of_inset, find_end_of_layout,
|
||||
find_nonempty_line, find_re, find_slice, find_token, find_token_backwards,
|
||||
get_containing_layout, get_value, check_token)
|
||||
from parser_tools import (del_complete_lines,
|
||||
find_end_of_inset, find_end_of_layout, find_nonempty_line, find_re,
|
||||
find_token, find_token_backwards, get_containing_layout,
|
||||
get_value, check_token)
|
||||
|
||||
####################################################################
|
||||
# Private helper functions
|
||||
@ -615,130 +616,105 @@ def convert_dashes(document):
|
||||
if document.backend != "latex":
|
||||
return
|
||||
|
||||
lines = document.body
|
||||
i = 0
|
||||
while i < len(document.body):
|
||||
words = document.body[i].split()
|
||||
while i+1 < len(lines):
|
||||
i += 1
|
||||
line = lines[i]
|
||||
words = line.split()
|
||||
if (len(words) > 1 and words[0] == "\\begin_inset"
|
||||
and (words[1] in ["CommandInset", "ERT", "External", "Formula",
|
||||
"FormulaMacro", "Graphics", "IPA", "listings"]
|
||||
or ' '.join(words[1:]) == "Flex Code")):
|
||||
or line.endswith("Flex Code"))):
|
||||
# must not replace anything in insets that store LaTeX contents in .lyx files
|
||||
# (math and command insets without overridden read() and write() methods
|
||||
# filtering out IPA makes Text::readParToken() more simple
|
||||
# skip ERT as well since it is not needed there
|
||||
# Flex Code is logical markup, typically rendered as typewriter
|
||||
j = find_end_of_inset(document.body, i)
|
||||
j = find_end_of_inset(lines, i)
|
||||
if j == -1:
|
||||
document.warning("Malformed LyX document: Can't find end of " + words[1] + " inset at line " + str(i))
|
||||
i += 1
|
||||
document.warning("Malformed LyX document: Can't find end of " +
|
||||
words[1] + " inset at line " + str(i))
|
||||
else:
|
||||
i = j
|
||||
continue
|
||||
if document.body[i] == "\\begin_layout LyX-Code":
|
||||
j = find_end_of_layout(document.body, i)
|
||||
if lines[i] == "\\begin_layout LyX-Code":
|
||||
j = find_end_of_layout(lines, i)
|
||||
if j == -1:
|
||||
document.warning("Malformed LyX document: "
|
||||
"Can't find end of %s layout at line %d" % (words[1],i))
|
||||
i += 1
|
||||
else:
|
||||
i = j
|
||||
continue
|
||||
|
||||
if len(words) > 0 and words[0] in ["\\leftindent", "\\paragraph_spacing", "\\align", "\\labelwidthstring"]:
|
||||
# skip paragraph parameters (bug 10243)
|
||||
i += 1
|
||||
if line.startswith("\\labelwidthstring"):
|
||||
# skip label width string (bug 10243)
|
||||
continue
|
||||
while True:
|
||||
j = document.body[i].find("--")
|
||||
if j == -1:
|
||||
break
|
||||
front = document.body[i][:j]
|
||||
back = document.body[i][j+2:]
|
||||
|
||||
if "--" in line:
|
||||
# We can have an arbitrary number of consecutive hyphens.
|
||||
# These must be split into the corresponding number of two and three hyphens
|
||||
# We must match what LaTeX does: First try emdash, then endash, then single hyphen
|
||||
if back.find("-") == 0:
|
||||
back = back[1:]
|
||||
if len(back) > 0:
|
||||
document.body.insert(i+1, back)
|
||||
document.body[i] = front + "\\threehyphens"
|
||||
else:
|
||||
if len(back) > 0:
|
||||
document.body.insert(i+1, back)
|
||||
document.body[i] = front + "\\twohyphens"
|
||||
i += 1
|
||||
# Replace as LaTeX does: First try emdash, then endash
|
||||
line = line.replace("---", "\\threehyphens\n")
|
||||
line = line.replace("--", "\\twohyphens\n")
|
||||
lines[i:i+1] = line.splitlines()
|
||||
|
||||
i = 0
|
||||
while i < len(document.body):
|
||||
line = document.body[i]
|
||||
while (line.endswith(r"-\SpecialChar \textcompwordmark{}") and
|
||||
document.body[i+1].startswith("-")):
|
||||
line = line.replace(r"\SpecialChar \textcompwordmark{}",
|
||||
document.body.pop(i+1))
|
||||
document.body[i] = line
|
||||
i += 1
|
||||
# remove ligature breaks between dashes
|
||||
i = 1
|
||||
while i < len(lines):
|
||||
line = lines[i]
|
||||
if (line.endswith(r"-\SpecialChar \textcompwordmark{}") and
|
||||
lines[i+1].startswith("-")):
|
||||
lines[i] = line.replace(r"\SpecialChar \textcompwordmark{}",
|
||||
lines.pop(i+1))
|
||||
else:
|
||||
i += 1
|
||||
|
||||
# Return number of the next line to check for dashes.
|
||||
def _dashes_next_line(document, i):
    """Return the number of the next line to check for dashes.

    Advance from line `i` by one line, step over a single paragraph
    parameter line (bug 10243) and, if the resulting line opens one of the
    insets that must not be touched (cf. convert_dashes), jump behind the
    end of that inset.

    If the end of such an inset cannot be found, a warning is issued via
    document.warning() and the number of the inset's first line is returned.
    """
    body = document.body
    paragraph_params = ("\\leftindent", "\\paragraph_spacing",
                        "\\align", "\\labelwidthstring")
    untouched_insets = ("CommandInset", "ERT", "External", "Formula",
                        "FormulaMacro", "Graphics", "IPA", "listings")

    i += 1
    words = body[i].split()
    # skip paragraph parameters (bug 10243):
    if words and words[0] in paragraph_params:
        i += 1
        words = body[i].split()

    # anything but the start of a protected inset is checked as is
    if (len(words) < 2 or words[0] != "\\begin_inset"
        or words[1] not in untouched_insets):
        return i

    # some insets should be skipped in revert_dashes (cf. convert_dashes)
    j = find_end_of_inset(body, i)
    if j != -1:
        return j + 1
    document.warning("Malformed LyX document: Can't find end of "
                     + words[1] + " inset at line " + str(i))
    return i
|
||||
|
||||
def revert_dashes(document):
|
||||
"""
|
||||
Prevent ligatures of existing --- and --.
|
||||
Convert \\twohyphens and \\threehyphens to -- and ---.
|
||||
Revert \\twohyphens and \\threehyphens to -- and ---.
|
||||
Remove preamble code from 2.3->2.2 conversion.
|
||||
"""
|
||||
# Remove preamble code from 2.3->2.2 conversion:
|
||||
dash_renew_lines = find_slice(document.preamble,
|
||||
['% Added by lyx2lyx',
|
||||
r'\renewcommand{\textendash}{--}',
|
||||
r'\renewcommand{\textemdash}{---}'])
|
||||
del(document.preamble[dash_renew_lines])
|
||||
# Prevent ligation of hyphens:
|
||||
del_complete_lines(document.preamble,
|
||||
['% Added by lyx2lyx',
|
||||
r'\renewcommand{\textendash}{--}',
|
||||
r'\renewcommand{\textemdash}{---}'])
|
||||
# Insert ligature breaks to prevent ligation of hyphens to dashes:
|
||||
lines = document.body
|
||||
i = 0
|
||||
while i < len(document.body)-1:
|
||||
# increment i, skip some insets (cf. convert_dashes)
|
||||
i = _dashes_next_line(document, i)
|
||||
line = document.body[i]
|
||||
while i+1 < len(lines):
|
||||
i += 1
|
||||
line = lines[i]
|
||||
# skip label width string (bug 10243):
|
||||
if line.startswith("\\labelwidthstring"):
|
||||
continue
|
||||
# do not touch hyphens in some insets (cf. convert_dashes):
|
||||
if line.startswith("\\begin_inset"):
|
||||
try:
|
||||
if line.split()[1] in ["CommandInset", "ERT", "External",
|
||||
"Formula", "FormulaMacro", "Graphics",
|
||||
"IPA", "listings"]:
|
||||
j = find_end_of_inset(lines, i)
|
||||
if j == -1:
|
||||
document.warning("Malformed LyX document: Can't find "
|
||||
"end of %s inset at line %d." % (itype, i))
|
||||
continue
|
||||
i = j
|
||||
except IndexError:
|
||||
continue
|
||||
if "--" in line:
|
||||
line = line.replace("--", "-\\SpecialChar \\textcompwordmark{}\n-")
|
||||
document.body[i:i+1] = line.split('\n')
|
||||
# Convert \twohyphens and \threehyphens:
|
||||
i = 0
|
||||
while i < len(document.body):
|
||||
# skip some insets (see convert_dashes())
|
||||
i = _dashes_next_line(document, i-1)
|
||||
replaced = False
|
||||
if document.body[i].find("\\twohyphens") >= 0:
|
||||
document.body[i] = document.body[i].replace("\\twohyphens", "--")
|
||||
replaced = True
|
||||
if document.body[i].find("\\threehyphens") >= 0:
|
||||
document.body[i] = document.body[i].replace("\\threehyphens", "---")
|
||||
replaced = True
|
||||
if replaced and i+1 < len(document.body) and \
|
||||
(document.body[i+1].find("\\") != 0 or \
|
||||
document.body[i+1].find("\\twohyphens") == 0 or
|
||||
document.body[i+1].find("\\threehyphens") == 0) and \
|
||||
len(document.body[i]) + len(document.body[i+1]) <= 80:
|
||||
document.body[i] = document.body[i] + document.body[i+1]
|
||||
document.body[i+1:i+2] = []
|
||||
# Revert \twohyphens and \threehyphens:
|
||||
i = 1
|
||||
while i < len(lines):
|
||||
line = lines[i]
|
||||
if not line.endswith("hyphens"):
|
||||
i +=1
|
||||
elif line.endswith("\\twohyphens") or line.endswith("\\threehyphens"):
|
||||
line = line.replace("\\twohyphens", "--")
|
||||
line = line.replace("\\threehyphens", "---")
|
||||
lines[i] = line + lines.pop(i+1)
|
||||
else:
|
||||
i += 1
|
||||
|
||||
@ -879,16 +855,16 @@ def revert_georgian(document):
|
||||
document.language = "english"
|
||||
i = find_token(document.header, "\\language georgian", 0)
|
||||
if i != -1:
|
||||
document.header[i] = "\\language english"
|
||||
document.header[i] = "\\language english"
|
||||
j = find_token(document.header, "\\language_package default", 0)
|
||||
if j != -1:
|
||||
document.header[j] = "\\language_package babel"
|
||||
document.header[j] = "\\language_package babel"
|
||||
k = find_token(document.header, "\\options", 0)
|
||||
if k != -1:
|
||||
document.header[k] = document.header[k].replace("\\options", "\\options georgian,")
|
||||
document.header[k] = document.header[k].replace("\\options", "\\options georgian,")
|
||||
else:
|
||||
l = find_token(document.header, "\\use_default_options", 0)
|
||||
document.header.insert(l + 1, "\\options georgian")
|
||||
l = find_token(document.header, "\\use_default_options", 0)
|
||||
document.header.insert(l + 1, "\\options georgian")
|
||||
|
||||
|
||||
def revert_sigplan_doi(document):
|
||||
|
@ -24,9 +24,9 @@ import sys, os
|
||||
|
||||
# Uncomment only what you need to import, please.
|
||||
|
||||
from parser_tools import del_token, find_end_of, find_end_of_layout, \
|
||||
find_end_of_inset, find_re, find_slice, find_token, \
|
||||
find_token_backwards, get_containing_layout, \
|
||||
from parser_tools import del_token, del_value, del_complete_lines, \
|
||||
find_end_of, find_end_of_layout, find_end_of_inset, find_re, \
|
||||
find_token, find_token_backwards, get_containing_layout, \
|
||||
get_bool_value, get_value, get_quoted_value
|
||||
# find_tokens, find_token_exact, is_in_inset, \
|
||||
# check_token, get_option_value
|
||||
@ -1303,7 +1303,7 @@ def revert_biblatex(document):
|
||||
"Citealt*", "Citealp*", "Citeauthor*", "fullcite", "footcite",\
|
||||
"footcitet", "footcitep", "footcitealt", "footcitealp",\
|
||||
"footciteauthor", "footciteyear", "footciteyearpar",\
|
||||
"citefield", "citetitle", "cite*" ]
|
||||
"citefield", "citetitle", "cite*" ]
|
||||
|
||||
i = 0
|
||||
while (True):
|
||||
@ -1843,19 +1843,16 @@ def revert_chapterbib(document):
|
||||
|
||||
def convert_dashligatures(document):
|
||||
"Set 'use_dash_ligatures' according to content."
|
||||
use_dash_ligatures = None
|
||||
# Eventually remove preamble code from 2.3->2.2 conversion:
|
||||
dash_renew_lines = find_slice(document.preamble,
|
||||
['% Added by lyx2lyx',
|
||||
r'\renewcommand{\textendash}{--}',
|
||||
r'\renewcommand{\textemdash}{---}'])
|
||||
del(document.preamble[dash_renew_lines])
|
||||
use_dash_ligatures = bool(dash_renew_lines.stop)
|
||||
# Look for and remove dashligatures workaround from 2.3->2.2 reversion,
|
||||
# set use_dash_ligatures to True if found, to None else.
|
||||
use_dash_ligatures = del_complete_lines(document.preamble,
|
||||
['% Added by lyx2lyx',
|
||||
r'\renewcommand{\textendash}{--}',
|
||||
r'\renewcommand{\textemdash}{---}']) or None
|
||||
|
||||
if use_dash_ligatures is None:
|
||||
# Look for dashes:
|
||||
# (Documents by LyX 2.1 or older have "\twohyphens\n" or "\threehyphens\n"
|
||||
# as interim representation for dash ligatures)
|
||||
# Look for dashes (Documents by LyX 2.1 or older have "\twohyphens\n"
|
||||
# or "\threehyphens\n" as interim representation for -- and ---.)
|
||||
has_literal_dashes = False
|
||||
has_ligature_dashes = False
|
||||
j = 0
|
||||
@ -1863,16 +1860,19 @@ def convert_dashligatures(document):
|
||||
# Skip some document parts where dashes are not converted
|
||||
if (i < j) or line.startswith("\\labelwidthstring"):
|
||||
continue
|
||||
words = line.split()
|
||||
if (len(words) > 1 and words[0] == "\\begin_inset"
|
||||
and (words[1] in ["CommandInset", "ERT", "External", "Formula",
|
||||
"FormulaMacro", "Graphics", "IPA", "listings"]
|
||||
or ' '.join(words[1:]) == "Flex Code")):
|
||||
j = find_end_of_inset(document.body, i)
|
||||
if j == -1:
|
||||
document.warning("Malformed LyX document: "
|
||||
"Can't find end of %s inset at line %d" % (words[1],i))
|
||||
continue
|
||||
if line.startswith("\\begin_inset"):
|
||||
try:
|
||||
it = line.split()[1]
|
||||
except IndexError:
|
||||
continue
|
||||
if (it in ["CommandInset", "ERT", "External", "Formula",
|
||||
"FormulaMacro", "Graphics", "IPA", "listings"]
|
||||
or line.endswith("Flex Code")):
|
||||
j = find_end_of_inset(document.body, i)
|
||||
if j == -1:
|
||||
document.warning("Malformed LyX document: Can't "
|
||||
"find end of %s inset at line %d." % (itype, i))
|
||||
continue
|
||||
if line == "\\begin_layout LyX-Code":
|
||||
j = find_end_of_layout(document.body, i)
|
||||
if j == -1:
|
||||
@ -1898,22 +1898,16 @@ def convert_dashligatures(document):
|
||||
use_dash_ligatures = True
|
||||
# insert the setting if there is a preferred value
|
||||
if use_dash_ligatures is not None:
|
||||
i = find_token(document.header, "\\use_microtype", 0)
|
||||
if i != -1:
|
||||
document.header.insert(i+1, "\\use_dash_ligatures %s"
|
||||
% str(use_dash_ligatures).lower())
|
||||
i = find_token(document.header, "\\graphics")
|
||||
document.header.insert(i, "\\use_dash_ligatures %s"
|
||||
% str(use_dash_ligatures).lower())
|
||||
|
||||
def revert_dashligatures(document):
|
||||
"""Remove font ligature settings for en- and em-dashes.
|
||||
Revert conversion of \twodashes or \threedashes to literal dashes."""
|
||||
i = find_token(document.header, "\\use_dash_ligatures", 0)
|
||||
if i == -1:
|
||||
use_dash_ligatures = del_value(document.header, "\\use_dash_ligatures")
|
||||
if use_dash_ligatures != "true" or document.backend != "latex":
|
||||
return
|
||||
use_dash_ligatures = get_bool_value(document.header, "\\use_dash_ligatures", i)
|
||||
del document.header[i]
|
||||
if not use_dash_ligatures or document.backend != "latex":
|
||||
return
|
||||
|
||||
j = 0
|
||||
new_body = []
|
||||
for i, line in enumerate(document.body):
|
||||
@ -2018,8 +2012,8 @@ def revert_mathindent(document):
|
||||
else:
|
||||
k = find_token(document.header, "\\options", 0)
|
||||
if k != -1:
|
||||
document.header[k] = document.header[k].replace("\\options", "\\options fleqn,")
|
||||
del document.header[i]
|
||||
document.header[k] = document.header[k].replace("\\options", "\\options fleqn,")
|
||||
del document.header[i]
|
||||
else:
|
||||
l = find_token(document.header, "\\use_default_options", 0)
|
||||
document.header.insert(l, "\\options fleqn")
|
||||
|
@ -156,53 +156,6 @@ count_pars_in_inset(lines, i):
|
||||
|
||||
import re
|
||||
|
||||
# Fast search in lists
|
||||
def find_slice(l, sl, start = 0, stop = None):
    """Return position of first occurrence of sequence `sl` in list `l`
    as a `slice` object.

    >>> find_slice([1, 2, 3, 1, 1, 2], (1, 2))
    slice(0, 2, None)

    The return value can be used to delete or substitute the sub-list:

    >>> l = [1, 0, 1, 1, 1, 2]
    >>> s = find_slice(l, [0, 1, 1])
    >>> del(l[s]); l
    [1, 1, 2]
    >>> s = find_slice(l, (1, 2))
    >>> l[s] = [3]; l
    [1, 3]

    The start argument works similar to list.index()

    >>> find_slice([1, 2, 3, 1, 1 ,2], (1, 2), start = 1)
    slice(4, 6, None)

    Use the `stop` attribute of the returned `slice` to test for success:

    >>> s1 = find_slice([2, 3, 1], (3, 1))
    >>> s2 = find_slice([2, 3, 1], (2, 1))
    >>> if s1.stop and not s2.stop:
    ...     print("wow")
    wow

    An empty `sl` matches trivially at `start` and yields an empty slice:

    >>> find_slice([1, 2, 3], [], start = 2)
    slice(2, 2, None)
    """
    width = len(sl)  # length of sub-list
    if not width:
        # Without this guard the loop below never assigns a position and
        # the final `return` fails with an UnboundLocalError.
        return slice(start, start)
    stop = stop or len(l)
    try:
        while True:
            for offset, value in enumerate(sl):
                pos = l.index(value, start, stop)
                if offset and pos != start:
                    # Gap between two consecutive items: a match that
                    # contains the item at `pos` starts `offset` earlier.
                    start = pos - offset
                    break
                start = pos + 1
            else:
                return slice(pos + 1 - width, pos + 1)
    except ValueError:  # sub list `sl` not found
        return slice(0, 0)
|
||||
|
||||
|
||||
# Utilities for one line
|
||||
def check_token(line, token):
|
||||
""" check_token(line, token) -> bool
|
||||
@ -212,7 +165,6 @@ def check_token(line, token):
|
||||
|
||||
Deprecated. Use line.startswith(token).
|
||||
"""
|
||||
|
||||
return line.startswith(token)
|
||||
|
||||
|
||||
@ -225,40 +177,40 @@ def is_nonempty_line(line):
|
||||
|
||||
|
||||
# Utilities for a list of lines
|
||||
def find_token(lines, token, start, end = 0, ignorews = False):
|
||||
def find_token(lines, token, start=0, end=0, ignorews=False):
|
||||
""" find_token(lines, token, start[[, end], ignorews]) -> int
|
||||
|
||||
Return the lowest line where token is found, and is the first
|
||||
element, in lines[start, end].
|
||||
|
||||
If ignorews is True (default is False), then differences in
|
||||
whitespace are ignored, except that there must be no extra
|
||||
whitespace following token itself.
|
||||
whitespace are ignored, but there must be whitespace following
|
||||
token itself.
|
||||
|
||||
Return -1 on failure."""
|
||||
|
||||
if end == 0 or end > len(lines):
|
||||
end = len(lines)
|
||||
m = len(token)
|
||||
if ignorews:
|
||||
y = token.split()
|
||||
for i in range(start, end):
|
||||
if ignorews:
|
||||
x = lines[i].split()
|
||||
y = token.split()
|
||||
if len(x) < len(y):
|
||||
continue
|
||||
if x[:len(y)] == y:
|
||||
return i
|
||||
else:
|
||||
if lines[i][:m] == token:
|
||||
if lines[i].startswith(token):
|
||||
return i
|
||||
return -1
|
||||
|
||||
|
||||
def find_token_exact(lines, token, start, end = 0):
|
||||
def find_token_exact(lines, token, start=0, end=0):
|
||||
return find_token(lines, token, start, end, True)
|
||||
|
||||
|
||||
def find_tokens(lines, tokens, start, end = 0, ignorews = False):
|
||||
def find_tokens(lines, tokens, start=0, end=0, ignorews=False):
|
||||
""" find_tokens(lines, tokens, start[[, end], ignorews]) -> int
|
||||
|
||||
Return the lowest line where one token in tokens is found, and is
|
||||
@ -278,17 +230,17 @@ def find_tokens(lines, tokens, start, end = 0, ignorews = False):
|
||||
if x[:len(y)] == y:
|
||||
return i
|
||||
else:
|
||||
if lines[i][:len(token)] == token:
|
||||
if lines[i].startswith(token):
|
||||
return i
|
||||
return -1
|
||||
|
||||
|
||||
def find_tokens_exact(lines, tokens, start, end = 0):
|
||||
def find_tokens_exact(lines, tokens, start=0, end=0):
|
||||
return find_tokens(lines, tokens, start, end, True)
|
||||
|
||||
|
||||
def find_re(lines, rexp, start, end = 0):
|
||||
""" find_token_re(lines, rexp, start[, end]) -> int
|
||||
def find_re(lines, rexp, start=0, end=0):
|
||||
""" find_re(lines, rexp, start[, end]) -> int
|
||||
|
||||
Return the lowest line where rexp, a regular expression, is found
|
||||
in lines[start, end].
|
||||
@ -310,10 +262,8 @@ def find_token_backwards(lines, token, start):
|
||||
element, in lines[start, end].
|
||||
|
||||
Return -1 on failure."""
|
||||
m = len(token)
|
||||
for i in range(start, -1, -1):
|
||||
line = lines[i]
|
||||
if line[:m] == token:
|
||||
if lines[i].startswith(token):
|
||||
return i
|
||||
return -1
|
||||
|
||||
@ -328,12 +278,86 @@ def find_tokens_backwards(lines, tokens, start):
|
||||
for i in range(start, -1, -1):
|
||||
line = lines[i]
|
||||
for token in tokens:
|
||||
if line[:len(token)] == token:
|
||||
if line.startswith(token):
|
||||
return i
|
||||
return -1
|
||||
|
||||
|
||||
def get_value(lines, token, start, end = 0, default = ""):
|
||||
def find_complete_lines(lines, sublines, start=0, end=0):
    """Find first occurrence of sequence `sublines` in list `lines`.
    Return index of first line or -1 on failure.

    The search is built on list.index() and works for any values.

    >>> find_complete_lines([1, 2, 3, 1, 1, 2], [1, 2])
    0

    The `start` and `end` arguments work similar to list.index()
    (an `end` of 0 means "up to the end of `lines`"):

    >>> find_complete_lines([1, 2, 3, 1, 1 ,2], [1, 2], start=1)
    4
    >>> find_complete_lines([1, 2, 3, 1, 1 ,2], [1, 2], start=1, end=4)
    -1

    The return value can be used to substitute the sub-list.
    Take care to check before use:

    >>> l = [1, 1, 2]
    >>> s = find_complete_lines(l, [1, 2])
    >>> if s != -1:
    ...     l[s:s+2] = [3]; l
    [1, 3]

    An empty `sublines` is reported as found at `start`.

    See also del_complete_lines().
    """
    if not sublines:
        return start
    if not end:
        end = len(lines)
    last = len(sublines) - 1
    try:
        while True:
            for offset, wanted in enumerate(sublines):
                hit = lines.index(wanted, start, end)
                if offset > 0 and hit != start:
                    # Not adjacent to the previous item: retry from where a
                    # match containing the item at `hit` would begin.
                    start = hit - offset
                    break
                start = hit + 1
            else:
                # every item was found directly after its predecessor
                return hit - last
    except ValueError:  # `sublines` not found
        return -1
|
||||
|
||||
|
||||
def find_across_lines(lines, sub, start=0, end=0):
    """Find string `sub` in `lines`, where `sub` may contain line breaks.

    `sub` is split with str.splitlines(). Its first part must end a line,
    its last part must start a following line and all parts in between must
    match complete lines. A `sub` without line break may occur anywhere
    inside a single line.

    Only lines[start:end] are considered. An `end` of 0 or an `end` beyond
    the list means "up to the last line".

    Return the index of the line where the match begins or -1 on failure.

    NOTE(review): for a `sub` with line breaks only the first candidate
    position (first hit of the middle lines, or of the last part) is
    examined; a later complete match is not reported. The existing unit
    tests rely on this, so it is left as is.
    """
    if end == 0 or end > len(lines):
        end = len(lines)
    sublines = sub.splitlines()
    if len(sublines) > 2:
        # at least 3 parts: the middle one(s) are complete lines
        # -> use index search in the window [start+1, end-1)
        middle = sublines[1:-1]
        if end - 1 <= start + 1:
            # Empty window. Must not be passed on: an `end` of 0 would be
            # taken as "no limit" by find_complete_lines().
            return -1
        i = find_complete_lines(lines, middle, start+1, end-1)
        if i < start+1:
            return -1
        # The last part starts the line right after the middle block.
        # This index is at most end-1, so it is always valid.
        if (lines[i-1].endswith(sublines[0]) and
            lines[i+len(middle)].startswith(sublines[-1])):
            return i-1
    elif len(sublines) > 1:
        # Last part must start a line. It cannot be on line `start`
        # because the first part needs a line before it.
        i = find_token(lines, sublines[-1], start+1, end)
        if i < start+1:
            return -1
        if lines[i-1].endswith(sublines[0]):
            return i-1
    else:  # no line-break, may be in the middle of a line
        for i in range(start, end):
            if sub in lines[i]:
                return i
    return -1
|
||||
|
||||
|
||||
def get_value(lines, token, start=0, end=0, default=""):
|
||||
""" get_value(lines, token, start[[, end], default]) -> string
|
||||
|
||||
Find the next line that looks like:
|
||||
@ -341,17 +365,19 @@ def get_value(lines, token, start, end = 0, default = ""):
|
||||
Returns "followed by other stuff" with leading and trailing
|
||||
whitespace removed.
|
||||
"""
|
||||
|
||||
i = find_token_exact(lines, token, start, end)
|
||||
if i == -1:
|
||||
return default
|
||||
# TODO: establish desired behaviour, eventually change to
|
||||
# return lines.pop(i)[len(token):].strip() # or default
|
||||
# see test_parser_tools.py
|
||||
l = lines[i].split(None, 1)
|
||||
if len(l) > 1:
|
||||
return l[1].strip()
|
||||
return default
|
||||
|
||||
|
||||
def get_quoted_value(lines, token, start, end = 0, default = ""):
|
||||
def get_quoted_value(lines, token, start=0, end=0, default=""):
|
||||
""" get_quoted_value(lines, token, start[[, end], default]) -> string
|
||||
|
||||
Find the next line that looks like:
|
||||
@ -368,8 +394,8 @@ def get_quoted_value(lines, token, start, end = 0, default = ""):
|
||||
return val.strip('"')
|
||||
|
||||
|
||||
def get_bool_value(lines, token, start, end = 0, default = None):
|
||||
""" get_value(lines, token, start[[, end], default]) -> string
|
||||
def get_bool_value(lines, token, start=0, end=0, default=None):
|
||||
""" get_bool_value(lines, token, start[[, end], default]) -> string
|
||||
|
||||
Find the next line that looks like:
|
||||
token bool_value
|
||||
@ -405,7 +431,7 @@ def set_option_value(line, option, value):
|
||||
return re.sub(rx, '\g<1>' + value + '"', line)
|
||||
|
||||
|
||||
def del_token(lines, token, start, end = 0):
|
||||
def del_token(lines, token, start=0, end=0):
|
||||
""" del_token(lines, token, start, end) -> int
|
||||
|
||||
Find the first line in lines where token is the first element
|
||||
@ -418,6 +444,41 @@ def del_token(lines, token, start, end = 0):
|
||||
del lines[k]
|
||||
return True
|
||||
|
||||
def del_complete_lines(lines, sublines, start=0, end=0):
    """Delete first occurrence of `sublines` in list `lines`.

    The position is looked up with find_complete_lines(); `start` and
    `end` are passed on to it unchanged. Works for any values.

    Returns True if `sublines` was found (and removed) and False if not.

    >>> l = [1, 0, 1, 1, 1, 2]
    >>> del_complete_lines(l, [0, 1, 1])
    True
    >>> l
    [1, 1, 2]
    """
    first = find_complete_lines(lines, sublines, start, end)
    found = first != -1
    if found:
        # cut the matched run out of the list in place
        lines[first:first + len(sublines)] = []
    return found
|
||||
|
||||
|
||||
def del_value(lines, token, start=0, end=0, default=None):
    """
    Find the next line that looks like:
      token followed by other stuff
    Delete that line and return "followed by other stuff"
    with leading and trailing whitespace removed.

    The line is located with find_token_exact(), i.e. `token` is compared
    word by word and differences in whitespace are ignored.

    If token is not found, return `default`.
    """
    i = find_token_exact(lines, token, start, end)
    if i == -1:
        return default
    rest = lines.pop(i)
    # Strip the words of `token` one by one instead of cutting off
    # len(token) characters: the match ignores whitespace, so the line may
    # be indented or use different spacing between the words of `token`.
    for word in token.split():
        rest = rest.lstrip()[len(word):]
    return rest.strip()
|
||||
|
||||
|
||||
def find_beginning_of(lines, i, start_token, end_token):
|
||||
count = 1
|
||||
@ -425,7 +486,7 @@ def find_beginning_of(lines, i, start_token, end_token):
|
||||
i = find_tokens_backwards(lines, [start_token, end_token], i-1)
|
||||
if i == -1:
|
||||
return -1
|
||||
if check_token(lines[i], end_token):
|
||||
if lines[i].startswith(end_token):
|
||||
count = count+1
|
||||
else:
|
||||
count = count-1
|
||||
@ -441,7 +502,7 @@ def find_end_of(lines, i, start_token, end_token):
|
||||
i = find_tokens(lines, [end_token, start_token], i+1)
|
||||
if i == -1:
|
||||
return -1
|
||||
if check_token(lines[i], start_token):
|
||||
if lines[i].startswith(start_token):
|
||||
count = count+1
|
||||
else:
|
||||
count = count-1
|
||||
@ -450,11 +511,11 @@ def find_end_of(lines, i, start_token, end_token):
|
||||
return -1
|
||||
|
||||
|
||||
def find_nonempty_line(lines, start, end = 0):
|
||||
def find_nonempty_line(lines, start=0, end=0):
|
||||
if end == 0:
|
||||
end = len(lines)
|
||||
for i in range(start, end):
|
||||
if is_nonempty_line(lines[i]):
|
||||
if lines[i].strip():
|
||||
return i
|
||||
return -1
|
||||
|
||||
|
@ -77,10 +77,21 @@ class TestParserTools(unittest.TestCase):
|
||||
|
||||
def test_find_token(self):
|
||||
self.assertEqual(find_token(lines, '\\emph', 0), 7)
|
||||
self.assertEqual(find_token(lines, '\\emph', 0, 5), -1)
|
||||
self.assertEqual(find_token(lines, '\\emp', 0, 0, True), -1)
|
||||
self.assertEqual(find_token(lines, '\\emp', 0, 0, False), 7)
|
||||
# no line starts with "emph" (without backslash):
|
||||
self.assertEqual(find_token(lines, 'emph', 0), -1)
|
||||
# token on line[start] is found:
|
||||
self.assertEqual(find_token(lines, '\\emph', 7), 7)
|
||||
self.assertEqual(find_token(lines, '\\emph', 8), 9)
|
||||
# token on line[end] is not found:
|
||||
self.assertEqual(find_token(lines, '\\emph', 0, 7), -1)
|
||||
# `ignorews` looks for whitespace-separated tokens:
|
||||
self.assertEqual(find_token(lines, '\\emp', 0, ignorews=True), -1)
|
||||
self.assertEqual(find_token(lines, '\\emph',0, ignorews=True), 7)
|
||||
self.assertEqual(find_token(lines, '\\emph', 7, ignorews=True), 7)
|
||||
self.assertEqual(find_token(lines, '\\emph', 0, 7, True), -1)
|
||||
# only first token is found:
|
||||
self.assertEqual(find_token(lines, 'Quotes', 0), -1)
|
||||
self.assertEqual(find_token(lines, 'Quotes', 0, ignorews=True), -1)
|
||||
|
||||
|
||||
def test_find_tokens(self):
|
||||
@ -89,5 +100,94 @@ class TestParserTools(unittest.TestCase):
|
||||
self.assertEqual(find_tokens(lines, tokens, 0, 4), -1)
|
||||
|
||||
|
||||
def test_find_complete_lines(self):
    # Each entry: (arguments after `lines`, expected return value).
    quote_inset = ["\\begin_inset Quotes eld",
                   "\\end_inset"]
    cases = [
        # return index of first line of sublines:
        ((quote_inset,), 3),
        ((["\\end_inset"],), 4),
        # return -1 if sublines is not found:
        ((['x'],), -1),
        # search includes line `start`:
        ((quote_inset, 3), 3),
        ((quote_inset, 4), 20),
        ((quote_inset, 21), -1),
        # search excludes line `end`:
        ((quote_inset, 4, 20), -1),
        # an empty list is always found:
        (([],), 0),
    ]
    for args, expected in cases:
        self.assertEqual(find_complete_lines(lines, *args), expected)
|
||||
|
||||
|
||||
def test_find_across_lines(self):
    # Each entry: (sub, further arguments, expected return value).
    cases = [
        # sub with at least 2 line-breaks (uses find_complete_lines):
        ("Quotes eld\n\\end_inset\n\n\n", (), 3),
        # Return -1 if not found
        ("Quotes eld\n\\end_inset\n\n\n", (4,), -1),
        ("Quotes eld\n\\end_inset\n\n\n", (0, 6), -1),
        ("Quotes eld\n\\end_inset\nx\n", (), -1),
        ("Quotes X\n\\end_inset\n\n", (), -1),
        ("Quotes eld\n\\end_insert\n\n", (), -1),
        # sub with up to 1 line-break:
        ("Quotes eld\n\\end_inset", (), 3),
        ("Quotes eld\n\\end_inset", (4,), -1),
        ("Quotes eld\n\\end_inset", (0, 4), -1),
        ("Quotes eld\n\\end_inset", (4, 3), -1),
        ("Quotes X eld\n\\end_inset\n", (), -1),
        ("Quotes eld\n\\end_insert\n", (), -1),
        # sub without line-break
        ("end_", (), 4),
        ("end_", (5,), 12),
        ("end_", (0, 4), -1),
        ("end_", (2, 1), -1),
        ("XXX", (), -1),
    ]
    for sub, extra, expected in cases:
        self.assertEqual(find_across_lines(lines, sub, *extra), expected)
|
||||
|
||||
|
||||
def test_get_value(self):
    # the value is everything after the first word of the line
    first_inset = get_value(lines, "\\begin_inset")
    self.assertEqual(first_inset, "Quotes eld")

    # TODO: do we want this (value still includes the second token word),
    # or only the part after "token", i.e. "eld"?
    two_word_token = get_value(lines, "\\begin_inset Quotes")
    self.assertEqual(two_word_token, "Quotes eld")

    # return default if not found
    missing = get_value(lines, "\\begin_insert", default=42)
    self.assertEqual(missing, 42)

    # TODO: do we want this (default for a token without value),
    # or the empty string if token is found but has no value?
    no_value = get_value(lines, "\\end_inset", default=None)
    self.assertEqual(no_value, None)
|
||||
|
||||
|
||||
def test_del_complete_lines(self):
    quote_inset = ["\\begin_inset Quotes eld",
                   "\\end_inset"]

    # normal operation: remove the first occurrence of sublines:
    work = list(lines)
    removed = del_complete_lines(work, quote_inset)
    self.assertEqual(removed, True)
    self.assertEqual(work[3], "")
    self.assertEqual(len(work), len(lines) - len(quote_inset))

    # special cases, on a fresh copy:
    work = list(lines)
    # nothing to delete after line 21 -> list stays untouched
    removed = del_complete_lines(work, quote_inset, 21)
    self.assertEqual(removed, False)
    self.assertEqual(work, lines)
    # deleting empty sublist returns success but does not change the list:
    removed = del_complete_lines(work, [], 21)
    self.assertEqual(removed, True)
    self.assertEqual(work, lines)
|
||||
|
||||
def test_del_value(self):
    work = list(lines)
    # Calls are made in this order on the same list: every hit removes
    # its line, so the second call sees the next "\begin_inset" line.
    cases = [
        (("\\begin_inset",), {}, "Quotes eld"),
        (("\\begin_inset Quotes",), {}, "erd"),
        # return default if not found
        (("\\begin_insert",), {"default": 42}, 42),
        # token found but without value -> empty string, not the default
        (("\\end_inset",), {"default": None}, ""),
    ]
    for args, kwargs, expected in cases:
        self.assertEqual(del_value(work, *args, **kwargs), expected)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
Loading…
x
Reference in New Issue
Block a user