mirror of https://git.lyx.org/repos/lyx.git (synced 2025-01-18 13:40:19 +00:00)
New lyx2lyx tools.

New lyx2lyx parser tools: find_complete_lines() (which replaces find_slice()),
del_complete_lines(), and find_across_lines(). The `start` argument of the
utility functions now defaults to 0. Rework the implementation of dash
conversion.
commit 8da6cdcf23
parent 40e32d4d27
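
The gist of the new API, as a usage sketch (the sample line lists are made
up for illustration; the function semantics follow the docstrings added in
this commit):

    from parser_tools import (find_complete_lines, del_complete_lines,
                              find_across_lines)

    # Illustrative input, mimicking the preamble lines that
    # revert_dashes() removes:
    preamble = ['% Added by lyx2lyx',
                r'\renewcommand{\textendash}{--}',
                r'\renewcommand{\textemdash}{---}']

    # find_complete_lines() returns the index of the first line of the
    # sub-list, or -1 on failure (find_slice() returned a slice object):
    assert find_complete_lines(preamble, preamble[1:]) == 1

    # del_complete_lines() removes the first occurrence of the sub-list
    # in place and reports success as True/False:
    assert del_complete_lines(preamble, preamble[:2]) is True
    assert preamble == [r'\renewcommand{\textemdash}{---}']

    # find_across_lines() matches a string that may span line breaks:
    body = ["text -", "- more text"]
    assert find_across_lines(body, "-\n- more") == 0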
@@ -34,9 +34,10 @@ from lyx2lyx_tools import (add_to_preamble, put_cmd_in_ert, get_ert,
 # insert_to_preamble, latex_length, revert_flex_inset,
 # revert_font_attrs, hex2ratio, str2bool
 
-from parser_tools import (find_end_of_inset, find_end_of_layout,
-    find_nonempty_line, find_re, find_slice, find_token, find_token_backwards,
-    get_containing_layout, get_value, check_token)
+from parser_tools import (del_complete_lines,
+    find_end_of_inset, find_end_of_layout, find_nonempty_line, find_re,
+    find_token, find_token_backwards, get_containing_layout,
+    get_value, check_token)
 
 ####################################################################
 # Private helper functions
@@ -615,130 +616,105 @@ def convert_dashes(document):
     if document.backend != "latex":
         return
 
+    lines = document.body
     i = 0
-    while i < len(document.body):
-        words = document.body[i].split()
+    while i+1 < len(lines):
+        i += 1
+        line = lines[i]
+        words = line.split()
         if (len(words) > 1 and words[0] == "\\begin_inset"
             and (words[1] in ["CommandInset", "ERT", "External", "Formula",
                               "FormulaMacro", "Graphics", "IPA", "listings"]
-                 or ' '.join(words[1:]) == "Flex Code")):
+                 or line.endswith("Flex Code"))):
             # must not replace anything in insets that store LaTeX contents in .lyx files
             # (math and command insets without overridden read() and write() methods
             # filtering out IPA makes Text::readParToken() more simple
             # skip ERT as well since it is not needed there
             # Flex Code is logical markup, typically rendered as typewriter
-            j = find_end_of_inset(document.body, i)
+            j = find_end_of_inset(lines, i)
             if j == -1:
-                document.warning("Malformed LyX document: Can't find end of " + words[1] + " inset at line " + str(i))
-                i += 1
+                document.warning("Malformed LyX document: Can't find end of " +
                                  words[1] + " inset at line " + str(i))
             else:
                 i = j
             continue
-        if document.body[i] == "\\begin_layout LyX-Code":
-            j = find_end_of_layout(document.body, i)
+        if lines[i] == "\\begin_layout LyX-Code":
+            j = find_end_of_layout(lines, i)
             if j == -1:
                 document.warning("Malformed LyX document: "
                     "Can't find end of %s layout at line %d" % (words[1],i))
-                i += 1
             else:
                 i = j
             continue
-        if len(words) > 0 and words[0] in ["\\leftindent", "\\paragraph_spacing", "\\align", "\\labelwidthstring"]:
-            # skip paragraph parameters (bug 10243)
-            i += 1
+        if line.startswith("\\labelwidthstring"):
+            # skip label width string (bug 10243)
             continue
-        while True:
-            j = document.body[i].find("--")
-            if j == -1:
-                break
-            front = document.body[i][:j]
-            back = document.body[i][j+2:]
+        if "--" in line:
             # We can have an arbitrary number of consecutive hyphens.
-            # These must be split into the corresponding number of two and three hyphens
-            # We must match what LaTeX does: First try emdash, then endash, then single hyphen
-            if back.find("-") == 0:
-                back = back[1:]
-                if len(back) > 0:
-                    document.body.insert(i+1, back)
-                document.body[i] = front + "\\threehyphens"
-            else:
-                if len(back) > 0:
-                    document.body.insert(i+1, back)
-                document.body[i] = front + "\\twohyphens"
-        i += 1
+            # Replace as LaTeX does: First try emdash, then endash
+            line = line.replace("---", "\\threehyphens\n")
+            line = line.replace("--", "\\twohyphens\n")
+            lines[i:i+1] = line.splitlines()
 
-    i = 0
-    while i < len(document.body):
-        line = document.body[i]
-        while (line.endswith(r"-\SpecialChar \textcompwordmark{}") and
-               document.body[i+1].startswith("-")):
-            line = line.replace(r"\SpecialChar \textcompwordmark{}",
-                                document.body.pop(i+1))
-            document.body[i] = line
-        i += 1
+    # remove ligature breaks between dashes
+    i = 1
+    while i < len(lines):
+        line = lines[i]
+        if (line.endswith(r"-\SpecialChar \textcompwordmark{}") and
+            lines[i+1].startswith("-")):
+            lines[i] = line.replace(r"\SpecialChar \textcompwordmark{}",
+                                    lines.pop(i+1))
+        else:
+            i += 1
 
-# Return number of the next line to check for dashes.
-def _dashes_next_line(document, i):
-    i +=1
-    words = document.body[i].split()
-    # skip paragraph parameters (bug 10243):
-    if words and words[0] in ["\\leftindent", "\\paragraph_spacing",
-                              "\\align", "\\labelwidthstring"]:
-        i += 1
-        words = document.body[i].split()
-    # some insets should be skipped in revert_dashes (cf. convert_dashes)
-    if (len(words) > 1 and words[0] == "\\begin_inset" and
-        words[1] in ["CommandInset", "ERT", "External", "Formula",
-                     "FormulaMacro", "Graphics", "IPA", "listings"]):
-        j = find_end_of_inset(document.body, i)
-        if j == -1:
-            document.warning("Malformed LyX document: Can't find end of "
-                             + words[1] + " inset at line " + str(i))
-            return i
-        return j+1
-    return i
 
 def revert_dashes(document):
     """
     Prevent ligatures of existing --- and --.
-    Convert \\twohyphens and \\threehyphens to -- and ---.
+    Revert \\twohyphens and \\threehyphens to -- and ---.
     Remove preamble code from 2.3->2.2 conversion.
     """
-    # Remove preamble code from 2.3->2.2 conversion:
-    dash_renew_lines = find_slice(document.preamble,
-                                  ['% Added by lyx2lyx',
-                                   r'\renewcommand{\textendash}{--}',
-                                   r'\renewcommand{\textemdash}{---}'])
-    del(document.preamble[dash_renew_lines])
-    # Prevent ligation of hyphens:
+    del_complete_lines(document.preamble,
+                       ['% Added by lyx2lyx',
+                        r'\renewcommand{\textendash}{--}',
+                        r'\renewcommand{\textemdash}{---}'])
+    # Insert ligature breaks to prevent ligation of hyphens to dashes:
+    lines = document.body
     i = 0
-    while i < len(document.body)-1:
-        # increment i, skip some insets (cf. convert_dashes)
-        i = _dashes_next_line(document, i)
-        line = document.body[i]
+    while i+1 < len(lines):
+        i += 1
+        line = lines[i]
+        # skip label width string (bug 10243):
+        if line.startswith("\\labelwidthstring"):
+            continue
+        # do not touch hyphens in some insets (cf. convert_dashes):
+        if line.startswith("\\begin_inset"):
+            try:
+                if line.split()[1] in ["CommandInset", "ERT", "External",
+                                       "Formula", "FormulaMacro", "Graphics",
+                                       "IPA", "listings"]:
+                    j = find_end_of_inset(lines, i)
+                    if j == -1:
+                        document.warning("Malformed LyX document: Can't find "
+                                         "end of %s inset at line %d." % (itype, i))
+                        continue
+                    i = j
+            except IndexError:
+                continue
         if "--" in line:
             line = line.replace("--", "-\\SpecialChar \\textcompwordmark{}\n-")
             document.body[i:i+1] = line.split('\n')
-    # Convert \twohyphens and \threehyphens:
-    i = 0
-    while i < len(document.body):
-        # skip some insets (see convert_dashes())
-        i = _dashes_next_line(document, i-1)
-        replaced = False
-        if document.body[i].find("\\twohyphens") >= 0:
-            document.body[i] = document.body[i].replace("\\twohyphens", "--")
-            replaced = True
-        if document.body[i].find("\\threehyphens") >= 0:
-            document.body[i] = document.body[i].replace("\\threehyphens", "---")
-            replaced = True
-        if replaced and i+1 < len(document.body) and \
-           (document.body[i+1].find("\\") != 0 or \
-            document.body[i+1].find("\\twohyphens") == 0 or
-            document.body[i+1].find("\\threehyphens") == 0) and \
-           len(document.body[i]) + len(document.body[i+1]) <= 80:
-            document.body[i] = document.body[i] + document.body[i+1]
-            document.body[i+1:i+2] = []
+    # Revert \twohyphens and \threehyphens:
+    i = 1
+    while i < len(lines):
+        line = lines[i]
+        if not line.endswith("hyphens"):
+            i +=1
+        elif line.endswith("\\twohyphens") or line.endswith("\\threehyphens"):
+            line = line.replace("\\twohyphens", "--")
+            line = line.replace("\\threehyphens", "---")
+            lines[i] = line + lines.pop(i+1)
         else:
             i += 1
@@ -879,16 +855,16 @@ def revert_georgian(document):
         document.language = "english"
         i = find_token(document.header, "\\language georgian", 0)
         if i != -1:
             document.header[i] = "\\language english"
         j = find_token(document.header, "\\language_package default", 0)
         if j != -1:
             document.header[j] = "\\language_package babel"
         k = find_token(document.header, "\\options", 0)
         if k != -1:
             document.header[k] = document.header[k].replace("\\options", "\\options georgian,")
         else:
             l = find_token(document.header, "\\use_default_options", 0)
             document.header.insert(l + 1, "\\options georgian")
 
 
 def revert_sigplan_doi(document):
@@ -24,9 +24,9 @@ import sys, os
 
 # Uncomment only what you need to import, please.
 
-from parser_tools import del_token, find_end_of, find_end_of_layout, \
-    find_end_of_inset, find_re, find_slice, find_token, \
-    find_token_backwards, get_containing_layout, \
+from parser_tools import del_token, del_value, del_complete_lines, \
+    find_end_of, find_end_of_layout, find_end_of_inset, find_re, \
+    find_token, find_token_backwards, get_containing_layout, \
     get_bool_value, get_value, get_quoted_value
 # find_tokens, find_token_exact, is_in_inset, \
 # check_token, get_option_value
@@ -1303,7 +1303,7 @@ def revert_biblatex(document):
                  "Citealt*", "Citealp*", "Citeauthor*", "fullcite", "footcite",\
                  "footcitet", "footcitep", "footcitealt", "footcitealp",\
                  "footciteauthor", "footciteyear", "footciteyearpar",\
                  "citefield", "citetitle", "cite*" ]
 
     i = 0
     while (True):
@@ -1843,19 +1843,16 @@ def revert_chapterbib(document):
 
 def convert_dashligatures(document):
     "Set 'use_dash_ligatures' according to content."
-    use_dash_ligatures = None
-    # Eventually remove preamble code from 2.3->2.2 conversion:
-    dash_renew_lines = find_slice(document.preamble,
+    # Look for and remove dashligatures workaround from 2.3->2.2 reversion,
+    # set use_dash_ligatures to True if found, to None else.
+    use_dash_ligatures = del_complete_lines(document.preamble,
                                 ['% Added by lyx2lyx',
                                  r'\renewcommand{\textendash}{--}',
-                                 r'\renewcommand{\textemdash}{---}'])
-    del(document.preamble[dash_renew_lines])
-    use_dash_ligatures = bool(dash_renew_lines.stop)
+                                 r'\renewcommand{\textemdash}{---}']) or None
 
     if use_dash_ligatures is None:
-        # Look for dashes:
-        # (Documents by LyX 2.1 or older have "\twohyphens\n" or "\threehyphens\n"
-        # as interim representation for dash ligatures)
+        # Look for dashes (Documents by LyX 2.1 or older have "\twohyphens\n"
+        # or "\threehyphens\n" as interim representation for -- an ---.)
         has_literal_dashes = False
         has_ligature_dashes = False
         j = 0
@@ -1863,16 +1860,19 @@ def convert_dashligatures(document):
         # Skip some document parts where dashes are not converted
         if (i < j) or line.startswith("\\labelwidthstring"):
             continue
-        words = line.split()
-        if (len(words) > 1 and words[0] == "\\begin_inset"
-            and (words[1] in ["CommandInset", "ERT", "External", "Formula",
-                              "FormulaMacro", "Graphics", "IPA", "listings"]
-                 or ' '.join(words[1:]) == "Flex Code")):
-            j = find_end_of_inset(document.body, i)
-            if j == -1:
-                document.warning("Malformed LyX document: "
-                    "Can't find end of %s inset at line %d" % (words[1],i))
-                continue
+        if line.startswith("\\begin_inset"):
+            try:
+                it = line.split()[1]
+            except IndexError:
+                continue
+            if (it in ["CommandInset", "ERT", "External", "Formula",
+                       "FormulaMacro", "Graphics", "IPA", "listings"]
+                or line.endswith("Flex Code")):
+                j = find_end_of_inset(document.body, i)
+                if j == -1:
+                    document.warning("Malformed LyX document: Can't "
+                                     "find end of %s inset at line %d." % (itype, i))
+                    continue
         if line == "\\begin_layout LyX-Code":
             j = find_end_of_layout(document.body, i)
             if j == -1:
@@ -1898,22 +1898,16 @@ def convert_dashligatures(document):
                 use_dash_ligatures = True
     # insert the setting if there is a preferred value
     if use_dash_ligatures is not None:
-        i = find_token(document.header, "\\use_microtype", 0)
-        if i != -1:
-            document.header.insert(i+1, "\\use_dash_ligatures %s"
-                                   % str(use_dash_ligatures).lower())
+        i = find_token(document.header, "\\graphics")
+        document.header.insert(i, "\\use_dash_ligatures %s"
+                               % str(use_dash_ligatures).lower())
 
 def revert_dashligatures(document):
     """Remove font ligature settings for en- and em-dashes.
     Revert conversion of \twodashes or \threedashes to literal dashes."""
-    i = find_token(document.header, "\\use_dash_ligatures", 0)
-    if i == -1:
+    use_dash_ligatures = del_value(document.header, "\\use_dash_ligatures")
+    if use_dash_ligatures != "true" or document.backend != "latex":
         return
-    use_dash_ligatures = get_bool_value(document.header, "\\use_dash_ligatures", i)
-    del document.header[i]
-    if not use_dash_ligatures or document.backend != "latex":
-        return
-
     j = 0
     new_body = []
     for i, line in enumerate(document.body):
@@ -2018,8 +2012,8 @@ def revert_mathindent(document):
     else:
         k = find_token(document.header, "\\options", 0)
         if k != -1:
             document.header[k] = document.header[k].replace("\\options", "\\options fleqn,")
             del document.header[i]
         else:
             l = find_token(document.header, "\\use_default_options", 0)
             document.header.insert(l, "\\options fleqn")
@@ -156,53 +156,6 @@ count_pars_in_inset(lines, i):
 
 import re
 
-# Fast search in lists
-def find_slice(l, sl, start = 0, stop = None):
-    """Return position of first occurence of sequence `sl` in list `l`
-    as a `slice` object.
-
-    >>> find_slice([1, 2, 3, 1, 1, 2], (1, 2))
-    slice(0, 2, None)
-
-    The return value can be used to delete or substitute the sub-list:
-
-    >>> l = [1, 0, 1, 1, 1, 2]
-    >>> s = find_slice(l, [0, 1, 1])
-    >>> del(l[s]); l
-    [1, 1, 2]
-    >>> s = find_slice(l, (1, 2))
-    >>> l[s] = [3]; l
-    [1, 3]
-
-    The start argument works similar to list.index()
-
-    >>> find_slice([1, 2, 3, 1, 1 ,2], (1, 2), start = 1)
-    slice(4, 6, None)
-
-    Use the `stop` attribute of the returned `slice` to test for success:
-
-    >>> s1 = find_slice([2, 3, 1], (3, 1))
-    >>> s2 = find_slice([2, 3, 1], (2, 1))
-    >>> if s1.stop and not s2.stop:
-    ...     print "wow"
-    wow
-    """
-    stop = stop or len(l)
-    N = len(sl) # lenght of sub-list
-    try:
-        while True:
-            for j, value in enumerate(sl):
-                i = l.index(value, start, stop)
-                if j and i != start:
-                    start = i-j
-                    break
-                start = i +1
-            else:
-                return slice(i+1-N, i+1)
-    except ValueError: # sub list `sl` not found
-        return slice(0, 0)
-
-
 # Utilities for one line
 def check_token(line, token):
     """ check_token(line, token) -> bool
@@ -212,7 +165,6 @@ def check_token(line, token):
 
     Deprecated. Use line.startswith(token).
     """
-
     return line.startswith(token)
 
 
@@ -225,40 +177,40 @@ def is_nonempty_line(line):
 
 
 # Utilities for a list of lines
-def find_token(lines, token, start, end = 0, ignorews = False):
+def find_token(lines, token, start=0, end=0, ignorews=False):
     """ find_token(lines, token, start[[, end], ignorews]) -> int
 
     Return the lowest line where token is found, and is the first
     element, in lines[start, end].
 
     If ignorews is True (default is False), then differences in
-    whitespace are ignored, except that there must be no extra
-    whitespace following token itself.
+    whitespace are ignored, but there must be whitespace following
+    token itself.
 
     Return -1 on failure."""
 
     if end == 0 or end > len(lines):
         end = len(lines)
-    m = len(token)
+    if ignorews:
+        y = token.split()
     for i in range(start, end):
         if ignorews:
             x = lines[i].split()
-            y = token.split()
             if len(x) < len(y):
                 continue
            if x[:len(y)] == y:
                 return i
         else:
-            if lines[i][:m] == token:
+            if lines[i].startswith(token):
                 return i
     return -1
 
 
-def find_token_exact(lines, token, start, end = 0):
+def find_token_exact(lines, token, start=0, end=0):
     return find_token(lines, token, start, end, True)
 
 
-def find_tokens(lines, tokens, start, end = 0, ignorews = False):
+def find_tokens(lines, tokens, start=0, end=0, ignorews=False):
     """ find_tokens(lines, tokens, start[[, end], ignorews]) -> int
 
     Return the lowest line where one token in tokens is found, and is
@@ -278,17 +230,17 @@ def find_tokens(lines, tokens, start, end = 0, ignorews = False):
                 if x[:len(y)] == y:
                     return i
             else:
-                if lines[i][:len(token)] == token:
+                if lines[i].startswith(token):
                     return i
     return -1
 
 
-def find_tokens_exact(lines, tokens, start, end = 0):
+def find_tokens_exact(lines, tokens, start=0, end=0):
     return find_tokens(lines, tokens, start, end, True)
 
 
-def find_re(lines, rexp, start, end = 0):
-    """ find_token_re(lines, rexp, start[, end]) -> int
+def find_re(lines, rexp, start=0, end=0):
+    """ find_re(lines, rexp, start[, end]) -> int
 
     Return the lowest line where rexp, a regular expression, is found
     in lines[start, end].
@@ -310,10 +262,8 @@ def find_token_backwards(lines, token, start):
     element, in lines[start, end].
 
     Return -1 on failure."""
-    m = len(token)
     for i in range(start, -1, -1):
-        line = lines[i]
-        if line[:m] == token:
+        if lines[i].startswith(token):
             return i
     return -1
 
@@ -328,12 +278,86 @@ def find_tokens_backwards(lines, tokens, start):
     for i in range(start, -1, -1):
         line = lines[i]
         for token in tokens:
-            if line[:len(token)] == token:
+            if line.startswith(token):
                 return i
     return -1
 
 
-def get_value(lines, token, start, end = 0, default = ""):
+def find_complete_lines(lines, sublines, start=0, end=0):
+    """Find first occurence of sequence `sublines` in list `lines`.
+    Return index of first line or -1 on failure.
+
+    Efficient search for a sub-list in a large list. Works for any values.
+
+    >>> find_complete_lines([1, 2, 3, 1, 1, 2], [1, 2])
+    0
+
+    The `start` and `end` arguments work similar to list.index()
+
+    >>> find_complete_lines([1, 2, 3, 1, 1 ,2], [1, 2], start=1)
+    4
+    >>> find_complete_lines([1, 2, 3, 1, 1 ,2], [1, 2], start=1, end=4)
+    -1
+
+    The return value can be used to substitute the sub-list.
+    Take care to check before use:
+
+    >>> l = [1, 1, 2]
+    >>> s = find_complete_lines(l, [1, 2])
+    >>> if s != -1:
+    ...     l[s:s+2] = [3]; l
+    [1, 3]
+
+    See also del_complete_lines().
+    """
+    if not sublines:
+        return start
+    end = end or len(lines)
+    N = len(sublines)
+    try:
+        while True:
+            for j, value in enumerate(sublines):
+                i = lines.index(value, start, end)
+                if j and i != start:
+                    start = i-j
+                    break
+                start = i + 1
+            else:
+                return i +1 - N
+    except ValueError: # `sublines` not found
+        return -1
+
+
+def find_across_lines(lines, sub, start=0, end=0):
+    sublines = sub.splitlines()
+    if len(sublines) > 2:
+        # at least 3 lines: the middle one(s) are complete -> use index search
+        i = find_complete_lines(lines, sublines[1:-1], start+1, end-1)
+        if i < start+1:
+            return -1
+        try:
+            if (lines[i-1].endswith(sublines[0]) and
+                lines[i+len(sublines)].startswith(sublines[-1])):
+                return i-1
+        except IndexError:
+            pass
+    elif len(sublines) > 1:
+        # last subline must start a line
+        i = find_token(lines, sublines[-1], start, end)
+        if i < start + 1:
+            return -1
+        if lines[i-1].endswith(sublines[0]):
+            return i-1
+    else: # no line-break, may be in the middle of a line
+        if end == 0 or end > len(lines):
+            end = len(lines)
+        for i in range(start, end):
+            if sub in lines[i]:
+                return i
+    return -1
+
+
+def get_value(lines, token, start=0, end=0, default=""):
     """ get_value(lines, token, start[[, end], default]) -> string
 
     Find the next line that looks like:
@@ -341,17 +365,19 @@ def get_value(lines, token, start, end = 0, default = ""):
     Returns "followed by other stuff" with leading and trailing
     whitespace removed.
     """
 
     i = find_token_exact(lines, token, start, end)
     if i == -1:
         return default
+    # TODO: establish desired behaviour, eventually change to
+    #       return lines.pop(i)[len(token):].strip() # or default
+    #       see test_parser_tools.py
     l = lines[i].split(None, 1)
     if len(l) > 1:
         return l[1].strip()
     return default
 
 
-def get_quoted_value(lines, token, start, end = 0, default = ""):
+def get_quoted_value(lines, token, start=0, end=0, default=""):
     """ get_quoted_value(lines, token, start[[, end], default]) -> string
 
     Find the next line that looks like:
@@ -368,8 +394,8 @@ def get_quoted_value(lines, token, start, end = 0, default = ""):
     return val.strip('"')
 
 
-def get_bool_value(lines, token, start, end = 0, default = None):
-    """ get_value(lines, token, start[[, end], default]) -> string
+def get_bool_value(lines, token, start=0, end=0, default=None):
+    """ get_bool_value(lines, token, start[[, end], default]) -> string
 
     Find the next line that looks like:
       token bool_value
@@ -405,7 +431,7 @@ def set_option_value(line, option, value):
     return re.sub(rx, '\g<1>' + value + '"', line)
 
 
-def del_token(lines, token, start, end = 0):
+def del_token(lines, token, start=0, end=0):
     """ del_token(lines, token, start, end) -> int
 
     Find the first line in lines where token is the first element
@@ -418,6 +444,41 @@ def del_token(lines, token, start, end = 0):
     del lines[k]
     return True
 
+def del_complete_lines(lines, sublines, start=0, end=0):
+    """Delete first occurence of `sublines` in list `lines`.
+
+    Efficient deletion of a sub-list in a list. Works for any values.
+    The `start` and `end` arguments work similar to list.index()
+
+    Returns True if a deletion was done and False if not.
+
+    >>> l = [1, 0, 1, 1, 1, 2]
+    >>> del_complete_lines(l, [0, 1, 1])
+    True
+    >>> l
+    [1, 1, 2]
+    """
+    i = find_complete_lines(lines, sublines, start, end)
+    if i == -1:
+        return False
+    del(lines[i:i+len(sublines)])
+    return True
+
+
+def del_value(lines, token, start=0, end=0, default=None):
+    """
+    Find the next line that looks like:
+      token followed by other stuff
+    Delete that line and return "followed by other stuff"
+    with leading and trailing whitespace removed.
+
+    If token is not found, return `default`.
+    """
+    i = find_token_exact(lines, token, start, end)
+    if i == -1:
+        return default
+    return lines.pop(i)[len(token):].strip()
+
 
 def find_beginning_of(lines, i, start_token, end_token):
     count = 1
@@ -425,7 +486,7 @@ def find_beginning_of(lines, i, start_token, end_token):
         i = find_tokens_backwards(lines, [start_token, end_token], i-1)
         if i == -1:
             return -1
-        if check_token(lines[i], end_token):
+        if lines[i].startswith(end_token):
             count = count+1
         else:
             count = count-1
@@ -441,7 +502,7 @@ def find_end_of(lines, i, start_token, end_token):
         i = find_tokens(lines, [end_token, start_token], i+1)
         if i == -1:
             return -1
-        if check_token(lines[i], start_token):
+        if lines[i].startswith(start_token):
             count = count+1
         else:
             count = count-1
@@ -450,11 +511,11 @@ def find_end_of(lines, i, start_token, end_token):
     return -1
 
 
-def find_nonempty_line(lines, start, end = 0):
+def find_nonempty_line(lines, start=0, end=0):
     if end == 0:
         end = len(lines)
     for i in range(start, end):
-        if is_nonempty_line(lines[i]):
+        if lines[i].strip():
             return i
     return -1
 
@@ -77,10 +77,21 @@ class TestParserTools(unittest.TestCase):
 
     def test_find_token(self):
         self.assertEqual(find_token(lines, '\\emph', 0), 7)
-        self.assertEqual(find_token(lines, '\\emph', 0, 5), -1)
-        self.assertEqual(find_token(lines, '\\emp', 0, 0, True), -1)
-        self.assertEqual(find_token(lines, '\\emp', 0, 0, False), 7)
+        # no line starts with "emph" (without backspace):
         self.assertEqual(find_token(lines, 'emph', 0), -1)
+        # token on line[start] is found:
+        self.assertEqual(find_token(lines, '\\emph', 7), 7)
+        self.assertEqual(find_token(lines, '\\emph', 8), 9)
+        # token on line[end] is not found:
+        self.assertEqual(find_token(lines, '\\emph', 0, 7), -1)
+        # `ignorews` looks for whitespace-separated tokens:
+        self.assertEqual(find_token(lines, '\\emp', 0, ignorews=True), -1)
+        self.assertEqual(find_token(lines, '\\emph',0, ignorews=True), 7)
+        self.assertEqual(find_token(lines, '\\emph', 7, ignorews=True), 7)
+        self.assertEqual(find_token(lines, '\\emph', 0, 7, True), -1)
+        # only first token is found:
+        self.assertEqual(find_token(lines, 'Quotes', 0), -1)
+        self.assertEqual(find_token(lines, 'Quotes', 0, ignorews=True), -1)
 
 
     def test_find_tokens(self):
def test_find_tokens(self):
|
def test_find_tokens(self):
|
||||||
@ -89,5 +100,94 @@ class TestParserTools(unittest.TestCase):
|
|||||||
self.assertEqual(find_tokens(lines, tokens, 0, 4), -1)
|
self.assertEqual(find_tokens(lines, tokens, 0, 4), -1)
|
||||||
|
|
||||||
|
|
||||||
|
def test_find_complete_lines(self):
|
||||||
|
sublines = ["\\begin_inset Quotes eld",
|
||||||
|
"\\end_inset"]
|
||||||
|
# return index of first line of sublines:
|
||||||
|
self.assertEqual(find_complete_lines(lines, sublines), 3)
|
||||||
|
self.assertEqual(find_complete_lines(lines, ["\\end_inset"]), 4)
|
||||||
|
# return -1 if sublines is not found:
|
||||||
|
self.assertEqual(find_complete_lines(lines, ['x']), -1)
|
||||||
|
# search includes line `start`:
|
||||||
|
self.assertEqual(find_complete_lines(lines, sublines, 3), 3)
|
||||||
|
self.assertEqual(find_complete_lines(lines, sublines, 4), 20)
|
||||||
|
self.assertEqual(find_complete_lines(lines, sublines, 21), -1)
|
||||||
|
# serch excludes line `end`
|
||||||
|
self.assertEqual(find_complete_lines(lines, sublines, 4, 20), -1)
|
||||||
|
# an empty list is always found
|
||||||
|
self.assertEqual(find_complete_lines(lines, []), 0)
|
||||||
|
|
||||||
|
|
||||||
|
def test_find_across_lines(self):
|
||||||
|
# sub with at least 2 line-breaks (uses find_complete_lines):
|
||||||
|
sub = "Quotes eld\n\\end_inset\n\n\n"
|
||||||
|
self.assertEqual(find_across_lines(lines, sub), 3)
|
||||||
|
# Return -1 if not found
|
||||||
|
self.assertEqual(find_across_lines(lines, sub, 4), -1)
|
||||||
|
self.assertEqual(find_across_lines(lines, sub, 0, 6), -1)
|
||||||
|
sub = "Quotes eld\n\\end_inset\nx\n"
|
||||||
|
self.assertEqual(find_across_lines(lines, sub), -1)
|
||||||
|
sub = "Quotes X\n\\end_inset\n\n"
|
||||||
|
self.assertEqual(find_across_lines(lines, sub), -1)
|
||||||
|
sub = "Quotes eld\n\\end_insert\n\n"
|
||||||
|
self.assertEqual(find_across_lines(lines, sub), -1)
|
||||||
|
# sub with up to 1 line-break:
|
||||||
|
sub = "Quotes eld\n\\end_inset"
|
||||||
|
self.assertEqual(find_across_lines(lines, sub), 3)
|
||||||
|
self.assertEqual(find_across_lines(lines, sub, 4), -1)
|
||||||
|
self.assertEqual(find_across_lines(lines, sub, 0, 4), -1)
|
||||||
|
self.assertEqual(find_across_lines(lines, sub, 4, 3), -1)
|
||||||
|
sub = "Quotes X eld\n\\end_inset\n"
|
||||||
|
self.assertEqual(find_across_lines(lines, sub), -1)
|
||||||
|
sub = "Quotes eld\n\\end_insert\n"
|
||||||
|
self.assertEqual(find_across_lines(lines, sub), -1)
|
||||||
|
# sub without line-break
|
||||||
|
sub = "end_"
|
||||||
|
self.assertEqual(find_across_lines(lines, sub), 4)
|
||||||
|
self.assertEqual(find_across_lines(lines, sub, 5), 12)
|
||||||
|
self.assertEqual(find_across_lines(lines, sub, 0, 4), -1)
|
||||||
|
self.assertEqual(find_across_lines(lines, sub, 2, 1), -1)
|
||||||
|
self.assertEqual(find_across_lines(lines, "XXX"), -1)
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_value(self):
|
||||||
|
self.assertEqual(get_value(lines, "\\begin_inset"), "Quotes eld")
|
||||||
|
# TODO: do we want this:
|
||||||
|
self.assertEqual(get_value(lines, "\\begin_inset Quotes"), "Quotes eld")
|
||||||
|
# or only the part after "token":
|
||||||
|
# self.assertEqual(get_value(lines, "\\begin_inset Quotes"), "eld")
|
||||||
|
# return default if not found
|
||||||
|
self.assertEqual(get_value(lines, "\\begin_insert", default=42), 42)
|
||||||
|
# TODO: do we want this:
|
||||||
|
self.assertEqual(get_value(lines, "\\end_inset", default=None), None)
|
||||||
|
# or emtpy string if token is found but has no value:
|
||||||
|
# self.assertEqual(get_value(lines, "\\end_inset", default=None), "")
|
||||||
|
|
||||||
|
|
||||||
|
def test_del_complete_lines(self):
|
||||||
|
l = lines[:]
|
||||||
|
sublines = ["\\begin_inset Quotes eld",
|
||||||
|
"\\end_inset"]
|
||||||
|
# normal operation: remove the first occurence of sublines:
|
||||||
|
self.assertEqual(del_complete_lines(l, sublines), True)
|
||||||
|
self.assertEqual(l[3], "")
|
||||||
|
self.assertEqual(len(l), len(lines)-len(sublines))
|
||||||
|
# special cases:
|
||||||
|
l = lines[:]
|
||||||
|
self.assertEqual(del_complete_lines(l, sublines, 21), False)
|
||||||
|
self.assertEqual(l, lines)
|
||||||
|
# deleting empty sublist returns success but does not change the list:
|
||||||
|
self.assertEqual(del_complete_lines(l, [], 21), True)
|
||||||
|
self.assertEqual(l, lines)
|
||||||
|
|
||||||
|
def test_del_value(self):
|
||||||
|
l = lines[:]
|
||||||
|
self.assertEqual(del_value(l, "\\begin_inset"), "Quotes eld")
|
||||||
|
self.assertEqual(del_value(l, "\\begin_inset Quotes"), "erd")
|
||||||
|
# return default if not found
|
||||||
|
self.assertEqual(del_value(l, "\\begin_insert", default=42), 42)
|
||||||
|
self.assertEqual(del_value(l, "\\end_inset", default=None), "")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|