New lyx2lyx parser tools find_substring() and set_bool_value().

This commit is contained in:
Günter Milde 2018-02-05 23:19:43 +01:00
parent c6861923f0
commit 2ddaa0a59b
2 changed files with 117 additions and 14 deletions

View File

@ -49,6 +49,9 @@ find_token_backwards(lines, token, start):
find_tokens_backwards(lines, tokens, start): find_tokens_backwards(lines, tokens, start):
As before, but look backwards. As before, but look backwards.
find_substring(lines, sub[, start[, end]]) -> int
As find_token, but sub may be anywhere in the line.
find_re(lines, rexp, start[, end]): find_re(lines, rexp, start[, end]):
As find_token, but rexp is a regular expression object, As find_token, but rexp is a regular expression object,
so it has to be passed as e.g.: re.compile(r'...'). so it has to be passed as e.g.: re.compile(r'...').
@ -77,7 +80,11 @@ get_option_value(line, option):
get_bool_value(lines, token[, start[, end[, default, delete]]]]): get_bool_value(lines, token[, start[, end[, default, delete]]]]):
Like get_value, but returns a boolean. Like get_value, but returns a boolean.
del_token(lines, token, start[, end]): set_bool_value(lines, token, value[, start[, end]]):
Find `token` in `lines[start:end]` and set to boolean value bool(`value`).
Return old value. Raise ValueError if token is not in lines.
del_token(lines, token[, start[, end]]):
Like find_token, but deletes the line if it finds one. Like find_token, but deletes the line if it finds one.
Returns True if a line got deleted, otherwise False. Returns True if a line got deleted, otherwise False.
@ -187,6 +194,8 @@ def find_token(lines, token, start=0, end=0, ignorews=False):
whitespace are ignored, but there must be whitespace following whitespace are ignored, but there must be whitespace following
token itself. token itself.
Use find_substring(lines, sub) to find a substring anywhere in `lines`.
Return -1 on failure.""" Return -1 on failure."""
if end == 0 or end > len(lines): if end == 0 or end > len(lines):
@ -239,14 +248,32 @@ def find_tokens_exact(lines, tokens, start=0, end=0):
return find_tokens(lines, tokens, start, end, True) return find_tokens(lines, tokens, start, end, True)
def find_substring(lines, sub, start=0, end=0):
    """ find_substring(lines, sub[, start[, end]]) -> int

    Return the lowest line number `i` in range(start, end) where
    `sub` is a substring of lines[i].

    `end` is exclusive. If `end` is 0 or larger than len(lines),
    the search runs up to the last line of `lines`.

    Return -1 on failure."""
    # end == 0 is the "not given" marker shared with the other find_* tools.
    if end == 0 or end > len(lines):
        end = len(lines)
    for i in range(start, end):
        if sub in lines[i]:
            return i
    return -1
def find_re(lines, rexp, start=0, end=0):
""" find_re(lines, rexp[, start[, end]]) -> int
Return the lowest line number `i` in [start, end] where the regular
expression object `rexp` matches at the beginning of line[i].
Return -1 on failure.
Start your pattern with the wildcard ".*" to find a match anywhere in a
line. Use find_substring() to find a substring anywhere in the lines.
"""
if end == 0 or end > len(lines): if end == 0 or end > len(lines):
end = len(lines) end = len(lines)
for i in range(start, end): for i in range(start, end):
@ -398,26 +425,51 @@ def get_quoted_value(lines, token, start=0, end=0, default="", delete=False):
return default return default
return val.strip('"') return val.strip('"')
# Accepted spellings of boolean values; index 0 is the word form,
# index 1 the numeric form.
bool_values = {True: ("true", "1"),
               False: ("false", "0")}


def get_bool_value(lines, token, start=0, end=0, default=None, delete=False):
    """ get_bool_value(lines, token[, start[, end[, default[, delete]]]]) -> bool

    Find the next line that looks like:
      token <bool_value>

    Return True if <bool_value> is 1 or "true", False if <bool_value>
    is 0 or "false", else `default`.

    The arguments `start`, `end`, `default` and `delete` are passed on
    unchanged to get_quoted_value().
    """
    val = get_quoted_value(lines, token, start, end, default, delete)
    if val in bool_values[True]:
        return True
    if val in bool_values[False]:
        return False
    # Anything else (e.g. a misspelled value) is not a boolean.
    return default
def set_bool_value(lines, token, value, start=0, end=0):
    """Find `token` in `lines` and set to bool(`value`).

    Return previous value (True, False, or None if the old value was
    not a recognised boolean). Raise `ValueError` if `token` is not
    in lines.

    The notation of the existing line is kept: a line using 0/1 is
    rewritten with 0/1, anything else with true/false.

    Cf. find_token(), get_bool_value().
    """
    i = find_token(lines, token, start, end)
    if i == -1:
        raise ValueError("token %r not found in lines" % (token,))
    # Normalise first: any truthy/falsy object is accepted, and both the
    # identity test and the bool_values lookup below need a real bool.
    value = bool(value)
    oldvalue = get_bool_value(lines, token, i, i+1)
    if oldvalue is value:
        # Nothing to change.
        return oldvalue
    # Use 0/1 or true/false?
    if get_quoted_value(lines, token, i, i+1) in ('0', '1'):
        value_string = bool_values[value][1]
    else:
        value_string = bool_values[value][0]
    # set to new value
    lines[i] = "%s %s" % (token, value_string)
    return oldvalue
def get_option_value(line, option): def get_option_value(line, option):
rx = option + '\s*=\s*"([^"]+)"' rx = option + '\s*=\s*"([^"]+)"'
rx = re.compile(rx) rx = re.compile(rx)

View File

@ -22,7 +22,7 @@ from parser_tools import *
import unittest import unittest
ug = r""" lines = r"""
\begin_layout Standard \begin_layout Standard
The The
\begin_inset Quotes eld \begin_inset Quotes eld
@ -56,9 +56,26 @@ Introduction
describes that, too. describes that, too.
\end_layout \end_layout
""" """.splitlines()
header = r"""\begin_header
\origin unavailable
\paperpagestyle default
\output_changes false
\html_math_output 0
\html_css_as_file 0
\html_be_strict fallse
\end_header""".splitlines()
newheader = r"""\begin_header
\origin unavailable
\paperpagestyle default
\output_changes true
\html_math_output 0
\html_css_as_file 1
\html_be_strict false
\end_header""".splitlines()
lines = ug.splitlines()
class TestParserTools(unittest.TestCase): class TestParserTools(unittest.TestCase):
@ -100,6 +117,23 @@ class TestParserTools(unittest.TestCase):
self.assertEqual(find_tokens(lines, tokens, 0, 4), -1) self.assertEqual(find_tokens(lines, tokens, 0, 4), -1)
def test_find_substring(self):
    needle = "Quotes"
    # "Quotes" never starts a line, so the token search cannot see it ...
    self.assertEqual(find_token(lines, needle, 0), -1)
    # ... while the substring search finds it inside line 3.
    self.assertEqual(find_substring(lines, needle, 0), 3)
    # A string that occurs nowhere yields -1.
    self.assertEqual(find_substring(lines, "Qualen", 0), -1)
def test_find_re(self):
    pattern = re.compile(r'\\begin.*Quote')
    # The search begins at lines[start]; start defaults to 0.
    self.assertEqual(find_re(lines, pattern), 3)
    self.assertEqual(find_re(lines, pattern, start=3), 3)
    # The search stops one line *before* lines[end].
    self.assertEqual(find_re(lines, pattern, start=4), 11)
    self.assertEqual(find_re(lines, pattern, start=4, end=11), -1)
def test_find_complete_lines(self): def test_find_complete_lines(self):
sublines = ["\\begin_inset Quotes eld", sublines = ["\\begin_inset Quotes eld",
"\\end_inset"] "\\end_inset"]
@ -163,6 +197,23 @@ class TestParserTools(unittest.TestCase):
# or empty string if token is found but has no value: # or empty string if token is found but has no value:
# self.assertEqual(get_value(lines, "\\end_inset", default=None), "") # self.assertEqual(get_value(lines, "\\end_inset", default=None), "")
def test_get_bool_value(self):
    # assertIs rather than assertEqual: 0 == False and 1 == True, so an
    # equality test would not notice a non-boolean return value.
    # Word notation (true/false):
    self.assertIs(get_bool_value(header, "\\output_changes"), False)
    self.assertIs(get_bool_value(newheader, "\\output_changes"), True)
    # Numeric notation (0/1):
    self.assertIs(get_bool_value(header, "\\html_css_as_file"), False)
    self.assertIs(get_bool_value(newheader, "\\html_css_as_file"), True)
    # Unknown token: the default (None) is returned.
    self.assertIsNone(get_bool_value(header, "\\something"))
    # Token lies before `start`: not found, default is returned.
    self.assertIsNone(get_bool_value(header, "\\output_changes", 4))
def test_set_bool_value(self):
    # Work on a copy: set_bool_value() modifies the list in place and
    # `header` is a module-level fixture read by other tests.
    settings = header[:]
    # set to new value, return old value
    self.assertIs(set_bool_value(settings, "\\output_changes", True), False)
    self.assertIs(set_bool_value(settings, "\\html_css_as_file", True), False)
    # a misspelled old value is reported as None and overwritten:
    self.assertIsNone(set_bool_value(settings, "\\html_be_strict", False))
    # a missing token raises ValueError and leaves the lines untouched:
    self.assertRaises(ValueError, set_bool_value, settings, "\\something", 0)
    self.assertEqual(settings, newheader)
def test_del_complete_lines(self): def test_del_complete_lines(self):
l = lines[:] l = lines[:]