From 2ddaa0a59b220436ee4f907672ae0dfd6b01f296 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=BCnter=20Milde?= Date: Mon, 5 Feb 2018 23:19:43 +0100 Subject: [PATCH] New lyx2lyx parser tools find_substring() and set_bool_value(). --- lib/lyx2lyx/parser_tools.py | 74 +++++++++++++++++++++++++++----- lib/lyx2lyx/test_parser_tools.py | 57 ++++++++++++++++++++++-- 2 files changed, 117 insertions(+), 14 deletions(-) diff --git a/lib/lyx2lyx/parser_tools.py b/lib/lyx2lyx/parser_tools.py index f36cd42b94..c53dbfa1ab 100644 --- a/lib/lyx2lyx/parser_tools.py +++ b/lib/lyx2lyx/parser_tools.py @@ -49,6 +49,9 @@ find_token_backwards(lines, token, start): find_tokens_backwards(lines, tokens, start): As before, but look backwards. +find_substring(lines, sub[, start[, end]]) -> int + As find_token, but sub may be anywhere in the line. + find_re(lines, rexp, start[, end]): As find_token, but rexp is a regular expression object, so it has to be passed as e.g.: re.compile(r'...'). @@ -77,7 +80,11 @@ get_option_value(line, option): get_bool_value(lines, token[, start[, end[, default, delete]]]]): Like get_value, but returns a boolean. -del_token(lines, token, start[, end]): +set_bool_value(lines, token, value[, start[, end]]): + Find `token` in `lines[start:end]` and set to boolean value bool(`value`). + Return old value. Raise ValueError if token is not in lines. + +del_token(lines, token[, start[, end]]): Like find_token, but deletes the line if it finds one. Returns True if a line got deleted, otherwise False. @@ -187,6 +194,8 @@ def find_token(lines, token, start=0, end=0, ignorews=False): whitespace are ignored, but there must be whitespace following token itself. + Use find_substring(lines, sub) to find a substring anywhere in `lines`. 
+ Return -1 on failure.""" if end == 0 or end > len(lines): @@ -239,14 +248,32 @@ def find_tokens_exact(lines, tokens, start=0, end=0): return find_tokens(lines, tokens, start, end, True) -def find_re(lines, rexp, start=0, end=0): - """ find_re(lines, rexp, start[, end]) -> int +def find_substring(lines, sub, start=0, end=0): + """ find_substring(lines, sub[, start[, end]]) -> int - Return the lowest line where rexp, a regular expression, is found - in lines[start, end]. + Return the lowest line number `i` in [start, end] where + `sub` is a substring of line[i]. Return -1 on failure.""" + if end == 0 or end > len(lines): + end = len(lines) + for i in range(start, end): + if sub in lines[i]: + return i + return -1 + + +def find_re(lines, rexp, start=0, end=0): + """ find_re(lines, rexp[, start[, end]]) -> int + + Return the lowest line number `i` in [start, end] where the regular + expression object `rexp` matches at the beginning of line[i]. + Return -1 on failure. + + Start your pattern with the wildcard ".*" to find a match anywhere in a + line. Use find_substring() to find a substring anywhere in the lines. + """ if end == 0 or end > len(lines): end = len(lines) for i in range(start, end): @@ -398,26 +425,51 @@ def get_quoted_value(lines, token, start=0, end=0, default="", delete=False): return default return val.strip('"') +bool_values = {True: ("true", "1"), + False: ("false", "0")} def get_bool_value(lines, token, start=0, end=0, default=None, delete=False): """ get_bool_value(lines, token, start[[, end], default]) -> string Find the next line that looks like: - token bool_value + token <bool_value> - Returns True if bool_value is 1 or true and - False if bool_value is 0 or false + Return True if <bool_value> is 1 or "true", False if bool_value + is 0 or "false", else `default`. 
""" val = get_quoted_value(lines, token, start, end, default, delete) - - if val == "1" or val == "true": + if val in bool_values[True]: return True - if val == "0" or val == "false": + if val in bool_values[False]: return False return default +def set_bool_value(lines, token, value, start=0, end=0): + """Find `token` in `lines` and set to bool(`value`). + + Return previous value. Raise `ValueError` if `token` is not in lines. + + Cf. find_token(), get_bool_value(). + """ + i = find_token(lines, token, start, end) + if i == -1: + raise ValueError + oldvalue = get_bool_value(lines, token, i, i+1) + if oldvalue is value: + return oldvalue + # Use 0/1 or true/false? + if get_quoted_value(lines, token, i, i+1) in ('0', '1'): + value_string = bool_values[value][1] + else: + value_string = bool_values[value][0] + # set to new value + lines[i] = "%s %s" % (token, value_string) + + return oldvalue + + def get_option_value(line, option): rx = option + '\s*=\s*"([^"]+)"' rx = re.compile(rx) diff --git a/lib/lyx2lyx/test_parser_tools.py b/lib/lyx2lyx/test_parser_tools.py index a9d4faee82..55d6d89e05 100644 --- a/lib/lyx2lyx/test_parser_tools.py +++ b/lib/lyx2lyx/test_parser_tools.py @@ -22,7 +22,7 @@ from parser_tools import * import unittest -ug = r""" +lines = r""" \begin_layout Standard The \begin_inset Quotes eld @@ -56,9 +56,26 @@ Introduction describes that, too. 
\end_layout -""" +""".splitlines() + +header = r"""\begin_header +\origin unavailable +\paperpagestyle default +\output_changes false +\html_math_output 0 +\html_css_as_file 0 +\html_be_strict fallse +\end_header""".splitlines() + +newheader = r"""\begin_header +\origin unavailable +\paperpagestyle default +\output_changes true +\html_math_output 0 +\html_css_as_file 1 +\html_be_strict false +\end_header""".splitlines() -lines = ug.splitlines() class TestParserTools(unittest.TestCase): @@ -100,6 +117,23 @@ class TestParserTools(unittest.TestCase): self.assertEqual(find_tokens(lines, tokens, 0, 4), -1) + def test_find_substring(self): + # Quotes is not a "token" (substring at the start of any line): + self.assertEqual(find_token(lines, "Quotes", 0), -1) + self.assertEqual(find_substring(lines, "Quotes", 0), 3) + # return -1 on failure: + self.assertEqual(find_substring(lines, "Qualen", 0), -1) + + + def test_find_re(self): + regexp_object = re.compile(r'\\begin.*Quote') + # matching starts with line[start] (default: start=0) + self.assertEqual(find_re(lines, regexp_object), 3) + self.assertEqual(find_re(lines, regexp_object, start=3), 3) + # matching ends one line *before* line[end]: + self.assertEqual(find_re(lines, regexp_object, start=4), 11) + self.assertEqual(find_re(lines, regexp_object, start=4, end=11), -1) + def test_find_complete_lines(self): sublines = ["\\begin_inset Quotes eld", "\\end_inset"] @@ -163,6 +197,23 @@ class TestParserTools(unittest.TestCase): # or emtpy string if token is found but has no value: # self.assertEqual(get_value(lines, "\\end_inset", default=None), "") + def test_get_bool_value(self): + self.assertEqual(get_bool_value(header, "\\output_changes"), False) + self.assertEqual(get_bool_value(newheader, "\\output_changes"), True) + self.assertEqual(get_bool_value(header, "\\html_css_as_file"), False) + self.assertEqual(get_bool_value(newheader, "\\html_css_as_file"), True) + self.assertEqual(get_bool_value(header, "\\something"), 
None) + self.assertEqual(get_bool_value(header, "\\output_changes", 4), None) + + def test_set_bool_value(self): + # set to new value, return old value + self.assertEqual(set_bool_value(header, "\\output_changes", True), False) + self.assertEqual(set_bool_value(header, "\\html_css_as_file", True), False) + # return default if misspelled: + self.assertEqual(set_bool_value(header, "\\html_be_strict", False), None) + # catch error and insert new setting: + self.assertRaises(ValueError, set_bool_value, header, "\\something", 0) + self.assertEqual(header, newheader) def test_del_complete_lines(self): l = lines[:]