2002-08-01 15:26:32 +00:00
|
|
|
# This file is part of lyx2lyx
|
2006-08-02 14:25:43 +00:00
|
|
|
# -*- coding: utf-8 -*-
|
2018-01-12 16:26:02 +01:00
|
|
|
# Copyright (C) 2002-2011 Dekel Tsur <dekel@lyx.org>,
|
2020-12-05 17:37:21 -05:00
|
|
|
# José Matos <jamatos@lyx.org>, Richard Kimberly Heck <rikiheck@lyx.org>
|
2002-08-01 15:26:32 +00:00
|
|
|
#
|
|
|
|
# This program is free software; you can redistribute it and/or
|
|
|
|
# modify it under the terms of the GNU General Public License
|
|
|
|
# as published by the Free Software Foundation; either version 2
|
|
|
|
# of the License, or (at your option) any later version.
|
|
|
|
#
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
# GNU General Public License for more details.
|
|
|
|
#
|
|
|
|
# You should have received a copy of the GNU General Public License
|
|
|
|
# along with this program; if not, write to the Free Software
|
2011-08-25 23:10:36 +00:00
|
|
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
2002-08-01 15:26:32 +00:00
|
|
|
|
2010-11-05 16:18:20 +00:00
|
|
|
|
2018-01-21 19:55:27 +01:00
|
|
|
"""
|
2013-05-23 15:12:03 -04:00
|
|
|
This module offers several free functions to help parse lines.
|
2018-01-12 16:26:02 +01:00
|
|
|
More documentation is below, but here is a quick guide to what
|
2010-11-05 16:18:20 +00:00
|
|
|
they do. Optional arguments are marked by brackets.
|
|
|
|
|
2018-01-31 15:09:32 +01:00
|
|
|
find_token(lines, token[, start[, end[, ignorews]]]):
|
2010-11-05 16:18:20 +00:00
|
|
|
Returns the first line i, start <= i < end, on which
|
2018-01-12 16:26:02 +01:00
|
|
|
token is found at the beginning. Returns -1 if not
|
|
|
|
found.
|
2010-11-05 17:09:43 +00:00
|
|
|
If ignorews is (given and) True, then differences
|
2018-01-12 16:26:02 +01:00
|
|
|
in whitespace do not count, except that there must be no
|
2010-11-05 17:09:43 +00:00
|
|
|
extra whitespace following token itself.
|
2010-11-05 16:18:20 +00:00
|
|
|
|
2018-01-31 15:09:32 +01:00
|
|
|
find_token_exact(lines, token[, start[, end]]):
|
2013-05-23 15:12:03 -04:00
|
|
|
As find_token, but with ignorews set to True.
|
2010-11-05 16:18:20 +00:00
|
|
|
|
2018-01-31 15:09:32 +01:00
|
|
|
find_tokens(lines, tokens[, start[, end[, ignorews]]]):
|
2010-11-05 16:18:20 +00:00
|
|
|
Returns the first line i, start <= i < end, on which
|
2018-01-12 16:26:02 +01:00
|
|
|
one of the tokens in tokens is found at the beginning.
|
|
|
|
Returns -1 if not found.
|
2010-11-05 17:09:43 +00:00
|
|
|
If ignorews is (given and) True, then differences
|
2018-01-12 16:26:02 +01:00
|
|
|
in whitespace do not count, except that there must be no
|
2010-11-05 17:09:43 +00:00
|
|
|
extra whitespace following token itself.
|
2010-11-05 16:18:20 +00:00
|
|
|
|
2018-01-31 15:09:32 +01:00
|
|
|
find_tokens_exact(lines, token[, start[, end]]):
|
2010-11-05 17:09:43 +00:00
|
|
|
As find_tokens, but with ignorews True.
|
2018-01-12 16:26:02 +01:00
|
|
|
|
2010-11-05 16:18:20 +00:00
|
|
|
find_token_backwards(lines, token, start):
|
|
|
|
find_tokens_backwards(lines, tokens, start):
|
|
|
|
As before, but look backwards.
|
|
|
|
|
2018-02-05 23:19:43 +01:00
|
|
|
find_substring(lines, sub[, start[, end]]) -> int
|
|
|
|
As find_token, but sub may be anywhere in the line.
|
|
|
|
|
2010-11-05 16:18:20 +00:00
|
|
|
find_re(lines, rexp[, start[, end]]):
|
|
|
|
As find_token, but rexp is a regular expression object,
|
|
|
|
so it has to be passed as e.g.: re.compile(r'...').
|
|
|
|
|
2018-01-23 14:01:30 +01:00
|
|
|
get_value(lines, token[, start[, end[, default[, delete]]]]):
|
2018-01-12 16:26:02 +01:00
|
|
|
Similar to find_token, but it returns what follows the
|
2010-11-05 16:18:20 +00:00
|
|
|
token on the found line. Example:
|
2015-03-11 12:04:46 +00:00
|
|
|
get_value(document.header, "\\use_xetex", 0)
|
2010-11-05 16:18:20 +00:00
|
|
|
will find a line like:
|
2015-03-11 12:04:46 +00:00
|
|
|
\\use_xetex true
|
2010-11-05 16:18:20 +00:00
|
|
|
and, in that case, return "true". (Note that whitespace
|
2018-01-12 16:26:02 +01:00
|
|
|
is stripped.) The final argument, default, defaults to "",
|
2010-11-05 16:18:20 +00:00
|
|
|
and is what is returned if we do not find anything. So you
|
|
|
|
can use that to set a default.
|
2020-04-30 21:57:50 -04:00
|
|
|
If delete is True, then delete the line if found.
|
2018-01-12 16:26:02 +01:00
|
|
|
|
2018-01-23 14:01:30 +01:00
|
|
|
get_quoted_value(lines, token[, start[, end[, default[, delete]]]]):
|
2010-11-05 16:18:20 +00:00
|
|
|
Similar to get_value, but it will strip quotes off the
|
|
|
|
value, if they are present. So use this one for cases
|
|
|
|
where the value is normally quoted.
|
|
|
|
|
2010-11-10 13:41:43 +00:00
|
|
|
get_option_value(line, option):
|
|
|
|
This assumes we have a line with something like:
|
|
|
|
option="value"
|
|
|
|
and returns value. Returns "" if not found.
|
|
|
|
|
2018-01-23 14:01:30 +01:00
|
|
|
get_bool_value(lines, token[, start[, end[, default[, delete]]]]):
|
2016-06-18 19:29:15 -04:00
|
|
|
Like get_value, but returns a boolean.
|
|
|
|
|
2018-02-05 23:19:43 +01:00
|
|
|
set_bool_value(lines, token, value[, start[, end]]):
|
|
|
|
Find `token` in `lines[start:end]` and set to boolean value bool(`value`).
|
|
|
|
Return old value. Raise ValueError if token is not in lines.
|
|
|
|
|
|
|
|
del_token(lines, token[, start[, end]]):
|
2010-11-05 16:22:26 +00:00
|
|
|
Like find_token, but deletes the line if it finds one.
|
|
|
|
Returns True if a line got deleted, otherwise False.
|
2019-07-07 23:31:12 +02:00
|
|
|
|
2019-05-24 13:49:32 +02:00
|
|
|
Use get_* with the optional argument "delete=True", if you want to
|
|
|
|
get and delete a token.
|
2010-11-05 16:59:27 +00:00
|
|
|
|
|
|
|
find_beginning_of(lines, i, start_token, end_token):
|
2018-01-12 16:26:02 +01:00
|
|
|
Here, start_token and end_token are meant to be a matching
|
|
|
|
pair, like "\\begin_layout" and "\\end_layout". We look for
|
2010-11-05 16:59:27 +00:00
|
|
|
the start_token that pairs with the end_token that occurs
|
|
|
|
on or after line i. Returns -1 if not found.
|
2018-01-12 16:26:02 +01:00
|
|
|
So, in the layout case, this would find the \\begin_layout
|
|
|
|
for the layout line i is in.
|
2010-11-05 16:59:27 +00:00
|
|
|
Example:
|
|
|
|
ec = find_token(document.body, "</cell", i)
|
|
|
|
bc = find_beginning_of(document.body, ec, \
|
|
|
|
"<cell", "</cell")
|
|
|
|
Now, assuming no -1s, bc-ec wraps the cell for line i.
|
|
|
|
|
|
|
|
find_end_of(lines, i, start_token, end_token):
|
2018-01-12 16:26:02 +01:00
|
|
|
Like find_beginning_of, but looking for the matching
|
2010-11-05 16:59:27 +00:00
|
|
|
end_token. This might look like:
|
|
|
|
bc = find_token(document.body, "<cell", i)
|
|
|
|
ec = find_end_of(document.body, bc, "<cell", "</cell")
|
|
|
|
Now, assuming no -1s, bc-ec wrap the next cell.
|
|
|
|
|
|
|
|
find_end_of_inset(lines, i):
|
|
|
|
Specialization of find_end_of for insets.
|
|
|
|
|
|
|
|
find_end_of_layout(lines, i):
|
|
|
|
Specialization of find_end_of for layouts.
|
|
|
|
|
2012-12-19 19:33:39 +01:00
|
|
|
find_end_of_sequence(lines, i):
|
|
|
|
Find the end of the sequence of layouts of the same kind.
|
2012-12-20 13:29:04 +01:00
|
|
|
Considers nesting. If the last paragraph in sequence is nested,
|
2022-07-31 00:36:51 +02:00
|
|
|
the position of the last \\end_deeper is returned, else
|
|
|
|
the position of the last \\end_layout.
|
2012-12-19 19:33:39 +01:00
|
|
|
|
2018-01-24 01:02:24 +01:00
|
|
|
is_in_inset(lines, i, inset, default=(-1,-1)):
|
|
|
|
Check if line i is in an inset of the given type.
|
2018-01-12 16:26:02 +01:00
|
|
|
If so, returns starting and ending lines. Otherwise,
|
2018-01-24 01:02:24 +01:00
|
|
|
return default.
|
2010-11-05 16:59:27 +00:00
|
|
|
Example:
|
|
|
|
is_in_inset(document.body, i, "\\begin_inset Tabular")
|
2018-01-24 01:02:24 +01:00
|
|
|
returns (-1,-1) unless i is within a table. If it is, then
|
2010-11-05 16:59:27 +00:00
|
|
|
it returns the line on which the table begins and the one
|
|
|
|
on which it ends. Note that this pair will evaluate to
|
|
|
|
boolean True, so
|
2018-01-24 01:02:24 +01:00
|
|
|
if is_in_inset(..., default=False):
|
2010-11-05 16:59:27 +00:00
|
|
|
will do what you expect.
|
|
|
|
|
|
|
|
get_containing_inset(lines, i):
|
2018-01-12 16:26:02 +01:00
|
|
|
Finds out what kind of inset line i is within. Returns a
|
2022-07-31 00:36:51 +02:00
|
|
|
list containing what follows \\begin_inset on the line
|
2010-11-05 16:59:27 +00:00
|
|
|
on which the inset begins, plus the starting and ending line.
|
|
|
|
Returns False on any kind of error or if it isn't in an inset.
|
|
|
|
So get_containing_inset(document.body, i) might return:
|
|
|
|
("CommandInset ref", 300, 306)
|
|
|
|
if i is within an InsetRef beginning on line 300 and ending
|
|
|
|
on line 306.
|
|
|
|
|
|
|
|
get_containing_layout(lines, i):
|
2012-12-09 11:40:14 +01:00
|
|
|
As get_containing_inset, but for layout. Additionally returns the
|
|
|
|
position of real paragraph start (after par params) as 4th value.
|
2010-11-05 16:59:27 +00:00
|
|
|
|
|
|
|
find_nonempty_line(lines[, start[, end]]):
|
|
|
|
Finds the next non-empty line.
|
|
|
|
|
|
|
|
check_token(line, token):
|
|
|
|
Does line begin with token?
|
|
|
|
|
|
|
|
is_nonempty_line(line):
|
|
|
|
Does line contain something besides whitespace?
|
|
|
|
|
2012-12-19 19:33:39 +01:00
|
|
|
count_pars_in_inset(lines, i):
|
|
|
|
Counts the paragraphs inside an inset.
|
|
|
|
|
2018-01-21 19:55:27 +01:00
|
|
|
"""
|
2002-08-02 19:25:14 +00:00
|
|
|
|
2010-11-10 13:41:43 +00:00
|
|
|
import re
|
|
|
|
|
2006-08-02 14:25:43 +00:00
|
|
|
# Utilities for one line
|
2002-08-02 19:25:14 +00:00
|
|
|
def check_token(line, token):
    """Tell whether `line` begins with `token`.

    Deprecated: call ``line.startswith(token)`` directly instead.

    Return True when `token` is the leading part of `line`, else False.
    """
    begins_with_token = line.startswith(token)
    return begins_with_token
|
2006-08-02 14:25:43 +00:00
|
|
|
|
|
|
|
|
|
|
|
def is_nonempty_line(line):
    """Tell whether `line` contains anything besides whitespace.

    Return False for an empty or whitespace-only line, True otherwise.
    """
    remainder = line.strip()
    return True if remainder else False
|
2006-08-02 14:25:43 +00:00
|
|
|
|
|
|
|
|
|
|
|
# Utilities for a list of lines
|
2018-01-23 08:45:19 +01:00
|
|
|
def find_token(lines, token, start=0, end=0, ignorews=False):
    """Locate the first line in `lines[start:end]` that begins with `token`.

    An `end` of 0, or one larger than len(lines), means "search up to
    and including the last line".

    If `ignorews` is True (default is False), the line and `token` are
    both split on whitespace and the leading words of the line are
    compared with the words of `token`.  Differences in the amount of
    whitespace are therefore ignored, but `token` must be followed by
    whitespace or the end of the line (it cannot match a word prefix).

    Use find_substring(lines, sub) to find a substring anywhere in `lines`.

    Return the index of the matching line, or -1 on failure.
    """
    stop = len(lines) if (end == 0 or end > len(lines)) else end

    if not ignorews:
        for lineno in range(start, stop):
            if lines[lineno].startswith(token):
                return lineno
        return -1

    # Word-wise comparison; a line with fewer words than the token simply
    # yields a shorter slice and therefore never compares equal.
    wanted = token.split()
    count = len(wanted)
    for lineno in range(start, stop):
        if lines[lineno].split()[:count] == wanted:
            return lineno
    return -1
|
|
|
|
|
2004-05-11 16:13:33 +00:00
|
|
|
|
2018-01-23 08:45:19 +01:00
|
|
|
def find_token_exact(lines, token, start=0, end=0):
    """Whole-word, whitespace-tolerant variant of find_token().

    Simply calls find_token() with `ignorews` switched on and passes
    its result through: the index of the first matching line in
    `lines[start:end]`, or -1 if there is none.
    """
    return find_token(lines, token, start=start, end=end, ignorews=True)
|
|
|
|
|
2002-08-24 12:13:44 +00:00
|
|
|
|
2018-01-23 08:45:19 +01:00
|
|
|
def find_tokens(lines, tokens, start=0, end=0, ignorews=False):
    """Locate the first line in `lines[start:end]` beginning with any token.

    `tokens` is a sequence of strings.  An `end` of 0, or one larger than
    len(lines), means "search up to and including the last line".

    If `ignorews` is True (default is False), lines and tokens are split
    on whitespace and compared word by word, so differences in the amount
    of whitespace are ignored but every token must end on a word boundary.

    Return the index of the first line matching one of the tokens,
    or -1 on failure.
    """
    if end == 0 or end > len(lines):
        end = len(lines)

    if ignorews:
        # Split each token once up front rather than once per examined line.
        wanted = [token.split() for token in tokens]
        for i in range(start, end):
            words = lines[i].split()
            if any(words[:len(w)] == w for w in wanted):
                return i
        return -1

    # str.startswith() accepts a tuple of alternatives.
    prefixes = tuple(tokens)
    for i in range(start, end):
        if lines[i].startswith(prefixes):
            return i
    return -1
|
|
|
|
|
2004-05-11 16:13:33 +00:00
|
|
|
|
2018-01-23 08:45:19 +01:00
|
|
|
def find_tokens_exact(lines, tokens, start=0, end=0):
    """Whole-word, whitespace-tolerant variant of find_tokens().

    Simply calls find_tokens() with `ignorews` switched on and passes
    its result through: the index of the first line in `lines[start:end]`
    matching one of `tokens`, or -1 if there is none.
    """
    return find_tokens(lines, tokens, start=start, end=end, ignorews=True)
|
2006-03-17 09:52:13 +00:00
|
|
|
|
|
|
|
|
2018-02-05 23:19:43 +01:00
|
|
|
def find_substring(lines, sub, start=0, end=0):
    """Locate the first line in `lines[start:end]` that contains `sub`.

    Unlike find_token(), `sub` may occur anywhere within the line.
    An `end` of 0, or one larger than len(lines), means "search up to
    and including the last line".

    Return the index of the matching line, or -1 on failure.
    """
    total = len(lines)
    stop = end if (end != 0 and end <= total) else total
    hits = (lineno for lineno in range(start, stop) if sub in lines[lineno])
    return next(hits, -1)
|
|
|
|
|
|
|
|
|
|
|
|
def find_re(lines, rexp, start=0, end=0):
    """Locate the first line in `lines[start:end]` matched by `rexp`.

    `rexp` is a compiled regular expression object (e.g. the result of
    ``re.compile(r'...')``).  Its ``match()`` method is used, so the
    pattern has to match at the beginning of the line; start the pattern
    with the wildcard ".*" to find a match anywhere in a line.
    Use find_substring() to find a plain substring anywhere in the lines.

    An `end` of 0, or one larger than len(lines), means "search up to
    and including the last line".

    Return the index of the matching line, or -1 on failure.
    """
    stop = len(lines) if (end == 0 or end > len(lines)) else end
    lineno = start
    while lineno < stop:
        if rexp.match(lines[lineno]):
            return lineno
        lineno += 1
    return -1
|
|
|
|
|
2004-05-11 16:13:33 +00:00
|
|
|
|
2002-08-01 15:26:32 +00:00
|
|
|
def find_token_backwards(lines, token, start):
    """Search backwards for a line that begins with `token`.

    Lines are examined from index `start` down to index 0.

    Return the highest index <= `start` whose line starts with `token`,
    or -1 on failure.
    """
    lineno = start
    while lineno >= 0:
        if lines[lineno].startswith(token):
            return lineno
        lineno -= 1
    return -1
|
|
|
|
|
2004-05-11 16:13:33 +00:00
|
|
|
|
2002-08-03 14:29:12 +00:00
|
|
|
def find_tokens_backwards(lines, tokens, start):
    """Search backwards for a line that begins with one of `tokens`.

    Lines are examined from index `start` down to index 0.

    Return the highest index <= `start` whose line starts with any of
    the tokens, or -1 on failure.
    """
    for lineno in reversed(range(start + 1)):
        candidate = lines[lineno]
        if any(candidate.startswith(token) for token in tokens):
            return lineno
    return -1
|
|
|
|
|
|
|
|
|
|
|
|
def find_complete_lines(lines, sublines, start=0, end=0):
    """Find the first occurrence of the sequence `sublines` in list `lines`.

    Return the index of the first line of the occurrence or -1 on failure.
    An empty `sublines` yields `start`.

    The search is built on list.index(), which makes it efficient for a
    short sub-list in a large list.  Works for any values.

    >>> find_complete_lines([1, 2, 3, 1, 1, 2], [1, 2])
    0

    The `start` and `end` arguments work similar to list.index()
    (an `end` of 0 stands for len(lines)):

    >>> find_complete_lines([1, 2, 3, 1, 1, 2], [1, 2], start=1)
    4
    >>> find_complete_lines([1, 2, 3, 1, 1, 2], [1, 2], start=1, end=4)
    -1

    The return value can be used to substitute the sub-list.
    Take care to check before use:

    >>> l = [1, 1, 2]
    >>> s = find_complete_lines(l, [1, 2])
    >>> if s != -1:
    ...     l[s:s+2] = [3]; l
    [1, 3]

    See also del_complete_lines().
    """
    if not sublines:
        return start
    stop = end or len(lines)
    try:
        while True:
            for offset, wanted in enumerate(sublines):
                pos = lines.index(wanted, start, stop)
                if offset > 0 and pos != start:
                    # `wanted` does not directly follow the previous match:
                    # retry from where an occurrence containing this hit
                    # would have to begin.
                    start = pos - offset
                    break
                start = pos + 1
            else:
                # every element matched on consecutive lines
                return pos + 1 - len(sublines)
    except ValueError:  # list.index() failed: `sublines` not found
        return -1
|
|
|
|
|
|
|
|
|
|
|
|
def find_across_lines(lines, sub, start=0, end=0):
    """Find the text `sub`, which may contain line breaks, in `lines`.

    `sub` is split with str.splitlines().  With a single piece (no line
    break inside `sub`), the piece may occur anywhere within a line.
    Otherwise an occurrence consists of consecutive lines where

    * the first line ends with the first piece,
    * the lines in between equal the middle pieces exactly, and
    * the last line starts with the last piece.

    Only occurrences lying completely inside `lines[start:end]` are
    considered.  An `end` of 0, or one larger than len(lines), means
    "search up to and including the last line".

    Return the index of the first line of the first occurrence,
    or -1 on failure.
    """
    if end == 0 or end > len(lines):
        end = len(lines)
    sublines = sub.splitlines()
    n = len(sublines)

    if n < 2:  # no line-break, may be in the middle of a line
        for i in range(start, end):
            if sub in lines[i]:
                return i
        return -1

    first, middle, last = sublines[0], sublines[1:-1], sublines[-1]
    # Candidate `i` is the line holding the first piece; the last piece
    # then sits on line i + n - 1, which must still be below `end`.
    # Every candidate is tried, not just the first partial hit.
    for i in range(start, end - n + 1):
        if (lines[i].endswith(first)
                and lines[i + 1:i + n - 1] == middle
                and lines[i + n - 1].startswith(last)):
            return i
    return -1
|
|
|
|
|
2004-05-11 16:13:33 +00:00
|
|
|
|
2018-01-23 14:01:30 +01:00
|
|
|
def get_value(lines, token, start=0, end=0, default="", delete=False):
    """Find `token` in `lines` and return the part of the line that follows.

    The line is located with find_token_exact() in `lines[start:end]`.
    For a line that looks like:
      token followed by other stuff
    the result is "followed by other stuff", i.e. everything after the
    first whitespace-separated word, with leading and trailing
    whitespace removed.

    If `delete` is True, the line is removed from `lines` (if found).

    Return `default` if no line is found or nothing follows the first
    word on the found line.
    """
    found = find_token_exact(lines, token, start, end)
    if found == -1:
        return default
    # TODO: establish desired behaviour, eventually change to
    #  return lines.pop(i)[len(token):].strip() # or default
    # see test_parser_tools.py
    parts = lines[found].split(None, 1)
    if delete:
        del lines[found]
    return parts[1].strip() if len(parts) > 1 else default
|
2002-08-02 20:34:20 +00:00
|
|
|
|
2004-05-11 16:13:33 +00:00
|
|
|
|
2018-01-23 14:01:30 +01:00
|
|
|
def get_quoted_value(lines, token, start=0, end=0, default="", delete=False):
    """Like get_value(), but strip double quotes from the result.

    For a line that looks like:
      token "followed by other stuff"
    the result is "followed by other stuff" with surrounding whitespace
    and leading/trailing double quotes removed.  A value without quotes
    is fine, too.  So use get_value() to preserve possible quotes and
    this function to remove them if they are there.

    If `delete` is True, the line is removed from `lines` (if found).

    Return `default` when get_value() yields nothing.
    Note that quotes are NOT stripped from `default`!
    """
    raw = get_value(lines, token, start, end, "", delete)
    return raw.strip('"') if raw else default
|
|
|
|
|
2019-06-03 16:45:05 +02:00
|
|
|
|
2019-07-07 23:31:12 +02:00
|
|
|
# Textual spellings that get_bool_value() translates to Python booleans.
bool_values = {
    "true": True,
    "1": True,
    "false": False,
    "0": False,
}
|
2010-11-05 15:11:37 +00:00
|
|
|
|
2018-01-23 14:01:30 +01:00
|
|
|
def get_bool_value(lines, token, start=0, end=0, default=None, delete=False):
    """Like get_quoted_value(), but translate the value to a boolean.

    For a line that looks like:
      `token` <bool_value>
    return True if <bool_value> is 1 or "true" and False if it is
    0 or "false" (see `bool_values`).

    If `delete` is True, the line is removed from `lines` (if found).

    Return `default` if the token is not found or its value is not one
    of the recognised spellings.
    """
    text = get_quoted_value(lines, token, start, end, default, delete)
    if text in bool_values:
        return bool_values[text]
    return default
|
2016-07-13 15:01:17 +01:00
|
|
|
|
|
|
|
|
2018-02-05 23:19:43 +01:00
|
|
|
def set_bool_value(lines, token, value, start=0, end=0):
    """Find `token` in `lines[start:end]` and set it to bool(`value`).

    The line is located with find_token().  The spelling already used on
    that line is kept: a value written as 0/1 is replaced by 0 or 1,
    anything else by "true" or "false".  The line is left untouched if
    it already holds the requested value.

    Return the previous value as read by get_bool_value() (None if it
    was not a recognised boolean).
    Raise `ValueError` if `token` is not in lines.

    Cf. find_token(), get_bool_value().
    """
    i = find_token(lines, token, start, end)
    if i == -1:
        raise ValueError("token %r not found" % (token,))
    # Normalise first, so that truthy/falsy non-bool arguments are neither
    # written verbatim into the file nor defeat the identity test below.
    value = bool(value)
    oldvalue = get_bool_value(lines, token, i, i+1)
    if oldvalue is value:
        return oldvalue
    # set to new value
    if get_quoted_value(lines, token, i, i+1) in ('0', '1'):
        lines[i] = "%s %d" % (token, value)
    else:
        lines[i] = "%s %s" % (token, str(value).lower())
    return oldvalue
|
|
|
|
|
|
|
|
|
2010-11-10 13:41:43 +00:00
|
|
|
def get_option_value(line, option):
    """Return the value of `option` in a line containing option="value".

    `option` is inserted as-is into a regular expression; whitespace
    around the "=" is allowed.  The first occurrence in `line` is used.

    Return the text between the double quotes, or "" if the option is
    not found or its value is empty.
    """
    pattern = re.compile(option + r'\s*=\s*"([^"]+)"')
    found = pattern.search(line)
    return found.group(1) if found else ""
|
|
|
|
|
|
|
|
|
2011-12-18 21:27:17 +00:00
|
|
|
def set_option_value(line, option, value):
    """Return `line` with the value of `option` replaced by `value`.

    Every occurrence of option="old value" in `line` (whitespace around
    the "=" is allowed, the old value must not be empty) is rewritten to
    carry `value` instead.  `option` is inserted as-is into a regular
    expression, `value` is inserted literally.

    Return `line` unchanged if the option is not found.
    """
    rx = re.compile('(' + option + r'\s*=\s*")[^"]+"')
    # A replacement function is used instead of a template string so that
    # backslashes in `value` (common in LaTeX code) are not interpreted
    # as escape sequences or group references by re.sub().
    return rx.sub(lambda m: m.group(1) + value + '"', line)
|
2011-12-18 21:27:17 +00:00
|
|
|
|
|
|
|
|
2018-01-23 08:45:19 +01:00
|
|
|
def del_token(lines, token, start=0, end=0):
    """Delete the first line in `lines[start:end]` that begins with `token`.

    The line is located with find_token_exact().

    Return True if a line was deleted, False if no line was found.
    """
    hit = find_token_exact(lines, token, start, end)
    if hit != -1:
        del lines[hit]
        return True
    return False
|
2002-08-24 12:13:44 +00:00
|
|
|
|
2018-01-23 08:45:19 +01:00
|
|
|
def del_complete_lines(lines, sublines, start=0, end=0):
    """Delete the first occurrence of `sublines` in list `lines`.

    The occurrence is located with find_complete_lines(), so this works
    for any values and the `start` and `end` arguments work similar to
    list.index().

    Return True if a deletion was done and False if not.

    >>> l = [1, 0, 1, 1, 1, 2]
    >>> del_complete_lines(l, [0, 1, 1])
    True
    >>> l
    [1, 1, 2]
    """
    first = find_complete_lines(lines, sublines, start, end)
    if first == -1:
        return False
    beyond = first + len(sublines)
    del lines[first:beyond]
    return True
|
|
|
|
|
|
|
|
|
|
|
|
def del_value(lines, token, start=0, end=0, default=None):
    """Delete the line beginning with `token` and return what followed it.

    The line is located with find_token_exact() in `lines[start:end]`.
    For a line that looks like:
      token followed by other stuff
    the line is removed from `lines` and the text after the first
    len(token) characters is returned with leading and trailing
    whitespace removed.

    Return `default` if `token` is not found.
    """
    hit = find_token_exact(lines, token, start, end)
    if hit == -1:
        return default
    removed = lines.pop(hit)
    return removed[len(token):].strip()
|
|
|
|
|
2004-05-11 16:13:33 +00:00
|
|
|
|
2006-08-02 14:25:43 +00:00
|
|
|
def find_beginning_of(lines, i, start_token, end_token):
    """Find the `start_token` line matching an `end_token` at or after `i`.

    `start_token` and `end_token` are meant to be a matching pair, like
    "\\begin_layout" and "\\end_layout".  The search runs backwards from
    line `i` - 1 and keeps track of nesting: every `end_token` met on the
    way has to be balanced by its own `start_token` first.

    Return the index of the matching `start_token` line, or -1 if there
    is none.
    """
    pair = [start_token, end_token]
    depth = 1
    pos = i
    while pos > 0:
        pos = find_tokens_backwards(lines, pair, pos - 1)
        if pos == -1:
            break
        # an end token opens a nested pair, a start token closes one
        depth += 1 if lines[pos].startswith(end_token) else -1
        if depth == 0:
            return pos
    return -1
|
2002-08-19 19:51:01 +00:00
|
|
|
|
2004-05-11 16:13:33 +00:00
|
|
|
|
2006-08-02 14:25:43 +00:00
|
|
|
def find_end_of(lines, i, start_token, end_token):
    """Find the `end_token` line matching a `start_token` at or before `i`.

    `start_token` and `end_token` are meant to be a matching pair, like
    "\\begin_layout" and "\\end_layout".  The search runs forwards from
    line `i` + 1 and keeps track of nesting: every `start_token` met on
    the way has to be balanced by its own `end_token` first.

    Return the index of the matching `end_token` line, or -1 if there
    is none.
    """
    pair = [end_token, start_token]
    total = len(lines)
    depth = 1
    pos = i
    while pos < total:
        pos = find_tokens(lines, pair, pos + 1)
        if pos == -1:
            break
        # a start token opens a nested pair, an end token closes one
        depth += 1 if lines[pos].startswith(start_token) else -1
        if depth == 0:
            return pos
    return -1
|
|
|
|
|
2004-05-11 16:13:33 +00:00
|
|
|
|
2018-01-23 08:45:19 +01:00
|
|
|
def find_nonempty_line(lines, start=0, end=0):
    """Find the next line that contains something besides whitespace.

    The search covers `lines[start:end]`.  As in the other find_*
    functions, an `end` of 0 or one larger than len(lines) means
    "search up to and including the last line".

    Return the index of the first such line, or -1 if there is none.
    """
    # Clamp `end` so that an oversized value cannot index past the list.
    if end == 0 or end > len(lines):
        end = len(lines)
    for i in range(start, end):
        if lines[i].strip():
            return i
    return -1
|
2010-11-04 15:44:32 +00:00
|
|
|
|
|
|
|
|
|
|
|
def find_end_of_inset(lines, i):
    """Find the end of the inset that line `i` belongs to.

    Specialization of find_end_of() for the token pair
    "\\begin_inset" / "\\end_inset"; lines[i] itself counts as part of
    the inset.  Return the index of the "\\end_inset" line or -1.
    """
    opening = "\\begin_inset"
    closing = "\\end_inset"
    return find_end_of(lines, i, opening, closing)
|
|
|
|
|
|
|
|
|
|
|
|
def find_end_of_layout(lines, i):
    """Find the end of the layout that line `i` belongs to.

    Specialization of find_end_of() for the token pair
    "\\begin_layout" / "\\end_layout"; lines[i] itself counts as part of
    the layout.  Return the index of the "\\end_layout" line or -1.
    """
    opening = "\\begin_layout"
    closing = "\\end_layout"
    return find_end_of(lines, i, opening, closing)
|
2010-11-04 17:39:36 +00:00
|
|
|
|
|
|
|
|
2018-01-24 01:02:24 +01:00
|
|
|
def is_in_inset(lines, i, inset, default=(-1,-1)):
    """Check if line `i` is in an inset of the given type.

    `inset` is the beginning of the inset's first line.  If line `i` is
    inside such an inset, return the tuple (start line, end line),
    otherwise return `default`.

    Example:
      is_in_inset(document.body, i, "\\begin_inset Tabular")
    returns (-1,-1) if `i` is not within a "Tabular" inset (i.e. a table).
    If it is, then it returns the line on which the table begins and the
    one on which it ends.
    Note that this pair will evaluate to boolean True, so (with the
    optional default value set to False)
      if is_in_inset(..., default=False):
    will do what you expect.
    """
    begin = find_token_backwards(lines, inset, i)
    if begin == -1:
        return default
    finish = find_end_of_inset(lines, begin)
    # A missing end (-1) is treated like an inset that ends before `i`.
    return (begin, finish) if finish >= i else default
|
2010-11-05 16:04:17 +00:00
|
|
|
|
|
|
|
|
|
|
|
def get_containing_inset(lines, i):
    '''
    Find out what kind of inset line `i` is within.

    Return a tuple
      (inset, start, end)
    containing
      * what follows \\begin_inset on the line on which the inset begins,
      * the starting line number, and
      * the ending line number.
    Return `False` on any kind of error or if line `i` isn't in an inset.

    So get_containing_inset(document.body, i) might return:
      ("CommandInset ref", 300, 306)
    if i is within an InsetRef beginning on line 300 and ending
    on line 306.
    '''
    probe = i
    while True:
        begin = find_token_backwards(lines, "\\begin_inset", probe)
        if begin == -1:
            return False
        finish = find_end_of_inset(lines, begin)
        if finish > probe:
            break
        # this inset is closed at or before the probed line: look further up
        probe = begin - 1

    if finish < i:
        return False

    kind = get_value(lines, "\\begin_inset", begin)
    if kind == "":
        # shouldn't happen
        return False
    return (kind, begin, finish)
|
|
|
|
|
|
|
|
|
|
|
|
def get_containing_layout(lines, i):
    '''
    Find out what kind of layout line `i` is within.

    Return a tuple
      (layoutname, layoutstart, layoutend, startofcontent)
    containing
      * layout style/name,
      * start line number,
      * end line number, and
      * number of first paragraph line (after all params).
    Return `False` if no enclosing layout is found.

    The layout name is "" if it is missing on the \\begin_layout line.
    '''
    j = i
    while True:
        stlay = find_token_backwards(lines, "\\begin_layout", j)
        if stlay == -1:
            return False
        endlay = find_end_of_layout(lines, stlay)
        # The loop is only left with endlay > i, so no further check of
        # `endlay` against `i` is required afterwards.
        if endlay > i:
            break
        j = stlay - 1

    # TODO: decide what to do if the layout style is missing
    # (layoutname == ""): fall back to "Standard" like the LyX parser,
    # raise ValueError, or return False.
    layoutname = get_value(lines, "\\begin_layout", stlay)

    par_params = ("\\noindent", "\\indent", "\\indent-toggle", "\\leftindent",
                  "\\start_of_appendix", "\\paragraph_spacing", "\\align",
                  "\\labelwidthstring")
    # Skip the paragraph parameter lines that directly follow \begin_layout.
    # The scan stops at lines[endlay] at the latest, as that line is no
    # paragraph parameter.
    stpar = stlay
    while True:
        stpar += 1
        if lines[stpar].split(' ', 1)[0] not in par_params:
            break
    return (layoutname, stlay, endlay, stpar)
|
2012-12-09 17:19:21 +01:00
|
|
|
|
|
|
|
|
|
|
|
def count_pars_in_inset(lines, i):
    '''
    Count the paragraphs within the inset that contains line `i`.

    Only \\begin_layout lines whose innermost containing inset is this
    very inset are counted; paragraphs of nested insets are left out.

    Return the number of paragraphs, or -1 if line `i` is not in an inset.
    '''
    ins = get_containing_inset(lines, i)
    # get_containing_inset() signals failure with False (not with -1).
    if not ins:
        return -1
    begin, finish = ins[1], ins[2]
    pars = 0
    for j in range(begin, finish):
        if not re.match(r'\\begin_layout (.*)', lines[j]):
            continue
        owner = get_containing_inset(lines, j)
        if owner and owner[1] == begin:
            pars += 1

    return pars
|
2012-12-19 19:33:39 +01:00
|
|
|
|
|
|
|
|
|
|
|
def find_end_of_sequence(lines, i):
    '''
    Find the end of the sequence of layouts of the same kind as the
    layout that contains line `i`.

    Nesting is considered: if the last paragraph in the sequence is
    nested, the position of the last \\end_deeper is returned, else the
    position of the last \\end_layout.

    Return -1 if line `i` is not inside a layout.
    '''
    lay = get_containing_layout(lines, i)
    if not lay:
        return -1
    layout = lay[0]
    endlay = lay[2]
    i = endlay
    while True:
        m = re.match(r'\\begin_layout (.*)', lines[i])
        if m and m.group(1) != layout:
            # a layout of another kind terminates the sequence
            return endlay
        elif lines[i] == "\\begin_deeper":
            j = find_end_of(lines, i, "\\begin_deeper", "\\end_deeper")
            if j != -1:
                i = j
                endlay = j
                continue
        if m and m.group(1) == layout:
            j = find_end_of_layout(lines, i)
            if j == -1:
                # Unterminated layout: stop at the last known end instead
                # of continuing the scan from index -1.
                break
            endlay = j
            i = j
            continue
        if i == len(lines) - 1:
            break
        i = i + 1

    return endlay
|