lyx_mirror/lib/lyx2lyx/parser_tools.py

262 lines
7.3 KiB
Python
Raw Normal View History

# This file is part of lyx2lyx
# -*- coding: utf-8 -*-
# Copyright (C) 2002-2010 Dekel Tsur <dekel@lyx.org>,
# José Matos <jamatos@lyx.org>, Richard Heck <rgheck@comcast.net>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
" This module offers several free functions to help parse lines. "
# Utilities for one line
def check_token(line, token):
    """ check_token(line, token) -> bool

    Return True if line begins with token, i.e. token is the first
    element of line, else return False.

    This is a plain prefix test: the token "foo" also matches a line
    that starts with "foobar". An empty token matches every line."""
    return line.startswith(token)
def is_nonempty_line(line):
    """ is_nonempty_line(line) -> bool

    Return False if line is either empty or it has only whitespaces
    (spaces, tabs, line terminators, ...), else return True."""
    # Comparing against a run of blanks only recognised space characters;
    # strip() removes every kind of whitespace, so a line holding only
    # tabs or a stray line terminator counts as empty as well.
    return bool(line.strip())
# Utilities for a list of lines
def find_token(lines, token, start, end = 0, exact = False):
    """ find_token(lines, token, start[[, end], exact]) -> int

    Return the lowest line where token is found, and is the first
    element, in lines[start, end].

    If exact is True, line and token are compared word by word (both
    are split on whitespace), so that the token "\\foo" does not match
    a line starting with "\\foobar". Otherwise a plain prefix test is
    done.

    An end of 0, or an end beyond the last line, means "search up to
    the end of lines".

    Return -1 on failure."""
    if end == 0 or end > len(lines):
        end = len(lines)

    # range() instead of xrange(): it exists on Python 2 and Python 3.
    if exact:
        # The words of the token never change, so split them only once.
        words = token.split()
        nwords = len(words)
        for i in range(start, end):
            # A line with fewer words than the token yields a shorter
            # slice and therefore cannot compare equal.
            if lines[i].split()[:nwords] == words:
                return i
    else:
        for i in range(start, end):
            if lines[i].startswith(token):
                return i
    return -1
def find_token_exact(lines, token, start, end = 0):
    """ find_token_exact(lines, token, start[, end]) -> int

    Shorthand for find_token() with exact (word by word) matching
    switched on.
    Return -1 on failure."""
    return find_token(lines, token, start, end, exact=True)
def find_tokens(lines, tokens, start, end = 0, exact = False):
    """ find_tokens(lines, tokens, start[[, end], exact]) -> int

    Return the lowest line where one token in tokens is found, and is
    the first element, in lines[start, end].

    If exact is True, line and token are compared word by word (both
    are split on whitespace); otherwise a plain prefix test is done.

    An end of 0, or an end beyond the last line, means "search up to
    the end of lines".

    Return -1 on failure."""
    # Clamp end like find_token() does, so that an end past the last
    # line cannot run off the list.
    if end == 0 or end > len(lines):
        end = len(lines)

    # range() instead of xrange(): it exists on Python 2 and Python 3.
    if exact:
        # Split every token once, not once per line.
        wordlists = [token.split() for token in tokens]
        for i in range(start, end):
            linewords = lines[i].split()
            for words in wordlists:
                # A too short line gives a shorter slice: no match.
                if linewords[:len(words)] == words:
                    return i
    else:
        # str.startswith() accepts a tuple of alternative prefixes.
        prefixes = tuple(tokens)
        for i in range(start, end):
            if lines[i].startswith(prefixes):
                return i
    return -1
def find_tokens_exact(lines, tokens, start, end = 0):
    """ find_tokens_exact(lines, tokens, start[, end]) -> int

    Shorthand for find_tokens() with exact (word by word) matching
    switched on.
    Return -1 on failure."""
    return find_tokens(lines, tokens, start, end, exact=True)
def find_re(lines, rexp, start, end = 0):
    """ find_re(lines, rexp, start[, end]) -> int

    Return the lowest line where rexp, a compiled regular expression,
    is found in lines[start, end]. The expression is applied with
    rexp.match(), i.e. it has to match at the beginning of the line.

    An end of 0, or an end beyond the last line, means "search up to
    the end of lines".

    Return -1 on failure."""
    # Clamp end like find_token() does, so that an end past the last
    # line cannot run off the list.
    if end == 0 or end > len(lines):
        end = len(lines)
    # range() instead of xrange(): it exists on Python 2 and Python 3.
    for i in range(start, end):
        if rexp.match(lines[i]):
            return i
    return -1
def find_token_backwards(lines, token, start):
    """ find_token_backwards(lines, token, start) -> int

    Return the highest line where token is found, and is the first
    element, in lines[0, start]. The search runs from line start
    (included) down to the first line.
    Return -1 on failure."""
    # range() instead of xrange(): it exists on Python 2 and Python 3.
    for i in range(start, -1, -1):
        if lines[i].startswith(token):
            return i
    return -1
def find_tokens_backwards(lines, tokens, start):
    """ find_tokens_backwards(lines, tokens, start) -> int

    Return the highest line where one token in tokens is found, and is
    the first element, in lines[0, start]. The search runs from line
    start (included) down to the first line.
    Return -1 on failure."""
    # str.startswith() accepts a tuple of alternative prefixes.
    prefixes = tuple(tokens)
    # range() instead of xrange(): it exists on Python 2 and Python 3.
    for i in range(start, -1, -1):
        if lines[i].startswith(prefixes):
            return i
    return -1
def get_value(lines, token, start, end = 0, default = ""):
    """ get_value(lines, token, start[[, end], default]) -> string

    Locate, via find_token_exact(), the first line in lines[start, end]
    that reads
      token followed by other stuff
    and hand back "followed by other stuff" without its leading and
    trailing whitespace.

    The value is everything after the first word of that line. default
    is returned if no line matches or if the matching line has nothing
    after its first word.
    """
    idx = find_token_exact(lines, token, start, end)
    if idx == -1:
        return default
    # Cut the line once, right after its first whitespace-separated word.
    fields = lines[idx].split(None, 1)
    if len(fields) < 2:
        return default
    return fields[1].strip()
def get_quoted_value(lines, token, start, end = 0, default = ""):
    """ get_quoted_value(lines, token, start[[, end], default]) -> string

    Same search as get_value(), for a line that reads
      token "followed by other stuff"
    but any double quotes at either end of the value are removed
    together with the surrounding whitespace. A value without quotes
    is fine, too.

    Use get_value to keep possible quotes and this function to drop
    them. default is returned untouched (quotes in it are NOT
    stripped) when no non-empty value is found.
    """
    # Search with an empty default so that "nothing found" can be told
    # apart from the caller's own default.
    value = get_value(lines, token, start, end, "")
    return value.strip('"') if value else default
def del_token(lines, token, start, end):
    """ del_token(lines, token, start, end) -> int

    Look for the lowest line in lines[start, end] whose first element
    is token (exact match) and delete it from lines in place.

    Return the adjusted end: end - 1 if a line was deleted, end
    unchanged otherwise."""
    pos = find_token_exact(lines, token, start, end)
    if pos != -1:
        del lines[pos]
        return end - 1
    return end
def find_beginning_of(lines, i, start_token, end_token):
    """ find_beginning_of(lines, i, start_token, end_token) -> int

    Search backwards, beginning at line i-1, for the start_token line
    that is still unmatched, skipping nested start_token/end_token
    pairs on the way.
    Return the index of that line, or -1 if there is none."""
    # Number of start_token lines that still have to be met.
    pending = 1
    both = [start_token, end_token]
    while i > 0:
        i = find_tokens_backwards(lines, both, i - 1)
        if i == -1:
            return -1
        # Going upwards, an end_token opens one more nested pair and
        # any other hit (a start_token) closes one.
        pending += 1 if check_token(lines[i], end_token) else -1
        if not pending:
            return i
    return -1
def find_end_of(lines, i, start_token, end_token):
    """ find_end_of(lines, i, start_token, end_token) -> int

    Search forwards, beginning at line i+1, for the end_token line
    that is still unmatched, skipping nested start_token/end_token
    pairs on the way.
    Return the index of that line, or -1 if there is none."""
    # Number of end_token lines that still have to be met.
    pending = 1
    total = len(lines)
    both = [end_token, start_token]
    while i < total:
        i = find_tokens(lines, both, i + 1)
        if i == -1:
            return -1
        # A start_token opens one more nested pair and any other hit
        # (an end_token) closes one.
        pending += 1 if check_token(lines[i], start_token) else -1
        if not pending:
            return i
    return -1
def find_nonempty_line(lines, start, end = 0):
    """ find_nonempty_line(lines, start[, end]) -> int

    Return the lowest line in lines[start, end] that
    is_nonempty_line() reports as non-empty.

    An end of 0, or an end beyond the last line, means "search up to
    the end of lines".

    Return -1 on failure."""
    # Clamp end like find_token() does, so that an end past the last
    # line cannot run off the list.
    if end == 0 or end > len(lines):
        end = len(lines)
    # range() instead of xrange(): it exists on Python 2 and Python 3.
    for i in range(start, end):
        if is_nonempty_line(lines[i]):
            return i
    return -1
def find_end_of_inset(lines, i):
    """ find_end_of_inset(lines, i) -> int

    Find the end of the inset that lines[i] belongs to; lines[i]
    itself counts as part of the inset and the search starts at the
    following line.
    Return the index of the closing line, or -1 if there is none."""
    return find_end_of(lines, i,
                       start_token="\\begin_inset",
                       end_token="\\end_inset")
def find_end_of_layout(lines, i):
    """ find_end_of_layout(lines, i) -> int

    Find the end of the layout that lines[i] belongs to; lines[i]
    itself counts as part of the layout and the search starts at the
    following line.
    Return the index of the closing line, or -1 if there is none."""
    return find_end_of(lines, i,
                       start_token="\\begin_layout",
                       end_token="\\end_layout")
def get_containing_inset(lines, i, inset):
    """ get_containing_inset(lines, i, inset) -> (int, int)

    Check if line i is in the given inset, i.e. in an inset whose
    opening line starts with the string inset.
    If so, return its starting and ending lines;
    otherwise, return (-1, -1).

    Example:
      get_containing_inset(document.body, i, "\\begin_inset Tabular")
    returns (-1, -1) unless i is within a table."""
    defval = (-1, -1)
    pos = i
    while pos >= 0:
        stins = find_token_backwards(lines, inset, pos)
        if stins == -1:
            return defval
        endins = find_end_of_inset(lines, stins)
        # i is not negative here, so a failed search for the end (-1)
        # never passes this test.
        if endins >= i:
            return (stins, endins)
        # This candidate is closed before line i (e.g. a nested inset
        # of the same kind that has already ended), but an enclosing
        # inset may start further up: keep looking above it instead of
        # giving up at the first candidate.
        pos = stins - 1
    return defval