lyx_mirror/lib/lyx2lyx/parser_tools.py

# This file is part of lyx2lyx
# -*- coding: utf-8 -*-
# Copyright (C) 2002-2010 Dekel Tsur <dekel@lyx.org>, 
# José Matos <jamatos@lyx.org>, Richard Heck <rgheck@comcast.net>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

" This modules offer several free functions to help parse lines. "

# Utilities for one line
def check_token(line, token):
    """ check_token(line, token) -> bool

    Return True if token is present in line and is the first element
    else returns False."""

    return line[:len(token)] == token


def is_nonempty_line(line):
    """ is_nonempty_line(line) -> bool

    Return False if line is either empty or it has only whitespaces,
    else return True."""
    return line != " "*len(line)


# Utilities for a list of lines
def find_token(lines, token, start, end = 0, exact = False):
    """ find_token(lines, token, start[[, end], exact]) -> int

    Return the lowest line where token is found, and is the first
    element, in lines[start, end].

    Return -1 on failure."""

    if end == 0 or end > len(lines):
        end = len(lines)
    m = len(token)
    for i in xrange(start, end):
        if exact:
            x = lines[i].split()
            y = token.split()
            if len(x) < len(y):
                continue
            if x[:len(y)] == y:
                return i
        else:
            if lines[i][:m] == token:
                return i
    return -1


def find_token_exact(lines, token, start, end = 0):
    return find_token(lines, token, start, end, True)


def find_tokens(lines, tokens, start, end = 0, exact = False):
    """ find_tokens(lines, tokens, start[[, end], exact]) -> int

    Return the lowest line where one token in tokens is found, and is
    the first element, in lines[start, end].

    Return -1 on failure."""
    if end == 0:
        end = len(lines)

    for i in xrange(start, end):
        for token in tokens:
            if exact:
                x = lines[i].split()
                y = token.split()
                if len(x) < len(y):
                    continue
                if x[:len(y)] == y:
                    return i
            else:
                if lines[i][:len(token)] == token:
                    return i
    return -1


def find_tokens_exact(lines, tokens, start, end = 0):
    return find_tokens(lines, tokens, start, end, True)


def find_re(lines, rexp, start, end = 0):
    """ find_token_re(lines, rexp, start[, end]) -> int

    Return the lowest line where rexp, a regular expression, is found
    in lines[start, end].

    Return -1 on failure."""

    if end == 0:
        end = len(lines)
    for i in xrange(start, end):
        if rexp.match(lines[i]):
                return i
    return -1


def find_token_backwards(lines, token, start):
    """ find_token_backwards(lines, token, start) -> int

    Return the highest line where token is found, and is the first
    element, in lines[start, end].

    Return -1 on failure."""
    m = len(token)
    for i in xrange(start, -1, -1):
        line = lines[i]
        if line[:m] == token:
            return i
    return -1


def find_tokens_backwards(lines, tokens, start):
    """ find_tokens_backwards(lines, token, start) -> int

    Return the highest line where token is found, and is the first
    element, in lines[end, start].

    Return -1 on failure."""
    for i in xrange(start, -1, -1):
        line = lines[i]
        for token in tokens:
            if line[:len(token)] == token:
                return i
    return -1


def get_value(lines, token, start, end = 0, default = ""):
    """ get_value(lines, token, start[[, end], default]) -> string

    Find the next line that looks like:
      token followed by other stuff
    Returns "followed by other stuff" with leading and trailing
    whitespace removed.
    """

    i = find_token_exact(lines, token, start, end)
    if i == -1:
        return default
    l = lines[i].split(None, 1)
    if len(l) > 1:
        return l[1].strip()
    return default


def get_quoted_value(lines, token, start, end = 0, default = ""):
    """ get_quoted_value(lines, token, start[[, end], default]) -> string

    Find the next line that looks like:
      token "followed by other stuff"
    Returns "followed by other stuff" with leading and trailing
    whitespace and quotes removed. If there are no quotes, that is OK too.
    So use get_value to preserve possible quotes, this one to remove them,
    if they are there.
    Note that we will NOT strip quotes from default!
    """
    val = get_value(lines, token, start, end, "")
    if not val:
      return default
    return val.strip('"')


def del_token(lines, token, start, end):
    """ del_token(lines, token, start, end) -> int

    Find the lower line in lines where token is the first element and
    delete that line.

    Returns the number of lines remaining."""

    k = find_token_exact(lines, token, start, end)
    if k == -1:
        return end
    else:
        del lines[k]
        return end - 1


def find_beginning_of(lines, i, start_token, end_token):
    count = 1
    while i > 0:
        i = find_tokens_backwards(lines, [start_token, end_token], i-1)
        if i == -1:
            return -1
        if check_token(lines[i], end_token):
            count = count+1
        else:
            count = count-1
        if count == 0:
            return i
    return -1


def find_end_of(lines, i, start_token, end_token):
    count = 1
    n = len(lines)
    while i < n:
        i = find_tokens(lines, [end_token, start_token], i+1)
        if i == -1:
            return -1
        if check_token(lines[i], start_token):
            count = count+1
        else:
            count = count-1
        if count == 0:
            return i
    return -1


def find_nonempty_line(lines, start, end = 0):
    if end == 0:
        end = len(lines)
    for i in xrange(start, end):
        if is_nonempty_line(lines[i]):
            return i
    return -1


def find_end_of_inset(lines, i):
    " Find end of inset, where lines[i] is included."
    return find_end_of(lines, i, "\\begin_inset", "\\end_inset")


def find_end_of_layout(lines, i):
    " Find end of layout, where lines[i] is included."
    return find_end_of(lines, i, "\\begin_layout", "\\end_layout")


# checks if line i is in the given inset
# if so, returns starting and ending lines
# otherwise, returns (-1, -1)
# Example:
#  get_containing_inset(document.body, i, "\\begin_inset Tabular")
# returns (-1, -1) unless i is within a table.
def get_containing_inset(lines, i, inset):
    defval = (-1, -1)
    stins = find_token_backwards(lines, inset, i)
    if stins == -1:
      return defval
    endins = find_end_of_inset(lines, stins)
    # note that this includes the notfound case.
    if endins < i:
      return defval
    return (stins, endins)