lyx_mirror/src/mathed/math_parser.C

845 lines
19 KiB
C++
Raw Normal View History

/*
* File: math_parser.C
* Purpose: Parser for mathed
* Author: Alejandro Aguilar Sierra <asierra@servidor.unam.mx>
* Created: January 1996
* Description: Parse LaTeX2e math mode code.
*
* Dependencies: Xlib, XForms
*
* Copyright: (c) 1996, Alejandro Aguilar Sierra
*
* Version: 0.8beta.
*
* You are free to use and modify this code under the terms of
* the GNU General Public Licence version 2 or later.
*/
#include <config.h>
#include <stdlib.h>
#include <ctype.h>
#ifdef __GNUG__
#pragma implementation "math_parser.h"
#endif
#include "math_parser.h"
#include "math_iter.h"
#include "math_inset.h"
#include "math_macro.h"
#include "math_root.h"
#include "error.h"
// Bit flags for the `flags` argument of mathed_parse().  Each value is a
// distinct power of two so several of them can be OR-ed together; the parser
// sets and clears individual bits while it runs.
enum {
FLAG_BRACE = 1, // A { needed
FLAG_BRACE_ARG = 2, // Next { is argument
FLAG_BRACE_OPT = 4, // Optional {
FLAG_BRACE_LAST = 8, // Last } ends the parsing process
FLAG_BRACK_ARG = 16, // Optional [
FLAG_RIGHT = 32, // Next right ends the parsing process
FLAG_END = 64, // Next end ends the parsing process
FLAG_BRACE_FONT = 128, // Next } closes a font
FLAG_BRACK_END = 256 // Next ] ends the parsing process
};
// Value attached to the token most recently returned by yylex(); which member
// (i, l or s) is filled in depends on the token.
YYSTYPE yylval;
// Math environment currently being parsed.  Set by mathed_parse() when it
// sees LM_TK_BEGIN and compared against the argument of LM_TK_END.
static short mathed_env = LM_EN_INTEXT;
// Label text stored by mathed_parse() for a \label found while there is no
// current row; allocated with strnew() and not freed in this file.
char *mathed_label = NULL;
// Environment names.  yylex() compares the argument of a begin/end command
// against these entries in order and reports the index of the match.
char const *latex_mathenv[] = {
"math",
"displaymath",
"equation",
"eqnarray*",
"eqnarray",
"array"
};
// Math spacing commands.  yylex() only searches the first four
// (single-character) entries when it classifies "\<char>".
char const *latex_mathspace[] = {
"!", ",", ":", ";", "quad", "qquad"
};
// Characters that yylex() reports as LM_TK_SPECIAL when they follow a backslash.
char const *latex_special_chars = "#$%&_{}";
// These are lexical codes, not semantic
// Every byte value is mapped to one of these classes by LexInitCodes();
// yylex() then dispatches on the class of the character it has just read.
enum lexcode_enum {
LexNone, // no special class (default for unlisted characters)
LexESC, // backslash
LexAlpha, // letters, plus the apostrophe
LexDigit, // decimal digits
LexBOP, // Binary operators or relations
LexMathSpace, // ! , : ;
LexOpen, // {
LexClose, // }
LexComment, // %
LexArgument, // #
LexSpace, // white space other than newline
LexNewLine, // \n
LexOther, // ( ) | . ?
LexSelf // [ ] ^ _ & -- yylex() returns the character itself as the token
};
// Lexical class of every byte value; filled in by LexInitCodes().
static lexcode_enum lexcode[256];
// Scratch buffer shared by yylex() (command names) and LexGetArg()
// (bracketed arguments).  Each call overwrites the previous contents.
static char yytext[256];
// Line counter: initialised by mathed_parser_file(), incremented by yylex()
// on newlines and reported by mathed_parser_lineno().
static int yylineno;
// Input stream the lexer reads from; set by mathed_parser_file().
static FILE *yyin;
// When true, yylex() returns a blank as LM_TK_ALPHA.  mathed_parse() turns
// this on for the LM_TC_TEXTRM font and off again when the font group closes.
static bool yy_mtextmode=false;
// Return a heap-allocated copy of the NUL-terminated string `s`.
// The copy is created with new[]; the caller owns it and must release it
// with delete[].  `s` must not be null.
inline
char *strnew(char const* s)
{
	size_t const bytes = strlen(s) + 1;   // include the terminating NUL
	char *copy = new char[bytes];
	memcpy(copy, s, bytes);
	return copy;
}
// Report a math parse error on stderr, prefixed with the lexer's current
// line counter.
static void mathPrintError(char const *msg)
{
	int const line = yylineno;
	FILE *const out = stderr;

	fprintf(out, "Line ~%d: Math parse error: %s\n", line, msg);
}
static void LexInitCodes()
{
int i;
for (i=0; i<=255; i++) {
if (isalpha(i)) lexcode[i] = LexAlpha;
else if (isdigit(i)) lexcode[i] = LexDigit;
else if (isspace(i)) lexcode[i] = LexSpace;
else lexcode[i] = LexNone;
}
lexcode['\t'] = lexcode['\f'] = lexcode[' '] = LexSpace;
lexcode['\n'] = LexNewLine;
lexcode['%'] = LexComment;
lexcode['#'] = LexArgument;
lexcode['+'] = lexcode['-'] = lexcode['*'] = lexcode['/'] =
lexcode['<'] = lexcode['>'] = lexcode['='] = LexBOP;
lexcode['!'] = lexcode[','] = lexcode[':'] = lexcode[';'] = LexMathSpace;
lexcode['('] = lexcode[')'] = lexcode['|'] = lexcode['.'] = lexcode['?'] = LexOther;
lexcode['\'']= LexAlpha;
lexcode['['] = lexcode[']'] = lexcode['^'] = lexcode['_'] =
lexcode['&'] = LexSelf;
lexcode['\\'] = LexESC;
lexcode['{'] = LexOpen;
lexcode['}'] = LexClose;
}
// Read one bracketed argument from yyin into yytext.
//
// lf             expected opening bracket ('{', '[' or '(').  Pass 0 to
//                accept whatever the first character above ' ' is.
// accept_spaces  when true, blanks inside the argument are kept; other
//                characters <= ' ' are always dropped.
//
// Leading characters <= ' ' are skipped.  If the first other character is
// not `lf`, a message is printed but parsing continues as if it had been.
// Nested brackets of the same kind are tracked, and the inner ones are
// copied into yytext.  The result is always NUL-terminated; text that does
// not fit into yytext is consumed from the stream but discarded.
//
// Returns the closing bracket that matches `lf`, or '\0' (after printing a
// message) when `lf` is not a known bracket.
static char LexGetArg(char lf, bool accept_spaces=false)
{
	char c, rg, *p = &yytext[0];
	// Last writable position: reserved for the terminating NUL.
	char *const last = &yytext[sizeof(yytext) - 1];
	int bcnt =1;

	while (!feof(yyin)) {
		c = getc(yyin);
		if (c>' ') {
			if (!lf)
				lf = c;
			else if (c!=lf)
				fprintf(stderr, "Math parse error: unexpected '%c'\n", c);
			break;
		}
	}
	rg = (lf=='{') ? '}': ((lf=='[') ? ']': ((lf=='(') ? ')': 0));
	if (!rg) {
		fprintf(stderr, "Math parse error: unknown bracket '%c'\n", lf);
		return '\0';
	}
	do {
		c = getc(yyin);
		if (c==lf) bcnt++;
		if (c==rg) bcnt--;
		// Bounded store: never write past the end of yytext.
		if ((c>' ' || (c==' ' && accept_spaces)) && bcnt>0 && p < last)
			*(p++) = c;
	} while (bcnt>0 && !feof(yyin));
	*p = '\0';
	return rg;
}
// Lexer for LaTeX math mode: read characters from yyin and return the next
// token, or 0 at end of input.
//
// The token's value is left in yylval:
//   - yylval.i  character code, argument number, space id or environment index
//   - yylval.l  keyword table entry for recognised commands
//   - yylval.s  command name (pointing into yytext) for LM_TK_UNDEF
// Characters of class LexSelf are returned as themselves.  Newlines only
// advance yylineno, and comments are skipped up to the end of the line.
static int yylex(void)
{
	static int init_done = 0;
	unsigned char c;

	// Build the character class table once, not on every call.
	if (!init_done) {
		LexInitCodes();
		init_done = 1;
	}

	while (!feof(yyin)) {
		// Read into an int first so that EOF is not mistaken for byte 0xFF.
		int const ch = getc(yyin);
		if (ch == EOF) break;
		c = (unsigned char)ch;

		if (yy_mtextmode && c==' ') {
			yylval.i=' ';
			return LM_TK_ALPHA;
		}
		if (lexcode[c]==LexNewLine) {
			yylineno++;
			continue;
		}
		if (lexcode[c]==LexComment) {
			// eat comments: everything up to the end of the line
			do c = getc(yyin); while (c!='\n' && !feof(yyin));
			// The newline that ends the comment is consumed here, so count it.
			if (c=='\n') yylineno++;
			continue;
		}
		if (lexcode[c]==LexDigit || lexcode[c]==LexOther || lexcode[c]==LexMathSpace)
		{ yylval.i= c; return LM_TK_STR; }
		if (lexcode[c]==LexAlpha) { yylval.i=c; return LM_TK_ALPHA; }
		if (lexcode[c]==LexBOP) { yylval.i=c; return LM_TK_BOP; }
		if (lexcode[c]==LexSelf) { return c; }
		if (lexcode[c]==LexArgument) {
			// "#n": the digit after '#' is the macro argument number
			c = getc(yyin);
			yylval.i = c - '0';
			return LM_TK_ARGUMENT;
		}
		if (lexcode[c]==LexOpen) { return LM_TK_OPEN; }
		if (lexcode[c]==LexClose) { return LM_TK_CLOSE; }
		if (lexcode[c]==LexESC) {
			c = getc(yyin);
			if (c=='\\') { return LM_TK_NEWLINE; }
			if (c=='(') { yylval.i = LM_EN_INTEXT; return LM_TK_BEGIN; }
			if (c==')') { yylval.i = LM_EN_INTEXT; return LM_TK_END; }
			if (c=='[') { yylval.i = LM_EN_DISPLAY; return LM_TK_BEGIN; }
			if (c==']') { yylval.i = LM_EN_DISPLAY; return LM_TK_END; }
			if (strchr(latex_special_chars, c)) {
				yylval.i = c;
				return LM_TK_SPECIAL;
			}
			if (lexcode[c]==LexMathSpace) {
				// Only the first four latex_mathspace entries are single characters.
				int i;
				for (i=0; i<4 && c!=latex_mathspace[i][0]; i++);
				yylval.i = (i<4) ? i: 0;
				return LM_TK_SPACE;
			}
			if (lexcode[c]==LexAlpha || lexcode[c]==LexDigit) {
				// Collect the command name.  Characters that do not fit into
				// yytext are consumed but dropped, and the loop also stops at
				// end of input.
				char* p = &yytext[0];
				char *const last = &yytext[sizeof(yytext) - 1];
				while ((lexcode[c]==LexAlpha || lexcode[c]==LexDigit)
				       && !feof(yyin)) {
					if (p < last) {
						*p = c;
						p++;
					}
					c = getc(yyin);
				}
				*p = '\0';
				if (!feof(yyin)) ungetc(c, yyin);
				latexkeys *l = in_word_set (yytext, strlen(yytext));
				if (l) {
					if (l->token==LM_TK_BEGIN || l->token==LM_TK_END) {
						// The environment name follows in braces; report its
						// index in latex_mathenv (6 when it is not listed).
						int i;
						LexGetArg('{');
						for (i=0; i<6 && strcmp(yytext, latex_mathenv[i]); i++);
						yylval.i = i;
					} else
					if (l->token==LM_TK_SPACE)
						yylval.i = l->id;
					else
						yylval.l = l;
					return l->token;
				} else {
					yylval.s = yytext;
					return LM_TK_UNDEF;
				}
			}
		}
	}
	return 0;
}
// Count the leading characters of `hor` that are greater than ' '.
// The caller uses the result as the number of columns of an array
// alignment specification.  A null `hor` gives 0; the second parameter is
// unused.
int parse_align(char *hor, char *)
{
	if (!hor)
		return 0;

	int columns = 0;
	while (hor[columns] > ' ')
		++columns;
	return columns;
}
// Accent hacks only for 0.12. Stolen from Cursor.
// Number of accents waiting to be applied to the next symbol.
int accent = 0;
// The pending accent ids; nestaccent[0] is the one that was set first.
int nestaccent[8];

// Push accent `ac` onto the pending stack.  A non-positive `ac`, or a stack
// that is already full, clears every pending accent instead.
void setAccent(int ac)
{
	if (ac <= 0 || accent >= 8) {
		accent = 0; // consumed!
		return;
	}
	nestaccent[accent] = ac;
	++accent;
}
// Wrap character `c` (with text code `t`) in all pending accents and clear
// the pending stack.  The most recently set accent is applied directly to
// the character and each earlier one wraps the result.  Returns the
// outermost accent inset, or 0 when no accent was pending.
MathedInset *doAccent(byte c, MathedTextCodes t)
{
	MathedInset *result = 0;
	int level = accent;

	if (level > 0) {
		--level;
		result = new MathAccentInset(c, t, nestaccent[level]);
		while (level > 0) {
			--level;
			result = new MathAccentInset(result, nestaccent[level]);
		}
	}
	accent = 0; // consumed!
	return result;
}
// Wrap inset `p` in all pending accents and clear the pending stack.  The
// most recently set accent is applied directly to `p` and each earlier one
// wraps the result.  Returns the outermost accent inset, or 0 (not `p`)
// when no accent was pending.
MathedInset *doAccent(MathedInset *p)
{
	MathedInset *outer = 0;
	MathedInset *inner = p;

	for (int level = accent; level-- > 0; ) {
		outer = new MathAccentInset(inner, nestaccent[level]);
		inner = outer;
	}
	accent = 0; // consumed!
	return outer;
}
// Recursive parser for LaTeX math: pull tokens from yylex() and append
// the corresponding characters and insets to a math array.
//
// flags  OR-ed FLAG_* bits that say which construct is expected first
//        (e.g. a mandatory '{') and which token ends this invocation.
// array  array to append to; a new LyxArrayBase is allocated when null.
// mtx    optional in/out pointer to the paragraph being filled.  It is
//        overwritten when this call creates a new one (an eqnarray-like
//        environment, or a macro template for LM_TK_NEWCOMMAND).
//
// Returns the array that was filled.  When the call ends on LM_TK_END and a
// paragraph is known, the array is handed over to that paragraph and null
// is returned instead.
//
// The function calls itself for the arguments of scripts, fractions,
// roots, delimiters, decorations, macros and nested environments.  The
// nesting level, current style size and current macro template are kept in
// function-local statics, so the function is not reentrant.
LyxArrayBase *mathed_parse(unsigned flags, LyxArrayBase *array, MathParInset **mtx)
{
	int t = yylex(), tprev = 0;
	bool panic = false;
	static int plevel = -1;
	static int size = LM_ST_TEXT;
	MathedTextCodes varcode = LM_TC_VAR;
	MathedInset* binset = 0;
	static MathMacroTemplate *macro=0;
	int brace = 0;
	int acc_brace = 0;
	// NOTE(review): acc_braces is indexed without a bound check; it relies
	// on at most 8 accent groups being open at once -- confirm.
	int acc_braces[8];
	MathParInset *mt = (mtx) ? *mtx: 0;
	MathedRowSt *crow = (mt) ? mt->getRowSt(): 0;

	plevel++;
	if (!array) array = new LyxArrayBase;
	MathedIter data(array);
	while (t) {
		// A mandatory '{' is pending: only '{' itself, or '[' when an
		// optional bracket argument is allowed, may come next.
		if ((flags & FLAG_BRACE) && t != LM_TK_OPEN) {
			if ((flags & FLAG_BRACK_ARG) && t=='[') {
				// accepted; handled by the '[' case below
			}
			else {
				mathPrintError("Expected {. Maybe you forgot to enclose an argument in {}");
				panic = true;
				break;
			}
		}
		MathedInsetTypes fractype = LM_OT_FRAC;
		switch (t) {
		case LM_TK_ALPHA:
		{
			if (accent) {
				data.Insert(doAccent(yylval.i, varcode));
			} else
				data.Insert (yylval.i, varcode);
			break;
		}
		case LM_TK_ARGUMENT:
		{
			// "#n" is only meaningful inside a macro template
			if (macro) {
				data.Insert(macro->getMacroPar(yylval.i-1), LM_TC_INSET);
			}
			break;
		}
		case LM_TK_NEWCOMMAND:
		{
			int na = 0;
			LexGetArg('{');
			// This name lives until quitting, for that reason
			// I didn't care on deleting explicitly. Later I will.
			// yytext[0] is skipped here (presumably the leading backslash
			// of the macro name).
			char const *name = strnew(&yytext[1]);
			// ugly trick to be removed soon (lyx3)
			// Peek at the next character to see whether "[nargs]" follows.
			char c = getc(yyin);
			ungetc(c, yyin);
			if (c=='[') {
				LexGetArg('[');
				na = atoi(yytext);
			}
			macro = new MathMacroTemplate(name, na);
			flags = FLAG_BRACE|FLAG_BRACE_LAST;
			// mtx is optional; do not write through a null pointer.
			if (mtx) *mtx = macro;
			macro->SetData(array);
			break;
		}
		case LM_TK_SPECIAL:
		{
			data.Insert (yylval.i, LM_TC_SPECIAL);
			break;
		}
		case LM_TK_STR:
		{
			if (accent) {
				data.Insert(doAccent(yylval.i, LM_TC_CONST));
			} else
				data.Insert (yylval.i, LM_TC_CONST);
			break;
		}
		case LM_TK_OPEN:
		{
			brace++;
			// A brace right after an accent command only groups the
			// accent's argument; remember its depth so that the matching
			// '}' can be dropped as well.
			if (accent && tprev==LM_TK_ACCENT) {
				acc_braces[acc_brace++] = brace;
				break;
			}
			if (flags & FLAG_BRACE_OPT) {
				flags &= ~FLAG_BRACE_OPT;
				flags |= FLAG_BRACE;
			}
			if (flags & FLAG_BRACE)
				flags &= ~FLAG_BRACE;
			else {
				data.Insert ('{', LM_TC_TEX);
			}
			break;
		}
		case LM_TK_CLOSE:
		{
			brace--;
			if (brace < 0) {
				mathPrintError("Unmatching braces");
				panic = true;
				break;
			}
			// closes the innermost accent group
			if (acc_brace && brace==acc_braces[acc_brace-1]-1) {
				acc_brace--;
				break;
			}
			// closes a font group: back to the default font
			if (flags & FLAG_BRACE_FONT) {
				varcode = LM_TC_VAR;
				yy_mtextmode = false;
				flags &= ~FLAG_BRACE_FONT;
				break;
			}
			if (brace == 0 && (flags & FLAG_BRACE_LAST)) {
				plevel--;
				return array;
			} else {
				data.Insert ('}', LM_TC_TEX);
			}
			break;
		}
		case '[':
		{
			if (flags & FLAG_BRACK_ARG) {
				flags &= ~FLAG_BRACK_ARG;
				char rg=LexGetArg('[');
				if (rg!=']') {
					mathPrintError("Expected ']'");
					panic = true;
					break;
				}
			} else
				data.Insert ('[');
			break;
		}
		case ']':
		{
			if (flags & FLAG_BRACK_END) {
				plevel--;
				return array;
			} else
				data.Insert (']');
			break;
		}
		case '^':
		{
			MathParInset *p = new MathParInset(size, "", LM_OT_SCRIPT);
			LyxArrayBase * ar = mathed_parse(FLAG_BRACE_OPT|FLAG_BRACE_LAST, NULL);
			p->SetData(ar);
			data.Insert (p, LM_TC_UP);
			break;
		}
		case '_':
		{
			MathParInset *p = new MathParInset(size, "", LM_OT_SCRIPT);
			LyxArrayBase * ar = mathed_parse(FLAG_BRACE_OPT|FLAG_BRACE_LAST, NULL);
			p->SetData(ar);
			data.Insert (p, LM_TC_DOWN);
			break;
		}
		case LM_TK_LIMIT:
		{
			// applies to the big operator inserted last, if any
			if (binset) {
				binset->SetLimits((bool)(yylval.l->id));
				binset = NULL;
			}
			break;
		}
		case '&': // Tab
		{
			if ((flags & FLAG_END) && mt && data.getCol()<mt->GetColumns()-1) {
				data.setNumCols(mt->GetColumns());
				data.Insert('T', LM_TC_TAB);
			} else
				mathPrintError("Unexpected tab");
			// debug info. [made that conditional -JMarc]
			// mt may be null on the error path above.
			if (mt && lyxerr.debugging(Error::MATHED))
				fprintf(stderr, "%d %d\n", data.getCol(), mt->GetColumns());
			break;
		}
		case LM_TK_NEWLINE:
		{
			if (mt && (flags & FLAG_END)) {
				if (mt->Permit(LMPF_ALLOW_CR)) {
					if (crow) {
						crow->setNext(new MathedRowSt(mt->GetColumns()+1)); // this leaks
						crow = crow->getNext();
					}
					data.Insert('K', LM_TC_CR);
				} else
					mathPrintError("Unexpected newline");
			}
			break;
		}
		case LM_TK_BIGSYM:
		{
			// remembered so that a following LM_TK_LIMIT can modify it
			binset = new MathBigopInset(yylval.l->name,yylval.l->id);
			data.Insert(binset);
			break;
		}
		case LM_TK_SYM:
		{
			if (yylval.l->id < 256) {
				MathedTextCodes tc = MathIsBOPS(yylval.l->id) ? LM_TC_BOPS: LM_TC_SYMB;
				if (accent) {
					data.Insert(doAccent(yylval.l->id, tc));
				} else
					data.Insert (yylval.l->id, tc);
			} else {
				MathFuncInset *bg = new MathFuncInset(yylval.l->name);
				if (accent) {
					data.Insert(doAccent(bg));
				} else
					data.Insert(bg, true);
			}
			break;
		}
		case LM_TK_BOP:
		{
			if (accent) {
				data.Insert(doAccent(yylval.i, LM_TC_BOP));
			} else
				data.Insert (yylval.i, LM_TC_BOP);
			break;
		}
		case LM_TK_STY:
		{
			if (mt) {
				mt->UserSetSize(yylval.l->id);
			}
			break;
		}
		case LM_TK_SPACE:
		{
			if (yylval.i>=0) {
				MathSpaceInset *sp = new MathSpaceInset(yylval.i);
				data.Insert(sp);
			}
			break;
		}
		case LM_TK_DOTS:
		{
			MathDotsInset *p = new MathDotsInset(yylval.l->name, yylval.l->id);
			data.Insert(p);
			break;
		}
		case LM_TK_STACK:
			fractype = LM_OT_STACKREL;
			// fall through: parsed like a fraction, with a different type
		case LM_TK_FRAC:
		{
			MathFracInset *fc = new MathFracInset(fractype);
			LyxArrayBase* num = mathed_parse(FLAG_BRACE|FLAG_BRACE_LAST);
			LyxArrayBase* den = mathed_parse(FLAG_BRACE|FLAG_BRACE_LAST);
			fc->SetData(num, den);
			data.Insert(fc, LM_TC_ACTIVE_INSET);
			break;
		}
		case LM_TK_SQRT:
		{
			MathParInset *rt;
			// An immediately following '[' introduces the root index.
			char c = getc(yyin);
			if (c=='[') {
				rt = new MathRootInset(size);
				rt->setArgumentIdx(0);
				rt->SetData(mathed_parse(FLAG_BRACK_END, 0, &rt));
				rt->setArgumentIdx(1);
			} else {
				ungetc(c, yyin);
				rt = new MathSqrtInset(size);
			}
			rt->SetData(mathed_parse(FLAG_BRACE|FLAG_BRACE_LAST, 0, &rt));
			data.Insert(rt, LM_TC_ACTIVE_INSET);
			break;
		}
		case LM_TK_LEFT:
		{
			// The token after \left is the opening delimiter; the
			// recursive call returns at \right, whose delimiter is read next.
			int lfd, rgd;
			lfd=yylex();
			if (lfd==LM_TK_SYM || lfd==LM_TK_STR || lfd==LM_TK_BOP|| lfd==LM_TK_SPECIAL)
				lfd = (lfd==LM_TK_SYM) ? yylval.l->id: yylval.i;
			LyxArrayBase* a = mathed_parse(FLAG_RIGHT);
			rgd=yylex();
			if (rgd==LM_TK_SYM || rgd==LM_TK_STR || rgd==LM_TK_BOP || rgd==LM_TK_SPECIAL)
				rgd = (rgd==LM_TK_SYM) ? yylval.l->id: yylval.i;
			MathDelimInset *dl = new MathDelimInset(lfd, rgd);
			dl->SetData(a);
			data.Insert(dl, LM_TC_ACTIVE_INSET);
			break;
		}
		case LM_TK_RIGHT:
		{
			if (flags & FLAG_RIGHT) {
				plevel--;
				return array;
			} else {
				mathPrintError("Unmatched right delimiter");
			}
			break;
		}
		case LM_TK_FONT:
		{
			// The font applies to the brace group that must follow.
			varcode = (MathedTextCodes)yylval.l->id;
			yy_mtextmode = (bool)(varcode==LM_TC_TEXTRM);
			flags |= (FLAG_BRACE|FLAG_BRACE_FONT);
			break;
		}
		case LM_TK_WIDE:
		{
			MathDecorationInset *sq = new MathDecorationInset(yylval.l->id, size);
			sq->SetData(mathed_parse(FLAG_BRACE|FLAG_BRACE_LAST));
			data.Insert(sq, LM_TC_ACTIVE_INSET);
			break;
		}
		case LM_TK_ACCENT: setAccent(yylval.l->id); break;
		case LM_TK_NONUM:
		{
			if (crow)
				crow->setNumbered(false);
			break;
		}
		case LM_TK_PMOD:
		case LM_TK_FUNC:
		{
			MathedInset *bg = new MathFuncInset(yylval.l->name);
			if (accent) {
				// Wrap the function inset in the pending accents, as the
				// other symbol cases do.
				data.Insert(doAccent(bg));
			} else
				data.Insert(bg);
			break;
		}
		case LM_TK_FUNCLIM:
		{
			data.Insert(new MathFuncInset(yylval.l->name, LM_OT_FUNCLIM));
			break;
		}
		case LM_TK_UNDEF:
		{
			// Not a built-in keyword: either a user macro or unknown.
			MathMacro* p =
				MathMacroTable::mathMTable.getMacro(yylval.s);
			if (p) {
				if (accent)
					data.Insert(doAccent(p), p->getTCode());
				else
					data.Insert(p, p->getTCode());
				// one braced group per macro argument
				for (int i=0; p->setArgumentIdx(i); i++)
					p->SetData(mathed_parse(FLAG_BRACE|FLAG_BRACE_LAST));
			}
			else {
				MathedInset *q = new MathFuncInset(yylval.s, LM_OT_UNDEF);
				if (accent) {
					data.Insert(doAccent(q));
				} else {
					data.Insert(q);
				}
			}
			break;
		}
		case LM_TK_END:
		{
			if (mathed_env != yylval.i && yylval.i!=LM_EN_ARRAY)
				mathPrintError("Unmatched environment");
			// debug info [made that conditional -JMarc]
			if (lyxerr.debugging(Error::MATHED))
				fprintf(stderr, "[%d]\n", yylval.i);
			plevel--;
			// Hand the array over to the paragraph; the caller then gets null.
			if (mt) {
				mt->SetData(array);
				array = NULL;
			}
			return array;
		}
		case LM_TK_BEGIN:
		{
			if (yylval.i==LM_EN_ARRAY) {
				char ar[120], ar2[8];
				ar[0] = ar2[0] = '\0';
				// Optional "[pos]" followed by the mandatory "{columns}".
				// yytext can hold more than these buffers, so the copies
				// are bounded and always terminated.
				char rg=LexGetArg(0);
				if (rg==']') {
					strncpy(ar2, yytext, sizeof(ar2) - 1);
					ar2[sizeof(ar2) - 1] = '\0';
					rg = LexGetArg('{');
				}
				strncpy(ar, yytext, sizeof(ar) - 1);
				ar[sizeof(ar) - 1] = '\0';
				int nc = parse_align(ar, ar2);
				MathParInset* mm = new MathMatrixInset(nc, 0);
				mm->SetAlign(ar2[0], ar);
				data.Insert(mm, LM_TC_ACTIVE_INSET);
				mathed_parse(FLAG_END, mm->GetData(), &mm);
			} else
			if (yylval.i>=LM_EN_INTEXT && yylval.i<=LM_EN_EQNARRAY) {
				if (plevel!=0) {
					mathPrintError("Misplaced environment");
					break;
				}
				if (!mt) {
					mathPrintError("NULL paragraph.");
					panic = true;
				}
				mathed_env = yylval.i;
				if (mathed_env>=LM_EN_DISPLAY) {
					size = LM_ST_DISPLAY;
					if (mathed_env>LM_EN_EQUATION) {
						// eqnarray-like: three columns aligned "rcl"
						mt = new MathMatrixInset(3, -1);
						mt->SetAlign(' ', "rcl");
						if (mtx) *mtx = mt;
						flags |= FLAG_END;
					}
					// mt is still null here when "NULL paragraph." was
					// reported above.
					if (mt) {
						mt->SetStyle(size);
						mt->SetType(mathed_env);
						crow = mt->getRowSt();
					}
				}
#ifdef DEBUG
				fprintf(stderr, "MATH BEGIN[%d]\n", mathed_env);
#endif
			} else {
				// Unknown environment name: try it as a macro.
				MathMacro* p =
					MathMacroTable::mathMTable.getMacro(yytext);
				if (p) {
					data.Insert(p, p->getTCode());
					p->setArgumentIdx(0);
					mathed_parse(FLAG_END, p->GetData(), (MathParInset**)&p);
				} else
					mathPrintError("Unrecognized environment");
			}
			break;
		}
		case LM_TK_MACRO:
		{
			MathedInset* p =
				MathMacroTable::mathMTable.getMacro(yylval.l->name);
			if (p) {
				if (accent) {
					data.Insert(doAccent(p));
				} else
					data.Insert(p, ((MathMacro*)p)->getTCode());
			}
			break;
		}
		case LM_TK_LABEL:
		{
			char rg = LexGetArg('\0', true);
			if (rg != '}') {
				mathPrintError("Expected '{'");
				// debug info
				fprintf(stderr, "[%s]\n", yytext); fflush(stderr);
				panic = true;
				break;
			}
			if (crow) {
				// This is removed by crow's destructor. Bad design? yes, this
				// will be changed after 0.12
				crow->setLabel(strnew(yytext));
			}
			else {
				// where is this math_label free'ed?
				// Supposedly in ~formula, another bad hack,
				// give me some time please.
				mathed_label = strnew(yytext);
			}
#ifdef DEBUG
			// mathed_label is a string and may still be null here.
			fprintf(stderr, "Label[%s]\n", mathed_label ? mathed_label : "");
#endif
			break;
		}
		default:
			mathPrintError("Unrecognized token");
			// debug info
			fprintf(stderr, "[%d %s]\n", t, yytext);
			break;
		}
		tprev = t;
		if (panic) {
			fprintf(stderr, " Math Panic, expect problems!\n");
			// Search for the end command.
			do t = yylex (); while (t != LM_TK_END && t);
		} else
			t = yylex ();
		// An optional-brace argument that did not start with '{' consists
		// of the single token just handled, so stop here.
		if ((flags & FLAG_BRACE_OPT)) {
			flags &= ~FLAG_BRACE_OPT;
			break;
		}
	}
	plevel--;
	return array;
}
// Attach the math lexer to an input stream.
//
// file    stream that later yylex() calls read from
// lineno  starting value for the lexer's line counter
//
// The built-in macro table is also created here if MathMacroTable::built
// says it does not exist yet.
void mathed_parser_file(FILE* file, int lineno)
{
	yyin = file;
	yylineno = lineno;

	bool const have_builtins = MathMacroTable::built;
	if (have_builtins)
		return;
	MathMacroTable::mathMTable.builtinMacros();
}
// Return the lexer's current line counter, i.e. the value passed to
// mathed_parser_file() plus the newlines counted since then.
int mathed_parser_lineno()
{
	int const current_line = yylineno;
	return current_line;
}