From c7a6b81ceafd86b38cfe6cfd246b7751f8df7721 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20P=C3=B6nitz?= Date: Wed, 23 Apr 2003 15:14:43 +0000 Subject: [PATCH] lots of small improvements git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@6838 a592a061-630c-0410-9148-cb99ea01b6c8 --- src/tex2lyx/math.C | 20 +-- src/tex2lyx/preamble.C | 41 +++--- src/tex2lyx/table.C | 196 ++++++++++++++++---------- src/tex2lyx/tex2lyx.C | 73 +++++----- src/tex2lyx/tex2lyx.h | 6 +- src/tex2lyx/texparser.C | 48 ++++--- src/tex2lyx/texparser.h | 8 +- src/tex2lyx/text.C | 303 +++++++++++++++++++++++++++++----------- 8 files changed, 450 insertions(+), 245 deletions(-) diff --git a/src/tex2lyx/math.C b/src/tex2lyx/math.C index b02a58c88a..c053437e38 100644 --- a/src/tex2lyx/math.C +++ b/src/tex2lyx/math.C @@ -35,7 +35,7 @@ bool is_math_env(string const & name) void parse_math(Parser & p, ostream & os, unsigned flags, const mode_type mode) { while (p.good()) { - Token const & t = p.getToken(); + Token const & t = p.get_token(); #ifdef FILEDEBUG cerr << "t: " << t << " flags: " << flags << "\n"; @@ -64,13 +64,13 @@ void parse_math(Parser & p, ostream & os, unsigned flags, const mode_type mode) if (t.cat() == catMath) { if (mode == MATHTEXT_MODE) { // we are inside some text mode thingy, so opening new math is allowed - Token const & n = p.getToken(); + Token const & n = p.get_token(); if (n.cat() == catMath) { // TeX's $$...$$ syntax for displayed math os << "\\["; parse_math(p, os, FLAG_SIMPLE, MATH_MODE); os << "\\]"; - p.getToken(); // skip the second '$' token + p.get_token(); // skip the second '$' token } else { // simple $...$ stuff p.putback(); @@ -103,8 +103,8 @@ void parse_math(Parser & p, ostream & os, unsigned flags, const mode_type mode) os << t.character(); else if (t.cat() == catNewline) { - //if (p.nextToken().cat() == catNewline) { - // p.getToken(); + //if (p.next_token().cat() == catNewline) { + // p.get_token(); // handle_par(os); //} else { os << "\n "; // note the space @@ -148,7 +148,7 @@ void parse_math(Parser & p, ostream & os, unsigned flags, const mode_type mode) else if (t.cs() == "begin") { string const name = p.getArg('{', '}'); - active_environments_push(name); + active_environments.push_back(name); os << "\\begin{" << name << "}"; if (name == "tabular") parse_math(p, os, FLAG_END, MATHTEXT_MODE); @@ -161,10 +161,10 @@ void parse_math(Parser & p, ostream & os, unsigned flags, const mode_type mode) if (flags & FLAG_END) { // eat environment name string const name = p.getArg('{', '}'); - if (name != curr_env()) + if (name != active_environment()) p.error("\\end{" + name + "} does not match \\begin{" - + curr_env() + "}"); - active_environments_pop(); + + active_environment() + "}"); + active_environments.pop_back(); return; } p.error("found 'end' unexpectedly"); @@ -196,7 +196,7 @@ void parse_math(Parser & p, ostream & os, unsigned flags, const mode_type mode) } else if (t.cs() == "\"") { - string const name = p.verbatimItem(); + string const name = p.verbatim_item(); if (name == "a") os << 'ä'; else if (name == "o") os << 'ö'; else if (name == "u") os << 'ü'; diff --git a/src/tex2lyx/preamble.C b/src/tex2lyx/preamble.C index e94968c8d4..4447fa83b8 100644 --- a/src/tex2lyx/preamble.C +++ b/src/tex2lyx/preamble.C @@ -45,7 +45,7 @@ char const * known_fontsizes[] = { "10pt", "11pt", "12pt", 0 }; // some ugly stuff ostringstream h_preamble; string h_textclass = "article"; -string h_options = ""; +string h_options = string(); string h_language = "english"; string h_inputencoding = "latin1"; string h_fontscheme = "default"; @@ -130,9 +130,10 @@ void end_preamble(ostream & os) os << "# tex2lyx 0.0.2 created this file\n" << "\\lyxformat 222\n" << "\\textclass " << h_textclass << "\n" - << "\\begin_preamble\n" << h_preamble.str() << "\n\\end_preamble\n" - << "\\options " << h_options << "\n" - << "\\language " << h_language << "\n" + << "\\begin_preamble\n" << h_preamble.str() << "\n\\end_preamble\n"; + if (h_options.size()) + os << "\\options " << h_options << "\n"; + os << "\\language " << h_language << "\n" << "\\inputencoding " << h_inputencoding << "\n" << "\\fontscheme " << h_fontscheme << "\n" << "\\graphics " << h_graphics << "\n" @@ -154,7 +155,8 @@ void end_preamble(ostream & os) << "\\papercolumns " << h_papercolumns << "\n" << "\\papersides " << h_papersides << "\n" << "\\paperpagestyle " << h_paperpagestyle << "\n" - << "\\tracking_changes " << h_tracking_changes << "\n"; + << "\\tracking_changes " << h_tracking_changes << "\n" + << "\\end_header\n\n\\layout Standard\n"; } @@ -163,7 +165,7 @@ void end_preamble(ostream & os) void parse_preamble(Parser & p, ostream & os) { while (p.good()) { - Token const & t = p.getToken(); + Token const & t = p.get_token(); #ifdef FILEDEBUG cerr << "t: " << t << " flags: " << flags << "\n"; @@ -191,7 +193,7 @@ void parse_preamble(Parser & p, ostream & os) handle_comment(p); else if (t.cs() == "pagestyle") - h_paperpagestyle == p.verbatimItem(); + h_paperpagestyle == p.verbatim_item(); else if (t.cs() == "makeatletter") { p.setCatCode('@', catLetter); @@ -206,13 +208,13 @@ void parse_preamble(Parser & p, ostream & os) else if (t.cs() == "newcommand" || t.cs() == "renewcommand" || t.cs() == "providecommand") { bool star = false; - if (p.nextToken().character() == '*') { - p.getToken(); + if (p.next_token().character() == '*') { + p.get_token(); star = true; } - string const name = p.verbatimItem(); + string const name = p.verbatim_item(); string const opts = p.getOpt(); - string const body = p.verbatimItem(); + string const body = p.verbatim_item(); // only non-lyxspecific stuff if (name != "\\noun " && name != "\\tabularnewline " @@ -267,8 +269,8 @@ void parse_preamble(Parser & p, ostream & os) ss << "\\newenvironment{" << name << "}"; ss << p.getOpt(); ss << p.getOpt(); - ss << '{' << p.verbatimItem() << '}'; - ss << '{' << p.verbatimItem() << '}'; + ss << '{' << p.verbatim_item() << '}'; + ss << '{' << p.verbatim_item() << '}'; ss << '\n'; if (name != "lyxcode" && name != "lyxlist" && name != "lyxrightadress" && name != "lyxaddress") @@ -276,10 +278,10 @@ void parse_preamble(Parser & p, ostream & os) } else if (t.cs() == "def") { - string name = p.getToken().cs(); - while (p.nextToken().cat() != catBegin) - name += p.getToken().asString(); - h_preamble << "\\def\\" << name << '{' << p.verbatimItem() << "}\n"; + string name = p.get_token().cs(); + while (p.next_token().cat() != catBegin) + name += p.get_token().asString(); + h_preamble << "\\def\\" << name << '{' << p.verbatim_item() << "}\n"; } else if (t.cs() == "setcounter") { @@ -294,8 +296,8 @@ void parse_preamble(Parser & p, ostream & os) } else if (t.cs() == "setlength") { - string const name = p.verbatimItem(); - string const content = p.verbatimItem(); + string const name = p.verbatim_item(); + string const content = p.verbatim_item(); if (name == "parskip") h_paragraph_separation = "skip"; else if (name == "parindent") @@ -311,7 +313,6 @@ void parse_preamble(Parser & p, ostream & os) string const name = p.getArg('{', '}'); if (name == "document") { end_preamble(os); - os << "\n\n\\layout Standard\n\n"; return; } h_preamble << "\\begin{" << name << "}"; diff --git a/src/tex2lyx/table.C b/src/tex2lyx/table.C index abbb57235b..c39e6584d9 100644 --- a/src/tex2lyx/table.C +++ b/src/tex2lyx/table.C @@ -28,10 +28,11 @@ namespace { struct ColInfo { - ColInfo() : rightline(false) {} + ColInfo() : rightline(0), leftline(false) {} string align; // column alignment string width; // column width - bool rightline; // a line on the right? + int rightline; // a line on the right? + bool leftline; }; @@ -39,7 +40,7 @@ struct RowInfo { RowInfo() : topline(false), bottomline(false) {} bool topline; // horizontal line above - bool bottomline; // horizontal line below + int bottomline; // horizontal line below }; @@ -74,18 +75,18 @@ string read_hlines(Parser & p) ostringstream os; p.skipSpaces(); while (p.good()) { - if (p.nextToken().cs() == "hline") { - p.getToken(); + if (p.next_token().cs() == "hline") { + p.get_token(); os << "\\hline"; - } else if (p.nextToken().cs() == "cline") { - p.getToken(); - os << "\\cline{" << p.verbatimItem() << "}"; + } else if (p.next_token().cs() == "cline") { + p.get_token(); + os << "\\cline{" << p.verbatim_item() << "}"; } else break; p.skipSpaces(); }; //cerr << "read_hlines(), read: '" << os.str() << "'\n"; - //cerr << "read_hlines(), next token: " << p.nextToken() << "\n"; + //cerr << "read_hlines(), next token: " << p.next_token() << "\n"; return os.str(); } @@ -110,46 +111,57 @@ char const TAB = '\001'; char const LINE = '\002'; char const HLINE = '\004'; - -bool handle_colalign(Parser & p, vector & colinfo) +string get_align(char c) { - if (p.getToken().cat() != catBegin) + switch (c) { + case 'c': return "center"; + case 'l': return "left"; + case 'r': return "right"; + case 'b': return "block"; + } + return "center"; +} + + +void handle_colalign(Parser & p, vector & colinfo) +{ + if (p.get_token().cat() != catBegin) cerr << "wrong syntax for table column alignment. '{' expected\n"; string nextalign = "block"; bool leftline = false; - for (Token t = p.getToken(); p.good() && t.cat() != catEnd; t = p.getToken()){ + for (Token t=p.get_token(); p.good() && t.cat() != catEnd; t = p.get_token()){ #ifdef FILEDEBUG cerr << "t: " << t << " c: '" << t.character() << "'\n"; #endif switch (t.character()) { case 'c': - colinfo.push_back(ColInfo()); - colinfo.back().align = "center"; - break; case 'l': - colinfo.push_back(ColInfo()); - colinfo.back().align = "left"; - break; - case 'r': - colinfo.push_back(ColInfo()); - colinfo.back().align = "right"; + case 'r': { + ColInfo ci; + ci.align = get_align(t.character()); + if (colinfo.size() && colinfo.back().rightline > 1) { + ci.leftline = true; + --colinfo.back().rightline; + } + colinfo.push_back(ci); break; + } case 'p': colinfo.push_back(ColInfo()); colinfo.back().align = nextalign; - colinfo.back().width = p.verbatimItem(); + colinfo.back().width = p.verbatim_item(); nextalign = "block"; break; case '|': if (colinfo.empty()) leftline = true; else - colinfo.back().rightline = true; + ++colinfo.back().rightline; break; case '>': { - string s = p.verbatimItem(); + string s = p.verbatim_item(); if (s == "\\raggedleft ") nextalign = "left"; else if (s == "\\raggedright ") @@ -163,7 +175,8 @@ bool handle_colalign(Parser & p, vector & colinfo) break; } } - return leftline; + if (colinfo.size() && leftline) + colinfo[0].leftline = true; } @@ -175,7 +188,7 @@ void parse_table(Parser & p, ostream & os, unsigned flags) string hlines; while (p.good()) { - Token const & t = p.getToken(); + Token const & t = p.get_token(); #ifdef FILEDEBUG cerr << "t: " << t << " flags: " << flags << "\n"; @@ -186,13 +199,13 @@ void parse_table(Parser & p, ostream & os, unsigned flags) // if (t.cat() == catMath) { // we are inside some text mode thingy, so opening new math is allowed - Token const & n = p.getToken(); + Token const & n = p.get_token(); if (n.cat() == catMath) { // TeX's $$...$$ syntax for displayed math os << "\\["; parse_math(p, os, FLAG_SIMPLE, MATH_MODE); os << "\\]"; - p.getToken(); // skip the second '$' token + p.get_token(); // skip the second '$' token } else { // simple $...$ stuff p.putback(); @@ -228,7 +241,8 @@ void parse_table(Parser & p, ostream & os, unsigned flags) os << TAB; } - else if (t.cs() == "tabularnewline" || t.cs() == "\\") { + //else if (t.cs() == "tabularnewline" || t.cs() == "\\") { + else if (t.cs() == "tabularnewline") { // stuff before the line break // and look ahead for stuff after the line break os << HLINE << hlines << HLINE << LINE << read_hlines(p) << HLINE; @@ -239,7 +253,7 @@ void parse_table(Parser & p, ostream & os, unsigned flags) hlines += "\\hline"; else if (t.cs() == "cline") - hlines += "\\cline{" + p.verbatimItem() + '}'; + hlines += "\\cline{" + p.verbatim_item() + '}'; else if (t.cat() == catComment) handle_comment(p); @@ -258,7 +272,7 @@ void parse_table(Parser & p, ostream & os, unsigned flags) else if (t.cs() == "begin") { string const name = p.getArg('{', '}'); - active_environments_push(name); + active_environments.push_back(name); parse_table(p, os, FLAG_END); } @@ -266,18 +280,37 @@ void parse_table(Parser & p, ostream & os, unsigned flags) if (flags & FLAG_END) { // eat environment name string const name = p.getArg('{', '}'); - if (name != curr_env()) + if (name != active_environment()) p.error("\\end{" + name + "} does not match \\begin{" - + curr_env() + "}"); - active_environments_pop(); + + active_environment() + "}"); + active_environments.pop_back(); return; } p.error("found 'end' unexpectedly"); } + + else + os << t.asInput(); } } +void handle_hline_above(RowInfo & ri, vector & ci) +{ + ri.topline = true; + for (size_t col = 0; col < ci.size(); ++col) + ci[col].topline = true; +} + + +void handle_hline_below(RowInfo & ri, vector & ci) +{ + ri.bottomline = true; + for (size_t col = 0; col < ci.size(); ++col) + ci[col].bottomline = true; +} + + void handle_tabular(Parser & p, ostream & os) { string posopts = p.getOpt(); @@ -287,7 +320,7 @@ void handle_tabular(Parser & p, ostream & os) vector colinfo; // handle column formatting - bool leftline = handle_colalign(p, colinfo); + handle_colalign(p, colinfo); // handle initial hlines @@ -308,8 +341,7 @@ void handle_tabular(Parser & p, ostream & os) for (size_t row = 0; row < rowinfo.size(); ++row) { // init row - vector & cellinfos = cellinfo[row]; - cellinfos.resize(colinfo.size()); + cellinfo[row].resize(colinfo.size()); // split row vector dummy; @@ -333,31 +365,41 @@ void handle_tabular(Parser & p, ostream & os) //cerr << " reading from line string '" << dummy[i] << "'\n"; Parser p1(dummy[i]); while (p1.good()) { - Token t = p1.getToken(); + Token t = p1.get_token(); //cerr << "read token: " << t << "\n"; if (t.cs() == "hline") { if (i == 0) { - rowinfo[row].topline = true; - for (size_t col = 0; col < colinfo.size(); ++col) - cellinfos[col].topline = true; - } else { - rowinfo[row].bottomline = true; - for (size_t col = 0; col < colinfo.size(); ++col) - cellinfos[col].bottomline = true; + if (rowinfo[row].topline) { + if (row > 0) // extra bottomline above + handle_hline_below(rowinfo[row - 1], cellinfo[row - 1]); + else + cerr << "dropping extra hline\n"; + //cerr << "below row: " << row-1 << endl; + } else { + handle_hline_above(rowinfo[row], cellinfo[row]); + //cerr << "above row: " << row << endl; + } + } else { + //cerr << "below row: " << row << endl; + handle_hline_below(rowinfo[row], cellinfo[row]); } } else if (t.cs() == "cline") { - string arg = p1.verbatimItem(); + string arg = p1.verbatim_item(); //cerr << "read cline arg: '" << arg << "'\n"; vector t; split(arg, t, '-'); t.resize(2); - size_t from = string2int(t[0]); + size_t from = string2int(t[0]) - 1; size_t to = string2int(t[1]); for (size_t col = from; col < to; ++col) { - if (i == 0) - cellinfos[col].topline = true; - else - cellinfos[col].bottomline = true; + //cerr << "row: " << row << " col: " << col << " i: " << i << endl; + if (i == 0) { + rowinfo[row].topline = true; + cellinfo[row][col].topline = true; + } else { + rowinfo[row].bottomline = true; + cellinfo[row][col].bottomline = true; + } } } else { cerr << "unexpected line token: " << t << endl; @@ -368,45 +410,48 @@ void handle_tabular(Parser & p, ostream & os) // split into cells vector cells; split(lines[row], cells, TAB); - for (size_t col = 0, cell = 0; cell < cells.size() && col < colinfo.size(); ++col, ++cell) { - //cerr << "cell content: " << cells[cell] << "\n"; + for (size_t col = 0, cell = 0; + cell < cells.size() && col < colinfo.size(); ++col, ++cell) { + //cerr << "cell content: '" << cells[cell] << "'\n"; Parser p(cells[cell]); p.skipSpaces(); - //cerr << "handling cell: " << p.nextToken().cs() << " '" << //cells[cell] << "'\n"; - if (p.nextToken().cs() == "multicolumn") { + if (p.next_token().cs() == "multicolumn") { // how many cells? - p.getToken(); - size_t ncells = string2int(p.verbatimItem()); + p.get_token(); + size_t const ncells = string2int(p.verbatim_item()); // special cell properties alignment vector t; - bool leftline = handle_colalign(p, t); - CellInfo & ci = cellinfos[col]; - ci.multi = 1; - ci.align = t.front().align; - ci.content = parse_text(p, FLAG_ITEM, false); - ci.leftline = leftline; - ci.rightline = t.front().rightline; + handle_colalign(p, t); + cellinfo[row][col].multi = 1; + cellinfo[row][col].align = t.front().align; + cellinfo[row][col].content = parse_text(p, FLAG_ITEM, false); + cellinfo[row][col].leftline |= t.front().leftline; + cellinfo[row][col].rightline |= t.front().rightline; // add dummy cells for multicol for (size_t i = 0; i < ncells - 1 && col < colinfo.size(); ++i) { ++col; - cellinfos[col].multi = 2; - cellinfos[col].align = "center"; + cellinfo[row][col].multi = 2; + cellinfo[row][col].align = "center"; } - } else { - cellinfos[col].content = parse_text(p, FLAG_ITEM, false); + + // more than one line on the right? + if (t.front().rightline > 1) + cellinfo[row][col + 1].leftline = true; + + } else { + // FLAG_END is a hack, we need to read all of it + cellinfo[row][col].content = parse_text(p, FLAG_END, false); } } - cellinfo.push_back(cellinfos); - //cerr << "// handle almost empty last row what we have\n"; // handle almost empty last row if (row && lines[row].empty() && row + 1 == rowinfo.size()) { //cerr << "remove empty last line\n"; - if (rowinfo[row].topline); + if (rowinfo[row].topline) rowinfo[row - 1].bottomline = true; for (size_t col = 0; col < colinfo.size(); ++col) if (cellinfo[row][col].topline) @@ -427,7 +472,7 @@ void handle_tabular(Parser & p, ostream & os) os << " #include #include -#include #include #include @@ -20,23 +19,22 @@ using std::cout; using std::cerr; using std::endl; using std::getline; +using std::istream; using std::ifstream; using std::istringstream; using std::ostream; using std::ostringstream; -using std::stack; +using std::stringstream; using std::string; using std::vector; -//namespace { - void handle_comment(Parser & p) { string s; while (p.good()) { - Token const & t = p.getToken(); + Token const & t = p.get_token(); if (t.cat() == catNewline) break; s += t.asString(); @@ -46,8 +44,6 @@ void handle_comment(Parser & p) } - - string const trim(string const & a, char const * p) { // lyx::Assert(p); @@ -100,38 +96,43 @@ char const ** is_known(string const & str, char const ** what) // current stack of nested environments -stack active_environments; +vector active_environments; -void active_environments_push(std::string const & name) +string active_environment() { - active_environments.push(name); + return active_environments.empty() ? string() : active_environments.back(); } -void active_environments_pop() +void clean_layouts(istream & is, ostream & os) { - active_environments.pop(); + string last; + string line; + bool eating = false; + while (getline(is, line)) { + string tline = trim(line, " "); + if (line.substr(0, 8) == "\\layout ") { + //cerr << "layout: " << line << "\n"; + last = line; + eating = true; + } else if (eating && tline.empty()) { + //cerr << "eat empty line\n"; + } else if (line.substr(0, 13) == "\\begin_deeper") { + os << line << "\n"; + } else { + // ordinary line + //cerr << "ordinary line\n"; + if (eating) { + eating = false; + os << last << "\n\n"; + } + os << line << "\n"; + } + } } -bool active_environments_empty() -{ - return active_environments.empty(); -} - - -string curr_env() -{ - return active_environments.empty() ? string() : active_environments.top(); -} - - - - -//} // anonymous namespace - - int main(int argc, char * argv[]) { if (argc <= 1) { @@ -141,10 +142,16 @@ int main(int argc, char * argv[]) ifstream is(argv[1]); Parser p(is); - parse_preamble(p, cout); - active_environments.push("document"); - parse_text(p, cout, FLAG_END, true); - cout << "\n\\the_end"; + //p.dump(); + + stringstream ss; + parse_preamble(p, ss); + active_environments.push_back("document"); + parse_text(p, ss, FLAG_END, true); + ss << "\n\\the_end\n"; + + ss.seekg(0); + clean_layouts(ss, cout); return 0; } diff --git a/src/tex2lyx/tex2lyx.h b/src/tex2lyx/tex2lyx.h index 9fcc175d4a..83c897d5e8 100644 --- a/src/tex2lyx/tex2lyx.h +++ b/src/tex2lyx/tex2lyx.h @@ -31,9 +31,7 @@ bool is_math_env(std::string const & name); char const ** is_known(string const & str, char const ** what); // Access to environment stack -std::string curr_env(); -void active_environments_push(std::string const & name); -void active_environments_pop(); -bool active_environments_empty(); +extern std::vector active_environments; +std::string active_environment(); #endif diff --git a/src/tex2lyx/texparser.C b/src/tex2lyx/texparser.C index e9a9b9e6dc..d1014f71c6 100644 --- a/src/tex2lyx/texparser.C +++ b/src/tex2lyx/texparser.C @@ -92,6 +92,8 @@ ostream & operator<<(ostream & os, Token const & t) os << '\\' << t.cs() << ' '; else if (t.cat() == catLetter) os << t.character(); + else if (t.cat() == catNewline) + os << "[\\n," << t.cat() << "]\n"; else os << '[' << t.character() << ',' << t.cat() << ']'; return os; @@ -121,6 +123,7 @@ Parser::Parser(istream & is) tokenize(is); } + Parser::Parser(string const & s) : lineno_(0), pos_(0) { @@ -141,21 +144,21 @@ void Parser::pop_back() } -Token const & Parser::prevToken() const +Token const & Parser::prev_token() const { static const Token dummy; return pos_ > 0 ? tokens_[pos_ - 1] : dummy; } -Token const & Parser::nextToken() const +Token const & Parser::next_token() const { static const Token dummy; return good() ? tokens_[pos_] : dummy; } -Token const & Parser::getToken() +Token const & Parser::get_token() { static const Token dummy; //cerr << "looking at token " << tokens_[pos_] << " pos: " << pos_ << '\n'; @@ -166,11 +169,11 @@ Token const & Parser::getToken() void Parser::skipSpaces() { while (1) { - if (nextToken().cat() == catSpace || nextToken().cat() == catNewline) - getToken(); - else if (nextToken().cat() == catComment) - while (nextToken().cat() != catNewline) - getToken(); + if (next_token().cat() == catSpace || next_token().cat() == catNewline) + get_token(); + else if (next_token().cat() == catComment) + while (next_token().cat() != catNewline) + get_token(); else break; } @@ -238,12 +241,15 @@ void Parser::tokenize(istream & is) case catNewline: { ++lineno_; is.get(c); - if (catcode(c) == catNewline) + if (catcode(c) == catNewline) { + //do { + is.get(c); + //} while (catcode(c) == catNewline); push_back(Token("par")); - else { + } else { push_back(Token('\n', catNewline)); - is.putback(c); } + is.putback(c); break; } @@ -317,12 +323,12 @@ void Parser::error(string const & msg) string Parser::verbatimOption() { string res; - if (nextToken().character() == '[') { - Token t = getToken(); - for (Token t = getToken(); t.character() != ']' && good(); t = getToken()) { + if (next_token().character() == '[') { + Token t = get_token(); + for (Token t = get_token(); t.character() != ']' && good(); t = get_token()) { if (t.cat() == catBegin) { putback(); - res += '{' + verbatimItem() + '}'; + res += '{' + verbatim_item() + '}'; } else res += t.asString(); } @@ -331,25 +337,25 @@ string Parser::verbatimOption() } -string Parser::verbatimItem() +string Parser::verbatim_item() { if (!good()) error("stream bad"); skipSpaces(); - if (nextToken().cat() == catBegin) { - Token t = getToken(); // skip brace + if (next_token().cat() == catBegin) { + Token t = get_token(); // skip brace string res; - for (Token t = getToken(); t.cat() != catEnd && good(); t = getToken()) { + for (Token t = get_token(); t.cat() != catEnd && good(); t = get_token()) { if (t.cat() == catBegin) { putback(); - res += '{' + verbatimItem() + '}'; + res += '{' + verbatim_item() + '}'; } else res += t.asInput(); } return res; } - return getToken().asInput(); + return get_token().asInput(); } diff --git a/src/tex2lyx/texparser.h b/src/tex2lyx/texparser.h index ce154e37b7..4370a550a5 100644 --- a/src/tex2lyx/texparser.h +++ b/src/tex2lyx/texparser.h @@ -122,11 +122,11 @@ public: /// void pop_back(); /// - Token const & prevToken() const; + Token const & prev_token() const; /// - Token const & nextToken() const; + Token const & next_token() const; /// - Token const & getToken(); + Token const & get_token(); /// skips spaces if any void skipSpaces(); /// @@ -134,7 +134,7 @@ public: /// bool good() const; /// - string verbatimItem(); + string verbatim_item(); /// string verbatimOption(); diff --git a/src/tex2lyx/text.C b/src/tex2lyx/text.C index 9ca5237f75..9fd9a024eb 100644 --- a/src/tex2lyx/text.C +++ b/src/tex2lyx/text.C @@ -33,10 +33,17 @@ char const * known_latex_commands[] = { "ref", "cite", "label", "index", // LaTeX names for quotes char const * known_quotes[] = { "glqq", "grqq", "quotedblbase", -"textquotedblleft", 0}; +"textquotedblleft", "quotesinglbase", "guilsinglleft", "guilsinglright", 0}; // the same as known_quotes with .lyx names -char const * known_coded_quotes[] = { "gld", "grd", "gld", "grd", 0}; +char const * known_coded_quotes[] = { "gld", "grd", "gld", +"grd", "gls", "fls", "frd", 0}; + +char const * known_sizes[] = { "tiny", "scriptsize", "footnotesize", +"small", "normalsize", "large", "Large", "LARGE", "huge", "Huge", 0}; + +char const * known_coded_sizes[] = { "tiny", "scriptsize", "footnotesize", +"small", "normal", "large", "larger", "largest", "huge", "giant", 0}; string cap(string s) @@ -75,6 +82,19 @@ void end_inset(ostream & os) } +void skip_braces(Parser & p) +{ + if (p.next_token().cat() != catBegin) + return; + p.get_token(); + if (p.next_token().cat() == catEnd) { + p.get_token(); + return; + } + p.putback(); +} + + void handle_ert(ostream & os, string const & s) { begin_inset(os, "ERT"); @@ -91,11 +111,11 @@ void handle_ert(ostream & os, string const & s) void handle_par(ostream & os) { - if (active_environments_empty()) + if (active_environments.empty()) return; os << "\n\\layout "; - string s = curr_env(); - if (s == "document" || s == "table") + string s = active_environment(); + if (s == "document" || s == "table" || s == "center") os << "Standard\n\n"; else if (s == "lyxcode") os << "LyX-Code\n\n"; @@ -114,7 +134,7 @@ void handle_par(ostream & os) void parse_text(Parser & p, ostream & os, unsigned flags, bool outer) { while (p.good()) { - Token const & t = p.getToken(); + Token const & t = p.get_token(); #ifdef FILEDEBUG cerr << "t: " << t << " flags: " << flags << "\n"; @@ -136,6 +156,8 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer) flags |= FLAG_LEAVE; } + if (t.character() == ']' && (flags & FLAG_BRACK_LAST)) + return; // // cat codes @@ -143,13 +165,13 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer) if (t.cat() == catMath) { // we are inside some text mode thingy, so opening new math is allowed begin_inset(os, "Formula "); - Token const & n = p.getToken(); + Token const & n = p.get_token(); if (n.cat() == catMath && outer) { // TeX's $$...$$ syntax for displayed math os << "\\["; parse_math(p, os, FLAG_SIMPLE, MATH_MODE); os << "\\]"; - p.getToken(); // skip the second '$' token + p.get_token(); // skip the second '$' token } else { // simple $...$ stuff p.putback(); @@ -171,17 +193,17 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer) os << t.character(); else if (t.cat() == catNewline) { - if (p.nextToken().cat() == catNewline) { - p.getToken(); + if (p.next_token().cat() == catNewline) { + p.get_token(); handle_par(os); } else { - os << "\n "; // note the space + os << " "; // note the space } } else if (t.cat() == catActive) { if (t.character() == '~') { - if (curr_env() == "lyxcode") + if (active_environment() == "lyxcode") os << ' '; else os << "\\SpecialChar ~\n"; @@ -190,9 +212,18 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer) } else if (t.cat() == catBegin) { - handle_ert(os, "{"); - parse_text(p, os, FLAG_BRACE_LAST, outer); - handle_ert(os, "}"); + // special handling of size changes + bool const is_size = is_known(p.next_token().cs(), known_sizes); + string const s = parse_text(p, FLAG_BRACE_LAST, outer); + if (s.empty() && p.next_token().character() == '`') + ; // ignore it in {}`` + else if (is_size || s == "[" || s == "]" || s == "*") + os << s; + else { + handle_ert(os, "{"); + os << s; + handle_ert(os, "}"); + } } else if (t.cat() == catEnd) { @@ -212,8 +243,10 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer) // control sequences // - else if (t.cs() == "ldots") + else if (t.cs() == "ldots") { + skip_braces(p); os << "\n\\SpecialChar \\ldots{}\n"; + } else if (t.cs() == "(") { begin_inset(os, "Formula"); @@ -233,11 +266,8 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer) else if (t.cs() == "begin") { string const name = p.getArg('{', '}'); - active_environments_push(name); - if (name == "abstract") { - handle_par(os); - parse_text(p, os, FLAG_END, outer); - } else if (is_math_env(name)) { + active_environments.push_back(name); + if (is_math_env(name)) { begin_inset(os, "Formula "); os << "\\begin{" << name << "}"; parse_math(p, os, FLAG_END, MATH_MODE); @@ -252,21 +282,37 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer) begin_inset(os, "Float " + name + "\n"); if (opts.size()) os << "placement " << opts << '\n'; - os << "wide false\n" - << "collapsed false\n" - << "\n" + os << "wide false\ncollapsed false\n\n" << "\\layout Standard\n"; parse_text(p, os, FLAG_END, outer); end_inset(os); - } else if (name == "lyxlist") { - p.verbatimItem(); // swallow next arg + } else if (name == "center") { + active_environments.pop_back(); + handle_par(os); + active_environments.push_back(name); + os << "\\align center\n"; parse_text(p, os, FLAG_END, outer); - os << "\n\\layout Bibliography\n\n"; + } else if (name == "enumerate" || name == "itemize" + || name == "lyxlist") { + size_t const n = active_environments.size(); + string const s = active_environments[n - 2]; + bool const deeper = s == "enumerate" || s == "itemize" + || s == "lyxlist"; + if (deeper) + os << "\n\\begin_deeper"; + os << "\n\\layout " << cap(name) << "\n\n"; + if (name == "lyxlist") + p.verbatim_item(); // swallow next arg + parse_text(p, os, FLAG_END, outer); + if (deeper) + os << "\n\\end_deeper\n"; + handle_par(os); } else if (name == "thebibliography") { - p.verbatimItem(); // swallow next arg + p.verbatim_item(); // swallow next arg parse_text(p, os, FLAG_END, outer); os << "\n\\layout Bibliography\n\n"; } else { + handle_par(os); parse_text(p, os, FLAG_END, outer); } } @@ -275,32 +321,45 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer) if (flags & FLAG_END) { // eat environment name string const name = p.getArg('{', '}'); - if (name != curr_env()) - p.error("\\end{" + name + "} does not match \\begin{" - + curr_env() + "}"); - active_environments_pop(); + if (name != active_environment()) + cerr << "\\end{" + name + "} does not match \\begin{" + + active_environment() + "}\n"; + active_environments.pop_back(); + handle_par(os); return; } p.error("found 'end' unexpectedly"); } - else if (t.cs() == "item") + else if (t.cs() == "item") { + p.skipSpaces(); + string s; + if (p.next_token().character() == '[') { + p.get_token(); // eat '[' + s = parse_text(p, FLAG_BRACK_LAST, outer); + } handle_par(os); - - else if (t.cs() == "def") { - string name = p.getToken().cs(); - while (p.nextToken().cat() != catBegin) - name += p.getToken().asString(); - handle_ert(os, "\\def\\" + name + '{' + p.verbatimItem() + '}'); + os << s << ' '; } - else if (t.cs() == "par") - handle_par(os); + else if (t.cs() == "def") { + string name = p.get_token().cs(); + while (p.next_token().cat() != catBegin) + name += p.get_token().asString(); + handle_ert(os, "\\def\\" + name + '{' + p.verbatim_item() + '}'); + } + + else if (t.cs() == "par") { + p.skipSpaces(); + if (p.next_token().cs() != "\\begin") + handle_par(os); + //cerr << "next token: '" << p.next_token().cs() << "'\n"; + } else if (is_known(t.cs(), known_headings)) { string name = t.cs(); - if (p.nextToken().asInput() == "*") { - p.getToken(); + if (p.next_token().asInput() == "*") { + p.get_token(); name += "*"; } os << "\n\n\\layout " << cap(name) << "\n\n"; @@ -316,7 +375,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer) else if (t.cs() == "includegraphics") { map opts = split_map(p.getArg('[', ']')); - string name = p.verbatimItem(); + string name = p.verbatim_item(); begin_inset(os, "Graphics "); os << "\n\tfilename " << name << '\n'; if (opts.find("width") != opts.end()) @@ -333,64 +392,84 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer) end_inset(os); } + else if (t.cs() == "ensuremath") { + string s = parse_text(p, FLAG_ITEM, false); + if (s == "±" || s == "³" || s == "²" || s == "µ") + os << s; + else + handle_ert(os, "\\ensuremath{" + s + "}"); + } + + else if (t.cs() == "marginpar") { + begin_inset(os, "Marginal\n"); + os << "collapsed true\n\n\\layout Standard\n\n"; + parse_text(p, os, FLAG_ITEM, false); + end_inset(os); + } + + else if (t.cs() == "hfill") { + os << "\n\\hfill\n"; + skip_braces(p); + } + else if (t.cs() == "makeindex" || t.cs() == "maketitle") - ; // swallow this + skip_braces(p); // swallow this else if (t.cs() == "tableofcontents") - p.verbatimItem(); // swallow this - - else if (t.cs() == "tiny" || t.cs() == "scriptsize") - os << "\n\\size " << t.cs() << "\n"; - - else if (t.cs() == "Large") - os << "\n\\size larger\n"; + skip_braces(p); // swallow this else if (t.cs() == "textrm") { - os << "\n\\family roman\n"; + os << "\n\\family roman \n"; parse_text(p, os, FLAG_ITEM, outer); - os << "\n\\family default\n"; + os << "\n\\family default \n"; } else if (t.cs() == "textsf") { - os << "\n\\family sans\n"; + os << "\n\\family sans \n"; parse_text(p, os, FLAG_ITEM, outer); - os << "\n\\family default\n"; + os << "\n\\family default \n"; } else if (t.cs() == "texttt") { - os << "\n\\family typewriter\n"; + os << "\n\\family typewriter \n"; parse_text(p, os, FLAG_ITEM, outer); - os << "\n\\family default\n"; + os << "\n\\family default \n"; + } + + else if (t.cs() == "textit") { + os << "\n\\shape italic \n"; + parse_text(p, os, FLAG_ITEM, outer); + os << "\n\\shape default \n"; } else if (t.cs() == "textsc") { - os << "\n\\noun on\n"; + os << "\n\\noun on \n"; parse_text(p, os, FLAG_ITEM, outer); - os << "\n\\noun default\n"; + os << "\n\\noun default \n"; } else if (t.cs() == "textbf") { - os << "\n\\series bold\n"; + os << "\n\\series bold \n"; parse_text(p, os, FLAG_ITEM, outer); - os << "\n\\series default\n"; + os << "\n\\series default \n"; } else if (t.cs() == "underbar") { - os << "\n\\bar under\n"; + os << "\n\\bar under \n"; parse_text(p, os, FLAG_ITEM, outer); - os << "\n\\bar default\n"; + os << "\n\\bar default \n"; } else if (t.cs() == "emph" || t.cs() == "noun") { - os << "\n\\" << t.cs() << " on\n"; + os << "\n\\" << t.cs() << " on \n"; parse_text(p, os, FLAG_ITEM, outer); - os << "\n\\" << t.cs() << " default\n"; + os << "\n\\" << t.cs() << " default \n"; } else if (t.cs() == "bibitem") { os << "\n\\layout Bibliography\n\\bibitem "; os << p.getOpt(); - os << '{' << p.verbatimItem() << '}' << "\n\n"; + os << '{' << p.verbatim_item() << '}' << "\n\n"; } else if (is_known(t.cs(), known_latex_commands)) { @@ -398,7 +477,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer) os << '\\' << t.cs(); os << p.getOpt(); os << p.getOpt(); - os << '{' << p.verbatimItem() << '}'; + os << '{' << p.verbatim_item() << '}'; end_inset(os); } @@ -407,35 +486,71 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer) begin_inset(os, "Quotes "); os << known_coded_quotes[where - known_quotes]; end_inset(os); + skip_braces(p); + } + + else if (is_known(t.cs(), known_sizes)) { + char const ** where = is_known(t.cs(), known_sizes); + os << "\n\\size " << known_coded_sizes[where - known_sizes] << "\n"; } else if (t.cs() == "LyX" || t.cs() == "TeX" || t.cs() == "LaTeX" || t.cs() == "LaTeXe") { - p.verbatimItem(); // eat {} - os << "LyX"; + os << t.cs(); + skip_braces(p); // eat {} } else if (t.cs() == "lyxarrow") { - p.verbatimItem(); os << "\\SpecialChar \\menuseparator\n"; + skip_braces(p); } else if (t.cs() == "ldots") { - p.verbatimItem(); os << "\\SpecialChar \\ldots{}\n"; + skip_braces(p); } - else if (t.cs() == "@") + else if (t.cs() == "@") { os << "\\SpecialChar \\@"; + skip_braces(p); + } - else if (t.cs() == "textasciitilde") + else if (t.cs() == "textasciitilde") { os << '~'; + skip_braces(p); + } - else if (t.cs() == "_" || t.cs() == "&" || t.cs() == "#") + else if (t.cs() == "textasciicircum") { + os << '^'; + skip_braces(p); + } + + else if (t.cs() == "textbackslash") { + os << "\n\\backslash\n"; + skip_braces(p); + } + + else if (t.cs() == "_" || t.cs() == "&" || t.cs() == "#" || t.cs() == "$" + || t.cs() == "{" || t.cs() == "}" || t.cs() == "%") os << t.cs(); + else if (t.cs() == "char") { + if (p.next_token().character() == '`') { + p.get_token(); + if (p.next_token().cs() == "\"") { + p.get_token(); + os << '"'; + skip_braces(p); + } else { + handle_ert(os, "\\char`"); + } + } else { + handle_ert(os, "\\char"); + } + } + else if (t.cs() == "\"") { - string const name = p.verbatimItem(); + string const name = p.verbatim_item(); if (name == "a") os << 'ä'; else if (name == "o") os << 'ö'; else if (name == "u") os << 'ü'; @@ -445,17 +560,45 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer) else handle_ert(os, "\"{" + name + "}"); } + else if (t.cs() == "=" || t.cs() == "H" || t.cs() == "c" + || t.cs() == "^" || t.cs() == "'" || t.cs() == "~") { + // we need the trim as the LyX parser chokes on such spaces + os << "\n\\i \\" << t.cs() << "{" + << trim(parse_text(p, FLAG_ITEM, outer), " ") << "}\n"; + } + else if (t.cs() == "ss") os << "ß"; + else if (t.cs() == "i" || t.cs() == "j") + os << "\\" << t.cs() << ' '; + + else if (t.cs() == "-") + os << "\\SpecialChar \\-\n"; + + else if (t.cs() == "\\") + os << "\n\\newline\n"; + + else if (t.cs() == "lyxrightaddress") { + os << "\n\\layout Right Address\n"; + parse_text(p, os, FLAG_ITEM, outer); + os << "\n\\layout Standard\n"; + } + + else if (t.cs() == "lyxaddress") { + os << "\n\\layout Address\n"; + parse_text(p, os, FLAG_ITEM, outer); + os << "\n\\layout Standard\n"; + } + else if (t.cs() == "input") - handle_ert(os, "\\input{" + p.verbatimItem() + "}\n"); + handle_ert(os, "\\input{" + p.verbatim_item() + "}\n"); else if (t.cs() == "fancyhead") { ostringstream ss; ss << "\\fancyhead"; ss << p.getOpt(); - ss << '{' << p.verbatimItem() << "}\n"; + ss << '{' << p.verbatim_item() << "}\n"; handle_ert(os, ss.str()); } @@ -464,11 +607,11 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer) // heuristic: read up to next non-nested space /* string s = t.asInput(); - string z = p.verbatimItem(); + string z = p.verbatim_item(); while (p.good() && z != " " && z.size()) { //cerr << "read: " << z << endl; s += z; - z = p.verbatimItem(); + z = p.verbatim_item(); } cerr << "found ERT: " << s << endl; handle_ert(os, s + ' ');