mirror of
https://git.lyx.org/repos/lyx.git
synced 2025-01-13 20:09:59 +00:00
Fix bug #5408: tex2lyx cannot handle verbatim code
Changes:
- Implement catcode setting in Parser.
- Add a new Parser::verbatimStuff method that reads verbatim contents.
- Use this method to parse the "verbatim" environment.
- Use it to parse \verb too.
- Rename Parser::verbatimEnvironment to ertEnvironment.
TODO:
- Use it for other verbatim-like cases (Sweave chunk, lstlisting...).
- Factor out the function that outputs ERT (including line breaks).
- Maybe implement Parser::unparse (if needed).
This commit is contained in:
parent
e609787a0f
commit
be42f1398d
@ -79,6 +79,7 @@ Style Verbatim
|
||||
ParbreakIsNewline 1
|
||||
FreeSpacing 1
|
||||
PassThru 1
|
||||
KeepEmpty 1
|
||||
NewLine 0
|
||||
ParSkip 0.4
|
||||
TopSep 0.7
|
||||
|
@ -22,39 +22,6 @@ namespace lyx {
|
||||
|
||||
namespace {
|
||||
|
||||
CatCode theCatcode[256];
|
||||
|
||||
void catInit()
|
||||
{
|
||||
static bool init_done = false;
|
||||
if (init_done)
|
||||
return;
|
||||
init_done = true;
|
||||
|
||||
fill(theCatcode, theCatcode + 256, catOther);
|
||||
fill(theCatcode + 'a', theCatcode + 'z' + 1, catLetter);
|
||||
fill(theCatcode + 'A', theCatcode + 'Z' + 1, catLetter);
|
||||
|
||||
theCatcode[int('\\')] = catEscape;
|
||||
theCatcode[int('{')] = catBegin;
|
||||
theCatcode[int('}')] = catEnd;
|
||||
theCatcode[int('$')] = catMath;
|
||||
theCatcode[int('&')] = catAlign;
|
||||
theCatcode[int('\n')] = catNewline;
|
||||
theCatcode[int('#')] = catParameter;
|
||||
theCatcode[int('^')] = catSuper;
|
||||
theCatcode[int('_')] = catSub;
|
||||
theCatcode[0x7f] = catIgnore;
|
||||
theCatcode[int(' ')] = catSpace;
|
||||
theCatcode[int('\t')] = catSpace;
|
||||
theCatcode[int('\r')] = catNewline;
|
||||
theCatcode[int('~')] = catActive;
|
||||
theCatcode[int('%')] = catComment;
|
||||
|
||||
// This is wrong!
|
||||
theCatcode[int('@')] = catLetter;
|
||||
}
|
||||
|
||||
/*!
|
||||
* Translate a line ending to '\n'.
|
||||
* \p c must have catcode catNewline, and it must be the last character read
|
||||
@ -79,16 +46,8 @@ char_type getNewline(idocstream & is, char_type c)
|
||||
return c;
|
||||
}
|
||||
|
||||
CatCode catcode(char_type c)
|
||||
{
|
||||
if (c < 256)
|
||||
return theCatcode[(unsigned char)c];
|
||||
return catOther;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// Token
|
||||
//
|
||||
@ -158,7 +117,8 @@ void debugToken(std::ostream & os, Token const & t, unsigned int flags)
|
||||
|
||||
|
||||
Parser::Parser(idocstream & is)
|
||||
: lineno_(0), pos_(0), iss_(0), is_(is), encoding_iconv_("UTF-8")
|
||||
: lineno_(0), pos_(0), iss_(0), is_(is), encoding_iconv_("UTF-8"),
|
||||
theCatcodesType_(NORMAL_CATCODES), curr_cat_(UNDECIDED_CATCODES)
|
||||
{
|
||||
}
|
||||
|
||||
@ -166,7 +126,8 @@ Parser::Parser(idocstream & is)
|
||||
Parser::Parser(string const & s)
|
||||
: lineno_(0), pos_(0),
|
||||
iss_(new idocstringstream(from_utf8(s))), is_(*iss_),
|
||||
encoding_iconv_("UTF-8")
|
||||
encoding_iconv_("UTF-8"),
|
||||
theCatcodesType_(NORMAL_CATCODES), curr_cat_(UNDECIDED_CATCODES)
|
||||
{
|
||||
}
|
||||
|
||||
@ -192,6 +153,57 @@ void Parser::setEncoding(std::string const & e, int const & p)
|
||||
}
|
||||
|
||||
|
||||
void Parser::catInit()
|
||||
{
|
||||
if (curr_cat_ == theCatcodesType_)
|
||||
return;
|
||||
curr_cat_ = theCatcodesType_;
|
||||
|
||||
fill(theCatcode_, theCatcode_ + 256, catOther);
|
||||
fill(theCatcode_ + 'a', theCatcode_ + 'z' + 1, catLetter);
|
||||
fill(theCatcode_ + 'A', theCatcode_ + 'Z' + 1, catLetter);
|
||||
// This is wrong!
|
||||
theCatcode_[int('@')] = catLetter;
|
||||
|
||||
if (theCatcodesType_ == NORMAL_CATCODES) {
|
||||
theCatcode_[int('\\')] = catEscape;
|
||||
theCatcode_[int('{')] = catBegin;
|
||||
theCatcode_[int('}')] = catEnd;
|
||||
theCatcode_[int('$')] = catMath;
|
||||
theCatcode_[int('&')] = catAlign;
|
||||
theCatcode_[int('\n')] = catNewline;
|
||||
theCatcode_[int('#')] = catParameter;
|
||||
theCatcode_[int('^')] = catSuper;
|
||||
theCatcode_[int('_')] = catSub;
|
||||
theCatcode_[0x7f] = catIgnore;
|
||||
theCatcode_[int(' ')] = catSpace;
|
||||
theCatcode_[int('\t')] = catSpace;
|
||||
theCatcode_[int('\r')] = catNewline;
|
||||
theCatcode_[int('~')] = catActive;
|
||||
theCatcode_[int('%')] = catComment;
|
||||
}
|
||||
}
|
||||
|
||||
/*!
 * \returns the category code currently stored for \p c. Only values
 * below 256 are looked up in the table; anything else is reported
 * as catOther.
 */
CatCode Parser::catcode(char_type c) const
{
	return c < 256 ? theCatcode_[static_cast<unsigned char>(c)] : catOther;
}
|
||||
|
||||
|
||||
void Parser::setCatcode(char c, CatCode cat)
|
||||
{
|
||||
theCatcode_[(unsigned char)c] = cat;
|
||||
}
|
||||
|
||||
|
||||
/*!
 * Select the catcode regime \p t. Only the request is recorded here;
 * the table itself is rebuilt by catInit(), which tokenize_one() calls
 * before it reads input.
 */
void Parser::setCatcodes(cat_type t)
{
	this->theCatcodesType_ = t;
}
|
||||
|
||||
|
||||
void Parser::setEncoding(std::string const & e)
|
||||
{
|
||||
//cerr << "setting encoding to " << e << std::endl;
|
||||
@ -472,7 +484,7 @@ string Parser::getFullParentheseArg()
|
||||
}
|
||||
|
||||
|
||||
string const Parser::verbatimEnvironment(string const & name)
|
||||
string const Parser::ertEnvironment(string const & name)
|
||||
{
|
||||
if (!good())
|
||||
return string();
|
||||
@ -485,7 +497,7 @@ string const Parser::verbatimEnvironment(string const & name)
|
||||
} else if (t.asInput() == "\\begin") {
|
||||
string const env = getArg('{', '}');
|
||||
os << "\\begin{" << env << '}'
|
||||
<< verbatimEnvironment(env)
|
||||
<< ertEnvironment(env)
|
||||
<< "\\end{" << env << '}';
|
||||
} else if (t.asInput() == "\\end") {
|
||||
string const end = getArg('{', '}');
|
||||
@ -545,6 +557,34 @@ string const Parser::plainCommand(char left, char right, string const & name)
|
||||
}
|
||||
|
||||
|
||||
/*!
 * Read raw input under verbatim catcodes until \p end_string is seen.
 *
 * \param end_string text that terminates the verbatim material, e.g.
 *        "\\end{verbatim}" or the delimiter character of \\verb.
 * \returns everything read before the terminator. The terminator
 *          itself is consumed but not returned. If the input ends
 *          first, everything that was read is returned and
 *          "unexpected end of input" is written to cerr.
 *
 * Normal catcodes are requested again before returning.
 *
 * The terminator is searched for in the accumulated text rather than
 * token by token. A terminator that follows a partial copy of itself
 * (e.g. "\\e\\end{verbatim}") is therefore still recognised, and token
 * boundaries do not matter.
 */
string const Parser::verbatimStuff(string const & end_string)
{
	if (!good())
		return string();

	string contents;
	bool terminated = false;
	setCatcodes(VERBATIM_CATCODES);
	// Test good() *before* fetching, so that every fetched token is
	// processed (including the very last one of the input) and no
	// token is requested after the terminator has been found.
	while (good()) {
		size_t const old_size = contents.size();
		contents += get_token().asInput();
		// An empty terminator can never match; read to the end.
		if (end_string.empty())
			continue;
		// A new occurrence must end inside the text just appended,
		// so only the tail of the buffer has to be searched.
		size_t const from = old_size >= end_string.size()
			? old_size - end_string.size() + 1 : 0;
		size_t const hit = contents.find(end_string, from);
		if (hit != string::npos) {
			// FIXME if a token extends beyond the terminator,
			// the text following the terminator is dropped.
			contents.erase(hit);
			terminated = true;
			break;
		}
	}
	setCatcodes(NORMAL_CATCODES);
	if (!terminated)
		cerr << "unexpected end of input" << endl;
	return contents;
}
|
||||
|
||||
|
||||
void Parser::tokenize_one()
|
||||
{
|
||||
catInit();
|
||||
@ -687,16 +727,4 @@ void Parser::reset()
|
||||
}
|
||||
|
||||
|
||||
void Parser::setCatCode(char c, CatCode cat)
|
||||
{
|
||||
theCatcode[(unsigned char)c] = cat;
|
||||
}
|
||||
|
||||
|
||||
CatCode Parser::getCatCode(char c) const
|
||||
{
|
||||
return theCatcode[(unsigned char)c];
|
||||
}
|
||||
|
||||
|
||||
} // namespace lyx
|
||||
|
@ -46,6 +46,12 @@ enum CatCode {
|
||||
catInvalid // 15 <delete>
|
||||
};
|
||||
|
||||
/// Catcode regimes a Parser can be asked to tokenize with.
enum cat_type {
	/// the normal regime
	NORMAL_CATCODES = 0,
	/// the regime used while reading verbatim material
	VERBATIM_CATCODES = 1,
	/// no regime chosen yet
	UNDECIDED_CATCODES = 2
};
|
||||
|
||||
|
||||
enum {
|
||||
FLAG_BRACE_LAST = 1 << 1, // last closing brace ends the parsing
|
||||
@ -135,6 +141,13 @@ public:
|
||||
///
|
||||
~Parser();
|
||||
|
||||
///
|
||||
CatCode catcode(char_type c) const;
|
||||
///
|
||||
void setCatcode(char c, CatCode cat);
|
||||
/// set parser to normal or verbatim mode
|
||||
void setCatcodes(cat_type t);
|
||||
|
||||
/// change the iconv encoding of the input stream
|
||||
/// according to the latex encoding and package
|
||||
void setEncoding(std::string const & encoding, int const & package);
|
||||
@ -202,11 +215,11 @@ public:
|
||||
/*!
|
||||
* \returns the contents of the environment \p name.
|
||||
* <tt>\begin{name}</tt> must be parsed already, <tt>\end{name}</tt>
|
||||
* is parsed but not returned.
|
||||
* is parsed but not returned. This parses nested environments properly.
|
||||
*/
|
||||
std::string const verbatimEnvironment(std::string const & name);
|
||||
std::string const ertEnvironment(std::string const & name);
|
||||
/*
|
||||
* The same as verbatimEnvironment(std::string const & name) but
|
||||
* The same as ertEnvironment(std::string const & name) but
|
||||
* \begin and \end commands inside the name environment are not parsed.
|
||||
* This function is designed to parse verbatim environments.
|
||||
*/
|
||||
@ -218,6 +231,14 @@ public:
|
||||
* This function is designed to parse verbatim commands.
|
||||
*/
|
||||
std::string const plainCommand(char left, char right, std::string const & name);
|
||||
/*
|
||||
* Basically the same as plainEnvironment() but the parsing is
|
||||
* stopped at string \p end_string. Contrary to the other
|
||||
* methods, this uses proper catcode setting. This function is
|
||||
* designed to parse verbatim environments and commands. The
|
||||
* intention is to eventually replace all of its siblings.
|
||||
*/
|
||||
std::string const verbatimStuff(std::string const & end_string);
|
||||
/*!
|
||||
* Returns the character of the current token and increments
|
||||
* the token position.
|
||||
@ -225,7 +246,7 @@ public:
|
||||
char getChar();
|
||||
///
|
||||
void error(std::string const & msg);
|
||||
/// Parses one token from \p is
|
||||
/// Parses one token from \p is
|
||||
void tokenize_one();
|
||||
///
|
||||
void push_back(Token const & t);
|
||||
@ -256,12 +277,10 @@ public:
|
||||
std::string verbatimOption();
|
||||
/// resets the parser to initial state
|
||||
void reset();
|
||||
///
|
||||
void setCatCode(char c, CatCode cat);
|
||||
///
|
||||
CatCode getCatCode(char c) const;
|
||||
|
||||
private:
|
||||
/// Setup catcode table
|
||||
void catInit();
|
||||
///
|
||||
int lineno_;
|
||||
///
|
||||
@ -276,6 +295,12 @@ private:
|
||||
idocstream & is_;
|
||||
/// iconv name of the current encoding
|
||||
std::string encoding_iconv_;
|
||||
///
|
||||
CatCode theCatcode_[256];
|
||||
//
|
||||
cat_type theCatcodesType_;
|
||||
//
|
||||
cat_type curr_cat_;
|
||||
};
|
||||
|
||||
|
||||
|
@ -1299,12 +1299,12 @@ void Preamble::parse(Parser & p, string const & forceclass,
|
||||
|
||||
else if (t.cs() == "makeatletter") {
|
||||
// LyX takes care of this
|
||||
p.setCatCode('@', catLetter);
|
||||
p.setCatcode('@', catLetter);
|
||||
}
|
||||
|
||||
else if (t.cs() == "makeatother") {
|
||||
// LyX takes care of this
|
||||
p.setCatCode('@', catOther);
|
||||
p.setCatcode('@', catOther);
|
||||
}
|
||||
|
||||
else if (t.cs() == "newcommand" || t.cs() == "newcommandx"
|
||||
|
@ -786,7 +786,7 @@ void parse_table(Parser & p, ostream & os, bool is_long_tabular,
|
||||
// treat the nested environment as a block, don't
|
||||
// parse &, \\ etc, because they don't belong to our
|
||||
// table if they appear.
|
||||
os << p.verbatimEnvironment(name);
|
||||
os << p.ertEnvironment(name);
|
||||
os << "\\end{" << name << '}';
|
||||
active_environments.pop_back();
|
||||
}
|
||||
@ -1227,7 +1227,7 @@ void handle_tabular(Parser & p, ostream & os, string const & name,
|
||||
angle = p.getArg('{', '}');
|
||||
}
|
||||
active_environments.push_back(env);
|
||||
p.verbatimEnvironment(env);
|
||||
p.ertEnvironment(env);
|
||||
active_environments.pop_back();
|
||||
p.skip_spaces();
|
||||
if (!p.good() && support::isStrInt(angle))
|
||||
|
@ -924,7 +924,7 @@ void parse_box(Parser & p, ostream & os, unsigned outer_flags,
|
||||
// If yes, we need to output ERT.
|
||||
p.pushPosition();
|
||||
if (inner_flags & FLAG_END)
|
||||
p.verbatimEnvironment(inner_type);
|
||||
p.ertEnvironment(inner_type);
|
||||
else
|
||||
p.verbatim_item();
|
||||
p.skip_spaces(true);
|
||||
@ -1435,27 +1435,36 @@ void parse_environment(Parser & p, ostream & os, bool outer,
|
||||
}
|
||||
|
||||
else if (name == "verbatim") {
|
||||
os << "\n\\end_layout\n\n\\begin_layout Verbatim\n";
|
||||
string const s = p.plainEnvironment("verbatim");
|
||||
// FIXME: this should go in the generic code that
|
||||
// handles environments defined in layout file that
|
||||
// have "PassThru 1". However, the code there is
|
||||
// already too complicated for my taste.
|
||||
parent_context.new_paragraph(os);
|
||||
Context context(true, parent_context.textclass,
|
||||
&parent_context.textclass[from_ascii("Verbatim")]);
|
||||
context.check_layout(os);
|
||||
string s = p.verbatimStuff("\\end{verbatim}");
|
||||
// ignore one newline at beginning or end of string
|
||||
if (prefixIs(s, "\n"))
|
||||
s.erase(0,1);
|
||||
if (suffixIs(s, "\n"))
|
||||
s.erase(s.length(),1);
|
||||
|
||||
string::const_iterator it2 = s.begin();
|
||||
for (string::const_iterator it = s.begin(), et = s.end(); it != et; ++it) {
|
||||
if (*it == '\\')
|
||||
os << "\\backslash ";
|
||||
else if (*it == '\n') {
|
||||
it2 = it + 1;
|
||||
// avoid adding an empty paragraph at the end
|
||||
// FIXME: if there are 2 consecutive spaces at the end ignore it
|
||||
// because LyX will re-add a \n
|
||||
// This hack must be removed once bug 8049 is fixed!
|
||||
if ((it + 1 != et) && (it + 2 != et || *it2 != '\n'))
|
||||
os << "\n\\end_layout\n\\begin_layout Verbatim\n";
|
||||
} else
|
||||
context.check_layout(os);
|
||||
if (*it == '\\') {
|
||||
os << "\n\\backslash\n";
|
||||
context.need_end_layout = true;
|
||||
} else if (*it == '\n') {
|
||||
context.new_paragraph(os);
|
||||
} else {
|
||||
os << *it;
|
||||
context.need_end_layout = true;
|
||||
}
|
||||
}
|
||||
os << "\n\\end_layout\n\n";
|
||||
context.new_paragraph(os);
|
||||
p.skip_spaces();
|
||||
// reset to Standard layout
|
||||
os << "\n\\begin_layout Standard\n";
|
||||
}
|
||||
|
||||
else if (name == "CJK") {
|
||||
@ -1758,7 +1767,7 @@ void parse_environment(Parser & p, ostream & os, bool outer,
|
||||
parse_arguments("\\begin{" + name + "}", arguments, p, os,
|
||||
outer, parent_context);
|
||||
if (contents == verbatim)
|
||||
handle_ert(os, p.verbatimEnvironment(name),
|
||||
handle_ert(os, p.ertEnvironment(name),
|
||||
parent_context);
|
||||
else
|
||||
parse_text_snippet(p, os, FLAG_END, outer,
|
||||
@ -3819,15 +3828,11 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
|
||||
|
||||
else if (t.cs() == "verb") {
|
||||
context.check_layout(os);
|
||||
char const delimiter = p.next_token().character();
|
||||
// \verb is special: The usual escaping rules do not
|
||||
// apply, e.g. "\verb+\+" is valid and denotes a single
|
||||
// backslash (bug #4468). Therefore we do not allow
|
||||
// escaping in getArg().
|
||||
string const arg = p.getArg(delimiter, delimiter, false);
|
||||
ostringstream oss;
|
||||
oss << "\\verb" << delimiter << arg << delimiter;
|
||||
handle_ert(os, oss.str(), context);
|
||||
// set catcodes to verbatim early, just in case.
|
||||
p.setCatcodes(VERBATIM_CATCODES);
|
||||
string delim = p.get_token().asInput();
|
||||
string const arg = p.verbatimStuff(delim);
|
||||
handle_ert(os, "\\verb" + delim + arg + delim, context);
|
||||
}
|
||||
|
||||
// Problem: \= creates a tabstop inside the tabbing environment
|
||||
@ -4574,7 +4579,7 @@ string guessLanguage(Parser & p, string const & lang)
|
||||
p.setEncoding(encoding, Encoding::CJK);
|
||||
else
|
||||
p.setEncoding("UTF-8");
|
||||
string const text = p.verbatimEnvironment("CJK");
|
||||
string const text = p.ertEnvironment("CJK");
|
||||
p.setEncoding(encoding_old);
|
||||
p.skip_spaces();
|
||||
if (!where) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user