Next step towards a working roundtrip of files generated by LyX:

Try to recognize modules.
Again, this is needed because the complete LyX preamble is ignored.
It is not possible to recognize a module in all cases, but at least the simple
ones are handled now. As a prerequisite I also had to revive the filling of
known_environments, which was removed (probably by accident) some time ago.


git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@37306 a592a061-630c-0410-9148-cb99ea01b6c8
This commit is contained in:
Georg Baum 2011-01-23 21:10:20 +00:00
parent 9129b73497
commit d3c385de20
6 changed files with 287 additions and 52 deletions

View File

@ -628,6 +628,7 @@ src_tex2lyx_copied_files = Split('''
Floating.cpp
FontInfo.cpp
Layout.cpp
LayoutFile.cpp
LayoutModuleList.cpp
lengthcommon.cpp
Lexer.cpp

View File

@ -37,6 +37,7 @@ LINKED_FILES = \
../FontInfo.cpp \
../insets/InsetLayout.cpp \
../Layout.cpp \
../LayoutFile.cpp \
../LayoutModuleList.cpp \
../lengthcommon.cpp \
../Lexer.cpp \

View File

@ -44,6 +44,7 @@ namespace lyx {
extern map<char, int> special_columns;
map<string, vector<string> > used_packages;
const char * const modules_placeholder = "\001modules\001";
// needed to handle encodings with babel
bool one_language = true;
@ -674,6 +675,7 @@ void end_preamble(ostream & os, TextClass const & /*textclass*/)
if (!h_options.empty())
os << "\\options " << h_options << "\n";
os << "\\use_default_options " << h_use_default_options << "\n"
<< modules_placeholder
<< "\\language " << h_language << "\n"
<< "\\inputencoding " << h_inputencoding << "\n"
<< "\\font_roman " << h_font_roman << "\n"
@ -795,7 +797,6 @@ void parse_preamble(Parser & p, ostream & os,
h_preamble << t.asInput();
else if (t.cat() == catComment) {
// regex to parse comments (currently not used)
static regex const islyxfile("%% LyX .* created this file");
static regex const usercommands("User specified LaTeX commands");
@ -834,9 +835,11 @@ void parse_preamble(Parser & p, ostream & os,
p.setCatCode('@', catOther);
}
else if (t.cs() == "newcommand" || t.cs() == "renewcommand"
|| t.cs() == "providecommand"
else if (t.cs() == "newcommand" || t.cs() == "newcommandx"
|| t.cs() == "renewcommand" || t.cs() == "renewcommandx"
|| t.cs() == "providecommand" || t.cs() == "providecommandx"
|| t.cs() == "DeclareRobustCommand"
|| t.cs() == "DeclareRobustCommandx"
|| t.cs() == "ProvideTextCommandDefault"
|| t.cs() == "DeclareMathAccent") {
bool star = false;
@ -863,6 +866,10 @@ void parse_preamble(Parser & p, ostream & os,
// remove leading "\"
h_font_default_family = family.erase(0,1);
}
// Add the command to the known commands
add_known_command(name, opt1, !opt2.empty(), from_utf8(body));
// only non-lyxspecific stuff
if (!in_lyx_preamble) {
ostringstream ss;
@ -872,9 +879,6 @@ void parse_preamble(Parser & p, ostream & os,
ss << '{' << name << '}' << opt1 << opt2
<< '{' << body << "}";
h_preamble << ss.str();
// Add the command to the known commands
add_known_command(name, opt1, !opt2.empty());
/*
ostream & out = in_preamble ? h_preamble : os;
out << "\\" << t.cs() << "{" << name << "}"
@ -897,7 +901,7 @@ void parse_preamble(Parser & p, ostream & os,
// options.
handle_opt(opts, known_languages, h_language);
delete_opt(opts, known_languages);
// paper orientation
if ((it = find(opts.begin(), opts.end(), "landscape")) != opts.end()) {
h_paperorientation = "landscape";
@ -932,6 +936,8 @@ void parse_preamble(Parser & p, ostream & os,
delete_opt(opts, known_class_paper_sizes);
// the remaining options
h_options = join(opts, ",");
// FIXME This does not work for classes that have a
// different name in LyX than in LaTeX
h_textclass = p.getArg('{', '}');
}
@ -955,14 +961,18 @@ void parse_preamble(Parser & p, ostream & os,
else if (t.cs() == "newenvironment") {
string const name = p.getArg('{', '}');
ostringstream ss;
ss << "\\newenvironment{" << name << "}";
ss << p.getOpt();
ss << p.getOpt();
ss << '{' << p.verbatim_item() << '}';
ss << '{' << p.verbatim_item() << '}';
if (!in_lyx_preamble)
h_preamble << ss.str();
string const opt1 = p.getOpt();
string const opt2 = p.getOpt();
string const beg = p.verbatim_item();
string const end = p.verbatim_item();
if (!in_lyx_preamble) {
h_preamble << "\\newenvironment{" << name
<< '}' << opt1 << opt2 << '{'
<< beg << "}{" << end << '}';
}
add_known_environment(name, opt1, !opt2.empty(),
from_utf8(beg), from_utf8(end));
}
else if (t.cs() == "def") {
@ -1146,12 +1156,11 @@ void parse_preamble(Parser & p, ostream & os,
h_textclass = forceclass;
if (noweb_mode && !prefixIs(h_textclass, "literate-"))
h_textclass.insert(0, "literate-");
FileName layoutfilename = libFileSearch("layouts", h_textclass, "layout");
if (layoutfilename.empty()) {
cerr << "Error: Could not find layout file for textclass \"" << h_textclass << "\"." << endl;
exit(1);
tc.setName(h_textclass);
if (!tc.load()) {
cerr << "Error: Could not read layout file for textclass \"" << h_textclass << "\"." << endl;
exit(EXIT_FAILURE);
}
tc.read(layoutfilename);
if (h_papersides.empty()) {
ostringstream ss;
ss << tc.sides();

View File

@ -17,6 +17,9 @@
#include "Context.h"
#include "Encoding.h"
#include "Layout.h"
#include "LayoutFile.h"
#include "LayoutModuleList.h"
#include "ModuleList.h"
#include "TextClass.h"
#include "support/convert.h"
@ -135,13 +138,18 @@ string active_environment()
}
TeX2LyXDocClass textclass;
CommandMap known_commands;
CommandMap known_environments;
CommandMap known_math_environments;
FullCommandMap possible_textclass_commands;
FullEnvironmentMap possible_textclass_environments;
/// used modules
LayoutModuleList used_modules;
void add_known_command(string const & command, string const & o1,
bool o2)
void convertArgs(string const & o1, bool o2, vector<ArgumentType> & arguments)
{
// We have to handle the following cases:
// definition o1 o2 invocation result
@ -151,7 +159,6 @@ void add_known_command(string const & command, string const & o1,
// \newcommand{\foo}[1][]{bar #1} "[1]" true \foo[x] bar x
// \newcommand{\foo}[1][x]{bar #1} "[1]" true \foo[x] bar x
unsigned int nargs = 0;
vector<ArgumentType> arguments;
string const opt1 = rtrim(ltrim(o1, "["), "]");
if (isStrUnsignedInt(opt1)) {
// The command has arguments
@ -164,7 +171,155 @@ void add_known_command(string const & command, string const & o1,
}
for (unsigned int i = 0; i < nargs; ++i)
arguments.push_back(required);
}
/*!
 * Registers \p command in known_commands with the argument list derived
 * from \p o1 and \p o2 (see convertArgs()).
 * If \p definition is not empty, the command is additionally stored together
 * with its definition in possible_textclass_commands.
 */
void add_known_command(string const & command, string const & o1,
		       bool o2, docstring const & definition)
{
	vector<ArgumentType> args;
	convertArgs(o1, o2, args);
	known_commands[command] = args;

	// Nothing more to record if we do not know the definition.
	if (definition.empty())
		return;
	possible_textclass_commands[command] = FullCommand(args, definition);
}
/*!
 * Registers \p environment in known_environments with the argument list
 * derived from \p o1 and \p o2 (see convertArgs()).
 * If at least one of \p beg and \p end is not empty, the environment is
 * additionally stored together with both definitions in
 * possible_textclass_environments.
 */
void add_known_environment(string const & environment, string const & o1,
			   bool o2, docstring const & beg, docstring const &end)
{
	vector<ArgumentType> args;
	convertArgs(o1, o2, args);
	known_environments[environment] = args;

	// Nothing more to record if both definitions are unknown.
	if (beg.empty() && end.empty())
		return;
	possible_textclass_environments[environment] =
		FullEnvironment(args, beg, end);
}
/*!
 * Searches the layouts of \p textclass for the first one whose LaTeX name is
 * \p name and which is a command (if \p command is true) or an environment
 * (if \p command is false).
 * Only \p textclass itself is inspected, no attempt is made to load a module.
 * \return the matching layout, or 0 if there is none.
 */
Layout const * findLayoutWithoutModule(TextClass const & textclass,
				       string const & name, bool command)
{
	DocumentClass::const_iterator const last = textclass.end();
	for (DocumentClass::const_iterator cur = textclass.begin();
	     cur != last; ++cur) {
		if (!(cur->latexname() == name))
			continue;
		bool const kind_matches =
			command ? cur->isCommand() : cur->isEnvironment();
		if (kind_matches)
			return &*cur;
	}
	return 0;
}
/*!
 * Searches the inset layouts of \p textclass for the first one whose LaTeX
 * name is \p name and whose LaTeX type is InsetLayout::COMMAND (if
 * \p command is true) or InsetLayout::ENVIRONMENT (if \p command is false).
 * Only \p textclass itself is inspected, no attempt is made to load a module.
 * \return the matching inset layout, or 0 if there is none.
 */
InsetLayout const * findInsetLayoutWithoutModule(TextClass const & textclass,
						 string const & name, bool command)
{
	DocumentClass::InsetLayouts::const_iterator const last =
		textclass.insetLayouts().end();
	for (DocumentClass::InsetLayouts::const_iterator cur =
		     textclass.insetLayouts().begin(); cur != last; ++cur) {
		InsetLayout const & candidate = cur->second;
		if (!(candidate.latexname() == name))
			continue;
		bool const kind_matches = command
			? candidate.latextype() == InsetLayout::COMMAND
			: candidate.latextype() == InsetLayout::ENVIRONMENT;
		if (kind_matches)
			return &candidate;
	}
	return 0;
}
/*!
 * Checks whether a module provides the command (if \p command is true) or
 * the environment (if \p command is false) \p name. If one is found, the
 * module is read into the global \c textclass and appended to used_modules.
 *
 * A module is only accepted if
 *  - \p name was recorded in possible_textclass_commands (looked up with a
 *    leading backslash) or possible_textclass_environments,
 *  - \p name is not already provided by \c textclass,
 *  - the module can be added to used_modules, and
 *  - the preamble of the module's layout (or inset layout) for \p name
 *    contains the recorded definition.
 *
 * Names for which no module could be found are cached, so repeated calls
 * for the same name return quickly.
 *
 * NOTE: the list of candidate modules is built only once, on the first call
 * that gets past the early exits, from the base class that \c textclass
 * names at that time.
 *
 * \return true if a module was added, false otherwise.
 */
bool checkModule(string const & name, bool command)
{
	// Cache to avoid slowdown by repeated searches
	static set<string> failed[2];

	// Only names whose definition was recorded in possible_textclass_*
	// are candidates. Resolve the recorded definition once here instead
	// of once per module in the loop below.
	FullCommand const * cmd = 0;
	FullEnvironment const * env = 0;
	if (command) {
		string const key = '\\' + name;
		if (possible_textclass_commands.find(key) ==
		    possible_textclass_commands.end())
			return false;
		cmd = &possible_textclass_commands[key];
	} else {
		if (possible_textclass_environments.find(name) ==
		    possible_textclass_environments.end())
			return false;
		env = &possible_textclass_environments[name];
	}
	if (failed[command].find(name) != failed[command].end())
		return false;

	// Create list of dummy document classes if not already done.
	// This is needed since a module cannot be read on its own, only as
	// part of a document class.
	LayoutFile const & baseClass = LayoutFileList::get()[textclass.name()];
	typedef map<string, DocumentClass *> ModuleMap;
	static ModuleMap modules;
	static bool init = true;
	if (init) {
		baseClass.load();
		DocumentClassBundle & bundle = DocumentClassBundle::get();
		LyXModuleList::const_iterator const end = theModuleList.end();
		LyXModuleList::const_iterator it = theModuleList.begin();
		for (; it != end; ++it) {
			string const module = it->getID();
			LayoutModuleList m;
			// FIXME this excludes all modules that depend on another one
			if (!m.moduleCanBeAdded(module, &baseClass))
				continue;
			m.push_back(module);
			modules[module] = &bundle.makeDocumentClass(baseClass, m);
		}
		init = false;
	}

	// If the text class already provides the name, no module is needed.
	// textclass is only changed by the read() call below, so this needs
	// to be tested once and not for every module.
	if (findLayoutWithoutModule(textclass, name, command) ||
	    findInsetLayoutWithoutModule(textclass, name, command)) {
		failed[command].insert(name);
		return false;
	}

	// Try to find a module that defines the command.
	// Only add it if the definition can be found in the preamble of the
	// style that corresponds to the command. This is a heuristic and
	// different from the way how we parse the builtin commands of the
	// text class (in that case we only compare the name), but it is
	// needed since it is not unlikely that two different modules define a
	// command with the same name.
	ModuleMap::iterator const end = modules.end();
	for (ModuleMap::iterator it = modules.begin(); it != end; ++it) {
		string const & module = it->first;
		if (!used_modules.moduleCanBeAdded(module, &baseClass))
			continue;

		DocumentClass const * c = it->second;
		Layout const * layout = findLayoutWithoutModule(*c, name, command);
		InsetLayout const * insetlayout = layout ? 0 :
			findInsetLayoutWithoutModule(*c, name, command);
		docstring preamble;
		if (layout)
			preamble = layout->preamble();
		else if (insetlayout)
			preamble = insetlayout->preamble();
		if (preamble.empty())
			continue;

		bool matches;
		if (command)
			matches = preamble.find(cmd->def) != docstring::npos;
		else
			matches = preamble.find(env->beg) != docstring::npos &&
				  preamble.find(env->end) != docstring::npos;
		if (!matches)
			continue;

		FileName layout_file = libFileSearch("layouts", module, "module");
		if (textclass.read(layout_file, TextClass::MODULE)) {
			used_modules.push_back(module);
			// speed up further searches:
			// the module does not need to be checked anymore.
			// (module refers into *it, so it must not be used
			// after the erase.)
			modules.erase(it);
			return true;
		}
		// NOTE(review): presumably a failed read() can leave textclass
		// partially modified. Repeat the test from above so that the
		// remaining modules are skipped in that case, as before.
		if (findLayoutWithoutModule(textclass, name, command) ||
		    findInsetLayoutWithoutModule(textclass, name, command))
			break;
	}
	failed[command].insert(name);
	return false;
}
@ -460,12 +615,12 @@ void tex2lyx(idocstream & is, ostream & os, string const & encoding)
p.setEncoding(encoding);
//p.dump();
stringstream ss;
TeX2LyXDocClass textclass;
parse_preamble(p, ss, documentclass, textclass);
ostringstream ps;
parse_preamble(p, ps, documentclass, textclass);
active_environments.push_back("document");
Context context(true, textclass);
stringstream ss;
parse_text(p, ss, FLAG_END, true, context);
if (Context::empty)
// Empty document body. LyX needs at least one paragraph.
@ -473,6 +628,19 @@ void tex2lyx(idocstream & is, ostream & os, string const & encoding)
context.check_end_layout(ss);
ss << "\n\\end_body\n\\end_document\n";
active_environments.pop_back();
// We know the used modules only after parsing the full text
ostringstream ms;
if (!used_modules.empty()) {
ms << "\\begin_modules\n";
LayoutModuleList::const_iterator const end = used_modules.end();
LayoutModuleList::const_iterator it = used_modules.begin();
for (; it != end; it++)
ms << *it << '\n';
ms << "\\end_modules\n";
}
os << subst(ps.str(), modules_placeholder, ms.str());
ss.seekg(0);
os << ss.str();
#ifdef TEST_PARSER
@ -637,6 +805,11 @@ int main(int argc, char * argv[])
if (!default_encoding.empty() && !encodings.fromLaTeXName(default_encoding))
error_message("Unknown LaTeX encoding `" + default_encoding + "'");
// Load the layouts
LayoutFileList::get().read();
//...and the modules
theModuleList.read();
// The real work now.
masterFilePath = onlyPath(infilename);
parentFilePath = masterFilePath;

View File

@ -38,7 +38,10 @@ class Context;
/// A trivial subclass, just to give us a public default constructor
class TeX2LyXDocClass : public DocumentClass
{};
{
public:
void setName(std::string const & name) { name_ = name; }
};
/// in preamble.cpp
void parse_preamble(Parser & p, std::ostream & os,
@ -48,6 +51,7 @@ extern std::string babel2lyx(std::string const & language);
/// used packages with options
extern std::map<std::string, std::vector<std::string> > used_packages;
extern const char * const modules_placeholder;
/// in text.cpp
std::string translate_len(std::string const &);
@ -89,13 +93,27 @@ char const * const * is_known(std::string const &, char const * const *);
/*!
* Adds the command \p command to the list of known commands.
* \param o1 first optional parameter to the latex command \newcommand
* \param o1 first optional parameter to the latex command \\newcommand
* (with brackets), or the empty string if there were no optional arguments.
* \param o2 wether \newcommand had a second optional parameter
* \param o2 whether \\newcommand had a second optional parameter.
* If \p definition is not empty the command is assumed to be from the LyX
* preamble and added to possible_textclass_commands.
*/
void add_known_command(std::string const & command, std::string const & o1,
bool o2);
bool o2, docstring const & definition = docstring());
extern void add_known_environment(std::string const & environment,
std::string const & o1, bool o2, docstring const & beg,
docstring const & end);
extern Layout const * findLayoutWithoutModule(TextClass const & textclass,
std::string const & name, bool command);
extern InsetLayout const * findInsetLayoutWithoutModule(
TextClass const & textclass, std::string const & name, bool command);
/*!
* Check whether a module provides command (if \p command is true) or
* environment (if \p command is false) \p name, and add the module to the
* list of used modules if yes.
*/
extern bool checkModule(std::string const & name, bool command);
// Access to environment stack
extern std::vector<std::string> active_environments;
std::string active_environment();
@ -107,7 +125,29 @@ enum ArgumentType {
optional
};
/// A command definition: its argument list together with the definition.
class FullCommand {
public:
	/// The types of the arguments of the command
	std::vector<ArgumentType> args;
	/// The definition of the command
	docstring def;

	/// Creates a command without arguments and with empty definition.
	FullCommand() : args(), def() {}
	/// Creates a command with arguments \p a and definition \p d.
	FullCommand(std::vector<ArgumentType> const & a, docstring const & d)
		: args(a), def(d)
	{}
};
/// An environment definition: its argument list together with the
/// definitions of the begin and end part.
class FullEnvironment {
public:
	/// The types of the arguments of the environment
	std::vector<ArgumentType> args;
	/// The definition of the begin part
	docstring beg;
	/// The definition of the end part
	docstring end;

	/// Creates an environment without arguments and with empty definitions.
	FullEnvironment() : args(), beg(), end() {}
	/// Creates an environment with arguments \p a, begin definition \p b
	/// and end definition \p e.
	FullEnvironment(std::vector<ArgumentType> const & a,
			docstring const & b, docstring const & e)
		: args(a), beg(b), end(e)
	{}
};
typedef std::map<std::string, std::vector<ArgumentType> > CommandMap;
typedef std::map<std::string, FullCommand> FullCommandMap;
typedef std::map<std::string, FullEnvironment> FullEnvironmentMap;
/// Known TeX commands with arguments that get parsed into ERT.
extern CommandMap known_commands;
@ -115,6 +155,10 @@ extern CommandMap known_commands;
extern CommandMap known_environments;
/// Known TeX math environments with arguments that get parsed into LyX mathed.
extern CommandMap known_math_environments;
/// Commands that might be defined by the document class or modules
extern FullCommandMap possible_textclass_commands;
/// Environments that might be defined by the document class or modules
extern FullEnvironmentMap possible_textclass_environments;
///
extern bool noweb_mode;
/// Did we recognize any pdflatex-only construct?

View File

@ -452,27 +452,25 @@ void handle_comment(ostream & os, string const & s, Context & context)
}
Layout const * findLayout(TextClass const & textclass, string const & name)
Layout const * findLayout(TextClass const & textclass, string const & name, bool command)
{
DocumentClass::const_iterator lit = textclass.begin();
DocumentClass::const_iterator len = textclass.end();
for (; lit != len; ++lit)
if (lit->latexname() == name)
return &*lit;
return 0;
Layout const * layout = findLayoutWithoutModule(textclass, name, command);
if (layout)
return layout;
if (checkModule(name, command))
return findLayoutWithoutModule(textclass, name, command);
return layout;
}
InsetLayout const * findInsetLayout(TextClass const & textclass, string const & name, bool command)
{
DocumentClass::InsetLayouts::const_iterator it = textclass.insetLayouts().begin();
DocumentClass::InsetLayouts::const_iterator en = textclass.insetLayouts().end();
for (; it != en; ++it)
if (it->second.latexname() == name &&
((command && it->second.latextype() == InsetLayout::COMMAND) ||
(!command && it->second.latextype() == InsetLayout::ENVIRONMENT)))
return &(it->second);
return 0;
InsetLayout const * insetlayout = findInsetLayoutWithoutModule(textclass, name, command);
if (insetlayout)
return insetlayout;
if (checkModule(name, command))
return findInsetLayoutWithoutModule(textclass, name, command);
return insetlayout;
}
@ -952,6 +950,7 @@ void parse_environment(Parser & p, ostream & os, bool outer,
string & last_env, Context & parent_context)
{
Layout const * newlayout;
InsetLayout const * newinsetlayout = 0;
string const name = p.getArg('{', '}');
const bool is_starred = suffixIs(name, '*');
string const unstarred_name = rtrim(name, "*");
@ -1069,8 +1068,7 @@ void parse_environment(Parser & p, ostream & os, bool outer,
}
// The single '=' is meant here.
else if ((newlayout = findLayout(parent_context.textclass, name)) &&
newlayout->isEnvironment()) {
else if ((newlayout = findLayout(parent_context.textclass, name, false))) {
eat_whitespace(p, os, parent_context, false);
Context context(true, parent_context.textclass, newlayout,
parent_context.layout, parent_context.font);
@ -1129,6 +1127,17 @@ void parse_environment(Parser & p, ostream & os, bool outer,
p.skip_spaces();
}
// The single '=' is meant here.
else if ((newinsetlayout = findInsetLayout(parent_context.textclass, name, false))) {
eat_whitespace(p, os, parent_context, false);
parent_context.check_layout(os);
begin_inset(os, "Flex ");
os << to_utf8(newinsetlayout->name()) << '\n'
<< "status collapsed\n";
parse_text_in_inset(p, os, FLAG_END, false, parent_context, newinsetlayout);
end_inset(os);
}
else if (name == "appendix") {
// This is no good latex style, but it works and is used in some documents...
eat_whitespace(p, os, parent_context, false);
@ -1905,8 +1914,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
// Must attempt to parse "Section*" before "Section".
else if ((p.next_token().asInput() == "*") &&
context.new_layout_allowed &&
(newlayout = findLayout(context.textclass, t.cs() + '*')) &&
newlayout->isCommand()) {
(newlayout = findLayout(context.textclass, t.cs() + '*', true))) {
// write the layout
p.get_token();
output_command_layout(os, p, outer, context, newlayout);
@ -1915,8 +1923,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
// Section headings and the like
else if (context.new_layout_allowed &&
(newlayout = findLayout(context.textclass, t.cs())) &&
newlayout->isCommand()) {
(newlayout = findLayout(context.textclass, t.cs(), true))) {
// write the layout
output_command_layout(os, p, outer, context, newlayout);
p.skip_spaces();