Fix encoding for copying LaTeX from clipboard

If we call tex2lyx on a temporary file created from the clipboard, the
file is always encoded in UTF-8 throughout, without any temporary encoding
changes, even if it contains encoding-changing LaTeX commands. Therefore, we
must tell tex2lyx to use a fixed UTF-8 encoding for the whole file, and this
is done using the new latexclipboard format. Previously, tex2lyx assumed the
encoding was latin1.
As a side effect, the -e option is now also documented in the man page.
This commit is contained in:
Georg Baum 2013-11-11 21:52:14 +01:00
parent 73ab0fb665
commit 14a5d07df6
6 changed files with 62 additions and 11 deletions

View File

@ -553,6 +553,7 @@ def checkFormatEntries(dtl_tools):
\Format luatex tex "LaTeX (LuaTeX)" "" "" "%%" "document,menu=export" ""
\Format pdflatex tex "LaTeX (pdflatex)" "" "" "%%" "document,menu=export" ""
\Format xetex tex "LaTeX (XeTeX)" "" "" "%%" "document,menu=export" ""
\Format latexclipboard tex "LaTeX (clipboard)" "" "" "%%" "" ""
\Format text txt "Plain text" a "" "%%" "document,menu=export" "text/plain"
\Format text2 txt "Plain text (pstotext)" "" "" "%%" "document" ""
\Format text3 txt "Plain text (ps2ascii)" "" "" "%%" "document" ""
@ -665,6 +666,7 @@ def checkConverterEntries():
path, t2l = checkProg('a LaTeX/Noweb -> LyX converter', [in_binary_subdir, in_binary_subdir + version_suffix, in_binary_dir, in_binary_dir + version_suffix, 'tex2lyx' + version_suffix, 'tex2lyx'],
rc_entry = [r'''\converter latex lyx "%% -f $$i $$o" ""
\converter latexclipboard lyx "%% -fixedenc utf8 -f $$i $$o" ""
\converter literate lyx "%% -n -m noweb -f $$i $$o" ""'''], not_found = 'tex2lyx')
if path == '':
logger.warning("Failed to find tex2lyx on your system.")

View File

@ -1096,7 +1096,7 @@ bool pasteClipboardText(Cursor & cur, ErrorList & errorList, bool asParagraphs,
// Then try TeX and HTML
Clipboard::TextType types[2] = {Clipboard::HtmlTextType, Clipboard::LaTeXTextType};
string names[2] = {"html", "latex"};
string names[2] = {"html", "latexclipboard"};
for (int i = 0; i < 2; ++i) {
if (type != types[i] && type != Clipboard::AnyTextType)
continue;

View File

@ -154,10 +154,14 @@ iparserdocstream & iparserdocstream::get(char_type &c)
//
Parser::Parser(idocstream & is)
: lineno_(0), pos_(0), iss_(0), is_(is), encoding_iconv_("UTF-8"),
theCatcodesType_(NORMAL_CATCODES), curr_cat_(UNDECIDED_CATCODES)
/// Construct a parser that reads its input from the stream \p is.
/// \param is       the document stream to read from (stored by reference in is_).
/// \param fixedenc name of an encoding to use for the whole input, or an
///                 empty string if the encoding is not fixed. When it is
///                 non-empty, fixed_enc_ is set and the stream is switched
///                 to that encoding right away.
Parser::Parser(idocstream & is, std::string const & fixedenc)
: lineno_(0), pos_(0), iss_(0), is_(is),
// Without a fixed encoding the recorded encoding name starts out as "UTF-8".
encoding_iconv_(fixedenc.empty() ? "UTF-8" : fixedenc),
theCatcodesType_(NORMAL_CATCODES), curr_cat_(UNDECIDED_CATCODES),
// A non-empty fixedenc marks the encoding as fixed.
fixed_enc_(!fixedenc.empty())
{
// Apply the fixed encoding to the stream once, here; setEncoding() leaves
// the stream encoding alone afterwards when fixed_enc_ is set.
if (fixed_enc_)
is_.setEncoding(fixedenc);
}
@ -165,7 +169,9 @@ Parser::Parser(string const & s)
: lineno_(0), pos_(0),
iss_(new idocstringstream(from_utf8(s))), is_(*iss_),
encoding_iconv_("UTF-8"),
theCatcodesType_(NORMAL_CATCODES), curr_cat_(UNDECIDED_CATCODES)
theCatcodesType_(NORMAL_CATCODES), curr_cat_(UNDECIDED_CATCODES),
// An idocstringstream can not change the encoding
fixed_enc_(true)
{
}
@ -261,7 +267,12 @@ bool Parser::setEncoding(std::string const & e)
{
//cerr << "setting encoding to " << e << std::endl;
encoding_iconv_ = e;
is_.setEncoding(e);
// If the encoding is fixed, we must not change the stream encoding
// (because the whole input uses that encoding, e.g. if it comes from
// the clipboard). We still need to track the original encoding in
// encoding_iconv_, so that the generated output is correct.
if (!fixed_enc_)
is_.setEncoding(e);
return true;
}

View File

@ -172,7 +172,7 @@ class Parser {
Parser & operator=(Parser const & p);
public:
///
Parser(idocstream & is);
Parser(idocstream & is, std::string const & fixedenc);
///
Parser(std::string const & s);
///
@ -351,10 +351,12 @@ private:
std::string encoding_iconv_;
///
CatCode theCatcode_[256];
//
///
cat_type theCatcodesType_;
//
///
cat_type curr_cat_;
///
bool fixed_enc_;
};

View File

@ -21,6 +21,8 @@ options.
.PP
\fBtex2lyx\fR [ \fB\-userdir\fR \fIuserdir\fR ] [ \fB\-systemdir\fR \fIsystemdir\fR ]
[ \fB\-f\fR ] [ \fB\-n\fR ] [ \fB\-c\fR \fItextclass\fR ]
[ \fB\-e\fR \fIencoding\fR ]
[ \fB\-fixedenc\fR \fIencoding\fR ]
[\ \fB\-m\fR \fImodule1\fR[,\fImodule2\fR...]]
[\ \fB\-s\fR\ \fIsfile1\fR[,\fIsfile2\fR...]] [ \fB\-skipchildren\fR ] [
\fB\-roundtrip\fR ] [ \fB\-copyfiles\fR ] \fIinputfile\fR [ \fIoutputfile\fR ]
@ -53,6 +55,15 @@ are loaded in the given order. If a module \fIfoo\fR depends on a module
Force. \fBtex2lyx\fR will not run if the .lyx file it would generate already exists.
Use the \fB\-f\fR option (carefully) to clobber any existing files.
.TP
.BI \-e
Specify the default encoding using the LaTeX name as defined in the encodings
file. \fBtex2lyx\fR will start with this encoding, but switch to another one
whenever it finds an encoding-changing command in the input.
.TP
.BI \-fixedenc
Specify the encoding using the LaTeX name as defined in the encodings file.
\fBtex2lyx\fR will use this encoding for the whole input and ignore any
encoding-changing commands it contains.
.TP
.BI \-n
Noweb. Translate a noweb (aka literate programming) file. This should be
(almost?) equivalent to running \*[lq]noweb2lyx foo.tex foo.lyx\*[rq]. This option
@ -419,10 +430,14 @@ is your personal LyX directory, then the following files are read by tex2lyx:
User's personal layout files for document classes
.IP "\fI\s-1MY_LYXDIR\s0\fR/syntax.default" 4
User's personal syntax file
.IP "\fI\s-1MY_LYXDIR\s0\fR/encodings" 4
User's personal encoding definition file
.IP "\fI\s-1LIBDIR\s0\fR/layouts/*.layout" 4
System-wide layout files for document classes
.IP "\fI\s-1LIBDIR\s0\fR/lib/syntax.default" 4
System-wide LaTeX syntax file
.IP "\fI\s-1LIBDIR\s0\fR/lib/encodings" 4
System-wide encoding definition file
.SH "SEE ALSO"
\fIlyx@version_suffix@\fR\|(1), \fIlatex\fR\|(1)
.SH "AUTHORS"

View File

@ -494,7 +494,7 @@ void read_syntaxfile(FileName const & file_name)
// modeled after TeX.
// Unknown tokens are just silently ignored, this helps us to skip some
// reLyX specific things.
Parser p(is);
Parser p(is, string());
while (p.good()) {
Token const & t = p.get_token();
if (t.cat() == catEscape) {
@ -519,6 +519,7 @@ void read_syntaxfile(FileName const & file_name)
string documentclass;
string default_encoding;
bool fixed_encoding = false;
string syntaxfile;
bool copy_files = false;
bool overwrite_files = false;
@ -537,6 +538,7 @@ int parse_help(string const &, string const &)
"\t-m mod1[,mod2...] Load the given modules.\n"
"\t-copyfiles Copy all included files to the directory of outfile.lyx.\n"
"\t-e encoding Set the default encoding (latex name).\n"
"\t-fixedenc encoding Like -e, but ignore encoding changing commands while parsing.\n"
"\t-f Force overwrite of .lyx files.\n"
"\t-help Print this message and quit.\n"
"\t-n translate literate programming (noweb, sweave,... ) file.\n"
@ -605,6 +607,16 @@ int parse_encoding(string const & arg, string const &)
}
/// Command line handler for the -fixedenc switch.
/// \param arg the encoding name given after the switch; an empty value is
///            reported through error_message().
/// Stores \p arg as the default encoding and records that the encoding is
/// fixed, i.e. that encoding changing commands are to be ignored while
/// parsing.
/// \return always 1 (presumably the number of extra arguments consumed --
///         confirm against the option dispatcher).
int parse_fixed_encoding(string const & arg, string const &)
{
	bool const have_encoding = !arg.empty();
	if (!have_encoding)
		error_message("Missing encoding string after -fixedenc switch");

	// Both settings belong together: the encoding name and the flag that
	// makes it binding for the whole input.
	fixed_encoding = true;
	default_encoding = arg;
	return 1;
}
int parse_syntaxfile(string const & arg, string const &)
{
if (arg.empty())
@ -686,6 +698,7 @@ void easyParse(int & argc, char * argv[])
cmdmap["-c"] = parse_class;
cmdmap["-m"] = parse_module;
cmdmap["-e"] = parse_encoding;
cmdmap["-fixedenc"] = parse_fixed_encoding;
cmdmap["-f"] = parse_force;
cmdmap["-s"] = parse_syntaxfile;
cmdmap["-n"] = parse_noweb;
@ -775,6 +788,14 @@ bool roundtripMode()
}
/// Name of the fixed encoding requested with -fixedenc.
/// \return the stored default encoding if a fixed encoding was requested,
///         otherwise an empty string.
string fixedEncoding()
{
	return fixed_encoding ? default_encoding : string();
}
namespace {
/*!
@ -803,7 +824,7 @@ bool tex2lyx(idocstream & is, ostream & os, string encoding)
}
}
Parser p(is);
Parser p(is, fixed_encoding ? default_encoding : string());
p.setEncoding(encoding);
//p.dump();