If LyX does not know about a given file format, it may easily
happen that the format is recognized as "latex" and this causes
bug #9146. This patch limits the check for a latex format to
non-binary files. The strategy for deciding that a file has
binary content is the same as that adopted by the "less" program.
This commit is contained in:
Enrico Forestieri 2014-06-05 17:33:25 +02:00
parent ac23aed50e
commit 7d31194085
3 changed files with 46 additions and 4 deletions

View File

@ -241,12 +241,17 @@ string guessFormatFromContents(FileName const & fn)
int const max_count = 50; int const max_count = 50;
int count = 0; int count = 0;
// Maximum number of binary chars allowed for latex detection
int const max_bin = 5;
string str; string str;
string format; string format;
bool firstLine = true; bool firstLine = true;
bool backslash = false; bool backslash = false;
bool maybelatex = false;
int binchars = 0;
int dollars = 0; int dollars = 0;
while ((count++ < max_count) && format.empty()) { while ((count++ < max_count) && format.empty() && binchars <= max_bin) {
if (ifs.eof()) if (ifs.eof())
break; break;
@ -364,16 +369,20 @@ string guessFormatFromContents(FileName const & fn)
contains(str, "$$") || contains(str, "$$") ||
contains(str, "\\[") || contains(str, "\\[") ||
contains(str, "\\]")) contains(str, "\\]"))
format = "latex"; maybelatex = true;
else { else {
if (contains(str, '\\')) if (contains(str, '\\'))
backslash = true; backslash = true;
dollars += count_char(str, '$'); dollars += count_char(str, '$');
if (backslash && dollars > 1)
// inline equation
maybelatex = true;
} }
binchars += count_bin_chars(str);
} }
if (format.empty() && backslash && dollars > 1) if (format.empty() && binchars <= max_bin && maybelatex)
// inline equation
format = "latex"; format = "latex";
if (format.empty()) { if (format.empty()) {

View File

@ -943,6 +943,31 @@ int count_char(docstring const & str, docstring::value_type chr)
} }
/** Count the "binary" characters contained in \a str.
 *
 *  A character is treated as binary when its Unicode general category is
 *  one of Zl, Zp, Cc, Cf, Cs, Co or Cn. White space never contributes to
 *  the count, because the string is simplified before being scanned.
 *
 *  \param str the text to inspect (presumably utf-8 encoded; the decoding
 *         is delegated to toqstr() -- confirm against its definition).
 *  \return the number of binary characters found.
 */
int count_bin_chars(string const & str)
{
	// simplified() strips leading/trailing white space and collapses
	// internal runs of it into a single space, so that '\t', '\n', '\v',
	// '\f' and '\r' (all of category Cc) are not mistaken for binary data.
	QString const qstr = toqstr(str).simplified();
	int count = 0;
	QString::const_iterator cit = qstr.begin();
	QString::const_iterator const end = qstr.end();
	while (cit != end) {
		QChar::Category cat = cit->category();
		QString::const_iterator const next = cit + 1;
		// QString stores UTF-16 code units: a character outside the BMP
		// is a high/low surrogate pair. Classify a well-formed pair by
		// the code point it encodes rather than counting each half as a
		// surrogate; lone surrogates keep the Other_Surrogate category
		// and are still counted.
		if (cit->isHighSurrogate() && next != end
		    && next->isLowSurrogate()) {
			cat = QChar::category(QChar::surrogateToUcs4(*cit, *next));
			// Skip the low surrogate, it has been consumed.
			++cit;
		}
		++cit;
		switch (cat) {
		case QChar::Separator_Line:
		case QChar::Separator_Paragraph:
		case QChar::Other_Control:
		case QChar::Other_Format:
		case QChar::Other_Surrogate:
		case QChar::Other_PrivateUse:
		case QChar::Other_NotAssigned:
			++count;
			break;
		default:
			break;
		}
	}
	return count;
}
docstring const trim(docstring const & a, char const * p) docstring const trim(docstring const & a, char const * p)
{ {
LASSERT(p, return a); LASSERT(p, return a);

View File

@ -199,6 +199,14 @@ int count_char(std::string const & str, char chr);
/// Count all occurences of char \a chr inside \a str /// Count all occurences of char \a chr inside \a str
int count_char(docstring const & str, docstring::value_type chr); int count_char(docstring const & str, docstring::value_type chr);
/** Count all occurrences of binary chars inside \a str.
It is assumed that \a str is utf-8 encoded. A char is considered binary
when it belongs to one of the Unicode general categories Zl, Zp, Cc, Cf,
Cs, Co, or Cn (excluding white space characters such as '\t', '\n',
'\v', '\f', '\r').
See http://www.unicode.org/Public/6.2.0/ucd/UnicodeData.txt
\return the number of binary chars found in \a str.
*/
int count_bin_chars(std::string const & str);
/** Trims characters off the end and beginning of a string. /** Trims characters off the end and beginning of a string.
\code \code
trim("ccabccc", "c") == "ab". trim("ccabccc", "c") == "ab".