From 7d31194085d8665fd10cc2e8fe4a6330b3cd338f Mon Sep 17 00:00:00 2001 From: Enrico Forestieri Date: Thu, 5 Jun 2014 17:33:25 +0200 Subject: [PATCH] Fix bug #9146. If LyX does not know about a given file format, it may easily happen that the format is recognized as "latex" and this causes bug #9146. This patch limits the check for a latex format to non-binary files. The strategy for deciding that a file has binary content is the same as that adopted by the "less" program. --- src/Format.cpp | 17 +++++++++++++---- src/support/lstrings.cpp | 25 +++++++++++++++++++++++++ src/support/lstrings.h | 8 ++++++++ 3 files changed, 46 insertions(+), 4 deletions(-) diff --git a/src/Format.cpp b/src/Format.cpp index e2d23b5c65..ff7fb34613 100644 --- a/src/Format.cpp +++ b/src/Format.cpp @@ -241,12 +241,17 @@ string guessFormatFromContents(FileName const & fn) int const max_count = 50; int count = 0; + // Maximum number of binary chars allowed for latex detection + int const max_bin = 5; + string str; string format; bool firstLine = true; bool backslash = false; + bool maybelatex = false; + int binchars = 0; int dollars = 0; - while ((count++ < max_count) && format.empty()) { + while ((count++ < max_count) && format.empty() && binchars <= max_bin) { if (ifs.eof()) break; @@ -364,16 +369,20 @@ string guessFormatFromContents(FileName const & fn) contains(str, "$$") || contains(str, "\\[") || contains(str, "\\]")) - format = "latex"; + maybelatex = true; else { if (contains(str, '\\')) backslash = true; dollars += count_char(str, '$'); + if (backslash && dollars > 1) + // inline equation + maybelatex = true; } + + binchars += count_bin_chars(str); } - if (format.empty() && backslash && dollars > 1) - // inline equation + if (format.empty() && binchars <= max_bin && maybelatex) format = "latex"; if (format.empty()) { diff --git a/src/support/lstrings.cpp b/src/support/lstrings.cpp index a43feeba13..8508e4ef13 100644 --- a/src/support/lstrings.cpp +++ b/src/support/lstrings.cpp @@ 
-943,6 +943,31 @@ int count_char(docstring const & str, docstring::value_type chr) } +int count_bin_chars(string const & str) +{ + QString const qstr = toqstr(str).simplified(); + int count = 0; + QString::const_iterator cit = qstr.begin(); + QString::const_iterator end = qstr.end(); + for (; cit != end; ++cit) { + switch (cit->category()) { + case QChar::Separator_Line: + case QChar::Separator_Paragraph: + case QChar::Other_Control: + case QChar::Other_Format: + case QChar::Other_Surrogate: + case QChar::Other_PrivateUse: + case QChar::Other_NotAssigned: + ++count; + break; + default: + break; + } + } + return count; +} + + docstring const trim(docstring const & a, char const * p) { LASSERT(p, return a); diff --git a/src/support/lstrings.h b/src/support/lstrings.h index 02b5cf2f2b..0d21e954a4 100644 --- a/src/support/lstrings.h +++ b/src/support/lstrings.h @@ -199,6 +199,14 @@ int count_char(std::string const & str, char chr); /// Count all occurences of char \a chr inside \a str int count_char(docstring const & str, docstring::value_type chr); +/** Count all occurrences of binary chars inside \a str. + It is assumed that \a str is utf-8 encoded and that a binary char + belongs to the unicode class names Zl, Zp, Cc, Cf, Cs, Co, or Cn + (excluding white space characters such as '\t', '\n', '\v', '\f', '\r'). + See http://www.unicode.org/Public/6.2.0/ucd/UnicodeData.txt +*/ +int count_bin_chars(std::string const & str); + /** Trims characters off the end and beginning of a string. \code trim("ccabccc", "c") == "ab".