mirror of
https://git.lyx.org/repos/lyx.git
synced 2024-11-22 10:00:33 +00:00
Make binary file detection more robust.
The magic library can detect the charset used by a file. While this detection is not foolproof, the library seems to be infallible as regards the binary nature of a file. So, use libmagic for the detection and fall back to the previous method if the library is not installed or its database cannot be loaded.
This commit is contained in:
parent
82faa66192
commit
f439609304
@ -241,17 +241,13 @@ string guessFormatFromContents(FileName const & fn)
|
||||
int const max_count = 50;
|
||||
int count = 0;
|
||||
|
||||
// Maximum number of binary chars allowed for latex detection
|
||||
int const max_bin = 5;
|
||||
|
||||
string str;
|
||||
string format;
|
||||
bool firstLine = true;
|
||||
bool backslash = false;
|
||||
bool maybelatex = false;
|
||||
int binchars = 0;
|
||||
int dollars = 0;
|
||||
while ((count++ < max_count) && format.empty() && binchars <= max_bin) {
|
||||
while ((count++ < max_count) && format.empty() && !maybelatex) {
|
||||
if (ifs.eof())
|
||||
break;
|
||||
|
||||
@ -378,17 +374,9 @@ string guessFormatFromContents(FileName const & fn)
|
||||
// inline equation
|
||||
maybelatex = true;
|
||||
}
|
||||
|
||||
// Note that this is formally not correct, since count_bin_chars
|
||||
// expects utf8, and str can be anything: plain text in any
|
||||
// encoding, or really binary data. In practice it works, since
|
||||
// QString::fromUtf8() drops invalid utf8 sequences, and while
|
||||
// the exact number may not be correct, we still get a high
|
||||
// number for truly binary files.
|
||||
binchars += count_bin_chars(str);
|
||||
}
|
||||
|
||||
if (format.empty() && binchars <= max_bin && maybelatex)
|
||||
if (format.empty() && maybelatex && !isBinaryFile(fn))
|
||||
format = "latex";
|
||||
|
||||
if (format.empty()) {
|
||||
|
@ -43,6 +43,9 @@
|
||||
#include "support/regex.h"
|
||||
|
||||
#include <fcntl.h>
|
||||
#ifdef HAVE_MAGIC_H
|
||||
#include <magic.h>
|
||||
#endif
|
||||
|
||||
#include <cerrno>
|
||||
#include <cstdlib>
|
||||
@ -91,6 +94,60 @@ bool isValidDVIFileName(string const & filename)
|
||||
}
|
||||
|
||||
|
||||
/**
 * Check whether the file has binary contents.
 *
 * When built with HAVE_MAGIC_H, libmagic is asked for the MIME encoding
 * of the file and the file is considered binary if that encoding
 * contains "binary". If libmagic cannot be used (no cookie, database
 * not loadable, or the query itself fails), the function falls back to
 * counting binary chars in the first lines of the file.
 *
 * \param filename the file to examine.
 * \return true if the file is deemed binary; false otherwise, and also
 *         false if the name is empty, the file does not exist, or it
 *         cannot be opened for the fallback scan.
 */
bool isBinaryFile(FileName const & filename)
{
	bool isbinary = false;
	if (filename.empty() || !filename.exists())
		return isbinary;

#ifdef HAVE_MAGIC_H
	magic_t magic_cookie = magic_open(MAGIC_MIME_ENCODING);
	if (magic_cookie) {
		bool detected = true;
		if (magic_load(magic_cookie, NULL) != 0) {
			// magic_error() may return NULL, which must not be streamed.
			char const * err = magic_error(magic_cookie);
			LYXERR(Debug::FILES, "isBinaryFile: "
				"Could not load magic database - "
				<< (err ? err : "unknown error"));
			detected = false;
		} else {
			char const * charset = magic_file(magic_cookie,
				filename.toFilesystemEncoding().c_str());
			if (charset) {
				isbinary = contains(charset, "binary");
			} else {
				// magic_file() returns NULL on failure. Do not pass
				// that to contains(); use the fallback method instead.
				char const * err = magic_error(magic_cookie);
				LYXERR(Debug::FILES, "isBinaryFile: "
					"Could not detect the file encoding - "
					<< (err ? err : "unknown error"));
				detected = false;
			}
		}
		// The strings returned by libmagic belong to the cookie, so
		// the cookie is only closed after they have been used.
		magic_close(magic_cookie);
		if (detected)
			return isbinary;
	}
#endif
	// Try by looking for binary chars at the beginning of the file.
	// Note that this is formally not correct, since count_bin_chars
	// expects utf8, and the passed string can be anything: plain text
	// in any encoding, or really binary data. In practice it works,
	// since QString::fromUtf8() drops invalid utf8 sequences, and
	// while the exact number may not be correct, we still get a high
	// number for truly binary files.

	ifstream ifs(filename.toFilesystemEncoding().c_str());
	if (!ifs)
		return isbinary;

	// Maximum strings to read
	int const max_count = 50;

	// Maximum number of binary chars allowed
	int const max_bin = 5;

	int count = 0;
	int binchars = 0;
	string str;
	// Stop at the line limit or as soon as a read fails (end of file
	// or stream error), so that only lines actually read are counted.
	while (count++ < max_count && getline(ifs, str))
		binchars += count_bin_chars(str);

	return binchars > max_bin;
}
|
||||
|
||||
|
||||
string const latex_path(string const & original_path,
|
||||
latex_path_extension extension,
|
||||
latex_path_dots dots)
|
||||
|
@ -78,6 +78,9 @@ bool isValidLaTeXFileName(std::string const & filename);
|
||||
*/
|
||||
bool isValidDVIFileName(std::string const & filename);
|
||||
|
||||
/// check whether the file has binary contents
|
||||
bool isBinaryFile(FileName const & filename);
|
||||
|
||||
/** Returns the path of a library data file.
|
||||
Search the file name.ext in the subdirectory dir of
|
||||
-# user_lyxdir
|
||||
|
Loading…
Reference in New Issue
Block a user