mirror of
https://git.lyx.org/repos/lyx.git
synced 2024-11-09 18:31:04 +00:00
Fix mangling names in temp directory (amends eef0c8e8
).
Separate mangled-name tables for xhtml and latex exports. LaTeX route should stay as it were (counter+mangled path within filenames in temp directory) xHTML route uses hash of path + filename only. We won't use counters because (unlike in LaTeX export) they stay visible in final export directory and generally change across LyX sessions. Hash is short with collision handling because path-length limits of Windows. cf https://www.mail-archive.com/lyx-devel@lists.lyx.org/msg220582.html
This commit is contained in:
parent
c79af8e138
commit
6d19388ab8
@ -575,14 +575,14 @@ copyFileIfNeeded(FileName const & file_in, FileName const & file_out)
|
||||
|
||||
|
||||
pair<GraphicsCopyStatus, FileName> const
|
||||
copyToDirIfNeeded(DocFileName const & file, string const & dir)
|
||||
copyToDirIfNeeded(DocFileName const & file, string const & dir, bool encrypt_path)
|
||||
{
|
||||
string const file_in = file.absFileName();
|
||||
string const only_path = onlyPath(file_in);
|
||||
if (rtrim(only_path, "/") == rtrim(dir, "/"))
|
||||
return make_pair(IDENTICAL_PATHS, FileName(file_in));
|
||||
|
||||
string mangled = file.mangledFileName(empty_string(), false, true);
|
||||
string mangled = file.mangledFileName(empty_string(), encrypt_path);
|
||||
if (theFormats().isZippedFile(file)) {
|
||||
// We need to change _eps.gz to .eps.gz. The mangled name is
|
||||
// still unique because of the counter in mangledFileName().
|
||||
@ -680,7 +680,7 @@ string InsetGraphics::prepareFile(OutputParams const & runparams) const
|
||||
// we move it to a temp dir or uncompress it.
|
||||
FileName temp_file;
|
||||
GraphicsCopyStatus status;
|
||||
tie(status, temp_file) = copyToDirIfNeeded(params().filename, temp_path);
|
||||
tie(status, temp_file) = copyToDirIfNeeded(params().filename, temp_path, false);
|
||||
|
||||
if (status == FAILURE)
|
||||
return orig_file;
|
||||
@ -997,7 +997,7 @@ string InsetGraphics::prepareHTMLFile(OutputParams const & runparams) const
|
||||
// Copy to temporary directory.
|
||||
FileName temp_file;
|
||||
GraphicsCopyStatus status;
|
||||
tie(status, temp_file) = copyToDirIfNeeded(params().filename, temp_path);
|
||||
tie(status, temp_file) = copyToDirIfNeeded(params().filename, temp_path, true);
|
||||
|
||||
if (status == FAILURE)
|
||||
return string();
|
||||
|
@ -957,69 +957,102 @@ string DocFileName::outputFileName(string const & path) const
|
||||
return save_abs_path_ ? absFileName() : relFileName(path);
|
||||
}
|
||||
|
||||
|
||||
string DocFileName::mangledFileName(string const & dir) const
|
||||
// check for an existing value in a map
|
||||
bool collision_exists(map<string, string> const & db, string const & value)
|
||||
{
|
||||
return mangledFileName(dir, true, false);
|
||||
for (auto it = db.begin(); it != db.end(); ++it)
|
||||
if (it->second == value)
|
||||
return true;
|
||||
return false;
|
||||
|
||||
/* replace the above, once we support C++17
|
||||
for (const auto& [key, val] : db)
|
||||
if (val == value)
|
||||
return true;
|
||||
return false; */
|
||||
}
|
||||
|
||||
string DocFileName::mangledFileName(string const & dir, bool use_counter, bool encrypt_path) const
|
||||
string DocFileName::mangledFileName(string const & dir, bool encrypt_path) const
|
||||
{
|
||||
// Concurrent access to these variables is possible.
|
||||
|
||||
// We need to make sure that every DocFileName instance for a given
|
||||
// filename returns the same mangled name.
|
||||
typedef map<string, string> MangledMap;
|
||||
static MangledMap mangledNames;
|
||||
static MangledMap mangledPlainNames;
|
||||
static MangledMap mangledEncryptNames; //separate table for xhtml() route
|
||||
static Mutex mangledMutex;
|
||||
// this locks both access to mangledNames and counter below
|
||||
Mutex::Locker lock(&mangledMutex);
|
||||
MangledMap::const_iterator const it = mangledNames.find(absFileName());
|
||||
if (it != mangledNames.end())
|
||||
MangledMap * mangledNames = encrypt_path ? &mangledEncryptNames : &mangledPlainNames;
|
||||
MangledMap::const_iterator const it = mangledNames->find(absFileName());
|
||||
if (it != mangledNames->end())
|
||||
return (*it).second;
|
||||
|
||||
string const name = absFileName();
|
||||
// Now the real work. Remove the extension.
|
||||
string mname = support::changeExtension(name, string());
|
||||
string mname;
|
||||
|
||||
if (encrypt_path)
|
||||
mname = "export_" + onlyFileName() + "_" + toHexHash(mname);
|
||||
// xHTML route
|
||||
// we use hash instead of counter to get stable filenames in export directory
|
||||
if (encrypt_path) {
|
||||
// sanitization probably not neccessary for xhtml, but won't harm
|
||||
string sanfn = support::changeExtension(onlyFileName(), string());
|
||||
sanfn = sanitizeFileName(sanfn);
|
||||
// Add the extension back on
|
||||
sanfn = support::changeExtension(sanfn, getExtension(onlyFileName()));
|
||||
|
||||
// The mangled name must be a valid LaTeX name.
|
||||
mname = sanitizeFileName(mname);
|
||||
// Add the extension back on
|
||||
mname = support::changeExtension(mname, getExtension(name));
|
||||
//various filesystems have filename limit around 2^8
|
||||
if (sanfn.length() > 230)
|
||||
sanfn = "";
|
||||
|
||||
// Prepend a counter to the filename. This is necessary to make
|
||||
// the mangled name unique.
|
||||
static int counter = 0;
|
||||
string enc_name = "e_" + toHexHash(name, true) + "_" + sanfn;
|
||||
|
||||
while (collision_exists(mangledEncryptNames, enc_name))
|
||||
enc_name = "e_" + toHexHash(enc_name, true) + "_" + sanfn;
|
||||
|
||||
mname = enc_name;
|
||||
}
|
||||
|
||||
// LaTeX route
|
||||
if (!encrypt_path) {
|
||||
// Now the real work. Remove the extension.
|
||||
mname = support::changeExtension(name, string());
|
||||
// The mangled name must be a valid LaTeX name.
|
||||
mname = sanitizeFileName(mname);
|
||||
// Add the extension back on
|
||||
mname = support::changeExtension(mname, getExtension(name));
|
||||
|
||||
// Prepend a counter to the filename. This is necessary to make
|
||||
// the mangled name unique, see truncation below.
|
||||
//
|
||||
static int counter = 0;
|
||||
|
||||
if (use_counter) {
|
||||
ostringstream s;
|
||||
s << counter++ << mname;
|
||||
mname = s.str();
|
||||
}
|
||||
|
||||
// MiKTeX's YAP (version 2.4.1803) crashes if the file name
|
||||
// is longer than about 160 characters. MiKTeX's pdflatex
|
||||
// is even pickier. A maximum length of 100 has been proven to work.
|
||||
// If dir.size() > max length, all bets are off for YAP. We truncate
|
||||
// the filename nevertheless, keeping a minimum of 10 chars.
|
||||
|
||||
string::size_type max_length = max(100 - ((int)dir.size() + 1), 10);
|
||||
// MiKTeX's YAP (version 2.4.1803) crashes if the file name
|
||||
// is longer than about 160 characters. MiKTeX's pdflatex
|
||||
// is even pickier. A maximum length of 100 has been proven to work.
|
||||
// If dir.size() > max length, all bets are off for YAP. We truncate
|
||||
// the filename nevertheless, keeping a minimum of 10 chars.
|
||||
|
||||
// If the mangled file name is too long, hack it to fit.
|
||||
// We know we're guaranteed to have a unique file name because
|
||||
// of the counter.
|
||||
if (mname.size() > max_length) {
|
||||
int const half = (int(max_length) / 2) - 2;
|
||||
if (half > 0) {
|
||||
mname = mname.substr(0, half) + "___" +
|
||||
mname.substr(mname.size() - half);
|
||||
string::size_type max_length = max(100 - ((int)dir.size() + 1), 10);
|
||||
|
||||
// If the mangled file name is too long, hack it to fit.
|
||||
// We know we're guaranteed to have a unique file name because
|
||||
// of the counter.
|
||||
if (mname.size() > max_length) {
|
||||
int const half = (int(max_length) / 2) - 2;
|
||||
if (half > 0) {
|
||||
mname = mname.substr(0, half) + "___" +
|
||||
mname.substr(mname.size() - half);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
mangledNames[absFileName()] = mname;
|
||||
(*mangledNames)[absFileName()] = mname;
|
||||
return mname;
|
||||
}
|
||||
|
||||
|
@ -288,19 +288,30 @@ public:
|
||||
* - two FileName instances with the same filename have identical
|
||||
* mangled names.
|
||||
*
|
||||
* @param encrypt_path will use short hash instead of path
|
||||
* in the mangled name if set to true. Useful for xHTML export
|
||||
* so we do not leak e.g. user names contained in the paths.
|
||||
* Filename itself is stripped if the resulting filename would
|
||||
* become too long (~250 chars).
|
||||
* Prefix counter is not used because
|
||||
* 1) it's hack useful for MikTeX/YAP only
|
||||
* 2) causes many duplicates within the export directory across different
|
||||
* LyX sessions as (unlike in LaTeX export) we use mangled names in
|
||||
* final xHTML export directory.
|
||||
* An example of hashed mangled case:
|
||||
* C:/foo bar/baz.png - > e_95a42ec852ea_baz.png
|
||||
*
|
||||
* It is guaranteed that
|
||||
* - two different filenames have different mangled names (modulo hash collision)
|
||||
* - two FileName instances with the same filename have identical hashed
|
||||
* mangled names.
|
||||
*
|
||||
*
|
||||
* Only the mangled file name is returned. It is not prepended
|
||||
* with @c dir.
|
||||
*/
|
||||
std::string
|
||||
mangledFileName(std::string const & dir = empty_string()) const;
|
||||
|
||||
/** Identical to mangledFileName, with the following additions:
|
||||
*
|
||||
* @encrypt_path allows using hash (SHA-256) instead of full path.
|
||||
* @use_counter allows disabling the counter in the filename.
|
||||
*/
|
||||
std::string
|
||||
mangledFileName(std::string const & dir, bool use_counter, bool encrypt_path) const;
|
||||
mangledFileName(std::string const & dir = empty_string(), bool encrypt_path=false) const;
|
||||
|
||||
/// \return the absolute file name without its .gz, .z, .Z extension
|
||||
std::string unzippedFileName() const;
|
||||
|
@ -1334,13 +1334,22 @@ void fileUnlock(int fd, const char * /* lock_file*/)
|
||||
}
|
||||
|
||||
|
||||
std::string toHexHash(const std::string & str)
|
||||
std::string toHexHash(const std::string & str, bool shorten)
|
||||
{
|
||||
// Use the best available hashing algorithm.
|
||||
auto hashAlgo = QCryptographicHash::Sha256;
|
||||
|
||||
QByteArray hash = QCryptographicHash::hash(toqstr(str).toLocal8Bit(), hashAlgo);
|
||||
return fromqstr(QString(hash.toHex()));
|
||||
QString qshash=QString(hash.toHex());
|
||||
|
||||
/* For shortened case we take 12 leftmost chars (6 bytes encoded).
|
||||
* Random experiment shows:
|
||||
* 8 chars: 16 collisions for 10^5 graphic filenames
|
||||
* 12 chars: 0 collisions for 10^5 graphic filenames
|
||||
*/
|
||||
if (shorten)
|
||||
qshash=qshash.left(12);
|
||||
|
||||
return fromqstr(qshash);
|
||||
}
|
||||
|
||||
|
||||
|
@ -348,14 +348,16 @@ int fileLock(const char * lock_file);
|
||||
void fileUnlock(int fd, const char * lock_file);
|
||||
|
||||
/** Return the hex-encoded cryptographic hash of a string.
|
||||
* The hash algorithm is not fixed, but it is determined at compile time.
|
||||
* This function is typically used to create relatively stable file names,
|
||||
* because cryptographic hash functions ensure that very small changes in the
|
||||
* input result in large changes in the output.
|
||||
* Due to inherent limits of path length in Windows we allow very short version
|
||||
* (effectively leftmost 6 bytes encoded via 12 chars in hex) while increasing the
|
||||
* probability of collision (via shorten=true).
|
||||
* There is no limit in the length of the input string: it can be a file name
|
||||
* or the contents of a file, for instance.
|
||||
*/
|
||||
std::string toHexHash(const std::string & str);
|
||||
std::string toHexHash(const std::string & str, bool shorten=false);
|
||||
|
||||
/// Replace non-ASCII characters to ensure that the string can be used as a
|
||||
/// file name on all platforms and as a LaTeX name.
|
||||
|
Loading…
Reference in New Issue
Block a user