mirror of
https://git.lyx.org/repos/lyx.git
synced 2024-12-28 06:49:43 +00:00
Make encoding conversions thread safe
This is the result of the discussion on the list "2.1.0 Blocker". Thanks to all contributors! The main idea is to use thread-local storage for all static variables. This solution does not need any mutex. For more details, see the comment in unicode.h.
This commit is contained in:
parent
009809b41d
commit
6a30211fef
@ -69,14 +69,6 @@ string const to_ascii(docstring const & ucs4)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
IconvProcessor & utf8ToUcs4()
|
|
||||||
{
|
|
||||||
static IconvProcessor iconv(ucs4_codeset, "UTF-8");
|
|
||||||
return iconv;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
void utf8_to_ucs4(string const & utf8, docstring & ucs4)
|
void utf8_to_ucs4(string const & utf8, docstring & ucs4)
|
||||||
{
|
{
|
||||||
size_t n = utf8.size();
|
size_t n = utf8.size();
|
||||||
|
@ -14,7 +14,8 @@
|
|||||||
|
|
||||||
#include "support/unicode.h"
|
#include "support/unicode.h"
|
||||||
#include "support/debug.h"
|
#include "support/debug.h"
|
||||||
#include "support/mutex.h"
|
|
||||||
|
#include <QThreadStorage>
|
||||||
|
|
||||||
#include <iconv.h>
|
#include <iconv.h>
|
||||||
|
|
||||||
@ -66,8 +67,6 @@ struct IconvProcessor::Impl
|
|||||||
iconv_t cd;
|
iconv_t cd;
|
||||||
string tocode_;
|
string tocode_;
|
||||||
string fromcode_;
|
string fromcode_;
|
||||||
|
|
||||||
Mutex mutex_; // iconv() is not thread save, see #7240
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@ -124,8 +123,6 @@ bool IconvProcessor::init()
|
|||||||
int IconvProcessor::convert(char const * buf, size_t buflen,
|
int IconvProcessor::convert(char const * buf, size_t buflen,
|
||||||
char * outbuf, size_t maxoutsize)
|
char * outbuf, size_t maxoutsize)
|
||||||
{
|
{
|
||||||
Mutex::Locker lock(&pimpl_->mutex_);
|
|
||||||
|
|
||||||
if (buflen == 0)
|
if (buflen == 0)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
@ -228,7 +225,10 @@ iconv_convert(IconvProcessor & processor, InType const * buf, size_t buflen)
|
|||||||
char const * inbuf = reinterpret_cast<char const *>(buf);
|
char const * inbuf = reinterpret_cast<char const *>(buf);
|
||||||
size_t inbytesleft = buflen * sizeof(InType);
|
size_t inbytesleft = buflen * sizeof(InType);
|
||||||
|
|
||||||
static std::vector<char> outbuf(32768);
|
static QThreadStorage<std::vector<char> *> static_outbuf;
|
||||||
|
if (!static_outbuf.hasLocalData())
|
||||||
|
static_outbuf.setLocalData(new std::vector<char>(32768));
|
||||||
|
std::vector<char> & outbuf = *static_outbuf.localData();
|
||||||
// The number of UCS4 code points in buf is at most inbytesleft.
|
// The number of UCS4 code points in buf is at most inbytesleft.
|
||||||
// The output encoding will use at most
|
// The output encoding will use at most
|
||||||
// max_encoded_bytes(pimpl_->tocode_) per UCS4 code point.
|
// max_encoded_bytes(pimpl_->tocode_) per UCS4 code point.
|
||||||
@ -249,6 +249,15 @@ iconv_convert(IconvProcessor & processor, InType const * buf, size_t buflen)
|
|||||||
} // anon namespace
|
} // anon namespace
|
||||||
|
|
||||||
|
|
||||||
|
IconvProcessor & utf8ToUcs4()
|
||||||
|
{
|
||||||
|
static QThreadStorage<IconvProcessor *> processor;
|
||||||
|
if (!processor.hasLocalData())
|
||||||
|
processor.setLocalData(new IconvProcessor(ucs4_codeset, "UTF-8"));
|
||||||
|
return *processor.localData();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
vector<char_type> utf8_to_ucs4(vector<char> const & utf8str)
|
vector<char_type> utf8_to_ucs4(vector<char> const & utf8str)
|
||||||
{
|
{
|
||||||
if (utf8str.empty())
|
if (utf8str.empty())
|
||||||
@ -261,32 +270,43 @@ vector<char_type> utf8_to_ucs4(vector<char> const & utf8str)
|
|||||||
vector<char_type>
|
vector<char_type>
|
||||||
utf8_to_ucs4(char const * utf8str, size_t ls)
|
utf8_to_ucs4(char const * utf8str, size_t ls)
|
||||||
{
|
{
|
||||||
static IconvProcessor processor(ucs4_codeset, "UTF-8");
|
return iconv_convert<char_type>(utf8ToUcs4(), utf8str, ls);
|
||||||
return iconv_convert<char_type>(processor, utf8str, ls);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
vector<char_type>
|
vector<char_type>
|
||||||
utf16_to_ucs4(unsigned short const * s, size_t ls)
|
utf16_to_ucs4(unsigned short const * s, size_t ls)
|
||||||
{
|
{
|
||||||
static IconvProcessor processor(ucs4_codeset, utf16_codeset);
|
static QThreadStorage<IconvProcessor *> processor;
|
||||||
return iconv_convert<char_type>(processor, s, ls);
|
if (!processor.hasLocalData())
|
||||||
|
processor.setLocalData(new IconvProcessor(ucs4_codeset, utf16_codeset));
|
||||||
|
return iconv_convert<char_type>(*processor.localData(), s, ls);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
vector<unsigned short>
|
vector<unsigned short>
|
||||||
ucs4_to_utf16(char_type const * s, size_t ls)
|
ucs4_to_utf16(char_type const * s, size_t ls)
|
||||||
{
|
{
|
||||||
static IconvProcessor processor(utf16_codeset, ucs4_codeset);
|
static QThreadStorage<IconvProcessor *> processor;
|
||||||
return iconv_convert<unsigned short>(processor, s, ls);
|
if (!processor.hasLocalData())
|
||||||
|
processor.setLocalData(new IconvProcessor(utf16_codeset, ucs4_codeset));
|
||||||
|
return iconv_convert<unsigned short>(*processor.localData(), s, ls);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
IconvProcessor & ucs4ToUtf8()
|
||||||
|
{
|
||||||
|
static QThreadStorage<IconvProcessor *> processor;
|
||||||
|
if (!processor.hasLocalData())
|
||||||
|
processor.setLocalData(new IconvProcessor("UTF-8", ucs4_codeset));
|
||||||
|
return *processor.localData();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
vector<char>
|
vector<char>
|
||||||
ucs4_to_utf8(char_type c)
|
ucs4_to_utf8(char_type c)
|
||||||
{
|
{
|
||||||
static IconvProcessor processor("UTF-8", ucs4_codeset);
|
return iconv_convert<char>(ucs4ToUtf8(), &c, 1);
|
||||||
return iconv_convert<char>(processor, &c, 1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -303,15 +323,17 @@ ucs4_to_utf8(vector<char_type> const & ucs4str)
|
|||||||
vector<char>
|
vector<char>
|
||||||
ucs4_to_utf8(char_type const * ucs4str, size_t ls)
|
ucs4_to_utf8(char_type const * ucs4str, size_t ls)
|
||||||
{
|
{
|
||||||
static IconvProcessor processor("UTF-8", ucs4_codeset);
|
return iconv_convert<char>(ucs4ToUtf8(), ucs4str, ls);
|
||||||
return iconv_convert<char>(processor, ucs4str, ls);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
vector<char_type>
|
vector<char_type>
|
||||||
eightbit_to_ucs4(char const * s, size_t ls, string const & encoding)
|
eightbit_to_ucs4(char const * s, size_t ls, string const & encoding)
|
||||||
{
|
{
|
||||||
static map<string, IconvProcessor> processors;
|
static QThreadStorage<map<string, IconvProcessor> *> static_processors;
|
||||||
|
if (!static_processors.hasLocalData())
|
||||||
|
static_processors.setLocalData(new map<string, IconvProcessor>);
|
||||||
|
map<string, IconvProcessor> & processors = *static_processors.localData();
|
||||||
if (processors.find(encoding) == processors.end()) {
|
if (processors.find(encoding) == processors.end()) {
|
||||||
IconvProcessor processor(ucs4_codeset, encoding.c_str());
|
IconvProcessor processor(ucs4_codeset, encoding.c_str());
|
||||||
processors.insert(make_pair(encoding, processor));
|
processors.insert(make_pair(encoding, processor));
|
||||||
@ -320,10 +342,23 @@ eightbit_to_ucs4(char const * s, size_t ls, string const & encoding)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
map<string, IconvProcessor> & ucs4To8bitProcessors()
|
||||||
|
{
|
||||||
|
static QThreadStorage<map<string, IconvProcessor> *> processors;
|
||||||
|
if (!processors.hasLocalData())
|
||||||
|
processors.setLocalData(new map<string, IconvProcessor>);
|
||||||
|
return *processors.localData();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
vector<char>
|
vector<char>
|
||||||
ucs4_to_eightbit(char_type const * ucs4str, size_t ls, string const & encoding)
|
ucs4_to_eightbit(char_type const * ucs4str, size_t ls, string const & encoding)
|
||||||
{
|
{
|
||||||
static map<string, IconvProcessor> processors;
|
map<string, IconvProcessor> & processors(ucs4To8bitProcessors());
|
||||||
if (processors.find(encoding) == processors.end()) {
|
if (processors.find(encoding) == processors.end()) {
|
||||||
IconvProcessor processor(encoding.c_str(), ucs4_codeset);
|
IconvProcessor processor(encoding.c_str(), ucs4_codeset);
|
||||||
processors.insert(make_pair(encoding, processor));
|
processors.insert(make_pair(encoding, processor));
|
||||||
@ -334,7 +369,7 @@ ucs4_to_eightbit(char_type const * ucs4str, size_t ls, string const & encoding)
|
|||||||
|
|
||||||
char ucs4_to_eightbit(char_type ucs4, string const & encoding)
|
char ucs4_to_eightbit(char_type ucs4, string const & encoding)
|
||||||
{
|
{
|
||||||
static map<string, IconvProcessor> processors;
|
map<string, IconvProcessor> & processors(ucs4To8bitProcessors());
|
||||||
map<string, IconvProcessor>::iterator it = processors.find(encoding);
|
map<string, IconvProcessor>::iterator it = processors.find(encoding);
|
||||||
if (it == processors.end()) {
|
if (it == processors.end()) {
|
||||||
IconvProcessor processor(encoding.c_str(), ucs4_codeset);
|
IconvProcessor processor(encoding.c_str(), ucs4_codeset);
|
||||||
@ -352,7 +387,10 @@ char ucs4_to_eightbit(char_type ucs4, string const & encoding)
|
|||||||
void ucs4_to_multibytes(char_type ucs4, vector<char> & out,
|
void ucs4_to_multibytes(char_type ucs4, vector<char> & out,
|
||||||
string const & encoding)
|
string const & encoding)
|
||||||
{
|
{
|
||||||
static map<string, IconvProcessor> processors;
|
static QThreadStorage<map<string, IconvProcessor> *> static_processors;
|
||||||
|
if (!static_processors.hasLocalData())
|
||||||
|
static_processors.setLocalData(new map<string, IconvProcessor>);
|
||||||
|
map<string, IconvProcessor> & processors = *static_processors.localData();
|
||||||
map<string, IconvProcessor>::iterator it = processors.find(encoding);
|
map<string, IconvProcessor>::iterator it = processors.find(encoding);
|
||||||
if (it == processors.end()) {
|
if (it == processors.end()) {
|
||||||
IconvProcessor processor(encoding.c_str(), ucs4_codeset);
|
IconvProcessor processor(encoding.c_str(), ucs4_codeset);
|
||||||
|
@ -21,6 +21,27 @@
|
|||||||
|
|
||||||
namespace lyx {
|
namespace lyx {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Wrapper for iconv(3).
|
||||||
|
*
|
||||||
|
* According to the POSIX standard, all specified functions are thread-safe,
|
||||||
|
* with some exceptions. The iconv() function is not listed as an exception:
|
||||||
|
* http://pubs.opengroup.org/onlinepubs/9699919799/xrat/V4_xsh_chap02.html#tag_22_02_09_09
|
||||||
|
* http://man7.org/linux/man-pages/man7/pthreads.7.html
|
||||||
|
*
|
||||||
|
* Therefore, you can use as many instances of this class in parallel as you
|
||||||
|
* like. However, you need to ensure that each instance is only used by one
|
||||||
|
* thread at any given time. If this condition is not met you get nasty
|
||||||
|
* mixtures of different thread data as in bug 7240.
|
||||||
|
*
|
||||||
|
* From a performance point of view it is best to use one static instance
|
||||||
|
* per thread for each in/out encoding pair. This can e.g. be achieved by
|
||||||
|
* using helpers for thread-local storage such as QThreadStorage or
|
||||||
|
* boost::thread_specific_ptr. A single static instance protected by a mutex
|
||||||
|
* would work as well, and might be preferrable for exotic encoding pairs.
|
||||||
|
* Creating local IconvProcessor instances should be avoided because of the
|
||||||
|
* overhead in iconv_open().
|
||||||
|
*/
|
||||||
class IconvProcessor
|
class IconvProcessor
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
@ -51,6 +72,10 @@ private:
|
|||||||
Impl * pimpl_;
|
Impl * pimpl_;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Get the global IconvProcessor instance of the current thread for
|
||||||
|
/// utf8->ucs4 conversions
|
||||||
|
IconvProcessor & utf8ToUcs4();
|
||||||
|
|
||||||
// A single codepoint conversion for utf8_to_ucs4 does not make
|
// A single codepoint conversion for utf8_to_ucs4 does not make
|
||||||
// sense, so that function is left out.
|
// sense, so that function is left out.
|
||||||
|
|
||||||
@ -66,6 +91,10 @@ std::vector<char_type> utf16_to_ucs4(unsigned short const * s, size_t ls);
|
|||||||
|
|
||||||
std::vector<unsigned short> ucs4_to_utf16(char_type const * s, size_t ls);
|
std::vector<unsigned short> ucs4_to_utf16(char_type const * s, size_t ls);
|
||||||
|
|
||||||
|
/// Get the global IconvProcessor instance of the current thread for
|
||||||
|
/// ucs4->utf8 conversions
|
||||||
|
IconvProcessor & ucs4ToUtf8();
|
||||||
|
|
||||||
// ucs4_to_utf8
|
// ucs4_to_utf8
|
||||||
|
|
||||||
std::vector<char> ucs4_to_utf8(char_type c);
|
std::vector<char> ucs4_to_utf8(char_type c);
|
||||||
|
Loading…
Reference in New Issue
Block a user