unicodesymbols: parse supplementary lines that give alternative raw-LaTeX commands for an already-defined symbol.

This commit is contained in:
Thibaut Cuvelier 2022-02-19 02:10:45 +01:00
parent 3f9e21b826
commit 0dc7f34e00
4 changed files with 84 additions and 57 deletions

View File

@ -60,6 +60,7 @@
# #
0x00a0 "~" "" "force=cp862;cp1255;cp1256;koi8-u;iso8859-6;iso8859-7;utf8-platex,notermination=both" "~" "" # NO-BREAK SPACE 0x00a0 "~" "" "force=cp862;cp1255;cp1256;koi8-u;iso8859-6;iso8859-7;utf8-platex,notermination=both" "~" "" # NO-BREAK SPACE
0x00a1 "\\textexclamdown" "" "force=cp862;cp1255;euc-jp;euc-jp-platex;euc-kr;utf8-platex" # INVERTED EXCLAMATION MARK 0x00a1 "\\textexclamdown" "" "force=cp862;cp1255;euc-jp;euc-jp-platex;euc-kr;utf8-platex" # INVERTED EXCLAMATION MARK
0x00a1 "!`" ""
0x00a2 "\\textcent" "textcomp" "force=cp862;cp1255;cp1256;euc-jp;euc-jp-platex;jis;shift-jis-platex" #"\\mathcent" "txfonts|pxfonts" # CENT SIGN 0x00a2 "\\textcent" "textcomp" "force=cp862;cp1255;cp1256;euc-jp;euc-jp-platex;jis;shift-jis-platex" #"\\mathcent" "txfonts|pxfonts" # CENT SIGN
0x00a3 "\\pounds" "" "force=cp862;cp1255;cp1256;iso8859-7;euc-jp;euc-jp-platex;jis;shift-jis-platex" "\\pounds" "" # £ POUND SIGN 0x00a3 "\\pounds" "" "force=cp862;cp1255;cp1256;iso8859-7;euc-jp;euc-jp-platex;jis;shift-jis-platex" "\\pounds" "" # £ POUND SIGN
0x00a4 "\\textcurrency" "textcomp" "force=cp1256;euc-cn;euc-jp;euc-jp-platex;euc-kr;gbk;iso8859-6;utf8-platex" # CURRENCY SYMBOL 0x00a4 "\\textcurrency" "textcomp" "force=cp1256;euc-cn;euc-jp;euc-jp-platex;euc-kr;gbk;iso8859-6;utf8-platex" # CURRENCY SYMBOL
@ -90,6 +91,7 @@
0x00bd "\\textonehalf" "textcomp" "force=cp862;cp1255;cp1256;iso8859-7;euc-kr" "\\sfrac{1}{2}" "xfrac" # 1/2 FRACTION 0x00bd "\\textonehalf" "textcomp" "force=cp862;cp1255;cp1256;iso8859-7;euc-kr" "\\sfrac{1}{2}" "xfrac" # 1/2 FRACTION
0x00be "\\textthreequarters" "textcomp" "force=cp1255;cp1256;euc-kr" "\\sfrac{3}{4}" "xfrac" # 3/4 FRACTION 0x00be "\\textthreequarters" "textcomp" "force=cp1255;cp1256;euc-kr" "\\sfrac{3}{4}" "xfrac" # 3/4 FRACTION
0x00bf "\\textquestiondown" "" "force=cp862;cp1255;euc-jp;euc-kr" # INVERTED QUESTION MARK 0x00bf "\\textquestiondown" "" "force=cp862;cp1255;euc-jp;euc-kr" # INVERTED QUESTION MARK
0x00bf "?`" ""
0x00c0 "\\`{A}" "" "mathalpha,force=euc-jp" "\\grave{A}" # LATIN CAPITAL LETTER A WITH GRAVE 0x00c0 "\\`{A}" "" "mathalpha,force=euc-jp" "\\grave{A}" # LATIN CAPITAL LETTER A WITH GRAVE
0x00c1 "\\'{A}" "" "mathalpha,force=euc-jp" "\\acute{A}" # LATIN CAPITAL LETTER A WITH ACUTE 0x00c1 "\\'{A}" "" "mathalpha,force=euc-jp" "\\acute{A}" # LATIN CAPITAL LETTER A WITH ACUTE
0x00c2 "\\^{A}" "" "mathalpha,force=euc-jp" "\\hat{A}" # LATIN CAPITAL LETTER A WITH CIRCUMFLEX 0x00c2 "\\^{A}" "" "mathalpha,force=euc-jp" "\\hat{A}" # LATIN CAPITAL LETTER A WITH CIRCUMFLEX

View File

@ -50,7 +50,7 @@ CharInfoMap unicodesymbols;
typedef set<char_type> CharSet; typedef set<char_type> CharSet;
typedef map<string, CharSet> CharSetMap; typedef map<string, CharSet> CharSetMap;
CharSet forced; CharSet forced;
CharSetMap forcedselected; CharSetMap forcedSelected;
typedef set<char_type> MathAlphaSet; typedef set<char_type> MathAlphaSet;
MathAlphaSet mathalpha; MathAlphaSet mathalpha;
@ -99,7 +99,7 @@ CharInfo::CharInfo(
Encoding::Encoding(string const & n, string const & l, string const & g, Encoding::Encoding(string const & n, string const & l, string const & g,
string const & i, bool f, bool u, Encoding::Package p) string const & i, bool f, bool u, Encoding::Package p)
: name_(n), latexName_(l), guiName_(g), iconvName_(i), fixedwidth_(f), : name_(n), latexName_(l), guiName_(g), iconvName_(i), fixedwidth_(f),
unsafe_(u), forced_(&forcedselected[n]), package_(p) unsafe_(u), forced_(&forcedSelected[n]), package_(p)
{ {
if (n == "ascii") { if (n == "ascii") {
// ASCII can encode 128 code points and nothing else // ASCII can encode 128 code points and nothing else
@ -705,20 +705,20 @@ void Encodings::read(FileName const & encfile, FileName const & symbolsfile)
{ {
// We must read the symbolsfile first, because the Encoding // We must read the symbolsfile first, because the Encoding
// constructor depends on it. // constructor depends on it.
CharSetMap forcednotselected; CharSetMap forcedNotSelected;
Lexer symbolslex; Lexer symbolsLex;
symbolslex.setFile(symbolsfile); symbolsLex.setFile(symbolsfile);
bool getNextToken = true; bool getNextToken = true;
while (symbolslex.isOK()) { while (symbolsLex.isOK()) {
char_type symbol; char_type symbol;
if (getNextToken) { if (getNextToken) {
if (!symbolslex.next(true)) if (!symbolsLex.next(true))
break; break;
} else } else
getNextToken = true; getNextToken = true;
istringstream is(symbolslex.getString()); istringstream is(symbolsLex.getString());
// reading symbol directly does not work if // reading symbol directly does not work if
// char_type == wchar_t. // char_type == wchar_t.
uint32_t tmp; uint32_t tmp;
@ -726,20 +726,38 @@ void Encodings::read(FileName const & encfile, FileName const & symbolsfile)
break; break;
symbol = tmp; symbol = tmp;
if (!symbolslex.next(true)) // Special case: more than one entry for one character (to add other LaTeX commands).
if (unicodesymbols.contains(symbol)) {
	// The symbol already has an entry, so this is a supplementary line of
	// the form `<code point> "<text command>" "<math command>"`: it only
	// adds alternative LaTeX commands to the existing entry.
	if (!symbolsLex.next(true))
		break;
	docstring const textCommand = symbolsLex.getDocString();
	if (!symbolsLex.next(true))
		break;
	// Read as a docstring, like the text command: addMathCommand()
	// takes a docstring.
	docstring const mathCommand = symbolsLex.getDocString();

	// Look the existing entry up once and extend it in place.
	auto & existing = unicodesymbols.at(symbol);
	if (!textCommand.empty())
		existing.addTextCommand(textCommand);
	// Register the math command itself; the text command must not be
	// stored as a math alternative.
	if (!mathCommand.empty())
		existing.addMathCommand(mathCommand);
	continue;
}
// No entry exists yet for this symbol: parse the line as a completely new symbol.
if (!symbolsLex.next(true))
break;
docstring textCommand = symbolsLex.getDocString();
if (!symbolsLex.next(true))
break;
string textPreamble = symbolsLex.getString();
if (!symbolsLex.next(true))
break;
string sflags = symbolsLex.getString();
string tipaShortcut;
int flags = 0; int flags = 0;
if (suffixIs(textcommand, '}')) if (suffixIs(textCommand, '}'))
flags |= CharInfoTextNoTermination; flags |= CharInfoTextNoTermination;
while (!sflags.empty()) { while (!sflags.empty()) {
string flag; string flag;
@ -753,13 +771,13 @@ void Encodings::read(FileName const & encfile, FileName const & symbolsfile)
vector<string> encs = vector<string> encs =
getVectorFromString(flag.substr(6), ";"); getVectorFromString(flag.substr(6), ";");
for (auto const & enc : encs) for (auto const & enc : encs)
forcedselected[enc].insert(symbol); forcedSelected[enc].insert(symbol);
flags |= CharInfoForceSelected; flags |= CharInfoForceSelected;
} else if (prefixIs(flag, "force!=")) { } else if (prefixIs(flag, "force!=")) {
vector<string> encs = vector<string> encs =
getVectorFromString(flag.substr(7), ";"); getVectorFromString(flag.substr(7), ";");
for (auto const & enc : encs) for (auto const & enc : encs)
forcednotselected[enc].insert(symbol); forcedNotSelected[enc].insert(symbol);
flags |= CharInfoForceSelected; flags |= CharInfoForceSelected;
} else if (flag == "mathalpha") { } else if (flag == "mathalpha") {
mathalpha.insert(symbol); mathalpha.insert(symbol);
@ -773,8 +791,8 @@ void Encodings::read(FileName const & encfile, FileName const & symbolsfile)
} else if (flag == "notermination=none") { } else if (flag == "notermination=none") {
flags &= ~CharInfoTextNoTermination; flags &= ~CharInfoTextNoTermination;
flags &= ~CharInfoMathNoTermination; flags &= ~CharInfoMathNoTermination;
} else if (contains(flag, "tipaShortcut=")) { } else if (contains(flag, "tipashortcut=")) {
tipashortcut = split(flag, '='); tipaShortcut = split(flag, '=');
} else if (flag == "deprecated") { } else if (flag == "deprecated") {
flags |= CharInfoDeprecated; flags |= CharInfoDeprecated;
} else { } else {
@ -786,25 +804,25 @@ void Encodings::read(FileName const & encfile, FileName const & symbolsfile)
} }
// mathCommand and mathPreamble have been added for 1.6.0. // mathCommand and mathPreamble have been added for 1.6.0.
// make them optional so that old files still work. // make them optional so that old files still work.
int const lineno = symbolslex.lineNumber(); int const lineNo = symbolsLex.lineNumber();
bool breakout = false; bool breakout = false;
docstring mathcommand; docstring mathCommand;
string mathpreamble; string mathPreamble;
if (symbolslex.next(true)) { if (symbolsLex.next(true)) {
if (symbolslex.lineNumber() != lineno) { if (symbolsLex.lineNumber() != lineNo) {
// line in old format without mathCommand and mathPreamble // line in old format without mathCommand and mathPreamble
getNextToken = false; getNextToken = false;
} else { } else {
mathcommand = symbolslex.getDocString(); mathCommand = symbolsLex.getDocString();
if (suffixIs(mathcommand, '}')) if (suffixIs(mathCommand, '}'))
flags |= CharInfoMathNoTermination; flags |= CharInfoMathNoTermination;
if (symbolslex.next(true)) { if (symbolsLex.next(true)) {
if (symbolslex.lineNumber() != lineno) { if (symbolsLex.lineNumber() != lineNo) {
// line in new format with mathCommand only // line in new format with mathCommand only
getNextToken = false; getNextToken = false;
} else { } else {
// line in new format with mathCommand and mathPreamble // line in new format with mathCommand and mathPreamble
mathpreamble = symbolslex.getString(); mathPreamble = symbolsLex.getString();
} }
} else } else
breakout = true; breakout = true;
@ -814,20 +832,20 @@ void Encodings::read(FileName const & encfile, FileName const & symbolsfile)
} }
// backward compatibility // backward compatibility
if (mathpreamble == "esintoramsmath") if (mathPreamble == "esintoramsmath")
mathpreamble = "esint|amsmath"; mathPreamble = "esint|amsmath";
if (!textpreamble.empty()) if (!textPreamble.empty())
if (textpreamble[0] != '\\') if (textPreamble[0] != '\\')
flags |= CharInfoTextFeature; flags |= CharInfoTextFeature;
if (!mathpreamble.empty()) if (!mathPreamble.empty())
if (mathpreamble[0] != '\\') if (mathPreamble[0] != '\\')
flags |= CharInfoMathFeature; flags |= CharInfoMathFeature;
CharInfo info = CharInfo( CharInfo info = CharInfo(
textcommand, mathcommand, textCommand, mathCommand,
textpreamble, mathpreamble, textPreamble, mathPreamble,
tipashortcut, flags); tipaShortcut, flags);
LYXERR(Debug::INFO, "Read unicode symbol " << symbol << " '" LYXERR(Debug::INFO, "Read unicode symbol " << symbol << " '"
<< to_utf8(info.textCommand()) << "' '" << info.textPreamble() << to_utf8(info.textCommand()) << "' '" << info.textPreamble()
<< " '" << info.textFeature() << ' ' << info.textNoTermination() << " '" << info.textFeature() << ' ' << info.textNoTermination()
@ -851,12 +869,12 @@ void Encodings::read(FileName const & encfile, FileName const & symbolsfile)
et_end et_end
}; };
LexerKeyword encodingtags[] = { LexerKeyword encodingTags[] = {
{ "encoding", et_encoding }, { "encoding", et_encoding },
{ "end", et_end } { "end", et_end }
}; };
Lexer lex(encodingtags); Lexer lex(encodingTags);
lex.setFile(encfile); lex.setFile(encfile);
lex.setContext("Encodings::read"); lex.setContext("Encodings::read");
while (lex.isOK()) { while (lex.isOK()) {
@ -866,21 +884,21 @@ void Encodings::read(FileName const & encfile, FileName const & symbolsfile)
lex.next(); lex.next();
string const name = lex.getString(); string const name = lex.getString();
lex.next(); lex.next();
string const latexname = lex.getString(); string const latexName = lex.getString();
lex.next(); lex.next();
string const guiname = lex.getString(); string const guiName = lex.getString();
lex.next(); lex.next();
string const iconvname = lex.getString(); string const iconvName = lex.getString();
lex.next(); lex.next();
string const width = lex.getString(); string const width = lex.getString();
bool fixedwidth = false; bool fixedWidth = false;
bool unsafe = false; bool unsafe = false;
if (width == "fixed") if (width == "fixed")
fixedwidth = true; fixedWidth = true;
else if (width == "variable") else if (width == "variable")
fixedwidth = false; fixedWidth = false;
else if (width == "variableunsafe") { else if (width == "variableunsafe") {
fixedwidth = false; fixedWidth = false;
unsafe = true; unsafe = true;
} }
else else
@ -901,8 +919,8 @@ void Encodings::read(FileName const & encfile, FileName const & symbolsfile)
lex.printError("Unknown package"); lex.printError("Unknown package");
LYXERR(Debug::INFO, "Reading encoding " << name); LYXERR(Debug::INFO, "Reading encoding " << name);
encodinglist[name] = Encoding(name, latexname, encodinglist[name] = Encoding(name, latexName,
guiname, iconvname, fixedwidth, unsafe, guiName, iconvName, fixedWidth, unsafe,
package); package);
if (lex.lex() != et_end) if (lex.lex() != et_end)
@ -920,9 +938,9 @@ void Encodings::read(FileName const & encfile, FileName const & symbolsfile)
} }
} }
// Move all information from forcednotselected to forcedselected // Move all information from forcedNotSelected to forcedSelected
for (CharSetMap::const_iterator it1 = forcednotselected.begin(); it1 != forcednotselected.end(); ++it1) { for (CharSetMap::const_iterator it1 = forcedNotSelected.begin(); it1 != forcedNotSelected.end(); ++it1) {
for (CharSetMap::iterator it2 = forcedselected.begin(); it2 != forcedselected.end(); ++it2) { for (CharSetMap::iterator it2 = forcedSelected.begin(); it2 != forcedSelected.end(); ++it2) {
if (it2->first != it1->first) if (it2->first != it1->first)
it2->second.insert(it1->second.begin(), it1->second.end()); it2->second.insert(it1->second.begin(), it1->second.end());
} }

View File

@ -69,6 +69,11 @@ public:
std::vector<docstring> const & text_commands, std::vector<docstring> const & math_commands, std::vector<docstring> const & text_commands, std::vector<docstring> const & math_commands,
std::string const & text_preamble, std::string const & math_preamble, std::string const & text_preamble, std::string const & math_preamble,
std::string const & tipa_shortcut, unsigned int flags); std::string const & tipa_shortcut, unsigned int flags);
// Add a new text command for this symbol.
void addTextCommand(const docstring& newTextCommand) { text_commands_.emplace_back(newTextCommand); }
// Add a new math command for this symbol.
void addMathCommand(const docstring& newMathCommand) { math_commands_.emplace_back(newMathCommand); }
// we assume that at least one command is nonempty when using unicodesymbols // we assume that at least one command is nonempty when using unicodesymbols
bool isUnicodeSymbol() const { return !text_commands_.empty() || !math_commands_.empty(); } bool isUnicodeSymbol() const { return !text_commands_.empty() || !math_commands_.empty(); }
/// LaTeX command (text mode) for this character /// LaTeX command (text mode) for this character

View File

@ -158,6 +158,8 @@ void InsetERT::docbook(XMLStream & xs, OutputParams const & runparams) const
os_trimmed.insert(4, from_ascii("}")); os_trimmed.insert(4, from_ascii("}"));
} }
std::cout << to_utf8(os_trimmed) << std::endl;
// Look into the global table of Unicode characters if there is a match. // Look into the global table of Unicode characters if there is a match.
bool termination; bool termination;
docstring rem; docstring rem;