diff --git a/lib/unicodesymbols b/lib/unicodesymbols index 2a022ba39f..19e080f129 100644 --- a/lib/unicodesymbols +++ b/lib/unicodesymbols @@ -684,7 +684,120 @@ #0x036d "" "" "combining" # COMBINING LATIN SMALL LETTER T #0x036e "" "" "combining" # COMBINING LATIN SMALL LETTER V #0x036f "" "" "combining" # COMBINING LATIN SMALL LETTER X +0x0391 "\\textgreek{A}" "textgreek" "" # GREEK CAPITAL LETTER ALPHA +0x0392 "\\textgreek{B}" "textgreek" "" # GREEK CAPITAL LETTER BETA +0x0393 "\\textgreek{G}" "textgreek" "" # GREEK CAPITAL LETTER GAMMA +0x0394 "\\textgreek{D}" "textgreek" "" # GREEK CAPITAL LETTER DELTA +0x0395 "\\textgreek{E}" "textgreek" "" # GREEK CAPITAL LETTER EPSILON +0x0396 "\\textgreek{Z}" "textgreek" "" # GREEK CAPITAL LETTER ZETA +0x0397 "\\textgreek{H}" "textgreek" "" # GREEK CAPITAL LETTER ETA +0x0398 "\\textgreek{J}" "textgreek" "" # GREEK CAPITAL LETTER THETA +0x0399 "\\textgreek{I}" "textgreek" "" # GREEK CAPITAL LETTER IOTA +0x039a "\\textgreek{K}" "textgreek" "" # GREEK CAPITAL LETTER KAPPA +0x039b "\\textgreek{L}" "textgreek" "" # GREEK CAPITAL LETTER LAMDA +0x039c "\\textgreek{M}" "textgreek" "" # GREEK CAPITAL LETTER MU +0x039d "\\textgreek{N}" "textgreek" "" # GREEK CAPITAL LETTER NU +0x039e "\\textgreek{X}" "textgreek" "" # GREEK CAPITAL LETTER XI +0x039f "\\textgreek{O}" "textgreek" "" # GREEK CAPITAL LETTER OMICRON +0x03a0 "\\textgreek{P}" "textgreek" "" # GREEK CAPITAL LETTER PI +0x03a1 "\\textgreek{R}" "textgreek" "" # GREEK CAPITAL LETTER RHO +0x03a3 "\\textgreek{S}" "textgreek" "" # GREEK CAPITAL LETTER SIGMA +0x03a4 "\\textgreek{T}" "textgreek" "" # GREEK CAPITAL LETTER TAU +0x03a5 "\\textgreek{U}" "textgreek" "" # GREEK CAPITAL LETTER UPSILON +0x03a6 "\\textgreek{F}" "textgreek" "" # GREEK CAPITAL LETTER PHI +0x03a7 "\\textgreek{Q}" "textgreek" "" # GREEK CAPITAL LETTER CHI +0x03a8 "\\textgreek{Y}" "textgreek" "" # GREEK CAPITAL LETTER PSI +0x03a9 "\\textgreek{W}" "textgreek" "" # GREEK CAPITAL LETTER OMEGA +0x03b1 "\\textgreek{a}" 
"textgreek" "" # GREEK SMALL LETTER ALPHA +0x03b2 "\\textgreek{b}" "textgreek" "" # GREEK SMALL LETTER BETA +0x03b3 "\\textgreek{g}" "textgreek" "" # GREEK SMALL LETTER GAMMA +0x03b4 "\\textgreek{d}" "textgreek" "" # GREEK SMALL LETTER DELTA +0x03b5 "\\textgreek{e}" "textgreek" "" # GREEK SMALL LETTER EPSILON +0x03b6 "\\textgreek{z}" "textgreek" "" # GREEK SMALL LETTER ZETA +0x03b7 "\\textgreek{h}" "textgreek" "" # GREEK SMALL LETTER ETA +0x03b8 "\\textgreek{j}" "textgreek" "" # GREEK SMALL LETTER THETA +0x03b9 "\\textgreek{i}" "textgreek" "" # GREEK SMALL LETTER IOTA +0x03ba "\\textgreek{k}" "textgreek" "" # GREEK SMALL LETTER KAPPA +0x03bb "\\textgreek{l}" "textgreek" "" # GREEK SMALL LETTER LAMDA +0x03bc "\\textgreek{m}" "textgreek" "" # GREEK SMALL LETTER MU +0x03bd "\\textgreek{n}" "textgreek" "" # GREEK SMALL LETTER NU +0x03be "\\textgreek{x}" "textgreek" "" # GREEK SMALL LETTER XI +0x03bf "\\textgreek{o}" "textgreek" "" # GREEK SMALL LETTER OMICRON +0x03c0 "\\textgreek{p}" "textgreek" "" # GREEK SMALL LETTER PI +0x03c1 "\\textgreek{r}" "textgreek" "" # GREEK SMALL LETTER RHO +0x03c2 "\\textgreek{c}" "textgreek" "" # GREEK SMALL LETTER FINAL SIGMA +0x03c3 "\\textgreek{s}" "textgreek" "" # GREEK SMALL LETTER SIGMA +0x03c4 "\\textgreek{t}" "textgreek" "" # GREEK SMALL LETTER TAU +0x03c5 "\\textgreek{u}" "textgreek" "" # GREEK SMALL LETTER UPSILON +0x03c6 "\\textgreek{f}" "textgreek" "" # GREEK SMALL LETTER PHI +0x03c7 "\\textgreek{q}" "textgreek" "" # GREEK SMALL LETTER CHI +0x03c8 "\\textgreek{y}" "textgreek" "" # GREEK SMALL LETTER PSI +0x03c9 "\\textgreek{w}" "textgreek" "" # GREEK SMALL LETTER OMEGA 0x0e3f "\\textbaht" "textcomp" "" # THAI CURRENCY SYMBOL BAHT +0x0410 "\\textcyr{\\char192}" "textcyr" "" # CYRILLIC CAPITAL LETTER A +0x0411 "\\textcyr{\\char193}" "textcyr" "" # CYRILLIC CAPITAL LETTER BE +0x0412 "\\textcyr{\\char194}" "textcyr" "" # CYRILLIC CAPITAL LETTER VE +0x0413 "\\textcyr{\\char195}" "textcyr" "" # CYRILLIC CAPITAL LETTER GHE +0x0414 
"\\textcyr{\\char196}" "textcyr" "" # CYRILLIC CAPITAL LETTER DE +0x0415 "\\textcyr{\\char197}" "textcyr" "" # CYRILLIC CAPITAL LETTER IE +0x0416 "\\textcyr{\\char198}" "textcyr" "" # CYRILLIC CAPITAL LETTER ZHE +0x0417 "\\textcyr{\\char199}" "textcyr" "" # CYRILLIC CAPITAL LETTER ZE +0x0418 "\\textcyr{\\char200}" "textcyr" "" # CYRILLIC CAPITAL LETTER I +0x0419 "\\textcyr{\\char201}" "textcyr" "" # CYRILLIC CAPITAL LETTER SHORT I +0x041a "\\textcyr{\\char202}" "textcyr" "" # CYRILLIC CAPITAL LETTER KA +0x041b "\\textcyr{\\char203}" "textcyr" "" # CYRILLIC CAPITAL LETTER EL +0x041c "\\textcyr{\\char204}" "textcyr" "" # CYRILLIC CAPITAL LETTER EM +0x041d "\\textcyr{\\char205}" "textcyr" "" # CYRILLIC CAPITAL LETTER EN +0x041e "\\textcyr{\\char206}" "textcyr" "" # CYRILLIC CAPITAL LETTER O +0x041f "\\textcyr{\\char207}" "textcyr" "" # CYRILLIC CAPITAL LETTER PE +0x0420 "\\textcyr{\\char208}" "textcyr" "" # CYRILLIC CAPITAL LETTER ER +0x0421 "\\textcyr{\\char209}" "textcyr" "" # CYRILLIC CAPITAL LETTER ES +0x0422 "\\textcyr{\\char210}" "textcyr" "" # CYRILLIC CAPITAL LETTER TE +0x0423 "\\textcyr{\\char211}" "textcyr" "" # CYRILLIC CAPITAL LETTER U +0x0424 "\\textcyr{\\char212}" "textcyr" "" # CYRILLIC CAPITAL LETTER EF +0x0425 "\\textcyr{\\char213}" "textcyr" "" # CYRILLIC CAPITAL LETTER HA +0x0426 "\\textcyr{\\char214}" "textcyr" "" # CYRILLIC CAPITAL LETTER TSE +0x0427 "\\textcyr{\\char215}" "textcyr" "" # CYRILLIC CAPITAL LETTER CHE +0x0428 "\\textcyr{\\char216}" "textcyr" "" # CYRILLIC CAPITAL LETTER SHA +0x0429 "\\textcyr{\\char217}" "textcyr" "" # CYRILLIC CAPITAL LETTER SHCHA +0x042a "\\textcyr{\\char218}" "textcyr" "" # CYRILLIC CAPITAL LETTER HARD SIGN +0x042b "\\textcyr{\\char219}" "textcyr" "" # CYRILLIC CAPITAL LETTER YERU +0x042c "\\textcyr{\\char220}" "textcyr" "" # CYRILLIC CAPITAL LETTER SOFT SIGN +0x042d "\\textcyr{\\char221}" "textcyr" "" # CYRILLIC CAPITAL LETTER E +0x042e "\\textcyr{\\char222}" "textcyr" "" # CYRILLIC CAPITAL LETTER YU +0x042f 
"\\textcyr{\\char223}" "textcyr" "" # CYRILLIC CAPITAL LETTER YA +0x0430 "\\textcyr{\\char224}" "textcyr" "" # CYRILLIC SMALL LETTER A +0x0431 "\\textcyr{\\char225}" "textcyr" "" # CYRILLIC SMALL LETTER BE +0x0432 "\\textcyr{\\char226}" "textcyr" "" # CYRILLIC SMALL LETTER VE +0x0433 "\\textcyr{\\char227}" "textcyr" "" # CYRILLIC SMALL LETTER GHE +0x0434 "\\textcyr{\\char228}" "textcyr" "" # CYRILLIC SMALL LETTER DE +0x0435 "\\textcyr{\\char229}" "textcyr" "" # CYRILLIC SMALL LETTER IE +0x0436 "\\textcyr{\\char230}" "textcyr" "" # CYRILLIC SMALL LETTER ZHE +0x0437 "\\textcyr{\\char231}" "textcyr" "" # CYRILLIC SMALL LETTER ZE +0x0438 "\\textcyr{\\char232}" "textcyr" "" # CYRILLIC SMALL LETTER I +0x0439 "\\textcyr{\\char233}" "textcyr" "" # CYRILLIC SMALL LETTER SHORT I +0x043a "\\textcyr{\\char234}" "textcyr" "" # CYRILLIC SMALL LETTER KA +0x043b "\\textcyr{\\char235}" "textcyr" "" # CYRILLIC SMALL LETTER EL +0x043c "\\textcyr{\\char236}" "textcyr" "" # CYRILLIC SMALL LETTER EM +0x043d "\\textcyr{\\char237}" "textcyr" "" # CYRILLIC SMALL LETTER EN +0x043e "\\textcyr{\\char238}" "textcyr" "" # CYRILLIC SMALL LETTER O +0x043f "\\textcyr{\\char239}" "textcyr" "" # CYRILLIC SMALL LETTER PE +0x0440 "\\textcyr{\\char240}" "textcyr" "" # CYRILLIC SMALL LETTER ER +0x0441 "\\textcyr{\\char241}" "textcyr" "" # CYRILLIC SMALL LETTER ES +0x0442 "\\textcyr{\\char242}" "textcyr" "" # CYRILLIC SMALL LETTER TE +0x0443 "\\textcyr{\\char243}" "textcyr" "" # CYRILLIC SMALL LETTER U +0x0444 "\\textcyr{\\char244}" "textcyr" "" # CYRILLIC SMALL LETTER EF +0x0445 "\\textcyr{\\char245}" "textcyr" "" # CYRILLIC SMALL LETTER HA +0x0446 "\\textcyr{\\char246}" "textcyr" "" # CYRILLIC SMALL LETTER TSE +0x0447 "\\textcyr{\\char247}" "textcyr" "" # CYRILLIC SMALL LETTER CHE +0x0448 "\\textcyr{\\char248}" "textcyr" "" # CYRILLIC SMALL LETTER SHA +0x0449 "\\textcyr{\\char249}" "textcyr" "" # CYRILLIC SMALL LETTER SHCHA +0x044a "\\textcyr{\\char250}" "textcyr" "" # CYRILLIC SMALL LETTER HARD SIGN 
+0x044b "\\textcyr{\\char251}" "textcyr" "" # CYRILLIC SMALL LETTER YERU
+0x044c "\\textcyr{\\char252}" "textcyr" "" # CYRILLIC SMALL LETTER SOFT SIGN
+0x044d "\\textcyr{\\char253}" "textcyr" "" # CYRILLIC SMALL LETTER E
+0x044e "\\textcyr{\\char254}" "textcyr" "" # CYRILLIC SMALL LETTER YU
+0x044f "\\textcyr{\\char255}" "textcyr" "" # CYRILLIC SMALL LETTER YA
 0x1e00 "\\textsubring{A}" "tipa" "" # LATIN CAPITAL LETTER A WITH RING BELOW
 0x1e01 "\\textsubring{a}" "tipa" "" # LATIN SMALL LETTER A WITH RING BELOW
 0x1e02 "\\.{B}" "" "" # LATIN CAPITAL LETTER B WITH DOT ABOVE
diff --git a/src/Encoding.cpp b/src/Encoding.cpp
index c6441f1dc0..a1beb9d727 100644
--- a/src/Encoding.cpp
+++ b/src/Encoding.cpp
@@ -401,6 +401,23 @@ bool Encodings::isCombiningChar(char_type c)
 }


+bool Encodings::isKnownLangChar(char_type c, string & preamble)
+{
+	CharInfoMap::const_iterator const entry = unicodesymbols.find(c);
+	if (entry == unicodesymbols.end())
+		return false;
+	// Only the "textgreek" and "textcyr" preamble entries are accepted.
+	if (entry->second.preamble != "textgreek" &&
+	    entry->second.preamble != "textcyr")
+		return false;
+	// A non-empty \p preamble restricts the answer to that very entry.
+	if (!preamble.empty())
+		return entry->second.preamble == preamble;
+	preamble = entry->second.preamble;
+	return true;
+}
+
+
 Encoding const * Encodings::getFromLyXName(string const & name) const
 {
 	EncodingList::const_iterator it = encodinglist.find(name);
diff --git a/src/Encoding.h b/src/Encoding.h
index 699153f1fd..99afe269e5 100644
--- a/src/Encoding.h
+++ b/src/Encoding.h
@@ -142,6 +142,15 @@ public:
 	static char_type transformChar(char_type c, Letter_Form form);
 	/// Is this a combining char?
 	static bool isCombiningChar(char_type c);
+	/**
+	 * Is this a known char from some language?
+	 * If \p preamble is empty and code point \p c is known to belong
+	 * to a supported language, true is returned and \p preamble is set
+	 * to the corresponding entry in the unicodesymbols file.
+	 * If \p preamble is not empty, a check is made whether code point
+	 * \p c is a known character matching the preamble entry.
+	 */
+	static bool isKnownLangChar(char_type c, std::string & preamble);
 	/**
 	 * Add the preamble snippet needed for the output of \p c to
 	 * \p features.
diff --git a/src/LaTeXFeatures.cpp b/src/LaTeXFeatures.cpp
index 2db41bf4ca..25be2c1866 100644
--- a/src/LaTeXFeatures.cpp
+++ b/src/LaTeXFeatures.cpp
@@ -186,6 +186,20 @@ static string const changetracking_none_def =
 	"\\newcommand{\\lyxadded}[3]{#3}\n"
 	"\\newcommand{\\lyxdeleted}[3]{}\n";

+static string const textgreek_def = // LaTeX code defining \greektext and \textgreek (font encoding LGR)
+	"\\DeclareRobustCommand{\\greektext}{%\n"
+	" \\fontencoding{LGR}\\selectfont\n"
+	" \\def\\encodingdefault{LGR}}\n"
+	"\\DeclareRobustCommand{\\textgreek}[1]{\\leavevmode{\\greektext #1}}\n"
+	"\\DeclareFontEncoding{LGR}{}{}\n";
+
+static string const textcyr_def = // LaTeX code defining \cyrtext and \textcyr (font encoding T2A)
+	"\\DeclareRobustCommand{\\cyrtext}{%\n"
+	" \\fontencoding{T2A}\\selectfont\n"
+	" \\def\\encodingdefault{T2A}}\n"
+	"\\DeclareRobustCommand{\\textcyr}[1]{\\leavevmode{\\cyrtext #1}}\n"
+	"\\DeclareFontEncoding{T2A}{}{}\n";
+

 /////////////////////////////////////////////////////////////////////
 //
@@ -618,6 +632,12 @@ string const LaTeXFeatures::getMacros() const
 	if (mustProvide("lyxarrow"))
 		macros << lyxarrow_def << '\n';

+	if (mustProvide("textgreek")) // append textgreek_def to macros
+		macros << textgreek_def << '\n';
+
+	if (mustProvide("textcyr")) // append textcyr_def to macros
+		macros << textcyr_def << '\n';
+
 	// quotes.
if (mustProvide("quotesinglbase")) macros << quotesinglbase_def << '\n'; diff --git a/src/Paragraph.cpp b/src/Paragraph.cpp index 80a7009c81..a53c57be4e 100644 --- a/src/Paragraph.cpp +++ b/src/Paragraph.cpp @@ -66,8 +66,10 @@ using std::ostream; namespace lyx { using support::contains; +using support::prefixIs; using support::suffixIs; using support::rsplit; +using support::rtrim; ///////////////////////////////////////////////////////////////////// @@ -187,6 +189,11 @@ public: unsigned int & column, Font const & font, Layout const & style); + /// Output consecutive known unicode chars, belonging to the same + /// language as specified by \p preamble, to \p os starting from \p c. + /// \return the number of characters written. + int knownLangChars(odocstream & os, value_type c, string & preamble, + Change &, Encoding const &, pos_type &); /// void simpleTeXSpecialChars(Buffer const &, BufferParams const &, odocstream &, @@ -627,6 +634,61 @@ bool Paragraph::Pimpl::simpleTeXBlanks(Encoding const & encoding, } +int Paragraph::Pimpl::knownLangChars(odocstream & os, + value_type c, + string & preamble, + Change & runningChange, + Encoding const & encoding, + pos_type & i) +{ + // The latex command is "\textLANG{}" and we have to retain + // "\textLANG{" for the first char but only "" for all + // subsequent chars (this also works when we are passed untranslated + // unicode). 
+ docstring const latex1 = rtrim(encoding.latexChar(c), "}"); + int length = latex1.length(); + os << latex1; + while (i < size() - 1) { + char_type next = getChar(i + 1); + if (!Encodings::isKnownLangChar(next, preamble) || + runningChange != lookupChange(i + 1)) + break; + Font prev_font; + bool found = false; + FontList::const_iterator cit = fontlist.begin(); + FontList::const_iterator end = fontlist.end(); + for (; cit != end; ++cit) { + if (cit->pos() >= i && !found) { + prev_font = cit->font(); + found = true; + } + if (cit->pos() >= i + 1) + break; + } + if (found && cit != end && prev_font != cit->font()) + break; + docstring const latex = rtrim(encoding.latexChar(next), "}"); + docstring::size_type const j = + latex.find_first_of(from_ascii("{")); + if (j == docstring::npos) { + os << latex; + length += latex.length(); + } else { + os << latex.substr(j + 1); + length += latex.substr(j + 1).length(); + } + ++i; + } + // When the proper language is set, we are passed the straight unicode, + // so we should not try to close the \textLANG command. + if (prefixIs(latex1, from_ascii("\\" + preamble))) { + os << '}'; + ++length; + } + return length; +} + + bool Paragraph::Pimpl::isTextAt(string const & str, pos_type pos) const { pos_type const len = str.length(); @@ -952,6 +1014,14 @@ void Paragraph::Pimpl::simpleTeXSpecialChars(Buffer const & buf, break; } } + string preamble; + if (Encodings::isKnownLangChar(c, preamble)) { + column += + knownLangChars(os, c, preamble, + running_change, + encoding, i) - 1; + break; + } docstring const latex = encoding.latexChar(c); if (latex.length() > 1 && latex[latex.length() - 1] != '}') {