Add support for Greek and Cyrillic characters, so that it is no longer

necessary to mark them with the proper language for typesetting.

	* src/Paragraph.cpp
	(Paragraph::Pimpl::knownLangChars): new method.
	(Paragraph::Pimpl::simpleTeXSpecialChars): output proper
	latex code for greek and cyrillic chars.

	* src/LaTeXFeatures.cpp: add textgreek and textcyr features.

	* src/Encoding.{cpp,h}
	(Encodings::isKnownLangChar): new method.

	* lib/unicodesymbols: add greek and cyrillic alphabetic chars.


git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@20931 a592a061-630c-0410-9148-cb99ea01b6c8
This commit is contained in:
Enrico Forestieri 2007-10-12 18:21:51 +00:00
parent c3c98d0d3b
commit d88aac0e1f
5 changed files with 229 additions and 0 deletions

View File

@ -684,7 +684,120 @@
#0x036d "" "" "combining" # COMBINING LATIN SMALL LETTER T #0x036d "" "" "combining" # COMBINING LATIN SMALL LETTER T
#0x036e "" "" "combining" # COMBINING LATIN SMALL LETTER V #0x036e "" "" "combining" # COMBINING LATIN SMALL LETTER V
#0x036f "" "" "combining" # COMBINING LATIN SMALL LETTER X #0x036f "" "" "combining" # COMBINING LATIN SMALL LETTER X
0x0391 "\\textgreek{A}" "textgreek" "" # GREEK CAPITAL LETTER ALPHA
0x0392 "\\textgreek{B}" "textgreek" "" # GREEK CAPITAL LETTER BETA
0x0393 "\\textgreek{G}" "textgreek" "" # GREEK CAPITAL LETTER GAMMA
0x0394 "\\textgreek{D}" "textgreek" "" # GREEK CAPITAL LETTER DELTA
0x0395 "\\textgreek{E}" "textgreek" "" # GREEK CAPITAL LETTER EPSILON
0x0396 "\\textgreek{Z}" "textgreek" "" # GREEK CAPITAL LETTER ZETA
0x0397 "\\textgreek{H}" "textgreek" "" # GREEK CAPITAL LETTER ETA
0x0398 "\\textgreek{J}" "textgreek" "" # GREEK CAPITAL LETTER THETA
0x0399 "\\textgreek{I}" "textgreek" "" # GREEK CAPITAL LETTER IOTA
0x039a "\\textgreek{K}" "textgreek" "" # GREEK CAPITAL LETTER KAPPA
0x039b "\\textgreek{L}" "textgreek" "" # GREEK CAPITAL LETTER LAMDA
0x039c "\\textgreek{M}" "textgreek" "" # GREEK CAPITAL LETTER MU
0x039d "\\textgreek{N}" "textgreek" "" # GREEK CAPITAL LETTER NU
0x039e "\\textgreek{X}" "textgreek" "" # GREEK CAPITAL LETTER XI
0x039f "\\textgreek{O}" "textgreek" "" # GREEK CAPITAL LETTER OMICRON
0x03a0 "\\textgreek{P}" "textgreek" "" # GREEK CAPITAL LETTER PI
0x03a1 "\\textgreek{R}" "textgreek" "" # GREEK CAPITAL LETTER RHO
0x03a3 "\\textgreek{S}" "textgreek" "" # GREEK CAPITAL LETTER SIGMA
0x03a4 "\\textgreek{T}" "textgreek" "" # GREEK CAPITAL LETTER TAU
0x03a5 "\\textgreek{U}" "textgreek" "" # GREEK CAPITAL LETTER UPSILON
0x03a6 "\\textgreek{F}" "textgreek" "" # GREEK CAPITAL LETTER PHI
0x03a7 "\\textgreek{Q}" "textgreek" "" # GREEK CAPITAL LETTER CHI
0x03a8 "\\textgreek{Y}" "textgreek" "" # GREEK CAPITAL LETTER PSI
0x03a9 "\\textgreek{W}" "textgreek" "" # GREEK CAPITAL LETTER OMEGA
0x03b1 "\\textgreek{a}" "textgreek" "" # GREEK SMALL LETTER ALPHA
0x03b2 "\\textgreek{b}" "textgreek" "" # GREEK SMALL LETTER BETA
0x03b3 "\\textgreek{g}" "textgreek" "" # GREEK SMALL LETTER GAMMA
0x03b4 "\\textgreek{d}" "textgreek" "" # GREEK SMALL LETTER DELTA
0x03b5 "\\textgreek{e}" "textgreek" "" # GREEK SMALL LETTER EPSILON
0x03b6 "\\textgreek{z}" "textgreek" "" # GREEK SMALL LETTER ZETA
0x03b7 "\\textgreek{h}" "textgreek" "" # GREEK SMALL LETTER ETA
0x03b8 "\\textgreek{j}" "textgreek" "" # GREEK SMALL LETTER THETA
0x03b9 "\\textgreek{i}" "textgreek" "" # GREEK SMALL LETTER IOTA
0x03ba "\\textgreek{k}" "textgreek" "" # GREEK SMALL LETTER KAPPA
0x03bb "\\textgreek{l}" "textgreek" "" # GREEK SMALL LETTER LAMDA
0x03bc "\\textgreek{m}" "textgreek" "" # GREEK SMALL LETTER MU
0x03bd "\\textgreek{n}" "textgreek" "" # GREEK SMALL LETTER NU
0x03be "\\textgreek{x}" "textgreek" "" # GREEK SMALL LETTER XI
0x03bf "\\textgreek{o}" "textgreek" "" # GREEK SMALL LETTER OMICRON
0x03c0 "\\textgreek{p}" "textgreek" "" # GREEK SMALL LETTER PI
0x03c1 "\\textgreek{r}" "textgreek" "" # GREEK SMALL LETTER RHO
0x03c2 "\\textgreek{c}" "textgreek" "" # GREEK SMALL LETTER FINAL SIGMA
0x03c3 "\\textgreek{s}" "textgreek" "" # GREEK SMALL LETTER SIGMA
0x03c4 "\\textgreek{t}" "textgreek" "" # GREEK SMALL LETTER TAU
0x03c5 "\\textgreek{u}" "textgreek" "" # GREEK SMALL LETTER UPSILON
0x03c6 "\\textgreek{f}" "textgreek" "" # GREEK SMALL LETTER PHI
0x03c7 "\\textgreek{q}" "textgreek" "" # GREEK SMALL LETTER CHI
0x03c8 "\\textgreek{y}" "textgreek" "" # GREEK SMALL LETTER PSI
0x03c9 "\\textgreek{w}" "textgreek" "" # GREEK SMALL LETTER OMEGA
0x0e3f "\\textbaht" "textcomp" "" # THAI CURRENCY SYMBOL BAHT 0x0e3f "\\textbaht" "textcomp" "" # THAI CURRENCY SYMBOL BAHT
0x0410 "\\textcyr{\\char192}" "textcyr" "" # CYRILLIC CAPITAL LETTER A
0x0411 "\\textcyr{\\char193}" "textcyr" "" # CYRILLIC CAPITAL LETTER BE
0x0412 "\\textcyr{\\char194}" "textcyr" "" # CYRILLIC CAPITAL LETTER VE
0x0413 "\\textcyr{\\char195}" "textcyr" "" # CYRILLIC CAPITAL LETTER GHE
0x0414 "\\textcyr{\\char196}" "textcyr" "" # CYRILLIC CAPITAL LETTER DE
0x0415 "\\textcyr{\\char197}" "textcyr" "" # CYRILLIC CAPITAL LETTER IE
0x0416 "\\textcyr{\\char198}" "textcyr" "" # CYRILLIC CAPITAL LETTER ZHE
0x0417 "\\textcyr{\\char199}" "textcyr" "" # CYRILLIC CAPITAL LETTER ZE
0x0418 "\\textcyr{\\char200}" "textcyr" "" # CYRILLIC CAPITAL LETTER I
0x0419 "\\textcyr{\\char201}" "textcyr" "" # CYRILLIC CAPITAL LETTER SHORT I
0x041a "\\textcyr{\\char202}" "textcyr" "" # CYRILLIC CAPITAL LETTER KA
0x041b "\\textcyr{\\char203}" "textcyr" "" # CYRILLIC CAPITAL LETTER EL
0x041c "\\textcyr{\\char204}" "textcyr" "" # CYRILLIC CAPITAL LETTER EM
0x041d "\\textcyr{\\char205}" "textcyr" "" # CYRILLIC CAPITAL LETTER EN
0x041e "\\textcyr{\\char206}" "textcyr" "" # CYRILLIC CAPITAL LETTER O
0x041f "\\textcyr{\\char207}" "textcyr" "" # CYRILLIC CAPITAL LETTER PE
0x0420 "\\textcyr{\\char208}" "textcyr" "" # CYRILLIC CAPITAL LETTER ER
0x0421 "\\textcyr{\\char209}" "textcyr" "" # CYRILLIC CAPITAL LETTER ES
0x0422 "\\textcyr{\\char210}" "textcyr" "" # CYRILLIC CAPITAL LETTER TE
0x0423 "\\textcyr{\\char211}" "textcyr" "" # CYRILLIC CAPITAL LETTER U
0x0424 "\\textcyr{\\char212}" "textcyr" "" # CYRILLIC CAPITAL LETTER EF
0x0425 "\\textcyr{\\char213}" "textcyr" "" # CYRILLIC CAPITAL LETTER HA
0x0426 "\\textcyr{\\char214}" "textcyr" "" # CYRILLIC CAPITAL LETTER TSE
0x0427 "\\textcyr{\\char215}" "textcyr" "" # CYRILLIC CAPITAL LETTER CHE
0x0428 "\\textcyr{\\char216}" "textcyr" "" # CYRILLIC CAPITAL LETTER SHA
0x0429 "\\textcyr{\\char217}" "textcyr" "" # CYRILLIC CAPITAL LETTER SHCHA
0x042a "\\textcyr{\\char218}" "textcyr" "" # CYRILLIC CAPITAL LETTER HARD SIGN
0x042b "\\textcyr{\\char219}" "textcyr" "" # CYRILLIC CAPITAL LETTER YERU
0x042c "\\textcyr{\\char220}" "textcyr" "" # CYRILLIC CAPITAL LETTER SOFT SIGN
0x042d "\\textcyr{\\char221}" "textcyr" "" # CYRILLIC CAPITAL LETTER E
0x042e "\\textcyr{\\char222}" "textcyr" "" # CYRILLIC CAPITAL LETTER YU
0x042f "\\textcyr{\\char223}" "textcyr" "" # CYRILLIC CAPITAL LETTER YA
0x0430 "\\textcyr{\\char224}" "textcyr" "" # CYRILLIC SMALL LETTER A
0x0431 "\\textcyr{\\char225}" "textcyr" "" # CYRILLIC SMALL LETTER BE
0x0432 "\\textcyr{\\char226}" "textcyr" "" # CYRILLIC SMALL LETTER VE
0x0433 "\\textcyr{\\char227}" "textcyr" "" # CYRILLIC SMALL LETTER GHE
0x0434 "\\textcyr{\\char228}" "textcyr" "" # CYRILLIC SMALL LETTER DE
0x0435 "\\textcyr{\\char229}" "textcyr" "" # CYRILLIC SMALL LETTER IE
0x0436 "\\textcyr{\\char230}" "textcyr" "" # CYRILLIC SMALL LETTER ZHE
0x0437 "\\textcyr{\\char231}" "textcyr" "" # CYRILLIC SMALL LETTER ZE
0x0438 "\\textcyr{\\char232}" "textcyr" "" # CYRILLIC SMALL LETTER I
0x0439 "\\textcyr{\\char233}" "textcyr" "" # CYRILLIC SMALL LETTER SHORT I
0x043a "\\textcyr{\\char234}" "textcyr" "" # CYRILLIC SMALL LETTER KA
0x043b "\\textcyr{\\char235}" "textcyr" "" # CYRILLIC SMALL LETTER EL
0x043c "\\textcyr{\\char236}" "textcyr" "" # CYRILLIC SMALL LETTER EM
0x043d "\\textcyr{\\char237}" "textcyr" "" # CYRILLIC SMALL LETTER EN
0x043e "\\textcyr{\\char238}" "textcyr" "" # CYRILLIC SMALL LETTER O
0x043f "\\textcyr{\\char239}" "textcyr" "" # CYRILLIC SMALL LETTER PE
0x0440 "\\textcyr{\\char240}" "textcyr" "" # CYRILLIC SMALL LETTER ER
0x0441 "\\textcyr{\\char241}" "textcyr" "" # CYRILLIC SMALL LETTER ES
0x0442 "\\textcyr{\\char242}" "textcyr" "" # CYRILLIC SMALL LETTER TE
0x0443 "\\textcyr{\\char243}" "textcyr" "" # CYRILLIC SMALL LETTER U
0x0444 "\\textcyr{\\char244}" "textcyr" "" # CYRILLIC SMALL LETTER EF
0x0445 "\\textcyr{\\char245}" "textcyr" "" # CYRILLIC SMALL LETTER HA
0x0446 "\\textcyr{\\char246}" "textcyr" "" # CYRILLIC SMALL LETTER TSE
0x0447 "\\textcyr{\\char247}" "textcyr" "" # CYRILLIC SMALL LETTER CHE
0x0448 "\\textcyr{\\char248}" "textcyr" "" # CYRILLIC SMALL LETTER SHA
0x0449 "\\textcyr{\\char249}" "textcyr" "" # CYRILLIC SMALL LETTER SHCHA
0x044a "\\textcyr{\\char250}" "textcyr" "" # CYRILLIC SMALL LETTER HARD SIGN
0x044b "\\textcyr{\\char251}" "textcyr" "" # CYRILLIC SMALL LETTER YERU
0x044c "\\textcyr{\\char252}" "textcyr" "" # CYRILLIC SMALL LETTER SOFT SIGN
0x044d "\\textcyr{\\char253}" "textcyr" "" # CYRILLIC SMALL LETTER E
0x044e "\\textcyr{\\char254}" "textcyr" "" # CYRILLIC SMALL LETTER YU
0x044f "\\textcyr{\\char255}" "textcyr" "" # CYRILLIC SMALL LETTER YA
0x1e00 "\\textsubring{A}" "tipa" "" # LATIN CAPITAL LETTER A WITH RING BELOW 0x1e00 "\\textsubring{A}" "tipa" "" # LATIN CAPITAL LETTER A WITH RING BELOW
0x1e01 "\\textsubring{a}" "tipa" "" # LATIN SMALL LETTER A WITH RING BELOW 0x1e01 "\\textsubring{a}" "tipa" "" # LATIN SMALL LETTER A WITH RING BELOW
0x1e02 "\\.{B}" "" "" # LATIN CAPITAL LETTER B WITH DOT ABOVE 0x1e02 "\\.{B}" "" "" # LATIN CAPITAL LETTER B WITH DOT ABOVE

View File

@ -401,6 +401,23 @@ bool Encodings::isCombiningChar(char_type c)
} }
/// Check whether \p c is listed in the unicodesymbols table with the
/// "textgreek" or "textcyr" preamble feature.
/// With an empty \p preamble the feature found is stored there and true
/// is returned; otherwise the result tells whether the feature of \p c
/// equals \p preamble (which is then left untouched).
bool Encodings::isKnownLangChar(char_type c, string & preamble)
{
	CharInfoMap::const_iterator const entry = unicodesymbols.find(c);
	// Unknown code point: not a known language char.
	if (entry == unicodesymbols.end())
		return false;

	// Only the greek and cyrillic features count as "known language".
	string const & feature = entry->second.preamble;
	if (feature != "textgreek" && feature != "textcyr")
		return false;

	// The caller already settled on a language: just compare.
	if (!preamble.empty())
		return feature == preamble;

	// First char of a run: report its language back to the caller.
	preamble = feature;
	return true;
}
Encoding const * Encodings::getFromLyXName(string const & name) const Encoding const * Encodings::getFromLyXName(string const & name) const
{ {
EncodingList::const_iterator it = encodinglist.find(name); EncodingList::const_iterator it = encodinglist.find(name);

View File

@ -142,6 +142,15 @@ public:
static char_type transformChar(char_type c, Letter_Form form); static char_type transformChar(char_type c, Letter_Form form);
/// Is this a combining char? /// Is this a combining char?
static bool isCombiningChar(char_type c); static bool isCombiningChar(char_type c);
/**
 * Is this a known char from some language?
 * Only code points whose unicodesymbols entry names the "textgreek"
 * or "textcyr" preamble feature are recognized.
 * If \p preamble is empty and code point \p c is known to belong
 * to a supported language, true is returned and \p preamble is set
 * to the corresponding entry in the unicodesymbols file.
 * If \p preamble is not empty, a check is made whether code point
 * \p c is a known character matching the preamble entry; \p preamble
 * is left unchanged in that case.
 * \return false if \p c is not in the unicodesymbols table, is not a
 * greek or cyrillic char, or does not match a non-empty \p preamble.
 */
static bool isKnownLangChar(char_type c, std::string & preamble);
/** /**
* Add the preamble snippet needed for the output of \p c to * Add the preamble snippet needed for the output of \p c to
* \p features. * \p features.

View File

@ -186,6 +186,20 @@ static string const changetracking_none_def =
"\\newcommand{\\lyxadded}[3]{#3}\n" "\\newcommand{\\lyxadded}[3]{#3}\n"
"\\newcommand{\\lyxdeleted}[3]{}\n"; "\\newcommand{\\lyxdeleted}[3]{}\n";
// LaTeX code written to the macros section when the "textgreek" feature
// must be provided. It defines the declaration \greektext, which selects
// the LGR font encoding, and the command \textgreek{...}, which typesets
// its argument with \greektext in a group; it also declares the LGR
// font encoding.
static string const textgreek_def =
"\\DeclareRobustCommand{\\greektext}{%\n"
" \\fontencoding{LGR}\\selectfont\n"
" \\def\\encodingdefault{LGR}}\n"
"\\DeclareRobustCommand{\\textgreek}[1]{\\leavevmode{\\greektext #1}}\n"
"\\DeclareFontEncoding{LGR}{}{}\n";
// LaTeX code written to the macros section when the "textcyr" feature
// must be provided. It defines the declaration \cyrtext, which selects
// the T2A font encoding, and the command \textcyr{...}, which typesets
// its argument with \cyrtext in a group; it also declares the T2A
// font encoding.
static string const textcyr_def =
"\\DeclareRobustCommand{\\cyrtext}{%\n"
" \\fontencoding{T2A}\\selectfont\n"
" \\def\\encodingdefault{T2A}}\n"
"\\DeclareRobustCommand{\\textcyr}[1]{\\leavevmode{\\cyrtext #1}}\n"
"\\DeclareFontEncoding{T2A}{}{}\n";
///////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////
// //
@ -618,6 +632,12 @@ string const LaTeXFeatures::getMacros() const
if (mustProvide("lyxarrow")) if (mustProvide("lyxarrow"))
macros << lyxarrow_def << '\n'; macros << lyxarrow_def << '\n';
if (mustProvide("textgreek"))
macros << textgreek_def << '\n';
if (mustProvide("textcyr"))
macros << textcyr_def << '\n';
// quotes. // quotes.
if (mustProvide("quotesinglbase")) if (mustProvide("quotesinglbase"))
macros << quotesinglbase_def << '\n'; macros << quotesinglbase_def << '\n';

View File

@ -66,8 +66,10 @@ using std::ostream;
namespace lyx { namespace lyx {
using support::contains; using support::contains;
using support::prefixIs;
using support::suffixIs; using support::suffixIs;
using support::rsplit; using support::rsplit;
using support::rtrim;
///////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////
@ -187,6 +189,11 @@ public:
unsigned int & column, unsigned int & column,
Font const & font, Font const & font,
Layout const & style); Layout const & style);
/// Output consecutive known unicode chars, belonging to the same
/// language as specified by \p preamble, to \p os starting from \p c.
/// The run ends at the first following char that is not a known char
/// of that language, whose change differs from the running change
/// (the Change argument), or where the font changes.
/// The Encoding argument supplies the LaTeX code of each char.
/// The trailing pos_type argument is the position of \p c on entry and
/// is incremented once for every additional char consumed.
/// \return the number of characters of LaTeX code written to \p os.
int knownLangChars(odocstream & os, value_type c, string & preamble,
Change &, Encoding const &, pos_type &);
/// ///
void simpleTeXSpecialChars(Buffer const &, BufferParams const &, void simpleTeXSpecialChars(Buffer const &, BufferParams const &,
odocstream &, odocstream &,
@ -627,6 +634,61 @@ bool Paragraph::Pimpl::simpleTeXBlanks(Encoding const & encoding,
} }
/// Write \p c, plus every directly following char that belongs to the
/// same known language (\p preamble), shares the running change and does
/// not start a different font, to \p os as a single LaTeX run.
/// \p i is advanced past each additional char that gets consumed.
/// \return the number of characters of LaTeX code written.
int Paragraph::Pimpl::knownLangChars(odocstream & os,
				     value_type c,
				     string & preamble,
				     Change & runningChange,
				     Encoding const & encoding,
				     pos_type & i)
{
	// A translated char looks like "\textLANG{<spec>}". The first char
	// keeps "\textLANG{<spec>" (closing brace stripped), all following
	// chars contribute only "<spec>". Untranslated unicode has neither
	// brace and therefore passes through unchanged.
	docstring const head = rtrim(encoding.latexChar(c), "}");
	os << head;
	int length = head.length();

	for (; i < size() - 1; ++i) {
		pos_type const next_pos = i + 1;
		char_type const next = getChar(next_pos);

		// The run ends at a char of another (or no known) language ...
		if (!Encodings::isKnownLangChar(next, preamble))
			break;
		// ... or where the change tracking state differs.
		if (runningChange != lookupChange(next_pos))
			break;

		// Pick up the font entry for position i and advance to the
		// entry for the next position.
		Font prev_font;
		bool found = false;
		FontList::const_iterator cit = fontlist.begin();
		FontList::const_iterator const end = fontlist.end();
		while (cit != end) {
			if (!found && cit->pos() >= i) {
				prev_font = cit->font();
				found = true;
			}
			if (cit->pos() >= next_pos)
				break;
			++cit;
		}
		// The run also ends where the font changes.
		if (found && cit != end && prev_font != cit->font())
			break;

		// Drop the closing brace and, if present, everything up to
		// and including the first opening brace.
		docstring const latex = rtrim(encoding.latexChar(next), "}");
		docstring::size_type const brace =
			latex.find_first_of(from_ascii("{"));
		docstring const spec = (brace == docstring::npos)
			? latex : latex.substr(brace + 1);
		os << spec;
		length += spec.length();
	}

	// With the proper language set we are handed plain unicode, so there
	// is no \textLANG command to close; close it only if we opened one.
	if (prefixIs(head, from_ascii("\\" + preamble))) {
		os << '}';
		++length;
	}
	return length;
}
bool Paragraph::Pimpl::isTextAt(string const & str, pos_type pos) const bool Paragraph::Pimpl::isTextAt(string const & str, pos_type pos) const
{ {
pos_type const len = str.length(); pos_type const len = str.length();
@ -952,6 +1014,14 @@ void Paragraph::Pimpl::simpleTeXSpecialChars(Buffer const & buf,
break; break;
} }
} }
string preamble;
if (Encodings::isKnownLangChar(c, preamble)) {
column +=
knownLangChars(os, c, preamble,
running_change,
encoding, i) - 1;
break;
}
docstring const latex = encoding.latexChar(c); docstring const latex = encoding.latexChar(c);
if (latex.length() > 1 && if (latex.length() > 1 &&
latex[latex.length() - 1] != '}') { latex[latex.length() - 1] != '}') {