mirror of
https://git.lyx.org/repos/lyx.git
synced 2024-11-22 10:00:33 +00:00
Fix plaintext output of dashes (bug #3647)
Previously, consecutive dashes in .lyx files were combined to endash and emdash in some cases, and in other cases they were output as is. This made the code complicated, and resulted in inconsistencies (bug #3647). Now, a dash in a .lyx file is always a dash in the output, for all flavours. The special handling is moved to the input side, so that you still get an endash if you type two hyphens. If needed, this can be changed or made customizable without the need to update the file format again. Many thanks for the fruitful mailing list discussion, which contributed significantly to the final version.
This commit is contained in:
parent
fc9e6326b9
commit
8aa37c43a1
@ -11,6 +11,11 @@ adjustments are made to tex2lyx and bugs are fixed in lyx2lyx.
|
||||
|
||||
-----------------------
|
||||
|
||||
2015-02-24 Georg Baum <Georg.Baum@post.rwth-aachen.de>
|
||||
* Format incremented to 481
|
||||
"--" and "---" are not treated as endash and emdash anymore, since
|
||||
we have unicode symbols for that now (bug 3647).
|
||||
|
||||
2015-01-09 Jürgen Spitzmüller <spitz@lyx.org>
|
||||
* Format incremented to 480:
|
||||
Add self-defined Question* and Question lemma types to
|
||||
|
@ -85,7 +85,7 @@ format_relation = [("0_06", [200], minor_versions("0.6" , 4)),
|
||||
("1_6", range(277,346), minor_versions("1.6" , 10)),
|
||||
("2_0", range(346,414), minor_versions("2.0", 8)),
|
||||
("2_1", range(414,475), minor_versions("2.1", 0)),
|
||||
("2_2", range(475,481), minor_versions("2.2", 0))
|
||||
("2_2", range(475,482), minor_versions("2.2", 0))
|
||||
]
|
||||
|
||||
####################################################################
|
||||
|
@ -481,6 +481,71 @@ def revert_question_env(document):
|
||||
i = j
|
||||
|
||||
|
||||
def convert_dashes(document):
    "convert -- and --- to \\twohyphens and \\threehyphens"

    # Only documents with the LaTeX backend are touched.
    if document.backend != "latex":
        return

    # Insets whose content is left alone:
    #  - Formula: must not replace anything in math
    #  - IPA: filtering it out makes Text::readParToken() more simple
    #  - ERT: skipped as well since the conversion is not needed there
    untouched_insets = ["ERT", "Formula", "IPA"]

    body = document.body
    lineno = 0
    while lineno < len(body):
        tokens = body[lineno].split()
        starts_untouched_inset = (len(tokens) > 1
                                  and tokens[0] == "\\begin_inset"
                                  and tokens[1] in untouched_insets)
        if starts_untouched_inset:
            inset_end = find_end_of_inset(body, lineno)
            if inset_end == -1:
                document.warning("Malformed LyX document: Can't find end of " + tokens[1] + " inset at line " + str(lineno))
                lineno += 1
            else:
                # Resume at the closing line of the inset.
                lineno = inset_end
            continue

        # A run of hyphens can be arbitrarily long.  It is cut into pieces
        # the way LaTeX reads it: emdash (three hyphens) first, then endash
        # (two hyphens), then a single hyphen.  Whatever follows the first
        # piece is moved to a new line and handled in a later iteration.
        line = body[lineno]
        dash_at = line.find("--")
        while dash_at != -1:
            head = line[:dash_at]
            tail = line[dash_at + 2:]
            if tail.startswith("-"):
                tail = tail[1:]
                token = "\\threehyphens"
            else:
                token = "\\twohyphens"
            if tail:
                body.insert(lineno + 1, tail)
            line = head + token
            body[lineno] = line
            dash_at = line.find("--")
        lineno += 1
|
||||
|
||||
|
||||
def revert_dashes(document):
    "convert \\twohyphens and \\threehyphens to -- and ---"

    body = document.body
    lineno = 0
    while lineno < len(body):
        before = body[lineno]
        # Put the literal hyphens back; neither replacement can produce
        # the other token, so chaining them is safe.
        after = before.replace("\\twohyphens", "--")
        after = after.replace("\\threehyphens", "---")
        changed = after != before
        if changed:
            body[lineno] = after

        if changed and lineno + 1 < len(body):
            following = body[lineno + 1]
            # The next line is glued back on when it is plain text or itself
            # starts with one of the hyphen tokens, as long as the joined
            # line does not exceed 80 characters.
            joinable = (not following.startswith("\\")
                        or following.startswith("\\twohyphens")
                        or following.startswith("\\threehyphens"))
            if joinable and len(after) + len(following) <= 80:
                body[lineno] = after + following
                del body[lineno + 1]
                # Stay on this line: the appended text may hold more tokens.
                continue
        lineno += 1
|
||||
|
||||
|
||||
##
|
||||
# Conversion hub
|
||||
#
|
||||
@ -495,10 +560,12 @@ convert = [
|
||||
[477, []],
|
||||
[478, []],
|
||||
[479, []],
|
||||
[480, []]
|
||||
[480, []],
|
||||
[481, [convert_dashes]]
|
||||
]
|
||||
|
||||
revert = [
|
||||
[480, [revert_dashes]],
|
||||
[479, [revert_question_env]],
|
||||
[478, [revert_beamer_lemma]],
|
||||
[477, [revert_xarrow]],
|
||||
|
@ -364,12 +364,6 @@ public:
|
||||
pos_type i,
|
||||
unsigned int & column);
|
||||
///
|
||||
bool latexSpecialTypewriter(
|
||||
char_type const c,
|
||||
otexstream & os,
|
||||
pos_type i,
|
||||
unsigned int & column);
|
||||
///
|
||||
bool latexSpecialPhrase(
|
||||
otexstream & os,
|
||||
pos_type & i,
|
||||
@ -1216,12 +1210,6 @@ void Paragraph::Private::latexSpecialChar(otexstream & os,
|
||||
&& lyxrc.fontenc == "T1" && latexSpecialT1(c, os, i, column))
|
||||
return;
|
||||
|
||||
// \tt font needs special treatment
|
||||
if (!runparams.inIPA
|
||||
&& running_font.fontInfo().family() == TYPEWRITER_FAMILY
|
||||
&& latexSpecialTypewriter(c, os, i, column))
|
||||
return;
|
||||
|
||||
// Otherwise, we use what LaTeX provides us.
|
||||
switch (c) {
|
||||
case '\\':
|
||||
@ -1242,6 +1230,14 @@ void Paragraph::Private::latexSpecialChar(otexstream & os,
|
||||
break;
|
||||
case '-':
|
||||
os << '-';
|
||||
if (i + 1 < end_pos && text_[i+1] == '-') {
|
||||
// Prevent "--" becoming an endash and "---" becoming
|
||||
// an emdash.
|
||||
// Within \ttfamily, "--" is merged to "-" (no endash)
|
||||
// so we avoid this rather irritating ligature as well
|
||||
os << "{}";
|
||||
column += 2;
|
||||
}
|
||||
break;
|
||||
case '\"':
|
||||
os << "\\char`\\\"{}";
|
||||
@ -1401,28 +1397,6 @@ bool Paragraph::Private::latexSpecialT3(char_type const c, otexstream & os,
|
||||
}
|
||||
|
||||
|
||||
bool Paragraph::Private::latexSpecialTypewriter(char_type const c, otexstream & os,
	pos_type i, unsigned int & column)
{
	// Only the hyphen is handled here. Everything else has to be checked
	// separately (depending on the encoding), so report it as not handled.
	if (c != '-')
		return false;

	// Within \ttfamily, "--" is merged to "-" (no endash), so we avoid
	// this rather irritating ligature by writing an empty group between
	// two consecutive hyphens.
	bool const hyphen_follows =
		i + 1 < int(text_.size()) && text_[i + 1] == '-';
	if (hyphen_follows) {
		os << "-{}";
		column += 2;
	} else {
		os << '-';
	}
	return true;
}
|
||||
|
||||
|
||||
/// \param end_pos
|
||||
/// If [start_pos, end_pos) does not include entirely the special phrase, then
|
||||
/// do not apply the macro transformation.
|
||||
@ -3159,30 +3133,6 @@ docstring Paragraph::simpleLyXHTMLOnePar(Buffer const & buf,
|
||||
}
|
||||
} else {
|
||||
char_type c = getUChar(buf.masterBuffer()->params(), i);
|
||||
|
||||
if (style.pass_thru || runparams.pass_thru)
|
||||
xs << c;
|
||||
else if (c == '-' && !runparams.inIPA &&
|
||||
font.fontInfo().family() != TYPEWRITER_FAMILY) {
|
||||
docstring str;
|
||||
int j = i + 1;
|
||||
if (j < size() && d->text_[j] == '-') {
|
||||
j += 1;
|
||||
if (j < size() && d->text_[j] == '-') {
|
||||
str += from_ascii("—");
|
||||
i += 2;
|
||||
} else {
|
||||
str += from_ascii("–");
|
||||
i += 1;
|
||||
}
|
||||
}
|
||||
else
|
||||
str += c;
|
||||
// We don't want to escape the entities. Note that
|
||||
// it is safe to do this, since str can otherwise
|
||||
// only be "-". E.g., it can't be "<".
|
||||
xs << XHTMLStream::ESCAPE_NONE << str;
|
||||
} else
|
||||
xs << c;
|
||||
}
|
||||
font_old = font.fontInfo();
|
||||
@ -3258,9 +3208,7 @@ bool Paragraph::isHardHyphenOrApostrophe(pos_type pos) const
|
||||
if ((nextpos == psize || isSpace(nextpos))
|
||||
&& (pos == 0 || isSpace(prevpos)))
|
||||
return false;
|
||||
return c == '\''
|
||||
|| ((nextpos == psize || d->text_[nextpos] != '-')
|
||||
&& (pos == 0 || d->text_[prevpos] != '-'));
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
|
45
src/Text.cpp
45
src/Text.cpp
@ -498,6 +498,23 @@ void Text::readParToken(Paragraph & par, Lexer & lex,
|
||||
inset->read(lex);
|
||||
inset->setBuffer(*buf);
|
||||
par.insertInset(par.size(), inset.release(), font, change);
|
||||
} else if (token == "\\twohyphens" || token == "\\threehyphens") {
|
||||
// Ideally, this should be done by lyx2lyx, but lyx2lyx does not know the
|
||||
// running font and does not know anything about layouts (and CopyStyle).
|
||||
Layout const & layout(par.layout());
|
||||
FontInfo info = font.fontInfo();
|
||||
info.realize(layout.resfont);
|
||||
if (layout.pass_thru || info.family() == TYPEWRITER_FAMILY) {
|
||||
if (token == "\\twohyphens")
|
||||
par.insert(par.size(), from_ascii("--"), font, change);
|
||||
else
|
||||
par.insert(par.size(), from_ascii("---"), font, change);
|
||||
} else {
|
||||
if (token == "\\twohyphens")
|
||||
par.insertChar(par.size(), 0x2013, font, change);
|
||||
else
|
||||
par.insertChar(par.size(), 0x2014, font, change);
|
||||
}
|
||||
} else if (token == "\\backslash") {
|
||||
par.appendChar('\\', font, change);
|
||||
} else if (token == "\\LyXTable") {
|
||||
@ -1019,14 +1036,36 @@ void Text::insertChar(Cursor & cur, char_type c)
|
||||
}
|
||||
}
|
||||
|
||||
par.insertChar(cur.pos(), c, cur.current_font,
|
||||
pos_type pos = cur.pos();
|
||||
if (!cur.paragraph().isPassThru() && owner_->lyxCode() != IPA_CODE &&
|
||||
cur.current_font.fontInfo().family() != TYPEWRITER_FAMILY &&
|
||||
c == '-' && pos > 0) {
|
||||
if (par.getChar(pos - 1) == '-') {
|
||||
// convert "--" to endash
|
||||
par.eraseChar(pos - 1, cur.buffer()->params().track_changes);
|
||||
c = 0x2013;
|
||||
pos--;
|
||||
} else if (par.getChar(pos - 1) == 0x2013) {
|
||||
// convert "---" to emdash
|
||||
par.eraseChar(pos - 1, cur.buffer()->params().track_changes);
|
||||
c = 0x2014;
|
||||
pos--;
|
||||
} else if (par.getChar(pos - 1) == 0x2014) {
|
||||
// convert "----" to "-"
|
||||
par.eraseChar(pos - 1, cur.buffer()->params().track_changes);
|
||||
c = '-';
|
||||
pos--;
|
||||
}
|
||||
}
|
||||
|
||||
par.insertChar(pos, c, cur.current_font,
|
||||
cur.buffer()->params().track_changes);
|
||||
cur.checkBufferStructure();
|
||||
|
||||
// cur.screenUpdateFlags(Update::Force);
|
||||
bool boundary = cur.boundary()
|
||||
|| tm.isRTLBoundary(cur.pit(), cur.pos() + 1);
|
||||
setCursor(cur, cur.pit(), cur.pos() + 1, false, boundary);
|
||||
|| tm.isRTLBoundary(cur.pit(), pos + 1);
|
||||
setCursor(cur, cur.pit(), pos + 1, false, boundary);
|
||||
charInserted(cur);
|
||||
}
|
||||
|
||||
|
@ -82,7 +82,8 @@ Context::Context(bool need_layout_,
|
||||
: need_layout(need_layout_),
|
||||
need_end_layout(false), need_end_deeper(false),
|
||||
has_item(false), deeper_paragraph(false),
|
||||
new_layout_allowed(true), textclass(textclass_),
|
||||
new_layout_allowed(true), merging_hyphens_allowed(true),
|
||||
textclass(textclass_),
|
||||
layout(layout_), parent_layout(parent_layout_),
|
||||
font(font_)
|
||||
{
|
||||
@ -240,6 +241,8 @@ void Context::dump(ostream & os, string const & desc) const
|
||||
os << "deeper_paragraph ";
|
||||
if (new_layout_allowed)
|
||||
os << "new_layout_allowed ";
|
||||
if (merging_hyphens_allowed)
|
||||
os << "merging_hyphens_allowed ";
|
||||
if (!extra_stuff.empty())
|
||||
os << "extrastuff=[" << extra_stuff << "] ";
|
||||
if (!par_extra_stuff.empty())
|
||||
|
@ -146,6 +146,8 @@ public:
|
||||
* would not work.
|
||||
*/
|
||||
bool new_layout_allowed;
|
||||
/// May -- be converted to endash and --- to emdash?
|
||||
bool merging_hyphens_allowed;
|
||||
/// Did we output anything yet in any context?
|
||||
static bool empty;
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
#LyX file created by tex2lyx 2.2
|
||||
\lyxformat 480
|
||||
\lyxformat 481
|
||||
\begin_document
|
||||
\begin_header
|
||||
\textclass article
|
||||
|
@ -1,5 +1,5 @@
|
||||
#LyX file created by tex2lyx 2.2
|
||||
\lyxformat 480
|
||||
\lyxformat 481
|
||||
\begin_document
|
||||
\begin_header
|
||||
\textclass article
|
||||
|
@ -1,5 +1,5 @@
|
||||
#LyX file created by tex2lyx 2.2
|
||||
\lyxformat 480
|
||||
\lyxformat 481
|
||||
\begin_document
|
||||
\begin_header
|
||||
\textclass article
|
||||
|
@ -1,5 +1,5 @@
|
||||
#LyX file created by tex2lyx 2.2
|
||||
\lyxformat 480
|
||||
\lyxformat 481
|
||||
\begin_document
|
||||
\begin_header
|
||||
\textclass article
|
||||
|
@ -1,5 +1,5 @@
|
||||
#LyX file created by tex2lyx 2.2
|
||||
\lyxformat 480
|
||||
\lyxformat 481
|
||||
\begin_document
|
||||
\begin_header
|
||||
\textclass article
|
||||
|
@ -1,5 +1,5 @@
|
||||
#LyX file created by tex2lyx 2.2
|
||||
\lyxformat 480
|
||||
\lyxformat 481
|
||||
\begin_document
|
||||
\begin_header
|
||||
\textclass article
|
||||
|
@ -1,5 +1,5 @@
|
||||
#LyX file created by tex2lyx 2.2
|
||||
\lyxformat 480
|
||||
\lyxformat 481
|
||||
\begin_document
|
||||
\begin_header
|
||||
\textclass article
|
||||
|
@ -1,5 +1,5 @@
|
||||
#LyX file created by tex2lyx 2.2
|
||||
\lyxformat 480
|
||||
\lyxformat 481
|
||||
\begin_document
|
||||
\begin_header
|
||||
\textclass article
|
||||
@ -3507,7 +3507,7 @@ A long table
|
||||
\begin_inset Caption Standard
|
||||
|
||||
\begin_layout Standard
|
||||
A long table -- continued
|
||||
A long table – continued
|
||||
\end_layout
|
||||
|
||||
\end_inset
|
||||
@ -6769,7 +6769,7 @@ fy ligature break.
|
||||
\end_layout
|
||||
|
||||
\begin_layout Standard
|
||||
There are dashes: endash in short form -- and long form –, emdash is alike: --- and —. If we really want several hyphens in a row, we need to separate them: -
|
||||
There are dashes: endash in short form – and long form –, emdash is alike: — and —. If we really want several hyphens in a row, we need to separate them: -
|
||||
\begin_inset ERT
|
||||
status collapsed
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
#LyX file created by tex2lyx 2.2
|
||||
\lyxformat 480
|
||||
\lyxformat 481
|
||||
\begin_document
|
||||
\begin_header
|
||||
\textclass memoir
|
||||
|
@ -1,5 +1,5 @@
|
||||
#LyX file created by tex2lyx 2.2
|
||||
\lyxformat 480
|
||||
\lyxformat 481
|
||||
\begin_document
|
||||
\begin_header
|
||||
\textclass amsart
|
||||
|
@ -1,5 +1,5 @@
|
||||
#LyX file created by tex2lyx 2.2
|
||||
\lyxformat 480
|
||||
\lyxformat 481
|
||||
\begin_document
|
||||
\begin_header
|
||||
\textclass book
|
||||
|
@ -1,5 +1,5 @@
|
||||
#LyX file created by tex2lyx 2.2
|
||||
\lyxformat 480
|
||||
\lyxformat 481
|
||||
\begin_document
|
||||
\begin_header
|
||||
\textclass scrbook
|
||||
|
@ -1,5 +1,5 @@
|
||||
#LyX file created by tex2lyx 2.2
|
||||
\lyxformat 480
|
||||
\lyxformat 481
|
||||
\begin_document
|
||||
\begin_header
|
||||
\textclass article
|
||||
|
@ -1,5 +1,5 @@
|
||||
#LyX file created by tex2lyx 2.2
|
||||
\lyxformat 480
|
||||
\lyxformat 481
|
||||
\begin_document
|
||||
\begin_header
|
||||
\textclass article
|
||||
|
@ -1,5 +1,5 @@
|
||||
#LyX file created by tex2lyx 2.2
|
||||
\lyxformat 480
|
||||
\lyxformat 481
|
||||
\begin_document
|
||||
\begin_header
|
||||
\textclass article
|
||||
|
@ -2387,8 +2387,21 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
|
||||
else if (t.cat() == catOther ||
|
||||
t.cat() == catAlign ||
|
||||
t.cat() == catParameter) {
|
||||
// This translates "&" to "\\&" which may be wrong...
|
||||
context.check_layout(os);
|
||||
if (t.asInput() == "-" && p.next_token().asInput() == "-" &&
|
||||
context.merging_hyphens_allowed &&
|
||||
context.font.family != "ttfamily" &&
|
||||
!context.layout->pass_thru) {
|
||||
if (p.next_next_token().asInput() == "-") {
|
||||
// --- is emdash
|
||||
os << to_utf8(docstring(1, 0x2014));
|
||||
p.get_token();
|
||||
} else
|
||||
// -- is endash
|
||||
os << to_utf8(docstring(1, 0x2013));
|
||||
p.get_token();
|
||||
} else
|
||||
// This translates "&" to "\\&" which may be wrong...
|
||||
os << t.cs();
|
||||
}
|
||||
|
||||
@ -3240,7 +3253,10 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
|
||||
else if (t.cs() == "textipa") {
|
||||
context.check_layout(os);
|
||||
begin_inset(os, "IPA\n");
|
||||
bool merging_hyphens_allowed = context.merging_hyphens_allowed;
|
||||
context.merging_hyphens_allowed = false;
|
||||
parse_text_in_inset(p, os, FLAG_ITEM, outer, context);
|
||||
context.merging_hyphens_allowed = merging_hyphens_allowed;
|
||||
end_inset(os);
|
||||
preamble.registerAutomaticallyLoadedPackage("tipa");
|
||||
preamble.registerAutomaticallyLoadedPackage("tipx");
|
||||
|
@ -36,8 +36,8 @@ extern char const * const lyx_version_info;
|
||||
|
||||
// Do not remove the comment below, so we get merge conflict in
|
||||
// independent branches. Instead add your own.
|
||||
#define LYX_FORMAT_LYX 480 // spitz: question and question* environments
|
||||
#define LYX_FORMAT_TEX2LYX 480
|
||||
#define LYX_FORMAT_LYX 481 // gb: endash and emdash
|
||||
#define LYX_FORMAT_TEX2LYX 481
|
||||
|
||||
#if LYX_FORMAT_TEX2LYX != LYX_FORMAT_LYX
|
||||
#ifndef _MSC_VER
|
||||
|
Loading…
Reference in New Issue
Block a user