Fix lyx2lyx conversion of dashes.

This commit is contained in:
Günter Milde 2017-09-30 23:26:02 +02:00
parent 291d2f1037
commit 586d16a49c
2 changed files with 81 additions and 78 deletions

View File

@ -659,6 +659,12 @@ def convert_dashes(document):
def revert_dashes(document): def revert_dashes(document):
"convert \\twohyphens and \\threehyphens to -- and ---" "convert \\twohyphens and \\threehyphens to -- and ---"
# eventually remove preamble code from 2.3->2.2 conversion:
for i, line in enumerate(document.preamble):
if i > 1 and line == r'\renewcommand{\textemdash}{---}':
if (document.preamble[i-1] == r'\renewcommand{\textendash}{--}'
and document.preamble[i-2] == '% Added by lyx2lyx'):
del document.preamble[i-2:i+1]
i = 0 i = 0
while i < len(document.body): while i < len(document.body):
words = document.body[i].split() words = document.body[i].split()

View File

@ -1841,58 +1841,63 @@ def revert_chapterbib(document):
def convert_dashligatures(document):
    r"""Set 'use_dash_ligatures' according to content.

    The header setting "\use_dash_ligatures" is inserted directly after
    "\use_microtype" when a preferred value can be determined:

    * true  -- the preamble contains the dash redefinitions marked with
               '% Added by lyx2lyx' (they are removed here), or the body
               only contains "\twohyphens" / "\threehyphens" markers that
               are followed by a word character or no-break space on the
               next line.
    * false -- the body only contains literal en-/em-dashes (U+2013,
               U+2014) followed by a word character, a no-break space or
               the end of the line.

    If both kinds of dashes are found, a warning is emitted and no setting
    is inserted.  Nothing is inserted either if no dashes are found or if
    the header has no "\use_microtype" line.

    document -- object providing the lists `preamble`, `header` and `body`
                and a `warning(message)` method.
    """
    use_dash_ligatures = None

    # Remove preamble code from the 2.3->2.2 conversion.  Use an explicit
    # index instead of deleting while enumerating, so no entry is skipped
    # and repeated copies of the block are removed as well.
    lyx2lyx_block = ['% Added by lyx2lyx',
                     r'\renewcommand{\textendash}{--}',
                     r'\renewcommand{\textemdash}{---}']
    block_len = len(lyx2lyx_block)
    i = 0
    while i + block_len <= len(document.preamble):
        if list(document.preamble[i:i+block_len]) == lyx2lyx_block:
            del document.preamble[i:i+block_len]
            use_dash_ligatures = True
        else:
            i += 1

    if use_dash_ligatures is None:
        # Look for dashes:
        # (Documents by LyX 2.1 or older have "\twohyphens\n" or
        # "\threehyphens\n" as interim representation for dash ligatures
        # in 2.2.)
        # literal dash followed by a word or no-break space (or line end):
        literal_dash = re.compile(u"[\u2013\u2014]([\\w\u00A0]|$)",
                                  re.UNICODE)
        # No "ur" prefix: it is a syntax error in Python 3.
        ligature_dash = re.compile(r"(\\twohyphens|\\threehyphens)",
                                   re.UNICODE)
        word_start = re.compile(u"[\\w\u00A0]", re.UNICODE)
        skipped_insets = ["CommandInset", "ERT", "External", "Formula",
                          "FormulaMacro", "Graphics", "IPA", "listings"]
        has_literal_dashes = False
        has_ligature_dashes = False
        body = document.body
        last = len(body) - 1
        skip_until = 0
        for i, line in enumerate(body):
            # Skip some document parts where dashes are not converted
            if i < skip_until or line.startswith("\\labelwidthstring"):
                continue
            words = line.split()
            if (len(words) > 1 and words[0] == "\\begin_inset"
                    and words[1] in skipped_insets):
                skip_until = find_end_of_inset(body, i)
                if skip_until == -1:
                    document.warning("Malformed LyX document: "
                        "Can't find end of %s inset at line %d"
                        % (words[1], i))
                continue
            if literal_dash.search(line):
                has_literal_dashes = True
            # ligature dash followed by word or no-break space on next
            # line; a marker on the very last line has no successor.
            if (i < last and ligature_dash.search(line)
                    and word_start.match(body[i+1])):
                has_ligature_dashes = True
        if has_literal_dashes and has_ligature_dashes:
            # TODO: insert a warning note in the document?
            document.warning('This document contained both literal and '
                '"ligature" dashes.\n Line breaks may have changed. '
                'See UserGuide chapter 3.9.1 for details.')
        elif has_literal_dashes:
            use_dash_ligatures = False
        elif has_ligature_dashes:
            use_dash_ligatures = True

    # insert the setting if there is a preferred value
    if use_dash_ligatures is not None:
        i = find_token(document.header, "\\use_microtype", 0)
        if i != -1:
            document.header.insert(i+1, "\\use_dash_ligatures %s"
                                   % str(use_dash_ligatures).lower())


def revert_dashligatures(document): def revert_dashligatures(document):
" Remove font ligature settings for en- and em-dashes. " """Remove font ligature settings for en- and em-dashes.
Revert conversion of \twodashes or \threedashes to literal dashes."""
i = find_token(document.header, "\\use_dash_ligatures", 0) i = find_token(document.header, "\\use_dash_ligatures", 0)
if i == -1: if i == -1:
return return
@ -1902,42 +1907,34 @@ def revert_dashligatures(document):
i = find_token(document.header, "\\use_non_tex_fonts", 0) i = find_token(document.header, "\\use_non_tex_fonts", 0)
if i != -1: if i != -1:
use_non_tex_fonts = get_bool_value(document.header, "\\use_non_tex_fonts", i) use_non_tex_fonts = get_bool_value(document.header, "\\use_non_tex_fonts", i)
if not use_dash_ligatures or use_non_tex_fonts: if not use_dash_ligatures or document.backend != "latex":
return return
# Add a zero-length space (U+200B) after en- and em-dashes j = 0
i = 0 new_body = []
while i < len(document.body): for i, line in enumerate(document.body):
words = document.body[i].split()
# Skip some document parts where dashes are not converted # Skip some document parts where dashes are not converted
if (i < j) or line.startswith("\\labelwidthstring"):
new_body.append(line)
continue
words = line.split()
if len(words) > 1 and words[0] == "\\begin_inset" and \ if len(words) > 1 and words[0] == "\\begin_inset" and \
words[1] in ["CommandInset", "ERT", "External", "Formula", \ words[1] in ["CommandInset", "ERT", "External", "Formula",
"FormulaMacro", "Graphics", "IPA", "listings"]: "FormulaMacro", "Graphics", "IPA", "listings"]:
j = find_end_of_inset(document.body, i) j = find_end_of_inset(document.body, i)
if j == -1: if j == -1:
document.warning("Malformed LyX document: Can't find end of " \ document.warning("Malformed LyX document: Can't find end of "
+ words[1] + " inset at line " + str(i)) + words[1] + " inset at line " + str(i))
i += 1 new_body.append(line)
else:
i = j
continue continue
if len(words) > 0 and words[0] in ["\\leftindent", \ line = line.replace(u'\u2013', '\\twohyphens\n')
"\\paragraph_spacing", "\\align", "\\labelwidthstring"]: line = line.replace(u'\u2014', '\\threehyphens\n')
i += 1 lines = line.split('\n')
continue new_body.extend(line.split('\n'))
document.body = new_body
start = 0 # redefine the dash LICRs to use ligature dashes:
while True: add_to_preamble(document, [r'\renewcommand{\textendash}{--}',
j = document.body[i].find(u"\u2013", start) # en-dash r'\renewcommand{\textemdash}{---}'])
k = document.body[i].find(u"\u2014", start) # em-dash
if j == -1 and k == -1:
break
if j == -1 or (k != -1 and k < j):
j = k
after = document.body[i][j+1:]
document.body[i] = document.body[i][:j+1] + u"\u200B" + after
start = j+1
i += 1
def revert_noto(document): def revert_noto(document):