lyx2lyx fixes and cleanup.

Don't insert empty line when translating QuoteInsets to literal
quotes.

Fix regexp pattern in revert_dashligatures and convert_dashligatures.

Adjust logic in revert_dashes/convert_dashes and revert_dashligatures/convert_dashligatures.
This commit is contained in:
Günter Milde 2018-02-09 16:49:23 +01:00
parent 3cc98a9bf6
commit 8f86b72cf4
3 changed files with 119 additions and 68 deletions

View File

@ -12,7 +12,7 @@ logicalmkup
\maintain_unincluded_children false \maintain_unincluded_children false
\language bosnian \language bosnian
\language_package default \language_package default
\inputencoding auto \inputencoding utf8
\fontencoding global \fontencoding global
\font_roman "cochineal" "DejaVu Serif" \font_roman "cochineal" "DejaVu Serif"
\font_sans "lmss" "default" \font_sans "lmss" "default"
@ -295,5 +295,53 @@ y=x^{2}
\end_layout \end_layout
\begin_layout Description
Quote
\begin_inset space ~
\end_inset
insets: Plain quote insets
\begin_inset Quotes qld
\end_inset
<file-
\begin_inset Quotes qls
\end_inset
name
\begin_inset Quotes qrs
\end_inset
>
\begin_inset Quotes qrd
\end_inset
vs.
literal quotes "<file-'name'>".
\end_layout
\begin_deeper
\begin_layout Verbatim
Quote insets in Verbatim:
\begin_inset Quotes cld
\end_inset
foo
\begin_inset Quotes frd
\end_inset
and
\begin_inset Quotes pls
\end_inset
bar
\begin_inset Quotes prs
\end_inset
\end_layout
\end_deeper
\end_body \end_body
\end_document \end_document

View File

@ -36,7 +36,7 @@ from lyx2lyx_tools import (add_to_preamble, put_cmd_in_ert, get_ert,
from parser_tools import (check_token, del_complete_lines, from parser_tools import (check_token, del_complete_lines,
find_end_of_inset, find_end_of_layout, find_nonempty_line, find_re, find_end_of_inset, find_end_of_layout, find_nonempty_line, find_re,
find_token, find_token_backwards, get_containing_layout, find_substring, find_token, find_token_backwards, get_containing_layout,
get_containing_inset, get_quoted_value, get_value, is_in_inset, get_containing_inset, get_quoted_value, get_value, is_in_inset,
get_bool_value, set_bool_value) get_bool_value, set_bool_value)
@ -618,19 +618,18 @@ def convert_dashes(document):
if document.backend != "latex": if document.backend != "latex":
return return
lines = document.body
i = 0 i = 0
while i+1 < len(lines): while True:
i += 1 i = find_substring(document.body, "--", i+1)
line = lines[i] if i == -1:
if "--" not in line: break
continue line = document.body[i]
# skip label width string (bug 10243): # skip label width string (bug 10243):
if line.startswith("\\labelwidthstring"): if line.startswith("\\labelwidthstring"):
continue continue
# Do not touch hyphens in some insets: # Do not touch hyphens in some insets:
try: try:
value, start, end = get_containing_inset(lines, i) value, start, end = get_containing_inset(document.body, i)
except TypeError: except TypeError:
# False means no (or malformed) containing inset # False means no (or malformed) containing inset
value, start, end = "no inset", -1, -1 value, start, end = "no inset", -1, -1
@ -644,7 +643,7 @@ def convert_dashes(document):
i = end i = end
continue continue
try: try:
layout, start, end, j = get_containing_layout(lines, i) layout, start, end, j = get_containing_layout(document.body, i)
except TypeError: # no (or malformed) containing layout except TypeError: # no (or malformed) containing layout
document.warning("Malformed LyX document: " document.warning("Malformed LyX document: "
"Can't find layout at line %d" % i) "Can't find layout at line %d" % i)
@ -656,18 +655,18 @@ def convert_dashes(document):
# Replace as LaTeX does: First try emdash, then endash # Replace as LaTeX does: First try emdash, then endash
line = line.replace("---", "\\threehyphens\n") line = line.replace("---", "\\threehyphens\n")
line = line.replace("--", "\\twohyphens\n") line = line.replace("--", "\\twohyphens\n")
lines[i:i+1] = line.splitlines() document.body[i:i+1] = line.split('\n')
# remove ligature breaks between dashes # remove ligature breaks between dashes
i = 1 i = 0
while i < len(lines): while True:
line = lines[i] i = find_substring(document.body,
if (line.endswith(r"-\SpecialChar \textcompwordmark{}") and r"-\SpecialChar \textcompwordmark{}", i+1)
lines[i+1].startswith("-")): if i == -1:
lines[i] = line.replace(r"\SpecialChar \textcompwordmark{}", break
lines.pop(i+1)) if document.body[i+1].startswith("-"):
else: document.body[i] = document.body[i].replace(
i += 1 r"\SpecialChar \textcompwordmark{}", document.body.pop(i+1))
def revert_dashes(document): def revert_dashes(document):
@ -682,19 +681,18 @@ def revert_dashes(document):
r'\renewcommand{\textemdash}{---}']) r'\renewcommand{\textemdash}{---}'])
# Insert ligature breaks to prevent ligation of hyphens to dashes: # Insert ligature breaks to prevent ligation of hyphens to dashes:
lines = document.body
i = 0 i = 0
while i+1 < len(lines): while True:
i += 1 i = find_substring(document.body, "--", i+1)
line = lines[i] if i == -1:
if "--" not in line: break
continue line = document.body[i]
# skip label width string (bug 10243): # skip label width string (bug 10243):
if line.startswith("\\labelwidthstring"): if line.startswith("\\labelwidthstring"):
continue continue
# do not touch hyphens in some insets (cf. convert_dashes): # do not touch hyphens in some insets (cf. convert_dashes):
try: try:
value, start, end = get_containing_inset(lines, i) value, start, end = get_containing_inset(document.body, i)
except TypeError: except TypeError:
# False means no (or malformed) containing inset # False means no (or malformed) containing inset
value, start, end = "no inset", -1, -1 value, start, end = "no inset", -1, -1
@ -708,14 +706,14 @@ def revert_dashes(document):
# Revert \twohyphens and \threehyphens: # Revert \twohyphens and \threehyphens:
i = 1 i = 1
while i < len(lines): while i < len(document.body):
line = lines[i] line = document.body[i]
if not line.endswith("hyphens"): if not line.endswith("hyphens"):
i +=1 i +=1
elif line.endswith("\\twohyphens") or line.endswith("\\threehyphens"): elif line.endswith("\\twohyphens") or line.endswith("\\threehyphens"):
line = line.replace("\\twohyphens", "--") line = line.replace("\\twohyphens", "--")
line = line.replace("\\threehyphens", "---") line = line.replace("\\threehyphens", "---")
lines[i] = line + lines.pop(i+1) document.body[i] = line + document.body.pop(i+1)
else: else:
i += 1 i += 1

View File

@ -132,7 +132,6 @@ def revert_ibranches(document):
continue continue
if inverted: if inverted:
branch = document.body[i][20:].strip() branch = document.body[i][20:].strip()
#document.warning(branch)
if not branch in antibranches: if not branch in antibranches:
antibranch = "Anti-" + branch antibranch = "Anti-" + branch
while antibranch in antibranches: while antibranch in antibranches:
@ -140,7 +139,6 @@ def revert_ibranches(document):
antibranches[branch] = antibranch antibranches[branch] = antibranch
else: else:
antibranch = antibranches[branch] antibranch = antibranches[branch]
#document.warning(antibranch)
document.body[i] = "\\begin_inset Branch " + antibranch document.body[i] = "\\begin_inset Branch " + antibranch
# now we need to add the new branches to the header # now we need to add the new branches to the header
@ -420,6 +418,7 @@ def revert_quotes(document):
if len(words) > 1 and words[0] == "\\begin_inset" and \ if len(words) > 1 and words[0] == "\\begin_inset" and \
( words[1] in ["ERT", "listings"] or ( len(words) > 2 and words[2] in ["URL", "Chunk", "Sweave", "S/R"]) ): ( words[1] in ["ERT", "listings"] or ( len(words) > 2 and words[2] in ["URL", "Chunk", "Sweave", "S/R"]) ):
j = find_end_of_inset(document.body, i) j = find_end_of_inset(document.body, i)
if j == -1: if j == -1:
document.warning("Malformed LyX document: Can't find end of " + words[1] + " inset at line " + str(i)) document.warning("Malformed LyX document: Can't find end of " + words[1] + " inset at line " + str(i))
i += 1 i += 1
@ -434,10 +433,10 @@ def revert_quotes(document):
document.warning("Malformed LyX document: Can't find end of Quote inset at line " + str(k)) document.warning("Malformed LyX document: Can't find end of Quote inset at line " + str(k))
i = k i = k
continue continue
replace = "\"" replace = '"'
if document.body[k].endswith("s"): if document.body[k].endswith("s"):
replace = "'" replace = "'"
document.body[k:l+1] = [replace] document.body[k:l+2] = [replace]
else: else:
i += 1 i += 1
continue continue
@ -467,7 +466,7 @@ def revert_quotes(document):
replace = "\"" replace = "\""
if document.body[k].endswith("s"): if document.body[k].endswith("s"):
replace = "'" replace = "'"
document.body[k:l+1] = [replace] document.body[k:l+2] = [replace]
else: else:
i += 1 i += 1
continue continue
@ -498,7 +497,7 @@ def revert_quotes(document):
replace = "\"" replace = "\""
if document.body[k].endswith("s"): if document.body[k].endswith("s"):
replace = "'" replace = "'"
document.body[k:l+1] = [replace] document.body[k:l+2] = [replace]
i = l i = l
@ -602,7 +601,7 @@ def revert_plainquote(document):
replace = "\"" replace = "\""
if document.body[k].endswith("s"): if document.body[k].endswith("s"):
replace = "'" replace = "'"
document.body[k:l+1] = [replace] document.body[k:l+2] = [replace]
i = l i = l
@ -1799,13 +1798,13 @@ def convert_dashligatures(document):
['% Added by lyx2lyx', ['% Added by lyx2lyx',
r'\renewcommand{\textendash}{--}', r'\renewcommand{\textendash}{--}',
r'\renewcommand{\textemdash}{---}']) or None r'\renewcommand{\textemdash}{---}']) or None
if use_dash_ligatures is None: if use_dash_ligatures is None:
# Look for dashes (Documents by LyX 2.1 or older have "\twohyphens\n" # Look for dashes (Documents by LyX 2.1 or older have "\twohyphens\n"
# or "\threehyphens\n" as interim representation for -- an ---.) # or "\threehyphens\n" as interim representation for -- an ---.)
lines = document.body lines = document.body
has_literal_dashes = has_ligature_dashes = False has_literal_dashes = has_ligature_dashes = False
dash_pattern = re.compile(u"[\u2013\u2014]|\\twohyphens|\\threehyphens") dash_pattern = re.compile(u".*[\u2013\u2014]|\\twohyphens|\\threehyphens")
i = j = 0 i = j = 0
while True: while True:
# skip lines without dashes: # skip lines without dashes:
@ -1837,13 +1836,13 @@ def convert_dashligatures(document):
i = end i = end
continue continue
# literal dash followed by a word or no-break space: # literal dash followed by a non-white-character or no-break space:
if re.search(u"[\u2013\u2014]([\w\u00A0]|$)", if re.search(u"[\u2013\u2014]([\S\u00A0\u202F\u2060]|$)",
line, flags=re.UNICODE): line, flags=re.UNICODE):
has_literal_dashes = True has_literal_dashes = True
# ligature dash followed by word or no-break space on next line: # ligature dash followed by non-white-char or no-break space on next line:
if (re.search(r"(\\twohyphens|\\threehyphens)", line) and if (re.search(r"(\\twohyphens|\\threehyphens)", line) and
re.match(u"[\w\u00A0]", lines[i+1], flags=re.UNICODE)): re.match(u"[\S\u00A0\u202F\u2060]", lines[i+1], flags=re.UNICODE)):
has_ligature_dashes = True has_ligature_dashes = True
if has_literal_dashes and has_ligature_dashes: if has_literal_dashes and has_ligature_dashes:
# TODO: insert a warning note in the document? # TODO: insert a warning note in the document?
@ -1866,40 +1865,46 @@ def convert_dashligatures(document):
def revert_dashligatures(document): def revert_dashligatures(document):
"""Remove font ligature settings for en- and em-dashes. """Remove font ligature settings for en- and em-dashes.
Revert conversion of \twodashes or \threedashes to literal dashes.""" Revert conversion of \twodashes or \threedashes to literal dashes.
"""
use_dash_ligatures = del_value(document.header, "\\use_dash_ligatures") use_dash_ligatures = del_value(document.header, "\\use_dash_ligatures")
if use_dash_ligatures != "true" or document.backend != "latex": if use_dash_ligatures != "true" or document.backend != "latex":
return return
j = 0 i = 0
new_body = [] dash_pattern = re.compile(u".*[\u2013\u2014]")
for i, line in enumerate(document.body): while True:
# Skip some document parts where dashes are not converted # skip lines without dashes:
if (i < j) or line.startswith("\\labelwidthstring"): i = find_re(document.body, dash_pattern, i+1)
new_body.append(line) if i == -1:
break
line = document.body[i]
# skip label width string (see bug 10243):
if line.startswith("\\labelwidthstring"):
continue continue
if (line.startswith("\\begin_inset ") and # do not touch hyphens in some insets (cf. lyx_2_2.convert_dashes):
line[13:].split()[0] in ["CommandInset", "ERT", "External", try:
"Formula", "FormulaMacro", "Graphics", "IPA", "listings"] inset_type, start, end = get_containing_inset(document.body, i)
or line == "\\begin_inset Flex Code"): except TypeError: # no containing inset
j = find_end_of_inset(document.body, i) inset_type, start, end = "no inset", -1, -1
if j == -1: if (inset_type.split()[0] in
document.warning("Malformed LyX document: Can't find end of " ["CommandInset", "ERT", "External", "Formula",
+ words[1] + " inset at line " + str(i)) "FormulaMacro", "Graphics", "IPA", "listings"]
new_body.append(line) or inset_type == "Flex Code"):
i = end
continue continue
if line == "\\begin_layout LyX-Code": try:
j = find_end_of_layout(document.body, i) layoutname, start, end, j = get_containing_layout(document.body, i)
if j == -1: except TypeError: # no (or malformed) containing layout
document.warning("Malformed LyX document: " document.warning("Malformed LyX document: "
"Can't find end of %s layout at line %d" % (words[1],i)) "Can't find layout at body line %d" % i)
new_body.append(line) continue
if layoutname == "LyX-Code":
i = end
continue continue
# TODO: skip replacement in typewriter fonts # TODO: skip replacement in typewriter fonts
line = line.replace(u'\u2013', '\\twohyphens\n') line = line.replace(u'\u2013', '\\twohyphens\n')
line = line.replace(u'\u2014', '\\threehyphens\n') line = line.replace(u'\u2014', '\\threehyphens\n')
lines = line.split('\n') document.body[i:i+1] = line.split('\n')
new_body.extend(line.split('\n'))
document.body = new_body
# redefine the dash LICRs to use ligature dashes: # redefine the dash LICRs to use ligature dashes:
add_to_preamble(document, [r'\renewcommand{\textendash}{--}', add_to_preamble(document, [r'\renewcommand{\textendash}{--}',
r'\renewcommand{\textemdash}{---}']) r'\renewcommand{\textemdash}{---}'])