mirror of
https://git.lyx.org/repos/lyx.git
synced 2025-01-03 08:28:25 +00:00
Fix lyx2lyx dash conversion and make it faster.
This commit is contained in:
parent
62f32992ab
commit
a151b274bf
@ -1557,10 +1557,11 @@ def convert_latexargs(document):
|
|||||||
"theorems-chap-bytype", "theorems-chap", "theorems-named", "theorems-sec-bytype",
|
"theorems-chap-bytype", "theorems-chap", "theorems-named", "theorems-sec-bytype",
|
||||||
"theorems-sec", "theorems-starred", "theorems-std", "todonotes"]
|
"theorems-sec", "theorems-starred", "theorems-std", "todonotes"]
|
||||||
# Modules we need to take care of
|
# Modules we need to take care of
|
||||||
caveat_modules = ["initials"]
|
caveat_modules = ["initials"] # TODO: , "graphicboxes", "bicaption"]
|
||||||
# information about the relevant styles in caveat_modules (number of opt and req args)
|
# information about the relevant styles in caveat_modules (number of opt and req args)
|
||||||
# use this if we get more caveat_modules. For now, use hard coding (see below).
|
# use this if we get more caveat_modules. For now, use hard coding (see below).
|
||||||
# initials = [{'Layout' : 'Initial', 'opt' : 1, 'req' : 1}]
|
# initials = [{'Layout' : 'Initial', 'opt' : 1, 'req' : 1}]
|
||||||
|
# graphicboxes = { ... }
|
||||||
|
|
||||||
# Is this a known safe layout?
|
# Is this a known safe layout?
|
||||||
safe_layout = document.textclass in safe_layouts
|
safe_layout = document.textclass in safe_layouts
|
||||||
|
@ -37,7 +37,7 @@ from lyx2lyx_tools import (add_to_preamble, put_cmd_in_ert, get_ert,
|
|||||||
from parser_tools import (check_token, del_complete_lines,
|
from parser_tools import (check_token, del_complete_lines,
|
||||||
find_end_of_inset, find_end_of_layout, find_nonempty_line, find_re,
|
find_end_of_inset, find_end_of_layout, find_nonempty_line, find_re,
|
||||||
find_token, find_token_backwards, get_containing_layout,
|
find_token, find_token_backwards, get_containing_layout,
|
||||||
get_value, is_in_inset)
|
get_containing_inset, get_value, is_in_inset)
|
||||||
|
|
||||||
|
|
||||||
####################################################################
|
####################################################################
|
||||||
@ -622,41 +622,40 @@ def convert_dashes(document):
|
|||||||
while i+1 < len(lines):
|
while i+1 < len(lines):
|
||||||
i += 1
|
i += 1
|
||||||
line = lines[i]
|
line = lines[i]
|
||||||
words = line.split()
|
if "--" not in line:
|
||||||
if (len(words) > 1 and words[0] == "\\begin_inset"
|
|
||||||
and (words[1] in ["CommandInset", "ERT", "External", "Formula",
|
|
||||||
"FormulaMacro", "Graphics", "IPA", "listings"]
|
|
||||||
or line.endswith("Flex Code"))):
|
|
||||||
# must not replace anything in insets that store LaTeX contents in .lyx files
|
|
||||||
# (math and command insets without overridden read() and write() methods
|
|
||||||
# filtering out IPA makes Text::readParToken() more simple
|
|
||||||
# skip ERT as well since it is not needed there
|
|
||||||
# Flex Code is logical markup, typically rendered as typewriter
|
|
||||||
j = find_end_of_inset(lines, i)
|
|
||||||
if j == -1:
|
|
||||||
document.warning("Malformed LyX document: Can't find end of " +
|
|
||||||
words[1] + " inset at line " + str(i))
|
|
||||||
else:
|
|
||||||
i = j
|
|
||||||
continue
|
|
||||||
if lines[i] == "\\begin_layout LyX-Code":
|
|
||||||
j = find_end_of_layout(lines, i)
|
|
||||||
if j == -1:
|
|
||||||
document.warning("Malformed LyX document: "
|
|
||||||
"Can't find end of %s layout at line %d" % (words[1],i))
|
|
||||||
else:
|
|
||||||
i = j
|
|
||||||
continue
|
continue
|
||||||
|
# skip label width string (bug 10243):
|
||||||
if line.startswith("\\labelwidthstring"):
|
if line.startswith("\\labelwidthstring"):
|
||||||
# skip label width string (bug 10243)
|
|
||||||
continue
|
continue
|
||||||
|
# Do not touch hyphens in some insets:
|
||||||
if "--" in line:
|
try:
|
||||||
# We can have an arbitrary number of consecutive hyphens.
|
value, start, end = get_containing_inset(lines, i)
|
||||||
# Replace as LaTeX does: First try emdash, then endash
|
except TypeError:
|
||||||
line = line.replace("---", "\\threehyphens\n")
|
# False means no (or malformed) containing inset
|
||||||
line = line.replace("--", "\\twohyphens\n")
|
value, start, end = "no inset", -1, -1
|
||||||
lines[i:i+1] = line.splitlines()
|
# We must not replace anything in insets that store LaTeX contents in .lyx files
|
||||||
|
# (math and command insets without overridden read() and write() methods).
|
||||||
|
# Filtering out IPA and ERT makes Text::readParToken() simpler,
|
||||||
|
# Flex Code is logical markup, typically rendered as typewriter
|
||||||
|
if (value.split()[0] in ["CommandInset", "ERT", "External", "Formula",
|
||||||
|
"FormulaMacro", "Graphics", "IPA", "listings"]
|
||||||
|
or value in ["Flex Code", "Flex URL"]):
|
||||||
|
i = end
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
layout, start, end, j = get_containing_layout(lines, i)
|
||||||
|
except TypeError: # no (or malformed) containing layout
|
||||||
|
document.warning("Malformed LyX document: "
|
||||||
|
"Can't find layout at line %d" % i)
|
||||||
|
continue
|
||||||
|
if layout == "LyX-Code":
|
||||||
|
i = end
|
||||||
|
continue
|
||||||
|
# We can have an arbitrary number of consecutive hyphens.
|
||||||
|
# Replace as LaTeX does: First try emdash, then endash
|
||||||
|
line = line.replace("---", "\\threehyphens\n")
|
||||||
|
line = line.replace("--", "\\twohyphens\n")
|
||||||
|
lines[i:i+1] = line.splitlines()
|
||||||
|
|
||||||
# remove ligature breaks between dashes
|
# remove ligature breaks between dashes
|
||||||
i = 1
|
i = 1
|
||||||
@ -672,40 +671,40 @@ def convert_dashes(document):
|
|||||||
|
|
||||||
def revert_dashes(document):
|
def revert_dashes(document):
|
||||||
"""
|
"""
|
||||||
|
Remove preamble code from 2.3->2.2 conversion.
|
||||||
Prevent ligatures of existing --- and --.
|
Prevent ligatures of existing --- and --.
|
||||||
Revert \\twohyphens and \\threehyphens to -- and ---.
|
Revert \\twohyphens and \\threehyphens to -- and ---.
|
||||||
Remove preamble code from 2.3->2.2 conversion.
|
|
||||||
"""
|
"""
|
||||||
del_complete_lines(document.preamble,
|
del_complete_lines(document.preamble,
|
||||||
['% Added by lyx2lyx',
|
['% Added by lyx2lyx',
|
||||||
r'\renewcommand{\textendash}{--}',
|
r'\renewcommand{\textendash}{--}',
|
||||||
r'\renewcommand{\textemdash}{---}'])
|
r'\renewcommand{\textemdash}{---}'])
|
||||||
|
|
||||||
# Insert ligature breaks to prevent ligation of hyphens to dashes:
|
# Insert ligature breaks to prevent ligation of hyphens to dashes:
|
||||||
lines = document.body
|
lines = document.body
|
||||||
i = 0
|
i = 0
|
||||||
while i+1 < len(lines):
|
while i+1 < len(lines):
|
||||||
i += 1
|
i += 1
|
||||||
line = lines[i]
|
line = lines[i]
|
||||||
|
if "--" not in line:
|
||||||
|
continue
|
||||||
# skip label width string (bug 10243):
|
# skip label width string (bug 10243):
|
||||||
if line.startswith("\\labelwidthstring"):
|
if line.startswith("\\labelwidthstring"):
|
||||||
continue
|
continue
|
||||||
# do not touch hyphens in some insets (cf. convert_dashes):
|
# do not touch hyphens in some insets (cf. convert_dashes):
|
||||||
if line.startswith("\\begin_inset"):
|
try:
|
||||||
try:
|
value, start, end = get_containing_inset(lines, i)
|
||||||
if line.split()[1] in ["CommandInset", "ERT", "External",
|
except TypeError:
|
||||||
"Formula", "FormulaMacro", "Graphics",
|
# False means no (or malformed) containing inset
|
||||||
"IPA", "listings"]:
|
value, start, end = "no inset", -1, -1
|
||||||
j = find_end_of_inset(lines, i)
|
if (value.split()[0] in ["CommandInset", "ERT", "External", "Formula",
|
||||||
if j == -1:
|
"FormulaMacro", "Graphics", "IPA", "listings"]
|
||||||
document.warning("Malformed LyX document: Can't find "
|
or value == "Flex URL"):
|
||||||
"end of %s inset at line %d." % (itype, i))
|
i = end
|
||||||
continue
|
continue
|
||||||
i = j
|
line = line.replace("--", "-\\SpecialChar \\textcompwordmark{}\n-")
|
||||||
except IndexError:
|
document.body[i:i+1] = line.split('\n')
|
||||||
continue
|
|
||||||
if "--" in line:
|
|
||||||
line = line.replace("--", "-\\SpecialChar \\textcompwordmark{}\n-")
|
|
||||||
document.body[i:i+1] = line.split('\n')
|
|
||||||
# Revert \twohyphens and \threehyphens:
|
# Revert \twohyphens and \threehyphens:
|
||||||
i = 1
|
i = 1
|
||||||
while i < len(lines):
|
while i < len(lines):
|
||||||
|
@ -26,7 +26,7 @@ import sys, os
|
|||||||
|
|
||||||
from parser_tools import (del_token, del_value, del_complete_lines,
|
from parser_tools import (del_token, del_value, del_complete_lines,
|
||||||
find_complete_lines, find_end_of, find_end_of_layout, find_end_of_inset,
|
find_complete_lines, find_end_of, find_end_of_layout, find_end_of_inset,
|
||||||
find_re, find_token, find_token_backwards,
|
find_re, find_token, find_token_backwards, get_containing_inset,
|
||||||
get_containing_layout, get_bool_value, get_value, get_quoted_value)
|
get_containing_layout, get_bool_value, get_value, get_quoted_value)
|
||||||
# find_tokens, find_token_exact, is_in_inset,
|
# find_tokens, find_token_exact, is_in_inset,
|
||||||
# check_token, get_option_value
|
# check_token, get_option_value
|
||||||
@ -1853,46 +1853,54 @@ def convert_dashligatures(document):
|
|||||||
if use_dash_ligatures is None:
|
if use_dash_ligatures is None:
|
||||||
# Look for dashes (Documents by LyX 2.1 or older have "\twohyphens\n"
|
# Look for dashes (Documents by LyX 2.1 or older have "\twohyphens\n"
|
||||||
# or "\threehyphens\n" as interim representation for -- and ---.)
|
# or "\threehyphens\n" as interim representation for -- and ---.)
|
||||||
has_literal_dashes = False
|
lines = document.body
|
||||||
has_ligature_dashes = False
|
has_literal_dashes = has_ligature_dashes = False
|
||||||
j = 0
|
i = j = 0
|
||||||
for i, line in enumerate(document.body):
|
while i+1 < len(lines):
|
||||||
# Skip some document parts where dashes are not converted
|
i += 1
|
||||||
if (i < j) or line.startswith("\\labelwidthstring"):
|
line = lines[i]
|
||||||
|
# skip lines without any dashes:
|
||||||
|
if not re.search(u"[\u2013\u2014]|\\twohyphens|\\threehyphens", line):
|
||||||
continue
|
continue
|
||||||
if line.startswith("\\begin_inset"):
|
# skip label width string (see bug 10243):
|
||||||
try:
|
if line.startswith("\\labelwidthstring"):
|
||||||
it = line.split()[1]
|
|
||||||
except IndexError:
|
|
||||||
continue
|
|
||||||
if (it in ["CommandInset", "ERT", "External", "Formula",
|
|
||||||
"FormulaMacro", "Graphics", "IPA", "listings"]
|
|
||||||
or line.endswith("Flex Code")):
|
|
||||||
j = find_end_of_inset(document.body, i)
|
|
||||||
if j == -1:
|
|
||||||
document.warning("Malformed LyX document: Can't "
|
|
||||||
"find end of %s inset at line %d." % (itype, i))
|
|
||||||
continue
|
|
||||||
if line == "\\begin_layout LyX-Code":
|
|
||||||
j = find_end_of_layout(document.body, i)
|
|
||||||
if j == -1:
|
|
||||||
document.warning("Malformed LyX document: "
|
|
||||||
"Can't find end of %s layout at line %d" % (words[1],i))
|
|
||||||
continue
|
continue
|
||||||
|
# do not touch hyphens in some insets (cf. lyx_2_2.convert_dashes):
|
||||||
|
try:
|
||||||
|
value, start, end = get_containing_inset(lines, i)
|
||||||
|
except TypeError: # no containing inset
|
||||||
|
value, start, end = "no inset", -1, -1
|
||||||
|
if (value.split()[0] in
|
||||||
|
["CommandInset", "ERT", "External", "Formula",
|
||||||
|
"FormulaMacro", "Graphics", "IPA", "listings"]
|
||||||
|
or value == "Flex Code"):
|
||||||
|
i = end
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
layout, start, end, j = get_containing_layout(lines, i)
|
||||||
|
except TypeError: # no (or malformed) containing layout
|
||||||
|
document.warning("Malformed LyX document: "
|
||||||
|
"Can't find layout at line %d" % i)
|
||||||
|
continue
|
||||||
|
if layout == "LyX-Code":
|
||||||
|
i = end
|
||||||
|
continue
|
||||||
|
|
||||||
# literal dash followed by a word or no-break space:
|
# literal dash followed by a word or no-break space:
|
||||||
if re.search(u"[\u2013\u2014]([\w\u00A0]|$)", line,
|
if re.search(u"[\u2013\u2014]([\w\u00A0]|$)", line,
|
||||||
flags=re.UNICODE):
|
flags=re.UNICODE):
|
||||||
has_literal_dashes = True
|
has_literal_dashes = True
|
||||||
# ligature dash followed by word or no-break space on next line:
|
# ligature dash followed by word or no-break space on next line:
|
||||||
if (re.search(r"(\\twohyphens|\\threehyphens)", line) and
|
if (re.search(r"(\\twohyphens|\\threehyphens)", line) and
|
||||||
re.match(u"[\w\u00A0]", document.body[i+1], flags=re.UNICODE)):
|
re.match(u"[\w\u00A0]", lines[i+1], flags=re.UNICODE)):
|
||||||
has_ligature_dashes = True
|
has_ligature_dashes = True
|
||||||
if has_literal_dashes and has_ligature_dashes:
|
if has_literal_dashes and has_ligature_dashes:
|
||||||
# TODO: insert a warning note in the document?
|
# TODO: insert a warning note in the document?
|
||||||
document.warning('This document contained both literal and '
|
document.warning('This document contained both literal and '
|
||||||
'"ligature" dashes.\n Line breaks may have changed. '
|
'"ligature" dashes.\n Line breaks may have changed. '
|
||||||
'See UserGuide chapter 3.9.1 for details.')
|
'See UserGuide chapter 3.9.1 for details.')
|
||||||
elif has_literal_dashes:
|
break
|
||||||
|
if has_literal_dashes:
|
||||||
use_dash_ligatures = False
|
use_dash_ligatures = False
|
||||||
elif has_ligature_dashes:
|
elif has_ligature_dashes:
|
||||||
use_dash_ligatures = True
|
use_dash_ligatures = True
|
||||||
|
Loading…
Reference in New Issue
Block a user