From ae196dd70c76406a07f3623cfb40d7c9cc80372f Mon Sep 17 00:00:00 2001 From: Juergen Spitzmueller Date: Sat, 9 Jun 2018 11:52:55 +0200 Subject: [PATCH] Support input of non-ASCII characters in hyperlinks We transform them to hex representation via QByteArray::toPercentEncoding() Fixes: #11165 (cherry picked from commit 01d8f418943f5f63f329658d387a7dbd9a024c91) --- src/insets/InsetHyperlink.cpp | 13 ++++++++----- src/support/lstrings.cpp | 8 ++++++++ src/support/lstrings.h | 4 ++++ status.23x | 2 +- 4 files changed, 21 insertions(+), 6 deletions(-) diff --git a/src/insets/InsetHyperlink.cpp b/src/insets/InsetHyperlink.cpp index afa49d4e35..aae07ce0da 100644 --- a/src/insets/InsetHyperlink.cpp +++ b/src/insets/InsetHyperlink.cpp @@ -77,6 +77,7 @@ docstring InsetHyperlink::screenLabel() const return temp + url; } + void InsetHyperlink::doDispatch(Cursor & cur, FuncRequest & cmd) { switch (cmd.action()) { @@ -125,15 +126,16 @@ void InsetHyperlink::latex(otexstream & os, // For the case there is no name given, the target is set as name. // Do this before !url.empty() and !name.empty() to handle characters - // like the "%" correctly. + // such as % correctly. if (name.empty()) name = url; if (!url.empty()) { - // Replace the "\" character by its ASCII code according to the - // URL specifications because "\" is not allowed in URLs and by - // \href. Only do this when the following character is not also - // a "\", because "\\" is valid code + // Use URI/URL-style percent-encoded string (hexadecimal). + // We exclude some characters that must not be transformed + // in hrefs (% # / :) or that we need to treat manually (\). 
+ url = to_percent_encoding(url, from_ascii("%#\\/:")); + // We handle \ manually since \\ is valid for (size_t i = 0, pos; (pos = url.find('\\', i)) != string::npos; i = pos + 2) { @@ -145,6 +147,7 @@ void InsetHyperlink::latex(otexstream & os, // field because otherwise LaTeX will fail when the hyperlink is // within an argument of another command, e.g. in a \footnote. It // is important that they are escaped as "\#" and not as "\#{}". + // FIXME this is not necessary outside of commands. for (int k = 0; k < 2; k++) for (size_t i = 0, pos; (pos = url.find(chars_url[k], i)) != string::npos; diff --git a/src/support/lstrings.cpp b/src/support/lstrings.cpp index be326c0fd4..4a12c34b4d 100644 --- a/src/support/lstrings.cpp +++ b/src/support/lstrings.cpp @@ -1431,6 +1431,14 @@ std::string formatFPNumber(double x) } +docstring to_percent_encoding(docstring const & in, docstring const & ex) +{ + QByteArray input = toqstr(in).toUtf8(); + QByteArray excludes = toqstr(ex).toUtf8(); + return qstring_to_ucs4(QString(input.toPercentEncoding(excludes))); +} + + docstring bformat(docstring const & fmt, int arg1) { LATTEST(contains(fmt, from_ascii("%1$d"))); diff --git a/src/support/lstrings.h b/src/support/lstrings.h index 9bcff42afa..7d2fc6dd77 100644 --- a/src/support/lstrings.h +++ b/src/support/lstrings.h @@ -353,6 +353,10 @@ int findToken(char const * const str[], std::string const & search_token); /// like "1000000.000000", and precision control would not be that easy either. std::string formatFPNumber(double); +/// Returns a URI/URL-style percent-encoded copy of the string \p in. 
+/// \p ex defines a string of characters that are excluded from the transformation +docstring to_percent_encoding(docstring const & in, docstring const & ex = docstring()); + docstring bformat(docstring const & fmt, int arg1); docstring bformat(docstring const & fmt, long arg1); diff --git a/status.23x b/status.23x index 65f296e2b6..690313ae5d 100644 --- a/status.23x +++ b/status.23x @@ -15,7 +15,7 @@ What's new * DOCUMENT INPUT/OUTPUT - +- Add support for non-ASCII characters in hyperlinks (bug 11165). * MISCELLANEOUS