From 21e5138f6cf1e96d3cac702e2ada2a0148a3ec92 Mon Sep 17 00:00:00 2001 From: Kitsune Ral Date: Sun, 24 Mar 2019 18:51:08 +0900 Subject: linkifyUrls(): fix linkification of emails containing "www." Closes #303. --- lib/util.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'lib/util.cpp') diff --git a/lib/util.cpp b/lib/util.cpp index d042aa34..e1f312ee 100644 --- a/lib/util.cpp +++ b/lib/util.cpp @@ -38,14 +38,14 @@ static void linkifyUrls(QString& htmlEscapedText) // comma or dot // Note: outer parentheses are a part of C++ raw string delimiters, not of // the regex (see http://en.cppreference.com/w/cpp/language/string_literal). - // Note2: yet another pair of outer parentheses are \1 in the replacement. + // Note2: the next-outer parentheses are \N in the replacement. static const QRegularExpression FullUrlRegExp(QStringLiteral( - R"(((www\.(?!\.)|(https?|ftp|magnet)://)(&(?![lg]t;)|[^&\s<>'"])+(&(?![lg]t;)|[^&!,.\s<>'"\]):])))" + R"(\b((www\.(?!\.)(?!(\w|\.|-)+@)|(https?|ftp|magnet)://)(&(?![lg]t;)|[^&\s<>'"])+(&(?![lg]t;)|[^&!,.\s<>'"\]):])))" ), RegExpOptions); // email address: // [word chars, dots or dashes]@[word chars, dots or dashes].[word chars] static const QRegularExpression EmailAddressRegExp(QStringLiteral( - R"((mailto:)?(\b(\w|\.|-)+@(\w|\.|-)+\.\w+\b))" + R"(\b(mailto:)?((\w|\.|-)+@(\w|\.|-)+\.\w+\b))" ), RegExpOptions); // An interim liberal implementation of // https://matrix.org/docs/spec/appendices.html#identifier-grammar @@ -53,7 +53,7 @@ static void linkifyUrls(QString& htmlEscapedText) R"((^|[^<>/])([!#@][-a-z0-9_=/.]{1,252}:[-.a-z0-9]+))" ), RegExpOptions); - // NOTE: htmlEscapedText is already HTML-escaped! No literal <,>,& + // NOTE: htmlEscapedText is already HTML-escaped! 
No literal <,>,&," htmlEscapedText.replace(EmailAddressRegExp, QStringLiteral(R"(\1\2)")); -- cgit v1.2.3 From e855085835909549aa866ed968e24902eb378b5a Mon Sep 17 00:00:00 2001 From: Kitsune Ral Date: Sun, 17 Mar 2019 09:03:34 +0900 Subject: RoomMemberEvent: sanitize user display names MemberEventContent::displayName() will strip away Unicode text direction override characters. Direct access to JSON can still provide "raw" data. --- lib/util.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'lib/util.cpp') diff --git a/lib/util.cpp b/lib/util.cpp index e1f312ee..8d16cfc8 100644 --- a/lib/util.cpp +++ b/lib/util.cpp @@ -63,10 +63,18 @@ static void linkifyUrls(QString& htmlEscapedText) QStringLiteral(R"(\1\2)")); } +QString QMatrixClient::sanitized(const QString& plainText) +{ + auto text = plainText; + text.remove(QChar(0x202e)); + text.remove(QChar(0x202d)); + return text; +} + QString QMatrixClient::prettyPrint(const QString& plainText) { auto pt = QStringLiteral("") + - plainText.toHtmlEscaped() + QStringLiteral(""); + plainText.toHtmlEscaped() + QStringLiteral(""); pt.replace('\n', QStringLiteral("
")); linkifyUrls(pt); -- cgit v1.2.3 From adcea5868d45610be0539af3e1cfc15f8495815c Mon Sep 17 00:00:00 2001 From: Kitsune Ral Date: Sun, 24 Mar 2019 19:09:48 +0900 Subject: Expose linkifyUrls() into library API for future use --- lib/util.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'lib/util.cpp') diff --git a/lib/util.cpp b/lib/util.cpp index 8d16cfc8..fe6286f3 100644 --- a/lib/util.cpp +++ b/lib/util.cpp @@ -29,16 +29,17 @@ static const auto RegExpOptions = | QRegularExpression::UseUnicodePropertiesOption; // Converts all that looks like a URL into HTML links -static void linkifyUrls(QString& htmlEscapedText) +void QMatrixClient::linkifyUrls(QString& htmlEscapedText) { + // Note: outer parentheses are a part of C++ raw string delimiters, not of + // the regex (see http://en.cppreference.com/w/cpp/language/string_literal). + // Note2: the next-outer parentheses are \N in the replacement. + + // generic url: // regexp is originally taken from Konsole (https://github.com/KDE/konsole) - // full url: // protocolname:// or www. followed by anything other than whitespaces, // <, >, ' or ", and ends before whitespaces, <, >, ', ", ], !, ), :, // comma or dot - // Note: outer parentheses are a part of C++ raw string delimiters, not of - // the regex (see http://en.cppreference.com/w/cpp/language/string_literal). - // Note2: the next-outer parentheses are \N in the replacement. 
static const QRegularExpression FullUrlRegExp(QStringLiteral( R"(\b((www\.(?!\.)(?!(\w|\.|-)+@)|(https?|ftp|magnet)://)(&(?![lg]t;)|[^&\s<>'"])+(&(?![lg]t;)|[^&!,.\s<>'"\]):])))" ), RegExpOptions); -- cgit v1.2.3 From 432e7fd7107d8260e0016a1adcd8d94263dc1044 Mon Sep 17 00:00:00 2001 From: Kitsune Ral Date: Thu, 4 Apr 2019 21:27:38 +0900 Subject: Clean up on clang-tidy/clazy analysis --- lib/util.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/util.cpp') diff --git a/lib/util.cpp b/lib/util.cpp index fe6286f3..c3e21c8e 100644 --- a/lib/util.cpp +++ b/lib/util.cpp @@ -157,7 +157,7 @@ static_assert(!is_callable_v>, "Test non-function object"); // "Test returns<> with static member function"); template -QString ft(T&&); +QString ft(T&&) { return {}; } static_assert(std::is_same)>, QString&&>(), "Test function templates"); -- cgit v1.2.3 From 346adee1810109f4b7b14298e55d29a44c076a66 Mon Sep 17 00:00:00 2001 From: Kitsune Ral Date: Mon, 13 May 2019 20:41:04 +0900 Subject: prettyPrint(): First linkify, then add more tags Otherwise the linkification gets confused by HTML tags being already there and doesn't linkify what has to be linkified if that occurs at the beginning of the message. --- lib/util.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'lib/util.cpp') diff --git a/lib/util.cpp b/lib/util.cpp index c3e21c8e..0248e521 100644 --- a/lib/util.cpp +++ b/lib/util.cpp @@ -74,12 +74,11 @@ QString QMatrixClient::sanitized(const QString& plainText) QString QMatrixClient::prettyPrint(const QString& plainText) { - auto pt = QStringLiteral("") + - plainText.toHtmlEscaped() + QStringLiteral(""); - pt.replace('\n', QStringLiteral("
")); - + auto pt = plainText.toHtmlEscaped(); linkifyUrls(pt); - return pt; + pt.replace('\n', QStringLiteral("
")); + return QStringLiteral("") + pt + + QStringLiteral(""); } QString QMatrixClient::cacheLocation(const QString& dirName) -- cgit v1.2.3 From 3c253ed025246a34d849d14aac6feaee672d7e63 Mon Sep 17 00:00:00 2001 From: Kitsune Ral Date: Mon, 13 May 2019 20:42:50 +0900 Subject: linkifyUrls(): be more conservative in parsing serverparts Closes #321. --- lib/util.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/util.cpp') diff --git a/lib/util.cpp b/lib/util.cpp index 0248e521..883db2ea 100644 --- a/lib/util.cpp +++ b/lib/util.cpp @@ -51,7 +51,7 @@ void QMatrixClient::linkifyUrls(QString& htmlEscapedText) // An interim liberal implementation of // https://matrix.org/docs/spec/appendices.html#identifier-grammar static const QRegularExpression MxIdRegExp(QStringLiteral( - R"((^|[^<>/])([!#@][-a-z0-9_=/.]{1,252}:[-.a-z0-9]+))" + R"((^|[^<>/])([!#@][-a-z0-9_=/.]{1,252}:(?:\w|\.|-)+\.\w+(?::\d{1,5})?))" ), RegExpOptions); // NOTE: htmlEscapedText is already HTML-escaped! No literal <,>,&," -- cgit v1.2.3 From 1a034626bcbe064ebe0ada8cdfe1a47f2d82e477 Mon Sep 17 00:00:00 2001 From: Kitsune Ral Date: Mon, 13 May 2019 21:43:54 +0900 Subject: sanitized(): add object replacement character (0xfffc) to the blacklist --- lib/util.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'lib/util.cpp') diff --git a/lib/util.cpp b/lib/util.cpp index 883db2ea..4e17d2f9 100644 --- a/lib/util.cpp +++ b/lib/util.cpp @@ -57,18 +57,19 @@ void QMatrixClient::linkifyUrls(QString& htmlEscapedText) // NOTE: htmlEscapedText is already HTML-escaped! 
No literal <,>,&," htmlEscapedText.replace(EmailAddressRegExp, - QStringLiteral(R"(\1\2)")); + QStringLiteral(R"(\1\2)")); htmlEscapedText.replace(FullUrlRegExp, - QStringLiteral(R"(\1)")); + QStringLiteral(R"(\1)")); htmlEscapedText.replace(MxIdRegExp, - QStringLiteral(R"(\1\2)")); + QStringLiteral(R"(\1\2)")); } QString QMatrixClient::sanitized(const QString& plainText) { auto text = plainText; - text.remove(QChar(0x202e)); - text.remove(QChar(0x202d)); + text.remove(QChar(0x202e)); // RLO + text.remove(QChar(0x202d)); // LRO + text.remove(QChar(0xfffc)); // Object replacement character return text; } -- cgit v1.2.3