From 785507dc2d9989d4dc9c9cd5aa92306d300eacf7 Mon Sep 17 00:00:00 2001 From: Winters Lu Date: Wed, 9 Dec 2020 00:05:46 -0800 Subject: [PATCH 1/6] encoding url with uri in text --- .../main/scala/com/salesforce/op/features/types/Text.scala | 5 +++-- .../scala/com/salesforce/op/features/types/URLTest.scala | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/features/src/main/scala/com/salesforce/op/features/types/Text.scala b/features/src/main/scala/com/salesforce/op/features/types/Text.scala index 5f308fe9eb..5669d4bd62 100644 --- a/features/src/main/scala/com/salesforce/op/features/types/Text.scala +++ b/features/src/main/scala/com/salesforce/op/features/types/Text.scala @@ -35,6 +35,7 @@ import java.nio.charset.StandardCharsets import java.util.regex.Pattern import com.twitter.chill.Base64.{InputStream => Base64InputStream} +import org.apache.commons.httpclient.URI import org.apache.commons.io.input.CharSequenceInputStream import org.apache.commons.validator.routines.UrlValidator @@ -181,11 +182,11 @@ class URL(value: Option[String]) extends Text(value){ /** * Extracts url domain, i.e. 'salesforce.com', 'data.com' etc. */ - def domain: Option[String] = value map (new java.net.URL(_).getHost) + def domain: Option[String] = value map (s => new java.net.URL(new URI(s, false).toString).getHost) /** * Extracts url protocol, i.e. http, https, ftp etc. */ - def protocol: Option[String] = value map (new java.net.URL(_).getProtocol) + def protocol: Option[String] = value map (s => new java.net.URL(new URI(s, false).toString).getProtocol) } object URL { def apply(value: Option[String]): URL = new URL(value) diff --git a/features/src/test/scala/com/salesforce/op/features/types/URLTest.scala b/features/src/test/scala/com/salesforce/op/features/types/URLTest.scala index ac5ca2736a..255b7ec0c0 100644 --- a/features/src/test/scala/com/salesforce/op/features/types/URLTest.scala +++ b/features/src/test/scala/com/salesforce/op/features/types/URLTest.scala @@ -77,7 +77,8 @@ class URLTest extends PropSpec with PropertyChecks with TestCommon { "http://nothingthere.com?Chr=%E5%85&Raj=%E7%B5%AE%A1&Hir=%8F%E0%B4%A3" -> "nothingthere.com", "ftp://my.red.book.com/amorcito.mio" -> "my.red.book.com", "http://secret.gov?Cla=%E9%99%B9%E4%8A%93&Cha=%E3&Eve=%EC%91%90%E8%87%B1" -> "secret.gov", - "ftp://nukes.mil?Lea=%E2%BC%84%EB%91%A3&Mur=%E2%83%BD%E1%92%83" -> "nukes.mil" + "ftp://nukes.mil?Lea=%E2%BC%84%EB%91%A3&Mur=%E2%83%BD%E1%92%83" -> "nukes.mil", + "http://specialchars.漢字.com/@" -> "specialchars.%E6%BC%A2%E5%AD%97.com" ) URL(None).domain shouldBe None From 3a6e81d8995cb3021105a4b38471e2e85d1f0e4d Mon Sep 17 00:00:00 2001 From: Winters Lu Date: Wed, 9 Dec 2020 12:41:38 -0800 Subject: [PATCH 2/6] fixing test --- .../test/scala/com/salesforce/op/features/types/URLTest.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/features/src/test/scala/com/salesforce/op/features/types/URLTest.scala b/features/src/test/scala/com/salesforce/op/features/types/URLTest.scala index 255b7ec0c0..82a4450e7e 100644 --- a/features/src/test/scala/com/salesforce/op/features/types/URLTest.scala +++ b/features/src/test/scala/com/salesforce/op/features/types/URLTest.scala @@ -78,7 +78,7 @@ class URLTest extends PropSpec with PropertyChecks with TestCommon { "ftp://my.red.book.com/amorcito.mio" -> "my.red.book.com", "http://secret.gov?Cla=%E9%99%B9%E4%8A%93&Cha=%E3&Eve=%EC%91%90%E8%87%B1" -> "secret.gov", "ftp://nukes.mil?Lea=%E2%BC%84%EB%91%A3&Mur=%E2%83%BD%E1%92%83" -> "nukes.mil", - "http://specialchars.漢字.com/@" -> "specialchars.%E6%BC%A2%E5%AD%97.com" + "http://specialchars.@.com/" -> "specialchars.%EF%BC%A0.com" ) URL(None).domain shouldBe None From cca548a21fd18a0664eed7720cd7f49e8a244911 Mon Sep 17 00:00:00 2001 From: Winters Lu Date: Wed, 9 Dec 2020 14:06:42 -0800 Subject: [PATCH 3/6] address review feedback --- .../scala/com/salesforce/op/dsl/RichTextFeature.scala | 6 +++--- .../impl/feature/UrlMapToPickListMapTransformer.scala | 2 +- .../scala/com/salesforce/op/features/types/Text.scala | 8 ++++++-- .../scala/com/salesforce/op/features/types/URLTest.scala | 4 ++-- 4 files changed, 12 insertions(+), 8 deletions(-) diff --git a/core/src/main/scala/com/salesforce/op/dsl/RichTextFeature.scala b/core/src/main/scala/com/salesforce/op/dsl/RichTextFeature.scala index c010dee3be..4161b8f001 100644 --- a/core/src/main/scala/com/salesforce/op/dsl/RichTextFeature.scala +++ b/core/src/main/scala/com/salesforce/op/dsl/RichTextFeature.scala @@ -840,15 +840,15 @@ object RichTextFeatureLambdas { } class URLDomainToPickList extends Function1[URL, PickList] with Serializable { - def apply(v: URL): PickList = if (v.isValid) v.domain.toPickList else PickList.empty + def apply(v: URL): PickList = if (v.isValid) v.domain().toPickList else PickList.empty } class URLDomainToText extends Function1[URL, Text] with Serializable { - def apply(v: URL): Text = v.domain.toText + def apply(v: URL): Text = v.domain().toText } class URLProtocolToText extends Function1[URL, Text] with Serializable { - def apply(v: URL): Text = v.protocol.toText + def apply(v: URL): Text = v.protocol().toText } class URLIsValid extends Function1[URL, Boolean] with Serializable { diff --git a/core/src/main/scala/com/salesforce/op/stages/impl/feature/UrlMapToPickListMapTransformer.scala b/core/src/main/scala/com/salesforce/op/stages/impl/feature/UrlMapToPickListMapTransformer.scala index eb8c35f2ed..03c017ee76 100644 --- a/core/src/main/scala/com/salesforce/op/stages/impl/feature/UrlMapToPickListMapTransformer.scala +++ b/core/src/main/scala/com/salesforce/op/stages/impl/feature/UrlMapToPickListMapTransformer.scala @@ -38,7 +38,7 @@ class UrlMapToPickListMapTransformer(uid: String = UID[UrlMapToPickListMapTransf extends UnaryTransformer[URLMap, PickListMap](operationName = "urlMapToPickListMap", uid = uid) { override def transformFn: URLMap => PickListMap = _.value - .mapValues(v => if (v.toURL.isValid) v.toURL.domain else None) + .mapValues(v => if (v.toURL.isValid) v.toURL.domain() else None) .collect { case (k, Some(v)) => k -> v }.toPickListMap } diff --git a/features/src/main/scala/com/salesforce/op/features/types/Text.scala b/features/src/main/scala/com/salesforce/op/features/types/Text.scala index 5669d4bd62..5c554611d9 100644 --- a/features/src/main/scala/com/salesforce/op/features/types/Text.scala +++ b/features/src/main/scala/com/salesforce/op/features/types/Text.scala @@ -181,12 +181,16 @@ class URL(value: Option[String]) extends Text(value){ def isValid(protocols: Array[String]): Boolean = value.exists(new UrlValidator(protocols).isValid) /** * Extracts url domain, i.e. 'salesforce.com', 'data.com' etc. + * + * @param escaped true if URI character sequence is in escaped form. false otherwise. */ - def domain: Option[String] = value map (s => new java.net.URL(new URI(s, false).toString).getHost) + def domain(escaped: Boolean = false): Option[String] = value map (s => new java.net.URL(new URI(s, escaped).toString).getHost) /** * Extracts url protocol, i.e. http, https, ftp etc. + * + * @param escaped true if URI character sequence is in escaped form. false otherwise. */ - def protocol: Option[String] = value map (s => new java.net.URL(new URI(s, false).toString).getProtocol) + def protocol(escaped: Boolean = false): Option[String] = value map (s => new java.net.URL(new URI(s, escaped).toString).getProtocol) } object URL { def apply(value: Option[String]): URL = new URL(value) diff --git a/features/src/test/scala/com/salesforce/op/features/types/URLTest.scala b/features/src/test/scala/com/salesforce/op/features/types/URLTest.scala index 82a4450e7e..067333e931 100644 --- a/features/src/test/scala/com/salesforce/op/features/types/URLTest.scala +++ b/features/src/test/scala/com/salesforce/op/features/types/URLTest.scala @@ -86,7 +86,7 @@ class URLTest extends PropSpec with PropertyChecks with TestCommon { forAll(samples) { case (sample, expected) => val url = URL(sample) - val domain = url.domain + val domain = url.domain() domain shouldBe Some(expected) } } @@ -103,7 +103,7 @@ class URLTest extends PropSpec with PropertyChecks with TestCommon { forAll(samples) { case (sample, expected) => val url = URL(sample) - val domain = url.protocol + val domain = url.protocol() domain shouldBe Some(expected) } } From e6f0202172d074f83acbd1a04d43f4ee320f44bb Mon Sep 17 00:00:00 2001 From: Winters Lu Date: Wed, 9 Dec 2020 14:13:18 -0800 Subject: [PATCH 4/6] fix indent and compile error --- .../main/scala/com/salesforce/op/features/types/Text.scala | 6 ++++-- .../scala/com/salesforce/op/features/types/URLTest.scala | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/features/src/main/scala/com/salesforce/op/features/types/Text.scala b/features/src/main/scala/com/salesforce/op/features/types/Text.scala index 5c554611d9..1a1e345cd0 100644 --- a/features/src/main/scala/com/salesforce/op/features/types/Text.scala +++ b/features/src/main/scala/com/salesforce/op/features/types/Text.scala @@ -184,13 +184,15 @@ class URL(value: Option[String]) extends Text(value){ * * @param escaped true if URI character sequence is in escaped form. false otherwise. */ - def domain(escaped: Boolean = false): Option[String] = value map (s => new java.net.URL(new URI(s, escaped).toString).getHost) + def domain(escaped: Boolean = false): Option[String] = value map + (s => new java.net.URL(new URI(s, escaped).toString).getHost) /** * Extracts url protocol, i.e. http, https, ftp etc. * * @param escaped true if URI character sequence is in escaped form. false otherwise. */ - def protocol(escaped: Boolean = false): Option[String] = value map (s => new java.net.URL(new URI(s, escaped).toString).getProtocol) + def protocol(escaped: Boolean = false): Option[String] = value map + (s => new java.net.URL(new URI(s, escaped).toString).getProtocol) } object URL { def apply(value: Option[String]): URL = new URL(value) diff --git a/features/src/test/scala/com/salesforce/op/features/types/URLTest.scala b/features/src/test/scala/com/salesforce/op/features/types/URLTest.scala index 067333e931..cd17e7e745 100644 --- a/features/src/test/scala/com/salesforce/op/features/types/URLTest.scala +++ b/features/src/test/scala/com/salesforce/op/features/types/URLTest.scala @@ -81,7 +81,7 @@ class URLTest extends PropSpec with PropertyChecks with TestCommon { "http://specialchars.@.com/" -> "specialchars.%EF%BC%A0.com" ) - URL(None).domain shouldBe None + URL(None).domain() shouldBe None forAll(samples) { case (sample, expected) => @@ -98,7 +98,7 @@ class URLTest extends PropSpec with PropertyChecks with TestCommon { "ftp://my.red.book.com/amorcito.mio" -> "ftp" ) - URL(None).protocol shouldBe None + URL(None).protocol() shouldBe None forAll(samples) { case (sample, expected) => From a62fa7fceac59acce4317d2fb5035cf42064b51f Mon Sep 17 00:00:00 2001 From: Winters Lu Date: Fri, 11 Dec 2020 02:05:10 -0800 Subject: [PATCH 5/6] Change strategy to returning None if special chars in url domain --- .../com/salesforce/op/dsl/RichTextFeature.scala | 6 +++--- .../feature/UrlMapToPickListMapTransformer.scala | 2 +- .../com/salesforce/op/features/types/Text.scala | 14 +++++--------- .../com/salesforce/op/features/types/URLTest.scala | 14 +++++++------- 4 files changed, 16 insertions(+), 20 deletions(-) diff --git a/core/src/main/scala/com/salesforce/op/dsl/RichTextFeature.scala b/core/src/main/scala/com/salesforce/op/dsl/RichTextFeature.scala index 4161b8f001..c010dee3be 100644 --- a/core/src/main/scala/com/salesforce/op/dsl/RichTextFeature.scala +++ b/core/src/main/scala/com/salesforce/op/dsl/RichTextFeature.scala @@ -840,15 +840,15 @@ object RichTextFeatureLambdas { } class URLDomainToPickList extends Function1[URL, PickList] with Serializable { - def apply(v: URL): PickList = if (v.isValid) v.domain().toPickList else PickList.empty + def apply(v: URL): PickList = if (v.isValid) v.domain.toPickList else PickList.empty } class URLDomainToText extends Function1[URL, Text] with Serializable { - def apply(v: URL): Text = v.domain().toText + def apply(v: URL): Text = v.domain.toText } class URLProtocolToText extends Function1[URL, Text] with Serializable { - def apply(v: URL): Text = v.protocol().toText + def apply(v: URL): Text = v.protocol.toText } class URLIsValid extends Function1[URL, Boolean] with Serializable { diff --git a/core/src/main/scala/com/salesforce/op/stages/impl/feature/UrlMapToPickListMapTransformer.scala b/core/src/main/scala/com/salesforce/op/stages/impl/feature/UrlMapToPickListMapTransformer.scala index 03c017ee76..eb8c35f2ed 100644 --- a/core/src/main/scala/com/salesforce/op/stages/impl/feature/UrlMapToPickListMapTransformer.scala +++ b/core/src/main/scala/com/salesforce/op/stages/impl/feature/UrlMapToPickListMapTransformer.scala @@ -38,7 +38,7 @@ class UrlMapToPickListMapTransformer(uid: String = UID[UrlMapToPickListMapTransf extends UnaryTransformer[URLMap, PickListMap](operationName = "urlMapToPickListMap", uid = uid) { override def transformFn: URLMap => PickListMap = _.value - .mapValues(v => if (v.toURL.isValid) v.toURL.domain() else None) + .mapValues(v => if (v.toURL.isValid) v.toURL.domain else None) .collect { case (k, Some(v)) => k -> v }.toPickListMap } diff --git a/features/src/main/scala/com/salesforce/op/features/types/Text.scala b/features/src/main/scala/com/salesforce/op/features/types/Text.scala index 1a1e345cd0..ae059a4571 100644 --- a/features/src/main/scala/com/salesforce/op/features/types/Text.scala +++ b/features/src/main/scala/com/salesforce/op/features/types/Text.scala @@ -39,6 +39,8 @@ import org.apache.commons.httpclient.URI import org.apache.commons.io.input.CharSequenceInputStream import org.apache.commons.validator.routines.UrlValidator +import scala.util.Try + /** * Text value representation * @@ -172,7 +174,7 @@ class URL(value: Option[String]) extends Text(value){ * RFC2396 (http://www.ietf.org/rfc/rfc2396.txt) * Default valid protocols are: http, https, ftp. */ - def isValid: Boolean = value.exists(UrlValidator.getInstance().isValid) + def isValid: Boolean = value.exists(v => UrlValidator.getInstance().isValid(v) && Try(new java.net.URL(v)).isSuccess) /** * Verifies if the url is of correct form of "Uniform Resource Identifiers (URI): Generic Syntax" * RFC2396 (http://www.ietf.org/rfc/rfc2396.txt) @@ -181,18 +183,12 @@ class URL(value: Option[String]) extends Text(value){ def isValid(protocols: Array[String]): Boolean = value.exists(new UrlValidator(protocols).isValid) /** * Extracts url domain, i.e. 'salesforce.com', 'data.com' etc. - * - * @param escaped true if URI character sequence is in escaped form. false otherwise. */ - def domain(escaped: Boolean = false): Option[String] = value map - (s => new java.net.URL(new URI(s, escaped).toString).getHost) + def domain(): Option[String] = value map (new java.net.URL(_).getHost) /** * Extracts url protocol, i.e. http, https, ftp etc. - * - * @param escaped true if URI character sequence is in escaped form. false otherwise. */ - def protocol(escaped: Boolean = false): Option[String] = value map - (s => new java.net.URL(new URI(s, escaped).toString).getProtocol) + def protocol(): Option[String] = value map (new java.net.URL(_).getProtocol) } object URL { def apply(value: Option[String]): URL = new URL(value) diff --git a/features/src/test/scala/com/salesforce/op/features/types/URLTest.scala b/features/src/test/scala/com/salesforce/op/features/types/URLTest.scala index cd17e7e745..8f916a9eeb 100644 --- a/features/src/test/scala/com/salesforce/op/features/types/URLTest.scala +++ b/features/src/test/scala/com/salesforce/op/features/types/URLTest.scala @@ -48,7 +48,8 @@ class URLTest extends PropSpec with PropertyChecks with TestCommon { Some("ftp://.codomain"), Some("https://.codomain"), Some("//domain.nambia"), - Some("http://\u00ff\u0080\u007f\u0000.com") // scalastyle:off + Some("http://\u00ff\u0080\u007f\u0000.com"), // scalastyle:off + Some("http://specialchars.@.com") ) val goodOnes = Table("good ones", @@ -77,16 +78,15 @@ class URLTest extends PropSpec with PropertyChecks with TestCommon { "http://nothingthere.com?Chr=%E5%85&Raj=%E7%B5%AE%A1&Hir=%8F%E0%B4%A3" -> "nothingthere.com", "ftp://my.red.book.com/amorcito.mio" -> "my.red.book.com", "http://secret.gov?Cla=%E9%99%B9%E4%8A%93&Cha=%E3&Eve=%EC%91%90%E8%87%B1" -> "secret.gov", - "ftp://nukes.mil?Lea=%E2%BC%84%EB%91%A3&Mur=%E2%83%BD%E1%92%83" -> "nukes.mil", - "http://specialchars.@.com/" -> "specialchars.%EF%BC%A0.com" + "ftp://nukes.mil?Lea=%E2%BC%84%EB%91%A3&Mur=%E2%83%BD%E1%92%83" -> "nukes.mil" ) - URL(None).domain() shouldBe None + URL(None).domain shouldBe None forAll(samples) { case (sample, expected) => val url = URL(sample) - val domain = url.domain() + val domain = url.domain domain shouldBe Some(expected) } } @@ -98,12 +98,12 @@ class URLTest extends PropSpec with PropertyChecks with TestCommon { "ftp://my.red.book.com/amorcito.mio" -> "ftp" ) - URL(None).protocol() shouldBe None + URL(None).protocol shouldBe None forAll(samples) { case (sample, expected) => val url = URL(sample) - val domain = url.protocol() + val domain = url.protocol domain shouldBe Some(expected) } } From bc726d4110f5e1e2c46b56557f5c23b8ab63b992 Mon Sep 17 00:00:00 2001 From: Winters Lu Date: Fri, 11 Dec 2020 04:15:23 -0800 Subject: [PATCH 6/6] cleanup --- .../main/scala/com/salesforce/op/features/types/Text.scala | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/features/src/main/scala/com/salesforce/op/features/types/Text.scala b/features/src/main/scala/com/salesforce/op/features/types/Text.scala index ae059a4571..670b918fcd 100644 --- a/features/src/main/scala/com/salesforce/op/features/types/Text.scala +++ b/features/src/main/scala/com/salesforce/op/features/types/Text.scala @@ -35,7 +35,6 @@ import java.nio.charset.StandardCharsets import java.util.regex.Pattern import com.twitter.chill.Base64.{InputStream => Base64InputStream} -import org.apache.commons.httpclient.URI import org.apache.commons.io.input.CharSequenceInputStream import org.apache.commons.validator.routines.UrlValidator @@ -184,11 +183,11 @@ class URL(value: Option[String]) extends Text(value){ /** * Extracts url domain, i.e. 'salesforce.com', 'data.com' etc. */ - def domain(): Option[String] = value map (new java.net.URL(_).getHost) + def domain: Option[String] = value map (new java.net.URL(_).getHost) /** * Extracts url protocol, i.e. http, https, ftp etc. */ - def protocol(): Option[String] = value map (new java.net.URL(_).getProtocol) + def protocol: Option[String] = value map (new java.net.URL(_).getProtocol) } object URL { def apply(value: Option[String]): URL = new URL(value)