diff --git a/core/src/main/java/org/apache/stormcrawler/filtering/basic/BasicURLNormalizer.java b/core/src/main/java/org/apache/stormcrawler/filtering/basic/BasicURLNormalizer.java index 629bc976b..810c38864 100644 --- a/core/src/main/java/org/apache/stormcrawler/filtering/basic/BasicURLNormalizer.java +++ b/core/src/main/java/org/apache/stormcrawler/filtering/basic/BasicURLNormalizer.java @@ -145,7 +145,7 @@ public class BasicURLNormalizer extends URLFilter { // properly encode characters in path/file using percent-encoding String file2 = unescapePath(file); file2 = escapePath(file2); - if (!file.equals(file2)) { + if (!file.equalsIgnoreCase(file2)) { hasChanged = true; } if (hasChanged) { diff --git a/core/src/test/java/org/apache/stormcrawler/filtering/BasicURLNormalizerTest.java b/core/src/test/java/org/apache/stormcrawler/filtering/BasicURLNormalizerTest.java index 250ea401a..3c84257cb 100644 --- a/core/src/test/java/org/apache/stormcrawler/filtering/BasicURLNormalizerTest.java +++ b/core/src/test/java/org/apache/stormcrawler/filtering/BasicURLNormalizerTest.java @@ -300,6 +300,22 @@ void testNonStandardPercentEncoding() throws MalformedURLException { assertEquals(expectedURL, normalizedUrl, "Failed to filter query string"); } + // https://github.com/apache/incubator-stormcrawler/issues/1448 + @Test + void testProperURLEncodingWithLowerCase() throws MalformedURLException { + URLFilter urlFilter = createFilter(queryParamsToFilter); + String urlWithEscapedCharacters = "http://www.example.com/Exhibitions/Detail/NjAxOA%3d%3d"; + String expectedResult = "http://www.example.com/Exhibitions/Detail/NjAxOA%3d%3d"; + // Normalization must leave this URL unchanged, lower-case percent-escapes (%3d) included: expectedResult is identical to the input.
+ URL testSourceUrl = new URL(urlWithEscapedCharacters); + String testUrl = urlWithEscapedCharacters; + String normalizedUrl = urlFilter.filter(testSourceUrl, new Metadata(), testUrl); + assertEquals( + expectedResult, + normalizedUrl, + "Failed to normalize url encoded url with lower case letters"); + } + @Test void testHostIDNtoASCII() throws MalformedURLException { ObjectNode filterParams = new ObjectNode(JsonNodeFactory.instance);