From 1655a7601198c657dcd484881710d6b3973bbbb3 Mon Sep 17 00:00:00 2001
From: Anthony Ramine <n.oxyde@gmail.com>
Date: Fri, 19 Jul 2019 10:32:59 +0200
Subject: [PATCH 01/15] Update tests from wpt

The two json files were taken from
web-platform-tests/wpt@e69af8258d25011f3bdb7577323dcb98880445ea

> test result: FAILED. 624 passed; 89 failed; 0 ignored; 0 measured
---
 tests/setters_tests.json | 425 +++++++++++++++++++++++++---
 tests/urltestdata.json   | 596 +++++++++++++++++++++++++++++++++++++--
 2 files changed, 946 insertions(+), 75 deletions(-)

diff --git a/tests/setters_tests.json b/tests/setters_tests.json
index a45171bf3..db23d9247 100644
--- a/tests/setters_tests.json
+++ b/tests/setters_tests.json
@@ -27,7 +27,7 @@
             "href": "a://example.net",
             "new_value": "",
             "expected": {
-                "href": "a://example.net/",
+                "href": "a://example.net",
                 "protocol": "a:"
             }
         },
@@ -35,16 +35,24 @@
             "href": "a://example.net",
             "new_value": "b",
             "expected": {
-                "href": "b://example.net/",
+                "href": "b://example.net",
                 "protocol": "b:"
             }
         },
+        {
+            "href": "javascript:alert(1)",
+            "new_value": "defuse",
+            "expected": {
+                "href": "defuse:alert(1)",
+                "protocol": "defuse:"
+            }
+        },
         {
             "comment": "Upper-case ASCII is lower-cased",
             "href": "a://example.net",
             "new_value": "B",
             "expected": {
-                "href": "b://example.net/",
+                "href": "b://example.net",
                 "protocol": "b:"
             }
         },
@@ -53,7 +61,7 @@
             "href": "a://example.net",
             "new_value": "é",
             "expected": {
-                "href": "a://example.net/",
+                "href": "a://example.net",
                 "protocol": "a:"
             }
         },
@@ -62,7 +70,7 @@
             "href": "a://example.net",
             "new_value": "0b",
             "expected": {
-                "href": "a://example.net/",
+                "href": "a://example.net",
                 "protocol": "a:"
             }
         },
@@ -71,7 +79,7 @@
             "href": "a://example.net",
             "new_value": "+b",
             "expected": {
-                "href": "a://example.net/",
+                "href": "a://example.net",
                 "protocol": "a:"
             }
         },
@@ -79,7 +87,7 @@
             "href": "a://example.net",
             "new_value": "bC0+-.",
             "expected": {
-                "href": "bc0+-.://example.net/",
+                "href": "bc0+-.://example.net",
                 "protocol": "bc0+-.:"
             }
         },
@@ -88,7 +96,7 @@
             "href": "a://example.net",
             "new_value": "b,c",
             "expected": {
-                "href": "a://example.net/",
+                "href": "a://example.net",
                 "protocol": "a:"
             }
         },
@@ -97,10 +105,35 @@
             "href": "a://example.net",
             "new_value": "bé",
             "expected": {
-                "href": "a://example.net/",
+                "href": "a://example.net",
                 "protocol": "a:"
             }
         },
+        {
+            "comment": "Can’t switch from URL containing username/password/port to file",
+            "href": "http://test@example.net",
+            "new_value": "file",
+            "expected": {
+                "href": "http://test@example.net/",
+                "protocol": "http:"
+            }
+        },
+        {
+            "href": "gopher://example.net:1234",
+            "new_value": "file",
+            "expected": {
+                "href": "gopher://example.net:1234/",
+                "protocol": "gopher:"
+            }
+        },
+        {
+            "href": "wss://x:x@example.net:1234",
+            "new_value": "file",
+            "expected": {
+                "href": "wss://x:x@example.net:1234/",
+                "protocol": "wss:"
+            }
+        },
         {
             "comment": "Can’t switch from file URL with no host",
             "href": "file://localhost/",
@@ -127,12 +160,36 @@
             }
         },
         {
-            "comment": "Spec deviation: from special scheme to not is not problematic. https://github.com/whatwg/url/issues/104",
+            "comment": "Can’t switch from special scheme to non-special",
             "href": "http://example.net",
             "new_value": "b",
             "expected": {
-                "href": "b://example.net/",
-                "protocol": "b:"
+                "href": "http://example.net/",
+                "protocol": "http:"
+            }
+        },
+        {
+            "href": "file://hi/path",
+            "new_value": "s",
+            "expected": {
+                "href": "file://hi/path",
+                "protocol": "file:"
+            }
+        },
+        {
+            "href": "https://example.net",
+            "new_value": "s",
+            "expected": {
+                "href": "https://example.net/",
+                "protocol": "https:"
+            }
+        },
+        {
+            "href": "ftp://example.net",
+            "new_value": "test",
+            "expected": {
+                "href": "ftp://example.net/",
+                "protocol": "ftp:"
             }
         },
         {
@@ -145,12 +202,44 @@
             }
         },
         {
-            "comment": "Spec deviation: from non-special scheme with a host to special is not problematic. https://github.com/whatwg/url/issues/104",
+            "comment": "Can’t switch from non-special scheme to special",
             "href": "ssh://me@example.net",
             "new_value": "http",
             "expected": {
-                "href": "http://me@example.net/",
-                "protocol": "http:"
+                "href": "ssh://me@example.net",
+                "protocol": "ssh:"
+            }
+        },
+        {
+            "href": "ssh://me@example.net",
+            "new_value": "gopher",
+            "expected": {
+                "href": "ssh://me@example.net",
+                "protocol": "ssh:"
+            }
+        },
+        {
+            "href": "ssh://me@example.net",
+            "new_value": "file",
+            "expected": {
+                "href": "ssh://me@example.net",
+                "protocol": "ssh:"
+            }
+        },
+        {
+            "href": "ssh://example.net",
+            "new_value": "file",
+            "expected": {
+                "href": "ssh://example.net",
+                "protocol": "ssh:"
+            }
+        },
+        {
+            "href": "nonsense:///test",
+            "new_value": "https",
+            "expected": {
+                "href": "nonsense:///test",
+                "protocol": "nonsense:"
             }
         },
         {
@@ -170,6 +259,16 @@
                 "href": "view-source+data:text/html,<p>Test",
                 "protocol": "view-source+data:"
             }
+        },
+        {
+            "comment": "Port is set to null if it is the default for new scheme.",
+            "href": "http://foo.com:443/",
+            "new_value": "https",
+            "expected": {
+                "href": "https://foo.com/",
+                "protocol": "https:",
+                "port": ""
+            }
         }
     ],
     "username": [
@@ -266,14 +365,6 @@
                 "username": ""
             }
         },
-        {
-            "href": "file://test/",
-            "new_value": "test",
-            "expected": {
-                "href": "file://test/",
-                "username": ""
-            }
-        },
         {
             "href": "javascript://x/",
             "new_value": "wario",
@@ -281,6 +372,14 @@
                 "href": "javascript://wario@x/",
                 "username": "wario"
             }
+        },
+        {
+            "href": "file://test/",
+            "new_value": "test",
+            "expected": {
+                "href": "file://test/",
+                "username": ""
+            }
         }
     ],
     "password": [
@@ -369,14 +468,6 @@
                 "password": ""
             }
         },
-        {
-            "href": "file://test/",
-            "new_value": "test",
-            "expected": {
-                "href": "file://test/",
-                "password": ""
-            }
-        },
         {
             "href": "javascript://x/",
             "new_value": "bowser",
@@ -384,9 +475,27 @@
                 "href": "javascript://:bowser@x/",
                 "password": "bowser"
             }
+        },
+        {
+            "href": "file://test/",
+            "new_value": "test",
+            "expected": {
+                "href": "file://test/",
+                "password": ""
+            }
         }
     ],
     "host": [
+        {
+            "comment": "Non-special scheme",
+            "href": "sc://x/",
+            "new_value": "\u0000",
+            "expected": {
+                "href": "sc://x/",
+                "host": "x",
+                "hostname": "x"
+            }
+        },
         {
             "href": "sc://x/",
             "new_value": "\u0009",
@@ -414,6 +523,15 @@
                 "hostname": ""
             }
         },
+        {
+            "href": "sc://x/",
+            "new_value": " ",
+            "expected": {
+                "href": "sc://x/",
+                "host": "x",
+                "hostname": "x"
+            }
+        },
         {
             "href": "sc://x/",
             "new_value": "#",
@@ -459,6 +577,16 @@
                 "hostname": "%C3%9F"
             }
         },
+        {
+            "comment": "IDNA Nontransitional_Processing",
+            "href": "https://x/",
+            "new_value": "ß",
+            "expected": {
+                "href": "https://xn--zca/",
+                "host": "xn--zca",
+                "hostname": "xn--zca"
+            }
+        },
         {
             "comment": "Cannot-be-a-base means no host",
             "href": "mailto:me@example.net",
@@ -499,14 +627,14 @@
             }
         },
         {
-            "comment": "Port number is removed if empty in the new value: https://github.com/whatwg/url/pull/113",
+            "comment": "Port number is unchanged if not specified",
             "href": "http://example.net:8080",
             "new_value": "example.com:",
             "expected": {
-                "href": "http://example.com/",
-                "host": "example.com",
+                "href": "http://example.com:8080/",
+                "host": "example.com:8080",
                 "hostname": "example.com",
-                "port": ""
+                "port": "8080"
             }
         },
         {
@@ -591,6 +719,17 @@
                 "port": "80"
             }
         },
+        {
+            "comment": "Port number is removed if new port is scheme default and existing URL has a non-default port",
+            "href": "http://example.net:8080",
+            "new_value": "example.com:80",
+            "expected": {
+                "href": "http://example.com/",
+                "host": "example.com",
+                "hostname": "example.com",
+                "port": ""
+            }
+        },
         {
             "comment": "Stuff after a / delimiter is ignored",
             "href": "http://example.net/path",
@@ -790,9 +929,69 @@
                 "host": "example.net",
                 "hostname": "example.net"
             }
+        },
+        {
+            "href": "file://y/",
+            "new_value": "x:123",
+            "expected": {
+                "href": "file://y/",
+                "host": "y",
+                "hostname": "y",
+                "port": ""
+            }
+        },
+        {
+            "href": "file://y/",
+            "new_value": "loc%41lhost",
+            "expected": {
+                "href": "file:///",
+                "host": "",
+                "hostname": "",
+                "port": ""
+            }
+        },
+        {
+            "href": "file://hi/x",
+            "new_value": "",
+            "expected": {
+                "href": "file:///x",
+                "host": "",
+                "hostname": "",
+                "port": ""
+            }
+        },
+        {
+            "href": "sc://test@test/",
+            "new_value": "",
+            "expected": {
+                "href": "sc://test@test/",
+                "host": "test",
+                "hostname": "test",
+                "username": "test"
+            }
+        },
+        {
+            "href": "sc://test:12/",
+            "new_value": "",
+            "expected": {
+                "href": "sc://test:12/",
+                "host": "test:12",
+                "hostname": "test",
+                "port": "12"
+            }
         }
     ],
     "hostname": [
+        {
+            "comment": "Non-special scheme",
+            "href": "sc://x/",
+            "new_value": "\u0000",
+            "expected": {
+                "href": "sc://x/",
+                "host": "x",
+                "hostname": "x"
+            }
+        },
         {
             "href": "sc://x/",
             "new_value": "\u0009",
@@ -820,6 +1019,15 @@
                 "hostname": ""
             }
         },
+        {
+            "href": "sc://x/",
+            "new_value": " ",
+            "expected": {
+                "href": "sc://x/",
+                "host": "x",
+                "hostname": "x"
+            }
+        },
         {
             "href": "sc://x/",
             "new_value": "#",
@@ -1055,6 +1263,56 @@
                 "host": "example.net",
                 "hostname": "example.net"
             }
+        },
+        {
+            "href": "file://y/",
+            "new_value": "x:123",
+            "expected": {
+                "href": "file://y/",
+                "host": "y",
+                "hostname": "y",
+                "port": ""
+            }
+        },
+        {
+            "href": "file://y/",
+            "new_value": "loc%41lhost",
+            "expected": {
+                "href": "file:///",
+                "host": "",
+                "hostname": "",
+                "port": ""
+            }
+        },
+        {
+            "href": "file://hi/x",
+            "new_value": "",
+            "expected": {
+                "href": "file:///x",
+                "host": "",
+                "hostname": "",
+                "port": ""
+            }
+        },
+        {
+            "href": "sc://test@test/",
+            "new_value": "",
+            "expected": {
+                "href": "sc://test@test/",
+                "host": "test",
+                "hostname": "test",
+                "username": "test"
+            }
+        },
+        {
+            "href": "sc://test:12/",
+            "new_value": "",
+            "expected": {
+                "href": "sc://test:12/",
+                "host": "test:12",
+                "hostname": "test",
+                "port": "12"
+            }
         }
     ],
     "port": [
@@ -1324,12 +1582,12 @@
             }
         },
         {
-            "comment": "UTF-8 percent encoding with the default encode set. Tabs and newlines are removed. Leading or training C0 controls and space are removed.",
+            "comment": "UTF-8 percent encoding with the default encode set. Tabs and newlines are removed.",
             "href": "a:/",
-            "new_value": "\u0000\u0001\t\n\r\u001f !\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé",
+            "new_value": "\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé",
             "expected": {
-                "href": "a:/!%00%01%1F%20!%22%23$%&'()*+,-./09:;%3C=%3E%3F@AZ[\\]^_%60az%7B|%7D~%7F%C2%80%C2%81%C3%89%C3%A9",
-                "pathname": "/!%00%01%1F%20!%22%23$%&'()*+,-./09:;%3C=%3E%3F@AZ[\\]^_%60az%7B|%7D~%7F%C2%80%C2%81%C3%89%C3%A9"
+                "href": "a:/%00%01%1F%20!%22%23$%&'()*+,-./09:;%3C=%3E%3F@AZ[\\]^_%60az%7B|%7D~%7F%C2%80%C2%81%C3%89%C3%A9",
+                "pathname": "/%00%01%1F%20!%22%23$%&'()*+,-./09:;%3C=%3E%3F@AZ[\\]^_%60az%7B|%7D~%7F%C2%80%C2%81%C3%89%C3%A9"
             }
         },
         {
@@ -1376,6 +1634,33 @@
                 "href": "sc://example.net/%23",
                 "pathname": "/%23"
             }
+        },
+        {
+            "comment": "File URLs and (back)slashes",
+            "href": "file://monkey/",
+            "new_value": "\\\\",
+            "expected": {
+                "href": "file://monkey/",
+                "pathname": "/"
+            }
+        },
+        {
+            "comment": "File URLs and (back)slashes",
+            "href": "file:///unicorn",
+            "new_value": "//\\/",
+            "expected": {
+                "href": "file:///",
+                "pathname": "/"
+            }
+        },
+        {
+            "comment": "File URLs and (back)slashes",
+            "href": "file:///unicorn",
+            "new_value": "//monkey/..//",
+            "expected": {
+                "href": "file:///",
+                "pathname": "/"
+            }
         }
     ],
     "search": [
@@ -1444,12 +1729,12 @@
             }
         },
         {
-            "comment": "UTF-8 percent encoding with the query encode set. Tabs and newlines are removed. Leading or training C0 controls and space are removed.",
+            "comment": "UTF-8 percent encoding with the query encode set. Tabs and newlines are removed.",
             "href": "a:/",
-            "new_value": "\u0000\u0001\t\n\r\u001f !\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé",
+            "new_value": "\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé",
             "expected": {
-                "href": "a:/?!%00%01%1F%20!%22%23$%&'()*+,-./09:;%3C=%3E?@AZ[\\]^_`az{|}~%7F%C2%80%C2%81%C3%89%C3%A9",
-                "search": "?!%00%01%1F%20!%22%23$%&'()*+,-./09:;%3C=%3E?@AZ[\\]^_`az{|}~%7F%C2%80%C2%81%C3%89%C3%A9"
+                "href": "a:/?%00%01%1F%20!%22%23$%&'()*+,-./09:;%3C=%3E?@AZ[\\]^_`az{|}~%7F%C2%80%C2%81%C3%89%C3%A9",
+                "search": "?%00%01%1F%20!%22%23$%&'()*+,-./09:;%3C=%3E?@AZ[\\]^_`az{|}~%7F%C2%80%C2%81%C3%89%C3%A9"
             }
         },
         {
@@ -1511,13 +1796,53 @@
                 "hash": ""
             }
         },
+        {
+            "href": "http://example.net",
+            "new_value": "#foo bar",
+            "expected": {
+                "href": "http://example.net/#foo%20bar",
+                "hash": "#foo%20bar"
+            }
+        },
+        {
+            "href": "http://example.net",
+            "new_value": "#foo\"bar",
+            "expected": {
+                "href": "http://example.net/#foo%22bar",
+                "hash": "#foo%22bar"
+            }
+        },
+        {
+            "href": "http://example.net",
+            "new_value": "#foo<bar",
+            "expected": {
+                "href": "http://example.net/#foo%3Cbar",
+                "hash": "#foo%3Cbar"
+            }
+        },
+        {
+            "href": "http://example.net",
+            "new_value": "#foo>bar",
+            "expected": {
+                "href": "http://example.net/#foo%3Ebar",
+                "hash": "#foo%3Ebar"
+            }
+        },
+        {
+            "href": "http://example.net",
+            "new_value": "#foo`bar",
+            "expected": {
+                "href": "http://example.net/#foo%60bar",
+                "hash": "#foo%60bar"
+            }
+        },
         {
             "comment": "Simple percent-encoding; nuls, tabs, and newlines are removed",
             "href": "a:/",
-            "new_value": "\u0000\u0001\t\n\r\u001f !\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé",
+            "new_value": "\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé",
             "expected": {
-                "href": "a:/#!%01%1F !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~%7F%C2%80%C2%81%C3%89%C3%A9",
-                "hash": "#!%01%1F !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~%7F%C2%80%C2%81%C3%89%C3%A9"
+                "href": "a:/#%01%1F%20!%22#$%&'()*+,-./09:;%3C=%3E?@AZ[\\]^_%60az{|}~%7F%C2%80%C2%81%C3%89%C3%A9",
+                "hash": "#%01%1F%20!%22#$%&'()*+,-./09:;%3C=%3E?@AZ[\\]^_%60az{|}~%7F%C2%80%C2%81%C3%89%C3%A9"
             }
         },
         {
@@ -1528,6 +1853,14 @@
                 "href": "http://example.net/#%c3%89t%C3%A9",
                 "hash": "#%c3%89t%C3%A9"
             }
+        },
+        {
+            "href": "javascript:alert(1)",
+            "new_value": "castle",
+            "expected": {
+                "href": "javascript:alert(1)#castle",
+                "hash": "#castle"
+            }
         }
     ]
 }
diff --git a/tests/urltestdata.json b/tests/urltestdata.json
index 5565c938f..bf4e2a783 100644
--- a/tests/urltestdata.json
+++ b/tests/urltestdata.json
@@ -153,7 +153,7 @@
   {
     "input": "http://f:21/ b ? d # e ",
     "base": "http://example.org/foo/bar",
-    "href": "http://f:21/%20b%20?%20d%20# e",
+    "href": "http://f:21/%20b%20?%20d%20#%20e",
     "origin": "http://f:21",
     "protocol": "http:",
     "username": "",
@@ -163,12 +163,12 @@
     "port": "21",
     "pathname": "/%20b%20",
     "search": "?%20d%20",
-    "hash": "# e"
+    "hash": "#%20e"
   },
   {
     "input": "lolscheme:x x#x x",
     "base": "about:blank",
-    "href": "lolscheme:x x#x x",
+    "href": "lolscheme:x x#x%20x",
     "protocol": "lolscheme:",
     "username": "",
     "password": "",
@@ -177,7 +177,7 @@
     "port": "",
     "pathname": "x x",
     "search": "",
-    "hash": "#x x"
+    "hash": "#x%20x"
   },
   {
     "input": "http://f:/c",
@@ -572,7 +572,7 @@
   {
     "input": "foo://",
     "base": "http://example.org/foo/bar",
-    "href": "foo:///",
+    "href": "foo://",
     "origin": "null",
     "protocol": "foo:",
     "username": "",
@@ -580,7 +580,7 @@
     "host": "",
     "hostname": "",
     "port": "",
-    "pathname": "/",
+    "pathname": "",
     "search": "",
     "hash": ""
   },
@@ -1433,6 +1433,22 @@
     "search": "",
     "hash": ""
   },
+  "# Based on https://felixfbecker.github.io/whatwg-url-custom-host-repro/",
+  {
+    "input": "ssh://example.com/foo/bar.git",
+    "base": "http://example.org/",
+    "href": "ssh://example.com/foo/bar.git",
+    "origin": "null",
+    "protocol": "ssh:",
+    "username": "",
+    "password": "",
+    "host": "example.com",
+    "hostname": "example.com",
+    "port": "",
+    "pathname": "/foo/bar.git",
+    "search": "",
+    "hash": ""
+  },
   "# Based on http://trac.webkit.org/browser/trunk/LayoutTests/fast/url/file.html",
   {
     "input": "file:c:\\foo\\bar.html",
@@ -2260,7 +2276,7 @@
   {
     "input": "http://www.google.com/foo?bar=baz# »",
     "base": "about:blank",
-    "href": "http://www.google.com/foo?bar=baz# %C2%BB",
+    "href": "http://www.google.com/foo?bar=baz#%20%C2%BB",
     "origin": "http://www.google.com",
     "protocol": "http:",
     "username": "",
@@ -2270,12 +2286,12 @@
     "port": "",
     "pathname": "/foo",
     "search": "?bar=baz",
-    "hash": "# %C2%BB"
+    "hash": "#%20%C2%BB"
   },
   {
     "input": "data:test# »",
     "base": "about:blank",
-    "href": "data:test# %C2%BB",
+    "href": "data:test#%20%C2%BB",
     "origin": "null",
     "protocol": "data:",
     "username": "",
@@ -2285,7 +2301,7 @@
     "port": "",
     "pathname": "test",
     "search": "",
-    "hash": "# %C2%BB"
+    "hash": "#%20%C2%BB"
   },
   {
     "input": "http://www.google.com",
@@ -4015,6 +4031,37 @@
     "search": "?`{}",
     "hash": ""
   },
+  "byte is ' and url is special",
+  {
+    "input": "http://host/?'",
+    "base": "about:blank",
+    "href": "http://host/?%27",
+    "origin": "http://host",
+    "protocol": "http:",
+    "username": "",
+    "password": "",
+    "host": "host",
+    "hostname": "host",
+    "port": "",
+    "pathname": "/",
+    "search": "?%27",
+    "hash": ""
+  },
+  {
+    "input": "notspecial://host/?'",
+    "base": "about:blank",
+    "href": "notspecial://host/?'",
+    "origin": "null",
+    "protocol": "notspecial:",
+    "username": "",
+    "password": "",
+    "host": "host",
+    "hostname": "host",
+    "port": "",
+    "pathname": "/",
+    "search": "?'",
+    "hash": ""
+  },
   "# Credentials in base",
   {
     "input": "/some/path",
@@ -4473,6 +4520,26 @@
     "search": "",
     "hash": ""
   },
+  {
+    "input": "sc://@/",
+    "base": "about:blank",
+    "failure": true
+  },
+  {
+    "input": "sc://te@s:t@/",
+    "base": "about:blank",
+    "failure": true
+  },
+  {
+    "input": "sc://:/",
+    "base": "about:blank",
+    "failure": true
+  },
+  {
+    "input": "sc://:12/",
+    "base": "about:blank",
+    "failure": true
+  },
   {
     "input": "sc://[/",
     "base": "about:blank",
@@ -4566,6 +4633,22 @@
     "search": "",
     "hash": ""
   },
+  "# unknown scheme with non-URL characters in the path",
+  {
+    "input": "wow:\uFFFF",
+    "base": "about:blank",
+    "href": "wow:%EF%BF%BF",
+    "origin": "null",
+    "protocol": "wow:",
+    "username": "",
+    "password": "",
+    "host": "",
+    "hostname": "",
+    "port": "",
+    "pathname": "%EF%BF%BF",
+    "search": "",
+    "hash": ""
+  },
   "# Hosts and percent-encoding",
   {
     "input": "ftp://example.com%80/",
@@ -4767,6 +4850,70 @@
     "searchParams": "qux=",
     "hash": "#foo%08bar"
   },
+  {
+    "input": "http://foo.bar/baz?qux#foo\"bar",
+    "base": "about:blank",
+    "href": "http://foo.bar/baz?qux#foo%22bar",
+    "origin": "http://foo.bar",
+    "protocol": "http:",
+    "username": "",
+    "password": "",
+    "host": "foo.bar",
+    "hostname": "foo.bar",
+    "port": "",
+    "pathname": "/baz",
+    "search": "?qux",
+    "searchParams": "qux=",
+    "hash": "#foo%22bar"
+  },
+  {
+    "input": "http://foo.bar/baz?qux#foo<bar",
+    "base": "about:blank",
+    "href": "http://foo.bar/baz?qux#foo%3Cbar",
+    "origin": "http://foo.bar",
+    "protocol": "http:",
+    "username": "",
+    "password": "",
+    "host": "foo.bar",
+    "hostname": "foo.bar",
+    "port": "",
+    "pathname": "/baz",
+    "search": "?qux",
+    "searchParams": "qux=",
+    "hash": "#foo%3Cbar"
+  },
+  {
+    "input": "http://foo.bar/baz?qux#foo>bar",
+    "base": "about:blank",
+    "href": "http://foo.bar/baz?qux#foo%3Ebar",
+    "origin": "http://foo.bar",
+    "protocol": "http:",
+    "username": "",
+    "password": "",
+    "host": "foo.bar",
+    "hostname": "foo.bar",
+    "port": "",
+    "pathname": "/baz",
+    "search": "?qux",
+    "searchParams": "qux=",
+    "hash": "#foo%3Ebar"
+  },
+  {
+    "input": "http://foo.bar/baz?qux#foo`bar",
+    "base": "about:blank",
+    "href": "http://foo.bar/baz?qux#foo%60bar",
+    "origin": "http://foo.bar",
+    "protocol": "http:",
+    "username": "",
+    "password": "",
+    "host": "foo.bar",
+    "hostname": "foo.bar",
+    "port": "",
+    "pathname": "/baz",
+    "search": "?qux",
+    "searchParams": "qux=",
+    "hash": "#foo%60bar"
+  },
   "# IPv4 parsing (via https://github.com/nodejs/node/pull/10317)",
   {
     "input": "http://192.168.257",
@@ -4954,6 +5101,11 @@
     "hash": ""
   },
   "More IPv4 parsing (via https://github.com/jsdom/whatwg-url/issues/92)",
+  {
+    "input": "https://0x100000000/test",
+    "base": "about:blank",
+    "failure": true
+  },
   {
     "input": "https://256.0.0.1/test",
     "base": "about:blank",
@@ -5187,6 +5339,90 @@
     "hash": "#x"
   },
   "# File URLs and many (back)slashes",
+  {
+    "input": "file:\\\\//",
+    "base": "about:blank",
+    "href": "file:///",
+    "protocol": "file:",
+    "username": "",
+    "password": "",
+    "host": "",
+    "hostname": "",
+    "port": "",
+    "pathname": "/",
+    "search": "",
+    "hash": ""
+  },
+  {
+    "input": "file:\\\\\\\\",
+    "base": "about:blank",
+    "href": "file:///",
+    "protocol": "file:",
+    "username": "",
+    "password": "",
+    "host": "",
+    "hostname": "",
+    "port": "",
+    "pathname": "/",
+    "search": "",
+    "hash": ""
+  },
+  {
+    "input": "file:\\\\\\\\?fox",
+    "base": "about:blank",
+    "href": "file:///?fox",
+    "protocol": "file:",
+    "username": "",
+    "password": "",
+    "host": "",
+    "hostname": "",
+    "port": "",
+    "pathname": "/",
+    "search": "?fox",
+    "hash": ""
+  },
+  {
+    "input": "file:\\\\\\\\#guppy",
+    "base": "about:blank",
+    "href": "file:///#guppy",
+    "protocol": "file:",
+    "username": "",
+    "password": "",
+    "host": "",
+    "hostname": "",
+    "port": "",
+    "pathname": "/",
+    "search": "",
+    "hash": "#guppy"
+  },
+  {
+    "input": "file://spider///",
+    "base": "about:blank",
+    "href": "file://spider/",
+    "protocol": "file:",
+    "username": "",
+    "password": "",
+    "host": "spider",
+    "hostname": "spider",
+    "port": "",
+    "pathname": "/",
+    "search": "",
+    "hash": ""
+  },
+  {
+    "input": "file:\\\\localhost//",
+    "base": "about:blank",
+    "href": "file:///",
+    "protocol": "file:",
+    "username": "",
+    "password": "",
+    "host": "",
+    "hostname": "",
+    "port": "",
+    "pathname": "/",
+    "search": "",
+    "hash": ""
+  },
   {
     "input": "file:///localhost//cat",
     "base": "about:blank",
@@ -5201,6 +5437,48 @@
     "search": "",
     "hash": ""
   },
+  {
+    "input": "file://\\/localhost//cat",
+    "base": "about:blank",
+    "href": "file:///localhost//cat",
+    "protocol": "file:",
+    "username": "",
+    "password": "",
+    "host": "",
+    "hostname": "",
+    "port": "",
+    "pathname": "/localhost//cat",
+    "search": "",
+    "hash": ""
+  },
+  {
+    "input": "file://localhost//a//../..//",
+    "base": "about:blank",
+    "href": "file:///",
+    "protocol": "file:",
+    "username": "",
+    "password": "",
+    "host": "",
+    "hostname": "",
+    "port": "",
+    "pathname": "/",
+    "search": "",
+    "hash": ""
+  },
+  {
+    "input": "/////mouse",
+    "base": "file:///elephant",
+    "href": "file:///mouse",
+    "protocol": "file:",
+    "username": "",
+    "password": "",
+    "host": "",
+    "hostname": "",
+    "port": "",
+    "pathname": "/mouse",
+    "search": "",
+    "hash": ""
+  },
   {
     "input": "\\//pig",
     "base": "file://lion/",
@@ -5215,6 +5493,48 @@
     "search": "",
     "hash": ""
   },
+  {
+    "input": "\\/localhost//pig",
+    "base": "file://lion/",
+    "href": "file:///pig",
+    "protocol": "file:",
+    "username": "",
+    "password": "",
+    "host": "",
+    "hostname": "",
+    "port": "",
+    "pathname": "/pig",
+    "search": "",
+    "hash": ""
+  },
+  {
+    "input": "//localhost//pig",
+    "base": "file://lion/",
+    "href": "file:///pig",
+    "protocol": "file:",
+    "username": "",
+    "password": "",
+    "host": "",
+    "hostname": "",
+    "port": "",
+    "pathname": "/pig",
+    "search": "",
+    "hash": ""
+  },
+  {
+    "input": "/..//localhost//pig",
+    "base": "file://lion/",
+    "href": "file://lion/localhost//pig",
+    "protocol": "file:",
+    "username": "",
+    "password": "",
+    "host": "lion",
+    "hostname": "lion",
+    "port": "",
+    "pathname": "/localhost//pig",
+    "search": "",
+    "hash": ""
+  },
   {
     "input": "file://",
     "base": "file://ape/",
@@ -5229,7 +5549,50 @@
     "search": "",
     "hash": ""
   },
+  "# File URLs with non-empty hosts",
+  {
+    "input": "/rooibos",
+    "base": "file://tea/",
+    "href": "file://tea/rooibos",
+    "protocol": "file:",
+    "username": "",
+    "password": "",
+    "host": "tea",
+    "hostname": "tea",
+    "port": "",
+    "pathname": "/rooibos",
+    "search": "",
+    "hash": ""
+  },
+  {
+    "input": "/?chai",
+    "base": "file://tea/",
+    "href": "file://tea/?chai",
+    "protocol": "file:",
+    "username": "",
+    "password": "",
+    "host": "tea",
+    "hostname": "tea",
+    "port": "",
+    "pathname": "/",
+    "search": "?chai",
+    "hash": ""
+  },
   "# Windows drive letter handling with the 'file:' base URL",
+  {
+    "input": "C|",
+    "base": "file://host/dir/file",
+    "href": "file:///C:",
+    "protocol": "file:",
+    "username": "",
+    "password": "",
+    "host": "",
+    "hostname": "",
+    "port": "",
+    "pathname": "/C:",
+    "search": "",
+    "hash": ""
+  },
   {
     "input": "C|#",
     "base": "file://host/dir/file",
@@ -5329,6 +5692,48 @@
     "hash": ""
   },
   "# Windows drive letter quirk in the file slash state",
+  {
+    "input": "/c:/foo/bar",
+    "base": "file:///c:/baz/qux",
+    "href": "file:///c:/foo/bar",
+    "protocol": "file:",
+    "username": "",
+    "password": "",
+    "host": "",
+    "hostname": "",
+    "port": "",
+    "pathname": "/c:/foo/bar",
+    "search": "",
+    "hash": ""
+  },
+  {
+    "input": "/c|/foo/bar",
+    "base": "file:///c:/baz/qux",
+    "href": "file:///c:/foo/bar",
+    "protocol": "file:",
+    "username": "",
+    "password": "",
+    "host": "",
+    "hostname": "",
+    "port": "",
+    "pathname": "/c:/foo/bar",
+    "search": "",
+    "hash": ""
+  },
+  {
+    "input": "file:\\c:\\foo\\bar",
+    "base": "file:///c:/baz/qux",
+    "href": "file:///c:/foo/bar",
+    "protocol": "file:",
+    "username": "",
+    "password": "",
+    "host": "",
+    "hostname": "",
+    "port": "",
+    "pathname": "/c:/foo/bar",
+    "search": "",
+    "hash": ""
+  },
   {
     "input": "/c:/foo/bar",
     "base": "file://host/path",
@@ -5343,9 +5748,9 @@
     "search": "",
     "hash": ""
   },
-  "# Windows drive letter quirk (no host)",
+  "# Windows drive letter quirk with not empty host",
   {
-    "input": "file:/C|/",
+    "input": "file://example.net/C:/",
     "base": "about:blank",
     "href": "file:///C:/",
     "protocol": "file:",
@@ -5359,7 +5764,7 @@
     "hash": ""
   },
   {
-    "input": "file://C|/",
+    "input": "file://1.2.3.4/C:/",
     "base": "about:blank",
     "href": "file:///C:/",
     "protocol": "file:",
@@ -5372,9 +5777,8 @@
     "search": "",
     "hash": ""
   },
-  "# Windows drive letter quirk with not empty host",
   {
-    "input": "file://example.net/C:/",
+    "input": "file://[1::8]/C:/",
     "base": "about:blank",
     "href": "file:///C:/",
     "protocol": "file:",
@@ -5387,8 +5791,9 @@
     "search": "",
     "hash": ""
   },
+  "# Windows drive letter quirk (no host)",
   {
-    "input": "file://1.2.3.4/C:/",
+    "input": "file:/C|/",
     "base": "about:blank",
     "href": "file:///C:/",
     "protocol": "file:",
@@ -5402,7 +5807,7 @@
     "hash": ""
   },
   {
-    "input": "file://[1::8]/C:/",
+    "input": "file://C|/",
     "base": "about:blank",
     "href": "file:///C:/",
     "protocol": "file:",
@@ -5544,6 +5949,109 @@
     "failure": true
   },
   "# Non-special-URL path tests",
+  {
+    "input": "sc://ñ",
+    "base": "about:blank",
+    "href": "sc://%C3%B1",
+    "origin": "null",
+    "protocol": "sc:",
+    "username": "",
+    "password": "",
+    "host": "%C3%B1",
+    "hostname": "%C3%B1",
+    "port": "",
+    "pathname": "",
+    "search": "",
+    "hash": ""
+  },
+  {
+    "input": "sc://ñ?x",
+    "base": "about:blank",
+    "href": "sc://%C3%B1?x",
+    "origin": "null",
+    "protocol": "sc:",
+    "username": "",
+    "password": "",
+    "host": "%C3%B1",
+    "hostname": "%C3%B1",
+    "port": "",
+    "pathname": "",
+    "search": "?x",
+    "hash": ""
+  },
+  {
+    "input": "sc://ñ#x",
+    "base": "about:blank",
+    "href": "sc://%C3%B1#x",
+    "origin": "null",
+    "protocol": "sc:",
+    "username": "",
+    "password": "",
+    "host": "%C3%B1",
+    "hostname": "%C3%B1",
+    "port": "",
+    "pathname": "",
+    "search": "",
+    "hash": "#x"
+  },
+  {
+    "input": "#x",
+    "base": "sc://ñ",
+    "href": "sc://%C3%B1#x",
+    "origin": "null",
+    "protocol": "sc:",
+    "username": "",
+    "password": "",
+    "host": "%C3%B1",
+    "hostname": "%C3%B1",
+    "port": "",
+    "pathname": "",
+    "search": "",
+    "hash": "#x"
+  },
+  {
+    "input": "?x",
+    "base": "sc://ñ",
+    "href": "sc://%C3%B1?x",
+    "origin": "null",
+    "protocol": "sc:",
+    "username": "",
+    "password": "",
+    "host": "%C3%B1",
+    "hostname": "%C3%B1",
+    "port": "",
+    "pathname": "",
+    "search": "?x",
+    "hash": ""
+  },
+  {
+    "input": "sc://?",
+    "base": "about:blank",
+    "href": "sc://?",
+    "protocol": "sc:",
+    "username": "",
+    "password": "",
+    "host": "",
+    "hostname": "",
+    "port": "",
+    "pathname": "",
+    "search": "",
+    "hash": ""
+  },
+  {
+    "input": "sc://#",
+    "base": "about:blank",
+    "href": "sc://#",
+    "protocol": "sc:",
+    "username": "",
+    "password": "",
+    "host": "",
+    "hostname": "",
+    "port": "",
+    "pathname": "",
+    "search": "",
+    "hash": ""
+  },
   {
     "input": "///",
     "base": "sc://x/",
@@ -5558,6 +6066,34 @@
     "search": "",
     "hash": ""
   },
+  {
+    "input": "////",
+    "base": "sc://x/",
+    "href": "sc:////",
+    "protocol": "sc:",
+    "username": "",
+    "password": "",
+    "host": "",
+    "hostname": "",
+    "port": "",
+    "pathname": "//",
+    "search": "",
+    "hash": ""
+  },
+  {
+    "input": "////x/",
+    "base": "sc://x/",
+    "href": "sc:////x/",
+    "protocol": "sc:",
+    "username": "",
+    "password": "",
+    "host": "",
+    "hostname": "",
+    "port": "",
+    "pathname": "//x/",
+    "search": "",
+    "hash": ""
+  },
   {
     "input": "tftp://foobar.com/someconfig;mode=netascii",
     "base": "about:blank",
@@ -6048,27 +6584,34 @@
     "search": "?a",
     "hash": "#%GH"
   },
-  "Bad bases",
+  "URLs that require a non-about:blank base. (Also serve as invalid base tests.)",
   {
-    "input": "test-a.html",
-    "base": "a",
+    "input": "a",
+    "base": "about:blank",
     "failure": true
   },
   {
-    "input": "test-a-slash.html",
-    "base": "a/",
+    "input": "a/",
+    "base": "about:blank",
     "failure": true
   },
   {
-    "input": "test-a-slash-slash.html",
-    "base": "a//",
+    "input": "a//",
+    "base": "about:blank",
     "failure": true
   },
+  "Bases that don't fail to parse but fail to be bases",
   {
     "input": "test-a-colon.html",
     "base": "a:",
     "failure": true
   },
+  {
+    "input": "test-a-colon-b.html",
+    "base": "a:b",
+    "failure": true
+  },
+  "Other base URL tests, that must succeed",
   {
     "input": "test-a-colon-slash.html",
     "base": "a:/",
@@ -6097,11 +6640,6 @@
     "search": "",
     "hash": ""
   },
-  {
-    "input": "test-a-colon-b.html",
-    "base": "a:b",
-    "failure": true
-  },
   {
     "input": "test-a-colon-slash-b.html",
     "base": "a:/b",

From fa9f04487b3f3404281ae42dcbbab2d88a5144de Mon Sep 17 00:00:00 2001
From: Anthony Ramine <n.oxyde@gmail.com>
Date: Fri, 19 Jul 2019 10:35:24 +0200
Subject: [PATCH 02/15] Fix percent encoding of fragments (closes #491)

> test result: FAILED. 637 passed; 76 failed; 0 ignored; 0 measured
---
 src/parser.rs | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/src/parser.rs b/src/parser.rs
index e2ea36bfa..6c1417ddb 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -1329,14 +1329,8 @@ impl<'a> Parser<'a> {
                 self.log_violation(SyntaxViolation::NullInFragment)
             } else {
                 self.check_url_code_point(c, &input);
-                self.serialization.extend(utf8_percent_encode(
-                    utf8_c,
-                    // FIXME: tests fail when we use the FRAGMENT set here
-                    // as defined in the spec as of 2019-07-17,
-                    // likely because tests are out of date.
-                    // See https://github.com/servo/rust-url/issues/290
-                    CONTROLS,
-                ));
+                self.serialization
+                    .extend(utf8_percent_encode(utf8_c, FRAGMENT));
             }
         }
     }

From 412266a2f838ff009d2c1103ad7b93bea0a43478 Mon Sep 17 00:00:00 2001
From: Anthony Ramine <n.oxyde@gmail.com>
Date: Sat, 20 Jul 2019 11:49:14 +0200
Subject: [PATCH 03/15] Refactor parse_file to look more like the spec

---
 src/parser.rs | 338 ++++++++++++++++++++++----------------------------
 1 file changed, 148 insertions(+), 190 deletions(-)

diff --git a/src/parser.rs b/src/parser.rs
index 6c1417ddb..537492459 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -488,15 +488,93 @@ impl<'a> Parser<'a> {
         mut self,
         input: Input,
         scheme_type: SchemeType,
-        mut base_file_url: Option<&Url>,
+        base_file_url: Option<&Url>,
     ) -> ParseResult<Url> {
         use SyntaxViolation::Backslash;
         // file state
         debug_assert!(self.serialization.is_empty());
         let (first_char, input_after_first_char) = input.split_first();
-        match first_char {
-            None => {
+        if matches!(first_char, Some('/') | Some('\\')) {
+            self.log_violation_if(SyntaxViolation::Backslash, || first_char == Some('\\'));
+            // file slash state
+            let (next_char, input_after_next_char) = input_after_first_char.split_first();
+            if matches!(next_char, Some('/') | Some('\\')) {
+                self.log_violation_if(Backslash, || next_char == Some('\\'));
+                // file host state
+                self.serialization.push_str("file://");
+                let scheme_end = "file".len() as u32;
+                let host_start = "file://".len() as u32;
+                let (path_start, mut host, remaining) =
+                    self.parse_file_host(input_after_next_char)?;
+                let mut host_end = to_u32(self.serialization.len())?;
+                let mut has_host = !matches!(host, HostInternal::None);
+                let remaining = if path_start {
+                    self.parse_path_start(SchemeType::File, &mut has_host, remaining)
+                } else {
+                    let path_start = self.serialization.len();
+                    self.serialization.push('/');
+                    self.parse_path(SchemeType::File, &mut has_host, path_start, remaining)
+                };
+                // For file URLs that have a host and whose path starts
+                // with the windows drive letter we just remove the host.
+                if !has_host {
+                    self.serialization
+                        .drain(host_start as usize..host_end as usize);
+                    host_end = host_start;
+                    host = HostInternal::None;
+                }
+                let (query_start, fragment_start) =
+                    self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?;
+                return Ok(Url {
+                    serialization: self.serialization,
+                    scheme_end: scheme_end,
+                    username_end: host_start,
+                    host_start: host_start,
+                    host_end: host_end,
+                    host: host,
+                    port: None,
+                    path_start: host_end,
+                    query_start: query_start,
+                    fragment_start: fragment_start,
+                });
+            } else {
+                self.serialization.push_str("file:///");
+                let scheme_end = "file".len() as u32;
+                let path_start = "file://".len();
                 if let Some(base_url) = base_file_url {
+                    let first_segment = base_url.path_segments().unwrap().next().unwrap();
+                    // FIXME: *normalized* drive letter
+                    if is_windows_drive_letter(first_segment) {
+                        self.serialization.push_str(first_segment);
+                        self.serialization.push('/');
+                    }
+                }
+                let remaining = self.parse_path(
+                    SchemeType::File,
+                    &mut false,
+                    path_start,
+                    input_after_first_char,
+                );
+                let (query_start, fragment_start) =
+                    self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?;
+                let path_start = path_start as u32;
+                return Ok(Url {
+                    serialization: self.serialization,
+                    scheme_end: scheme_end,
+                    username_end: path_start,
+                    host_start: path_start,
+                    host_end: path_start,
+                    host: HostInternal::None,
+                    port: None,
+                    path_start: path_start,
+                    query_start: query_start,
+                    fragment_start: fragment_start,
+                });
+            }
+        }
+        if let Some(base_url) = base_file_url {
+            match first_char {
+                None => {
                     // Copy everything except the fragment
                     let before_fragment = match base_url.fragment_start {
                         Some(i) => &base_url.serialization[..i as usize],
@@ -508,26 +586,8 @@ impl<'a> Parser<'a> {
                         fragment_start: None,
                         ..*base_url
                     })
-                } else {
-                    self.serialization.push_str("file:///");
-                    let scheme_end = "file".len() as u32;
-                    let path_start = "file://".len() as u32;
-                    Ok(Url {
-                        serialization: self.serialization,
-                        scheme_end,
-                        username_end: path_start,
-                        host_start: path_start,
-                        host_end: path_start,
-                        host: HostInternal::None,
-                        port: None,
-                        path_start,
-                        query_start: None,
-                        fragment_start: None,
-                    })
                 }
-            }
-            Some('?') => {
-                if let Some(base_url) = base_file_url {
+                Some('?') => {
                     // Copy everything up to the query string
                     let before_query = match (base_url.query_start, base_url.fragment_start) {
                         (None, None) => &*base_url.serialization,
@@ -542,179 +602,77 @@ impl<'a> Parser<'a> {
                         fragment_start,
                         ..*base_url
                     })
-                } else {
-                    self.serialization.push_str("file:///");
-                    let scheme_end = "file".len() as u32;
-                    let path_start = "file://".len() as u32;
-                    let (query_start, fragment_start) =
-                        self.parse_query_and_fragment(scheme_type, scheme_end, input)?;
-                    Ok(Url {
-                        serialization: self.serialization,
-                        scheme_end,
-                        username_end: path_start,
-                        host_start: path_start,
-                        host_end: path_start,
-                        host: HostInternal::None,
-                        port: None,
-                        path_start,
-                        query_start,
-                        fragment_start,
-                    })
                 }
-            }
-            Some('#') => {
-                if let Some(base_url) = base_file_url {
-                    self.fragment_only(base_url, input)
-                } else {
-                    self.serialization.push_str("file:///");
-                    let scheme_end = "file".len() as u32;
-                    let path_start = "file://".len() as u32;
-                    let fragment_start = "file:///".len() as u32;
-                    self.serialization.push('#');
-                    self.parse_fragment(input_after_first_char);
-                    Ok(Url {
-                        serialization: self.serialization,
-                        scheme_end,
-                        username_end: path_start,
-                        host_start: path_start,
-                        host_end: path_start,
-                        host: HostInternal::None,
-                        port: None,
-                        path_start,
-                        query_start: None,
-                        fragment_start: Some(fragment_start),
-                    })
-                }
-            }
-            Some('/') | Some('\\') => {
-                self.log_violation_if(Backslash, || first_char == Some('\\'));
-                // file slash state
-                let (next_char, input_after_next_char) = input_after_first_char.split_first();
-                self.log_violation_if(Backslash, || next_char == Some('\\'));
-                if matches!(next_char, Some('/') | Some('\\')) {
-                    // file host state
-                    self.serialization.push_str("file://");
-                    let scheme_end = "file".len() as u32;
-                    let host_start = "file://".len() as u32;
-                    let (path_start, mut host, remaining) =
-                        self.parse_file_host(input_after_next_char)?;
-                    let mut host_end = to_u32(self.serialization.len())?;
-                    let mut has_host = !matches!(host, HostInternal::None);
-                    let remaining = if path_start {
-                        self.parse_path_start(SchemeType::File, &mut has_host, remaining)
+                Some('#') => self.fragment_only(base_url, input),
+                _ => {
+                    if !starts_with_windows_drive_letter_segment(&input) {
+                        let before_query = match (base_url.query_start, base_url.fragment_start) {
+                            (None, None) => &*base_url.serialization,
+                            (Some(i), _) | (None, Some(i)) => base_url.slice(..i),
+                        };
+                        self.serialization.push_str(before_query);
+                        self.pop_path(SchemeType::File, base_url.path_start as usize);
+                        let remaining = self.parse_path(
+                            SchemeType::File,
+                            &mut true,
+                            base_url.path_start as usize,
+                            input,
+                        );
+                        self.with_query_and_fragment(
+                            SchemeType::File,
+                            base_url.scheme_end,
+                            base_url.username_end,
+                            base_url.host_start,
+                            base_url.host_end,
+                            base_url.host,
+                            base_url.port,
+                            base_url.path_start,
+                            remaining,
+                        )
                     } else {
-                        let path_start = self.serialization.len();
-                        self.serialization.push('/');
-                        self.parse_path(SchemeType::File, &mut has_host, path_start, remaining)
-                    };
-                    // For file URLs that have a host and whose path starts
-                    // with the windows drive letter we just remove the host.
-                    if !has_host {
-                        self.serialization
-                            .drain(host_start as usize..host_end as usize);
-                        host_end = host_start;
-                        host = HostInternal::None;
+                        self.serialization.push_str("file:///");
+                        let scheme_end = "file".len() as u32;
+                        let path_start = "file://".len();
+                        let remaining =
+                            self.parse_path(SchemeType::File, &mut false, path_start, input);
+                        let (query_start, fragment_start) =
+                            self.parse_query_and_fragment(SchemeType::File, scheme_end, remaining)?;
+                        let path_start = path_start as u32;
+                        Ok(Url {
+                            serialization: self.serialization,
+                            scheme_end: scheme_end,
+                            username_end: path_start,
+                            host_start: path_start,
+                            host_end: path_start,
+                            host: HostInternal::None,
+                            port: None,
+                            path_start: path_start,
+                            query_start: query_start,
+                            fragment_start: fragment_start,
+                        })
                     }
-                    let (query_start, fragment_start) =
-                        self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?;
-                    Ok(Url {
-                        serialization: self.serialization,
-                        scheme_end,
-                        username_end: host_start,
-                        host_start,
-                        host_end,
-                        host,
-                        port: None,
-                        path_start: host_end,
-                        query_start,
-                        fragment_start,
-                    })
-                } else {
-                    self.serialization.push_str("file:///");
-                    let scheme_end = "file".len() as u32;
-                    let path_start = "file://".len();
-                    if let Some(base_url) = base_file_url {
-                        let first_segment = base_url.path_segments().unwrap().next().unwrap();
-                        // FIXME: *normalized* drive letter
-                        if is_windows_drive_letter(first_segment) {
-                            self.serialization.push_str(first_segment);
-                            self.serialization.push('/');
-                        }
-                    }
-                    let remaining = self.parse_path(
-                        SchemeType::File,
-                        &mut false,
-                        path_start,
-                        input_after_first_char,
-                    );
-                    let (query_start, fragment_start) =
-                        self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?;
-                    let path_start = path_start as u32;
-                    Ok(Url {
-                        serialization: self.serialization,
-                        scheme_end,
-                        username_end: path_start,
-                        host_start: path_start,
-                        host_end: path_start,
-                        host: HostInternal::None,
-                        port: None,
-                        path_start,
-                        query_start,
-                        fragment_start,
-                    })
-                }
-            }
-            _ => {
-                if starts_with_windows_drive_letter_segment(&input) {
-                    base_file_url = None;
-                }
-                if let Some(base_url) = base_file_url {
-                    let before_query = match (base_url.query_start, base_url.fragment_start) {
-                        (None, None) => &*base_url.serialization,
-                        (Some(i), _) | (None, Some(i)) => base_url.slice(..i),
-                    };
-                    self.serialization.push_str(before_query);
-                    self.pop_path(SchemeType::File, base_url.path_start as usize);
-                    let remaining = self.parse_path(
-                        SchemeType::File,
-                        &mut true,
-                        base_url.path_start as usize,
-                        input,
-                    );
-                    self.with_query_and_fragment(
-                        SchemeType::File,
-                        base_url.scheme_end,
-                        base_url.username_end,
-                        base_url.host_start,
-                        base_url.host_end,
-                        base_url.host,
-                        base_url.port,
-                        base_url.path_start,
-                        remaining,
-                    )
-                } else {
-                    self.serialization.push_str("file:///");
-                    let scheme_end = "file".len() as u32;
-                    let path_start = "file://".len();
-                    let remaining =
-                        self.parse_path(SchemeType::File, &mut false, path_start, input);
-                    let (query_start, fragment_start) =
-                        self.parse_query_and_fragment(SchemeType::File, scheme_end, remaining)?;
-                    let path_start = path_start as u32;
-                    Ok(Url {
-                        serialization: self.serialization,
-                        scheme_end,
-                        username_end: path_start,
-                        host_start: path_start,
-                        host_end: path_start,
-                        host: HostInternal::None,
-                        port: None,
-                        path_start,
-                        query_start,
-                        fragment_start,
-                    })
                 }
             }
+        } else {
+            self.serialization.push_str("file:///");
+            let scheme_end = "file".len() as u32;
+            let path_start = "file://".len();
+            let remaining = self.parse_path(SchemeType::File, &mut false, path_start, input);
+            let (query_start, fragment_start) =
+                self.parse_query_and_fragment(SchemeType::File, scheme_end, remaining)?;
+            let path_start = path_start as u32;
+            Ok(Url {
+                serialization: self.serialization,
+                scheme_end: scheme_end,
+                username_end: path_start,
+                host_start: path_start,
+                host_end: path_start,
+                host: HostInternal::None,
+                port: None,
+                path_start: path_start,
+                query_start: query_start,
+                fragment_start: fragment_start,
+            })
         }
     }
 

From e93f999dc5f6f8de61ccf6fe79cc8d11774f08b8 Mon Sep 17 00:00:00 2001
From: Anthony Ramine <n.oxyde@gmail.com>
Date: Sat, 20 Jul 2019 12:09:54 +0200
Subject: [PATCH 04/15] Fix a Windows quirk

> test result: FAILED. 640 passed; 73 failed; 0 ignored; 0 measured
---
 src/parser.rs | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/src/parser.rs b/src/parser.rs
index 537492459..5805afb41 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -541,12 +541,13 @@ impl<'a> Parser<'a> {
                 self.serialization.push_str("file:///");
                 let scheme_end = "file".len() as u32;
                 let path_start = "file://".len();
-                if let Some(base_url) = base_file_url {
-                    let first_segment = base_url.path_segments().unwrap().next().unwrap();
-                    // FIXME: *normalized* drive letter
-                    if is_windows_drive_letter(first_segment) {
-                        self.serialization.push_str(first_segment);
-                        self.serialization.push('/');
+                if !starts_with_windows_drive_letter_segment(&input_after_first_char) {
+                    if let Some(base_url) = base_file_url {
+                        let first_segment = base_url.path_segments().unwrap().next().unwrap();
+                        if is_normalized_windows_drive_letter(first_segment) {
+                            self.serialization.push_str(first_segment);
+                            self.serialization.push('/');
+                        }
                     }
                 }
                 let remaining = self.parse_path(
@@ -1361,6 +1362,10 @@ pub fn to_u32(i: usize) -> ParseResult<u32> {
     }
 }
 
+fn is_normalized_windows_drive_letter(segment: &str) -> bool {
+    is_windows_drive_letter(segment) && segment.as_bytes()[1] == b':'
+}
+
 /// Wether the scheme is file:, the path has a single segment, and that segment
 /// is a Windows drive letter
 fn is_windows_drive_letter(segment: &str) -> bool {

From efe9ab98888e0229315c56b684f0e3fa8d40ca0d Mon Sep 17 00:00:00 2001
From: Anthony Ramine <n.oxyde@gmail.com>
Date: Sat, 20 Jul 2019 13:21:45 +0200
Subject: [PATCH 05/15] Properly copy hosts of base file:// URLs when needed

> test result: FAILED. 642 passed; 71 failed; 0 ignored; 0 measured
---
 src/parser.rs | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/src/parser.rs b/src/parser.rs
index 5805afb41..8daac86df 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -538,36 +538,44 @@ impl<'a> Parser<'a> {
                     fragment_start: fragment_start,
                 });
             } else {
-                self.serialization.push_str("file:///");
+                self.serialization.push_str("file://");
                 let scheme_end = "file".len() as u32;
-                let path_start = "file://".len();
+                let host_start = "file://".len();
+                let mut host_end = host_start;
+                let mut host = HostInternal::None;
                 if !starts_with_windows_drive_letter_segment(&input_after_first_char) {
                     if let Some(base_url) = base_file_url {
                         let first_segment = base_url.path_segments().unwrap().next().unwrap();
                         if is_normalized_windows_drive_letter(first_segment) {
-                            self.serialization.push_str(first_segment);
                             self.serialization.push('/');
+                            self.serialization.push_str(first_segment);
+                        } else if let Some(host_str) = base_url.host_str() {
+                            self.serialization.push_str(host_str);
+                            host_end = self.serialization.len();
+                            host = base_url.host.clone();
                         }
                     }
                 }
+                self.serialization.push('/');
                 let remaining = self.parse_path(
                     SchemeType::File,
                     &mut false,
-                    path_start,
+                    host_end,
                     input_after_first_char,
                 );
                 let (query_start, fragment_start) =
                     self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?;
-                let path_start = path_start as u32;
+                let host_start = host_start as u32;
+                let host_end = host_end as u32;
                 return Ok(Url {
                     serialization: self.serialization,
                     scheme_end: scheme_end,
-                    username_end: path_start,
-                    host_start: path_start,
-                    host_end: path_start,
-                    host: HostInternal::None,
+                    username_end: host_start,
+                    host_start,
+                    host_end,
+                    host,
                     port: None,
-                    path_start: path_start,
+                    path_start: host_end,
                     query_start: query_start,
                     fragment_start: fragment_start,
                 });

From 54a158b7a239a4cca5c1c142aaafe4474a18ed4a Mon Sep 17 00:00:00 2001
From: Jeremy Lempereur <jeremy.lempereur@gmail.com>
Date: Tue, 30 Jul 2019 13:52:13 +0200
Subject: [PATCH 06/15] Path and file parsing.

---
 src/host.rs          |   6 +-
 src/lib.rs           |  41 +++++--
 src/parser.rs        | 256 +++++++++++++++++++++++++++++++++++--------
 src/path_segments.rs |  15 ++-
 src/quirks.rs        |  27 ++++-
 tests/unit.rs        |  18 ++-
 6 files changed, 300 insertions(+), 63 deletions(-)

diff --git a/src/host.rs b/src/host.rs
index 9afc6d8e7..238d523ed 100644
--- a/src/host.rs
+++ b/src/host.rs
@@ -24,9 +24,13 @@ pub(crate) enum HostInternal {
     Ipv6(Ipv6Addr),
 }
 
-impl<S> From<Host<S>> for HostInternal {
+impl<S> From<Host<S>> for HostInternal
+where
+    S: ToString,
+{
     fn from(host: Host<S>) -> HostInternal {
         match host {
+            Host::Domain(ref s) if s.to_string().is_empty() => HostInternal::None,
             Host::Domain(_) => HostInternal::Domain,
             Host::Ipv4(address) => HostInternal::Ipv4(address),
             Host::Ipv6(address) => HostInternal::Ipv6(address),
diff --git a/src/lib.rs b/src/lib.rs
index d60935c29..536cc1199 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -456,13 +456,15 @@ impl Url {
 
         if self.slice(self.scheme_end + 1..).starts_with("//") {
             // URL with authority
-            match self.byte_at(self.username_end) {
-                b':' => {
-                    assert!(self.host_start >= self.username_end + 2);
-                    assert_eq!(self.byte_at(self.host_start - 1), b'@');
+            if self.username_end != self.serialization.len() as u32 {
+                match self.byte_at(self.username_end) {
+                    b':' => {
+                        assert!(self.host_start >= self.username_end + 2);
+                        assert_eq!(self.byte_at(self.host_start - 1), b'@');
+                    }
+                    b'@' => assert!(self.host_start == self.username_end + 1),
+                    _ => assert_eq!(self.username_end, self.scheme_end + 3),
                 }
-                b'@' => assert!(self.host_start == self.username_end + 1),
-                _ => assert_eq!(self.username_end, self.scheme_end + 3),
             }
             assert!(self.host_start >= self.username_end);
             assert!(self.host_end >= self.host_start);
@@ -490,7 +492,10 @@ impl Url {
                     Some(port_str.parse::<u16>().expect("Couldn't parse port?"))
                 );
             }
-            assert_eq!(self.byte_at(self.path_start), b'/');
+            assert!(
+                self.path_start as usize == self.serialization.len()
+                    || matches!(self.byte_at(self.path_start), b'/' | b'#' | b'?')
+            );
         } else {
             // Anarchist URL (no authority)
             assert_eq!(self.username_end, self.scheme_end + 1);
@@ -501,11 +506,11 @@ impl Url {
             assert_eq!(self.path_start, self.scheme_end + 1);
         }
         if let Some(start) = self.query_start {
-            assert!(start > self.path_start);
+            assert!(start >= self.path_start);
             assert_eq!(self.byte_at(start), b'?');
         }
         if let Some(start) = self.fragment_start {
-            assert!(start > self.path_start);
+            assert!(start >= self.path_start);
             assert_eq!(self.byte_at(start), b'#');
         }
         if let (Some(query_start), Some(fragment_start)) = (self.query_start, self.fragment_start) {
@@ -745,7 +750,10 @@ impl Url {
     pub fn password(&self) -> Option<&str> {
         // This ':' is not the one marking a port number since a host can not be empty.
         // (Except for file: URLs, which do not have port numbers.)
-        if self.has_authority() && self.byte_at(self.username_end) == b':' {
+        if self.has_authority()
+            && self.username_end != self.serialization.len() as u32
+            && self.byte_at(self.username_end) == b':'
+        {
             debug_assert!(self.byte_at(self.host_start - 1) == b'@');
             Some(self.slice(self.username_end + 1..self.host_start - 1))
         } else {
@@ -1226,7 +1234,7 @@ impl Url {
         if let Some(input) = fragment {
             self.fragment_start = Some(to_u32(self.serialization.len()).unwrap());
             self.serialization.push('#');
-            self.mutate(|parser| parser.parse_fragment(parser::Input::new(input)))
+            self.mutate(|parser| parser.parse_fragment(parser::Input::no_trim(input)))
         } else {
             self.fragment_start = None
         }
@@ -1284,7 +1292,12 @@ impl Url {
             let scheme_type = SchemeType::from(self.scheme());
             let scheme_end = self.scheme_end;
             self.mutate(|parser| {
-                parser.parse_query(scheme_type, scheme_end, parser::Input::new(input))
+                let vfn = parser.violation_fn;
+                parser.parse_query(
+                    scheme_type,
+                    scheme_end,
+                    parser::Input::trim_tab_and_newlines(input, vfn),
+                )
             });
         }
 
@@ -1390,8 +1403,12 @@ impl Url {
                 }
                 parser.parse_cannot_be_a_base_path(parser::Input::new(path));
             } else {
+                let path_start = parser.serialization.len();
                 let mut has_host = true; // FIXME
                 parser.parse_path_start(scheme_type, &mut has_host, parser::Input::new(path));
+                if scheme_type.is_file() {
+                    parser::trim_path(&mut parser.serialization, path_start);
+                }
             }
         });
         self.restore_after_path(old_after_path_pos, &after_path);
diff --git a/src/parser.rs b/src/parser.rs
index 8daac86df..7cd1dbd8b 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -201,6 +201,30 @@ impl<'i> Input<'i> {
         Input::with_log(input, None)
     }
 
+    pub fn no_trim(input: &'i str) -> Self {
+        Input {
+            chars: input.chars(),
+        }
+    }
+
+    pub fn trim_tab_and_newlines(
+        original_input: &'i str,
+        vfn: Option<&dyn Fn(SyntaxViolation)>,
+    ) -> Self {
+        let input = original_input.trim_matches(ascii_tab_or_new_line);
+        if let Some(vfn) = vfn {
+            if input.len() < original_input.len() {
+                vfn(SyntaxViolation::C0SpaceIgnored)
+            }
+            if input.chars().any(|c| matches!(c, '\t' | '\n' | '\r')) {
+                vfn(SyntaxViolation::TabOrNewlineIgnored)
+            }
+        }
+        Input {
+            chars: input.chars(),
+        }
+    }
+
     pub fn with_log(original_input: &'i str, vfn: Option<&dyn Fn(SyntaxViolation)>) -> Self {
         let input = original_input.trim_matches(c0_control_or_space);
         if let Some(vfn) = vfn {
@@ -515,6 +539,8 @@ impl<'a> Parser<'a> {
                     self.serialization.push('/');
                     self.parse_path(SchemeType::File, &mut has_host, path_start, remaining)
                 };
+
+                trim_path(&mut self.serialization, host_end as usize);
                 // For file URLs that have a host and whose path starts
                 // with the windows drive letter we just remove the host.
                 if !has_host {
@@ -556,16 +582,27 @@ impl<'a> Parser<'a> {
                         }
                     }
                 }
-                self.serialization.push('/');
-                let remaining = self.parse_path(
-                    SchemeType::File,
-                    &mut false,
-                    host_end,
-                    input_after_first_char,
-                );
+                // If c is the EOF code point, U+002F (/), U+005C (\), U+003F (?), or U+0023 (#), then decrease pointer by one
+                let parse_path_input = if let Some(c) = first_char {
+                    if c == '/' || c == '\\' || c == '?' || c == '#' {
+                        input
+                    } else {
+                        input_after_first_char
+                    }
+                } else {
+                    input_after_first_char
+                };
+
+                let remaining =
+                    self.parse_path(SchemeType::File, &mut false, host_end, parse_path_input);
+
+                let host_start = host_start as u32;
+
+                trim_path(&mut self.serialization, host_end);
+
                 let (query_start, fragment_start) =
                     self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?;
-                let host_start = host_start as u32;
+
                 let host_end = host_end as u32;
                 return Ok(Url {
                     serialization: self.serialization,
@@ -620,7 +657,7 @@ impl<'a> Parser<'a> {
                             (Some(i), _) | (None, Some(i)) => base_url.slice(..i),
                         };
                         self.serialization.push_str(before_query);
-                        self.pop_path(SchemeType::File, base_url.path_start as usize);
+                        self.shorten_path(SchemeType::File, base_url.path_start as usize);
                         let remaining = self.parse_path(
                             SchemeType::File,
                             &mut true,
@@ -739,12 +776,14 @@ impl<'a> Parser<'a> {
                     debug_assert!(base_url.byte_at(scheme_end) == b':');
                     self.serialization
                         .push_str(base_url.slice(..scheme_end + 1));
+                    if let Some(after_prefix) = input.split_prefix("//") {
+                        return self.after_double_slash(after_prefix, scheme_type, scheme_end);
+                    }
                     return self.after_double_slash(remaining, scheme_type, scheme_end);
                 }
                 let path_start = base_url.path_start;
-                debug_assert!(base_url.byte_at(path_start) == b'/');
-                self.serialization
-                    .push_str(base_url.slice(..path_start + 1));
+                self.serialization.push_str(base_url.slice(..path_start));
+                self.serialization.push_str("/");
                 let remaining = self.parse_path(
                     scheme_type,
                     &mut true,
@@ -771,8 +810,24 @@ impl<'a> Parser<'a> {
                 self.serialization.push_str(before_query);
                 // FIXME spec says just "remove last entry", not the "pop" algorithm
                 self.pop_path(scheme_type, base_url.path_start as usize);
-                let remaining =
-                    self.parse_path(scheme_type, &mut true, base_url.path_start as usize, input);
+                // A special url always has a path.
+                // A path always starts with '/'
+                if self.serialization.len() == base_url.path_start as usize {
+                    if SchemeType::from(base_url.scheme()).is_special() || !input.is_empty() {
+                        self.serialization.push('/');
+                    }
+                }
+                let remaining = match input.split_first() {
+                    (Some('/'), remaining) => self.parse_path(
+                        scheme_type,
+                        &mut true,
+                        base_url.path_start as usize,
+                        remaining,
+                    ),
+                    _ => {
+                        self.parse_path(scheme_type, &mut true, base_url.path_start as usize, input)
+                    }
+                };
                 self.with_query_and_fragment(
                     scheme_type,
                     base_url.scheme_end,
@@ -946,7 +1001,7 @@ impl<'a> Parser<'a> {
                 host_str = &input_str[..bytes]
             }
         }
-        if scheme_type.is_special() && host_str.is_empty() {
+        if scheme_type == SchemeType::SpecialNotFile && host_str.is_empty() {
             return Err(ParseError::EmptyHost);
         }
         if !scheme_type.is_special() {
@@ -1040,21 +1095,34 @@ impl<'a> Parser<'a> {
         &mut self,
         scheme_type: SchemeType,
         has_host: &mut bool,
-        mut input: Input<'i>,
+        input: Input<'i>,
     ) -> Input<'i> {
-        // Path start state
-        match input.split_first() {
-            (Some('/'), remaining) => input = remaining,
-            (Some('\\'), remaining) => {
-                if scheme_type.is_special() {
-                    self.log_violation(SyntaxViolation::Backslash);
-                    input = remaining
+        let path_start = self.serialization.len();
+        let (maybe_c, remaining) = input.split_first();
+        // If url is special, then:
+        if scheme_type.is_special() {
+            if maybe_c == Some('\\') {
+                // If c is U+005C (\), validation error.
+                self.log_violation(SyntaxViolation::Backslash);
+            }
+            // A special URL always has a non-empty path.
+            if !self.serialization.ends_with("/") {
+                self.serialization.push('/');
+                // We have already made sure the forward slash is present.
+                if maybe_c == Some('/') || maybe_c == Some('\\') {
+                    return self.parse_path(scheme_type, has_host, path_start, remaining);
                 }
             }
-            _ => {}
+            return self.parse_path(scheme_type, has_host, path_start, input);
+        } else if maybe_c == Some('?') || maybe_c == Some('#') {
+            // Otherwise, if state override is not given and c is U+003F (?),
+            // set url’s query to the empty string and state to query state.
+            // Otherwise, if state override is not given and c is U+0023 (#),
+            // set url’s fragment to the empty string and state to fragment state.
+            // The query and fragment states will be handled by the caller.
+            return input;
         }
-        let path_start = self.serialization.len();
-        self.serialization.push('/');
+        // Otherwise, if c is not the EOF code point:
         self.parse_path(scheme_type, has_host, path_start, input)
     }
 
@@ -1066,7 +1134,6 @@ impl<'a> Parser<'a> {
         mut input: Input<'i>,
     ) -> Input<'i> {
         // Relative path state
-        debug_assert!(self.serialization.ends_with('/'));
         loop {
             let segment_start = self.serialization.len();
             let mut ends_with_slash = false;
@@ -1079,6 +1146,7 @@ impl<'a> Parser<'a> {
                 };
                 match c {
                     '/' if self.context != Context::PathSegmentSetter => {
+                        self.serialization.push(c);
                         ends_with_slash = true;
                         break;
                     }
@@ -1086,6 +1154,7 @@ impl<'a> Parser<'a> {
                         && scheme_type.is_special() =>
                     {
                         self.log_violation(SyntaxViolation::Backslash);
+                        self.serialization.push('/');
                         ends_with_slash = true;
                         break;
                     }
@@ -1109,35 +1178,57 @@ impl<'a> Parser<'a> {
                     }
                 }
             }
-            match &self.serialization[segment_start..] {
+
+            let segment_before_slash = if ends_with_slash {
+                &self.serialization[segment_start..self.serialization.len() - 1]
+            } else {
+                &self.serialization[segment_start..self.serialization.len()]
+            };
+            match segment_before_slash {
+                // If buffer is a double-dot path segment, shorten url’s path,
                 ".." | "%2e%2e" | "%2e%2E" | "%2E%2e" | "%2E%2E" | "%2e." | "%2E." | ".%2e"
                 | ".%2E" => {
                     debug_assert!(self.serialization.as_bytes()[segment_start - 1] == b'/');
-                    self.serialization.truncate(segment_start - 1); // Truncate "/.."
-                    self.pop_path(scheme_type, path_start);
-                    if !self.serialization[path_start..].ends_with('/') {
-                        self.serialization.push('/')
+                    self.serialization.truncate(segment_start);
+                    if self.serialization.ends_with("/")
+                        && Parser::last_slash_can_be_removed(&self.serialization, path_start)
+                    {
+                        self.serialization.pop();
+                    }
+                    self.shorten_path(scheme_type, path_start);
+
+                    // and then if neither c is U+002F (/), nor url is special and c is U+005C (\), append the empty string to url’s path.
+                    if ends_with_slash && !self.serialization.ends_with("/") {
+                        self.serialization.push('/');
                     }
                 }
+                // Otherwise, if buffer is a single-dot path segment and if neither c is U+002F (/),
+                // nor url is special and c is U+005C (\), append the empty string to url’s path.
                 "." | "%2e" | "%2E" => {
                     self.serialization.truncate(segment_start);
+                    if !self.serialization.ends_with("/") {
+                        self.serialization.push('/');
+                    }
                 }
                 _ => {
-                    if scheme_type.is_file()
-                        && is_windows_drive_letter(&self.serialization[path_start + 1..])
-                    {
-                        if self.serialization.ends_with('|') {
-                            self.serialization.pop();
+                    // If url’s scheme is "file", url’s path is empty, and buffer is a Windows drive letter, then
+                    if scheme_type.is_file() && is_windows_drive_letter(segment_before_slash) {
+                        // Replace the second code point in buffer with U+003A (:).
+                        if let Some(c) = segment_before_slash.chars().nth(0) {
+                            self.serialization.truncate(segment_start);
+                            self.serialization.push(c);
                             self.serialization.push(':');
+                            if ends_with_slash {
+                                self.serialization.push('/');
+                            }
                         }
+                        // If url’s host is neither the empty string nor null,
+                        // validation error, set url’s host to the empty string.
                         if *has_host {
                             self.log_violation(SyntaxViolation::FileWithHostAndWindowsDrive);
                             *has_host = false; // FIXME account for this in callers
                         }
                     }
-                    if ends_with_slash {
-                        self.serialization.push('/')
-                    }
                 }
             }
             if !ends_with_slash {
@@ -1147,6 +1238,39 @@ impl<'a> Parser<'a> {
         input
     }
 
+    fn last_slash_can_be_removed(serialization: &String, path_start: usize) -> bool {
+        let url_before_segment = &serialization[..serialization.len() - 1];
+        if let Some(segment_before_start) = url_before_segment.rfind("/") {
+            // Do not remove the root slash
+            segment_before_start >= path_start
+                // Or a windows drive letter slash
+                && !path_starts_with_windows_drive_letter(&serialization[segment_before_start..])
+        } else {
+            false
+        }
+    }
+
+    /// https://url.spec.whatwg.org/#shorten-a-urls-path
+    fn shorten_path(&mut self, scheme_type: SchemeType, path_start: usize) {
+        // If path is empty, then return.
+        if self.serialization.len() == path_start {
+            return;
+        }
+        // If url’s scheme is "file", path’s size is 1, and path[0] is a normalized Windows drive letter, then return.
+        let segments: Vec<&str> = self.serialization[path_start..]
+            .split('/')
+            .filter(|s| !s.is_empty())
+            .collect();
+        if scheme_type.is_file()
+            && segments.len() == 1
+            && is_normalized_windows_drive_letter(segments[0])
+        {
+            return;
+        }
+        // Remove path’s last item.
+        self.pop_path(scheme_type, path_start);
+    }
+
     /// https://url.spec.whatwg.org/#pop-a-urls-path
     fn pop_path(&mut self, scheme_type: SchemeType, path_start: usize) {
         if self.serialization.len() > path_start {
@@ -1154,9 +1278,8 @@ impl<'a> Parser<'a> {
             // + 1 since rfind returns the position before the slash.
             let segment_start = path_start + slash_position + 1;
             // Don’t pop a Windows drive letter
-            // FIXME: *normalized* Windows drive letter
             if !(scheme_type.is_file()
-                && is_windows_drive_letter(&self.serialization[segment_start..]))
+                && is_normalized_windows_drive_letter(&self.serialization[segment_start..]))
             {
                 self.serialization.truncate(segment_start);
             }
@@ -1318,6 +1441,18 @@ impl<'a> Parser<'a> {
     }
 }
 
+// Trim path start forward slashes when no authority is present
+// https://github.com/whatwg/url/issues/232
+pub fn trim_path(serialization: &mut String, path_start: usize) {
+    let path = serialization.split_off(path_start);
+    if path.starts_with("/") {
+        serialization.push('/');
+        serialization.push_str(&path.trim_start_matches("/"));
+    } else {
+        serialization.push_str(&path);
+    }
+}
+
 #[inline]
 fn is_ascii_hex_digit(c: char) -> bool {
     matches!(c, 'a'..='f' | 'A'..='F' | '0'..='9')
@@ -1355,6 +1490,12 @@ fn c0_control_or_space(ch: char) -> bool {
     ch <= ' ' // U+0000 to U+0020
 }
 
+/// https://infra.spec.whatwg.org/#ascii-tab-or-newline
+#[inline]
+pub fn ascii_tab_or_new_line(ch: char) -> bool {
+    matches!(ch, '\t' | '\r' | '\n')
+}
+
 /// https://url.spec.whatwg.org/#ascii-alpha
 #[inline]
 pub fn ascii_alpha(ch: char) -> bool {
@@ -1380,12 +1521,37 @@ fn is_windows_drive_letter(segment: &str) -> bool {
     segment.len() == 2 && starts_with_windows_drive_letter(segment)
 }
 
+/// Whether path starts with a root slash
+/// and a windows drive letter eg: "/c:" or "/a:/"
+fn path_starts_with_windows_drive_letter(s: &str) -> bool {
+    if let Some(c) = s.as_bytes().get(0) {
+        matches!(c, b'/' | b'\\' | b'?' | b'#') && starts_with_windows_drive_letter(&s[1..])
+    } else {
+        false
+    }
+}
+
 fn starts_with_windows_drive_letter(s: &str) -> bool {
-    ascii_alpha(s.as_bytes()[0] as char) && matches!(s.as_bytes()[1], b':' | b'|')
+    s.len() >= 2
+        && ascii_alpha(s.as_bytes()[0] as char)
+        && matches!(s.as_bytes()[1], b':' | b'|')
+        && (s.len() == 2 || matches!(s.as_bytes()[2], b'/' | b'\\' | b'?' | b'#'))
 }
 
+/// https://url.spec.whatwg.org/#start-with-a-windows-drive-letter
 fn starts_with_windows_drive_letter_segment(input: &Input) -> bool {
     let mut input = input.clone();
-    matches!((input.next(), input.next(), input.next()), (Some(a), Some(b), Some(c))
-             if ascii_alpha(a) && matches!(b, ':' | '|') && matches!(c, '/' | '\\' | '?' | '#'))
+    match (input.next(), input.next(), input.next()) {
+        // its first two code points are a Windows drive letter
+        // its third code point is U+002F (/), U+005C (\), U+003F (?), or U+0023 (#).
+        (Some(a), Some(b), Some(c))
+            if ascii_alpha(a) && matches!(b, ':' | '|') && matches!(c, '/' | '\\' | '?' | '#') =>
+        {
+            true
+        }
+        // its first two code points are a Windows drive letter
+        // its length is 2
+        (Some(a), Some(b), None) if ascii_alpha(a) && matches!(b, ':' | '|') => true,
+        _ => false,
+    }
 }
diff --git a/src/path_segments.rs b/src/path_segments.rs
index 97055e777..6f5679887 100644
--- a/src/path_segments.rs
+++ b/src/path_segments.rs
@@ -45,7 +45,15 @@ pub struct PathSegmentsMut<'a> {
 pub fn new(url: &mut Url) -> PathSegmentsMut {
     let after_path = url.take_after_path();
     let old_after_path_position = to_u32(url.serialization.len()).unwrap();
-    debug_assert!(url.byte_at(url.path_start) == b'/');
+    // Special urls always have a non empty path
+    if SchemeType::from(url.scheme()).is_special() {
+        debug_assert!(url.byte_at(url.path_start) == b'/');
+    } else {
+        debug_assert!(
+            url.serialization.len() == url.path_start as usize
+                || url.byte_at(url.path_start) == b'/'
+        );
+    }
     PathSegmentsMut {
         after_first_slash: url.path_start as usize + "/".len(),
         url,
@@ -212,7 +220,10 @@ impl<'a> PathSegmentsMut<'a> {
                 if matches!(segment, "." | "..") {
                     continue;
                 }
-                if parser.serialization.len() > path_start + 1 {
+                if parser.serialization.len() > path_start + 1
+                    // Non special url's path might still be empty
+                    || parser.serialization.len() == path_start
+                {
                     parser.serialization.push('/');
                 }
                 let mut has_host = true; // FIXME account for this?
diff --git a/src/quirks.rs b/src/quirks.rs
index 285ee21b6..ded278565 100644
--- a/src/quirks.rs
+++ b/src/quirks.rs
@@ -99,9 +99,13 @@ pub fn host(url: &Url) -> &str {
 
 /// Setter for https://url.spec.whatwg.org/#dom-url-host
 pub fn set_host(url: &mut Url, new_host: &str) -> Result<(), ()> {
+    // If context object’s url’s cannot-be-a-base-URL flag is set, then return.
     if url.cannot_be_a_base() {
         return Err(());
     }
+    // Host parsing rules are strict,
+    // We don't want to trim the input
+    let input = Input::no_trim(new_host);
     let host;
     let opt_port;
     {
@@ -121,6 +125,20 @@ pub fn set_host(url: &mut Url, new_host: &str) -> Result<(), ()> {
             Err(_) => return Err(()),
         }
     }
+    // Make sure we won't set an empty host to a url with a username or a port
+    if host == Host::Domain("".to_string()) {
+        if !username(&url).is_empty() {
+            return Err(());
+        }
+        if let Some(p) = opt_port {
+            if let Some(_) = p {
+                return Err(());
+            }
+        }
+        if url.port().is_some() {
+            return Err(());
+        }
+    }
     url.set_host_internal(host, opt_port);
     Ok(())
 }
@@ -182,7 +200,14 @@ pub fn pathname(url: &Url) -> &str {
 
 /// Setter for https://url.spec.whatwg.org/#dom-url-pathname
 pub fn set_pathname(url: &mut Url, new_pathname: &str) {
-    if !url.cannot_be_a_base() {
+    if url.cannot_be_a_base() {
+        return;
+    }
+    if Some('/') == new_pathname.chars().nth(0)
+        || SchemeType::from(url.scheme()).is_special()
+        // \ is a segment delimiter for 'special' URLs
+        && Some('\\') == new_pathname.chars().nth(0)
+    {
         url.set_path(new_pathname)
     }
 }
diff --git a/tests/unit.rs b/tests/unit.rs
index 9918ea316..82493828c 100644
--- a/tests/unit.rs
+++ b/tests/unit.rs
@@ -23,6 +23,20 @@ fn size() {
     assert_eq!(size_of::<Url>(), size_of::<Option<Url>>());
 }
 
+#[test]
+fn test_relative() {
+    let base: Url = "sc://%C3%B1".parse().unwrap();
+    let url = base.join("/resources/testharness.js").unwrap();
+    assert_eq!(url.as_str(), "sc://%C3%B1/resources/testharness.js");
+}
+
+#[test]
+fn test_relative_empty() {
+    let base: Url = "sc://%C3%B1".parse().unwrap();
+    let url = base.join("").unwrap();
+    assert_eq!(url.as_str(), "sc://%C3%B1");
+}
+
 macro_rules! assert_from_file_path {
     ($path: expr) => {
         assert_from_file_path!($path, $path)
@@ -413,9 +427,9 @@ fn test_set_host() {
     assert_eq!(url.as_str(), "foobar:/hello");
 
     let mut url = Url::parse("foo://ș").unwrap();
-    assert_eq!(url.as_str(), "foo://%C8%99/");
+    assert_eq!(url.as_str(), "foo://%C8%99");
     url.set_host(Some("goșu.ro")).unwrap();
-    assert_eq!(url.as_str(), "foo://go%C8%99u.ro/");
+    assert_eq!(url.as_str(), "foo://go%C8%99u.ro");
 }
 
 #[test]

From 0586854c8b778c9d14bccd1a0213bc2263e9345a Mon Sep 17 00:00:00 2001
From: Jeremy Lempereur <jeremy.lempereur@gmail.com>
Date: Sat, 20 Jul 2019 23:31:39 +0200
Subject: [PATCH 07/15] Host parsing rules.

---
 src/host.rs   |  9 ++----
 src/lib.rs    | 21 +++++++++++--
 src/parser.rs | 85 ++++++++++++++++++++++++++++++++++++++++-----------
 src/quirks.rs | 50 ++++++++++++++++++++++--------
 4 files changed, 127 insertions(+), 38 deletions(-)

diff --git a/src/host.rs b/src/host.rs
index 238d523ed..02bae9e25 100644
--- a/src/host.rs
+++ b/src/host.rs
@@ -24,13 +24,10 @@ pub(crate) enum HostInternal {
     Ipv6(Ipv6Addr),
 }
 
-impl<S> From<Host<S>> for HostInternal
-where
-    S: ToString,
-{
-    fn from(host: Host<S>) -> HostInternal {
+impl From<Host<String>> for HostInternal {
+    fn from(host: Host<String>) -> HostInternal {
         match host {
-            Host::Domain(ref s) if s.to_string().is_empty() => HostInternal::None,
+            Host::Domain(ref s) if s.is_empty() => HostInternal::None,
             Host::Domain(_) => HostInternal::Domain,
             Host::Ipv4(address) => HostInternal::Ipv4(address),
             Host::Ipv6(address) => HostInternal::Ipv6(address),
diff --git a/src/lib.rs b/src/lib.rs
index 536cc1199..4969a6672 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -690,7 +690,7 @@ impl Url {
     /// ```
     #[inline]
     pub fn cannot_be_a_base(&self) -> bool {
-        !self.slice(self.path_start..).starts_with('/')
+        !self.slice(self.scheme_end + 1..).starts_with('/')
     }
 
     /// Return the username for this URL (typically the empty string)
@@ -1642,10 +1642,25 @@ impl Url {
             if host == "" && SchemeType::from(self.scheme()).is_special() {
                 return Err(ParseError::EmptyHost);
             }
+            let mut host_substr = host;
+            // Otherwise, if c is U+003A (:) and the [] flag is unset, then
+            if !host.starts_with('[') || !host.ends_with(']') {
+                match host.find(':') {
+                    Some(0) => {
+                        // If buffer is the empty string, validation error, return failure.
+                        return Err(ParseError::InvalidDomainCharacter);
+                    }
+                    // Let host be the result of host parsing buffer
+                    Some(colon_index) => {
+                        host_substr = &host[..colon_index];
+                    }
+                    None => {}
+                }
+            }
             if SchemeType::from(self.scheme()).is_special() {
-                self.set_host_internal(Host::parse(host)?, None)
+                self.set_host_internal(Host::parse(host_substr)?, None);
             } else {
-                self.set_host_internal(Host::parse_opaque(host)?, None)
+                self.set_host_internal(Host::parse_opaque(host_substr)?, None);
             }
         } else if self.has_host() {
             if SchemeType::from(self.scheme()).is_special() {
diff --git a/src/parser.rs b/src/parser.rs
index 7cd1dbd8b..b90cd7c7c 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -156,7 +156,7 @@ impl fmt::Display for SyntaxViolation {
     }
 }
 
-#[derive(Copy, Clone)]
+#[derive(Copy, Clone, PartialEq)]
 pub enum SchemeType {
     File,
     SpecialNotFile,
@@ -852,11 +852,16 @@ impl<'a> Parser<'a> {
         self.serialization.push('/');
         self.serialization.push('/');
         // authority state
+        let before_authority = self.serialization.len();
         let (username_end, remaining) = self.parse_userinfo(input, scheme_type)?;
+        let has_authority = before_authority != self.serialization.len();
         // host state
         let host_start = to_u32(self.serialization.len())?;
         let (host_end, host, port, remaining) =
             self.parse_host_and_port(remaining, scheme_end, scheme_type)?;
+        if host == HostInternal::None && has_authority {
+            return Err(ParseError::EmptyHost);
+        }
         // path state
         let path_start = to_u32(self.serialization.len())?;
         let remaining = self.parse_path_start(scheme_type, &mut true, remaining);
@@ -900,7 +905,18 @@ impl<'a> Parser<'a> {
         }
         let (mut userinfo_char_count, remaining) = match last_at {
             None => return Ok((to_u32(self.serialization.len())?, input)),
-            Some((0, remaining)) => return Ok((to_u32(self.serialization.len())?, remaining)),
+            Some((0, remaining)) => {
+                // Otherwise, if one of the following is true
+                // c is the EOF code point, U+002F (/), U+003F (?), or U+0023 (#)
+                // url is special and c is U+005C (\)
+                // If @ flag is set and buffer is the empty string, validation error, return failure.
+                if let (Some(c), _) = remaining.split_first() {
+                    if c == '/' || c == '?' || c == '#' || scheme_type.is_special() && c == '\\' {
+                        return Err(ParseError::EmptyHost);
+                    }
+                }
+                return Ok((to_u32(self.serialization.len())?, remaining));
+            }
             Some(x) => x,
         };
 
@@ -946,6 +962,18 @@ impl<'a> Parser<'a> {
         let (host, remaining) = Parser::parse_host(input, scheme_type)?;
         write!(&mut self.serialization, "{}", host).unwrap();
         let host_end = to_u32(self.serialization.len())?;
+        if let Host::Domain(h) = &host {
+            if h.is_empty() {
+                // Port with an empty host
+                if remaining.starts_with(":") {
+                    return Err(ParseError::EmptyHost);
+                }
+                if scheme_type.is_special() {
+                    return Err(ParseError::EmptyHost);
+                }
+            }
+        };
+
         let (port, remaining) = if let Some(remaining) = remaining.split_prefix(':') {
             let scheme = || default_port(&self.serialization[..scheme_end as usize]);
             Parser::parse_port(remaining, scheme, self.context)?
@@ -962,6 +990,9 @@ impl<'a> Parser<'a> {
         mut input: Input,
         scheme_type: SchemeType,
     ) -> ParseResult<(Host<String>, Input)> {
+        if scheme_type.is_file() {
+            return Parser::get_file_host(input);
+        }
         // Undo the Input abstraction here to avoid allocating in the common case
         // where the host part of the input does not contain any tab or newline
         let input_str = input.chars.as_str();
@@ -1012,10 +1043,41 @@ impl<'a> Parser<'a> {
         Ok((host, input))
     }
 
-    pub(crate) fn parse_file_host<'i>(
+    fn get_file_host<'i>(input: Input<'i>) -> ParseResult<(Host<String>, Input)> {
+        let (_, host_str, remaining) = Parser::file_host(input)?;
+        let host = match Host::parse(&host_str)? {
+            Host::Domain(ref d) if d == "localhost" => Host::Domain("".to_string()),
+            host => host,
+        };
+        Ok((host, remaining))
+    }
+
+    fn parse_file_host<'i>(
         &mut self,
         input: Input<'i>,
     ) -> ParseResult<(bool, HostInternal, Input<'i>)> {
+        let has_host;
+        let (_, host_str, remaining) = Parser::file_host(input)?;
+        let host = if host_str.is_empty() {
+            has_host = false;
+            HostInternal::None
+        } else {
+            match Host::parse(&host_str)? {
+                Host::Domain(ref d) if d == "localhost" => {
+                    has_host = false;
+                    HostInternal::None
+                }
+                host => {
+                    write!(&mut self.serialization, "{}", host).unwrap();
+                    has_host = true;
+                    host.into()
+                }
+            }
+        };
+        Ok((has_host, host, remaining))
+    }
+
+    pub fn file_host<'i>(input: Input<'i>) -> ParseResult<(bool, String, Input<'i>)> {
         // Undo the Input abstraction here to avoid allocating in the common case
         // where the host part of the input does not contain any tab or newline
         let input_str = input.chars.as_str();
@@ -1044,20 +1106,9 @@ impl<'a> Parser<'a> {
             }
         }
         if is_windows_drive_letter(host_str) {
-            return Ok((false, HostInternal::None, input));
+            return Ok((false, "".to_string(), input));
         }
-        let host = if host_str.is_empty() {
-            HostInternal::None
-        } else {
-            match Host::parse(host_str)? {
-                Host::Domain(ref d) if d == "localhost" => HostInternal::None,
-                host => {
-                    write!(&mut self.serialization, "{}", host).unwrap();
-                    host.into()
-                }
-            }
-        };
-        Ok((true, host, remaining))
+        Ok((true, host_str.to_string(), remaining))
     }
 
     pub fn parse_port<P>(
@@ -1492,7 +1543,7 @@ fn c0_control_or_space(ch: char) -> bool {
 
 /// https://infra.spec.whatwg.org/#ascii-tab-or-newline
 #[inline]
-pub fn ascii_tab_or_new_line(ch: char) -> bool {
+fn ascii_tab_or_new_line(ch: char) -> bool {
     matches!(ch, '\t' | '\r' | '\n')
 }
 
diff --git a/src/quirks.rs b/src/quirks.rs
index ded278565..b3ea3681f 100644
--- a/src/quirks.rs
+++ b/src/quirks.rs
@@ -12,6 +12,8 @@
 //! you probably want to use `Url` method instead.
 
 use parser::{default_port, Context, Input, Parser, SchemeType};
+use std::cell::RefCell;
+use SyntaxViolation;
 use {idna, Host, ParseError, Position, Url};
 
 /// https://url.spec.whatwg.org/#dom-url-domaintoascii
@@ -110,19 +112,22 @@ pub fn set_host(url: &mut Url, new_host: &str) -> Result<(), ()> {
     let opt_port;
     {
         let scheme = url.scheme();
-        let result = Parser::parse_host(Input::new(new_host), SchemeType::from(scheme));
-        match result {
-            Ok((h, remaining)) => {
-                host = h;
-                opt_port = if let Some(remaining) = remaining.split_prefix(':') {
+        let scheme_type = SchemeType::from(scheme);
+        if let Ok((h, remaining)) = Parser::parse_host(input, scheme_type) {
+            host = h;
+            opt_port = if let Some(remaining) = remaining.split_prefix(':') {
+                if remaining.is_empty() {
+                    None
+                } else {
                     Parser::parse_port(remaining, || default_port(scheme), Context::Setter)
                         .ok()
                         .map(|(port, _remaining)| port)
-                } else {
-                    None
-                };
-            }
-            Err(_) => return Err(()),
+                }
+            } else {
+                None
+            };
+        } else {
+            return Err(());
         }
     }
     // Make sure we won't set an empty host to a url with a username or a port
@@ -154,8 +159,25 @@ pub fn set_hostname(url: &mut Url, new_hostname: &str) -> Result<(), ()> {
     if url.cannot_be_a_base() {
         return Err(());
     }
-    let result = Parser::parse_host(Input::new(new_hostname), SchemeType::from(url.scheme()));
-    if let Ok((host, _remaining)) = result {
+    // Host parsing rules are strict,
+    // We don't want to trim the input
+    let input = Input::no_trim(new_hostname);
+    let scheme_type = SchemeType::from(url.scheme());
+    if let Ok((host, _remaining)) = Parser::parse_host(input, scheme_type) {
+        if let Host::Domain(h) = &host {
+            if h.is_empty() {
+                // Empty host on special not file url
+                if SchemeType::from(url.scheme()) == SchemeType::SpecialNotFile
+                    // Port with an empty host
+                    ||!port(&url).is_empty()
+                    // Empty host with includes credentials
+                    || !url.username().is_empty()
+                    || !url.password().unwrap_or(&"").is_empty()
+                {
+                    return Err(());
+                }
+            }
+        }
         url.set_host_internal(host, None);
         Ok(())
     } else {
@@ -209,6 +231,10 @@ pub fn set_pathname(url: &mut Url, new_pathname: &str) {
         && Some('\\') == new_pathname.chars().nth(0)
     {
         url.set_path(new_pathname)
+    } else {
+        let mut path_to_set = String::from("/");
+        path_to_set.push_str(new_pathname);
+        url.set_path(&path_to_set)
     }
 }
 

From 26ccc0d6ea46b9d244f6015177f29958fb0f84c3 Mon Sep 17 00:00:00 2001
From: Jeremy Lempereur <jeremy.lempereur@gmail.com>
Date: Sun, 21 Jul 2019 00:19:35 +0200
Subject: [PATCH 08/15] Hash getter and setter.

---
 src/quirks.rs | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/src/quirks.rs b/src/quirks.rs
index b3ea3681f..c7d163533 100644
--- a/src/quirks.rs
+++ b/src/quirks.rs
@@ -259,13 +259,14 @@ pub fn hash(url: &Url) -> &str {
 
 /// Setter for https://url.spec.whatwg.org/#dom-url-hash
 pub fn set_hash(url: &mut Url, new_hash: &str) {
-    if url.scheme() != "javascript" {
-        url.set_fragment(match new_hash {
-            "" => None,
-            _ if new_hash.starts_with('#') => Some(&new_hash[1..]),
-            _ => Some(new_hash),
-        })
-    }
+    url.set_fragment(match new_hash {
+        // If the given value is the empty string,
+        // then set context object’s url’s fragment to null and return.
+        "" => None,
+        // Let input be the given value with a single leading U+0023 (#) removed, if any.
+        _ if new_hash.starts_with('#') => Some(&new_hash[1..]),
+        _ => Some(new_hash),
+    })
 }
 
 fn trim(s: &str) -> &str {

From 7efdc53193adfdfd65c1d39bc7ad4762dd4c272b Mon Sep 17 00:00:00 2001
From: Jeremy Lempereur <jeremy.lempereur@gmail.com>
Date: Sat, 20 Jul 2019 12:36:32 +0200
Subject: [PATCH 09/15] Fix scheme setter

> test result: FAILED. 650 passed; 63 failed; 0 ignored; 0 measured
---
 src/lib.rs | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 72 insertions(+), 5 deletions(-)

diff --git a/src/lib.rs b/src/lib.rs
index 4969a6672..822e31091 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1663,8 +1663,13 @@ impl Url {
                 self.set_host_internal(Host::parse_opaque(host_substr)?, None);
             }
         } else if self.has_host() {
-            if SchemeType::from(self.scheme()).is_special() {
+            let scheme_type = SchemeType::from(self.scheme());
+            if scheme_type.is_special() {
                 return Err(ParseError::EmptyHost);
+            } else {
+                if self.serialization.len() == self.path_start as usize {
+                    self.serialization.push('/');
+                }
             }
             debug_assert!(self.byte_at(self.scheme_end) == b':');
             debug_assert!(self.byte_at(self.path_start) == b'/');
@@ -1967,14 +1972,28 @@ impl Url {
     ///
     /// # fn run() -> Result<(), ParseError> {
     /// let mut url = Url::parse("https://example.net")?;
-    /// let result = url.set_scheme("foo");
-    /// assert_eq!(url.as_str(), "foo://example.net/");
+    /// let result = url.set_scheme("http");
+    /// assert_eq!(url.as_str(), "http://example.net/");
     /// assert!(result.is_ok());
     /// # Ok(())
     /// # }
     /// # run().unwrap();
     /// ```
+    /// Change the URL’s scheme from `foo` to `bar`:
     ///
+    /// ```
+    /// use url::Url;
+    /// # use url::ParseError;
+    ///
+    /// # fn run() -> Result<(), ParseError> {
+    /// let mut url = Url::parse("foo://example.net")?;
+    /// let result = url.set_scheme("bar");
+    /// assert_eq!(url.as_str(), "bar://example.net");
+    /// assert!(result.is_ok());
+    /// # Ok(())
+    /// # }
+    /// # run().unwrap();
+    /// ```
     ///
     /// Cannot change URL’s scheme from `https` to `foõ`:
     ///
@@ -2007,14 +2026,55 @@ impl Url {
     /// # }
     /// # run().unwrap();
     /// ```
+    /// Cannot change the URL’s scheme from `foo` to `https`:
+    ///
+    /// ```
+    /// use url::Url;
+    /// # use url::ParseError;
+    ///
+    /// # fn run() -> Result<(), ParseError> {
+    /// let mut url = Url::parse("foo://example.net")?;
+    /// let result = url.set_scheme("https");
+    /// assert_eq!(url.as_str(), "foo://example.net");
+    /// assert!(result.is_err());
+    /// # Ok(())
+    /// # }
+    /// # run().unwrap();
+    /// ```
+    /// Cannot change the URL’s scheme from `http` to `foo`:
+    ///
+    /// ```
+    /// use url::Url;
+    /// # use url::ParseError;
+    ///
+    /// # fn run() -> Result<(), ParseError> {
+    /// let mut url = Url::parse("http://example.net")?;
+    /// let result = url.set_scheme("foo");
+    /// assert_eq!(url.as_str(), "http://example.net/");
+    /// assert!(result.is_err());
+    /// # Ok(())
+    /// # }
+    /// # run().unwrap();
+    /// ```
     pub fn set_scheme(&mut self, scheme: &str) -> Result<(), ()> {
         let mut parser = Parser::for_setter(String::new());
         let remaining = parser.parse_scheme(parser::Input::new(scheme))?;
-        if !remaining.is_empty()
-            || (!self.has_host() && SchemeType::from(&parser.serialization).is_special())
+        let new_scheme_type = SchemeType::from(&parser.serialization);
+        let old_scheme_type = SchemeType::from(self.scheme());
+        // If url’s scheme is a special scheme and buffer is not a special scheme, then return.
+        if new_scheme_type.is_special() && !old_scheme_type.is_special() ||
+            // If url’s scheme is not a special scheme and buffer is a special scheme, then return.
+            !new_scheme_type.is_special() && old_scheme_type.is_special() ||
+            // If url includes credentials or has a non-null port, and buffer is "file", then return.
+            // If url’s scheme is "file" and its host is an empty host or null, then return.
+            new_scheme_type.is_file() && self.has_authority()
         {
             return Err(());
         }
+
+        if !remaining.is_empty() || (!self.has_host() && new_scheme_type.is_special()) {
+            return Err(());
+        }
         let old_scheme_end = self.scheme_end;
         let new_scheme_end = to_u32(parser.serialization.len()).unwrap();
         let adjust = |index: &mut u32| {
@@ -2036,6 +2096,13 @@ impl Url {
 
         parser.serialization.push_str(self.slice(old_scheme_end..));
         self.serialization = parser.serialization;
+
+        // Update the port so it can be removed
+        // If it is the scheme's default
+        // We don't mind it silently failing
+        // If there was no port in the first place
+        let _ = self.set_port(self.port());
+
         Ok(())
     }
 

From 736d7bc7c305e2ea9f4b152d67b852d346a64ca8 Mon Sep 17 00:00:00 2001
From: o0Ignition0o <jeremy.lempereur@gmail.com>
Date: Sun, 4 Aug 2019 00:27:20 +0200
Subject: [PATCH 10/15] removing unused imports.

---
 src/quirks.rs | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/quirks.rs b/src/quirks.rs
index c7d163533..5104317d6 100644
--- a/src/quirks.rs
+++ b/src/quirks.rs
@@ -12,8 +12,6 @@
 //! you probably want to use `Url` method instead.
 
 use parser::{default_port, Context, Input, Parser, SchemeType};
-use std::cell::RefCell;
-use SyntaxViolation;
 use {idna, Host, ParseError, Position, Url};
 
 /// https://url.spec.whatwg.org/#dom-url-domaintoascii

From a9ca033439001d05154dc4afd053570f4bfc0928 Mon Sep 17 00:00:00 2001
From: o0Ignition0o <jeremy.lempereur@gmail.com>
Date: Sun, 4 Aug 2019 01:02:45 +0200
Subject: [PATCH 11/15] Pleasing the 1.33.0 borrow checker.

---
 src/lib.rs    |  3 ++-
 src/parser.rs | 29 ++++++++++++++++-------------
 2 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/src/lib.rs b/src/lib.rs
index 822e31091..95a369599 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -2101,7 +2101,8 @@ impl Url {
         // If it is the scheme's default
         // We don't mind it silently failing
         // If there was no port in the first place
-        let _ = self.set_port(self.port());
+        let previous_port = self.port();
+        let _ = self.set_port(previous_port);
 
         Ok(())
     }
diff --git a/src/parser.rs b/src/parser.rs
index b90cd7c7c..2749e5dbc 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -1229,12 +1229,13 @@ impl<'a> Parser<'a> {
                     }
                 }
             }
-
-            let segment_before_slash = if ends_with_slash {
-                &self.serialization[segment_start..self.serialization.len() - 1]
+            // Going from &str to String to &str to please the 1.33.0 borrow checker
+            let before_slash_string = if ends_with_slash {
+                self.serialization[segment_start..self.serialization.len() - 1].to_owned()
             } else {
-                &self.serialization[segment_start..self.serialization.len()]
+                self.serialization[segment_start..self.serialization.len()].to_owned()
             };
+            let segment_before_slash: &str = &before_slash_string;
             match segment_before_slash {
                 // If buffer is a double-dot path segment, shorten url’s path,
                 ".." | "%2e%2e" | "%2e%2E" | "%2E%2e" | "%2E%2E" | "%2e." | "%2E." | ".%2e"
@@ -1307,16 +1308,18 @@ impl<'a> Parser<'a> {
         if self.serialization.len() == path_start {
             return;
         }
-        // If url’s scheme is "file", path’s size is 1, and path[0] is a normalized Windows drive letter, then return.
-        let segments: Vec<&str> = self.serialization[path_start..]
-            .split('/')
-            .filter(|s| !s.is_empty())
-            .collect();
-        if scheme_type.is_file()
-            && segments.len() == 1
-            && is_normalized_windows_drive_letter(segments[0])
         {
-            return;
+            // If url’s scheme is "file", path’s size is 1, and path[0] is a normalized Windows drive letter, then return.
+            let segments: Vec<&str> = self.serialization[path_start..]
+                .split('/')
+                .filter(|s| !s.is_empty())
+                .collect();
+            if scheme_type.is_file()
+                && segments.len() == 1
+                && is_normalized_windows_drive_letter(segments[0])
+            {
+                return;
+            }
         }
         // Remove path’s last item.
         self.pop_path(scheme_type, path_start);

From 8ef48471a8f82658fbb2eddad4a785ba54122d2e Mon Sep 17 00:00:00 2001
From: o0Ignition0o <jeremy.lempereur@gmail.com>
Date: Mon, 5 Aug 2019 13:23:45 +0200
Subject: [PATCH 12/15] Make sure a windows drive letter segment always ends
 with a slash.

---
 src/lib.rs    | 18 ++++++++++++++++++
 src/parser.rs |  3 ++-
 tests/unit.rs | 26 ++++++++++++++++++++++++++
 3 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/src/lib.rs b/src/lib.rs
index 95a369599..05d4e56d9 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -2508,6 +2508,7 @@ fn path_to_file_url_segments_windows(
     }
     let mut components = path.components();
 
+    let host_start = serialization.len() + 1;
     let host_end;
     let host_internal;
     match components.next() {
@@ -2534,15 +2535,24 @@ fn path_to_file_url_segments_windows(
         _ => return Err(()),
     }
 
+    let mut path_only_has_prefix = true;
     for component in components {
         if component == Component::RootDir {
             continue;
         }
+        path_only_has_prefix = false;
         // FIXME: somehow work with non-unicode?
         let component = component.as_os_str().to_str().ok_or(())?;
         serialization.push('/');
         serialization.extend(percent_encode(component.as_bytes(), PATH_SEGMENT));
     }
+    // A windows drive letter must end with a slash.
+    if serialization.len() > host_start
+        && parser::is_windows_drive_letter(&serialization[host_start..])
+        && path_only_has_prefix
+    {
+        serialization.push('/');
+    }
     Ok((host_end, host_internal))
 }
 
@@ -2567,6 +2577,14 @@ fn file_url_segments_to_pathbuf(
         bytes.push(b'/');
         bytes.extend(percent_decode(segment.as_bytes()));
     }
+    // A windows drive letter must end with a slash.
+    if bytes.len() > 2 {
+        if matches!(bytes[bytes.len() -2], b'a'..=b'z' | b'A'..=b'Z')
+            && matches!(bytes[bytes.len() - 1], b':' | b'|')
+        {
+            bytes.push(b'/');
+        }
+    }
     let os_str = OsStr::from_bytes(&bytes);
     let path = PathBuf::from(os_str);
     debug_assert!(
diff --git a/src/parser.rs b/src/parser.rs
index 2749e5dbc..179caba67 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -1571,7 +1571,8 @@ fn is_normalized_windows_drive_letter(segment: &str) -> bool {
 
 /// Wether the scheme is file:, the path has a single segment, and that segment
 /// is a Windows drive letter
-fn is_windows_drive_letter(segment: &str) -> bool {
+#[inline]
+pub fn is_windows_drive_letter(segment: &str) -> bool {
     segment.len() == 2 && starts_with_windows_drive_letter(segment)
 }
 
diff --git a/tests/unit.rs b/tests/unit.rs
index 82493828c..41d0b8268 100644
--- a/tests/unit.rs
+++ b/tests/unit.rs
@@ -564,3 +564,29 @@ fn test_options_reuse() {
     assert_eq!(url.as_str(), "http://mozilla.org/sub/path");
     assert_eq!(*violations.borrow(), vec!(ExpectedDoubleSlash, Backslash));
 }
+
+/// https://github.com/servo/rust-url/issues/505
+#[cfg(windows)]
+#[test]
+fn test_url_from_file_path() {
+    use std::path::PathBuf;
+    use url::Url;
+
+    let p = PathBuf::from("c:///");
+    let u = Url::from_file_path(p).unwrap();
+    let path = u.to_file_path().unwrap();
+    assert_eq!("C:\\", path.to_str().unwrap());
+}
+
+/// https://github.com/servo/rust-url/issues/505
+#[cfg(not(windows))]
+#[test]
+fn test_url_from_file_path() {
+    use std::path::PathBuf;
+    use url::Url;
+
+    let p = PathBuf::from("/c:/");
+    let u = Url::from_file_path(p).unwrap();
+    let path = u.to_file_path().unwrap();
+    assert_eq!("/c:/", path.to_str().unwrap());
+}

From aeef54febed316165625321a6aaf54eabbd906d8 Mon Sep 17 00:00:00 2001
From: Jeremy Lempereur <jeremy.lempereur@gmail.com>
Date: Sun, 10 Nov 2019 16:10:55 +0100
Subject: [PATCH 13/15] trim file paths if needed.

---
 src/lib.rs    |  4 ----
 src/parser.rs | 24 +++++++++---------------
 2 files changed, 9 insertions(+), 19 deletions(-)

diff --git a/src/lib.rs b/src/lib.rs
index 05d4e56d9..2d432cc3b 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1403,12 +1403,8 @@ impl Url {
                 }
                 parser.parse_cannot_be_a_base_path(parser::Input::new(path));
             } else {
-                let path_start = parser.serialization.len();
                 let mut has_host = true; // FIXME
                 parser.parse_path_start(scheme_type, &mut has_host, parser::Input::new(path));
-                if scheme_type.is_file() {
-                    parser::trim_path(&mut parser.serialization, path_start);
-                }
             }
         });
         self.restore_after_path(old_after_path_pos, &after_path);
diff --git a/src/parser.rs b/src/parser.rs
index 179caba67..54369adf8 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -540,7 +540,6 @@ impl<'a> Parser<'a> {
                     self.parse_path(SchemeType::File, &mut has_host, path_start, remaining)
                 };
 
-                trim_path(&mut self.serialization, host_end as usize);
                 // For file URLs that have a host and whose path starts
                 // with the windows drive letter we just remove the host.
                 if !has_host {
@@ -598,8 +597,6 @@ impl<'a> Parser<'a> {
 
                 let host_start = host_start as u32;
 
-                trim_path(&mut self.serialization, host_end);
-
                 let (query_start, fragment_start) =
                     self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?;
 
@@ -1287,6 +1284,15 @@ impl<'a> Parser<'a> {
                 break;
             }
         }
+        if scheme_type.is_file() {
+            // while url’s path’s size is greater than 1
+            // and url’s path[0] is the empty string,
+            // validation error, remove the first item from url’s path.
+            //FIXME: log violation
+            let path = self.serialization.split_off(path_start);
+            self.serialization.push('/');
+            self.serialization.push_str(&path.trim_start_matches("/"));
+        }
         input
     }
 
@@ -1495,18 +1501,6 @@ impl<'a> Parser<'a> {
     }
 }
 
-// Trim path start forward slashes when no authority is present
-// https://github.com/whatwg/url/issues/232
-pub fn trim_path(serialization: &mut String, path_start: usize) {
-    let path = serialization.split_off(path_start);
-    if path.starts_with("/") {
-        serialization.push('/');
-        serialization.push_str(&path.trim_start_matches("/"));
-    } else {
-        serialization.push_str(&path);
-    }
-}
-
 #[inline]
 fn is_ascii_hex_digit(c: char) -> bool {
     matches!(c, 'a'..='f' | 'A'..='F' | '0'..='9')

From 925ec94a6d0e2c3e66289f9922f90726d6b21e7f Mon Sep 17 00:00:00 2001
From: Jeremy Lempereur <jeremy.lempereur@gmail.com>
Date: Sun, 10 Nov 2019 16:41:37 +0100
Subject: [PATCH 14/15] Avoid allocation when checking for windows drive
 letters.

---
 src/parser.rs | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/src/parser.rs b/src/parser.rs
index 54369adf8..00458365a 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -1314,18 +1314,11 @@ impl<'a> Parser<'a> {
         if self.serialization.len() == path_start {
             return;
         }
+        // If url’s scheme is "file", path’s size is 1, and path[0] is a normalized Windows drive letter, then return.
+        if scheme_type.is_file()
+            && is_normalized_windows_drive_letter(&self.serialization[path_start..])
         {
-            // If url’s scheme is "file", path’s size is 1, and path[0] is a normalized Windows drive letter, then return.
-            let segments: Vec<&str> = self.serialization[path_start..]
-                .split('/')
-                .filter(|s| !s.is_empty())
-                .collect();
-            if scheme_type.is_file()
-                && segments.len() == 1
-                && is_normalized_windows_drive_letter(segments[0])
-            {
-                return;
-            }
+            return;
         }
         // Remove path’s last item.
         self.pop_path(scheme_type, path_start);

From 446484009e49caca8147ba32cd7125c260d0246a Mon Sep 17 00:00:00 2001
From: o0Ignition0o <jeremy.lempereur@gmail.com>
Date: Sat, 7 Dec 2019 11:15:18 +0100
Subject: [PATCH 15/15] Comments and nits fixups.

---
 src/lib.rs    | 12 ++++++------
 src/parser.rs |  2 +-
 src/quirks.rs | 11 +++++------
 tests/unit.rs | 29 +++++++++++++++++++++++++++++
 4 files changed, 41 insertions(+), 13 deletions(-)

diff --git a/src/lib.rs b/src/lib.rs
index 2d432cc3b..2ad421d08 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -2058,12 +2058,12 @@ impl Url {
         let new_scheme_type = SchemeType::from(&parser.serialization);
         let old_scheme_type = SchemeType::from(self.scheme());
         // If url’s scheme is a special scheme and buffer is not a special scheme, then return.
-        if new_scheme_type.is_special() && !old_scheme_type.is_special() ||
+        if (new_scheme_type.is_special() && !old_scheme_type.is_special()) ||
             // If url’s scheme is not a special scheme and buffer is a special scheme, then return.
-            !new_scheme_type.is_special() && old_scheme_type.is_special() ||
+            (!new_scheme_type.is_special() && old_scheme_type.is_special()) ||
             // If url includes credentials or has a non-null port, and buffer is "file", then return.
             // If url’s scheme is "file" and its host is an empty host or null, then return.
-            new_scheme_type.is_file() && self.has_authority()
+            (new_scheme_type.is_file() && self.has_authority())
         {
             return Err(());
         }
@@ -2095,8 +2095,8 @@ impl Url {
 
         // Update the port so it can be removed
         // If it is the scheme's default
-        // We don't mind it silently failing
-        // If there was no port in the first place
+        // we don't mind it silently failing
+        // if there was no port in the first place
         let previous_port = self.port();
         let _ = self.set_port(previous_port);
 
@@ -2575,7 +2575,7 @@ fn file_url_segments_to_pathbuf(
     }
     // A windows drive letter must end with a slash.
     if bytes.len() > 2 {
-        if matches!(bytes[bytes.len() -2], b'a'..=b'z' | b'A'..=b'Z')
+        if matches!(bytes[bytes.len() - 2], b'a'..=b'z' | b'A'..=b'Z')
             && matches!(bytes[bytes.len() - 1], b':' | b'|')
         {
             bytes.push(b'/');
diff --git a/src/parser.rs b/src/parser.rs
index 00458365a..6c84ba412 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -908,7 +908,7 @@ impl<'a> Parser<'a> {
                 // url is special and c is U+005C (\)
                 // If @ flag is set and buffer is the empty string, validation error, return failure.
                 if let (Some(c), _) = remaining.split_first() {
-                    if c == '/' || c == '?' || c == '#' || scheme_type.is_special() && c == '\\' {
+                    if c == '/' || c == '?' || c == '#' || (scheme_type.is_special() && c == '\\') {
                         return Err(ParseError::EmptyHost);
                     }
                 }
diff --git a/src/quirks.rs b/src/quirks.rs
index 5104317d6..caab354cc 100644
--- a/src/quirks.rs
+++ b/src/quirks.rs
@@ -157,8 +157,7 @@ pub fn set_hostname(url: &mut Url, new_hostname: &str) -> Result<(), ()> {
     if url.cannot_be_a_base() {
         return Err(());
     }
-    // Host parsing rules are strict,
-    // We don't want to trim the input
+    // Host parsing rules are strict, so we don't want to trim the input
     let input = Input::no_trim(new_hostname);
     let scheme_type = SchemeType::from(url.scheme());
     if let Ok((host, _remaining)) = Parser::parse_host(input, scheme_type) {
@@ -168,7 +167,7 @@ pub fn set_hostname(url: &mut Url, new_hostname: &str) -> Result<(), ()> {
                 if SchemeType::from(url.scheme()) == SchemeType::SpecialNotFile
                     // Port with an empty host
                     ||!port(&url).is_empty()
-                    // Empty host with includes credentials
+                    // Empty host that includes credentials
                     || !url.username().is_empty()
                     || !url.password().unwrap_or(&"").is_empty()
                 {
@@ -224,9 +223,9 @@ pub fn set_pathname(url: &mut Url, new_pathname: &str) {
         return;
     }
     if Some('/') == new_pathname.chars().nth(0)
-        || SchemeType::from(url.scheme()).is_special()
-        // \ is a segment delimiter for 'special' URLs"
-        && Some('\\') == new_pathname.chars().nth(0)
+        || (SchemeType::from(url.scheme()).is_special()
+            // \ is a segment delimiter for 'special' URLs
+            && Some('\\') == new_pathname.chars().nth(0))
     {
         url.set_path(new_pathname)
     } else {
diff --git a/tests/unit.rs b/tests/unit.rs
index 41d0b8268..9cc7c53fe 100644
--- a/tests/unit.rs
+++ b/tests/unit.rs
@@ -37,6 +37,35 @@ fn test_relative_empty() {
     assert_eq!(url.as_str(), "sc://%C3%B1");
 }
 
+#[test]
+fn test_set_empty_host() {
+    let mut base: Url = "moz://foo:bar@servo/baz".parse().unwrap();
+    base.set_username("").unwrap();
+    assert_eq!(base.as_str(), "moz://:bar@servo/baz");
+    base.set_host(None).unwrap();
+    assert_eq!(base.as_str(), "moz:/baz");
+    base.set_host(Some("servo")).unwrap();
+    assert_eq!(base.as_str(), "moz://servo/baz");
+}
+
+#[test]
+fn test_set_empty_hostname() {
+    use url::quirks;
+    let mut base: Url = "moz://foo@servo/baz".parse().unwrap();
+    assert!(
+        quirks::set_hostname(&mut base, "").is_err(),
+        "setting an empty hostname to a url with a username should fail"
+    );
+    base = "moz://:pass@servo/baz".parse().unwrap();
+    assert!(
+        quirks::set_hostname(&mut base, "").is_err(),
+        "setting an empty hostname to a url with a password should fail"
+    );
+    base = "moz://servo/baz".parse().unwrap();
+    quirks::set_hostname(&mut base, "").unwrap();
+    assert_eq!(base.as_str(), "moz:///baz");
+}
+
 macro_rules! assert_from_file_path {
     ($path: expr) => {
         assert_from_file_path!($path, $path)