diff --git a/docs/html/topics/authentication.md b/docs/html/topics/authentication.md
index 981aab5abd7..20387d81778 100644
--- a/docs/html/topics/authentication.md
+++ b/docs/html/topics/authentication.md
@@ -16,6 +16,54 @@ token as the "username" and do not provide a password:
https://0123456789abcdef@pypi.company.com/simple
```
+When you specify several indexes, each of them can come with its own
+authentication information. When the domains and schemes of multiple
+indexes partially overlap, you can specify different authentication for each of them
+For example you can have:
+
+```
+PIP_INDEX_URL=https://build:password1@pkgs.dev.azure.com/feed1
+PIP_EXTRA_INDEX_URL=https://build:password2@pkgs.dev.azure.com/feed2
+```
+
+If you specify multiple identical index URLs with different authentication information,
+authentication from the first index will be used.
+
+```{versionchanged} 22.1
+The basic authentication is now compliant with RFC 7617
+```
+
+In compliance with [RFC7617](https://datatracker.ietf.org/doc/html/rfc7617#section-2.2) if the indexes
+overlap, the authentication information from the prefix-match will be reused for the longer index if
+the longer index does not contain the authentication information. In case multiple indexes are
+prefix-matching, the authentication of the first of the longest matching prefixes is used.
+
+For example in this case, build:password authentication will be used when authenticating with the extra
+index URL.
+
+```
+PIP_INDEX_URL=https://build:password@pkgs.dev.azure.com/
+PIP_EXTRA_INDEX_URL=https://pkgs.dev.azure.com/feed1
+```
+
+```{note}
+Prior to version 22.1 reusing of basic authentication between URLs was not RFC7617 compliant.
+This could lead to the situation that custom-built indexes could receive the authentication
+provided for the index path, to download files outside fof the security domain of the path.
+
+For example if your index at https://username:password@pypi.company.com/simple served files from
+https://pypi.company.com/file.tar.gz - the username and password provided for the "/simple" path
+was also used to authenticate download of the `file.tar.gz`. This is not RFC7617 compliant and as of
+version 22.1 it will not work automatically. If you encounter a problem where your files are being
+served from different security domain than your index and authentication is not used for them, you
+should (ideally) fix it on your server side or (as temporary workaround)
+specify your file download location as extra index url:
+
+PIP_EXTRA_INDEX_URL=https://username:password@pypi.company.com/
+
+```
+
+
### Percent-encoding special characters
```{versionadded} 10.0
diff --git a/news/10904.feature.rst b/news/10904.feature.rst
new file mode 100644
index 00000000000..b880c1e0405
--- /dev/null
+++ b/news/10904.feature.rst
@@ -0,0 +1,3 @@
+When you specify multiple indexes with common domains and schemes, basic authentication for each of
+the indexes can be different - compliant with RFC 7617. Previous versions of pip reused basic
+authentication credentials for all urls within the same domain.
diff --git a/src/pip/_internal/network/auth.py b/src/pip/_internal/network/auth.py
index e40ebfb2785..ef84c705e16 100644
--- a/src/pip/_internal/network/auth.py
+++ b/src/pip/_internal/network/auth.py
@@ -5,7 +5,7 @@
"""
import urllib.parse
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any, List, Optional, Tuple
from pip._vendor.requests.auth import AuthBase, HTTPBasicAuth
from pip._vendor.requests.models import Request, Response
@@ -76,7 +76,7 @@ def __init__(
) -> None:
self.prompting = prompting
self.index_urls = index_urls
- self.passwords: Dict[str, AuthInfo] = {}
+ self.passwords: List[Tuple[str, AuthInfo]] = []
# When the user is prompted to enter credentials and keyring is
# available, we will offer to save them. If the user accepts,
# this value is set to the credentials they entered. After the
@@ -163,6 +163,49 @@ def _get_new_credentials(
return username, password
+ def _get_matching_credentials(self, original_url: str) -> Optional[AuthInfo]:
+ """
+ Find matching credentials based on the longest matching prefix found.
+
+ According to https://datatracker.ietf.org/doc/html/rfc7617#section-2.2
+ the authentication scope is defined by removing the last part after
+ the last "/" of the resource - but in case of `pip` the end of path is always
+ "/project", so we can treat the `original_url` as full `authentication scope'
+ for the request. The RFC specifies that prefix matching of the scope is
+ also within the protection scope specified by the realm value, so we can
+ safely assume that if we find any match, the longest matching prefix is
+ the right authentication information we should use.
+
+ The specification does not decide which of the matching scopes should be
+ used if there are more of them. Our decision is to choose the longest
+ matching one. In case exactly the same prefix will be added several times,
+ the authentication information from the first one is used.
+
+ According to the RFC, the authentication scope includes both scheme and netloc,
+ Both have to match in order to share the authentication scope.
+
+ :param original_url: original URL of the request
+ :return: Stored Authentication info matching the authentication scope or
+ None if not found
+ """
+ max_matched_prefix_length = 0
+ matched_auth_info = None
+ no_auth_url = remove_auth_from_url(original_url)
+ parsed_original = urllib.parse.urlparse(no_auth_url)
+ for prefix, auth_info in self.passwords:
+ parsed_stored = urllib.parse.urlparse(prefix)
+ if (
+ parsed_stored.netloc != parsed_original.netloc
+ or parsed_stored.scheme != parsed_original.scheme
+ ):
+ # only consider match when both scheme and netloc match
+ continue
+ if no_auth_url.startswith(prefix):
+ if len(prefix) > max_matched_prefix_length:
+ matched_auth_info = auth_info
+ max_matched_prefix_length = len(prefix)
+ return matched_auth_info
+
def _get_url_and_credentials(
self, original_url: str
) -> Tuple[str, Optional[str], Optional[str]]:
@@ -184,12 +227,14 @@ def _get_url_and_credentials(
# Do this if either the username or the password is missing.
# This accounts for the situation in which the user has specified
# the username in the index url, but the password comes from keyring.
- if (username is None or password is None) and netloc in self.passwords:
- un, pw = self.passwords[netloc]
- # It is possible that the cached credentials are for a different username,
- # in which case the cache should be ignored.
- if username is None or username == un:
- username, password = un, pw
+ if username is None or password is None:
+ matched_credentials = self._get_matching_credentials(original_url)
+ if matched_credentials:
+ un, pw = matched_credentials
+ # It is possible that the cached credentials are for a
+ # different username, in which case the cache should be ignored.
+ if username is None or username == un:
+ username, password = un, pw
if username is not None or password is not None:
# Convert the username and password if they're None, so that
@@ -200,7 +245,7 @@ def _get_url_and_credentials(
password = password or ""
# Store any acquired credentials.
- self.passwords[netloc] = (username, password)
+ self.passwords.append((remove_auth_from_url(url), (username, password)))
assert (
# Credentials were found
@@ -273,7 +318,9 @@ def handle_401(self, resp: Response, **kwargs: Any) -> Response:
# Store the new username and password to use for future requests
self._credentials_to_save = None
if username is not None and password is not None:
- self.passwords[parsed.netloc] = (username, password)
+ self.passwords.append(
+ (remove_auth_from_url(resp.url), (username, password))
+ )
# Prompt to save the password to keyring
if save and self._should_save_password_to_keyring():
diff --git a/tests/functional/test_install_config.py b/tests/functional/test_install_config.py
index c90be5bb4da..e9ce041a096 100644
--- a/tests/functional/test_install_config.py
+++ b/tests/functional/test_install_config.py
@@ -432,7 +432,7 @@ def test_prioritize_url_credentials_over_netrc(
server.mock.side_effect = [
package_page(
{
- "simple-3.0.tar.gz": "/files/simple-3.0.tar.gz",
+ "simple-3.0.tar.gz": "/simple/files/simple-3.0.tar.gz",
}
),
authorization_response(str(data.packages / "simple-3.0.tar.gz")),
diff --git a/tests/unit/test_network_auth.py b/tests/unit/test_network_auth.py
index 5c0e5746281..eb0699a5454 100644
--- a/tests/unit/test_network_auth.py
+++ b/tests/unit/test_network_auth.py
@@ -1,10 +1,12 @@
import functools
from typing import Any, List, Optional, Tuple
+from unittest.mock import Mock, PropertyMock
import pytest
import pip._internal.network.auth
from pip._internal.network.auth import MultiDomainBasicAuth
+from src.pip._vendor.requests import PreparedRequest
from tests.lib.requests_mocks import MockConnection, MockRequest, MockResponse
@@ -50,40 +52,143 @@ def test_get_credentials_parses_correctly(
(username is None and password is None)
or
# Credentials were found and "cached" appropriately
- auth.passwords["example.com"] == (username, password)
+ (url, (username, password)) in auth.passwords
)
+def test_handle_prompt_for_password_successful() -> None:
+ auth = MultiDomainBasicAuth()
+ resp = Mock()
+ resp.status_code = 401
+ resp.url = "http://example.com"
+ resp.raw = Mock()
+ resp.content = PropertyMock()
+ resp.content = ""
+ resp.request = PreparedRequest()
+ resp.request.headers = {}
+ auth._prompt_for_password = Mock() # type: ignore[assignment]
+ auth._prompt_for_password.return_value = ("user", "password", True)
+ auth.handle_401(resp)
+ auth._prompt_for_password.assert_called_with("example.com")
+ expected = ("http://example.com", ("user", "password"))
+ assert len(auth.passwords) == 1
+ assert auth.passwords[0] == expected
+
+
+def test_handle_prompt_for_password_unsuccessful() -> None:
+ auth = MultiDomainBasicAuth()
+ resp = Mock()
+ resp.status_code = 401
+ resp.url = "http://example.com"
+ resp.raw = Mock()
+ resp.content = PropertyMock()
+ resp.content = ""
+ resp.request = PreparedRequest()
+ resp.request.headers = {}
+ auth._prompt_for_password = Mock() # type: ignore[assignment]
+ auth._prompt_for_password.return_value = (None, None, False)
+ auth.handle_401(resp)
+ auth._prompt_for_password.assert_called_with("example.com")
+ assert len(auth.passwords) == 0
+
+
def test_get_credentials_not_to_uses_cached_credentials() -> None:
auth = MultiDomainBasicAuth()
- auth.passwords["example.com"] = ("user", "pass")
+ auth.passwords.append(("http://example.com", ("user", "pass")))
got = auth._get_url_and_credentials("http://foo:bar@example.com/path")
expected = ("http://example.com/path", "foo", "bar")
assert got == expected
-def test_get_credentials_not_to_uses_cached_credentials_only_username() -> None:
+def test_get_credentials_not_to_use_cached_credentials_only_username() -> None:
+ auth = MultiDomainBasicAuth()
+ auth.passwords.append(("https://example.com", ("user", "pass")))
+
+ got = auth._get_url_and_credentials("https://foo@example.com/path")
+ expected = ("https://example.com/path", "foo", "")
+ assert got == expected
+
+
+def test_multi_domain_credentials_match() -> None:
+ auth = MultiDomainBasicAuth()
+ auth.passwords.append(("http://example.com", ("user", "pass")))
+ auth.passwords.append(("http://example.com/path", ("user", "pass2")))
+
+ got = auth._get_url_and_credentials("http://user@example.com/path")
+ expected = ("http://example.com/path", "user", "pass2")
+ assert got == expected
+
+
+def test_multi_domain_credentials_longest_match() -> None:
+ auth = MultiDomainBasicAuth()
+ auth.passwords.append(("http://example.com", ("user", "pass")))
+ auth.passwords.append(("http://example.com/path", ("user", "pass2")))
+ auth.passwords.append(("http://example.com/path/subpath", ("user", "pass3")))
+
+ got = auth._get_url_and_credentials("http://user@example.com/path")
+ expected = ("http://example.com/path", "user", "pass2")
+ assert got == expected
+
+
+def test_multi_domain_credentials_partial_match_only() -> None:
auth = MultiDomainBasicAuth()
- auth.passwords["example.com"] = ("user", "pass")
+ auth.passwords.append(("http://example.com/path1", ("user", "pass")))
- got = auth._get_url_and_credentials("http://foo@example.com/path")
- expected = ("http://example.com/path", "foo", "")
+ got = auth._get_url_and_credentials("http://example.com/path2")
+ expected = ("http://example.com/path2", None, None)
assert got == expected
def test_get_credentials_uses_cached_credentials() -> None:
auth = MultiDomainBasicAuth()
- auth.passwords["example.com"] = ("user", "pass")
+ auth.passwords.append(("https://example.com", ("user", "pass")))
+
+ got = auth._get_url_and_credentials("https://example.com/path")
+ expected = ("https://example.com/path", "user", "pass")
+ assert got == expected
+
+
+def test_get_credentials_not_uses_cached_credentials_different_scheme_http() -> None:
+ auth = MultiDomainBasicAuth()
+ auth.passwords.append(("http://example.com", ("user", "pass")))
+
+ got = auth._get_url_and_credentials("https://example.com/path")
+ expected = ("https://example.com/path", None, None)
+ assert got == expected
+
+
+def test_get_credentials_not_uses_cached_credentials_different_scheme_https() -> None:
+ auth = MultiDomainBasicAuth()
+ auth.passwords.append(("https://example.com", ("user", "pass")))
got = auth._get_url_and_credentials("http://example.com/path")
- expected = ("http://example.com/path", "user", "pass")
+ expected = ("http://example.com/path", None, None)
assert got == expected
def test_get_credentials_uses_cached_credentials_only_username() -> None:
auth = MultiDomainBasicAuth()
- auth.passwords["example.com"] = ("user", "pass")
+ auth.passwords.append(("http://example.com", ("user", "pass")))
+
+ got = auth._get_url_and_credentials("http://user@example.com/path")
+ expected = ("http://example.com/path", "user", "pass")
+ assert got == expected
+
+
+def test_get_credentials_uses_cached_credentials_wrong_username() -> None:
+ auth = MultiDomainBasicAuth()
+ auth.passwords.append(("http://example.com", ("user", "pass")))
+
+ got = auth._get_url_and_credentials("http://user2@example.com/path")
+ expected = ("http://example.com/path", "user2", "")
+ assert got == expected
+
+
+def test_get_credentials_added_multiple_times() -> None:
+ auth = MultiDomainBasicAuth()
+ auth.passwords.append(("http://example.com", ("user", "pass")))
+ auth.passwords.append(("http://example.com", ("user", "pass2")))
got = auth._get_url_and_credentials("http://user@example.com/path")
expected = ("http://example.com/path", "user", "pass")