diff --git a/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py b/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py
index 316b45861..380927089 100644
--- a/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py
+++ b/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py
@@ -19,7 +19,7 @@
import urllib.parse
import requests
-from requests.packages.urllib3.util.retry import Retry
+from urllib3.util.retry import Retry
from threatexchange.exchanges.clients.utils.common import TimeoutHTTPAdapter
@@ -123,6 +123,19 @@ class NCMECEntryType(Enum):
video = "video"
+@unique
+class NCMECFeedbackType(Enum):
+ """
+ Feedback types accepted by the feedback calls of the NCMEC hash API.
+
+ The member value is the spelling sent on the wire: it is used as a URL
+ path segment when fetching reasons and submitting feedback, and as the
+ key of NCMECHashAPI.reasons_map.
+ """
+
+ md5 = "MD5"
+ sha1 = "SHA1"
+ pdna = "PDNA"
+ pdq = "PDQ"
+ netclean = "NETCLEAN"
+ videntifier = "VIDENTIFIER"
+ tmk_pdqf = "TMK_PDQF"
+ ssvh_pdna = "SSVH_PDNA"
+ ssvh_safer_hash = "SSVH_SAFER_HASH"
+
+
@dataclass
class NCMECEntryUpdate:
id: str
@@ -131,6 +144,7 @@ class NCMECEntryUpdate:
deleted: bool
classification: t.Optional[str]
fingerprints: t.Dict[str, str]
+ feedback: t.List[t.Dict[str, t.Any]]
@classmethod
def from_xml(cls, xml: _XMLWrapper) -> "NCMECEntryUpdate":
@@ -148,6 +162,36 @@ def from_xml(cls, xml: _XMLWrapper) -> "NCMECEntryUpdate":
fingerprints={
x.tag: x.text for x in xml.maybe("fingerprints") if x.has_text
},
+ feedback=(
+ [
+ {
+ "sentiment": x.tag, # "affirmativeFeedback" or "negativeFeedback"
+ "type": x.str("type"),
+ "latest_feedback_time": x.str("lastUpdateTimestamp"),
+ "members": [
+ {"id": m.str("id"), "name": m.text}
+ for m in x.maybe("members")
+ if m.has_text
+ ],
+ "reasons": [
+ {
+ "guid": r.maybe("reason").str("guid"),
+ "name": r.maybe("reason").str("name"),
+ "type": r.maybe("reason").str("type"),
+ "members": [
+ {"id": m.str("id"), "name": m.text}
+ for m in x.maybe("members")
+ ],
+ }
+ for r in x.maybe("reasons")
+ if r.maybe("reason")
+ ],
+ }
+ for x in xml.maybe("feedback")
+ ]
+ if xml.maybe("feedback").has_text
+ else []
+ ),
)
@@ -215,11 +259,49 @@ def estimated_entries_in_range(self) -> int:
)
+# TODO: once we know the shape of response, finish this class
+@dataclass
+class UpdateEntryResponse:
+    """
+    Entry updates parsed out of an XML payload.
+
+    Still a stub (see the TODO above): it only gathers the children of the
+    optional <images> and <videos> elements.
+    """
+
+    updates: t.List[NCMECEntryUpdate]
+
+    @classmethod
+    def from_xml(
+        cls, xml: _XMLWrapper, fallback_max_time: int
+    ) -> "UpdateEntryResponse":
+        """
+        Build the response from a parsed XML payload.
+
+        `fallback_max_time` is accepted but not consulted by this parser.
+        """
+        # Both sections are looked up before any entry is parsed
+        images_xml = xml.maybe("images")
+        videos_xml = xml.maybe("videos")
+
+        parsed: t.List[NCMECEntryUpdate] = []
+        for section in (images_xml, videos_xml):
+            # Missing or empty sections contribute nothing
+            if section and len(section):
+                parsed += [NCMECEntryUpdate.from_xml(entry) for entry in section]
+
+        return cls(parsed)
+
+
+@dataclass
+class GetFeedbackReasonsResponse:
+    """
+    Feedback reasons parsed from a feedback "reasons" response.
+
+    Each reason is a dict with the keys "guid", "name" and "type".
+    """
+
+    reasons: t.List[t.Dict[str, str]]
+
+    @classmethod
+    def from_xml(cls, xml: _XMLWrapper) -> "GetFeedbackReasonsResponse":
+        """Read every child of the optional <availableFeedbackReasons> element."""
+        return cls(
+            [
+                {field: entry.str(field) for field in ("guid", "name", "type")}
+                for entry in xml.maybe("availableFeedbackReasons")
+            ]
+        )
+
+
@unique
class NCMECEndpoint(Enum):
status = "status"
entries = "entries"
members = "members"
+ feedback = "feedback"
class NCMECEnvironment(Enum):
@@ -261,15 +343,19 @@ def __init__(
username: str,
password: str,
environment: NCMECEnvironment,
+ member_id: t.Optional[str] = None,
+ reasons_map: t.Dict[str, t.List[t.Dict[str, str]]] = {},
) -> None:
assert is_valid_user_pass(username, password)
self.username = username
self.password = password
self._base_url = environment.value
+ self.member_id = member_id
+ self.reasons_map = reasons_map or {}
def _get_session(self) -> requests.Session:
"""
- Custom requests sesson
+ Custom requests session
Ideally, should be used within a context manager:
```
@@ -295,7 +381,9 @@ def _get_session(self) -> requests.Session:
)
return session
- def _get(self, endpoint: NCMECEndpoint, *, next_: str = "", **params) -> ET.Element:
+ def _get(
+ self, endpoint: NCMECEndpoint, *, path: str = "", next_: str = "", **params
+ ) -> ET.Element:
"""
Perform an HTTP GET request, and return the XML response payload.
@@ -303,6 +391,8 @@ def _get(self, endpoint: NCMECEndpoint, *, next_: str = "", **params) -> ET.Elem
"""
url = "/".join((self._base_url, self.VERSION, endpoint.value))
+ if path:
+ url = "/".join((url, path))
if next_:
url = self._base_url + next_
params = {}
@@ -328,16 +418,49 @@ def _post(self, endpoint: NCMECEndpoint, *, data=None) -> t.Any:
No timeout or retry strategy.
"""
- url = "/".join((self._base_url, endpoint.value))
+ url = "/".join((self._base_url, self.VERSION, endpoint.value))
with self._get_session() as session:
response = session.post(url, data=data)
response.raise_for_status()
return response
+    def _put(
+        self,
+        endpoint: NCMECEndpoint,
+        *,
+        member_id: t.Optional[str] = None,
+        entry_id: t.Optional[str] = None,
+        feedback_type: t.Optional[NCMECFeedbackType] = None,
+        data=None,
+    ) -> t.Any:
+        """
+        Perform an HTTP PUT request, and return the response object.
+
+        When member_id, entry_id and feedback_type are all given, the request
+        targets that entry's feedback resource:
+        <base>/<version>/<endpoint>/<member_id>/<entry_id>/<type>/feedback
+        Otherwise it targets <base>/<version>/<endpoint>.
+
+        Raises requests.HTTPError if the response status is an error.
+
+        No timeout or retry strategy.
+        """
+
+        url = "/".join((self._base_url, self.VERSION, endpoint.value))
+        if feedback_type and member_id and entry_id:
+            # Extend the versioned endpoint URL rather than rebuilding it from
+            # the base URL, so the API version segment is kept (as in _get
+            # and _post).
+            url = "/".join(
+                (
+                    url,
+                    member_id,
+                    entry_id,
+                    feedback_type.value,
+                    NCMECEndpoint.feedback.value,
+                )
+            )
+        with self._get_session() as session:
+            response = session.put(url, data=data)
+            response.raise_for_status()
+            return response
+
+
def status(self) -> StatusResult:
"""Query the status endpoint, which tells you who you are."""
response = self._get(NCMECEndpoint.status)
member = _XMLWrapper(response)["member"]
+ self.member_id = member.str("id")
return StatusResult(member.int("id"), member.text)
def members(self) -> t.List[StatusResult]:
@@ -348,6 +471,17 @@ def members(self) -> t.List[StatusResult]:
for member in _XMLWrapper(response)
]
+    def feedback_reasons(self) -> GetFeedbackReasonsResponse:
+        """
+        Fetch the feedback reasons for every feedback type.
+
+        One GET is issued per NCMECFeedbackType, and the parsed reasons are
+        stored in self.reasons_map under that type's value.
+
+        Returns only the parsed response of the last type queried; read
+        self.reasons_map for the reasons of all types.
+        """
+        for kind in NCMECFeedbackType:
+            reasons_path = f"{kind.value}/reasons"
+            payload = self._get(NCMECEndpoint.feedback, path=reasons_path)
+            parsed = GetFeedbackReasonsResponse.from_xml(_XMLWrapper(payload))
+            self.reasons_map[kind.value] = parsed.reasons
+
+        return parsed
+
def get_entries(
self,
*,
@@ -401,6 +535,55 @@ def get_entries_iter(
has_more = bool(next_)
yield result
+    def submit_feedback(
+        self,
+        entry_id: str,
+        feedback_type: NCMECFeedbackType,
+        affirmative: bool,
+        reason_id: t.Optional[str] = None,
+    ) -> requests.Response:
+        """
+        Submit affirmative or negative feedback for an entry.
+
+        The feedback is sent as a <feedbackSubmission> XML document via PUT
+        to the entry's feedback resource for `feedback_type`.
+
+        If self.member_id is not set, status() is called first to obtain it.
+        Negative feedback needs a `reason_id` matching the "guid" of one of
+        the reasons known for `feedback_type`; those reasons are fetched with
+        feedback_reasons() when none are cached for that type.
+
+        Returns the HTTP response as-is; it is not parsed yet (see TODO).
+
+        Raises ValueError if negative feedback has no `reason_id`, or one
+        that is not valid for `feedback_type`.
+        """
+        if not affirmative and not reason_id:
+            raise ValueError("Negative feedback must have a reason_id")
+
+        # need member_id to submit feedback
+        if not self.member_id:
+            self.status()
+
+        # Prepare the XML payload
+        root = ET.Element("feedbackSubmission")
+        root.set("xmlns", "https://hashsharing.ncmec.org/hashsharing/v2")
+        vote = ET.SubElement(root, "affirmative" if affirmative else "negative")
+
+        if not affirmative:
+            # need valid reasons to submit negative feedback; check the
+            # requested type specifically, since a map supplied by the caller
+            # may cover only some of the feedback types
+            if feedback_type.value not in self.reasons_map:
+                self.feedback_reasons()
+            allowed_reasons = self.reasons_map.get(feedback_type.value, [])
+            if reason_id not in [reason["guid"] for reason in allowed_reasons]:
+                # Carry the valid choices in the error instead of printing
+                raise ValueError(
+                    f"Invalid reason_id {reason_id!r}, "
+                    f"must choose from the following reasons: {allowed_reasons}"
+                )
+            reasons = ET.SubElement(vote, "reasonIds")
+            guid = ET.SubElement(reasons, "guid")
+            guid.text = reason_id
+
+        resp = self._put(
+            NCMECEndpoint.entries,
+            member_id=self.member_id,
+            entry_id=entry_id,
+            feedback_type=feedback_type,
+            data=ET.tostring(root),
+        )
+
+        # TODO: parse response here once we know the shape using UpdateEntryResponse
+        return resp
+
+
def _date_format(timestamp: int) -> str:
"""ISO 8601 format yyyy-MM-dd'T'HH:mm:ss.SSSZ"""
diff --git a/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/data.py b/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/data.py
new file mode 100644
index 000000000..21b7a9ca5
--- /dev/null
+++ b/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/data.py
@@ -0,0 +1,220 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+
+STATUS_XML = """
+
+
+ 127.0.0.1
+ testington
+ Sir Testington
+
+""".strip()
+
+NEXT_UNESCAPED = (
+ "/v2/entries?from=2017-10-20T00%3A00%3A00.000Z"
+ "&to=2017-10-30T00%3A00%3A00.000Z&start=2001&size=1000&max=3000"
+)
+
+NEXT_UNESCAPED2 = (
+ "/v2/entries?from=2017-10-20T00%3A00%3A00.000Z"
+ "&to=2017-10-30T00%3A00%3A00.000Z&start=3001&size=1000&max=4000"
+)
+NEXT_UNESCAPED3 = (
+ "/v2/entries?from=2017-10-20T00%3A00%3A00.000Z"
+ "&to=2017-10-30T00%3A00%3A00.000Z&start=4001&size=1000&max=5000"
+)
+
+ENTRIES_XML = """
+
+
+
+
+ Example Member
+ 2017-10-24T15:00:00Z
+ image1
+ A1
+
+ a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1
+ a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1
+ a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1...
+
+
+
+
+ Example Member
+
+
+
+
+
+
+ Example Member
+
+
+
+
+
+
+ Example Member2
+ image4
+ 2017-10-24T15:10:00Z
+
+
+
+
+
+ Example Member
+ video4
+ 2017-10-24T15:20:00Z
+
+
+
+ /v2/entries?from=2017-10-20T00%3A00%3A00.000Z&to=2017-10-30T00%3A00%3A00.000Z&start=2001&size=1000&max=3000
+
+
+""".strip()
+
+
+ENTRIES_XML2 = """
+
+
+
+
+ Example Member
+ 2019-10-24T15:00:00Z
+ image10
+ A1
+
+ b1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1
+ b1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1
+ b1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1...
+
+
+
+
+
+ /v2/entries?from=2017-10-20T00%3A00%3A00.000Z&to=2017-10-30T00%3A00%3A00.000Z&start=3001&size=1000&max=4000
+
+
+""".strip()
+
+# This example isn't in the documentation, but shows how updates work
+ENTRIES_XML3 = """
+
+
+
+
+
+
+
+ /v2/entries?from=2017-10-20T00%3A00%3A00.000Z&to=2017-10-30T00%3A00%3A00.000Z&start=4001&size=1000&max=5000
+
+
+""".strip()
+
+ENTRIES_XML4 = """
+
+
+
+
+
+ TX Example
+ 2019-11-25T15:10:00Z
+ willdelete
+
+
+
+""".strip()
+
+ENTRIES_LARGE_FINGERPRINTS = """
+
+
+
+
+
+
+""".strip()
+
+# NOTE(review): STATUS_XML is already assigned near the top of this module;
+# this second assignment replaces it, so importers only ever see this value.
+# Confirm which fixture is intended and drop the other.
+STATUS_XML = """
+
+
+ 1.1.1.1
+ test_user
+ test member
+
+""".strip()
+
+FEEDBACK_REASONS_XML = """
+
+
+
+
+""".strip()
+
+AFFIRMATIVE_FEEDBACK_XML = """
+
+
+
+
+
+
+""".strip()
+
+NEGATIVE_FEEDBACK_XML = """
+
+
+
+
+ 01234567-abcd-0123-4567-012345678900
+
+
+
+""".strip()
+
+UPDATE_FEEDBACK_RESULT_XML = """
+
+
+
+
+""".strip()
diff --git a/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/test_hash_api.py b/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/test_hash_api.py
index 89968543a..ba946d504 100644
--- a/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/test_hash_api.py
+++ b/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/test_hash_api.py
@@ -1,177 +1,29 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
from unittest.mock import Mock
-import urllib.parse
import typing as t
import pytest
import requests
from threatexchange.exchanges.clients.ncmec.hash_api import (
NCMECEntryType,
NCMECEntryUpdate,
+ NCMECFeedbackType,
NCMECHashAPI,
NCMECEnvironment,
)
-
-STATUS_XML = """
-
-
- 127.0.0.1
- testington
- Sir Testington
-
-""".strip()
-
-NEXT_UNESCAPED = (
- "/v2/entries?from=2017-10-20T00%3A00%3A00.000Z"
- "&to=2017-10-30T00%3A00%3A00.000Z&start=2001&size=1000&max=3000"
-)
-
-NEXT_UNESCAPED2 = (
- "/v2/entries?from=2017-10-20T00%3A00%3A00.000Z"
- "&to=2017-10-30T00%3A00%3A00.000Z&start=3001&size=1000&max=4000"
-)
-NEXT_UNESCAPED3 = (
- "/v2/entries?from=2017-10-20T00%3A00%3A00.000Z"
- "&to=2017-10-30T00%3A00%3A00.000Z&start=4001&size=1000&max=5000"
+from threatexchange.exchanges.clients.ncmec.tests.data import (
+ ENTRIES_LARGE_FINGERPRINTS,
+ ENTRIES_XML,
+ ENTRIES_XML2,
+ ENTRIES_XML3,
+ ENTRIES_XML4,
+ NEXT_UNESCAPED,
+ NEXT_UNESCAPED2,
+ NEXT_UNESCAPED3,
+ STATUS_XML,
+ UPDATE_FEEDBACK_RESULT_XML,
)
-ENTRIES_XML = """
-
-
-
-
- Example Member
- 2017-10-24T15:00:00Z
- image1
- A1
-
- a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1
- a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1
- a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1...
-
-
-
- Example Member2
- image4
- 2017-10-24T15:10:00Z
-
-
-
-
-
- Example Member
- video4
- 2017-10-24T15:20:00Z
-
-
-
- /v2/entries?from=2017-10-20T00%3A00%3A00.000Z&to=2017-10-30T00%3A00%3A00.000Z&start=2001&size=1000&max=3000
-
-
-""".strip()
-
-
-ENTRIES_XML2 = """
-
-
-
-
- Example Member
- 2019-10-24T15:00:00Z
- image10
- A1
-
- b1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1
- b1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1
- b1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1...
-
-
-
-
- /v2/entries?from=2017-10-20T00%3A00%3A00.000Z&to=2017-10-30T00%3A00%3A00.000Z&start=3001&size=1000&max=4000
-
-
-""".strip()
-
-# This example isn't in the documentation, but shows how updates work
-ENTRIES_XML3 = """
-
-
-
-
-
-
-
- /v2/entries?from=2017-10-20T00%3A00%3A00.000Z&to=2017-10-30T00%3A00%3A00.000Z&start=4001&size=1000&max=5000
-
-
-""".strip()
-
-ENTRIES_XML4 = """
-
-
-
-
-
- TX Example
- 2019-11-25T15:10:00Z
- willdelete
-
-
-
-""".strip()
-
-ENTRIES_LARGE_FINGERPRINTS = """
-
-
-
-
-
-
-""".strip()
-
def mock_get_impl(url: str, **params):
content = ENTRIES_XML
@@ -323,3 +175,35 @@ def test_large_fingerprint_entries(monkeypatch):
assert len(update.fingerprints) == 1
assert update.fingerprints == {"md5": "facefacefacefacefacefacefaceface"}
assert result.next == ""
+
+
+def test_feedback_entries(monkeypatch):
+    """Affirmative and negative feedback are both submitted via PUT."""
+    reason_guid = "01234567-abcd-0123-4567-012345678900"
+    api = NCMECHashAPI(
+        "fake_user",
+        "fake_pass",
+        NCMECEnvironment.test_Industry,
+        member_id="123",
+        reasons_map={
+            NCMECFeedbackType.md5.value: [
+                {
+                    "guid": reason_guid,
+                    "name": "Example Reason 1",
+                    "type": "Sha1",
+                }
+            ]
+        },
+    )
+    session = Mock(
+        strict_spec=["put", "__enter__", "__exit__"],
+        put=set_api_return(UPDATE_FEEDBACK_RESULT_XML),
+        __enter__=lambda _: session,
+        __exit__=lambda *args: None,
+    )
+    monkeypatch.setattr(api, "_get_session", lambda: session)
+
+    # Check each submission on its own so neither result goes unverified
+    affirmative = api.submit_feedback("image1", NCMECFeedbackType.md5, True)
+    assert affirmative.status_code == 200
+
+    negative = api.submit_feedback(
+        "image1", NCMECFeedbackType.md5, False, reason_guid
+    )
+    assert negative.status_code == 200