From 965cfa8e38e7596df1800c3a7624252b11de54a2 Mon Sep 17 00:00:00 2001 From: Arya Seghatoleslami Date: Wed, 18 Sep 2024 11:56:23 -0400 Subject: [PATCH 1/6] wip --- .../exchanges/clients/ncmec/hash_api.py | 41 +++- .../exchanges/clients/ncmec/tests/data.py | 202 ++++++++++++++++++ .../clients/ncmec/tests/test_hash_api.py | 189 +++------------- 3 files changed, 271 insertions(+), 161 deletions(-) create mode 100644 python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/data.py diff --git a/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py b/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py index 316b45861..3ffe5cd04 100644 --- a/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py +++ b/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py @@ -19,7 +19,7 @@ import urllib.parse import requests -from requests.packages.urllib3.util.retry import Retry +from urllib3.util.retry import Retry from threatexchange.exchanges.clients.utils.common import TimeoutHTTPAdapter @@ -27,6 +27,7 @@ _DEFAULT_ELE = ET.Element("") T = t.TypeVar("T") +FEEDBACK = t.List[t.Dict[str, str]] def nullthrows(v: t.Optional[T]) -> T: @@ -131,6 +132,7 @@ class NCMECEntryUpdate: deleted: bool classification: t.Optional[str] fingerprints: t.Dict[str, str] + feedback: t.Optional[FEEDBACK] @classmethod def from_xml(cls, xml: _XMLWrapper) -> "NCMECEntryUpdate": @@ -148,6 +150,18 @@ def from_xml(cls, xml: _XMLWrapper) -> "NCMECEntryUpdate": fingerprints={ x.tag: x.text for x in xml.maybe("fingerprints") if x.has_text }, + feedback=[ + { + "type": x.tag, # "affirmativeFeedback" or "negativeFeedback" + "members": x.maybe( + "members" # "timestamp", "member.id", "member.name" + ), + "reasons": x.maybe( + "reasons" + ), # "reason" with "guid", "name", "type" | "members" + } + for x in xml.maybe("feedback") + ], ) @@ -215,6 +229,23 @@ def estimated_entries_in_range(self) -> int: ) +# TODO: check http code until we have update response shape, we also might not care about it +@dataclass +class UpdateEntryResponse: + updates: t.List[NCMECEntryUpdate] + + @classmethod + def from_xml(cls, xml: _XMLWrapper, fallback_max_time: int) -> "GetEntriesResponse": + updates: t.List[NCMECEntryUpdate] = [] + + for content_xml in (xml.maybe("images"), xml.maybe("videos")): + if not content_xml or not len(content_xml): + continue + updates.extend(NCMECEntryUpdate.from_xml(c) for c in content_xml) + + return cls(updates) + + @unique class NCMECEndpoint(Enum): status = "status" @@ -401,6 +432,14 @@ def get_entries_iter( has_more = bool(next_) yield result + # TODO: split into 2, submit upvote and downvote + def submit_feedback(self, entry_id: str, is_good: bool) -> GetEntriesResponse: + # TODO + # 1. Prepare the XML payload + # 2. Send the POST request using _post + # 3. Parse the response using GetEntriesResponse.from_xml + return + def _date_format(timestamp: int) -> str: """ISO 8601 format yyyy-MM-dd'T'HH:mm:ss.SSSZ""" diff --git a/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/data.py b/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/data.py new file mode 100644 index 000000000..81929b7ba --- /dev/null +++ b/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/data.py @@ -0,0 +1,202 @@ +STATUS_XML = """ + + + 127.0.0.1 + testington + Sir Testington + +""".strip() + +NEXT_UNESCAPED = ( + "/v2/entries?from=2017-10-20T00%3A00%3A00.000Z" + "&to=2017-10-30T00%3A00%3A00.000Z&start=2001&size=1000&max=3000" +) + +NEXT_UNESCAPED2 = ( + "/v2/entries?from=2017-10-20T00%3A00%3A00.000Z" + "&to=2017-10-30T00%3A00%3A00.000Z&start=3001&size=1000&max=4000" +) +NEXT_UNESCAPED3 = ( + "/v2/entries?from=2017-10-20T00%3A00%3A00.000Z" + "&to=2017-10-30T00%3A00%3A00.000Z&start=4001&size=1000&max=5000" +) + +ENTRIES_XML = """ + + + + + Example Member + 2017-10-24T15:00:00Z + image1 + A1 + + a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1 + a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1 + a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1... + + + + + Example Member + + + + + + + Example Member + + + + + + + Example Member2 + image4 + 2017-10-24T15:10:00Z + + + + + + Example Member + video4 + 2017-10-24T15:20:00Z + + + + /v2/entries?from=2017-10-20T00%3A00%3A00.000Z&to=2017-10-30T00%3A00%3A00.000Z&start=2001&size=1000&max=3000 + + +""".strip() + + +ENTRIES_XML2 = """ + + + + + Example Member + 2019-10-24T15:00:00Z + image10 + A1 + + b1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1 + b1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1 + b1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1... + + + + + + /v2/entries?from=2017-10-20T00%3A00%3A00.000Z&to=2017-10-30T00%3A00%3A00.000Z&start=3001&size=1000&max=4000 + + +""".strip() + +# This example isn't in the documentation, but shows how updates work +ENTRIES_XML3 = """ + + + + + + + + /v2/entries?from=2017-10-20T00%3A00%3A00.000Z&to=2017-10-30T00%3A00%3A00.000Z&start=4001&size=1000&max=5000 + + +""".strip() + +ENTRIES_XML4 = """ + + + + + + TX Example + 2019-11-25T15:10:00Z + willdelete + + + +""".strip() + +ENTRIES_LARGE_FINGERPRINTS = """ + + + + + + +""".strip() + +AFFIRMATIVE_FEEDBACK_XML = """ + + + + + + +""".strip() + +NEGATIVE_FEEDBACK_XML = """ + + + + + 01234567-abcd-0123-4567-012345678900 + + + +""".strip() + +UPDATE_FEEDBACK_RESULT_XML = """ + + + + +""".strip() diff --git a/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/test_hash_api.py b/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/test_hash_api.py index 89968543a..ab776cf3e 100644 --- a/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/test_hash_api.py +++ b/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/test_hash_api.py @@ -1,7 +1,6 @@ # Copyright (c) Meta Platforms, Inc. and affiliates. from unittest.mock import Mock -import urllib.parse import typing as t import pytest import requests @@ -11,166 +10,17 @@ NCMECHashAPI, NCMECEnvironment, ) - -STATUS_XML = """ - - - 127.0.0.1 - testington - Sir Testington - -""".strip() - -NEXT_UNESCAPED = ( - "/v2/entries?from=2017-10-20T00%3A00%3A00.000Z" - "&to=2017-10-30T00%3A00%3A00.000Z&start=2001&size=1000&max=3000" -) - -NEXT_UNESCAPED2 = ( - "/v2/entries?from=2017-10-20T00%3A00%3A00.000Z" - "&to=2017-10-30T00%3A00%3A00.000Z&start=3001&size=1000&max=4000" +from threatexchange.exchanges.clients.ncmec.tests.data import ( + ENTRIES_LARGE_FINGERPRINTS, + ENTRIES_XML, + ENTRIES_XML2, + ENTRIES_XML3, + ENTRIES_XML4, + NEXT_UNESCAPED, + NEXT_UNESCAPED2, + NEXT_UNESCAPED3, + UPDATE_FEEDBACK_RESULT_XML, ) -NEXT_UNESCAPED3 = ( - "/v2/entries?from=2017-10-20T00%3A00%3A00.000Z" - "&to=2017-10-30T00%3A00%3A00.000Z&start=4001&size=1000&max=5000" -) - -ENTRIES_XML = """ - - - - - Example Member - 2017-10-24T15:00:00Z - image1 - A1 - - a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1 - a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1 - a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1... - - - - Example Member2 - image4 - 2017-10-24T15:10:00Z - - - - - - Example Member - video4 - 2017-10-24T15:20:00Z - - - - /v2/entries?from=2017-10-20T00%3A00%3A00.000Z&to=2017-10-30T00%3A00%3A00.000Z&start=2001&size=1000&max=3000 - - -""".strip() - - -ENTRIES_XML2 = """ - - - - - Example Member - 2019-10-24T15:00:00Z - image10 - A1 - - b1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1 - b1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1 - b1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1... - - - - - /v2/entries?from=2017-10-20T00%3A00%3A00.000Z&to=2017-10-30T00%3A00%3A00.000Z&start=3001&size=1000&max=4000 - - -""".strip() - -# This example isn't in the documentation, but shows how updates work -ENTRIES_XML3 = """ - - - - - - - - /v2/entries?from=2017-10-20T00%3A00%3A00.000Z&to=2017-10-30T00%3A00%3A00.000Z&start=4001&size=1000&max=5000 - - -""".strip() - -ENTRIES_XML4 = """ - - - - - - TX Example - 2019-11-25T15:10:00Z - willdelete - - - -""".strip() - -ENTRIES_LARGE_FINGERPRINTS = """ - - - - - - -""".strip() def mock_get_impl(url: str, **params): @@ -323,3 +173,22 @@ def test_large_fingerprint_entries(monkeypatch): assert len(update.fingerprints) == 1 assert update.fingerprints == {"md5": "facefacefacefacefacefacefaceface"} assert result.next == "" + + +def test_feedback_entries(monkeypatch): + api = NCMECHashAPI("fake_user", "fake_pass", NCMECEnvironment.test_Industry) + session = Mock( + strict_spec=["post", "__enter__", "__exit__"], + post=set_api_return(UPDATE_FEEDBACK_RESULT_XML), + __enter__=lambda _: session, + __exit__=lambda *args: None, + ) + monkeypatch.setattr(api, "_get_session", lambda: session) + + result = api.submit_feedback("image1", True) + + assert len(result.updates) == 1 + update = result.updates[0] + assert update.received == 1 + assert update.accepted == 1 + assert update.updated == 1 From 90aa0ed810d5a26497da3463525e3a39655d1263 Mon Sep 17 00:00:00 2001 From: Arya Seghatoleslami Date: Wed, 25 Sep 2024 21:33:16 -0400 Subject: [PATCH 2/6] add NCMECFeedbackType, member_id, build xml payload --- .../exchanges/clients/ncmec/hash_api.py | 43 +++++++++++++++++-- .../exchanges/clients/ncmec/tests/data.py | 2 + .../clients/ncmec/tests/test_hash_api.py | 6 ++- 3 files changed, 46 insertions(+), 5 deletions(-) diff --git a/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py b/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py index 3ffe5cd04..b23dbbb4a 100644 --- a/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py +++ b/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py @@ -27,7 +27,6 @@ _DEFAULT_ELE = ET.Element("") T = t.TypeVar("T") -FEEDBACK = t.List[t.Dict[str, str]] def nullthrows(v: t.Optional[T]) -> T: @@ -124,6 +123,19 @@ class NCMECEntryType(Enum): video = "video" +@unique +class NCMECFeedbackType(Enum): + md5 = "MD5" + sha1 = "SHA1" + pdna = "PDNA" + pdq = "PDQ" + netclean = "NETCLEAN" + Videntifier = "VIDENTIFIER" + tmk_pdqf = "TMK_PDQF" + ssvh_pdna = "SSVH_PDNA" + ssvh_safer_hash = "SSVH_SAFER_HASH" + + @dataclass class NCMECEntryUpdate: id: str @@ -132,7 +144,7 @@ class NCMECEntryUpdate: deleted: bool classification: t.Optional[str] fingerprints: t.Dict[str, str] - feedback: t.Optional[FEEDBACK] + feedback: t.List[t.Dict[str, str]] @classmethod def from_xml(cls, xml: _XMLWrapper) -> "NCMECEntryUpdate": @@ -297,10 +309,11 @@ def __init__( self.username = username self.password = password self._base_url = environment.value + self.member_id = None def _get_session(self) -> requests.Session: """ - Custom requests sesson + Custom requests session Ideally, should be used within a context manager: ``` @@ -369,6 +382,7 @@ def status(self) -> StatusResult: """Query the status endpoint, which tells you who you are.""" response = self._get(NCMECEndpoint.status) member = _XMLWrapper(response)["member"] + self.member_id = member.int("id") return StatusResult(member.int("id"), member.text) def members(self) -> t.List[StatusResult]: @@ -433,9 +447,30 @@ def get_entries_iter( yield result # TODO: split into 2, submit upvote and downvote - def submit_feedback(self, entry_id: str, is_good: bool) -> GetEntriesResponse: + def submit_feedback( + self, + entry_id: str, + feedback_type: NCMECFeedbackType, + affirmative: bool, + reason_id: str = None, + ) -> GetEntriesResponse: + if not affirmative and not reason_id: + raise ValueError("Negative feedback must have a reason_id") + + # need member_id to submit feedback + if not self.member_id: + self.status() + # TODO # 1. Prepare the XML payload + root = ET.Element("feedbackSubmission") + root.set("xmlns", "https://hashsharing.ncmec.org/hashsharing/v2") + vote = ET.SubElement(root, "affirmative" if affirmative else "negative") + if not affirmative: + reasons = ET.SubElement(vote, "reasonIds") + guid = ET.SubElement(reasons, "guid") + guid.text = reason_id + # ET.dump(root) # 2. Send the POST request using _post # 3. Parse the response using GetEntriesResponse.from_xml return diff --git a/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/data.py b/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/data.py index 81929b7ba..7664ade8a 100644 --- a/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/data.py +++ b/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/data.py @@ -1,3 +1,5 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. + STATUS_XML = """ diff --git a/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/test_hash_api.py b/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/test_hash_api.py index ab776cf3e..35750766a 100644 --- a/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/test_hash_api.py +++ b/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/test_hash_api.py @@ -7,6 +7,7 @@ from threatexchange.exchanges.clients.ncmec.hash_api import ( NCMECEntryType, NCMECEntryUpdate, + NCMECFeedbackType, NCMECHashAPI, NCMECEnvironment, ) @@ -185,7 +186,10 @@ def test_feedback_entries(monkeypatch): ) monkeypatch.setattr(api, "_get_session", lambda: session) - result = api.submit_feedback("image1", True) + result = api.submit_feedback("image1", NCMECFeedbackType.md5, True) + result = api.submit_feedback( + "image1", NCMECFeedbackType.md5, False, "01234567-abcd-0123-4567-012345678900" + ) assert len(result.updates) == 1 update = result.updates[0] From 1da801d559e8389b3ab9508153a95c5537a6c54e Mon Sep 17 00:00:00 2001 From: Arya Seghatoleslami Date: Sat, 28 Sep 2024 15:03:38 -0400 Subject: [PATCH 3/6] get feedback_reasons --- .../exchanges/clients/ncmec/hash_api.py | 110 ++++++++++++++++-- .../exchanges/clients/ncmec/tests/data.py | 16 +++ .../clients/ncmec/tests/test_hash_api.py | 22 +++- 3 files changed, 134 insertions(+), 14 deletions(-) diff --git a/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py b/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py index b23dbbb4a..dbe0cb3fa 100644 --- a/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py +++ b/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py @@ -241,7 +241,7 @@ def estimated_entries_in_range(self) -> int: ) -# TODO: check http code until we have update response shape, we also might not care about it +# TODO: once we know the shape of response, finish this class @dataclass class UpdateEntryResponse: updates: t.List[NCMECEntryUpdate] @@ -258,11 +258,30 @@ def from_xml(cls, xml: _XMLWrapper, fallback_max_time: int) -> "GetEntriesRespon return cls(updates) +@dataclass +class GetFeedbackReasonsResponse: + reasons: t.List[t.Dict[str, str]] + + @classmethod + def from_xml(cls, xml: _XMLWrapper) -> "GetFeedbackReasonsResponse": + reasons = [] + for reason in xml.maybe("availableFeedbackReasons"): + reasons.append( + { + "guid": reason.str("guid"), + "name": reason.str("name"), + "type": reason.str("type"), + } + ) + return cls(reasons) + + @unique class NCMECEndpoint(Enum): status = "status" entries = "entries" members = "members" + feedback = "feedback" class NCMECEnvironment(Enum): @@ -304,12 +323,15 @@ def __init__( username: str, password: str, environment: NCMECEnvironment, + member_id: t.Optional[str] = None, + reasons_map: t.Dict[str, t.List[t.Dict[str, str]]] = None, ) -> None: assert is_valid_user_pass(username, password) self.username = username self.password = password self._base_url = environment.value - self.member_id = None + self.member_id = member_id + self.reasons_map = reasons_map or {} def _get_session(self) -> requests.Session: """ @@ -339,7 +361,9 @@ def _get_session(self) -> requests.Session: ) return session - def _get(self, endpoint: NCMECEndpoint, *, next_: str = "", **params) -> ET.Element: + def _get( + self, endpoint: NCMECEndpoint, *, path: str = "", next_: str = "", **params + ) -> ET.Element: """ Perform an HTTP GET request, and return the XML response payload. @@ -347,6 +371,8 @@ def _get(self, endpoint: NCMECEndpoint, *, next_: str = "", **params) -> ET.Elem """ url = "/".join((self._base_url, self.VERSION, endpoint.value)) + if path: + url = "/".join((url, path)) if next_: url = self._base_url + next_ params = {} @@ -372,17 +398,49 @@ def _post(self, endpoint: NCMECEndpoint, *, data=None) -> t.Any: No timeout or retry strategy. """ - url = "/".join((self._base_url, endpoint.value)) + url = "/".join((self._base_url, self.VERSION, endpoint.value)) with self._get_session() as session: response = session.post(url, data=data) response.raise_for_status() return response + def _put( + self, + endpoint: NCMECEndpoint, + *, + member_id: str = None, + entry_id: str = None, + feedback_type: NCMECFeedbackType = None, + data=None, + ) -> t.Any: + """ + Perform an HTTP PUT request, and return the XML response payload. + + No timeout or retry strategy. + """ + + url = "/".join((self._base_url, self.VERSION, endpoint.value)) + if feedback_type: + url = "/".join( + ( + self._base_url, + endpoint.value, + member_id, + entry_id, + feedback_type.value, + NCMECEndpoint.feedback.value, + ) + ) + with self._get_session() as session: + response = session.put(url, data=data) + response.raise_for_status() + return response + def status(self) -> StatusResult: """Query the status endpoint, which tells you who you are.""" response = self._get(NCMECEndpoint.status) member = _XMLWrapper(response)["member"] - self.member_id = member.int("id") + self.member_id = member.str("id") return StatusResult(member.int("id"), member.text) def members(self) -> t.List[StatusResult]: @@ -393,6 +451,16 @@ def members(self) -> t.List[StatusResult]: for member in _XMLWrapper(response) ] + def feedback_reasons(self) -> GetFeedbackReasonsResponse: + for feedbackType in NCMECFeedbackType: + resp = self._get( + NCMECEndpoint.feedback, path=f"{feedbackType.value}/reasons" + ) + reasons = GetFeedbackReasonsResponse.from_xml(_XMLWrapper(resp)).reasons + self.reasons_map[feedbackType] = reasons + + return + def get_entries( self, *, @@ -446,7 +514,6 @@ def get_entries_iter( has_more = bool(next_) yield result - # TODO: split into 2, submit upvote and downvote def submit_feedback( self, entry_id: str, @@ -461,19 +528,40 @@ def submit_feedback( if not self.member_id: self.status() - # TODO - # 1. Prepare the XML payload + # need valid reasons to submit feedback + if not self.reasons_map: + self.feedback_reasons() + + # Prepare the XML payload root = ET.Element("feedbackSubmission") root.set("xmlns", "https://hashsharing.ncmec.org/hashsharing/v2") vote = ET.SubElement(root, "affirmative" if affirmative else "negative") + + valid_reason_ids = [ + reason["guid"] for reason in self.reasons_map[feedback_type.value] + ] if not affirmative: + if reason_id not in valid_reason_ids: + print( + "must choose from the following reasons: ", + self.reasons_map[feedback_type.value], + ) + raise ValueError("Invalid reason_id") reasons = ET.SubElement(vote, "reasonIds") guid = ET.SubElement(reasons, "guid") guid.text = reason_id # ET.dump(root) - # 2. Send the POST request using _post - # 3. Parse the response using GetEntriesResponse.from_xml - return + + resp = self._put( + NCMECEndpoint.entries, + member_id=self.member_id, + entry_id=entry_id, + feedback_type=feedback_type, + data=ET.tostring(root), + ) + + # TODO: parse response here once we know the shape using UpdateEntryResponse + return resp def _date_format(timestamp: int) -> str: diff --git a/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/data.py b/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/data.py index 7664ade8a..fc66a5b3f 100644 --- a/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/data.py +++ b/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/data.py @@ -176,6 +176,22 @@ """.strip() +STATUS_XML = """ + + + 1.1.1.1 + test_user + test member + +""".strip() + +FEEDBACK_REASONS_XML = """ + + + + +""".strip() + AFFIRMATIVE_FEEDBACK_XML = """ diff --git a/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/test_hash_api.py b/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/test_hash_api.py index 35750766a..3cbd1a94d 100644 --- a/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/test_hash_api.py +++ b/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/test_hash_api.py @@ -20,6 +20,7 @@ NEXT_UNESCAPED, NEXT_UNESCAPED2, NEXT_UNESCAPED3, + STATUS_XML, UPDATE_FEEDBACK_RESULT_XML, ) @@ -177,10 +178,25 @@ def test_large_fingerprint_entries(monkeypatch): def test_feedback_entries(monkeypatch): - api = NCMECHashAPI("fake_user", "fake_pass", NCMECEnvironment.test_Industry) + api = NCMECHashAPI( + "fake_user", + "fake_pass", + NCMECEnvironment.test_Industry, + "123", + { + NCMECFeedbackType.md5.value: [ + { + "guid": "01234567-abcd-0123-4567-012345678900", + "name": "Example Reason 1", + "type": "Sha1", + } + ] + }, + ) session = Mock( - strict_spec=["post", "__enter__", "__exit__"], - post=set_api_return(UPDATE_FEEDBACK_RESULT_XML), + strict_spec=["get", "put", "__enter__", "__exit__"], + get=set_api_return(STATUS_XML), + put=set_api_return(UPDATE_FEEDBACK_RESULT_XML), __enter__=lambda _: session, __exit__=lambda *args: None, ) From be4cea236062a27f56480c382993d589c898e90c Mon Sep 17 00:00:00 2001 From: Arya Seghatoleslami Date: Sat, 28 Sep 2024 15:18:14 -0400 Subject: [PATCH 4/6] update test check status code --- .../exchanges/clients/ncmec/tests/test_hash_api.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/test_hash_api.py b/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/test_hash_api.py index 3cbd1a94d..571b7702f 100644 --- a/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/test_hash_api.py +++ b/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/test_hash_api.py @@ -207,8 +207,4 @@ def test_feedback_entries(monkeypatch): "image1", NCMECFeedbackType.md5, False, "01234567-abcd-0123-4567-012345678900" ) - assert len(result.updates) == 1 - update = result.updates[0] - assert update.received == 1 - assert update.accepted == 1 - assert update.updated == 1 + assert result.status_code == 200 From a82dd7b936c63454ab1cdfa73259702752792973 Mon Sep 17 00:00:00 2001 From: Arya Seghatoleslami Date: Sat, 28 Sep 2024 15:31:48 -0400 Subject: [PATCH 5/6] type fixes --- .../exchanges/clients/ncmec/hash_api.py | 24 ++++++++++--------- .../clients/ncmec/tests/test_hash_api.py | 7 +++--- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py b/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py index dbe0cb3fa..f24c4b17c 100644 --- a/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py +++ b/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py @@ -144,7 +144,7 @@ class NCMECEntryUpdate: deleted: bool classification: t.Optional[str] fingerprints: t.Dict[str, str] - feedback: t.List[t.Dict[str, str]] + feedback: t.List[t.Dict[str, t.Any]] @classmethod def from_xml(cls, xml: _XMLWrapper) -> "NCMECEntryUpdate": @@ -247,7 +247,9 @@ class UpdateEntryResponse: updates: t.List[NCMECEntryUpdate] @classmethod - def from_xml(cls, xml: _XMLWrapper, fallback_max_time: int) -> "GetEntriesResponse": + def from_xml( + cls, xml: _XMLWrapper, fallback_max_time: int + ) -> "UpdateEntryResponse": updates: t.List[NCMECEntryUpdate] = [] for content_xml in (xml.maybe("images"), xml.maybe("videos")): @@ -324,7 +326,7 @@ def __init__( password: str, environment: NCMECEnvironment, member_id: t.Optional[str] = None, - reasons_map: t.Dict[str, t.List[t.Dict[str, str]]] = None, + reasons_map: t.Dict[str, t.List[t.Dict[str, str]]] = {}, ) -> None: assert is_valid_user_pass(username, password) self.username = username @@ -408,9 +410,9 @@ def _put( self, endpoint: NCMECEndpoint, *, - member_id: str = None, - entry_id: str = None, - feedback_type: NCMECFeedbackType = None, + member_id: t.Optional[str] = None, + entry_id: t.Optional[str] = None, + feedback_type: t.Optional[NCMECFeedbackType] = None, data=None, ) -> t.Any: """ @@ -420,7 +422,7 @@ def _put( """ url = "/".join((self._base_url, self.VERSION, endpoint.value)) - if feedback_type: + if feedback_type and member_id and entry_id: url = "/".join( ( self._base_url, @@ -456,10 +458,10 @@ def feedback_reasons(self) -> GetFeedbackReasonsResponse: resp = self._get( NCMECEndpoint.feedback, path=f"{feedbackType.value}/reasons" ) - reasons = GetFeedbackReasonsResponse.from_xml(_XMLWrapper(resp)).reasons - self.reasons_map[feedbackType] = reasons + reasonsResp = GetFeedbackReasonsResponse.from_xml(_XMLWrapper(resp)) + self.reasons_map[feedbackType.value] = reasonsResp.reasons - return + return reasonsResp def get_entries( self, @@ -519,7 +521,7 @@ def submit_feedback( entry_id: str, feedback_type: NCMECFeedbackType, affirmative: bool, - reason_id: str = None, + reason_id: t.Optional[str] = None, ) -> GetEntriesResponse: if not affirmative and not reason_id: raise ValueError("Negative feedback must have a reason_id") diff --git a/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/test_hash_api.py b/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/test_hash_api.py index 571b7702f..ba946d504 100644 --- a/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/test_hash_api.py +++ b/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/test_hash_api.py @@ -182,8 +182,8 @@ def test_feedback_entries(monkeypatch): "fake_user", "fake_pass", NCMECEnvironment.test_Industry, - "123", - { + member_id="123", + reasons_map={ NCMECFeedbackType.md5.value: [ { "guid": "01234567-abcd-0123-4567-012345678900", @@ -194,8 +194,7 @@ def test_feedback_entries(monkeypatch): }, ) session = Mock( - strict_spec=["get", "put", "__enter__", "__exit__"], - get=set_api_return(STATUS_XML), + strict_spec=["put", "__enter__", "__exit__"], put=set_api_return(UPDATE_FEEDBACK_RESULT_XML), __enter__=lambda _: session, __exit__=lambda *args: None, From 6dee071c18f09b084a84863a7923a0665752dab3 Mon Sep 17 00:00:00 2001 From: Arya Seghatoleslami Date: Mon, 30 Sep 2024 09:50:01 -0400 Subject: [PATCH 6/6] parse feedback better --- .../exchanges/clients/ncmec/hash_api.py | 55 +++++++++++++------ .../exchanges/clients/ncmec/tests/data.py | 16 +++--- 2 files changed, 45 insertions(+), 26 deletions(-) diff --git a/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py b/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py index f24c4b17c..380927089 100644 --- a/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py +++ b/python-threatexchange/threatexchange/exchanges/clients/ncmec/hash_api.py @@ -130,7 +130,7 @@ class NCMECFeedbackType(Enum): pdna = "PDNA" pdq = "PDQ" netclean = "NETCLEAN" - Videntifier = "VIDENTIFIER" + videntifier = "VIDENTIFIER" tmk_pdqf = "TMK_PDQF" ssvh_pdna = "SSVH_PDNA" ssvh_safer_hash = "SSVH_SAFER_HASH" @@ -162,18 +162,36 @@ def from_xml(cls, xml: _XMLWrapper) -> "NCMECEntryUpdate": fingerprints={ x.tag: x.text for x in xml.maybe("fingerprints") if x.has_text }, - feedback=[ - { - "type": x.tag, # "affirmativeFeedback" or "negativeFeedback" - "members": x.maybe( - "members" # "timestamp", "member.id", "member.name" - ), - "reasons": x.maybe( - "reasons" - ), # "reason" with "guid", "name", "type" | "members" - } - for x in xml.maybe("feedback") - ], + feedback=( + [ + { + "sentiment": x.tag, # "affirmativeFeedback" or "negativeFeedback" + "type": x.str("type"), + "latest_feedback_time": x.str("lastUpdateTimestamp"), + "members": [ + {"id": m.str("id"), "name": m.text} + for m in x.maybe("members") + if m.has_text + ], + "reasons": [ + { + "guid": r.maybe("reason").str("guid"), + "name": r.maybe("reason").str("name"), + "type": r.maybe("reason").str("type"), + "members": [ + {"id": m.str("id"), "name": m.text} + for m in x.maybe("members") + ], + } + for r in x.maybe("reasons") + if r.maybe("reason") + ], + } + for x in xml.maybe("feedback") + ] + if xml.maybe("feedback").has_text + else [] + ), ) @@ -454,6 +472,7 @@ def members(self) -> t.List[StatusResult]: ] def feedback_reasons(self) -> GetFeedbackReasonsResponse: + """Get the possible negative feedback reasons for each feedback type""" for feedbackType in NCMECFeedbackType: resp = self._get( NCMECEndpoint.feedback, path=f"{feedbackType.value}/reasons" @@ -530,8 +549,8 @@ def submit_feedback( if not self.member_id: self.status() - # need valid reasons to submit feedback - if not self.reasons_map: + # need valid reasons to submit negative feedback + if not affirmative and not self.reasons_map: self.feedback_reasons() # Prepare the XML payload @@ -539,10 +558,10 @@ def submit_feedback( root.set("xmlns", "https://hashsharing.ncmec.org/hashsharing/v2") vote = ET.SubElement(root, "affirmative" if affirmative else "negative") - valid_reason_ids = [ - reason["guid"] for reason in self.reasons_map[feedback_type.value] - ] if not affirmative: + valid_reason_ids = [ + reason["guid"] for reason in self.reasons_map[feedback_type.value] + ] if reason_id not in valid_reason_ids: print( "must choose from the following reasons: ", diff --git a/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/data.py b/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/data.py index fc66a5b3f..21b7a9ca5 100644 --- a/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/data.py +++ b/python-threatexchange/threatexchange/exchanges/clients/ncmec/tests/data.py @@ -39,17 +39,17 @@ - - Example Member - - - - - Example Member - + + + + + + Example Member + +