From 287c1193976bea83a6828c86860e09be90bf80c4 Mon Sep 17 00:00:00 2001 From: lakshmi2506 <141401869+lakshmi2506@users.noreply.github.com> Date: Thu, 11 Jul 2024 15:32:51 +0530 Subject: [PATCH] Update the validation for loading to check the required fields as well (#3807) To ensure field-level permissions and successful dataset load, there are several validations performed on the `mapping.yml` file. These validations check whether the fields and SObjects have the required permissions, and whether namespaces need to be injected, as well as handling case insensitivity for the `mapping.yml`. This functionality is already implemented in the function `validate_and_inject_mapping`. However, there was a missing corner case where the function did not capture errors for required fields missing in the mapping.yml. This functionality has now been added, and the function is used for the preflight check. --------- Co-authored-by: James Estevez --- cumulusci/cumulusci.yml | 4 + cumulusci/tasks/bulkdata/mapping_parser.py | 33 ++++++- .../tasks/bulkdata/tests/mapping_after.yml | 6 ++ cumulusci/tasks/bulkdata/tests/mapping_v1.yml | 1 + .../tests/person_accounts_minimal.yml | 1 + .../tasks/bulkdata/tests/test_extract.py | 8 +- cumulusci/tasks/bulkdata/tests/test_load.py | 6 +- .../bulkdata/tests/test_mapping_parser.py | 88 ++++++++++++++++++- cumulusci/tasks/preflight/dataset_load.py | 49 +++++++++++ .../preflight/tests/test_dataset_load.py | 85 ++++++++++++++++++ datasets/default/default.mapping.yml | 20 +++++ 11 files changed, 286 insertions(+), 15 deletions(-) create mode 100644 cumulusci/tasks/preflight/dataset_load.py create mode 100644 cumulusci/tasks/preflight/tests/test_dataset_load.py create mode 100644 datasets/default/default.mapping.yml diff --git a/cumulusci/cumulusci.yml b/cumulusci/cumulusci.yml index 397e40678e..3f5b20acd8 100644 --- a/cumulusci/cumulusci.yml +++ b/cumulusci/cumulusci.yml @@ -81,6 +81,10 @@ tasks: description: Waits on a batch apex or queueable apex 
job to finish. class_path: cumulusci.tasks.apex.batch.BatchApexWait group: Salesforce + check_dataset_load: + description: Runs as a preflight check to determine whether dataset can be loaded successfully. + class_path: cumulusci.tasks.preflight.dataset_load.LoadDataSetCheck + group: Salesforce Preflight Checks check_my_domain_active: description: Runs as a preflight check to determine whether My Domain is active. class_path: cumulusci.tasks.preflight.settings.CheckMyDomainActive diff --git a/cumulusci/tasks/bulkdata/mapping_parser.py b/cumulusci/tasks/bulkdata/mapping_parser.py index fd390a1a23..3e6aa28541 100644 --- a/cumulusci/tasks/bulkdata/mapping_parser.py +++ b/cumulusci/tasks/bulkdata/mapping_parser.py @@ -478,6 +478,27 @@ def _validate_sobject( return True + def check_required(self, fields_describe): + required_fields = set() + for field in fields_describe: + defaulted = ( + fields_describe[field]["defaultValue"] is not None + or fields_describe[field]["nillable"] + or fields_describe[field]["defaultedOnCreate"] + ) + if fields_describe[field]["createable"] and not defaulted: + required_fields.add(field) + missing_fields = required_fields.difference( + set(self.fields.keys()) | set(self.lookups) + ) + if len(missing_fields) > 0: + logger.error( + f"One or more required fields are missing for loading on {self.sf_object} :{missing_fields}" + ) + return False + else: + return True + def validate_and_inject_namespace( self, sf: Salesforce, @@ -485,6 +506,7 @@ def validate_and_inject_namespace( operation: DataOperationType, inject_namespaces: bool = False, drop_missing: bool = False, + is_load: bool = False, ): """Process the schema elements in this step. @@ -516,7 +538,6 @@ def strip(element: str): global_describe = CaseInsensitiveDict( {entry["name"]: entry for entry in sf.describe()["sobjects"]} ) - if not self._validate_sobject(global_describe, inject, strip, operation): # Don't attempt to validate field permissions if the object doesn't exist. 
return False @@ -524,7 +545,6 @@ def strip(element: str): # Validate, inject, and drop (if configured) fields. # By this point, we know the attribute is valid. describe = self.describe_data(sf) - fields_correct = self._validate_field_dict( describe, self.fields, inject, strip, drop_missing, operation ) @@ -533,6 +553,11 @@ def strip(element: str): describe, self.lookups, inject, strip, drop_missing, operation ) + if is_load: + required_fields_correct = self.check_required(describe) + if not required_fields_correct: + return False + if not (fields_correct and lookups_correct): return False @@ -687,7 +712,7 @@ def validate_and_inject_mapping( should_continue = [ m.validate_and_inject_namespace( - sf, namespace, data_operation, inject_namespaces, drop_missing + sf, namespace, data_operation, inject_namespaces, drop_missing, is_load ) for m in mapping.values() ] @@ -696,7 +721,7 @@ def validate_and_inject_mapping( raise BulkDataException( "One or more schema or permissions errors blocked the operation.\n" "If you would like to attempt the load regardless, you can specify " - "'--drop_missing_schema True' on the command." + "'--drop_missing_schema True' on the command option and ensure all required fields are included in the mapping file." 
) if drop_missing: diff --git a/cumulusci/tasks/bulkdata/tests/mapping_after.yml b/cumulusci/tasks/bulkdata/tests/mapping_after.yml index 2d3ebdb726..9c8798cf0e 100644 --- a/cumulusci/tasks/bulkdata/tests/mapping_after.yml +++ b/cumulusci/tasks/bulkdata/tests/mapping_after.yml @@ -4,6 +4,7 @@ Insert Accounts: table: accounts fields: Id: sf_id + Name: Name lookups: ParentId: after: Insert Accounts @@ -17,16 +18,21 @@ Insert Contacts: table: contacts fields: Id: sf_id + LastName: LastName lookups: ReportsToId: after: Insert Contacts table: contacts + Insert Opportunities: api: bulk sf_object: Opportunity table: opportunities fields: Id: sf_id + CloseDate: CloseDate + StageName: StageName + Name: Name lookups: AccountId: table: accounts diff --git a/cumulusci/tasks/bulkdata/tests/mapping_v1.yml b/cumulusci/tasks/bulkdata/tests/mapping_v1.yml index 0d04fbb90d..6fe35ef1d6 100644 --- a/cumulusci/tasks/bulkdata/tests/mapping_v1.yml +++ b/cumulusci/tasks/bulkdata/tests/mapping_v1.yml @@ -4,6 +4,7 @@ Insert Households: table: households fields: Id: sf_id + Name: Name static: Name: TestHousehold record_type: HH_Account diff --git a/cumulusci/tasks/bulkdata/tests/person_accounts_minimal.yml b/cumulusci/tasks/bulkdata/tests/person_accounts_minimal.yml index 49bd555a7e..c8520397e0 100644 --- a/cumulusci/tasks/bulkdata/tests/person_accounts_minimal.yml +++ b/cumulusci/tasks/bulkdata/tests/person_accounts_minimal.yml @@ -8,6 +8,7 @@ Insert PersonContact: table: PersonContact fields: - IsPersonAccount + - LastName lookups: AccountId: table: Account diff --git a/cumulusci/tasks/bulkdata/tests/test_extract.py b/cumulusci/tasks/bulkdata/tests/test_extract.py index e2fd9d65c7..996584a2a5 100644 --- a/cumulusci/tasks/bulkdata/tests/test_extract.py +++ b/cumulusci/tasks/bulkdata/tests/test_extract.py @@ -115,7 +115,7 @@ def test_run__person_accounts_disabled(self, query_op_mock): sobject="Account", api_options={}, context=task, - query="SELECT Id FROM Account", + query="SELECT Id, Name 
FROM Account", ) mock_query_contacts = MockBulkQueryOperation( sobject="Contact", @@ -123,7 +123,7 @@ def test_run__person_accounts_disabled(self, query_op_mock): context=task, query="SELECT Id, FirstName, LastName, Email, AccountId FROM Contact", ) - mock_query_households.results = [["1"]] + mock_query_households.results = [["1", "None"]] mock_query_contacts.results = [ ["2", "First", "Last", "test@example.com", "1"] ] @@ -170,7 +170,7 @@ def test_run__person_accounts_enabled(self, query_op_mock): sobject="Account", api_options={}, context=task, - query="SELECT Id, IsPersonAccount FROM Account", + query="SELECT Id, Name IsPersonAccount FROM Account", ) mock_query_contacts = MockBulkQueryOperation( sobject="Contact", @@ -178,7 +178,7 @@ def test_run__person_accounts_enabled(self, query_op_mock): context=task, query="SELECT Id, FirstName, LastName, Email, IsPersonAccount, AccountId FROM Contact", ) - mock_query_households.results = [["1", "false"]] + mock_query_households.results = [["1", "None", "false"]] mock_query_contacts.results = [ ["2", "First", "Last", "test@example.com", "true", "1"] ] diff --git a/cumulusci/tasks/bulkdata/tests/test_load.py b/cumulusci/tasks/bulkdata/tests/test_load.py index 3d6f64503b..6649ff202e 100644 --- a/cumulusci/tasks/bulkdata/tests/test_load.py +++ b/cumulusci/tasks/bulkdata/tests/test_load.py @@ -122,9 +122,8 @@ def test_run(self, dml_mock): mock_describe_calls() task() - assert step.records == [ - ["TestHousehold", "1"], + ["TestHousehold", "TestHousehold", "1"], ["Test", "User", "test@example.com", "001000000000000"], ["Error", "User", "error@example.com", "001000000000000"], ] @@ -387,9 +386,8 @@ def test_run__sql(self, dml_mock): ] mock_describe_calls() task() - assert step.records == [ - ["TestHousehold", "1"], + [None, "TestHousehold", "1"], ["Test☃", "User", "test@example.com", "001000000000000"], ["Error", "User", "error@example.com", "001000000000000"], ] diff --git a/cumulusci/tasks/bulkdata/tests/test_mapping_parser.py 
b/cumulusci/tasks/bulkdata/tests/test_mapping_parser.py index 432c2a0e50..1ad4476976 100644 --- a/cumulusci/tasks/bulkdata/tests/test_mapping_parser.py +++ b/cumulusci/tasks/bulkdata/tests/test_mapping_parser.py @@ -323,6 +323,64 @@ def test_validate_field_dict__injection(self): assert ms.fields_ == {"Id": "Id", "Name": "Name", "npsp__Test__c": "Test__c"} + def test_validate_fields_required(self): + ms = MappingStep( + sf_object="Account", + fields=["Id", "Name", "Test__c"], + action=DataOperationType.INSERT, + ) + fields_describe = CaseInsensitiveDict( + { + "Name": { + "createable": True, + "nillable": False, + "defaultedOnCreate": False, + "defaultValue": None, + }, + "npsp__Test__c": { + "createable": True, + "nillable": False, + "defaultedOnCreate": False, + "defaultValue": None, + }, + } + ) + ms._validate_field_dict( + describe=fields_describe, + field_dict=ms.fields_, + inject=lambda field: f"npsp__{field}", + strip=None, + drop_missing=False, + data_operation_type=DataOperationType.INSERT, + ) + assert ms.fields_ == {"Id": "Id", "Name": "Name", "npsp__Test__c": "Test__c"} + assert ms.check_required(fields_describe) + + def test_validate_fields_required_missing(self): + ms = MappingStep( + sf_object="Account", + fields=["Test__c"], + action=DataOperationType.INSERT, + ) + fields_describe = CaseInsensitiveDict( + { + "Name": { + "createable": True, + "nillable": False, + "defaultedOnCreate": False, + "defaultValue": None, + }, + "Test__c": { + "createable": True, + "nillable": False, + "defaultedOnCreate": False, + "defaultValue": None, + }, + } + ) + assert ms.fields_ == {"Test__c": "Test__c"} + assert not ms.check_required(fields_describe) + def test_validate_field_dict__injection_duplicate_fields(self): ms = MappingStep( sf_object="Account", @@ -930,7 +988,7 @@ def test_validate_and_inject_mapping_removes_lookups_with_drop_missing(self): StringIO( ( "Insert Accounts:\n sf_object: NotAccount\n table: Account\n fields:\n - Nonsense__c\n" - "Insert 
Contacts:\n sf_object: Contact\n table: Contact\n lookups:\n AccountId:\n table: Account" + "Insert Contacts:\n sf_object: Contact\n table: Contact\n fields:\n - LastName\n lookups:\n AccountId:\n table: Account" ) ) ) @@ -1027,7 +1085,7 @@ def test_validate_and_inject_mapping_throws_exception_required_lookup_dropped(se StringIO( ( "Insert Accounts:\n sf_object: NotAccount\n table: Account\n fields:\n - Nonsense__c\n" - "Insert Contacts:\n sf_object: Contact\n table: Contact\n lookups:\n Id:\n table: Account" + "Insert Contacts:\n sf_object: Contact\n table: Contact\n fields:\n - LastName\n lookups:\n Id:\n table: Account" ) ) ) @@ -1045,6 +1103,30 @@ def test_validate_and_inject_mapping_throws_exception_required_lookup_dropped(se drop_missing=True, ) + @responses.activate + def test_validate_and_inject_mapping_throws_exception_required_fields_missing(self): + mock_describe_calls() + mapping = parse_from_yaml( + StringIO( + ( + "Insert Accounts:\n sf_object: Account\n table: Account\n fields:\n - ns__Description__c\n" + ) + ) + ) + org_config = DummyOrgConfig( + {"instance_url": "https://example.com", "access_token": "abc123"}, "test" + ) + + with pytest.raises(BulkDataException): + validate_and_inject_mapping( + mapping=mapping, + sf=org_config.salesforce_client, + namespace=None, + data_operation=DataOperationType.INSERT, + inject_namespaces=False, + drop_missing=False, + ) + @responses.activate def test_validate_and_inject_mapping_injects_namespaces(self): mock_describe_calls() @@ -1206,7 +1288,7 @@ def test_validate_and_inject_mapping_works_case_insensitively(self): StringIO( ( "Insert Accounts:\n sf_object: account\n table: account\n fields:\n - name\n" - "Insert Contacts:\n sf_object: contact\n table: contact\n fields:\n - fIRSTnAME\n lookups:\n accountid:\n table: account" + "Insert Contacts:\n sf_object: contact\n table: contact\n fields:\n - LaSTnAME\n lookups:\n accountid:\n table: account" ) ) ) diff --git a/cumulusci/tasks/preflight/dataset_load.py 
b/cumulusci/tasks/preflight/dataset_load.py new file mode 100644 index 0000000000..41a89b5cd4 --- /dev/null +++ b/cumulusci/tasks/preflight/dataset_load.py @@ -0,0 +1,49 @@ +from cumulusci.core.datasets import Dataset +from cumulusci.core.exceptions import BulkDataException +from cumulusci.tasks.bulkdata.mapping_parser import ( + parse_from_yaml, + validate_and_inject_mapping, +) +from cumulusci.tasks.bulkdata.step import DataOperationType +from cumulusci.tasks.salesforce import BaseSalesforceApiTask + + +class LoadDataSetCheck(BaseSalesforceApiTask): + task_docs = """ + A preflight check to ensure a dataset can be loaded successfully + """ + task_options = { + "dataset": { + "description": "Dataset on which preflight checks need to be performed", + "required": False, + }, + } + + def _init_options(self, kwargs): + super(BaseSalesforceApiTask, self)._init_options(kwargs) + if "dataset" not in self.options: + self.options["dataset"] = "default" + + def _run_task(self): + mapping_file_path = Dataset( + self.options["dataset"], + self.project_config, + self.sf, + self.org_config, + schema=None, + ).mapping_file + self.mapping = parse_from_yaml(mapping_file_path) + try: + validate_and_inject_mapping( + mapping=self.mapping, + sf=self.sf, + namespace=self.project_config.project__package__namespace, + data_operation=DataOperationType.INSERT, + inject_namespaces=True, + drop_missing=False, + ) + self.return_values = True + except BulkDataException as e: + self.logger.error(e) + self.return_values = False + return self.return_values diff --git a/cumulusci/tasks/preflight/tests/test_dataset_load.py b/cumulusci/tasks/preflight/tests/test_dataset_load.py new file mode 100644 index 0000000000..b57b2a4acc --- /dev/null +++ b/cumulusci/tasks/preflight/tests/test_dataset_load.py @@ -0,0 +1,85 @@ +from unittest import mock + +import pytest + +from cumulusci.core.exceptions import BulkDataException +from cumulusci.tasks.bulkdata.mapping_parser import MappingStep +from 
cumulusci.tasks.bulkdata.step import DataApi, DataOperationType +from cumulusci.tasks.preflight.dataset_load import LoadDataSetCheck +from cumulusci.tasks.salesforce.tests.util import create_task + + +class TestLoadDataSetCheck: + @mock.patch( + "cumulusci.tasks.preflight.dataset_load.validate_and_inject_mapping", + return_value=True, + ) + def test_run_task(self, validate_and_inject_mapping): + task = create_task(LoadDataSetCheck, {}) + assert task() + assert task.options["dataset"] == "default" + assert task.mapping == { + "Account": MappingStep( + sf_object="Account", + table="Account", + fields={ + "Name": "Name", + "Description": "Description", + "ShippingStreet": "ShippingStreet", + "ShippingCity": "ShippingCity", + "ShippingState": "ShippingState", + "ShippingPostalCode": "ShippingPostalCode", + "ShippingCountry": "ShippingCountry", + "Phone": "Phone", + "AccountNumber": "AccountNumber", + }, + lookups={}, + static={}, + filters=[], + action=DataOperationType.INSERT, + api=DataApi.BULK, + batch_size=1, + oid_as_pk=False, + record_type=None, + bulk_mode=None, + anchor_date=None, + soql_filter=None, + update_key=(), + ), + "Contact": MappingStep( + sf_object="Contact", + table="Contact", + fields={"FirstName": "FirstName"}, + lookups={}, + static={}, + filters=[], + action=DataOperationType.INSERT, + api=DataApi.BULK, + batch_size=1, + oid_as_pk=False, + record_type=None, + bulk_mode=None, + anchor_date=None, + soql_filter=None, + update_key=(), + ), + } + + def test_mapping_file_not_found(self): + task = create_task(LoadDataSetCheck, {"dataset": "alpha"}) + with pytest.raises(Exception) as e: + task() + assert "No such file or directory" in str(e.value) + assert task.options["dataset"] == "alpha" + + @mock.patch( + "cumulusci.tasks.preflight.dataset_load.validate_and_inject_mapping", + side_effect=BulkDataException("An error occurred during validation"), + ) + def test_run_fail(self, validate_and_inject_mapping): + task = create_task(LoadDataSetCheck, {}) + 
task.logger = mock.Mock() + assert not task() + task.logger.error.assert_called_once_with( + "An error occurred during validation" + ) diff --git a/datasets/default/default.mapping.yml b/datasets/default/default.mapping.yml new file mode 100644 index 0000000000..845816ab1f --- /dev/null +++ b/datasets/default/default.mapping.yml @@ -0,0 +1,20 @@ +Account: + sf_object: Account + api: bulk + batch_size: 1 + fields: + - Name + - Description + - ShippingStreet + - ShippingCity + - ShippingState + - ShippingPostalCode + - ShippingCountry + - Phone + - AccountNumber +Contact: + sf_object: Contact + api: bulk + batch_size: 1 + fields: + - FirstName