Skip to content

Commit

Permalink
Merge branch 'main' of github.com:SFDO-Tooling/CumulusCI into feature/add_checks_command
Browse files (browse the repository at this point in the history)
  • Loading branch information
jain-naman-sf committed Feb 5, 2025
2 parents 4726951 + 4c88b7d commit 2c164c8
Show file tree
Hide file tree
Showing 22 changed files with 553 additions and 161 deletions.
24 changes: 24 additions & 0 deletions .github/workflows/feature_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,30 @@ jobs:
- name: Run Pytest
run: uv run pytest --cov-report= --cov=cumulusci

# Job: run the pytest suite with every optional extra installed
# (see the `uv sync --all-extras` step), once per OS / Python combination
# listed in the matrix below.
unit_tests_opt_deps:
name: "Unit tests with optional dependencies: ${{ matrix.os }}-${{ matrix.python-version }}"
runs-on: ${{ matrix.os }}
strategy:
# Keep the other matrix combinations running when one of them fails.
fail-fast: false
matrix:
os: [macos-latest, SFDO-Tooling-Ubuntu, SFDO-Tooling-Windows]
python-version: ["3.11", "3.12", "3.13"]
steps:
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "${{ matrix.python-version }}"
- name: Set up uv
uses: SFDO-Tooling/setup-uv@main
with:
# uv is pinned to a fixed release for this job.
version: "0.5.0"
enable-cache: true
- name: Install dependencies
# --all-extras installs the project's optional dependency extras;
# -p selects the interpreter for the current matrix entry.
run: uv sync --all-extras -p ${{ matrix.python-version }}
- name: Run Pytest
# Coverage is collected for the cumulusci package; the empty
# --cov-report= value presumably suppresses the terminal report
# (confirm against the pytest-cov docs).
run: uv run pytest --cov-report= --cov=cumulusci

robot_api:
name: "Robot: No browser"
runs-on: SFDO-Tooling-Ubuntu
Expand Down
4 changes: 3 additions & 1 deletion .readthedocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@ build:
- asdf plugin add uv
- asdf install uv latest
- asdf global uv latest
- uv sync --only-group docs --frozen
- uv sync --group docs --frozen
- uv run cci task doc --write
- uv run cci flow doc > docs/flows.rst
- uv run -m sphinx -T -b html -d docs/_build/doctrees -D language=en docs $READTHEDOCS_OUTPUT/html

# Build documentation in the docs/ directory with Sphinx
Expand Down
1 change: 1 addition & 0 deletions AUTHORS.rst
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,4 @@ For example:
* Gustavo Tandeciarz (dcinzona)
* Chandler Anderson (zenibako)
* Ben French (BenjaminFrench)
* Rupert Barrow (rupertbarrow)
2 changes: 1 addition & 1 deletion cumulusci/__about__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "4.0.1"
__version__ = "4.2.0"
5 changes: 4 additions & 1 deletion cumulusci/tasks/bulkdata/mapping_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,10 @@ def _get_required_permission_types(
self, operation: DataOperationType
) -> T.Tuple[str]:
"""Return a tuple of the permission types required to execute an operation"""
if operation is DataOperationType.QUERY:
if (
operation is DataOperationType.QUERY
or self.action is DataOperationType.SELECT
):
return ("queryable",)
if (
operation is DataOperationType.INSERT
Expand Down
50 changes: 30 additions & 20 deletions cumulusci/tasks/bulkdata/select_utils.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,34 @@
import logging
import random
import re
import typing as T
from enum import Enum

import numpy as np
import pandas as pd
from annoy import AnnoyIndex
from pydantic import Field, root_validator, validator
from sklearn.feature_extraction.text import HashingVectorizer
from sklearn.preprocessing import StandardScaler

from cumulusci.core.enums import StrEnum
from cumulusci.tasks.bulkdata.extract_dataset_utils.hardcoded_default_declarations import (
DEFAULT_DECLARATIONS,
)
from cumulusci.tasks.bulkdata.utils import CaseInsensitiveDict
from cumulusci.utils import get_cci_upgrade_command
from cumulusci.utils.yaml.model_parser import CCIDictModel

logger = logging.getLogger(__name__)

# The packages below are optional. When any of them cannot be imported the
# module still loads: a warning is logged and OPTIONAL_DEPENDENCIES_AVAILABLE
# is set to False so callers can avoid the code paths that need them.
try:
    import numpy as np
    import pandas as pd
    from annoy import AnnoyIndex
    from sklearn.feature_extraction.text import HashingVectorizer
    from sklearn.preprocessing import StandardScaler
except ImportError:
    # Tell the user what is lost and how to install the missing packages.
    logger.warning(
        "Optional dependencies are missing. "
        "Handling high volumes of records for the 'select' functionality will be significantly slower, "
        "as optimizations for this feature are currently disabled. "
        f"To enable optimized performance, install all required dependencies using: {get_cci_upgrade_command()}[select]\n"
    )
    OPTIONAL_DEPENDENCIES_AVAILABLE = False
else:
    # Every optional import succeeded.
    OPTIONAL_DEPENDENCIES_AVAILABLE = True


class SelectStrategy(StrEnum):
"""Enum defining the different selection strategies requested."""
Expand Down Expand Up @@ -173,10 +185,6 @@ def standard_generate_query(
filter_clause=user_filter, limit_clause=limit, offset_clause=offset
)
else:
# Get the WHERE clause from DEFAULT_DECLARATIONS if available
declaration = DEFAULT_DECLARATIONS.get(sobject)
if declaration:
query += f" WHERE {declaration.where}"
query += f" LIMIT {limit}" if limit else ""
query += f" OFFSET {offset}" if offset else ""
return query, ["Id"]
Expand Down Expand Up @@ -266,10 +274,6 @@ def similarity_generate_query(
filter_clause=user_filter, limit_clause=limit, offset_clause=offset
)
else:
# Get the WHERE clause from DEFAULT_DECLARATIONS if available
declaration = DEFAULT_DECLARATIONS.get(sobject)
if declaration:
query += f" WHERE {declaration.where}"
query += f" LIMIT {limit}" if limit else ""
query += f" OFFSET {offset}" if offset else ""

Expand All @@ -292,7 +296,7 @@ def similarity_post_process(
]:
"""Processes the query results for the similarity selection strategy"""
# Handle case where query returns 0 records
if not query_records and not threshold:
if not query_records and threshold is None:
error_message = f"No records found for {sobject} in the target org."
return [], [], error_message

Expand All @@ -308,7 +312,7 @@ def similarity_post_process(
select_records = []
insert_records = []

if complexity_constant < 1000:
if complexity_constant < 1000 or not OPTIONAL_DEPENDENCIES_AVAILABLE:
select_records, insert_records = levenshtein_post_process(
load_records, query_records, fields, weights, threshold
)
Expand All @@ -328,6 +332,12 @@ def annoy_post_process(
threshold: T.Union[float, None],
) -> T.Tuple[T.List[dict], list]:
"""Processes the query results for the similarity selection strategy using Annoy algorithm for large number of records"""
# Add warning when threshold is 0
if threshold is not None and threshold == 0:
logger.warning(
"Warning: A threshold of 0 may miss exact matches in high volumes. Use a small value like 0.1 for better accuracy."
)

selected_records = []
insertion_candidates = []

Expand Down Expand Up @@ -397,7 +407,7 @@ def annoy_post_process(
# Retrieve the corresponding record from the database
record = query_record_data[neighbor_index]
closest_record_id = record_to_id_map[tuple(record)]
if threshold and (neighbor_distances[idx] >= threshold):
if threshold is not None and (neighbor_distances[idx] >= threshold):
selected_records.append(None)
insertion_candidates.append(load_shaped_records[i])
else:
Expand Down Expand Up @@ -445,7 +455,7 @@ def levenshtein_post_process(
select_record, target_records, similarity_weights
)

if distance_threshold and match_distance > distance_threshold:
if distance_threshold is not None and match_distance > distance_threshold:
# Append load record for insertion if distance exceeds threshold
insertion_candidates.append(load_record)
selected_records.append(None)
Expand Down
6 changes: 4 additions & 2 deletions cumulusci/tasks/bulkdata/snowfakery.py
Original file line number Diff line number Diff line change
Expand Up @@ -583,8 +583,10 @@ def _generate_and_load_initial_batch(self, working_directory: Path):
self.sets_finished_while_generating_template = num_records

new_template_dir = data_loader_new_directory_name(template_dir, self.run_until)
shutil.move(template_dir, new_template_dir)
template_dir = new_template_dir
# rename only if new_template_dir does not match template_dir
if template_dir.resolve() != new_template_dir.resolve():
shutil.move(template_dir, new_template_dir)
template_dir = new_template_dir

# don't send data tables to child processes. All they
# care about are ID->OID mappings
Expand Down
5 changes: 2 additions & 3 deletions cumulusci/tasks/bulkdata/step.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from contextlib import contextmanager
from itertools import tee
from typing import Any, Dict, List, NamedTuple, Optional, Union
from urllib.parse import quote

import requests
import salesforce_bulk
Expand Down Expand Up @@ -955,9 +956,7 @@ def _determine_limit_clause(self, total_num_records):
def _execute_soql_query(self, select_query, query_fields):
"""Executes the SOQL query and returns the flattened records."""
query_records = []
response = self.sf.restful(
requests.utils.requote_uri(f"query/?q={select_query}"), method="GET"
)
response = self.sf.restful(f"query/?q={quote(select_query)}", method="GET")
query_records.extend(self._flatten_response_records(response, query_fields))

while not response["done"]:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ interactions:

- request:
method: GET
uri: https://orgname.my.salesforce.com/services/data/v62.0/query/?q=SELECT%20Id,%20Name,%20Description,%20Phone,%20AccountNumber%20FROM%20Account%20WHERE%20Name%20!=%20'Sample%20Account%20for%20Entitlements'
uri: https://orgname.my.salesforce.com/services/data/v62.0/query/?q=SELECT%20Id,%20Name,%20Description,%20Phone,%20AccountNumber%20FROM%20Account
body: null
headers: *id004
response:
Expand Down Expand Up @@ -125,7 +125,7 @@ interactions:

- request:
method: GET
uri: https://orgname.my.salesforce.com/services/data/v62.0/query/?q=SELECT%20Id%20FROM%20Account%20WHERE%20Name%20!=%20'Sample%20Account%20for%20Entitlements'%20LIMIT%205
uri: https://orgname.my.salesforce.com/services/data/v62.0/query/?q=SELECT%20Id%20FROM%20Account%20LIMIT%205
body: null
headers: *id004
response:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ interactions:

- request:
method: GET
uri: https://orgname.my.salesforce.com/services/data/v62.0/query/?q=SELECT%20Id,%20TYPEOF%20Who%20WHEN%20Contact%20THEN%20LastName,%20Email%20WHEN%20Lead%20THEN%20LastName,%20Company%20ELSE%20Id%20END,%20TYPEOF%20What%20WHEN%20Account%20THEN%20Name,%20Description,%20Phone,%20AccountNumber%20ELSE%20Id%20END,%20Subject,%20DurationInMinutes,%20ActivityDateTime%20FROM%20Event
uri: https://orgname.my.salesforce.com/services/data/v62.0/query/?q=SELECT%20Id%2C%20TYPEOF%20Who%20WHEN%20Contact%20THEN%20LastName%2C%20Email%20WHEN%20Lead%20THEN%20LastName%2C%20Company%20ELSE%20Id%20END%2C%20TYPEOF%20What%20WHEN%20Account%20THEN%20Name%2C%20Description%2C%20Phone%2C%20AccountNumber%20ELSE%20Id%20END%2C%20Subject%2C%20DurationInMinutes%2C%20ActivityDateTime%20FROM%20Event
body: null
headers: *id004
response:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ interactions:

- request:
method: GET
uri: https://orgname.my.salesforce.com/services/data/v62.0/query/?q=SELECT%20Id,%20TYPEOF%20Who%20WHEN%20Contact%20THEN%20LastName,%20Email%20WHEN%20Lead%20THEN%20LastName,%20Company%20ELSE%20Id%20END,%20TYPEOF%20What%20WHEN%20Account%20THEN%20Name,%20Description,%20Phone,%20AccountNumber%20ELSE%20Id%20END,%20Subject,%20DurationInMinutes,%20ActivityDateTime%20FROM%20Event
uri: https://orgname.my.salesforce.com/services/data/v62.0/query/?q=SELECT%20Id%2C%20TYPEOF%20Who%20WHEN%20Contact%20THEN%20LastName%2C%20Email%20WHEN%20Lead%20THEN%20LastName%2C%20Company%20ELSE%20Id%20END%2C%20TYPEOF%20What%20WHEN%20Account%20THEN%20Name%2C%20Description%2C%20Phone%2C%20AccountNumber%20ELSE%20Id%20END%2C%20Subject%2C%20DurationInMinutes%2C%20ActivityDateTime%20FROM%20Event
body: null
headers: *id004
response:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ interactions:

- request:
method: GET
uri: https://orgname.my.salesforce.com/services/data/v62.0/query/?q=SELECT%20Id,%20Name,%20Description,%20Phone,%20AccountNumber%20FROM%20Account%20WHERE%20Name%20!=%20'Sample%20Account%20for%20Entitlements'
uri: https://orgname.my.salesforce.com/services/data/v62.0/query/?q=SELECT%20Id%2C%20Name%2C%20Description%2C%20Phone%2C%20AccountNumber%20FROM%20Account
body: null
headers: *id004
response:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ interactions:

- request:
method: GET
uri: https://orgname.my.salesforce.com/services/data/v62.0/query/?q=SELECT%20Id,%20Name,%20Description,%20Phone,%20AccountNumber%20FROM%20Account%20WHERE%20Name%20!=%20'Sample%20Account%20for%20Entitlements'
uri: https://orgname.my.salesforce.com/services/data/v62.0/query/?q=SELECT%20Id,%20Name,%20Description,%20Phone,%20AccountNumber%20FROM%20Account
body: null
headers: *id004
response:
Expand Down Expand Up @@ -125,7 +125,7 @@ interactions:

- request:
method: GET
uri: https://orgname.my.salesforce.com/services/data/v62.0/query/?q=SELECT%20Id%20FROM%20Account%20WHERE%20Name%20!=%20'Sample%20Account%20for%20Entitlements'%20LIMIT%205
uri: https://orgname.my.salesforce.com/services/data/v62.0/query/?q=SELECT%20Id%20FROM%20Account%20LIMIT%205
body: null
headers: *id004
response:
Expand Down
Loading

0 comments on commit 2c164c8

Please sign in to comment.