From 220114fda655b4c3638f9a5ee04506c30c03ad11 Mon Sep 17 00:00:00 2001 From: Tauquir <30658453+itstauq@users.noreply.github.com> Date: Mon, 16 Sep 2024 02:06:11 +0400 Subject: [PATCH 01/78] Add scenario tests to the CI --- .github/workflows/pr-tests-syft.yml | 85 +++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/.github/workflows/pr-tests-syft.yml b/.github/workflows/pr-tests-syft.yml index 873bcdda160..b8abb92cb40 100644 --- a/.github/workflows/pr-tests-syft.yml +++ b/.github/workflows/pr-tests-syft.yml @@ -188,6 +188,91 @@ jobs: max_attempts: 3 command: tox -e syft.test.notebook + pr-tests-syft-scenario: + strategy: + max-parallel: 99 + matrix: + # TODO try enabling on other OS + os: [ubuntu-latest] + python-version: ["3.12"] + deployment-type: ["python"] + bump-version: ["False"] + include: + - python-version: "3.11" + os: "ubuntu-latest" + deployment-type: "python" + - python-version: "3.10" + os: "ubuntu-latest" + deployment-type: "python" + - python-version: "3.12" + os: "ubuntu-latest" + deployment-type: "python" + bump-version: "True" + + runs-on: ${{ matrix.os }} + steps: + # - name: Permission to home directory + # if: matrix.os == 'ubuntu-latest' + # run: | + # sudo chown -R $USER:$USER $HOME + - name: "clean .git/config" + if: matrix.os == 'windows-latest' + continue-on-error: true + shell: bash + run: | + echo "deleting ${GITHUB_WORKSPACE}/.git/config" + rm ${GITHUB_WORKSPACE}/.git/config + + - uses: actions/checkout@v4 + + - name: Check for file changes + uses: dorny/paths-filter@v3 + id: changes + with: + base: ${{ github.ref }} + token: ${{ github.token }} + filters: .github/file-filters.yml + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + if: steps.changes.outputs.syft == 'true' || steps.changes.outputs.notebooks_scenario == 'true' + with: + python-version: ${{ matrix.python-version }} + + - name: Install pip packages + if: steps.changes.outputs.syft == 'true' || steps.changes.outputs.notebooks_scenario == 'true' + run: | + python -m pip install --upgrade pip + pip install uv==0.4.1 tox==4.18.0 tox-uv==1.11.2 + uv --version + + - name: Get uv cache dir + id: pip-cache + if: steps.changes.outputs.syft == 'true' || steps.changes.outputs.notebooks_scenario == 'true' + shell: bash + run: | + echo "dir=$(uv cache dir)" >> $GITHUB_OUTPUT + + - name: Load github cache + uses: actions/cache@v4 + if: steps.changes.outputs.syft == 'true' || steps.changes.outputs.notebooks_scenario == 'true' + with: + path: ${{ steps.pip-cache.outputs.dir }} + key: ${{ runner.os }}-uv-py${{ matrix.python-version }}-${{ hashFiles('setup.cfg') }} + restore-keys: | + ${{ runner.os }}-uv-py${{ matrix.python-version }}- + + - name: Run notebook scenario tests + if: steps.changes.outputs.syft == 'true' || steps.changes.outputs.notebooks_scenario == 'true' + env: + ORCHESTRA_DEPLOYMENT_TYPE: "${{ matrix.deployment-type }}" + BUMP_VERSION: "${{ matrix.bump-version }}" + TOX_PYTHON: python${{ matrix.python-version }} + shell: bash + run: | + export PATH="/usr/share/miniconda/bin:$PATH" + tox -e syft.test.scenario + pr-tests-syft-notebook-scenario: strategy: max-parallel: 99 From f721f731c3aac13fa35e104feb5d79d087fbf423 Mon Sep 17 00:00:00 2001 From: Tauquir <30658453+itstauq@users.noreply.github.com> Date: Mon, 16 Sep 2024 16:22:08 +0400 Subject: [PATCH 02/78] add support to run scenario tests using mock apis --- tests/scenarios/bigquery/helpers/make.py | 278 ++---------------- .../scenarios/bigquery/level_2_basic_test.py | 11 +- 2 files 
changed, 32 insertions(+), 257 deletions(-) diff --git a/tests/scenarios/bigquery/helpers/make.py b/tests/scenarios/bigquery/helpers/make.py index 4ce8ada996d..e83bf09376e 100644 --- a/tests/scenarios/bigquery/helpers/make.py +++ b/tests/scenarios/bigquery/helpers/make.py @@ -1,10 +1,25 @@ +# stdlib +import os +import sys + # third party from helpers.fixtures_sync import create_user from unsync import unsync # syft absolute import syft as sy -from syft import test_settings + +# TODO remove hacky imports once https://github.com/OpenMined/PySyft/pull/9291/ is merged +notebook_helpers_module_path = os.path.abspath( + os.path.join(os.path.dirname(__file__), "../../../../notebooks/notebook_helpers/") +) +if notebook_helpers_module_path not in sys.path: + sys.path.append(notebook_helpers_module_path) + + +# third party +from apis import make_schema # noqa: E402 +from apis import make_test_query # noqa: E402 # Define any helper methods for our rate limiter @@ -35,178 +50,17 @@ async def create_users(root_client, events, users, event_name): @unsync async def create_endpoints_query(events, client, worker_pool_name: str, register: str): - @sy.api_endpoint_method( + private_query_function = make_test_query( settings={ - "credentials": test_settings.gce_service_account.to_dict(), - "region": test_settings.gce_region, - "project_id": test_settings.gce_project_id, + "rate_limiter_enabled": False, } ) - def private_query_function( - context, - sql_query: str, - ) -> str: - # third party - from google.cloud import bigquery # noqa: F811 - from google.oauth2 import service_account - - # syft absolute - from syft.service.response import SyftError - - # Auth for Bigquer based on the workload identity - credentials = service_account.Credentials.from_service_account_info( - context.settings["credentials"] - ) - scoped_credentials = credentials.with_scopes( - ["https://www.googleapis.com/auth/cloud-platform"] - ) - - client = bigquery.Client( - credentials=scoped_credentials, - location=context.settings["region"], - ) - - try: - rows = client.query_and_wait( - sql_query, - project=context.settings["project_id"], - ) - - if rows.total_rows > 1_000_000: - return SyftError( - message="Please only write queries that gather aggregate statistics" - ) - - return rows.to_dataframe() - except Exception as e: - # We MUST handle the errors that we want to be visible to the data owners. - # Any exception not catched is visible only to the data owner. - # not a bigquery exception - if not hasattr(e, "_errors"): - output = f"got exception e: {type(e)} {str(e)}" - return SyftError( - message=f"An error occured executing the API call {output}" - ) - # return SyftError(message="An error occured executing the API call, please contact the domain owner.") - - if e._errors[0]["reason"] in [ - "badRequest", - "blocked", - "duplicate", - "invalidQuery", - "invalid", - "jobBackendError", - "jobInternalError", - "notFound", - "notImplemented", - "rateLimitExceeded", - "resourceInUse", - "resourcesExceeded", - "tableUnavailable", - "timeout", - ]: - return SyftError( - message="Error occured during the call: " + e._errors[0]["message"] - ) - else: - return SyftError( - message="An error occured executing the API call, please contact the domain owner." 
- ) - - # Define a mock endpoint that the researchers can use for testing - @sy.api_endpoint_method( + mock_query_function = make_test_query( settings={ - "credentials": test_settings.gce_service_account.to_dict(), - "region": test_settings.gce_region, - "project_id": test_settings.gce_project_id, - "CALLS_PER_MIN": 10, - }, - helper_functions=[is_within_rate_limit], + "rate_limiter_enabled": True, + "calls_per_min": 10, + } ) - def mock_query_function( - context, - sql_query: str, - ) -> str: - # stdlib - import datetime - - # third party - from google.cloud import bigquery # noqa: F811 - from google.oauth2 import service_account - - # syft absolute - from syft.service.response import SyftError - - # Auth for Bigquer based on the workload identity - credentials = service_account.Credentials.from_service_account_info( - context.settings["credentials"] - ) - scoped_credentials = credentials.with_scopes( - ["https://www.googleapis.com/auth/cloud-platform"] - ) - - client = bigquery.Client( - credentials=scoped_credentials, - location=context.settings["region"], - ) - - # Store a dict with the calltimes for each user, via the email. - if context.user.email not in context.state.keys(): - context.state[context.user.email] = [] - - if not context.code.is_within_rate_limit(context): - return SyftError(message="Rate limit of calls per minute has been reached.") - - try: - context.state[context.user.email].append(datetime.datetime.now()) - - rows = client.query_and_wait( - sql_query, - project=context.settings["project_id"], - ) - - if rows.total_rows > 1_000_000: - return SyftError( - message="Please only write queries that gather aggregate statistics" - ) - - return rows.to_dataframe() - - except Exception as e: - # not a bigquery exception - if not hasattr(e, "_errors"): - output = f"got exception e: {type(e)} {str(e)}" - return SyftError( - message=f"An error occured executing the API call {output}" - ) - # return SyftError(message="An error occured executing the API call, please contact the domain owner.") - - # Treat all errors that we would like to be forwarded to the data scientists - # By default, any exception is only visible to the data owner. - - if e._errors[0]["reason"] in [ - "badRequest", - "blocked", - "duplicate", - "invalidQuery", - "invalid", - "jobBackendError", - "jobInternalError", - "notFound", - "notImplemented", - "rateLimitExceeded", - "resourceInUse", - "resourcesExceeded", - "tableUnavailable", - "timeout", - ]: - return SyftError( - message="Error occured during the call: " + e._errors[0]["message"] - ) - else: - return SyftError( - message="An error occured executing the API call, please contact the domain owner." 
- ) new_endpoint = sy.TwinAPIEndpoint( path="bigquery.test_query", @@ -227,96 +81,12 @@ def mock_query_function( @unsync async def create_endpoints_schema(events, client, worker_pool_name: str, register: str): - @sy.api_endpoint( - path="bigquery.schema", - description="This endpoint allows for visualising the metadata of tables available in BigQuery.", + schema_function = make_schema( settings={ - "credentials": test_settings.gce_service_account.to_dict(), - "region": test_settings.gce_region, - "project_id": test_settings.gce_project_id, - "dataset_1": test_settings.dataset_1, - "table_1": test_settings.table_1, - "table_2": test_settings.table_2, - "CALLS_PER_MIN": 5, + "calls_per_min": 5, }, - helper_functions=[ - is_within_rate_limit - ], # Adds ratelimit as this is also a method available to data scientists worker_pool=worker_pool_name, ) - def schema_function( - context, - ) -> str: - # stdlib - import datetime - - # third party - from google.cloud import bigquery # noqa: F811 - from google.oauth2 import service_account - import pandas as pd - - # syft absolute - from syft.service.response import SyftError - - # Auth for Bigquer based on the workload identity - credentials = service_account.Credentials.from_service_account_info( - context.settings["credentials"] - ) - scoped_credentials = credentials.with_scopes( - ["https://www.googleapis.com/auth/cloud-platform"] - ) - - client = bigquery.Client( - credentials=scoped_credentials, - location=context.settings["region"], - ) - - if context.user.email not in context.state.keys(): - context.state[context.user.email] = [] - - if not context.code.is_within_rate_limit(context): - return SyftError(message="Rate limit of calls per minute has been reached.") - - try: - context.state[context.user.email].append(datetime.datetime.now()) - - # Formats the data schema in a data frame format - # Warning: the only supported format types are primitives, np.ndarrays and pd.DataFrames - - data_schema = [] - for table_id in [ - f"{context.settings['dataset_1']}.{context.settings['table_1']}", - f"{context.settings['dataset_1']}.{context.settings['table_2']}", - ]: - table = client.get_table(table_id) - for schema in table.schema: - data_schema.append( - { - "project": str(table.project), - "dataset_id": str(table.dataset_id), - "table_id": str(table.table_id), - "schema_name": str(schema.name), - "schema_field": str(schema.field_type), - "description": str(table.description), - "num_rows": str(table.num_rows), - } - ) - return pd.DataFrame(data_schema) - - except Exception as e: - # not a bigquery exception - if not hasattr(e, "_errors"): - output = f"got exception e: {type(e)} {str(e)}" - return SyftError( - message=f"An error occured executing the API call {output}" - ) - # return SyftError(message="An error occured executing the API call, please contact the domain owner.") - - # Should add appropriate error handling for what should be exposed to the data scientists. - return SyftError( - message="An error occured executing the API call, please contact the domain owner." 
- ) - result = client.custom_api.add(endpoint=schema_function) if register: diff --git a/tests/scenarios/bigquery/level_2_basic_test.py b/tests/scenarios/bigquery/level_2_basic_test.py index 6f4f4372ad7..0d43dcce9f0 100644 --- a/tests/scenarios/bigquery/level_2_basic_test.py +++ b/tests/scenarios/bigquery/level_2_basic_test.py @@ -206,9 +206,14 @@ async def set_endpoint_settings( def query_sql(): - query = f"SELECT {test_settings.table_2_col_id}, AVG({test_settings.table_2_col_score}) AS average_score \ - FROM {test_settings.dataset_2}.{test_settings.table_2} \ - GROUP BY {test_settings.table_2_col_id} \ + dataset_2 = test_settings.get("dataset_2", default="dataset_2") + table_2 = test_settings.get("table_2", default="table_2") + table_2_col_id = test_settings.get("table_2_col_id", default="table_id") + table_2_col_score = test_settings.get("table_2_col_score", default="colname") + + query = f"SELECT {table_2_col_id}, AVG({table_2_col_score}) AS average_score \ + FROM {dataset_2}.{table_2} \ + GROUP BY {table_2_col_id} \ LIMIT 10000" return query From 5225e39bc045df98567ac90d8ee81cd981c2690d Mon Sep 17 00:00:00 2001 From: Tauquir <30658453+itstauq@users.noreply.github.com> Date: Tue, 17 Sep 2024 00:40:38 +0400 Subject: [PATCH 03/78] Clean up 1 --- .github/workflows/pr-tests-syft.yml | 1 - tests/scenarios/bigquery/helpers/make.py | 11 ++++++----- tests/scenarios/bigquery/level_2_basic_test.py | 1 - 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/.github/workflows/pr-tests-syft.yml b/.github/workflows/pr-tests-syft.yml index b8abb92cb40..95c2a106f63 100644 --- a/.github/workflows/pr-tests-syft.yml +++ b/.github/workflows/pr-tests-syft.yml @@ -270,7 +270,6 @@ jobs: TOX_PYTHON: python${{ matrix.python-version }} shell: bash run: | - export PATH="/usr/share/miniconda/bin:$PATH" tox -e syft.test.scenario pr-tests-syft-notebook-scenario: diff --git a/tests/scenarios/bigquery/helpers/make.py b/tests/scenarios/bigquery/helpers/make.py index e83bf09376e..ee7d5d6621c 100644 --- a/tests/scenarios/bigquery/helpers/make.py +++ b/tests/scenarios/bigquery/helpers/make.py @@ -8,18 +8,19 @@ # syft absolute import syft as sy +from syft.util.util import find_base_dir_with_tox_ini # TODO remove hacky imports once https://github.com/OpenMined/PySyft/pull/9291/ is merged -notebook_helpers_module_path = os.path.abspath( - os.path.join(os.path.dirname(__file__), "../../../../notebooks/notebook_helpers/") -) +base_dir = find_base_dir_with_tox_ini() +notebook_helpers_module_path = os.path.abspath(os.path.join(base_dir, "notebooks/")) if notebook_helpers_module_path not in sys.path: sys.path.append(notebook_helpers_module_path) # third party -from apis import make_schema # noqa: E402 -from apis import make_test_query # noqa: E402 +# The below two imports work only after the above sys.path.append +from notebook_helpers.apis import make_schema # noqa: E402 +from notebook_helpers.apis import make_test_query # noqa: E402 # Define any helper methods for our rate limiter diff --git a/tests/scenarios/bigquery/level_2_basic_test.py b/tests/scenarios/bigquery/level_2_basic_test.py index 0d43dcce9f0..4cbb01c5de9 100644 --- a/tests/scenarios/bigquery/level_2_basic_test.py +++ b/tests/scenarios/bigquery/level_2_basic_test.py @@ -57,7 +57,6 @@ async def create_prebuilt_worker_image(events, client, expected_tag, event_name) docker_config = sy.PrebuiltWorkerConfig(tag=f"{external_registry}/{expected_tag}") result = client.api.services.worker_image.submit(worker_config=docker_config) assert isinstance(result, 
sy.SyftSuccess)
-    asyncio.sleep(5)
     events.register(event_name)


From 22c587b45f0b0bb7037757fb33da6cf1953ade55 Mon Sep 17 00:00:00 2001
From: Tauquir <30658453+itstauq@users.noreply.github.com>
Date: Tue, 17 Sep 2024 13:38:32 +0400
Subject: [PATCH 04/78] Refactor

---
 .../bigquery/helpers/{make.py => api.py}      |  35 +-
 tests/scenarios/bigquery/helpers/asserts.py   |  58 +++
 tests/scenarios/bigquery/helpers/code.py      |  91 ++++
 tests/scenarios/bigquery/helpers/events.py    |  40 +-
 .../bigquery/helpers/fixtures_sync.py         |   9 +
 tests/scenarios/bigquery/helpers/users.py     |  43 ++
 tests/scenarios/bigquery/helpers/workers.py   |  63 +++
 .../scenarios/bigquery/level_0_basic_test.py  |  28 ++
 .../scenarios/bigquery/level_2_basic_test.py  | 396 +++++----------
 9 files changed, 405 insertions(+), 358 deletions(-)
 rename tests/scenarios/bigquery/helpers/{make.py => api.py} (85%)
 create mode 100644 tests/scenarios/bigquery/helpers/code.py
 create mode 100644 tests/scenarios/bigquery/helpers/workers.py
 create mode 100644 tests/scenarios/bigquery/level_0_basic_test.py

diff --git a/tests/scenarios/bigquery/helpers/make.py b/tests/scenarios/bigquery/helpers/api.py
similarity index 85%
rename from tests/scenarios/bigquery/helpers/make.py
rename to tests/scenarios/bigquery/helpers/api.py
index ee7d5d6621c..1d6619f50c6 100644
--- a/tests/scenarios/bigquery/helpers/make.py
+++ b/tests/scenarios/bigquery/helpers/api.py
@@ -3,7 +3,6 @@
 import sys
 
 # third party
-from helpers.fixtures_sync import create_user
 from unsync import unsync
 
 # syft absolute
 import syft as sy
@@ -42,13 +41,6 @@ def is_within_rate_limit(context):
     return sum(calls_last_min) < settings["CALLS_PER_MIN"]
 
 
-@unsync
-async def create_users(root_client, events, users, event_name):
-    for test_user in users:
-        create_user(root_client, test_user)
-    events.register(event_name)
-
-
 @unsync
 async def create_endpoints_query(events, client, worker_pool_name: str, register: str):
     private_query_function = make_test_query(
@@ -152,3 +144,30 @@ def execute_query(query: str, endpoint):
             events.register(register)
         else:
             print("Failed to add api endpoint")
+
+
+@unsync
+async def set_endpoint_settings(
+    events, client, path, kwargs, after: str, register: str
+):
+    if after:
+        await events.await_for(event_name=after)
+
+    # Here, we update the endpoint to time out after 100s (rather than the default of 60s)
+    result = client.api.services.api.update(endpoint_path=path, **kwargs)
+    if isinstance(result, sy.SyftSuccess):
+        events.register(register)
+
+
+def api_for_path(client, path):
+    root = client.api.services
+    for part in path.split("."):
+        if hasattr(root, part):
+            root = getattr(root, part)
+    return root
+
+
+def run_api_path(client, path, **kwargs):
+    api_method = api_for_path(client, path)
+    result = api_method(**kwargs)
+    return result
diff --git a/tests/scenarios/bigquery/helpers/asserts.py b/tests/scenarios/bigquery/helpers/asserts.py
index ad8a97b1eaf..734e02fd0f3 100644
--- a/tests/scenarios/bigquery/helpers/asserts.py
+++ b/tests/scenarios/bigquery/helpers/asserts.py
@@ -7,6 +7,9 @@
 # third party
 import anyio
 
+# syft absolute
+import syft as sy
+
 
 class FailedAssert(Exception):
     pass
@@ -49,3 +52,58 @@ def ensure_package_installed(package_name, module_name):
         install_package(package_name)
     else:
         print(f"{module_name} is already installed.")
+
+
+async def result_is(
+    events,
+    expr,
+    matches: bool | str | type | object,
+    after: str | None = None,
+    register: str | None = None,
+):
+    if after:
+        await events.await_for(event_name=after)
+
+    lambda_source = inspect.getsource(expr)
+    try:
+        result = None
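+        # evaluate the expression; a raised SyftException is captured as the
+        # result so it can be matched against `matches` below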
+        try:
+            result = expr()
+        except Exception as e:
+            if isinstance(e, sy.SyftException):
+                result = e
+            else:
+                raise e
+
+        assertion = False
+        if isinstance(matches, bool):
+            assertion = result == matches
+        elif isinstance(matches, type):
+            assertion = isinstance(result, matches)
+        elif isinstance(matches, str):
+            message = matches.replace("*", "")
+            assertion = message in str(result)
+        else:
+            type_matches = isinstance(result, type(matches))
+            message_matches = True
+
+            message = None
+            if isinstance(matches, sy.service.response.SyftResponseMessage):
+                message = matches.message.replace("*", "")
+            elif isinstance(result, sy.SyftException):
+                message = matches.public_message.replace("*", "")
+
+            if message:
+                if isinstance(result, sy.service.response.SyftResponseMessage):
+                    message_matches = message in str(result)
+                elif isinstance(result, sy.SyftException):
+                    message_matches = message in result.public_message
+
+            assertion = type_matches and message_matches
+        if assertion and register:
+            events.register(event_name=register)
+        return assertion
+    except Exception as e:
+        print(f"isinstance({lambda_source}, {matches}). {e}")
+
+    return False
diff --git a/tests/scenarios/bigquery/helpers/code.py b/tests/scenarios/bigquery/helpers/code.py
new file mode 100644
index 00000000000..9e19f76709b
--- /dev/null
+++ b/tests/scenarios/bigquery/helpers/code.py
@@ -0,0 +1,91 @@
+# stdlib
+import asyncio
+
+# third party
+from helpers.api import api_for_path
+from unsync import unsync
+
+# syft absolute
+from syft.service.code.user_code import UserCode
+from syft.service.job.job_stash import Job
+
+
+def get_approved(client):
+    results = []
+    for request in client.requests:
+        if str(request.status) == "RequestStatus.APPROVED":
+            results.append(request)
+    return results
+
+
+def run_code(client, method_name, **kwargs):
+    service_func_name = method_name
+    if "*" in method_name:
+        matcher = method_name.replace("*", "")
+        all_code = client.api.services.code.get_all()
+        for code in all_code:
+            if matcher in code.service_func_name:
+                service_func_name = code.service_func_name
+                break
+
+    api_method = api_for_path(client, path=f"code.{service_func_name}")
+    # can raise
+    result = api_method(**kwargs)
+    return result
+
+
+def approve_and_deposit(client, request_id):
+    request = client.requests.get_by_uid(uid=request_id)
+    code = request.code
+
+    if not isinstance(code, UserCode):
+        return
+
+    func_name = request.code.service_func_name
+    job = run_code(client, func_name, blocking=False)
+    if not isinstance(job, Job):
+        return None
+
+    job.wait()
+    job_info = job.info(result=True)
+    result = request.deposit_result(job_info, approve=True)
+    return result
+
+
+def get_pending(client):
+    results = []
+    for request in client.requests:
+        if str(request.status) == "RequestStatus.PENDING":
+            results.append(request)
+    return results
+
+
+@unsync
+async def triage_requests(events, client, after, register):
+    if after:
+        await events.await_for(event_name=after)
+    while True:
+        await asyncio.sleep(2)
+        requests = get_pending(client)
+        for request in requests:
+            approve_and_deposit(client, request.id)
+            events.register(event_name=register)
+
+
+@unsync
+async def get_results(events, client, method_name, after, register):
+    method_name = method_name.replace("*", "")
+    if after:
+        await events.await_for(event_name=after)
+    while True:
+        await asyncio.sleep(1)
+        requests = get_approved(client)
+        for request in requests:
+            if method_name in request.code.service_func_name:
+                job = run_code(client, request.code.service_func_name,
blocking=False) + if not isinstance(job, Job): + continue + else: + result = job.wait().get() + if hasattr(result, "__len__") and len(result) == 10000: + events.register(event_name=register) diff --git a/tests/scenarios/bigquery/helpers/events.py b/tests/scenarios/bigquery/helpers/events.py index b666f7b98de..9e0f2383dc2 100644 --- a/tests/scenarios/bigquery/helpers/events.py +++ b/tests/scenarios/bigquery/helpers/events.py @@ -10,25 +10,27 @@ # third party from unsync import unsync -EVENT_USER_ADMIN_CREATED = "user_admin_created" -EVENT_USERS_CREATED = "users_created" -EVENT_DATASET_UPLOADED = "dataset_uploaded" -EVENT_DATASET_MOCK_READABLE = "dataset_mock_readable" -EVENT_PREBUILT_WORKER_IMAGE_BIGQUERY_CREATED = "prebuilt_worker_image_bigquery_created" -EVENT_EXTERNAL_REGISTRY_BIGQUERY_CREATED = "external_registry_bigquery_created" -EVENT_WORKER_POOL_CREATED = "worker_pool_created" -EVENT_ALLOW_GUEST_SIGNUP_DISABLED = "allow_guest_signup_disabled" -EVENT_USERS_CREATED_CHECKED = "users_created_checked" -EVENT_SCHEMA_ENDPOINT_CREATED = "schema_endpoint_created" -EVENT_SUBMIT_QUERY_ENDPOINT_CREATED = "submit_query_endpoint_created" -EVENT_SUBMIT_QUERY_ENDPOINT_CONFIGURED = "submit_query_endpoint_configured" -EVENT_USERS_CAN_QUERY_MOCK = "users_can_query_mock" -EVENT_USERS_CAN_SUBMIT_QUERY = "users_can_submit_query" -EVENT_ADMIN_APPROVED_FIRST_REQUEST = "admin_approved_first_request" -EVENT_USERS_CAN_GET_APPROVED_RESULT = "users_can_get_approved_result" -EVENT_USERS_QUERY_NOT_READY = "users_query_not_ready" -EVENT_QUERY_ENDPOINT_CREATED = "query_endpoint_created" -EVENT_QUERY_ENDPOINT_CONFIGURED = "query_endpoint_configured" + +class Event: + USER_ADMIN_CREATED = "user_admin_created" + USERS_CREATED = "users_created" + DATASET_UPLOADED = "dataset_uploaded" + DATASET_MOCK_READABLE = "dataset_mock_readable" + PREBUILT_WORKER_IMAGE_BIGQUERY_CREATED = "prebuilt_worker_image_bigquery_created" + EXTERNAL_REGISTRY_BIGQUERY_CREATED = "external_registry_bigquery_created" + WORKER_POOL_CREATED = "worker_pool_created" + ALLOW_GUEST_SIGNUP_DISABLED = "allow_guest_signup_disabled" + USERS_CREATED_CHECKED = "users_created_checked" + SCHEMA_ENDPOINT_CREATED = "schema_endpoint_created" + SUBMIT_QUERY_ENDPOINT_CREATED = "submit_query_endpoint_created" + SUBMIT_QUERY_ENDPOINT_CONFIGURED = "submit_query_endpoint_configured" + USERS_CAN_QUERY_MOCK = "users_can_query_mock" + USERS_CAN_SUBMIT_QUERY = "users_can_submit_query" + ADMIN_APPROVED_FIRST_REQUEST = "admin_approved_first_request" + USERS_CAN_GET_APPROVED_RESULT = "users_can_get_approved_result" + USERS_QUERY_NOT_READY = "users_query_not_ready" + QUERY_ENDPOINT_CREATED = "query_endpoint_created" + QUERY_ENDPOINT_CONFIGURED = "query_endpoint_configured" @dataclass diff --git a/tests/scenarios/bigquery/helpers/fixtures_sync.py b/tests/scenarios/bigquery/helpers/fixtures_sync.py index 266f27128dd..b32ddd5d3b0 100644 --- a/tests/scenarios/bigquery/helpers/fixtures_sync.py +++ b/tests/scenarios/bigquery/helpers/fixtures_sync.py @@ -5,6 +5,7 @@ from faker import Faker from helpers.users import TestUser import pandas as pd +from unsync import unsync # syft absolute import syft as sy @@ -120,6 +121,7 @@ def create_dataset(name: str): def make_server(request: Any | None = None, server_name: str | None = None) -> Any: + # TODO: make it compatible with remote deployments print("making server") if server_name is None: faker = Faker() @@ -142,3 +144,10 @@ def cleanup(): else: request.addfinalizer(cleanup) return server + + +@unsync +async def create_users(root_client, 
events, users, event_name): + for test_user in users: + create_user(root_client, test_user) + events.register(event_name) diff --git a/tests/scenarios/bigquery/helpers/users.py b/tests/scenarios/bigquery/helpers/users.py index e5d142befce..fe3895f8468 100644 --- a/tests/scenarios/bigquery/helpers/users.py +++ b/tests/scenarios/bigquery/helpers/users.py @@ -2,7 +2,12 @@ from dataclasses import dataclass from typing import Any +# third party +from faker import Faker +from unsync import unsync + # syft absolute +import syft as sy from syft.service.user.user_roles import ServiceRole @@ -21,3 +26,41 @@ def client(self, server=None): self.server_cache = server return server.login(email=self.email, password=self.password) + + +@unsync +async def set_settings_allow_guest_signup( + events, client, enabled, event_name: str | None = None +): + result = client.settings.allow_guest_signup(enable=enabled) + if event_name: + if isinstance(result, sy.SyftSuccess): + events.register(event_name) + + +@unsync +async def check_users_created(events, client, users, event_name, event_set): + expected_emails = {user.email for user in users} + found_emails = set() + await events.await_for(event_name=event_name) + user_results = client.api.services.user.get_all() + for user_result in user_results: + if user_result.email in expected_emails: + found_emails.add(user_result.email) + + if len(found_emails) == len(expected_emails): + events.register(event_set) + + +def guest_register(client, test_user): + guest_client = client.guest() + fake = Faker() + result = guest_client.register( + name=test_user.name, + email=test_user.email, + password=test_user.password, + password_verify=test_user.password, + institution=fake.company(), + website=fake.url(), + ) + return result diff --git a/tests/scenarios/bigquery/helpers/workers.py b/tests/scenarios/bigquery/helpers/workers.py new file mode 100644 index 00000000000..4f64cda85fb --- /dev/null +++ b/tests/scenarios/bigquery/helpers/workers.py @@ -0,0 +1,63 @@ +# third party +from unsync import unsync + +# syft absolute +import syft as sy +from syft import test_settings + + +@unsync +async def get_prebuilt_worker_image(events, client, expected_tag, event_name): + await events.await_for(event_name=event_name, show=True) + worker_images = client.images.get_all() + for worker_image in worker_images: + if expected_tag in str(worker_image.image_identifier): + assert expected_tag in str(worker_image.image_identifier) + return worker_image + + +@unsync +async def create_prebuilt_worker_image(events, client, expected_tag, event_name): + external_registry = test_settings.get("external_registry", default="docker.io") + docker_config = sy.PrebuiltWorkerConfig(tag=f"{external_registry}/{expected_tag}") + result = client.api.services.worker_image.submit(worker_config=docker_config) + assert isinstance(result, sy.SyftSuccess) + events.register(event_name) + + +@unsync +async def add_external_registry(events, client, event_name): + external_registry = test_settings.get("external_registry", default="docker.io") + result = client.api.services.image_registry.add(external_registry) + assert isinstance(result, sy.SyftSuccess) + events.register(event_name) + + +@unsync +async def create_worker_pool( + events, client, worker_pool_name, worker_pool_result, event_name +): + # block until this is available + worker_image = worker_pool_result.result(timeout=5) + + result = client.api.services.worker_pool.launch( + pool_name=worker_pool_name, + image_uid=worker_image.id, + num_workers=1, + ) + + if 
isinstance(result, list) and isinstance( + result[0], sy.service.worker.worker_pool.ContainerSpawnStatus + ): + events.register(event_name) + + +@unsync +async def check_worker_pool_exists(events, client, worker_pool_name, event_name): + timeout = 30 + await events.await_for(event_name=event_name, timeout=timeout) + pools = client.worker_pools.get_all() + for pool in pools: + if worker_pool_name == pool.name: + assert worker_pool_name == pool.name + return worker_pool_name == pool.name diff --git a/tests/scenarios/bigquery/level_0_basic_test.py b/tests/scenarios/bigquery/level_0_basic_test.py new file mode 100644 index 00000000000..e7cbcfd71e8 --- /dev/null +++ b/tests/scenarios/bigquery/level_0_basic_test.py @@ -0,0 +1,28 @@ +# @pytest.mark.asyncio +# async def test_level_2_basic_scenario(request): +# ensure_package_installed("google-cloud-bigquery", "google.cloud.bigquery") +# ensure_package_installed("db-dtypes", "db_dtypes") + +# scenario = Scenario( +# name="test_create_apis_and_triage_requests", +# events=[ +# EVENT_USER_ADMIN_CREATED, +# EVENT_PREBUILT_WORKER_IMAGE_BIGQUERY_CREATED, +# EVENT_EXTERNAL_REGISTRY_BIGQUERY_CREATED, +# EVENT_WORKER_POOL_CREATED, +# EVENT_ALLOW_GUEST_SIGNUP_DISABLED, +# EVENT_USERS_CREATED, +# EVENT_USERS_CREATED_CHECKED, +# EVENT_QUERY_ENDPOINT_CREATED, +# EVENT_QUERY_ENDPOINT_CONFIGURED, +# EVENT_SCHEMA_ENDPOINT_CREATED, +# EVENT_SUBMIT_QUERY_ENDPOINT_CREATED, +# EVENT_SUBMIT_QUERY_ENDPOINT_CONFIGURED, +# EVENT_USERS_CAN_QUERY_MOCK, +# EVENT_USERS_CAN_SUBMIT_QUERY, +# EVENT_USERS_QUERY_NOT_READY, +# EVENT_ADMIN_APPROVED_FIRST_REQUEST, +# EVENT_USERS_CAN_GET_APPROVED_RESULT, +# ], +# ) + diff --git a/tests/scenarios/bigquery/level_2_basic_test.py b/tests/scenarios/bigquery/level_2_basic_test.py index 4cbb01c5de9..4d8b8e4a7bd 100644 --- a/tests/scenarios/bigquery/level_2_basic_test.py +++ b/tests/scenarios/bigquery/level_2_basic_test.py @@ -1,207 +1,36 @@ # stdlib -import asyncio -import inspect # third party -from faker import Faker +from helpers.api import create_endpoints_query +from helpers.api import create_endpoints_schema +from helpers.api import create_endpoints_submit_query +from helpers.api import run_api_path +from helpers.api import set_endpoint_settings from helpers.asserts import ensure_package_installed -from helpers.events import EVENT_ADMIN_APPROVED_FIRST_REQUEST -from helpers.events import EVENT_ALLOW_GUEST_SIGNUP_DISABLED -from helpers.events import EVENT_EXTERNAL_REGISTRY_BIGQUERY_CREATED -from helpers.events import EVENT_PREBUILT_WORKER_IMAGE_BIGQUERY_CREATED -from helpers.events import EVENT_QUERY_ENDPOINT_CONFIGURED -from helpers.events import EVENT_QUERY_ENDPOINT_CREATED -from helpers.events import EVENT_SCHEMA_ENDPOINT_CREATED -from helpers.events import EVENT_SUBMIT_QUERY_ENDPOINT_CONFIGURED -from helpers.events import EVENT_SUBMIT_QUERY_ENDPOINT_CREATED -from helpers.events import EVENT_USERS_CAN_GET_APPROVED_RESULT -from helpers.events import EVENT_USERS_CAN_QUERY_MOCK -from helpers.events import EVENT_USERS_CAN_SUBMIT_QUERY -from helpers.events import EVENT_USERS_CREATED -from helpers.events import EVENT_USERS_CREATED_CHECKED -from helpers.events import EVENT_USERS_QUERY_NOT_READY -from helpers.events import EVENT_USER_ADMIN_CREATED -from helpers.events import EVENT_WORKER_POOL_CREATED +from helpers.asserts import result_is +from helpers.code import get_results +from helpers.code import run_code +from helpers.code import triage_requests +from helpers.events import Event from helpers.events import EventManager from 
helpers.events import Scenario +from helpers.fixtures_sync import create_users from helpers.fixtures_sync import make_admin from helpers.fixtures_sync import make_server from helpers.fixtures_sync import make_user -from helpers.make import create_endpoints_query -from helpers.make import create_endpoints_schema -from helpers.make import create_endpoints_submit_query -from helpers.make import create_users +from helpers.users import check_users_created +from helpers.users import guest_register +from helpers.users import set_settings_allow_guest_signup +from helpers.workers import add_external_registry +from helpers.workers import check_worker_pool_exists +from helpers.workers import create_prebuilt_worker_image +from helpers.workers import create_worker_pool +from helpers.workers import get_prebuilt_worker_image import pytest -from unsync import unsync # syft absolute import syft as sy from syft import test_settings -from syft.service.code.user_code import UserCode -from syft.service.job.job_stash import Job - - -@unsync -async def get_prebuilt_worker_image(events, client, expected_tag, event_name): - await events.await_for(event_name=event_name, show=True) - worker_images = client.images.get_all() - for worker_image in worker_images: - if expected_tag in str(worker_image.image_identifier): - assert expected_tag in str(worker_image.image_identifier) - return worker_image - - -@unsync -async def create_prebuilt_worker_image(events, client, expected_tag, event_name): - external_registry = test_settings.get("external_registry", default="docker.io") - docker_config = sy.PrebuiltWorkerConfig(tag=f"{external_registry}/{expected_tag}") - result = client.api.services.worker_image.submit(worker_config=docker_config) - assert isinstance(result, sy.SyftSuccess) - events.register(event_name) - - -@unsync -async def add_external_registry(events, client, event_name): - external_registry = test_settings.get("external_registry", default="docker.io") - result = client.api.services.image_registry.add(external_registry) - assert isinstance(result, sy.SyftSuccess) - events.register(event_name) - - -@unsync -async def create_worker_pool( - events, client, worker_pool_name, worker_pool_result, event_name -): - # block until this is available - worker_image = worker_pool_result.result(timeout=5) - - result = client.api.services.worker_pool.launch( - pool_name=worker_pool_name, - image_uid=worker_image.id, - num_workers=1, - ) - - if isinstance(result, list) and isinstance( - result[0], sy.service.worker.worker_pool.ContainerSpawnStatus - ): - events.register(event_name) - - -@unsync -async def check_worker_pool_exists(events, client, worker_pool_name, event_name): - timeout = 30 - await events.await_for(event_name=event_name, timeout=timeout) - pools = client.worker_pools.get_all() - for pool in pools: - if worker_pool_name == pool.name: - assert worker_pool_name == pool.name - return worker_pool_name == pool.name - - -@unsync -async def set_settings_allow_guest_signup( - events, client, enabled, event_name: str | None = None -): - result = client.settings.allow_guest_signup(enable=enabled) - if event_name: - if isinstance(result, sy.SyftSuccess): - events.register(event_name) - - -@unsync -async def check_users_created(events, client, users, event_name, event_set): - expected_emails = {user.email for user in users} - found_emails = set() - await events.await_for(event_name=event_name) - user_results = client.api.services.user.get_all() - for user_result in user_results: - if user_result.email in expected_emails: 
- found_emails.add(user_result.email) - - if len(found_emails) == len(expected_emails): - events.register(event_set) - - -def guest_register(client, test_user): - guest_client = client.guest() - fake = Faker() - result = guest_client.register( - name=test_user.name, - email=test_user.email, - password=test_user.password, - password_verify=test_user.password, - institution=fake.company(), - website=fake.url(), - ) - return result - - -async def result_is( - events, - expr, - matches: bool | str | type | object, - after: str | None = None, - register: str | None = None, -): - if after: - await events.await_for(event_name=after) - - lambda_source = inspect.getsource(expr) - try: - result = None - try: - result = expr() - except Exception as e: - if isinstance(e, sy.SyftException): - result = e - else: - raise e - - assertion = False - if isinstance(matches, bool): - assertion = result == matches - elif isinstance(matches, type): - assertion = isinstance(result, matches) - elif isinstance(matches, str): - message = matches.replace("*", "") - assertion = message in str(result) - else: - type_matches = isinstance(result, type(matches)) - message_matches = True - - message = None - if isinstance(matches, sy.service.response.SyftResponseMessage): - message = matches.message.replace("*", "") - elif isinstance(result, sy.SyftException): - message = matches.public_message.replace("*", "") - - if message: - if isinstance(result, sy.service.response.SyftResponseMessage): - message_matches = message in str(result) - elif isinstance(result, sy.SyftException): - message_matches = message in result.public_message - - assertion = type_matches and message_matches - if assertion and register: - events.register(event_name=register) - return assertion - except Exception as e: - print(f"insinstance({lambda_source}, {matches}). 
{e}") - - return False - - -@unsync -async def set_endpoint_settings( - events, client, path, kwargs, after: str, register: str -): - if after: - await events.await_for(event_name=after) - - # Here, we update the endpoint to timeout after 100s (rather the default of 60s) - result = client.api.services.api.update(endpoint_path=path, **kwargs) - if isinstance(result, sy.SyftSuccess): - events.register(register) def query_sql(): @@ -217,101 +46,6 @@ def query_sql(): return query -def run_code(client, method_name, **kwargs): - service_func_name = method_name - if "*" in method_name: - matcher = method_name.replace("*", "") - all_code = client.api.services.code.get_all() - for code in all_code: - if matcher in code.service_func_name: - service_func_name = code.service_func_name - break - - api_method = api_for_path(client, path=f"code.{service_func_name}") - # can raise - result = api_method(**kwargs) - return result - - -def run_api_path(client, path, **kwargs): - api_method = api_for_path(client, path) - result = api_method(**kwargs) - return result - - -def api_for_path(client, path): - root = client.api.services - for part in path.split("."): - if hasattr(root, part): - root = getattr(root, part) - return root - - -def get_pending(client): - results = [] - for request in client.requests: - if str(request.status) == "RequestStatus.PENDING": - results.append(request) - return results - - -def approve_and_deposit(client, request_id): - request = client.requests.get_by_uid(uid=request_id) - code = request.code - - if not isinstance(code, UserCode): - return - - func_name = request.code.service_func_name - job = run_code(client, func_name, blocking=False) - if not isinstance(job, Job): - return None - - job.wait() - job_info = job.info(result=True) - result = request.deposit_result(job_info, approve=True) - return result - - -@unsync -async def triage_requests(events, client, after, register): - if after: - await events.await_for(event_name=after) - while True: - await asyncio.sleep(2) - requests = get_pending(client) - for request in requests: - approve_and_deposit(client, request.id) - events.register(event_name=register) - - -def get_approved(client): - results = [] - for request in client.requests: - if str(request.status) == "RequestStatus.APPROVED": - results.append(request) - return results - - -@unsync -async def get_results(events, client, method_name, after, register): - method_name = method_name.replace("*", "") - if after: - await events.await_for(event_name=after) - while True: - await asyncio.sleep(1) - requests = get_approved(client) - for request in requests: - if method_name in request.code.service_func_name: - job = run_code(client, request.code.service_func_name, blocking=False) - if not isinstance(job, Job): - continue - else: - result = job.wait().get() - if hasattr(result, "__len__") and len(result) == 10000: - events.register(event_name=register) - - @pytest.mark.asyncio async def test_level_2_basic_scenario(request): ensure_package_installed("google-cloud-bigquery", "google.cloud.bigquery") @@ -320,23 +54,23 @@ async def test_level_2_basic_scenario(request): scenario = Scenario( name="test_create_apis_and_triage_requests", events=[ - EVENT_USER_ADMIN_CREATED, - EVENT_PREBUILT_WORKER_IMAGE_BIGQUERY_CREATED, - EVENT_EXTERNAL_REGISTRY_BIGQUERY_CREATED, - EVENT_WORKER_POOL_CREATED, - EVENT_ALLOW_GUEST_SIGNUP_DISABLED, - EVENT_USERS_CREATED, - EVENT_USERS_CREATED_CHECKED, - EVENT_QUERY_ENDPOINT_CREATED, - EVENT_QUERY_ENDPOINT_CONFIGURED, - EVENT_SCHEMA_ENDPOINT_CREATED, - 
EVENT_SUBMIT_QUERY_ENDPOINT_CREATED, - EVENT_SUBMIT_QUERY_ENDPOINT_CONFIGURED, - EVENT_USERS_CAN_QUERY_MOCK, - EVENT_USERS_CAN_SUBMIT_QUERY, - EVENT_USERS_QUERY_NOT_READY, - EVENT_ADMIN_APPROVED_FIRST_REQUEST, - EVENT_USERS_CAN_GET_APPROVED_RESULT, + Event.USER_ADMIN_CREATED, + Event.PREBUILT_WORKER_IMAGE_BIGQUERY_CREATED, + Event.EXTERNAL_REGISTRY_BIGQUERY_CREATED, + Event.WORKER_POOL_CREATED, + Event.ALLOW_GUEST_SIGNUP_DISABLED, + Event.USERS_CREATED, + Event.USERS_CREATED_CHECKED, + Event.QUERY_ENDPOINT_CREATED, + Event.QUERY_ENDPOINT_CONFIGURED, + Event.SCHEMA_ENDPOINT_CREATED, + Event.SUBMIT_QUERY_ENDPOINT_CREATED, + Event.SUBMIT_QUERY_ENDPOINT_CONFIGURED, + Event.USERS_CAN_QUERY_MOCK, + Event.USERS_CAN_SUBMIT_QUERY, + Event.USERS_QUERY_NOT_READY, + Event.ADMIN_APPROVED_FIRST_REQUEST, + Event.USERS_CAN_GET_APPROVED_RESULT, ], ) @@ -347,17 +81,17 @@ async def test_level_2_basic_scenario(request): server = make_server(request) admin = make_admin() - events.register(EVENT_USER_ADMIN_CREATED) + events.register(Event.USER_ADMIN_CREATED) - await events.await_for(event_name=EVENT_USER_ADMIN_CREATED) - assert events.happened(EVENT_USER_ADMIN_CREATED) + await events.await_for(event_name=Event.USER_ADMIN_CREATED) + assert events.happened(Event.USER_ADMIN_CREATED) root_client = admin.client(server) triage_requests( events, root_client, - after=EVENT_USER_ADMIN_CREATED, - register=EVENT_ADMIN_APPROVED_FIRST_REQUEST, + after=Event.USER_ADMIN_CREATED, + register=Event.ADMIN_APPROVED_FIRST_REQUEST, ) worker_pool_name = "bigquery-pool" @@ -368,47 +102,47 @@ async def test_level_2_basic_scenario(request): events, root_client, worker_docker_tag, - EVENT_PREBUILT_WORKER_IMAGE_BIGQUERY_CREATED, + Event.PREBUILT_WORKER_IMAGE_BIGQUERY_CREATED, ) worker_image_result = get_prebuilt_worker_image( events, root_client, worker_docker_tag, - EVENT_PREBUILT_WORKER_IMAGE_BIGQUERY_CREATED, + Event.PREBUILT_WORKER_IMAGE_BIGQUERY_CREATED, ) - add_external_registry(events, root_client, EVENT_EXTERNAL_REGISTRY_BIGQUERY_CREATED) + add_external_registry(events, root_client, Event.EXTERNAL_REGISTRY_BIGQUERY_CREATED) create_worker_pool( events, root_client, worker_pool_name, worker_image_result, - EVENT_WORKER_POOL_CREATED, + Event.WORKER_POOL_CREATED, ) check_worker_pool_exists( - events, root_client, worker_pool_name, EVENT_WORKER_POOL_CREATED + events, root_client, worker_pool_name, Event.WORKER_POOL_CREATED ) set_settings_allow_guest_signup( - events, root_client, False, EVENT_ALLOW_GUEST_SIGNUP_DISABLED + events, root_client, False, Event.ALLOW_GUEST_SIGNUP_DISABLED ) users = [make_user() for i in range(2)] - create_users(root_client, events, users, EVENT_USERS_CREATED) + create_users(root_client, events, users, Event.USERS_CREATED) check_users_created( - events, root_client, users, EVENT_USERS_CREATED, EVENT_USERS_CREATED_CHECKED + events, root_client, users, Event.USERS_CREATED, Event.USERS_CREATED_CHECKED ) create_endpoints_query( events, root_client, worker_pool_name=worker_pool_name, - register=EVENT_QUERY_ENDPOINT_CREATED, + register=Event.QUERY_ENDPOINT_CREATED, ) test_query_path = "bigquery.test_query" @@ -417,22 +151,22 @@ async def test_level_2_basic_scenario(request): root_client, path=test_query_path, kwargs={"endpoint_timeout": 120, "hide_mock_definition": True}, - after=EVENT_QUERY_ENDPOINT_CREATED, - register=EVENT_QUERY_ENDPOINT_CONFIGURED, + after=Event.QUERY_ENDPOINT_CREATED, + register=Event.QUERY_ENDPOINT_CONFIGURED, ) create_endpoints_schema( events, root_client, worker_pool_name=worker_pool_name, 
- register=EVENT_SCHEMA_ENDPOINT_CREATED, + register=Event.SCHEMA_ENDPOINT_CREATED, ) create_endpoints_submit_query( events, root_client, worker_pool_name=worker_pool_name, - register=EVENT_SUBMIT_QUERY_ENDPOINT_CREATED, + register=Event.SUBMIT_QUERY_ENDPOINT_CREATED, ) submit_query_path = "bigquery.submit_query" @@ -441,8 +175,8 @@ async def test_level_2_basic_scenario(request): root_client, path=submit_query_path, kwargs={"hide_mock_definition": True}, - after=EVENT_SUBMIT_QUERY_ENDPOINT_CREATED, - register=EVENT_SUBMIT_QUERY_ENDPOINT_CONFIGURED, + after=Event.SUBMIT_QUERY_ENDPOINT_CREATED, + register=Event.SUBMIT_QUERY_ENDPOINT_CONFIGURED, ) await result_is( @@ -454,8 +188,8 @@ async def test_level_2_basic_scenario(request): ) == 10000, matches=True, - after=[EVENT_QUERY_ENDPOINT_CONFIGURED, EVENT_USERS_CREATED_CHECKED], - register=EVENT_USERS_CAN_QUERY_MOCK, + after=[Event.QUERY_ENDPOINT_CONFIGURED, Event.USERS_CREATED_CHECKED], + register=Event.USERS_CAN_QUERY_MOCK, ) func_name = "test_func" @@ -469,24 +203,24 @@ async def test_level_2_basic_scenario(request): query=query_sql(), ), matches="*Query submitted*", - after=[EVENT_SUBMIT_QUERY_ENDPOINT_CONFIGURED, EVENT_USERS_CREATED_CHECKED], - register=EVENT_USERS_CAN_SUBMIT_QUERY, + after=[Event.SUBMIT_QUERY_ENDPOINT_CONFIGURED, Event.USERS_CREATED_CHECKED], + register=Event.USERS_CAN_SUBMIT_QUERY, ) await result_is( events, lambda: run_code(users[0].client(server), method_name=f"{func_name}*"), matches=sy.SyftException(public_message="*Your code is waiting for approval*"), - after=[EVENT_USERS_CAN_SUBMIT_QUERY], - register=EVENT_USERS_QUERY_NOT_READY, + after=[Event.USERS_CAN_SUBMIT_QUERY], + register=Event.USERS_QUERY_NOT_READY, ) get_results( events, users[0].client(server), method_name=f"{func_name}*", - after=EVENT_USERS_QUERY_NOT_READY, - register=EVENT_USERS_CAN_GET_APPROVED_RESULT, + after=Event.USERS_QUERY_NOT_READY, + register=Event.USERS_CAN_GET_APPROVED_RESULT, ) res = await result_is( @@ -495,7 +229,7 @@ async def test_level_2_basic_scenario(request): matches=sy.SyftException( public_message="*You have no permission to create an account*" ), - after=EVENT_ALLOW_GUEST_SIGNUP_DISABLED, + after=Event.ALLOW_GUEST_SIGNUP_DISABLED, ) assert res is True From b6504954360795e3affc5559b38c03930769069b Mon Sep 17 00:00:00 2001 From: Yash Gorana Date: Wed, 18 Sep 2024 14:42:29 +0530 Subject: [PATCH 05/78] some early l0 activities in sim --- packages/syft/setup.cfg | 8 +- test_helpers/__init__.py | 0 tests/scenarios/bigquery/helpers/api.py | 22 +- tests/scenarios/bigquery/helpers/code.py | 4 +- tests/scenarios/bigquery/helpers/events.py | 6 + .../bigquery/helpers/fixtures_sync.py | 8 + tests/scenarios/bigquery/helpers/workers.py | 30 ++- tests/scenarios/bigquery/level_0_test.py | 205 ++++++++++++++++++ .../scenarios/bigquery/level_2_basic_test.py | 5 +- 9 files changed, 258 insertions(+), 30 deletions(-) create mode 100644 test_helpers/__init__.py create mode 100644 tests/scenarios/bigquery/level_0_test.py diff --git a/packages/syft/setup.cfg b/packages/syft/setup.cfg index d4ee2cff521..d8361db7725 100644 --- a/packages/syft/setup.cfg +++ b/packages/syft/setup.cfg @@ -139,10 +139,10 @@ test_plugins = faker distro dynaconf - ; pytest-asyncio - ; pytest-timeout - ; anyio - ; unsync + pytest-asyncio + pytest-timeout + anyio + unsync [options.entry_points] console_scripts = diff --git a/test_helpers/__init__.py b/test_helpers/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/scenarios/bigquery/helpers/api.py 
b/tests/scenarios/bigquery/helpers/api.py
index 1d6619f50c6..1a6d8a4002d 100644
--- a/tests/scenarios/bigquery/helpers/api.py
+++ b/tests/scenarios/bigquery/helpers/api.py
@@ -1,25 +1,15 @@
-# stdlib
-import os
-import sys
-
 # third party
 from unsync import unsync
 
 # syft absolute
 import syft as sy
-from syft.util.util import find_base_dir_with_tox_ini
-
-# TODO remove hacky imports once https://github.com/OpenMined/PySyft/pull/9291/ is merged
-base_dir = find_base_dir_with_tox_ini()
-notebook_helpers_module_path = os.path.abspath(os.path.join(base_dir, "notebooks/"))
-if notebook_helpers_module_path not in sys.path:
-    sys.path.append(notebook_helpers_module_path)
+from syft import test_helpers  # noqa: F401
 
-
-# third party
-# The below two imports work only after the above sys.path.append
-from notebook_helpers.apis import make_schema  # noqa: E402
-from notebook_helpers.apis import make_test_query  # noqa: E402
+# FIXME: the test_helpers import above is still a workaround; fix it properly
+if True:
+    # third party
+    from apis import make_schema  # type: ignore
+    from apis import make_test_query
 
 
 # Define any helper methods for our rate limiter
diff --git a/tests/scenarios/bigquery/helpers/code.py b/tests/scenarios/bigquery/helpers/code.py
index 9e19f76709b..400dd3c0c48 100644
--- a/tests/scenarios/bigquery/helpers/code.py
+++ b/tests/scenarios/bigquery/helpers/code.py
@@ -61,11 +61,11 @@ def get_pending(client):
 
 
 @unsync
-async def triage_requests(events, client, after, register):
+async def triage_requests(events, client, after, register, sleep=2):
     if after:
         await events.await_for(event_name=after)
     while True:
-        await asyncio.sleep(2)
+        await asyncio.sleep(sleep)
         requests = get_pending(client)
         for request in requests:
             approve_and_deposit(client, request.id)
             events.register(event_name=register)
diff --git a/tests/scenarios/bigquery/helpers/events.py b/tests/scenarios/bigquery/helpers/events.py
index 9e0f2383dc2..f4127bfe2de 100644
--- a/tests/scenarios/bigquery/helpers/events.py
+++ b/tests/scenarios/bigquery/helpers/events.py
@@ -19,6 +19,8 @@ class Event:
     PREBUILT_WORKER_IMAGE_BIGQUERY_CREATED = "prebuilt_worker_image_bigquery_created"
     EXTERNAL_REGISTRY_BIGQUERY_CREATED = "external_registry_bigquery_created"
     WORKER_POOL_CREATED = "worker_pool_created"
+    WORKER_POOL_SCALED = "worker_pool_scaled"
+    ALLOW_GUEST_SIGNUP_ENABLED = "allow_guest_signup_enabled"
     ALLOW_GUEST_SIGNUP_DISABLED = "allow_guest_signup_disabled"
     USERS_CREATED_CHECKED = "users_created_checked"
     SCHEMA_ENDPOINT_CREATED = "schema_endpoint_created"
@@ -26,12 +28,16 @@ class Event:
     SUBMIT_QUERY_ENDPOINT_CONFIGURED = "submit_query_endpoint_configured"
     USERS_CAN_QUERY_MOCK = "users_can_query_mock"
     USERS_CAN_SUBMIT_QUERY = "users_can_submit_query"
+    ADMIN_APPROVED_REQUEST = "admin_approved_request"
     ADMIN_APPROVED_FIRST_REQUEST = "admin_approved_first_request"
     USERS_CAN_GET_APPROVED_RESULT = "users_can_get_approved_result"
     USERS_QUERY_NOT_READY = "users_query_not_ready"
     QUERY_ENDPOINT_CREATED = "query_endpoint_created"
     QUERY_ENDPOINT_CONFIGURED = "query_endpoint_configured"
+
+    ADMIN_LOW_SIDE_WORKFLOW_COMPLETED = "admin_low_side_workflow_completed"
+    ADMIN_HIGH_SIDE_WORKFLOW_COMPLETED = "admin_high_side_workflow_completed"
 
 
 @dataclass
diff --git a/tests/scenarios/bigquery/helpers/fixtures_sync.py b/tests/scenarios/bigquery/helpers/fixtures_sync.py
index b32ddd5d3b0..ce05d5d4b77 100644
--- a/tests/scenarios/bigquery/helpers/fixtures_sync.py
+++ b/tests/scenarios/bigquery/helpers/fixtures_sync.py
@@ -146,6 +146,14 @@ def cleanup():
     return server
 
 
+def make_client(url: str, email: str, password: str) -> Any:
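+    # assumes a server is already running and reachable at `url`
+    # (cf. make_server above, which launches one locally for the test)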
+ return sy.login(url=url, email=email, password=password) + + +def sync_clients(from_client, to_client): + return sy.sync(from_client, to_client) + + @unsync async def create_users(root_client, events, users, event_name): for test_user in users: diff --git a/tests/scenarios/bigquery/helpers/workers.py b/tests/scenarios/bigquery/helpers/workers.py index 4f64cda85fb..96f00a490ac 100644 --- a/tests/scenarios/bigquery/helpers/workers.py +++ b/tests/scenarios/bigquery/helpers/workers.py @@ -7,8 +7,8 @@ @unsync -async def get_prebuilt_worker_image(events, client, expected_tag, event_name): - await events.await_for(event_name=event_name, show=True) +async def get_prebuilt_worker_image(events, client, expected_tag, after): + await events.await_for(event_name=after, show=True) worker_images = client.images.get_all() for worker_image in worker_images: if expected_tag in str(worker_image.image_identifier): @@ -35,7 +35,11 @@ async def add_external_registry(events, client, event_name): @unsync async def create_worker_pool( - events, client, worker_pool_name, worker_pool_result, event_name + events, + client, + worker_pool_name, + worker_pool_result, + event_name, ): # block until this is available worker_image = worker_pool_result.result(timeout=5) @@ -53,11 +57,27 @@ async def create_worker_pool( @unsync -async def check_worker_pool_exists(events, client, worker_pool_name, event_name): +async def check_worker_pool_exists(events, client, worker_pool_name, after): timeout = 30 - await events.await_for(event_name=event_name, timeout=timeout) + await events.await_for(event_name=after, timeout=timeout) pools = client.worker_pools.get_all() for pool in pools: if worker_pool_name == pool.name: assert worker_pool_name == pool.name return worker_pool_name == pool.name + + +@unsync +async def scale_worker_pool( + events, client, worker_pool_name, num_workers, event_name, after +): + if after: + await events.await_for(event_name=after) + + result = client.api.services.worker_pool.scale( + pool_name=worker_pool_name, + num_workers=num_workers, + ) + + assert isinstance(result, sy.SyftSuccess) + events.register(event_name) diff --git a/tests/scenarios/bigquery/level_0_test.py b/tests/scenarios/bigquery/level_0_test.py new file mode 100644 index 00000000000..4c5d3446e2a --- /dev/null +++ b/tests/scenarios/bigquery/level_0_test.py @@ -0,0 +1,205 @@ +# stdlib +import asyncio +import random + +# third party +from helpers.events import Event +from helpers.events import EventManager +from helpers.events import Scenario +from helpers.fixtures_sync import make_client +from helpers.fixtures_sync import sync_clients +from helpers.users import set_settings_allow_guest_signup +from helpers.workers import add_external_registry +from helpers.workers import check_worker_pool_exists +from helpers.workers import create_prebuilt_worker_image +from helpers.workers import create_worker_pool +from helpers.workers import get_prebuilt_worker_image +import pytest +from unsync import unsync + +random.seed(42069) + + +async def user_low_side_activity(): + # loop: guest user creation is allowed + # create_user + + # login_user + + # submit_code + # request_approval + + # loop: wait for approval + + # execute code + # get result + + # dump result in a file + pass + + +@unsync +async def admin_sync_activity(_, events, after): + if after: + await events.await_for(event_name=after) + + # login to high side + admin_client_high = make_client( + url="http://localhost:8080", + email="info@openmined.org", + password="changethis", + ) + + 
admin_client_low = make_client( + url="http://localhost:8081", + email="info@openmined.org", + password="changethis", + ) + + while True: + await asyncio.sleep(3) + print("admin_sync_activity: syncing high & low") + sync_clients(admin_client_high, admin_client_low) + + +@unsync +async def admin_create_worker_pool(_, admin_client, events): + """ + Worker pool creation typically involves + - Register custom image + - Launch worker pool + - Scale worker pool + + """ + + worker_pool_name = "bigquery-pool" + worker_docker_tag = "openmined/worker-bigquery:0.9.1" + + create_prebuilt_worker_image( + events, + admin_client, + worker_docker_tag, + Event.PREBUILT_WORKER_IMAGE_BIGQUERY_CREATED, + ) + + worker_image_result = get_prebuilt_worker_image( + events, + admin_client, + worker_docker_tag, + after=Event.PREBUILT_WORKER_IMAGE_BIGQUERY_CREATED, + ) + + # todo - configure this manually?? + add_external_registry( + events, + admin_client, + Event.EXTERNAL_REGISTRY_BIGQUERY_CREATED, + ) + + create_worker_pool( + events, + admin_client, + worker_pool_name, + worker_image_result, + Event.WORKER_POOL_CREATED, + ) + + check_worker_pool_exists( + events, + admin_client, + worker_pool_name, + after=Event.WORKER_POOL_CREATED, + ) + + # TODO + # scale_worker_pool( + # events, + # admin_client, + # worker_pool_name, + # event_name=Event.WORKER_POOL_SCALED, + # after=Event.WORKER_POOL_CREATED, + # ) + + +@unsync +async def admin_low_side_activity(_, events): + """ + Typical admin activity on low-side server + 1. Login to low-side server + 2. Enable guest sign up + 3. Start checking requests every 'n' seconds + """ + + # login to low side + admin_client = make_client( + url="http://localhost:8081", + email="info@openmined.org", + password="changethis", + ) + + # enable guest sign up + set_settings_allow_guest_signup( + events, + admin_client, + True, + Event.ALLOW_GUEST_SIGNUP_ENABLED, + ) + + # create worker pool on low side + admin_create_worker_pool(_, admin_client, events) + + # start checking requests every 5s + # triage_requests( + # events, + # admin_client, + # register=Event.ADMIN_APPROVED_REQUEST, + # sleep=5, + # ) + + events.register(Event.ADMIN_LOW_SIDE_WORKFLOW_COMPLETED) + + +@unsync +async def admin_high_side_activity(_, events): + # login + admin_client = make_client( + url="http://localhost:8080", + email="info@openmined.org", + password="changethis", + ) + + admin_create_worker_pool(_, admin_client, events) + + events.register(Event.ADMIN_HIGH_SIDE_WORKFLOW_COMPLETED) + + +@pytest.mark.asyncio +async def test_level_0_k8s(request): + scenario = Scenario( + name="test_level_0_k8s", + events=[ + Event.ALLOW_GUEST_SIGNUP_ENABLED, + Event.ADMIN_LOW_SIDE_WORKFLOW_COMPLETED, + Event.ADMIN_HIGH_SIDE_WORKFLOW_COMPLETED, + ], + ) + + events = EventManager() + events.add_scenario(scenario) + events.monitor() + + # start admin activity on high side + admin_low_side_activity(request, events) + + # todo + admin_high_side_activity(request, events) + + # todo - only start syncing after the root user created other admin users + admin_sync_activity(request, events, after=Event.USER_ADMIN_CREATED) + + # todo + # users = create_users() + # [user_low_side_activity(user) for user in users] + + await events.await_scenario(scenario_name="test_level_0_k8s", timeout=30) + assert events.scenario_completed("test_level_0_k8s") diff --git a/tests/scenarios/bigquery/level_2_basic_test.py b/tests/scenarios/bigquery/level_2_basic_test.py index 4d8b8e4a7bd..9b98169d202 100644 --- 
a/tests/scenarios/bigquery/level_2_basic_test.py +++ b/tests/scenarios/bigquery/level_2_basic_test.py @@ -1,5 +1,3 @@ -# stdlib - # third party from helpers.api import create_endpoints_query from helpers.api import create_endpoints_schema @@ -235,6 +233,7 @@ async def test_level_2_basic_scenario(request): assert res is True await events.await_scenario( - scenario_name="test_create_apis_and_triage_requests", timeout=30 + scenario_name="test_create_apis_and_triage_requests", + timeout=30, ) assert events.scenario_completed("test_create_apis_and_triage_requests") From fdb6c8764569576518c36c34ca8661208f1afebc Mon Sep 17 00:00:00 2001 From: Yash Gorana Date: Wed, 18 Sep 2024 16:45:12 +0530 Subject: [PATCH 06/78] some more code --- tests/scenarios/bigquery/helpers/events.py | 2 + .../bigquery/helpers/fixtures_sync.py | 14 +- tests/scenarios/bigquery/level_0_test.py | 182 ++++++++++++++++-- 3 files changed, 178 insertions(+), 20 deletions(-) diff --git a/tests/scenarios/bigquery/helpers/events.py b/tests/scenarios/bigquery/helpers/events.py index f4127bfe2de..f5acd663052 100644 --- a/tests/scenarios/bigquery/helpers/events.py +++ b/tests/scenarios/bigquery/helpers/events.py @@ -37,6 +37,8 @@ class Event: ADMIN_LOW_SIDE_WORKFLOW_COMPLETED = "admin_low_side_workflow_completed" ADMIN_HIGH_SIDE_WORKFLOW_COMPLETED = "admin_high_side_workflow_completed" + ADMIN_SYNC_HIGH_TO_LOW = "admin_sync_high_to_low" + ADMIN_SYNC_LOW_TO_HIGH = "admin_sync_low_to_high" @dataclass diff --git a/tests/scenarios/bigquery/helpers/fixtures_sync.py b/tests/scenarios/bigquery/helpers/fixtures_sync.py index ce05d5d4b77..099b7c02433 100644 --- a/tests/scenarios/bigquery/helpers/fixtures_sync.py +++ b/tests/scenarios/bigquery/helpers/fixtures_sync.py @@ -150,8 +150,18 @@ def make_client(url: str, email: str, password: str) -> Any: return sy.login(url=url, email=email, password=password) -def sync_clients(from_client, to_client): - return sy.sync(from_client, to_client) +def make_guest_client(url: str) -> Any: + return sy.login_as_guest(url=url) + + +@unsync +async def sync_clients(events, from_client, to_client, event_name, after=None): + if after: + await events.await_for(event_name=after) + widget = sy.sync(from_client, to_client) + widget._share_all() + widget._sync_all() + events.register(event_name) @unsync diff --git a/tests/scenarios/bigquery/level_0_test.py b/tests/scenarios/bigquery/level_0_test.py index 4c5d3446e2a..317b7043e51 100644 --- a/tests/scenarios/bigquery/level_0_test.py +++ b/tests/scenarios/bigquery/level_0_test.py @@ -3,10 +3,16 @@ import random # third party +from helpers.api import create_endpoints_query +from helpers.api import create_endpoints_schema +from helpers.api import create_endpoints_submit_query +from helpers.api import set_endpoint_settings from helpers.events import Event from helpers.events import EventManager from helpers.events import Scenario from helpers.fixtures_sync import make_client +from helpers.fixtures_sync import make_guest_client +from helpers.fixtures_sync import make_user from helpers.fixtures_sync import sync_clients from helpers.users import set_settings_allow_guest_signup from helpers.workers import add_external_registry @@ -20,9 +26,21 @@ random.seed(42069) -async def user_low_side_activity(): +@unsync +async def guest_user_setup_flow(_, events, user): + user_client = make_guest_client(url="http://localhost:8081") + print(f"Logged in as guest user {user.email}") + user_client.forgot_password(email=user.email) + print(f"Requested password reset {user.email}") + + 
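+# NOTE: the helpers in these scenario tests all follow the same event-driven shape:
+# optionally block on an upstream event, do the work, then register a downstream
+# event so dependent tasks can proceed. A minimal sketch of that recurring pattern,
+# assuming only the EventManager API already used here (await_for / register):
+#
+#     @unsync
+#     async def activity(events, register, after=None):
+#         if after:
+#             await events.await_for(event_name=after)
+#         ...  # do the actual work
+#         events.register(register)  # unblocks anything waiting on this event
+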
+@unsync +async def user_low_side_activity(_, events, user, after=None): # loop: guest user creation is allowed - # create_user + if after: + await events.await_for(event_name=after) + + guest_user_setup_flow(user.email) # login_user @@ -39,7 +57,7 @@ async def user_low_side_activity(): @unsync -async def admin_sync_activity(_, events, after): +async def root_sync_activity(_, events, after): if after: await events.await_for(event_name=after) @@ -58,12 +76,22 @@ async def admin_sync_activity(_, events, after): while True: await asyncio.sleep(3) - print("admin_sync_activity: syncing high & low") - sync_clients(admin_client_high, admin_client_low) + sync_clients( + events, + admin_client_low, + admin_client_high, + event_name=Event.ADMIN_SYNC_LOW_TO_HIGH, + ) @unsync -async def admin_create_worker_pool(_, admin_client, events): +async def admin_create_worker_pool( + _, + admin_client, + worker_pool_name, + worker_docker_tag, + events, +): """ Worker pool creation typically involves - Register custom image @@ -72,9 +100,6 @@ async def admin_create_worker_pool(_, admin_client, events): """ - worker_pool_name = "bigquery-pool" - worker_docker_tag = "openmined/worker-bigquery:0.9.1" - create_prebuilt_worker_image( events, admin_client, @@ -121,15 +146,27 @@ async def admin_create_worker_pool(_, admin_client, events): # ) +@unsync +async def mark_completed(events, register, after): + if after: + await events.await_for(event_name=after) + + events.register(register) + + @unsync async def admin_low_side_activity(_, events): """ Typical admin activity on low-side server 1. Login to low-side server 2. Enable guest sign up + 3. Create a worker pool 3. Start checking requests every 'n' seconds """ + worker_pool_name = "bigquery-pool" + worker_docker_tag = "openmined/worker-bigquery:0.9.1" + # login to low side admin_client = make_client( url="http://localhost:8081", @@ -146,7 +183,13 @@ async def admin_low_side_activity(_, events): ) # create worker pool on low side - admin_create_worker_pool(_, admin_client, events) + admin_create_worker_pool( + _, + admin_client, + worker_pool_name, + worker_docker_tag, + events, + ) # start checking requests every 5s # triage_requests( @@ -156,31 +199,127 @@ async def admin_low_side_activity(_, events): # sleep=5, # ) - events.register(Event.ADMIN_LOW_SIDE_WORKFLOW_COMPLETED) + mark_completed( + events, + register=Event.ADMIN_LOW_SIDE_WORKFLOW_COMPLETED, + after=Event.WORKER_POOL_CREATED, + ) + + +@unsync +async def admin_create_api_endpoint( + _, + events, + admin_client_high, + admin_client_low, + worker_pool_name, + after=None, +): + if after: + await events.await_for(event_name=after) + + test_query_path = "bigquery.test_query" + submit_query_path = "bigquery.submit_query" + + create_endpoints_query( + events, + admin_client_high, + worker_pool_name=worker_pool_name, + register=Event.QUERY_ENDPOINT_CREATED, + ) + + set_endpoint_settings( + events, + admin_client_high, + path=test_query_path, + kwargs={"endpoint_timeout": 120, "hide_mock_definition": True}, + after=Event.QUERY_ENDPOINT_CREATED, + register=Event.QUERY_ENDPOINT_CONFIGURED, + ) + + create_endpoints_schema( + events, + admin_client_high, + worker_pool_name=worker_pool_name, + register=Event.SCHEMA_ENDPOINT_CREATED, + ) + + create_endpoints_submit_query( + events, + admin_client_high, + worker_pool_name=worker_pool_name, + register=Event.SUBMIT_QUERY_ENDPOINT_CREATED, + ) + + set_endpoint_settings( + events, + admin_client_high, + path=submit_query_path, + kwargs={"hide_mock_definition": True}, + 
after=Event.SUBMIT_QUERY_ENDPOINT_CREATED, + register=Event.SUBMIT_QUERY_ENDPOINT_CONFIGURED, + ) + + sync_clients( + events, + admin_client_low, + admin_client_high, + event_name=Event.ADMIN_SYNC_HIGH_TO_LOW, + after=Event.SUBMIT_QUERY_ENDPOINT_CONFIGURED, + ) @unsync async def admin_high_side_activity(_, events): # login - admin_client = make_client( + admin_client_high = make_client( url="http://localhost:8080", email="info@openmined.org", password="changethis", ) + admin_client_low = make_client( + url="http://localhost:8081", + email="info@openmined.org", + password="changethis", + ) + + worker_pool_name = "bigquery-pool" + worker_docker_tag = "openmined/worker-bigquery:0.9.1" + + admin_create_worker_pool( + _, + admin_client_high, + worker_pool_name, + worker_docker_tag, + events, + ) - admin_create_worker_pool(_, admin_client, events) + admin_create_api_endpoint( + _, + events, + admin_client_high, + admin_client_low, + worker_pool_name, + after=None, + ) events.register(Event.ADMIN_HIGH_SIDE_WORKFLOW_COMPLETED) @pytest.mark.asyncio async def test_level_0_k8s(request): + """ + Goal + - Setup two datasites - high & low + - Root client of each datasite creates an multiple admin users + + """ scenario = Scenario( name="test_level_0_k8s", events=[ Event.ALLOW_GUEST_SIGNUP_ENABLED, Event.ADMIN_LOW_SIDE_WORKFLOW_COMPLETED, - Event.ADMIN_HIGH_SIDE_WORKFLOW_COMPLETED, + # Event.ADMIN_HIGH_SIDE_WORKFLOW_COMPLETED, ], ) @@ -192,14 +331,21 @@ async def test_level_0_k8s(request): admin_low_side_activity(request, events) # todo - admin_high_side_activity(request, events) + # admin_high_side_activity(request, events) # todo - only start syncing after the root user created other admin users - admin_sync_activity(request, events, after=Event.USER_ADMIN_CREATED) + # root_sync_activity(request, events, after=Event.USER_ADMIN_CREATED) # todo - # users = create_users() - # [user_low_side_activity(user) for user in users] + [ + user_low_side_activity( + request, + events, + make_user(), + after=Event.ALLOW_GUEST_SIGNUP_ENABLED, + ) + for i in range(5) + ] await events.await_scenario(scenario_name="test_level_0_k8s", timeout=30) assert events.scenario_completed("test_level_0_k8s") From ab412f2e11f4193592e19e06eaf47295396b185c Mon Sep 17 00:00:00 2001 From: Aziz Berkay Yesilyurt Date: Wed, 18 Sep 2024 16:22:20 +0200 Subject: [PATCH 07/78] exit early if an exception occurs --- tests/scenarios/bigquery/helpers/events.py | 9 ++++- tests/scenarios/bigquery/level_0_test.py | 45 ++++++++++++++++++---- 2 files changed, 44 insertions(+), 10 deletions(-) diff --git a/tests/scenarios/bigquery/helpers/events.py b/tests/scenarios/bigquery/helpers/events.py index f5acd663052..e5c5d6c4a20 100644 --- a/tests/scenarios/bigquery/helpers/events.py +++ b/tests/scenarios/bigquery/helpers/events.py @@ -40,6 +40,8 @@ class Event: ADMIN_SYNC_HIGH_TO_LOW = "admin_sync_high_to_low" ADMIN_SYNC_LOW_TO_HIGH = "admin_sync_low_to_high" + EXCEPTION_OCCURRED = "exception_occurred" + @dataclass class Scenario: @@ -103,7 +105,7 @@ async def await_scenario( print( f"async await_for_scenario: {scenario_name}. Time left: {time_left}" ) - await asyncio.sleep(1) + await asyncio.sleep(5) return False def scenario_completed(self, scenario_name: str) -> bool: @@ -114,6 +116,9 @@ def scenario_completed(self, scenario_name: str) -> bool: incomplete_events = [ event for event in scenario_events if events.get(event) is None ] + if Event.EXCEPTION_OCCURRED in events: + msg = f"Scenario '{scenario_name}' failed due to an exception. 
Missing events: {incomplete_events}" + raise Exception(msg) if incomplete_events: print( @@ -201,7 +206,7 @@ async def await_for( if show: time_left = timeout - (time.time() - start_time) print(f"async await_for: {event_names}. Time left: {time_left}") - await asyncio.sleep(1) + await asyncio.sleep(5) return False def happened(self, event_name: str) -> bool: diff --git a/tests/scenarios/bigquery/level_0_test.py b/tests/scenarios/bigquery/level_0_test.py index 317b7043e51..a98c7e51572 100644 --- a/tests/scenarios/bigquery/level_0_test.py +++ b/tests/scenarios/bigquery/level_0_test.py @@ -1,5 +1,6 @@ # stdlib import asyncio +from functools import wraps import random # third party @@ -26,7 +27,35 @@ random.seed(42069) -@unsync +def unsync_guard(): + "Make sure we exit early if an exception occurs" + + def decorator(func): + @wraps(func) + @unsync + async def wrapper(*args, **kwargs): + try: + result = await func(*args, **kwargs) + return result + except Exception as e: + print(f"Exception occurred: {e}") + for arg in args: + if isinstance(arg, EventManager): + print("Registering exception event") + arg.register(Event.EXCEPTION_OCCURRED) + break + raise + + return wrapper + + return decorator + + +unsync_ = unsync_guard() +# unsync_ = unsync + + +@unsync_ async def guest_user_setup_flow(_, events, user): user_client = make_guest_client(url="http://localhost:8081") print(f"Logged in as guest user {user.email}") @@ -34,7 +63,7 @@ async def guest_user_setup_flow(_, events, user): print(f"Requested password reset {user.email}") -@unsync +@unsync_ async def user_low_side_activity(_, events, user, after=None): # loop: guest user creation is allowed if after: @@ -56,7 +85,7 @@ async def user_low_side_activity(_, events, user, after=None): pass -@unsync +@unsync_ async def root_sync_activity(_, events, after): if after: await events.await_for(event_name=after) @@ -84,7 +113,7 @@ async def root_sync_activity(_, events, after): ) -@unsync +@unsync_ async def admin_create_worker_pool( _, admin_client, @@ -146,7 +175,7 @@ async def admin_create_worker_pool( # ) -@unsync +@unsync_ async def mark_completed(events, register, after): if after: await events.await_for(event_name=after) @@ -154,7 +183,7 @@ async def mark_completed(events, register, after): events.register(register) -@unsync +@unsync_ async def admin_low_side_activity(_, events): """ Typical admin activity on low-side server @@ -206,7 +235,7 @@ async def admin_low_side_activity(_, events): ) -@unsync +@unsync_ async def admin_create_api_endpoint( _, events, @@ -269,7 +298,7 @@ async def admin_create_api_endpoint( ) -@unsync +@unsync_ async def admin_high_side_activity(_, events): # login admin_client_high = make_client( From c3c1ddb03c450fce6df1e8a840a0750ffefca693 Mon Sep 17 00:00:00 2001 From: khoaguin Date: Thu, 19 Sep 2024 11:13:33 +0700 Subject: [PATCH 08/78] working on low-side ds workflow --- .../bigquery/helpers/fixtures_sync.py | 2 +- .../scenarios/bigquery/level_0_basic_test.py | 28 ------------------- tests/scenarios/bigquery/level_0_test.py | 25 ++++++++++++++--- 3 files changed, 22 insertions(+), 33 deletions(-) delete mode 100644 tests/scenarios/bigquery/level_0_basic_test.py diff --git a/tests/scenarios/bigquery/helpers/fixtures_sync.py b/tests/scenarios/bigquery/helpers/fixtures_sync.py index 099b7c02433..0ce1dac7289 100644 --- a/tests/scenarios/bigquery/helpers/fixtures_sync.py +++ b/tests/scenarios/bigquery/helpers/fixtures_sync.py @@ -18,7 +18,7 @@ def make_user( email: str | None = None, password: str | None = None, role: 
ServiceRole = ServiceRole.DATA_SCIENTIST, -): +) -> TestUser: fake = Faker() if name is None: name = fake.name() diff --git a/tests/scenarios/bigquery/level_0_basic_test.py b/tests/scenarios/bigquery/level_0_basic_test.py deleted file mode 100644 index e7cbcfd71e8..00000000000 --- a/tests/scenarios/bigquery/level_0_basic_test.py +++ /dev/null @@ -1,28 +0,0 @@ -# @pytest.mark.asyncio -# async def test_level_2_basic_scenario(request): -# ensure_package_installed("google-cloud-bigquery", "google.cloud.bigquery") -# ensure_package_installed("db-dtypes", "db_dtypes") - -# scenario = Scenario( -# name="test_create_apis_and_triage_requests", -# events=[ -# EVENT_USER_ADMIN_CREATED, -# EVENT_PREBUILT_WORKER_IMAGE_BIGQUERY_CREATED, -# EVENT_EXTERNAL_REGISTRY_BIGQUERY_CREATED, -# EVENT_WORKER_POOL_CREATED, -# EVENT_ALLOW_GUEST_SIGNUP_DISABLED, -# EVENT_USERS_CREATED, -# EVENT_USERS_CREATED_CHECKED, -# EVENT_QUERY_ENDPOINT_CREATED, -# EVENT_QUERY_ENDPOINT_CONFIGURED, -# EVENT_SCHEMA_ENDPOINT_CREATED, -# EVENT_SUBMIT_QUERY_ENDPOINT_CREATED, -# EVENT_SUBMIT_QUERY_ENDPOINT_CONFIGURED, -# EVENT_USERS_CAN_QUERY_MOCK, -# EVENT_USERS_CAN_SUBMIT_QUERY, -# EVENT_USERS_QUERY_NOT_READY, -# EVENT_ADMIN_APPROVED_FIRST_REQUEST, -# EVENT_USERS_CAN_GET_APPROVED_RESULT, -# ], -# ) - diff --git a/tests/scenarios/bigquery/level_0_test.py b/tests/scenarios/bigquery/level_0_test.py index a98c7e51572..ee51ab5131a 100644 --- a/tests/scenarios/bigquery/level_0_test.py +++ b/tests/scenarios/bigquery/level_0_test.py @@ -7,7 +7,9 @@ from helpers.api import create_endpoints_query from helpers.api import create_endpoints_schema from helpers.api import create_endpoints_submit_query +from helpers.api import run_api_path from helpers.api import set_endpoint_settings +from helpers.asserts import result_is from helpers.events import Event from helpers.events import EventManager from helpers.events import Scenario @@ -21,6 +23,7 @@ from helpers.workers import create_prebuilt_worker_image from helpers.workers import create_worker_pool from helpers.workers import get_prebuilt_worker_image +from level_2_basic_test import query_sql import pytest from unsync import unsync @@ -72,9 +75,23 @@ async def user_low_side_activity(_, events, user, after=None): guest_user_setup_flow(user.email) # login_user + user_client = user.client() # submit_code # request_approval + test_query_path = "bigquery.test_query" + await result_is( + events, + lambda: len(run_api_path(user_client, test_query_path, sql_query=query_sql())) + == 10000, + matches=True, + after=[ + Event.QUERY_ENDPOINT_CONFIGURED, + Event.USERS_CREATED_CHECKED, + Event.ADMIN_SYNC_HIGH_TO_LOW, + ], + register=Event.USERS_CAN_QUERY_MOCK, + ) # loop: wait for approval @@ -236,7 +253,7 @@ async def admin_low_side_activity(_, events): @unsync_ -async def admin_create_api_endpoint( +async def admin_create_sync_api_endpoints( _, events, admin_client_high, @@ -323,7 +340,7 @@ async def admin_high_side_activity(_, events): events, ) - admin_create_api_endpoint( + admin_create_sync_api_endpoints( _, events, admin_client_high, @@ -356,11 +373,11 @@ async def test_level_0_k8s(request): events.add_scenario(scenario) events.monitor() - # start admin activity on high side + # start admin activity on low side admin_low_side_activity(request, events) # todo - # admin_high_side_activity(request, events) + admin_high_side_activity(request, events) # todo - only start syncing after the root user created other admin users # root_sync_activity(request, events, after=Event.USER_ADMIN_CREATED) From 
a9266aa2c05321e773d6323b6de0ed8fab669f9b Mon Sep 17 00:00:00 2001 From: khoaguin Date: Fri, 20 Sep 2024 13:24:58 +0700 Subject: [PATCH 09/78] user on low-side submits queries and waits for approval --- tests/scenarios/bigquery/level_0_test.py | 37 +++++++++++++++++++----- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/tests/scenarios/bigquery/level_0_test.py b/tests/scenarios/bigquery/level_0_test.py index ee51ab5131a..9fe6ad3429d 100644 --- a/tests/scenarios/bigquery/level_0_test.py +++ b/tests/scenarios/bigquery/level_0_test.py @@ -10,6 +10,7 @@ from helpers.api import run_api_path from helpers.api import set_endpoint_settings from helpers.asserts import result_is +from helpers.code import run_code from helpers.events import Event from helpers.events import EventManager from helpers.events import Scenario @@ -27,6 +28,9 @@ import pytest from unsync import unsync +# syft absolute +import syft as sy + random.seed(42069) @@ -68,7 +72,6 @@ async def guest_user_setup_flow(_, events, user): @unsync_ async def user_low_side_activity(_, events, user, after=None): - # loop: guest user creation is allowed if after: await events.await_for(event_name=after) @@ -78,11 +81,10 @@ async def user_low_side_activity(_, events, user, after=None): user_client = user.client() # submit_code - # request_approval - test_query_path = "bigquery.test_query" + submit_query_path = "bigquery.test_query" await result_is( events, - lambda: len(run_api_path(user_client, test_query_path, sql_query=query_sql())) + lambda: len(run_api_path(user_client, submit_query_path, sql_query=query_sql())) == 10000, matches=True, after=[ @@ -93,13 +95,32 @@ async def user_low_side_activity(_, events, user, after=None): register=Event.USERS_CAN_QUERY_MOCK, ) - # loop: wait for approval + func_name = "test_func" + await result_is( + events, + lambda: run_api_path( + user_client, + submit_query_path, + func_name=func_name, + query=query_sql(), + ), + matches="*Query submitted*", + after=[Event.SUBMIT_QUERY_ENDPOINT_CONFIGURED, Event.USERS_CREATED_CHECKED], + register=Event.USERS_CAN_SUBMIT_QUERY, + ) - # execute code - # get result + # this should fail to complete because no work will be approved or denied + await result_is( + events, + lambda: run_code(user_client, method_name=f"{func_name}*"), + matches=sy.SyftException(public_message="*Your code is waiting for approval*"), + after=[Event.USERS_CAN_SUBMIT_QUERY], + register=Event.USERS_QUERY_NOT_READY, + ) # dump result in a file - pass + + events.register(Event.USER_LOW_SIDE_WAITING_FOR_APPROVAL) @unsync_ From 4abda81ad07db58b0bf73e63000100225b12037a Mon Sep 17 00:00:00 2001 From: khoaguin Date: Fri, 20 Sep 2024 14:57:08 +0700 Subject: [PATCH 10/78] [tox] create tox task for k8s bigquery sync scenario test --- tox.ini | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/tox.ini b/tox.ini index 6a54b437098..7db91c03669 100644 --- a/tox.ini +++ b/tox.ini @@ -21,6 +21,7 @@ envlist = syft.test.security syft.test.unit syft.test.scenario + stack.test.scenario.sync.k8s syft.test.notebook syft.test.notebook.scenario syft.test.notebook.scenario.sync @@ -1564,3 +1565,46 @@ commands_post = bash -c "CLUSTER_NAME=${DATASITE_CLUSTER_NAME} tox -e dev.k8s.destroy || true" bash -c 'rm -f ${MIGRATION_DATA_DIR}/migration.blob' bash -c 'rm -f ${MIGRATION_DATA_DIR}/migration.yaml' + + +[testenv:stack.test.scenario.sync.k8s] +description = Big Query Scenario Tests with Syncing (Level 0) over k8s +deps =
-e{toxinidir}/packages/syft[dev,data_science] + pytest-asyncio + pytest-timeout + anyio + unsync +changedir = {toxinidir}/tests/scenarios +allowlist_externals = + tox + bash +setenv = + CLUSTER_NAME_HIGH = {env:CLUSTER_NAME_HIGH:bigquery-high} + CLUSTER_NAME_LOW = {env:CLUSTER_NAME_LOW:bigquery-low} + CLUSTER_HTTP_PORT_HIGH={env:CLUSTER_HTTP_PORT_HIGH:8080} + CLUSTER_HTTP_PORT_LOW={env:CLUSTER_HTTP_PORT_LOW:8081} +commands = + # destroy high and low side big query clusters + bash -c "echo Running highlow with ORCHESTRA_DEPLOYMENT_TYPE=$ORCHESTRA_DEPLOYMENT_TYPE DEV_MODE=$DEV_MODE TEST_NOTEBOOK_PATHS=$TEST_NOTEBOOK_PATHS; date" + bash -c 'CLUSTER_NAME_HIGH=${CLUSTER_NAME_HIGH} CLUSTER_NAME_LOW=${CLUSTER_NAME_LOW} tox -e dev.k8s.destroy.datasite.highlow' + bash -c "k3d registry delete k3d-registry.localhost || true" + bash -c "docker volume rm k3d-${CLUSTER_NAME_HIGH}-images --force || true" + bash -c "docker volume rm k3d-${CLUSTER_NAME_LOW}-images --force || true" + + # Now create high and low side big query clusters + bash -c '\ + export CLUSTER_NAME_HIGH=${CLUSTER_NAME_HIGH} \ + CLUSTER_NAME_LOW=${CLUSTER_NAME_LOW} \ + CLUSTER_PORT_HIGH=${CLUSTER_PORT_HIGH} \ + CLUSTER_PORT_LOW=${CLUSTER_PORT_LOW} && \ + tox -e dev.k8s.launch.datasite.highlow' + + # Run level 0 tests with k8s + pytest -s --disable-warnings bigquery/level_0_test.py + + # Clean up again + bash -c 'CLUSTER_NAME_HIGH=${CLUSTER_NAME_HIGH} CLUSTER_NAME_LOW=${CLUSTER_NAME_LOW} tox -e dev.k8s.destroy.datasite.highlow' + bash -c "k3d registry delete k3d-registry.localhost || true" + bash -c "docker volume rm k3d-${CLUSTER_NAME_HIGH}-images --force || true" + bash -c "docker volume rm k3d-${CLUSTER_NAME_LOW}-images --force || true" From e60ab2993c3fd44df6f12ae4727a021d6c1c0256 Mon Sep 17 00:00:00 2001 From: khoaguin Date: Fri, 20 Sep 2024 15:01:20 +0700 Subject: [PATCH 11/78] fix wrong import --- tests/scenarios/bigquery/helpers/api.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/tests/scenarios/bigquery/helpers/api.py b/tests/scenarios/bigquery/helpers/api.py index 1a6d8a4002d..48009db40e5 100644 --- a/tests/scenarios/bigquery/helpers/api.py +++ b/tests/scenarios/bigquery/helpers/api.py @@ -3,13 +3,8 @@ # syft absolute import syft as sy -from syft import test_helpers # noqa: F401 - -# FIXME:
fix test_helpers -if True: - # third party - from apis import make_schema # type: ignore - from apis import make_test_query +from syft.util.test_helpers.apis import make_schema +from syft.util.test_helpers.apis import make_test_query # Define any helper methods for our rate limiter From ca356d7f055df22fc7b32c0b94f556ffe918e08b Mon Sep 17 00:00:00 2001 From: Yash Gorana Date: Fri, 20 Sep 2024 13:51:37 +0530 Subject: [PATCH 12/78] register users --- tests/scenarios/bigquery/helpers/api.py | 14 ++++ tests/scenarios/bigquery/level_0_test.py | 78 ++++++++----------- .../scenarios/bigquery/level_2_basic_test.py | 15 +--- 3 files changed, 46 insertions(+), 61 deletions(-) diff --git a/tests/scenarios/bigquery/helpers/api.py b/tests/scenarios/bigquery/helpers/api.py index 48009db40e5..2a4547e6d1c 100644 --- a/tests/scenarios/bigquery/helpers/api.py +++ b/tests/scenarios/bigquery/helpers/api.py @@ -3,6 +3,7 @@ # syft absolute import syft as sy +from syft import test_settings from syft.util.test_helpers.apis import make_schema from syft.util.test_helpers.apis import make_test_query @@ -156,3 +157,16 @@ def run_api_path(client, path, **kwargs): api_method = api_for_path(client, path) result = api_method(**kwargs) return result + + +def query_sql(): + dataset_2 = test_settings.get("dataset_2", default="dataset_2") + table_2 = test_settings.get("table_2", default="table_2") + table_2_col_id = test_settings.get("table_2_col_id", default="table_id") + table_2_col_score = test_settings.get("table_2_col_score", default="colname") + + query = f"SELECT {table_2_col_id}, AVG({table_2_col_score}) AS average_score \ + FROM {dataset_2}.{table_2} \ + GROUP BY {table_2_col_id} \ + LIMIT 10000" + return query diff --git a/tests/scenarios/bigquery/level_0_test.py b/tests/scenarios/bigquery/level_0_test.py index 9fe6ad3429d..45bba24d5fa 100644 --- a/tests/scenarios/bigquery/level_0_test.py +++ b/tests/scenarios/bigquery/level_0_test.py @@ -1,12 +1,12 @@ # stdlib import asyncio from functools import wraps -import random # third party from helpers.api import create_endpoints_query from helpers.api import create_endpoints_schema from helpers.api import create_endpoints_submit_query +from helpers.api import query_sql from helpers.api import run_api_path from helpers.api import set_endpoint_settings from helpers.asserts import result_is @@ -18,21 +18,17 @@ from helpers.fixtures_sync import make_guest_client from helpers.fixtures_sync import make_user from helpers.fixtures_sync import sync_clients -from helpers.users import set_settings_allow_guest_signup from helpers.workers import add_external_registry from helpers.workers import check_worker_pool_exists from helpers.workers import create_prebuilt_worker_image from helpers.workers import create_worker_pool from helpers.workers import get_prebuilt_worker_image -from level_2_basic_test import query_sql import pytest from unsync import unsync # syft absolute import syft as sy -random.seed(42069) - def unsync_guard(): "Make sure we exit early if an exception occurs" @@ -75,8 +71,6 @@ async def user_low_side_activity(_, events, user, after=None): if after: await events.await_for(event_name=after) - guest_user_setup_flow(user.email) - # login_user user_client = user.client() @@ -154,17 +148,16 @@ async def root_sync_activity(_, events, after): @unsync_ async def admin_create_worker_pool( _, + events, admin_client, worker_pool_name, worker_docker_tag, - events, ): """ - Worker pool creation typically involves + Worker pool flow: - Register custom image - Launch worker pool - 
Scale worker pool - """ create_prebuilt_worker_image( @@ -203,32 +196,35 @@ async def admin_create_worker_pool( after=Event.WORKER_POOL_CREATED, ) - # TODO - # scale_worker_pool( - # events, - # admin_client, - # worker_pool_name, - # event_name=Event.WORKER_POOL_SCALED, - # after=Event.WORKER_POOL_CREATED, - # ) - @unsync_ async def mark_completed(events, register, after): if after: await events.await_for(event_name=after) + events.register(register) + + +@unsync_ +async def admin_signup_users(_, events, admin_client, users, register): + for user in users: + print(f"Registering user {user.name} ({user.email})") + admin_client.register( + name=user.name, + email=user.email, + password=user.password, + password_verify=user.password, + ) events.register(register) @unsync_ -async def admin_low_side_activity(_, events): +async def admin_low_side_activity(_, events, users): """ Typical admin activity on low-side server - 1. Login to low-side server - 2. Enable guest sign up - 3. Create a worker pool - 3. Start checking requests every 'n' seconds + - Login to low-side server + - Create users + - Create a worker pool """ worker_pool_name = "bigquery-pool" @@ -241,31 +237,23 @@ async def admin_low_side_activity(_, events): password="changethis", ) - # enable guest sign up - set_settings_allow_guest_signup( + admin_signup_users( + _, events, admin_client, - True, - Event.ALLOW_GUEST_SIGNUP_ENABLED, + users, + register=Event.USERS_CREATED, ) # create worker pool on low side admin_create_worker_pool( _, + events, admin_client, worker_pool_name, worker_docker_tag, - events, ) - # start checking requests every 5s - # triage_requests( - # events, - # admin_client, - # register=Event.ADMIN_APPROVED_REQUEST, - # sleep=5, - # ) - mark_completed( events, register=Event.ADMIN_LOW_SIDE_WORKFLOW_COMPLETED, @@ -355,10 +343,10 @@ async def admin_high_side_activity(_, events): admin_create_worker_pool( _, + events, admin_client_high, worker_pool_name, worker_docker_tag, - events, ) admin_create_sync_api_endpoints( @@ -394,8 +382,10 @@ async def test_level_0_k8s(request): events.add_scenario(scenario) events.monitor() + users = [make_user(password="password") for _ in range(2)] + # start admin activity on low side - admin_low_side_activity(request, events) + admin_low_side_activity(request, events, users) # todo admin_high_side_activity(request, events) @@ -405,14 +395,8 @@ async def test_level_0_k8s(request): # todo [ - user_low_side_activity( - request, - events, - make_user(), - after=Event.ALLOW_GUEST_SIGNUP_ENABLED, - ) - for i in range(5) + user_low_side_activity(request, events, user, after=Event.USERS_CREATED) + for user in users ] - await events.await_scenario(scenario_name="test_level_0_k8s", timeout=30) assert events.scenario_completed("test_level_0_k8s") diff --git a/tests/scenarios/bigquery/level_2_basic_test.py b/tests/scenarios/bigquery/level_2_basic_test.py index 9b98169d202..34cfdcd073f 100644 --- a/tests/scenarios/bigquery/level_2_basic_test.py +++ b/tests/scenarios/bigquery/level_2_basic_test.py @@ -2,6 +2,7 @@ from helpers.api import create_endpoints_query from helpers.api import create_endpoints_schema from helpers.api import create_endpoints_submit_query +from helpers.api import query_sql from helpers.api import run_api_path from helpers.api import set_endpoint_settings from helpers.asserts import ensure_package_installed @@ -28,20 +29,6 @@ # syft absolute import syft as sy -from syft import test_settings - - -def query_sql(): - dataset_2 = test_settings.get("dataset_2", default="dataset_2") 
- table_2 = test_settings.get("table_2", default="table_2") - table_2_col_id = test_settings.get("table_2_col_id", default="table_id") - table_2_col_score = test_settings.get("table_2_col_score", default="colname") - - query = f"SELECT {table_2_col_id}, AVG({table_2_col_score}) AS average_score \ - FROM {dataset_2}.{table_2} \ - GROUP BY {table_2_col_id} \ - LIMIT 10000" - return query @pytest.mark.asyncio From 5a43d46432c0f747892657a1e34f12904376c3af Mon Sep 17 00:00:00 2001 From: Yash Gorana Date: Fri, 20 Sep 2024 17:37:51 +0530 Subject: [PATCH 13/78] use unsync_guard --- tests/scenarios/bigquery/helpers/api.py | 2 +- tests/scenarios/bigquery/helpers/code.py | 2 +- tests/scenarios/bigquery/helpers/events.py | 34 ++++++++++- .../bigquery/helpers/fixtures_sync.py | 2 +- tests/scenarios/bigquery/helpers/users.py | 2 +- tests/scenarios/bigquery/helpers/workers.py | 2 +- tests/scenarios/bigquery/level_0_test.py | 57 +++---------------- 7 files changed, 46 insertions(+), 55 deletions(-) diff --git a/tests/scenarios/bigquery/helpers/api.py b/tests/scenarios/bigquery/helpers/api.py index 2a4547e6d1c..ce670e605ae 100644 --- a/tests/scenarios/bigquery/helpers/api.py +++ b/tests/scenarios/bigquery/helpers/api.py @@ -1,5 +1,5 @@ # third party -from unsync import unsync +from helpers.events import unsync # syft absolute import syft as sy diff --git a/tests/scenarios/bigquery/helpers/code.py b/tests/scenarios/bigquery/helpers/code.py index 400dd3c0c48..ef63d8667cc 100644 --- a/tests/scenarios/bigquery/helpers/code.py +++ b/tests/scenarios/bigquery/helpers/code.py @@ -3,7 +3,7 @@ # third party from helpers.api import api_for_path -from unsync import unsync +from helpers.events import unsync # syft absolute from syft.service.code.user_code import UserCode diff --git a/tests/scenarios/bigquery/helpers/events.py b/tests/scenarios/bigquery/helpers/events.py index 01da8c6784f..5abc4d2c836 100644 --- a/tests/scenarios/bigquery/helpers/events.py +++ b/tests/scenarios/bigquery/helpers/events.py @@ -1,6 +1,7 @@ # stdlib import asyncio from dataclasses import dataclass +from functools import wraps import inspect import json import os @@ -8,7 +9,9 @@ import time # third party -from unsync import unsync +import unsync as unsync_lib + +__all__ = ["Event", "EventManager", "Scenario", "unsync"] class Event: @@ -231,7 +234,7 @@ def clear_events(self): with open(self.event_file, "w") as f: json.dump({}, f) - @unsync + @unsync_lib.unsync async def monitor(self, period: float = 2): while True: await asyncio.sleep(period) @@ -260,3 +263,30 @@ def __del__(self): # if os.path.exists(self.event_file): # os.remove(self.event_file) pass + + +def unsync_guard(): + "Make sure we exit early if an exception occurs" + + def decorator(func): + @wraps(func) + @unsync_lib.unsync + async def wrapper(*args, **kwargs): + try: + result = await func(*args, **kwargs) + return result + except Exception as e: + print(f"Exception occurred: {e}") + for arg in args: + if isinstance(arg, EventManager): + print("Registering exception event") + arg.register(Event.EXCEPTION_OCCURRED) + break + raise + + return wrapper + + return decorator + + +unsync = unsync_guard() diff --git a/tests/scenarios/bigquery/helpers/fixtures_sync.py b/tests/scenarios/bigquery/helpers/fixtures_sync.py index 0ce1dac7289..80a21aa1cfc 100644 --- a/tests/scenarios/bigquery/helpers/fixtures_sync.py +++ b/tests/scenarios/bigquery/helpers/fixtures_sync.py @@ -3,9 +3,9 @@ # third party from faker import Faker +from helpers.events import unsync from helpers.users import TestUser 
import pandas as pd -from unsync import unsync # syft absolute import syft as sy diff --git a/tests/scenarios/bigquery/helpers/users.py b/tests/scenarios/bigquery/helpers/users.py index fe3895f8468..c90b3159c6f 100644 --- a/tests/scenarios/bigquery/helpers/users.py +++ b/tests/scenarios/bigquery/helpers/users.py @@ -4,7 +4,7 @@ # third party from faker import Faker -from unsync import unsync +from helpers.events import unsync # syft absolute import syft as sy diff --git a/tests/scenarios/bigquery/helpers/workers.py b/tests/scenarios/bigquery/helpers/workers.py index 96f00a490ac..66341d3503e 100644 --- a/tests/scenarios/bigquery/helpers/workers.py +++ b/tests/scenarios/bigquery/helpers/workers.py @@ -1,5 +1,5 @@ # third party -from unsync import unsync +from helpers.events import unsync # syft absolute import syft as sy diff --git a/tests/scenarios/bigquery/level_0_test.py b/tests/scenarios/bigquery/level_0_test.py index 45bba24d5fa..269e5e5abd4 100644 --- a/tests/scenarios/bigquery/level_0_test.py +++ b/tests/scenarios/bigquery/level_0_test.py @@ -1,6 +1,5 @@ # stdlib import asyncio -from functools import wraps # third party from helpers.api import create_endpoints_query @@ -14,8 +13,8 @@ from helpers.events import Event from helpers.events import EventManager from helpers.events import Scenario +from helpers.events import unsync from helpers.fixtures_sync import make_client -from helpers.fixtures_sync import make_guest_client from helpers.fixtures_sync import make_user from helpers.fixtures_sync import sync_clients from helpers.workers import add_external_registry @@ -24,49 +23,12 @@ from helpers.workers import create_worker_pool from helpers.workers import get_prebuilt_worker_image import pytest -from unsync import unsync # syft absolute import syft as sy -def unsync_guard(): - "Make sure we exit early if an exception occurs" - - def decorator(func): - @wraps(func) - @unsync - async def wrapper(*args, **kwargs): - try: - result = await func(*args, **kwargs) - return result - except Exception as e: - print(f"Exception occurred: {e}") - for arg in args: - if isinstance(arg, EventManager): - print("Registering exception event") - arg.register(Event.EXCEPTION_OCCURRED) - break - raise - - return wrapper - - return decorator - - -unsync_ = unsync_guard() -# unsync_ = unsync - - -@unsync_ -async def guest_user_setup_flow(_, events, user): - user_client = make_guest_client(url="http://localhost:8081") - print(f"Logged in as guest user {user.email}") - user_client.forgot_password(email=user.email) - print(f"Requested password reset {user.email}") - - -@unsync_ +@unsync async def user_low_side_activity(_, events, user, after=None): if after: await events.await_for(event_name=after) @@ -117,7 +79,7 @@ async def user_low_side_activity(_, events, user, after=None): events.register(Event.USER_LOW_SIDE_WAITING_FOR_APPROVAL) -@unsync_ +@unsync async def root_sync_activity(_, events, after): if after: await events.await_for(event_name=after) @@ -145,7 +107,7 @@ async def root_sync_activity(_, events, after): ) -@unsync_ +@unsync async def admin_create_worker_pool( _, events, @@ -197,14 +159,14 @@ async def admin_create_worker_pool( ) -@unsync_ +@unsync async def mark_completed(events, register, after): if after: await events.await_for(event_name=after) events.register(register) -@unsync_ +@unsync async def admin_signup_users(_, events, admin_client, users, register): for user in users: print(f"Registering user {user.name} ({user.email})") @@ -218,7 +180,7 @@ async def admin_signup_users(_, events, 
admin_client, users, register): events.register(register) -@unsync_ +@unsync async def admin_low_side_activity(_, events, users): """ Typical admin activity on low-side server @@ -261,7 +223,7 @@ async def admin_low_side_activity(_, events, users): ) -@unsync_ +@unsync async def admin_create_sync_api_endpoints( _, events, @@ -324,7 +286,7 @@ async def admin_create_sync_api_endpoints( ) -@unsync_ +@unsync async def admin_high_side_activity(_, events): # login admin_client_high = make_client( @@ -372,7 +334,6 @@ async def test_level_0_k8s(request): scenario = Scenario( name="test_level_0_k8s", events=[ - Event.ALLOW_GUEST_SIGNUP_ENABLED, Event.ADMIN_LOW_SIDE_WORKFLOW_COMPLETED, # Event.ADMIN_HIGH_SIDE_WORKFLOW_COMPLETED, ], From 587ae51caf04a3b309e55fc50d4d8fbedffb715a Mon Sep 17 00:00:00 2001 From: Yash Gorana Date: Mon, 23 Sep 2024 15:09:14 +0530 Subject: [PATCH 14/78] polish things up, separately --- tests/scenariosv2/l0_test.py | 345 ++++++++++++++++++++++++++++++ tests/scenariosv2/sim/__init__.py | 0 tests/scenariosv2/sim/core.py | 181 ++++++++++++++++ 3 files changed, 526 insertions(+) create mode 100644 tests/scenariosv2/l0_test.py create mode 100644 tests/scenariosv2/sim/__init__.py create mode 100644 tests/scenariosv2/sim/core.py diff --git a/tests/scenariosv2/l0_test.py b/tests/scenariosv2/l0_test.py new file mode 100644 index 00000000000..9a883e1e9fe --- /dev/null +++ b/tests/scenariosv2/l0_test.py @@ -0,0 +1,345 @@ +# stdlib +import asyncio +from enum import auto +import random + +# third party +from faker import Faker +import pytest +from sim.core import Event +from sim.core import Simulator +from sim.core import SimulatorContext +from sim.core import sim_activity +from sim.core import sim_entrypoint + +# syft absolute +import syft as sy +from syft import test_settings +from syft.client.client import SyftClient +from syft.util.test_helpers.apis import make_schema +from syft.util.test_helpers.apis import make_test_query + +fake = Faker() + + +class Events(Event): + INIT = auto() + + GUEST_USERS_CREATED = auto() + + ADMIN_TEST_ENDPOINT_CREATED = auto() + ADMIN_SUBMIT_ENDPOINT_CREATED = auto() + ADMIN_SCHEMA_ENDPOINT_CREATED = auto() + ADMIN_ALL_ENDPOINTS_CREATED = auto() + ADMIN_FLOW_COMPLETED = auto() + ADMIN_SYNC_COMPLETED = auto() + + USER_FLOW_COMPLETED = auto() + USER_CAN_QUERY_TEST_ENDPOINT = auto() + USER_CAN_SUBMIT_QUERY = auto() + + +# ------------------------------------------------------------------------------------------------ + + +def query_sql(): + dataset_2 = test_settings.get("dataset_2", default="dataset_2") + table_2 = test_settings.get("table_2", default="table_2") + table_2_col_id = test_settings.get("table_2_col_id", default="table_id") + table_2_col_score = test_settings.get("table_2_col_score", default="colname") + + query = f"SELECT {table_2_col_id}, AVG({table_2_col_score}) AS average_score \ + FROM {dataset_2}.{table_2} \ + GROUP BY {table_2_col_id} \ + LIMIT 10000" + return query + + +def get_code_from_msg(msg: str): + return str(msg.split("`")[1].replace("()", "").replace("client.", "")) + + +@sim_activity( + wait_for=Events.ADMIN_TEST_ENDPOINT_CREATED, + trigger=Events.USER_CAN_QUERY_TEST_ENDPOINT, +) +async def user_query_test_endpoint(ctx: SimulatorContext, client: sy.DatasiteClient): + """Run query on test endpoint""" + + user = client.logged_in_user + + ctx.logger.info(f"User {user}: Calling client.api.bigquery.test_query") + res = client.api.bigquery.test_query(sql_query=query_sql()) + assert len(res) == 10000 + ctx.logger.info(f"User: {user}: 
Received {len(res)} rows") + + +@sim_activity( + wait_for=[ + Events.ADMIN_SUBMIT_ENDPOINT_CREATED, + Events.USER_CAN_QUERY_TEST_ENDPOINT, + ], + trigger=Events.USER_CAN_SUBMIT_QUERY, +) +async def user_bq_submit(ctx: SimulatorContext, client: sy.DatasiteClient): + """Submit query to be run on private data""" + user = client.logged_in_user + + ctx.logger.info(f"User {user}: Calling client.api.services.bigquery.submit_query") + res = client.api.bigquery.submit_query(func_name="invalid_func", query=query_sql()) + ctx.logger.info(f"User {user}: Received {res}") + + +@sim_activity(wait_for=Events.GUEST_USERS_CREATED, trigger=Events.USER_FLOW_COMPLETED) +async def user_flow(ctx: SimulatorContext, server_url_low: str, user: dict): + client = sy.login( + url=server_url_low, + email=user["email"], + password=user["password"], + ) + ctx.logger.info(f"User {client.logged_in_user}: logged in") + + await ctx.gather( + user_query_test_endpoint(ctx, client), + # user_bq_submit(ctx, client), + ) + + +# ------------------------------------------------------------------------------------------------ + + +@sim_activity(trigger=Events.GUEST_USERS_CREATED) +async def admin_signup_users( + ctx: SimulatorContext, admin_client: SyftClient, users: list[dict] +): + for user in users: + ctx.logger.info(f"Admin: Creating guest user {user['email']}") + admin_client.register( + name=user["name"], + email=user["email"], + password=user["password"], + password_verify=user["password"], + ) + + +@sim_activity(trigger=Events.ADMIN_SCHEMA_ENDPOINT_CREATED) +async def bq_schema_endpoint( + ctx: SimulatorContext, + admin_client: SyftClient, + worker_pool: str | None = None, +): + path = "bigquery.schema" + schema_function = make_schema( + settings={ + "calls_per_min": 5, + }, + worker_pool=worker_pool, + ) + + try: + ctx.logger.info(f"Admin: Creating endpoint '{path}'") + result = admin_client.custom_api.add(endpoint=schema_function) + assert isinstance(result, sy.SyftSuccess), result + except sy.SyftException as e: + ctx.logger.error(f"Admin: Failed to add api endpoint '{path}' - {e}") + + +@sim_activity(trigger=Events.ADMIN_TEST_ENDPOINT_CREATED) +async def bq_test_endpoint( + ctx: SimulatorContext, + admin_client: SyftClient, + worker_pool: str | None = None, +): + path = "bigquery.test_query" + + private_query_function = make_test_query( + settings={ + "rate_limiter_enabled": False, + } + ) + mock_query_function = make_test_query( + settings={ + "rate_limiter_enabled": True, + "calls_per_min": 10, + } + ) + + new_endpoint = sy.TwinAPIEndpoint( + path=path, + description="This endpoint allows to query Bigquery storage via SQL queries.", + private_function=private_query_function, + mock_function=mock_query_function, + worker_pool=worker_pool, + ) + + try: + ctx.logger.info(f"Admin: Creating endpoint '{path}'") + result = admin_client.custom_api.add(endpoint=new_endpoint) + assert isinstance(result, sy.SyftSuccess), result + except sy.SyftException as e: + ctx.logger.error(f"Admin: Failed to add api endpoint '{path}' - {e}") + + +@sim_activity(trigger=Events.ADMIN_SUBMIT_ENDPOINT_CREATED) +async def bq_submit_endpoint( + ctx: SimulatorContext, + admin_client: sy.DatasiteClient, + worker_pool: str | None = None, +): + """Setup on Low Side""" + + path = "bigquery.submit_query" + + @sy.api_endpoint( + path=path, + description="API endpoint that allows you to submit SQL queries to run on the private data.", + worker_pool=worker_pool, + settings={"worker": worker_pool}, + ) + def submit_query( + context, + func_name: str, + 
query: str, + ) -> str: + # stdlib + import hashlib + + # syft absolute + import syft as sy + + hash_object = hashlib.new("sha256") + hash_object.update(context.user.email.encode("utf-8")) + func_name = func_name + "_" + hash_object.hexdigest()[:6] + + @sy.syft_function( + name=func_name, + input_policy=sy.MixedInputPolicy( + endpoint=sy.Constant( + val=context.admin_client.api.services.bigquery.test_query + ), + query=sy.Constant(val=query), + client=context.admin_client, + ), + worker_pool_name=context.settings["worker"], + ) + def execute_query(query: str, endpoint): + res = endpoint(sql_query=query) + return res + + request = context.user_client.code.request_code_execution(execute_query) + if isinstance(request, sy.SyftError): + return request + context.admin_client.requests.set_tags(request, ["autosync"]) + + return f"Query submitted {request}. Use `client.code.{func_name}()` to run your query" + + try: + ctx.logger.info(f"Admin: Creating endpoint '{path}'") + result = admin_client.custom_api.add(endpoint=submit_query) + assert isinstance(result, sy.SyftSuccess), result + except sy.SyftException as e: + ctx.logger.error(f"Admin: Failed to add api endpoint '{path}' - {e}") + + +@sim_activity(trigger=Events.ADMIN_ALL_ENDPOINTS_CREATED) +async def admin_create_endpoint(ctx: SimulatorContext, admin_client: SyftClient): + worker_pool = None + await ctx.gather( + bq_test_endpoint(ctx, admin_client, worker_pool=worker_pool), + bq_submit_endpoint(ctx, admin_client, worker_pool=worker_pool), + bq_schema_endpoint(ctx, admin_client, worker_pool=worker_pool), + ) + ctx.logger.info("Admin: Created all endpoints") + + +@sim_activity(trigger=Events.ADMIN_FLOW_COMPLETED) +async def admin_flow(ctx: SimulatorContext, admin_auth, users): + admin_client = sy.login(**admin_auth) + ctx.logger.info("Admin: logged in") + + await ctx.gather( + admin_signup_users(ctx, admin_client, users), + admin_create_endpoint(ctx, admin_client), + ) + + +# ------------------------------------------------------------------------------------------------ + + +@sim_activity(trigger=Events.ADMIN_SYNC_COMPLETED) +async def admin_sync_flow(ctx: SimulatorContext, admin_auth_high, admin_auth_low): + high_client = sy.login(**admin_auth_high) + ctx.logger.info("Admin: logged in to high-side") + + low_client = sy.login(**admin_auth_low) + ctx.logger.info("Admin: logged in to low-side") + + while True: + await asyncio.sleep(random.uniform(5, 10)) + + result = sy.sync(high_client, low_client) + if isinstance(result, sy.SyftSuccess): + ctx.logger.info("Admin: Nothing to sync") + continue + + ctx.logger.info(f"Admin: Syncing high->low {result.__dict__}") + result._share_all() + result._sync_all() + + ctx.logger.info("Admin: Synced high->low") + break + + +# ------------------------------------------------------------------------------------------------ + + +@sim_entrypoint() +async def sim_l0_scenario(ctx: SimulatorContext): + users = [ + dict( # noqa: C408 + name=fake.name(), + email=fake.email(), + password="password", + ) + for i in range(3) + ] + + server_url_high = "http://localhost:8080" + admin_auth_high = dict( # noqa: C408, F841 + url=server_url_high, + email="info@openmined.org", + password="changethis", + ) + + server_url_low = "http://localhost:8081" + admin_auth_low = dict( # noqa: C408 + url=server_url_low, + email="info@openmined.org", + password="changethis", + ) + + ctx.events.trigger(Events.INIT) + await ctx.gather( + admin_flow(ctx, admin_auth_low, users), + *[user_flow(ctx, server_url_low, user) for user in users], + ) 
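+
+# NOTE: the scenario above is wired together purely by events: sim_activity(wait_for=...)
+# blocks a coroutine until the named event(s) fire, and trigger=... fires an event when
+# the coroutine finishes (decorator defined in sim/core.py below). A minimal sketch of
+# chaining two activities this way (event names reused from this file; the bodies are
+# placeholders, not part of the scenario):
+#
+#     @sim_activity(trigger=Events.GUEST_USERS_CREATED)
+#     async def producer(ctx: SimulatorContext):
+#         ...  # e.g. admin registers the guest users
+#
+#     @sim_activity(wait_for=Events.GUEST_USERS_CREATED, trigger=Events.USER_FLOW_COMPLETED)
+#     async def consumer(ctx: SimulatorContext):
+#         ...  # runs only once GUEST_USERS_CREATED has fired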
+ + +@pytest.mark.asyncio +async def test_l0_scenario(request): + sim = Simulator() + + await sim.start( + sim_l0_scenario, + random_wait=(0.5, 1.5), + check_events=[ + # admin + Events.GUEST_USERS_CREATED, + Events.ADMIN_ALL_ENDPOINTS_CREATED, + Events.ADMIN_FLOW_COMPLETED, + # users + Events.USER_CAN_QUERY_TEST_ENDPOINT, + Events.USER_FLOW_COMPLETED, + ], + timeout=60, + ) diff --git a/tests/scenariosv2/sim/__init__.py b/tests/scenariosv2/sim/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/scenariosv2/sim/core.py b/tests/scenariosv2/sim/core.py new file mode 100644 index 00000000000..4975f3f3d21 --- /dev/null +++ b/tests/scenariosv2/sim/core.py @@ -0,0 +1,181 @@ +# stdlib +import asyncio +from datetime import datetime +from enum import Enum +from functools import wraps +import logging +import random +import time + +EVENTS_LOG = "sim.events.log" +EXECUTIONS_LOG = "sim.executions.log" +ACTIVITY_LOG = "sim.activity.log" + +logging.Formatter.formatTime = ( + lambda self, record, datefmt=None: datetime.fromtimestamp(record.created).isoformat( + sep="T", + timespec="microseconds", + ) +) + +DEFAULT_FORMATTER = logging.Formatter( + "%(asctime)s - %(levelname)s - %(message)s", +) +EVENT_FORMATTER = logging.Formatter( + "%(asctime)s - %(message)s", +) + + +class TestFailure(Exception): + """Custom exception to signal test failures""" + + pass + + +class Event(Enum): + """Base class for events. Subclass this to define your specific events.""" + + pass + + +class EventManager: + def __init__(self): + self.events = {} + self.logger = logging.getLogger("events") + file_handler = logging.FileHandler(EVENTS_LOG, mode="w") + file_handler.setFormatter(EVENT_FORMATTER) + self.logger.addHandler(file_handler) + self.logger.setLevel(logging.INFO) + + async def wait_for(self, event: Event): + if event not in self.events: + self.events[event] = asyncio.Event() + await self.events[event].wait() + + def trigger(self, event: Event): + if event not in self.events: + self.events[event] = asyncio.Event() + self.logger.info(f"Triggered: {event.name}") + self.events[event].set() + + def is_set(self, event: Event) -> bool: + if event not in self.events: + return False + return self.events[event].is_set() + + +class SimulatorContext: + def __init__(self, random_wait=None): + self.events = EventManager() + self.random_wait = random_wait + + self.logger = logging.getLogger("activity") + file_handler = logging.FileHandler(ACTIVITY_LOG, mode="w") + file_handler.setFormatter(DEFAULT_FORMATTER) + self.logger.addHandler(file_handler) + self.logger.setLevel(logging.INFO) + + # private logger + self._elogger = logging.getLogger("executions") + file_handler = logging.FileHandler(EXECUTIONS_LOG, mode="w") + file_handler.setFormatter(DEFAULT_FORMATTER) + self._elogger.addHandler(file_handler) + self._elogger.setLevel(logging.DEBUG) + + def unfired_events(self, events: list[Event]): + evts = filter(lambda e: not self.events.is_set(e), events) + evts = [e.name for e in evts] + return evts + + @staticmethod + async def gather(*tasks): + return await asyncio.gather(*tasks) + + +class Simulator: + async def start(self, *tasks, check_events=None, random_wait=None, timeout=60): + context = SimulatorContext(random_wait) + results = None + + try: + results = await asyncio.wait_for( + asyncio.gather(*[task(context) for task in tasks]), + timeout=timeout, + ) + except asyncio.TimeoutError: + if check_events: + context._elogger.error( + f"Timed out. 
Unfired Events = {context.unfired_events(check_events)}" + ) + raise TestFailure(f"simulator timed out after {timeout}s") + + if check_events: + evts = context.unfired_events(check_events) + if evts: + raise TestFailure(f"Unfired events: {evts}") + + return results + + +def sim_entrypoint(): + def decorator(func): + @wraps(func) + async def wrapper(ctx: SimulatorContext, *args, **kwargs): + try: + ctx._elogger.info(f"Started: {func.__name__}") + result = await func(ctx, *args, **kwargs) + ctx._elogger.info(f"Completed: {func.__name__}") + return result + except Exception as e: + ctx._elogger.error(f"{func.__name__} - {str(e)}") + raise + + return wrapper + + return decorator + + +def sim_activity( + wait_for: Event | list[Event] | None = None, + trigger: Event | None = None, +): + def decorator(func): + @wraps(func) + async def wrapper(ctx: SimulatorContext, *args, **kwargs): + fsig = f"{func.__name__}({args}, {kwargs})" + + _wait_for = kwargs.get("wait_for", wait_for) + _trigger = kwargs.get("after", trigger) + + if _wait_for: + ctx._elogger.debug(f"Blocked: for={_wait_for} {fsig}") + if isinstance(_wait_for, list): + await asyncio.gather(*[ctx.events.wait_for(e) for e in _wait_for]) + else: + await ctx.events.wait_for(_wait_for) + ctx._elogger.debug(f"Unblocked: {fsig}") + + wait = 0 + if ctx.random_wait: + wait = random.uniform(*ctx.random_wait) + await asyncio.sleep(wait) + + try: + ctx._elogger.info(f"Started: {fsig} time_wait={wait:.3f}s") + start = time.time() + result = await func(ctx, *args, **kwargs) + total = time.time() - start + + ctx._elogger.info(f"Completed: {fsig} time_taken={total:.3f}s") + + if _trigger: + ctx.events.trigger(_trigger) + + return result + except Exception as e: + ctx._elogger.error(f"{fsig} - {str(e)}") + raise + + return wrapper + + return decorator From ab685c38b5328104d57437d5bc6eaa7c84c76acb Mon Sep 17 00:00:00 2001 From: Yash Gorana Date: Mon, 23 Sep 2024 19:26:54 +0530 Subject: [PATCH 15/78] l0 till bigquery.test_query --- tests/scenariosv2/l0_test.py | 178 ++++++++++++++++++++++++++-------- tests/scenariosv2/sim/core.py | 18 ++-- 2 files changed, 144 insertions(+), 52 deletions(-) diff --git a/tests/scenariosv2/l0_test.py b/tests/scenariosv2/l0_test.py index 9a883e1e9fe..892d9110eb1 100644 --- a/tests/scenariosv2/l0_test.py +++ b/tests/scenariosv2/l0_test.py @@ -6,7 +6,7 @@ # third party from faker import Faker import pytest -from sim.core import Event +from sim.core import BaseEvent from sim.core import Simulator from sim.core import SimulatorContext from sim.core import sim_activity @@ -18,23 +18,34 @@ from syft.client.client import SyftClient from syft.util.test_helpers.apis import make_schema from syft.util.test_helpers.apis import make_test_query +from syft.util.test_helpers.worker_helpers import ( + build_and_launch_worker_pool_from_docker_str, +) fake = Faker() -class Events(Event): +class Event(BaseEvent): + # overall state INIT = auto() - - GUEST_USERS_CREATED = auto() - - ADMIN_TEST_ENDPOINT_CREATED = auto() - ADMIN_SUBMIT_ENDPOINT_CREATED = auto() - ADMIN_SCHEMA_ENDPOINT_CREATED = auto() + ADMIN_LOWSIDE_FLOW_COMPLETED = auto() + ADMIN_HIGHSIDE_FLOW_COMPLETED = auto() + USER_FLOW_COMPLETED = auto() + # admin - endpoints ADMIN_ALL_ENDPOINTS_CREATED = auto() - ADMIN_FLOW_COMPLETED = auto() + ADMIN_BQ_TEST_ENDPOINT_CREATED = auto() + ADMIN_BQ_SUBMIT_ENDPOINT_CREATED = auto() + ADMIN_BQ_SCHEMA_ENDPOINT_CREATED = auto() + ADMIN_LOW_SIDE_ENDPOINTS_AVAILABLE = auto() + # admin - worker pool + ADMIN_WORKER_POOL_CREATED = auto() + 
ADMIN_LOWSIDE_WORKER_POOL_CREATED = auto() + ADMIN_HIGHSIDE_WORKER_POOL_CREATED = auto() + # admin sync ADMIN_SYNC_COMPLETED = auto() - - USER_FLOW_COMPLETED = auto() + ADMIN_SYNCED_HIGH_TO_LOW = auto() + # users + GUEST_USERS_CREATED = auto() USER_CAN_QUERY_TEST_ENDPOINT = auto() USER_CAN_SUBMIT_QUERY = auto() @@ -59,9 +70,16 @@ def get_code_from_msg(msg: str): return str(msg.split("`")[1].replace("()", "").replace("client.", "")) +# ------------------------------------------------------------------------------------------------ + + @sim_activity( - wait_for=Events.ADMIN_TEST_ENDPOINT_CREATED, - trigger=Events.USER_CAN_QUERY_TEST_ENDPOINT, + # ! yeah this is fucking ugly + wait_for=[ + Event.ADMIN_BQ_TEST_ENDPOINT_CREATED, + Event.ADMIN_LOW_SIDE_ENDPOINTS_AVAILABLE, + ], + trigger=Event.USER_CAN_QUERY_TEST_ENDPOINT, ) async def user_query_test_endpoint(ctx: SimulatorContext, client: sy.DatasiteClient): """Run query on test endpoint""" @@ -76,10 +94,10 @@ async def user_query_test_endpoint(ctx: SimulatorContext, client: sy.DatasiteCli @sim_activity( wait_for=[ - Events.ADMIN_SUBMIT_ENDPOINT_CREATED, - Events.USER_CAN_QUERY_TEST_ENDPOINT, + Event.ADMIN_BQ_SUBMIT_ENDPOINT_CREATED, + Event.USER_CAN_QUERY_TEST_ENDPOINT, ], - trigger=Events.USER_CAN_SUBMIT_QUERY, + trigger=Event.USER_CAN_SUBMIT_QUERY, ) async def user_bq_submit(ctx: SimulatorContext, client: sy.DatasiteClient): """Submit query to be run on private data""" @@ -90,7 +108,7 @@ async def user_bq_submit(ctx: SimulatorContext, client: sy.DatasiteClient): ctx.logger.info(f"User {user}: Received {res}") -@sim_activity(wait_for=Events.GUEST_USERS_CREATED, trigger=Events.USER_FLOW_COMPLETED) +@sim_activity(wait_for=Event.GUEST_USERS_CREATED, trigger=Event.USER_FLOW_COMPLETED) async def user_flow(ctx: SimulatorContext, server_url_low: str, user: dict): client = sy.login( url=server_url_low, @@ -99,7 +117,7 @@ async def user_flow(ctx: SimulatorContext, server_url_low: str, user: dict): ) ctx.logger.info(f"User {client.logged_in_user}: logged in") - await ctx.gather( + await asyncio.gather( user_query_test_endpoint(ctx, client), # user_bq_submit(ctx, client), ) @@ -108,7 +126,7 @@ async def user_flow(ctx: SimulatorContext, server_url_low: str, user: dict): # ------------------------------------------------------------------------------------------------ -@sim_activity(trigger=Events.GUEST_USERS_CREATED) +@sim_activity(trigger=Event.GUEST_USERS_CREATED) async def admin_signup_users( ctx: SimulatorContext, admin_client: SyftClient, users: list[dict] ): @@ -122,7 +140,7 @@ async def admin_signup_users( ) -@sim_activity(trigger=Events.ADMIN_SCHEMA_ENDPOINT_CREATED) +@sim_activity(trigger=Event.ADMIN_BQ_SCHEMA_ENDPOINT_CREATED) async def bq_schema_endpoint( ctx: SimulatorContext, admin_client: SyftClient, @@ -144,7 +162,7 @@ async def bq_schema_endpoint( ctx.logger.error(f"Admin: Failed to add api endpoint '{path}' - {e}") -@sim_activity(trigger=Events.ADMIN_TEST_ENDPOINT_CREATED) +@sim_activity(trigger=Event.ADMIN_BQ_TEST_ENDPOINT_CREATED) async def bq_test_endpoint( ctx: SimulatorContext, admin_client: SyftClient, @@ -180,7 +198,7 @@ async def bq_test_endpoint( ctx.logger.error(f"Admin: Failed to add api endpoint '{path}' - {e}") -@sim_activity(trigger=Events.ADMIN_SUBMIT_ENDPOINT_CREATED) +@sim_activity(trigger=Event.ADMIN_BQ_SUBMIT_ENDPOINT_CREATED) async def bq_submit_endpoint( ctx: SimulatorContext, admin_client: sy.DatasiteClient, @@ -241,10 +259,11 @@ def execute_query(query: str, endpoint): ctx.logger.error(f"Admin: Failed to add api 
endpoint '{path}' - {e}") -@sim_activity(trigger=Events.ADMIN_ALL_ENDPOINTS_CREATED) +@sim_activity(trigger=Event.ADMIN_ALL_ENDPOINTS_CREATED) async def admin_create_endpoint(ctx: SimulatorContext, admin_client: SyftClient): - worker_pool = None - await ctx.gather( + worker_pool = "biquery-pool" + + await asyncio.gather( bq_test_endpoint(ctx, admin_client, worker_pool=worker_pool), bq_submit_endpoint(ctx, admin_client, worker_pool=worker_pool), bq_schema_endpoint(ctx, admin_client, worker_pool=worker_pool), @@ -252,21 +271,84 @@ async def admin_create_endpoint(ctx: SimulatorContext, admin_client: SyftClient) ctx.logger.info("Admin: Created all endpoints") -@sim_activity(trigger=Events.ADMIN_FLOW_COMPLETED) -async def admin_flow(ctx: SimulatorContext, admin_auth, users): +@sim_activity(wait_for=Event.ADMIN_SYNCED_HIGH_TO_LOW) +async def admin_watch_sync(ctx: SimulatorContext, admin_client: SyftClient): + # fuckall function that just watches for ADMIN_SYNCED_HIGH_TO_LOW + # only to trigger ADMIN_LOW_SIDE_ENDPOINTS_AVAILABLE that + ctx.logger.info("Admin: Got a sync from high-side.") + + # trigger any event we want after sync + ctx.events.trigger(Event.ADMIN_LOW_SIDE_ENDPOINTS_AVAILABLE) + + +# @sim_activity(trigger=Event.ADMIN_WORKER_POOL_CREATED) +async def admin_create_bq_pool(ctx: SimulatorContext, admin_client: SyftClient): + worker_pool = "biquery-pool" + + base_image = admin_client.images.get_all()[0] + + external_registry_url = "k3d-registry.localhost:5800" + worker_image_tag = str(base_image.image_identifier).replace( + "backend", "worker-bigquery" + ) + + worker_dockerfile = f""" + FROM {str(base_image.image_identifier)} + RUN uv pip install db-dtypes google-cloud-bigquery + """.strip() + + ctx.logger.info(f"Admin: Creating worker pool with tag='{worker_image_tag}'") + + build_and_launch_worker_pool_from_docker_str( + environment="remote", + client=admin_client, + worker_pool_name=worker_pool, + worker_dockerfile=worker_dockerfile, + external_registry=external_registry_url, + docker_tag=worker_image_tag, + custom_pool_pod_annotations=None, + custom_pool_pod_labels=None, + scale_to=1, + ) + + +@sim_activity(trigger=Event.ADMIN_HIGHSIDE_WORKER_POOL_CREATED) +async def admin_create_bq_pool_high(ctx: SimulatorContext, admin_client: SyftClient): + await admin_create_bq_pool(ctx, admin_client) + + +@sim_activity(trigger=Event.ADMIN_LOWSIDE_WORKER_POOL_CREATED) +async def admin_create_bq_pool_low(ctx: SimulatorContext, admin_client: SyftClient): + await admin_create_bq_pool(ctx, admin_client) + + +@sim_activity(trigger=Event.ADMIN_HIGHSIDE_FLOW_COMPLETED) +async def admin_high_side(ctx: SimulatorContext, admin_auth): admin_client = sy.login(**admin_auth) - ctx.logger.info("Admin: logged in") + ctx.logger.info("Admin high-side: logged in") - await ctx.gather( - admin_signup_users(ctx, admin_client, users), + await asyncio.gather( + admin_create_bq_pool_high(ctx, admin_client), admin_create_endpoint(ctx, admin_client), ) +@sim_activity(trigger=Event.ADMIN_LOWSIDE_FLOW_COMPLETED) +async def admin_low_side(ctx: SimulatorContext, admin_auth, users): + admin_client = sy.login(**admin_auth) + ctx.logger.info("Admin low-side: logged in") + + await asyncio.gather( + admin_watch_sync(ctx, admin_client), + admin_signup_users(ctx, admin_client, users), + admin_create_bq_pool_low(ctx, admin_client), + ) + + # ------------------------------------------------------------------------------------------------ -@sim_activity(trigger=Events.ADMIN_SYNC_COMPLETED) 
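# A minimal, runnable sketch (all names below are illustrative and not part of
# this patch series): the trigger=/wait_for= pairs on these sim_activity
# decorators reduce to the plain asyncio.Event rendezvous shown here.
# EventManager lazily keeps one asyncio.Event per enum member; trigger() sets
# it, and wait_for= awaits it before the decorated activity body runs.
import asyncio


async def producer(evt: asyncio.Event) -> None:
    await asyncio.sleep(0.1)  # stand-in for real work, e.g. a sync completing
    evt.set()  # roughly what ctx.events.trigger(Event.X) boils down to


async def consumer(evt: asyncio.Event) -> None:
    await evt.wait()  # roughly what wait_for=Event.X boils down to
    print("unblocked: the awaited event has fired")


async def main() -> None:
    evt = asyncio.Event()
    # run both sides concurrently; consumer blocks until producer triggers
    await asyncio.gather(producer(evt), consumer(evt))


asyncio.run(main())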
+@sim_activity(trigger=Event.ADMIN_SYNC_COMPLETED) async def admin_sync_flow(ctx: SimulatorContext, admin_auth_high, admin_auth_low): high_client = sy.login(**admin_auth_high) ctx.logger.info("Admin: logged in to high-side") @@ -282,10 +364,12 @@ async def admin_sync_flow(ctx: SimulatorContext, admin_auth_high, admin_auth_low ctx.logger.info("Admin: Nothing to sync") continue - ctx.logger.info(f"Admin: Syncing high->low {result.__dict__}") + ctx.logger.info(f"Admin: Syncing high->low {result}") result._share_all() result._sync_all() + # trigger an event so that guest users can start querying + ctx.events.trigger(Event.ADMIN_SYNCED_HIGH_TO_LOW) ctx.logger.info("Admin: Synced high->low") break @@ -318,9 +402,11 @@ async def sim_l0_scenario(ctx: SimulatorContext): password="changethis", ) - ctx.events.trigger(Events.INIT) - await ctx.gather( - admin_flow(ctx, admin_auth_low, users), + ctx.events.trigger(Event.INIT) + await asyncio.gather( + admin_low_side(ctx, admin_auth_low, users), + admin_high_side(ctx, admin_auth_high), + admin_sync_flow(ctx, admin_auth_high, admin_auth_low), *[user_flow(ctx, server_url_low, user) for user in users], ) @@ -331,15 +417,21 @@ async def test_l0_scenario(request): await sim.start( sim_l0_scenario, - random_wait=(0.5, 1.5), + random_wait=None, # (0.5, 1.5), check_events=[ - # admin - Events.GUEST_USERS_CREATED, - Events.ADMIN_ALL_ENDPOINTS_CREATED, - Events.ADMIN_FLOW_COMPLETED, + # admin lowside + Event.GUEST_USERS_CREATED, + Event.ADMIN_LOWSIDE_WORKER_POOL_CREATED, + Event.ADMIN_LOWSIDE_FLOW_COMPLETED, + # admin high side + Event.ADMIN_ALL_ENDPOINTS_CREATED, + Event.ADMIN_HIGHSIDE_WORKER_POOL_CREATED, + Event.ADMIN_HIGHSIDE_FLOW_COMPLETED, + # admin sync + Event.ADMIN_SYNC_COMPLETED, # users - Events.USER_CAN_QUERY_TEST_ENDPOINT, - Events.USER_FLOW_COMPLETED, + # Event.USER_CAN_QUERY_TEST_ENDPOINT, + # Event.USER_FLOW_COMPLETED, ], - timeout=60, + timeout=300, ) diff --git a/tests/scenariosv2/sim/core.py b/tests/scenariosv2/sim/core.py index 4975f3f3d21..2cb60cc447d 100644 --- a/tests/scenariosv2/sim/core.py +++ b/tests/scenariosv2/sim/core.py @@ -32,7 +32,7 @@ class TestFailure(Exception): pass -class Event(Enum): +class BaseEvent(Enum): """Base class for events. 
Subclass this to define your specific events.""" pass @@ -47,18 +47,18 @@ def __init__(self): self.logger.addHandler(file_handler) self.logger.setLevel(logging.INFO) - async def wait_for(self, event: Event): + async def wait_for(self, event: BaseEvent): if event not in self.events: self.events[event] = asyncio.Event() await self.events[event].wait() - def trigger(self, event: Event): + def trigger(self, event: BaseEvent): if event not in self.events: self.events[event] = asyncio.Event() self.logger.info(f"Triggered: {event.name}") self.events[event].set() - def is_set(self, event: Event) -> bool: + def is_set(self, event: BaseEvent) -> bool: if event not in self.events: return False return self.events[event].is_set() @@ -82,14 +82,14 @@ def __init__(self, random_wait=None): self._elogger.addHandler(file_handler) self._elogger.setLevel(logging.DEBUG) - def unfired_events(self, events: list[Event]): + def unfired_events(self, events: list[BaseEvent]): evts = filter(lambda e: not self.events.is_set(e), events) evts = [e.name for e in evts] return evts @staticmethod async def gather(*tasks): - return await asyncio.gather(*tasks) + return asyncio.gather(*tasks) class Simulator: @@ -136,14 +136,15 @@ async def wrapper(ctx: SimulatorContext, *args, **kwargs): def sim_activity( - wait_for: Event | list[Event] | None = None, - trigger: Event | None = None, + wait_for: BaseEvent | list[BaseEvent] | None = None, + trigger: BaseEvent | None = None, ): def decorator(func): @wraps(func) async def wrapper(ctx: SimulatorContext, *args, **kwargs): fsig = f"{func.__name__}({args}, {kwargs})" + # ! todo: this isn't working _wait_for = kwargs.get("wait_for", wait_for) _trigger = kwargs.get("after", trigger) @@ -165,7 +166,6 @@ async def wrapper(ctx: SimulatorContext, *args, **kwargs): start = time.time() result = await func(ctx, *args, **kwargs) total = time.time() - start - ctx._elogger.info(f"Completed: {fsig} time_taken={total:.3f}s") if _trigger: From 63785b2c1fbdf8f247961b1ea309fca672569c78 Mon Sep 17 00:00:00 2001 From: Yash Gorana Date: Mon, 23 Sep 2024 19:41:48 +0530 Subject: [PATCH 16/78] logs --- tests/scenariosv2/l0_test.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/scenariosv2/l0_test.py b/tests/scenariosv2/l0_test.py index 892d9110eb1..6a10d5bc063 100644 --- a/tests/scenariosv2/l0_test.py +++ b/tests/scenariosv2/l0_test.py @@ -86,10 +86,10 @@ async def user_query_test_endpoint(ctx: SimulatorContext, client: sy.DatasiteCli user = client.logged_in_user - ctx.logger.info(f"User {user}: Calling client.api.bigquery.test_query") + ctx.logger.info(f"User: {user} - Calling client.api.bigquery.test_query") res = client.api.bigquery.test_query(sql_query=query_sql()) assert len(res) == 10000 - ctx.logger.info(f"User: {user}: Received {len(res)} rows") + ctx.logger.info(f"User: {user} - Received {len(res)} rows") @sim_activity( @@ -103,9 +103,9 @@ async def user_bq_submit(ctx: SimulatorContext, client: sy.DatasiteClient): """Submit query to be run on private data""" user = client.logged_in_user - ctx.logger.info(f"User {user}: Calling client.api.services.bigquery.submit_query") + ctx.logger.info(f"User: {user} - Calling client.api.services.bigquery.submit_query") res = client.api.bigquery.submit_query(func_name="invalid_func", query=query_sql()) - ctx.logger.info(f"User {user}: Received {res}") + ctx.logger.info(f"User: {user} - Received {res}") @sim_activity(wait_for=Event.GUEST_USERS_CREATED, trigger=Event.USER_FLOW_COMPLETED) @@ -115,7 +115,7 @@ async def 
user_flow(ctx: SimulatorContext, server_url_low: str, user: dict): email=user["email"], password=user["password"], ) - ctx.logger.info(f"User {client.logged_in_user}: logged in") + ctx.logger.info(f"User: {client.logged_in_user} - logged in") await asyncio.gather( user_query_test_endpoint(ctx, client), From da9608f8ba60e3b035ff83bdba4732d17960a317 Mon Sep 17 00:00:00 2001 From: Yash Gorana Date: Mon, 23 Sep 2024 22:38:22 +0530 Subject: [PATCH 17/78] to_thread worker pool create --- tests/scenariosv2/l0_test.py | 6 +++++- tests/scenariosv2/sim/core.py | 23 ++++++++++++++++------- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/tests/scenariosv2/l0_test.py b/tests/scenariosv2/l0_test.py index 6a10d5bc063..9d0c629a513 100644 --- a/tests/scenariosv2/l0_test.py +++ b/tests/scenariosv2/l0_test.py @@ -299,7 +299,10 @@ async def admin_create_bq_pool(ctx: SimulatorContext, admin_client: SyftClient): ctx.logger.info(f"Admin: Creating worker pool with tag='{worker_image_tag}'") - build_and_launch_worker_pool_from_docker_str( + # build_and_launch_worker_pool_from_docker_str is a blocking call + # so you just run it in a different thread. + await ctx.blocking_call( + build_and_launch_worker_pool_from_docker_str, environment="remote", client=admin_client, worker_pool_name=worker_pool, @@ -310,6 +313,7 @@ async def admin_create_bq_pool(ctx: SimulatorContext, admin_client: SyftClient): custom_pool_pod_labels=None, scale_to=1, ) + ctx.logger.info(f"Admin: Worker pool created with tag='{worker_image_tag}'") @sim_activity(trigger=Event.ADMIN_HIGHSIDE_WORKER_POOL_CREATED) diff --git a/tests/scenariosv2/sim/core.py b/tests/scenariosv2/sim/core.py index 2cb60cc447d..497ce8d0b38 100644 --- a/tests/scenariosv2/sim/core.py +++ b/tests/scenariosv2/sim/core.py @@ -19,10 +19,10 @@ ) DEFAULT_FORMATTER = logging.Formatter( - "%(asctime)s - %(levelname)s - %(message)s", + "%(asctime)s - %(threadName)s - %(levelname)s - %(message)s", ) EVENT_FORMATTER = logging.Formatter( - "%(asctime)s - %(message)s", + "%(asctime)s - %(threadName)s - %(message)s", ) @@ -87,9 +87,13 @@ def unfired_events(self, events: list[BaseEvent]): evts = [e.name for e in evts] return evts + @staticmethod + async def blocking_call(func, /, *args, **kwargs): + return await asyncio.to_thread(func, *args, **kwargs) + @staticmethod async def gather(*tasks): - return asyncio.gather(*tasks) + return await asyncio.gather(*tasks) class Simulator: @@ -126,8 +130,11 @@ async def wrapper(ctx: SimulatorContext, *args, **kwargs): result = await func(ctx, *args, **kwargs) ctx._elogger.info(f"Completed: {func.__name__}") return result - except Exception as e: - ctx._elogger.error(f"{func.__name__} - {str(e)}") + except Exception: + ctx._elogger.error( + f"sim_entrypoint - {func.__name__} - Unhandled exception", + exc_info=True, + ) raise return wrapper @@ -172,8 +179,10 @@ async def wrapper(ctx: SimulatorContext, *args, **kwargs): ctx.events.trigger(_trigger) return result - except Exception as e: - ctx._elogger.error(f"{fsig} - {str(e)}") + except Exception: + ctx._elogger.error( + f"sim_activity - {fsig} - Unhandled exception", exc_info=True + ) raise return wrapper From fd040630da2f897ba0c14ecbc1d15b1ed90934f3 Mon Sep 17 00:00:00 2001 From: Yash Gorana Date: Tue, 24 Sep 2024 02:53:04 +0530 Subject: [PATCH 18/78] l0 submit_query & sync --- tests/scenariosv2/l0_test.py | 125 ++++++++++++++++++++++++++--------- 1 file changed, 93 insertions(+), 32 deletions(-) diff --git a/tests/scenariosv2/l0_test.py b/tests/scenariosv2/l0_test.py index 
9d0c629a513..80070cc23cc 100644 --- a/tests/scenariosv2/l0_test.py +++ b/tests/scenariosv2/l0_test.py @@ -44,6 +44,7 @@ class Event(BaseEvent): # admin sync ADMIN_SYNC_COMPLETED = auto() ADMIN_SYNCED_HIGH_TO_LOW = auto() + ADMIN_SYNCED_LOW_TO_HIGH = auto() # users GUEST_USERS_CREATED = auto() USER_CAN_QUERY_TEST_ENDPOINT = auto() @@ -74,11 +75,7 @@ def get_code_from_msg(msg: str): @sim_activity( - # ! yeah this is fucking ugly - wait_for=[ - Event.ADMIN_BQ_TEST_ENDPOINT_CREATED, - Event.ADMIN_LOW_SIDE_ENDPOINTS_AVAILABLE, - ], + wait_for=Event.ADMIN_LOW_SIDE_ENDPOINTS_AVAILABLE, trigger=Event.USER_CAN_QUERY_TEST_ENDPOINT, ) async def user_query_test_endpoint(ctx: SimulatorContext, client: sy.DatasiteClient): @@ -86,26 +83,34 @@ async def user_query_test_endpoint(ctx: SimulatorContext, client: sy.DatasiteCli user = client.logged_in_user - ctx.logger.info(f"User: {user} - Calling client.api.bigquery.test_query") - res = client.api.bigquery.test_query(sql_query=query_sql()) - assert len(res) == 10000 - ctx.logger.info(f"User: {user} - Received {len(res)} rows") + def _query_endpoint(): + ctx.logger.info(f"User: {user} - Calling client.api.bigquery.test_query (mock)") + res = client.api.bigquery.test_query(sql_query=query_sql()) + assert len(res) == 10000 + ctx.logger.info(f"User: {user} - Received {len(res)} rows") + + await asyncio.to_thread(_query_endpoint) @sim_activity( - wait_for=[ - Event.ADMIN_BQ_SUBMIT_ENDPOINT_CREATED, - Event.USER_CAN_QUERY_TEST_ENDPOINT, - ], + wait_for=Event.USER_CAN_QUERY_TEST_ENDPOINT, trigger=Event.USER_CAN_SUBMIT_QUERY, ) async def user_bq_submit(ctx: SimulatorContext, client: sy.DatasiteClient): """Submit query to be run on private data""" user = client.logged_in_user - ctx.logger.info(f"User: {user} - Calling client.api.services.bigquery.submit_query") - res = client.api.bigquery.submit_query(func_name="invalid_func", query=query_sql()) - ctx.logger.info(f"User: {user} - Received {res}") + def _submit_endpoint(): + ctx.logger.info( + f"User: {user} - Calling client.api.services.bigquery.submit_query" + ) + res = client.api.bigquery.submit_query( + func_name="invalid_func", + query=query_sql(), + ) + ctx.logger.info(f"User: {user} - Received {res}") + + await asyncio.to_thread(_submit_endpoint) @sim_activity(wait_for=Event.GUEST_USERS_CREATED, trigger=Event.USER_FLOW_COMPLETED) @@ -117,10 +122,8 @@ async def user_flow(ctx: SimulatorContext, server_url_low: str, user: dict): ) ctx.logger.info(f"User: {client.logged_in_user} - logged in") - await asyncio.gather( - user_query_test_endpoint(ctx, client), - # user_bq_submit(ctx, client), - ) + await user_query_test_endpoint(ctx, client) + await user_bq_submit(ctx, client) # ------------------------------------------------------------------------------------------------ @@ -141,7 +144,7 @@ async def admin_signup_users( @sim_activity(trigger=Event.ADMIN_BQ_SCHEMA_ENDPOINT_CREATED) -async def bq_schema_endpoint( +async def admin_endpoint_bq_schema( ctx: SimulatorContext, admin_client: SyftClient, worker_pool: str | None = None, @@ -163,7 +166,7 @@ async def bq_schema_endpoint( @sim_activity(trigger=Event.ADMIN_BQ_TEST_ENDPOINT_CREATED) -async def bq_test_endpoint( +async def admin_endpoint_bq_test( ctx: SimulatorContext, admin_client: SyftClient, worker_pool: str | None = None, @@ -199,7 +202,7 @@ async def bq_test_endpoint( @sim_activity(trigger=Event.ADMIN_BQ_SUBMIT_ENDPOINT_CREATED) -async def bq_submit_endpoint( +async def admin_endpoint_bq_submit( ctx: SimulatorContext, admin_client: sy.DatasiteClient, 
worker_pool: str | None = None, @@ -264,14 +267,20 @@ async def admin_create_endpoint(ctx: SimulatorContext, admin_client: SyftClient) worker_pool = "biquery-pool" await asyncio.gather( - bq_test_endpoint(ctx, admin_client, worker_pool=worker_pool), - bq_submit_endpoint(ctx, admin_client, worker_pool=worker_pool), - bq_schema_endpoint(ctx, admin_client, worker_pool=worker_pool), + admin_endpoint_bq_test(ctx, admin_client, worker_pool=worker_pool), + admin_endpoint_bq_submit(ctx, admin_client, worker_pool=worker_pool), + admin_endpoint_bq_schema(ctx, admin_client, worker_pool=worker_pool), ) ctx.logger.info("Admin: Created all endpoints") -@sim_activity(wait_for=Event.ADMIN_SYNCED_HIGH_TO_LOW) +@sim_activity( + wait_for=[ + Event.ADMIN_SYNCED_HIGH_TO_LOW, + # endpoints work only after low side worker pool is created + Event.ADMIN_LOWSIDE_WORKER_POOL_CREATED, + ] +) async def admin_watch_sync(ctx: SimulatorContext, admin_client: SyftClient): # fuckall function that just watches for ADMIN_SYNCED_HIGH_TO_LOW # only to trigger ADMIN_LOW_SIDE_ENDPOINTS_AVAILABLE that @@ -326,6 +335,27 @@ async def admin_create_bq_pool_low(ctx: SimulatorContext, admin_client: SyftClie await admin_create_bq_pool(ctx, admin_client) +@sim_activity( + wait_for=[ + Event.USER_CAN_SUBMIT_QUERY, + Event.ADMIN_SYNCED_LOW_TO_HIGH, + ] +) +async def admin_triage_requests(ctx: SimulatorContext, admin_client: SyftClient): + while True: + await asyncio.sleep(random.uniform(5, 10)) + + # check if there are any requests + for request in admin_client.requests: + ctx.logger.info(f"Admin: Found request {request.__dict__}") + pass + # ! approve or deny. + # * If func_name has `invalid_func` in it, deny. + # * If not, then approve + # ! approved ones can exec succesfully or fail + # ! whatever is the case, after that just sync back to low-side (taken care by admin_sync_to_low_flow) + + @sim_activity(trigger=Event.ADMIN_HIGHSIDE_FLOW_COMPLETED) async def admin_high_side(ctx: SimulatorContext, admin_auth): admin_client = sy.login(**admin_auth) @@ -334,6 +364,7 @@ async def admin_high_side(ctx: SimulatorContext, admin_auth): await asyncio.gather( admin_create_bq_pool_high(ctx, admin_client), admin_create_endpoint(ctx, admin_client), + admin_triage_requests(ctx, admin_client), ) @@ -343,9 +374,9 @@ async def admin_low_side(ctx: SimulatorContext, admin_auth, users): ctx.logger.info("Admin low-side: logged in") await asyncio.gather( - admin_watch_sync(ctx, admin_client), admin_signup_users(ctx, admin_client, users), admin_create_bq_pool_low(ctx, admin_client), + admin_watch_sync(ctx, admin_client), ) @@ -353,7 +384,11 @@ async def admin_low_side(ctx: SimulatorContext, admin_auth, users): @sim_activity(trigger=Event.ADMIN_SYNC_COMPLETED) -async def admin_sync_flow(ctx: SimulatorContext, admin_auth_high, admin_auth_low): +async def admin_sync_to_low_flow( + ctx: SimulatorContext, + admin_auth_high, + admin_auth_low, +): high_client = sy.login(**admin_auth_high) ctx.logger.info("Admin: logged in to high-side") @@ -365,7 +400,7 @@ async def admin_sync_flow(ctx: SimulatorContext, admin_auth_high, admin_auth_low result = sy.sync(high_client, low_client) if isinstance(result, sy.SyftSuccess): - ctx.logger.info("Admin: Nothing to sync") + ctx.logger.info("Admin: Nothing to sync high->low") continue ctx.logger.info(f"Admin: Syncing high->low {result}") @@ -375,7 +410,32 @@ async def admin_sync_flow(ctx: SimulatorContext, admin_auth_high, admin_auth_low # trigger an event so that guest users can start querying 
ctx.events.trigger(Event.ADMIN_SYNCED_HIGH_TO_LOW) ctx.logger.info("Admin: Synced high->low") - break + + +@sim_activity(trigger=Event.ADMIN_SYNC_COMPLETED) +async def admin_sync_to_high_flow( + ctx: SimulatorContext, admin_auth_high, admin_auth_low +): + high_client = sy.login(**admin_auth_high) + ctx.logger.info("Admin: logged in to high-side") + + low_client = sy.login(**admin_auth_low) + ctx.logger.info("Admin: logged in to low-side") + + while True: + await asyncio.sleep(random.uniform(5, 10)) + + result = sy.sync(low_client, high_client) + if isinstance(result, sy.SyftSuccess): + ctx.logger.info("Admin: Nothing to sync low->high") + continue + + ctx.logger.info(f"Admin: Syncing low->high {result}") + result._share_all() + result._sync_all() + + ctx.events.trigger(Event.ADMIN_SYNCED_LOW_TO_HIGH) + ctx.logger.info("Admin: Synced low->high") # ------------------------------------------------------------------------------------------------ @@ -410,7 +470,8 @@ async def sim_l0_scenario(ctx: SimulatorContext): await asyncio.gather( admin_low_side(ctx, admin_auth_low, users), admin_high_side(ctx, admin_auth_high), - admin_sync_flow(ctx, admin_auth_high, admin_auth_low), + admin_sync_to_low_flow(ctx, admin_auth_high, admin_auth_low), + admin_sync_to_high_flow(ctx, admin_auth_high, admin_auth_low), *[user_flow(ctx, server_url_low, user) for user in users], ) From 92d2cb6554a4ba345c2993b70f3ddfe0b1df6020 Mon Sep 17 00:00:00 2001 From: dk Date: Tue, 24 Sep 2024 09:00:43 +0700 Subject: [PATCH 19/78] fix lint --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 521e3f9b60c..ada6d322b70 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -27,7 +27,7 @@ repos: always_run: true - id: name-tests-test always_run: true - exclude: ^(.*/tests/utils/)|^(.*fixtures.py)|^(tests/scenarios/bigquery/helpers) + exclude: ^(.*/tests/utils/)|^(.*fixtures.py)|^(tests/scenarios/bigquery/helpers)|^(tests/scenariosv2/sim) - id: requirements-txt-fixer always_run: true - id: mixed-line-ending From 97686917e11687d12e9f0647aad2eb48b199870e Mon Sep 17 00:00:00 2001 From: khoaguin Date: Tue, 24 Sep 2024 17:39:35 +0700 Subject: [PATCH 20/78] [tests/scenario] level 2 bigquery scenario test version 2 --- tests/scenariosv2/l2_test.py | 168 +++++++++++++++++++++++++++++++++++ 1 file changed, 168 insertions(+) create mode 100644 tests/scenariosv2/l2_test.py diff --git a/tests/scenariosv2/l2_test.py b/tests/scenariosv2/l2_test.py new file mode 100644 index 00000000000..5388da4c5b1 --- /dev/null +++ b/tests/scenariosv2/l2_test.py @@ -0,0 +1,168 @@ +# RUN: just reset-high && pytest -s tests/scenariosv2/l2_test.py +## .logs files will be created in pwd + +# stdlib +import asyncio +import random + +# third party +from faker import Faker +from l0_test import Event +from l0_test import admin_create_bq_pool_high +from l0_test import admin_create_endpoint +from l0_test import admin_signup_users +from l0_test import query_sql +import pytest +from sim.core import Simulator +from sim.core import SimulatorContext +from sim.core import sim_activity +from sim.core import sim_entrypoint + +# syft absolute +import syft as sy +from syft.client.client import SyftClient + +fake = Faker() + + +# ---------------------------------- admin ---------------------------------- +@sim_activity( + wait_for=[ + Event.USER_CAN_SUBMIT_QUERY, + ] +) +async def admin_triage_requests(ctx: SimulatorContext, admin_client: SyftClient): + while True: + 
await asyncio.sleep(random.uniform(3, 5)) + ctx.logger.info("Admin: Triaging requests") + + pending_requests = admin_client.requests.get_all_pending() + if len(pending_requests) == 0: + break + for request in admin_client.requests.get_all_pending(): + ctx.logger.info(f"Admin: Found request {request.__dict__}") + if "invalid_func" in request.code.service_func_name: + request.deny(reason="you submitted an invalid code") + else: + request.approve() + + +@sim_activity(trigger=Event.ADMIN_HIGHSIDE_FLOW_COMPLETED) +async def admin_flow( + ctx: SimulatorContext, admin_auth: dict, users: list[dict] +) -> None: + admin_client = sy.login(**admin_auth) + ctx.logger.info("Admin: logged in") + + await asyncio.gather( + admin_signup_users(ctx, admin_client, users), + admin_create_bq_pool_high(ctx, admin_client), + admin_create_endpoint(ctx, admin_client), + admin_triage_requests(ctx, admin_client), + ) + + +# ---------------------------------- user ---------------------------------- +@sim_activity( + wait_for=[ + Event.ADMIN_ALL_ENDPOINTS_CREATED, + Event.ADMIN_HIGHSIDE_WORKER_POOL_CREATED, + ], + trigger=Event.USER_CAN_QUERY_TEST_ENDPOINT, +) +async def user_query_test_endpoint(ctx: SimulatorContext, client: sy.DatasiteClient): + """Run query on test endpoint""" + + user = client.logged_in_user + + def _query_endpoint(): + ctx.logger.info(f"User: {user} - Calling client.api.bigquery.test_query (mock)") + res = client.api.bigquery.test_query(sql_query=query_sql()) + assert len(res) == 10000 + ctx.logger.info(f"User: {user} - Received {len(res)} rows") + + await asyncio.to_thread(_query_endpoint) + + +@sim_activity( + wait_for=[ + Event.USER_CAN_QUERY_TEST_ENDPOINT, + Event.ADMIN_HIGHSIDE_WORKER_POOL_CREATED, + ], + trigger=Event.USER_CAN_SUBMIT_QUERY, +) +async def user_bq_submit(ctx: SimulatorContext, client: sy.DatasiteClient): + """Submit query to be run on private data""" + user = client.logged_in_user + + def _submit_endpoint(): + ctx.logger.info( + f"User: {user} - Calling client.api.services.bigquery.submit_query" + ) + res = client.api.bigquery.submit_query( + func_name="invalid_func", + query=query_sql(), + ) + ctx.logger.info(f"User: {user} - Received {res}") + + await asyncio.to_thread(_submit_endpoint) + + +@sim_activity( + wait_for=[Event.GUEST_USERS_CREATED, Event.ADMIN_ALL_ENDPOINTS_CREATED], + trigger=Event.USER_FLOW_COMPLETED, +) +async def user_flow(ctx: SimulatorContext, server_url: str, user: dict): + client = sy.login( + url=server_url, + email=user["email"], + password=user["password"], + ) + ctx.logger.info(f"User: {client.logged_in_user} - logged in") + + await user_query_test_endpoint(ctx, client) + await user_bq_submit(ctx, client) + + +# ---------------------------------- test ---------------------------------- +@sim_entrypoint() +async def sim_l2_scenario(ctx: SimulatorContext): + users = [ + { + "name": fake.name(), + "email": fake.email(), + "password": "password", + } + for i in range(3) + ] + + server_url = "http://localhost:8080" + + admin_auth = { + "url": server_url, + "email": "info@openmined.org", + "password": "changethis", + } + + ctx.events.trigger(Event.INIT) + await asyncio.gather( + admin_flow(ctx, admin_auth, users), + *[user_flow(ctx, server_url, user) for user in users], + ) + + +@pytest.mark.asyncio +async def test_l2_scenario(request): + sim = Simulator() + + await sim.start( + sim_l2_scenario, + random_wait=None, + check_events=[ + Event.GUEST_USERS_CREATED, + Event.ADMIN_HIGHSIDE_WORKER_POOL_CREATED, + Event.ADMIN_ALL_ENDPOINTS_CREATED, + 
Event.ADMIN_HIGHSIDE_FLOW_COMPLETED, + ], + timeout=300, + ) From 0a54fd05c5cb67d56b94b7d06584397587c49b9c Mon Sep 17 00:00:00 2001 From: Yash Gorana Date: Wed, 25 Sep 2024 00:10:12 +0530 Subject: [PATCH 21/78] l0 admin high triage requests --- tests/scenariosv2/l0_test.py | 53 +++++++++++++++++++++++------------- 1 file changed, 34 insertions(+), 19 deletions(-) diff --git a/tests/scenariosv2/l0_test.py b/tests/scenariosv2/l0_test.py index 80070cc23cc..4130adc1525 100644 --- a/tests/scenariosv2/l0_test.py +++ b/tests/scenariosv2/l0_test.py @@ -16,6 +16,7 @@ import syft as sy from syft import test_settings from syft.client.client import SyftClient +from syft.service.request.request import RequestStatus from syft.util.test_helpers.apis import make_schema from syft.util.test_helpers.apis import make_test_query from syft.util.test_helpers.worker_helpers import ( @@ -101,11 +102,13 @@ async def user_bq_submit(ctx: SimulatorContext, client: sy.DatasiteClient): user = client.logged_in_user def _submit_endpoint(): + func_name = "invalid_func" if random.random() < 0.5 else "test_query" ctx.logger.info( - f"User: {user} - Calling client.api.services.bigquery.submit_query" + f"User: {user} - Calling client.api.services.bigquery.submit_query func_name={func_name}" ) + res = client.api.bigquery.submit_query( - func_name="invalid_func", + func_name=func_name, query=query_sql(), ) ctx.logger.info(f"User: {user} - Received {res}") @@ -341,19 +344,31 @@ async def admin_create_bq_pool_low(ctx: SimulatorContext, admin_client: SyftClie Event.ADMIN_SYNCED_LOW_TO_HIGH, ] ) -async def admin_triage_requests(ctx: SimulatorContext, admin_client: SyftClient): +async def admin_triage_requests_high(ctx: SimulatorContext, admin_client: SyftClient): while True: await asyncio.sleep(random.uniform(5, 10)) # check if there are any requests - for request in admin_client.requests: - ctx.logger.info(f"Admin: Found request {request.__dict__}") + # BUG: request that are executed request.code() are always in pending state + pending_requests = [ + req + for req in admin_client.requests + if req.get_status() == RequestStatus.PENDING + ] + for request in pending_requests: + ctx.logger.info(f"Admin high: Found request {request.__dict__}") + if getattr(request, "code", None): + if "invalid_func" in request.code.service_func_name: + ctx.logger.info(f"Admin high: Denying request {request}") + request.deny("You gave me an `invalid_func` function") + else: + ctx.logger.info( + f"Admin high: Approving request by executing {request}" + ) + job = request.code(blocking=False) + result = job.wait() + ctx.logger.info(f"Admin high: Request result {result}") pass - # ! approve or deny. - # * If func_name has `invalid_func` in it, deny. - # * If not, then approve - # ! approved ones can exec succesfully or fail - # ! 
whatever is the case, after that just sync back to low-side (taken care by admin_sync_to_low_flow) @sim_activity(trigger=Event.ADMIN_HIGHSIDE_FLOW_COMPLETED) @@ -364,7 +379,7 @@ async def admin_high_side(ctx: SimulatorContext, admin_auth): await asyncio.gather( admin_create_bq_pool_high(ctx, admin_client), admin_create_endpoint(ctx, admin_client), - admin_triage_requests(ctx, admin_client), + admin_triage_requests_high(ctx, admin_client), ) @@ -400,16 +415,16 @@ async def admin_sync_to_low_flow( result = sy.sync(high_client, low_client) if isinstance(result, sy.SyftSuccess): - ctx.logger.info("Admin: Nothing to sync high->low") + ctx.logger.info("Admin high: Nothing to sync high->low") continue - ctx.logger.info(f"Admin: Syncing high->low {result}") + ctx.logger.info(f"Admin high: Syncing high->low {result}") result._share_all() result._sync_all() # trigger an event so that guest users can start querying ctx.events.trigger(Event.ADMIN_SYNCED_HIGH_TO_LOW) - ctx.logger.info("Admin: Synced high->low") + ctx.logger.info("Admin high: Synced high->low") @sim_activity(trigger=Event.ADMIN_SYNC_COMPLETED) @@ -417,25 +432,25 @@ async def admin_sync_to_high_flow( ctx: SimulatorContext, admin_auth_high, admin_auth_low ): high_client = sy.login(**admin_auth_high) - ctx.logger.info("Admin: logged in to high-side") + ctx.logger.info("Admin low: logged in to high-side") low_client = sy.login(**admin_auth_low) - ctx.logger.info("Admin: logged in to low-side") + ctx.logger.info("Admin low: logged in to low-side") while True: await asyncio.sleep(random.uniform(5, 10)) result = sy.sync(low_client, high_client) if isinstance(result, sy.SyftSuccess): - ctx.logger.info("Admin: Nothing to sync low->high") + ctx.logger.info("Admin low: Nothing to sync low->high") continue - ctx.logger.info(f"Admin: Syncing low->high {result}") + ctx.logger.info(f"Admin low: Syncing low->high {result}") result._share_all() result._sync_all() ctx.events.trigger(Event.ADMIN_SYNCED_LOW_TO_HIGH) - ctx.logger.info("Admin: Synced low->high") + ctx.logger.info("Admin low: Synced low->high") # ------------------------------------------------------------------------------------------------ From ef702764b1c83e26705962000700a2468936480f Mon Sep 17 00:00:00 2001 From: khoaguin Date: Wed, 25 Sep 2024 15:37:21 +0700 Subject: [PATCH 22/78] [[tests/scenario] admin low triggers signal when all results are available so admin high can stop triaging --- tests/scenariosv2/l0_test.py | 68 ++++++++++++++++++++++++++---------- 1 file changed, 50 insertions(+), 18 deletions(-) diff --git a/tests/scenariosv2/l0_test.py b/tests/scenariosv2/l0_test.py index 4130adc1525..45b49320ada 100644 --- a/tests/scenariosv2/l0_test.py +++ b/tests/scenariosv2/l0_test.py @@ -24,6 +24,8 @@ ) fake = Faker() +NUM_USERS = 3 +NUM_ENDPOINTS = 3 # test_query, submit_query, schema_query class Event(BaseEvent): @@ -31,6 +33,7 @@ class Event(BaseEvent): INIT = auto() ADMIN_LOWSIDE_FLOW_COMPLETED = auto() ADMIN_HIGHSIDE_FLOW_COMPLETED = auto() + ADMIN_LOW_ALL_RESULTS_AVAILABLE = auto() USER_FLOW_COMPLETED = auto() # admin - endpoints ADMIN_ALL_ENDPOINTS_CREATED = auto() @@ -137,7 +140,7 @@ async def admin_signup_users( ctx: SimulatorContext, admin_client: SyftClient, users: list[dict] ): for user in users: - ctx.logger.info(f"Admin: Creating guest user {user['email']}") + ctx.logger.info(f"Admin low: Creating guest user {user['email']}") admin_client.register( name=user["name"], email=user["email"], @@ -161,11 +164,11 @@ async def admin_endpoint_bq_schema( ) try: - 
ctx.logger.info(f"Admin: Creating endpoint '{path}'") + ctx.logger.info(f"Admin high: Creating endpoint '{path}'") result = admin_client.custom_api.add(endpoint=schema_function) assert isinstance(result, sy.SyftSuccess), result except sy.SyftException as e: - ctx.logger.error(f"Admin: Failed to add api endpoint '{path}' - {e}") + ctx.logger.error(f"Admin high: Failed to add api endpoint '{path}' - {e}") @sim_activity(trigger=Event.ADMIN_BQ_TEST_ENDPOINT_CREATED) @@ -197,11 +200,11 @@ async def admin_endpoint_bq_test( ) try: - ctx.logger.info(f"Admin: Creating endpoint '{path}'") + ctx.logger.info(f"Admin high: Creating endpoint '{path}'") result = admin_client.custom_api.add(endpoint=new_endpoint) assert isinstance(result, sy.SyftSuccess), result except sy.SyftException as e: - ctx.logger.error(f"Admin: Failed to add api endpoint '{path}' - {e}") + ctx.logger.error(f"Admin high: Failed to add api endpoint '{path}' - {e}") @sim_activity(trigger=Event.ADMIN_BQ_SUBMIT_ENDPOINT_CREATED) @@ -258,11 +261,11 @@ def execute_query(query: str, endpoint): return f"Query submitted {request}. Use `client.code.{func_name}()` to run your query" try: - ctx.logger.info(f"Admin: Creating endpoint '{path}'") + ctx.logger.info(f"Admin high: Creating endpoint '{path}'") result = admin_client.custom_api.add(endpoint=submit_query) assert isinstance(result, sy.SyftSuccess), result except sy.SyftException as e: - ctx.logger.error(f"Admin: Failed to add api endpoint '{path}' - {e}") + ctx.logger.error(f"Admin high: Failed to add api endpoint '{path}' - {e}") @sim_activity(trigger=Event.ADMIN_ALL_ENDPOINTS_CREATED) @@ -274,7 +277,7 @@ async def admin_create_endpoint(ctx: SimulatorContext, admin_client: SyftClient) admin_endpoint_bq_submit(ctx, admin_client, worker_pool=worker_pool), admin_endpoint_bq_schema(ctx, admin_client, worker_pool=worker_pool), ) - ctx.logger.info("Admin: Created all endpoints") + ctx.logger.info("Admin high: Created all endpoints") @sim_activity( @@ -285,12 +288,35 @@ async def admin_create_endpoint(ctx: SimulatorContext, admin_client: SyftClient) ] ) async def admin_watch_sync(ctx: SimulatorContext, admin_client: SyftClient): - # fuckall function that just watches for ADMIN_SYNCED_HIGH_TO_LOW - # only to trigger ADMIN_LOW_SIDE_ENDPOINTS_AVAILABLE that - ctx.logger.info("Admin: Got a sync from high-side.") + while True: + await asyncio.sleep(random.uniform(5, 10)) - # trigger any event we want after sync - ctx.events.trigger(Event.ADMIN_LOW_SIDE_ENDPOINTS_AVAILABLE) + # Check if endpoints are available + endpoints = admin_client.custom_api.get_all() + if len(endpoints) == NUM_ENDPOINTS: + ctx.logger.info( + f"Admin low: All {NUM_ENDPOINTS} API endpoints are synced from high." 
+ ) + ctx.logger.info(f"Endpoints: {endpoints}") + ctx.events.trigger(Event.ADMIN_LOW_SIDE_ENDPOINTS_AVAILABLE) + + # Check if all requests are approved or denied + requests = admin_client.requests.get_all() + ctx.logger.info(f"Number of requests: {len(requests)}") + for req in requests: + ctx.logger.info(f"Request status: {req.get_status()}") + if len(requests) == NUM_USERS: # NOTE: currently hard coding this since + # each user in `user_flow` submits 1 query request + pending_requests = [] + for req in admin_client.requests: + if req.get_status() == RequestStatus.PENDING: + pending_requests.append(req) + if len(pending_requests) == 0: + ctx.logger.info("Admin low: All requests are approved / denined.") + ctx.logger.info(f"Requests: {requests}") + ctx.events.trigger(Event.ADMIN_LOW_ALL_RESULTS_AVAILABLE) + else: + ctx.logger.info(f"Admin low: Pending requests: {pending_requests}") # @sim_activity(trigger=Event.ADMIN_WORKER_POOL_CREATED) @@ -342,7 +368,8 @@ async def admin_create_bq_pool_low(ctx: SimulatorContext, admin_client: SyftClie wait_for=[ Event.USER_CAN_SUBMIT_QUERY, Event.ADMIN_SYNCED_LOW_TO_HIGH, - ] + ], + trigger=Event.ADMIN_HIGHSIDE_FLOW_COMPLETED, ) async def admin_triage_requests_high(ctx: SimulatorContext, admin_client: SyftClient): while True: @@ -355,6 +382,7 @@ async def admin_triage_requests_high(ctx: SimulatorContext, admin_client: SyftCl for req in admin_client.requests if req.get_status() == RequestStatus.PENDING ] + ctx.logger.info(f"Admin high: Found {len(pending_requests)} pending requests") for request in pending_requests: ctx.logger.info(f"Admin high: Found request {request.__dict__}") if getattr(request, "code", None): @@ -368,7 +396,11 @@ async def admin_triage_requests_high(ctx: SimulatorContext, admin_client: SyftCl job = request.code(blocking=False) result = job.wait() ctx.logger.info(f"Admin high: Request result {result}") - pass + + if ctx.events.is_set(Event.ADMIN_LOW_ALL_RESULTS_AVAILABLE): + break + + ctx.logger.info("Admin high: Done approving / denying all requests") @sim_activity(trigger=Event.ADMIN_HIGHSIDE_FLOW_COMPLETED) @@ -410,7 +442,7 @@ async def admin_sync_to_low_flow( low_client = sy.login(**admin_auth_low) ctx.logger.info("Admin: logged in to low-side") - while True: + while not ctx.events.is_set(Event.ADMIN_HIGHSIDE_FLOW_COMPLETED): await asyncio.sleep(random.uniform(5, 10)) result = sy.sync(high_client, low_client) @@ -437,7 +469,7 @@ async def admin_sync_to_high_flow( low_client = sy.login(**admin_auth_low) ctx.logger.info("Admin low: logged in to low-side") - while True: + while not ctx.events.is_set(Event.ADMIN_HIGHSIDE_FLOW_COMPLETED): await asyncio.sleep(random.uniform(5, 10)) result = sy.sync(low_client, high_client) @@ -464,7 +496,7 @@ async def sim_l0_scenario(ctx: SimulatorContext): email=fake.email(), password="password", ) - for i in range(3) + for _ in range(NUM_USERS) ] server_url_high = "http://localhost:8080" From ea9c3e9d35c8d32928c2bf838fc51f4d1fe2c405 Mon Sep 17 00:00:00 2001 From: khoaguin Date: Wed, 25 Sep 2024 15:59:18 +0700 Subject: [PATCH 23/78] [test/scenarios] admin low breaking from watching sync coroutine after receiving all results (synced from high) --- tests/scenariosv2/l0_test.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/tests/scenariosv2/l0_test.py b/tests/scenariosv2/l0_test.py index 45b49320ada..1c5dc0b9665 100644 --- a/tests/scenariosv2/l0_test.py +++ b/tests/scenariosv2/l0_test.py @@ -303,8 +303,6 @@ async def admin_watch_sync(ctx: SimulatorContext, 
admin_client: SyftClient): # Check if all requests are approved or denied requests = admin_client.requests.get_all() ctx.logger.info(f"Number of requests: {len(requests)}") - for req in requests: - ctx.logger.info(f"Request status: {req.get_status()}") if len(requests) == NUM_USERS: # NOTE: currently hard coding this since # each user in `user_flow` submits 1 query request pending_requests = [] @@ -315,6 +313,7 @@ async def admin_watch_sync(ctx: SimulatorContext, admin_client: SyftClient): ctx.logger.info("Admin low: All requests are approved / denined.") ctx.logger.info(f"Requests: {requests}") ctx.events.trigger(Event.ADMIN_LOW_ALL_RESULTS_AVAILABLE) + break else: ctx.logger.info(f"Admin low: Pending requests: {pending_requests}") @@ -442,7 +441,7 @@ async def admin_sync_to_low_flow( low_client = sy.login(**admin_auth_low) ctx.logger.info("Admin: logged in to low-side") - while not ctx.events.is_set(Event.ADMIN_HIGHSIDE_FLOW_COMPLETED): + while True: await asyncio.sleep(random.uniform(5, 10)) result = sy.sync(high_client, low_client) @@ -458,6 +457,10 @@ async def admin_sync_to_low_flow( ctx.events.trigger(Event.ADMIN_SYNCED_HIGH_TO_LOW) ctx.logger.info("Admin high: Synced high->low") + if ctx.events.is_set(Event.ADMIN_HIGHSIDE_FLOW_COMPLETED): + ctx.logger.info("Admin high: Done syncing high->low") + break + @sim_activity(trigger=Event.ADMIN_SYNC_COMPLETED) async def admin_sync_to_high_flow( @@ -484,6 +487,10 @@ async def admin_sync_to_high_flow( ctx.events.trigger(Event.ADMIN_SYNCED_LOW_TO_HIGH) ctx.logger.info("Admin low: Synced low->high") + if ctx.events.is_set(Event.ADMIN_HIGHSIDE_FLOW_COMPLETED): + ctx.logger.info("Admin high: Done syncing high->low") + break + # ------------------------------------------------------------------------------------------------ @@ -534,6 +541,7 @@ async def test_l0_scenario(request): # admin lowside Event.GUEST_USERS_CREATED, Event.ADMIN_LOWSIDE_WORKER_POOL_CREATED, + Event.ADMIN_LOW_ALL_RESULTS_AVAILABLE, Event.ADMIN_LOWSIDE_FLOW_COMPLETED, # admin high side Event.ADMIN_ALL_ENDPOINTS_CREATED, @@ -542,8 +550,8 @@ async def test_l0_scenario(request): # admin sync Event.ADMIN_SYNC_COMPLETED, # users - # Event.USER_CAN_QUERY_TEST_ENDPOINT, - # Event.USER_FLOW_COMPLETED, + Event.USER_CAN_QUERY_TEST_ENDPOINT, + Event.USER_FLOW_COMPLETED, ], timeout=300, ) From c152819f2fc14bdf2ed7d898dca69d029ce5bcb0 Mon Sep 17 00:00:00 2001 From: khoaguin Date: Thu, 26 Sep 2024 09:39:04 +0700 Subject: [PATCH 24/78] [tests/scenarios] add a coroutine for users to check results after they are available --- tests/scenariosv2/l0_test.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/scenariosv2/l0_test.py b/tests/scenariosv2/l0_test.py index 1c5dc0b9665..8308cb80ccd 100644 --- a/tests/scenariosv2/l0_test.py +++ b/tests/scenariosv2/l0_test.py @@ -53,6 +53,7 @@ class Event(BaseEvent): GUEST_USERS_CREATED = auto() USER_CAN_QUERY_TEST_ENDPOINT = auto() USER_CAN_SUBMIT_QUERY = auto() + USER_CHECKED_RESULTS = auto() # ------------------------------------------------------------------------------------------------ @@ -119,6 +120,25 @@ def _submit_endpoint(): await asyncio.to_thread(_submit_endpoint) +@sim_activity( + wait_for=Event.ADMIN_LOW_ALL_RESULTS_AVAILABLE, + trigger=Event.USER_CHECKED_RESULTS, +) +async def user_checks_results(ctx: SimulatorContext, client: sy.DatasiteClient): + def _check_results(): + for request in client.requests: + if request.get_status() == RequestStatus.APPROVED: + job = request.code(blocking=False) + result = 
job.wait()
+                assert len(result) == 10000
+            if request.get_status() == RequestStatus.REJECTED:
+                ctx.logger.info(
+                    f"User: Request with function named {request.code.service_func_name} was rejected"
+                )
+
+    await asyncio.to_thread(_check_results)
+
+
 @sim_activity(wait_for=Event.GUEST_USERS_CREATED, trigger=Event.USER_FLOW_COMPLETED)
 async def user_flow(ctx: SimulatorContext, server_url_low: str, user: dict):
     client = sy.login(
@@ -130,6 +150,7 @@ async def user_flow(ctx: SimulatorContext, server_url_low: str, user: dict):
 
     await user_query_test_endpoint(ctx, client)
     await user_bq_submit(ctx, client)
+    await user_checks_results(ctx, client)
 
 
 # ------------------------------------------------------------------------------------------------
@@ -551,6 +572,7 @@ async def test_l0_scenario(request):
             Event.ADMIN_SYNC_COMPLETED,
             # users
             Event.USER_CAN_QUERY_TEST_ENDPOINT,
+            Event.USER_CHECKED_RESULTS,
             Event.USER_FLOW_COMPLETED,
         ],
         timeout=300,

From 8b1337d00c0e7ae8081e1b0b82c554ee598755c6 Mon Sep 17 00:00:00 2001
From: khoaguin
Date: Thu, 26 Sep 2024 10:30:37 +0700
Subject: [PATCH 25/78] [tests/scenarios] logs are now saved at
 /scenariosv2/logs/{date_time}

---
 tests/scenariosv2/l0_test.py  |  2 ++
 tests/scenariosv2/l2_test.py  |  2 ++
 tests/scenariosv2/sim/core.py | 17 +++++++++++++----
 3 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/tests/scenariosv2/l0_test.py b/tests/scenariosv2/l0_test.py
index 8308cb80ccd..df068412298 100644
--- a/tests/scenariosv2/l0_test.py
+++ b/tests/scenariosv2/l0_test.py
@@ -542,6 +542,8 @@ async def sim_l0_scenario(ctx: SimulatorContext):
     )
 
     ctx.events.trigger(Event.INIT)
+    ctx.logger.info("--- Initializing L0 BigQuery Scenario Test ---")
+
     await asyncio.gather(
         admin_low_side(ctx, admin_auth_low, users),
         admin_high_side(ctx, admin_auth_high),
diff --git a/tests/scenariosv2/l2_test.py b/tests/scenariosv2/l2_test.py
index 5388da4c5b1..af4f287bca0 100644
--- a/tests/scenariosv2/l2_test.py
+++ b/tests/scenariosv2/l2_test.py
@@ -145,6 +145,8 @@ async def sim_l2_scenario(ctx: SimulatorContext):
     }
 
     ctx.events.trigger(Event.INIT)
+    ctx.logger.info("--- Initializing L2 BigQuery Scenario Test ---")
+
     await asyncio.gather(
         admin_flow(ctx, admin_auth, users),
         *[user_flow(ctx, server_url, user) for user in users],
diff --git a/tests/scenariosv2/sim/core.py b/tests/scenariosv2/sim/core.py
index 497ce8d0b38..f9e0148ab92 100644
--- a/tests/scenariosv2/sim/core.py
+++ b/tests/scenariosv2/sim/core.py
@@ -4,12 +4,18 @@
 from enum import Enum
 from functools import wraps
 import logging
+from pathlib import Path
 import random
 import time
 
-EVENTS_LOG = "sim.events.log"
-EXECUTIONS_LOG = "sim.executions.log"
-ACTIVITY_LOG = "sim.activity.log"
+TIMESTAMP = datetime.now().strftime("%Y%m%d_%H%M%S")
+LOGS_DIR = Path(__file__).resolve().parents[1] / "logs" / TIMESTAMP
+LOGS_DIR.mkdir(parents=True, exist_ok=True)
+
+EXECUTIONS_LOG = LOGS_DIR / "sim.executions.log"
+EVENTS_LOG = LOGS_DIR / "sim.events.log"
+ACTIVITY_LOG = LOGS_DIR / "sim.activity.log"
+
 
 logging.Formatter.formatTime = (
@@ -46,6 +52,7 @@ def __init__(self):
         file_handler.setFormatter(EVENT_FORMATTER)
         self.logger.addHandler(file_handler)
         self.logger.setLevel(logging.INFO)
+        print(f"EventManager initialized. Logs are saved in: {LOGS_DIR}")
 
     async def wait_for(self, event: BaseEvent):
         if event not in self.events:
@@ -111,7 +118,9 @@ async def start(self, *tasks, check_events=None, random_wait=None, timeout=60):
             context._elogger.error(
                 f"Timed out. 
Unfired Events = {context.unfired_events(check_events)}" ) - raise TestFailure(f"simulator timed out after {timeout}s") + raise TestFailure( + f"simulator timed out after {timeout}s. Please check logs at {LOGS_DIR} for more details." + ) if check_events: evts = context.unfired_events(check_events) From 52a0600631c709d5f77f35eaee77bb21e0dfb9ce Mon Sep 17 00:00:00 2001 From: Yash Gorana Date: Thu, 26 Sep 2024 10:34:56 +0530 Subject: [PATCH 26/78] fix 'worker_pool_name' var name change --- tests/scenariosv2/l0_test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/scenariosv2/l0_test.py b/tests/scenariosv2/l0_test.py index df068412298..569ec3ba030 100644 --- a/tests/scenariosv2/l0_test.py +++ b/tests/scenariosv2/l0_test.py @@ -181,7 +181,7 @@ async def admin_endpoint_bq_schema( settings={ "calls_per_min": 5, }, - worker_pool=worker_pool, + worker_pool_name=worker_pool, ) try: @@ -217,7 +217,7 @@ async def admin_endpoint_bq_test( description="This endpoint allows to query Bigquery storage via SQL queries.", private_function=private_query_function, mock_function=mock_query_function, - worker_pool=worker_pool, + worker_pool_name=worker_pool, ) try: @@ -241,7 +241,7 @@ async def admin_endpoint_bq_submit( @sy.api_endpoint( path=path, description="API endpoint that allows you to submit SQL queries to run on the private data.", - worker_pool=worker_pool, + worker_pool_name=worker_pool, settings={"worker": worker_pool}, ) def submit_query( From 0fff1f9822857898a9db2fff9e917676bf634685 Mon Sep 17 00:00:00 2001 From: Yash Gorana Date: Thu, 26 Sep 2024 10:47:32 +0530 Subject: [PATCH 27/78] drop F841 --- tests/scenariosv2/l0_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/scenariosv2/l0_test.py b/tests/scenariosv2/l0_test.py index 569ec3ba030..86448445f7c 100644 --- a/tests/scenariosv2/l0_test.py +++ b/tests/scenariosv2/l0_test.py @@ -528,7 +528,7 @@ async def sim_l0_scenario(ctx: SimulatorContext): ] server_url_high = "http://localhost:8080" - admin_auth_high = dict( # noqa: C408, F841 + admin_auth_high = dict( # noqa: C408 url=server_url_high, email="info@openmined.org", password="changethis", From a203de9267a4de263e883aa316ea70571629cd82 Mon Sep 17 00:00:00 2001 From: khoaguin Date: Thu, 26 Sep 2024 14:31:50 +0700 Subject: [PATCH 28/78] [CI] installing `just` and running bigquery scenariov2 on k8s with just commands --- .github/workflows/pr-tests-syft.yml | 7 ++++++- tox.ini | 9 ++++++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/.github/workflows/pr-tests-syft.yml b/.github/workflows/pr-tests-syft.yml index f13f355e7b0..43439ffd095 100644 --- a/.github/workflows/pr-tests-syft.yml +++ b/.github/workflows/pr-tests-syft.yml @@ -262,7 +262,12 @@ jobs: restore-keys: | ${{ runner.os }}-uv-py${{ matrix.python-version }}- - - name: Run notebook scenario tests + - name: Install just + run: | + curl --proto '=https' --tlsv1.2 -sSf https://just.systems/install.sh | bash -s -- --to ~/.local/bin + echo "$HOME/.local/bin" >> $GITHUB_PATH + + - name: Run scenario tests if: steps.changes.outputs.syft == 'true' || steps.changes.outputs.notebooks_scenario == 'true' env: ORCHESTRA_DEPLOYMENT_TYPE: "${{ matrix.deployment-type }}" diff --git a/tox.ini b/tox.ini index ea53783a81e..056ab2823ad 100644 --- a/tox.ini +++ b/tox.ini @@ -299,14 +299,17 @@ deps = pytest-asyncio pytest-timeout anyio - unsync allowlist_externals = bash uv -changedir = {toxinidir}/tests/scenarios + just +changedir = {toxinidir}/tests/scenariosv2 setenv = commands = - 
pytest -s --disable-warnings + # l2 test with a k8s high cluster + just delete-high start-high deploy-high && pytest -s --disable-warnings l2_test.py + # l0 test with a k8s high and low cluster + just reset-high && just delete-low start-low deploy-low && pytest -s --disable-warnings l0_test.py [testenv:syft.test.notebook] description = Syft Notebook Tests From 7bd65730f065022cf397411970d6781bc0a629a4 Mon Sep 17 00:00:00 2001 From: khoaguin Date: Thu, 26 Sep 2024 14:38:28 +0700 Subject: [PATCH 29/78] [CI] fix bash commands for `syft.test.scenario` --- tox.ini | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tox.ini b/tox.ini index 056ab2823ad..c9d38fcded9 100644 --- a/tox.ini +++ b/tox.ini @@ -307,9 +307,11 @@ changedir = {toxinidir}/tests/scenariosv2 setenv = commands = # l2 test with a k8s high cluster - just delete-high start-high deploy-high && pytest -s --disable-warnings l2_test.py + bash -c "just delete-high start-high deploy-high && \ + pytest -s --disable-warnings l2_test.py" # l0 test with a k8s high and low cluster - just reset-high && just delete-low start-low deploy-low && pytest -s --disable-warnings l0_test.py + bash -c "just reset-high && just delete-low start-low deploy-low && \ + pytest -s --disable-warnings l0_test.py" [testenv:syft.test.notebook] description = Syft Notebook Tests From dc9c3ce30ab97949c006eb250a50ac68e9cccfc9 Mon Sep 17 00:00:00 2001 From: khoaguin Date: Thu, 26 Sep 2024 15:25:44 +0700 Subject: [PATCH 30/78] [CI] move testing simulation scenario tests to pr-tests-stack [tox] clean up & fix stack.test.scenario.k8s task --- .github/workflows/pr-tests-stack.yml | 127 +++++++++++++++++++++++++++ .github/workflows/pr-tests-syft.yml | 6 +- tox.ini | 36 ++++++-- 3 files changed, 156 insertions(+), 13 deletions(-) diff --git a/.github/workflows/pr-tests-stack.yml b/.github/workflows/pr-tests-stack.yml index 35c4e40823a..409393c7d75 100644 --- a/.github/workflows/pr-tests-stack.yml +++ b/.github/workflows/pr-tests-stack.yml @@ -956,3 +956,130 @@ jobs: export PATH=`pwd`:$PATH k3d cluster delete bigquery-high || true k3d cluster delete bigquery-low || true + + pr-tests-syft-scenario-k8s: + strategy: + max-parallel: 99 + matrix: + os: [ubuntu-latest] + python-version: ["3.12"] + fail-fast: false + + runs-on: ${{matrix.os}} + + steps: + - name: Permission to home directory + run: | + sudo chown -R $USER:$USER $HOME + - uses: actions/checkout@v4 + - name: Check for file changes + uses: dorny/paths-filter@v3 + id: changes + with: + base: ${{ github.ref }} + token: ${{ github.token }} + filters: .github/file-filters.yml + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + if: steps.changes.outputs.stack == 'true' + with: + python-version: ${{ matrix.python-version }} + + - name: Add K3d Registry + run: | + sudo python ./scripts/patch_hosts.py --add-k3d-registry + - name: Free Disk Space (Ubuntu) + uses: jlumbroso/free-disk-space@main + with: + tool-cache: true + large-packages: false + + # free 10GB of space + - name: Remove unnecessary files + if: matrix.os == 'ubuntu-latest' + run: | + sudo rm -rf /usr/share/dotnet + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + docker image prune --all --force + docker builder prune --all --force + docker system prune --all --force + + - name: Install pip dependencies + if: steps.changes.outputs.stack == 'true' + run: | + python -m pip install --upgrade pip + pip install uv==0.2.17 tox==4.16.0 tox-uv==1.9.0 + uv --version + + - name: Get uv cache dir + if: steps.changes.outputs.stack 
== 'true'
+        id: pip-cache
+        shell: bash
+        run: |
+          echo "dir=$(uv cache dir)" >> $GITHUB_OUTPUT
+
+      - name: Load github cache
+        uses: actions/cache@v4
+        if: steps.changes.outputs.stack == 'true'
+        with:
+          path: ${{ steps.pip-cache.outputs.dir }}
+          key: ${{ runner.os }}-uv-py${{ matrix.python-version }}
+          restore-keys: |
+            ${{ runner.os }}-uv-py${{ matrix.python-version }}
+
+      - name: Install kubectl
+        if: steps.changes.outputs.stack == 'true'
+        run: |
+          # cleanup apt version
+          sudo apt remove kubectl || true
+          # install kubectl 1.27
+          curl -LO https://dl.k8s.io/release/v1.27.2/bin/linux/amd64/kubectl
+          chmod +x kubectl
+          sudo install kubectl /usr/local/bin;
+
+      - name: Install helm
+        if: steps.changes.outputs.stack == 'true'
+        run: |
+          # install helm
+          curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3
+          chmod 700 get_helm.sh
+          ./get_helm.sh
+
+      - name: Install just
+        if: steps.changes.outputs.stack == 'true'
+        run: |
+          curl --proto '=https' --tlsv1.2 -sSf https://just.systems/install.sh | bash -s -- --to ~/.local/bin
+          echo "$HOME/.local/bin" >> $GITHUB_PATH
+
+      - name: Run scenario tests
+        if: steps.changes.outputs.stack == 'true'
+        env:
+          ORCHESTRA_DEPLOYMENT_TYPE: "remote"
+          BUMP_VERSION: "False"
+          TOX_PYTHON: python${{ matrix.python-version }}
+        shell: bash
+        run: |
+          K3D_VERSION=v5.6.3
+          DEVSPACE_VERSION=v6.3.12
+          # install k3d
+          wget https://github.com/k3d-io/k3d/releases/download/${K3D_VERSION}/k3d-linux-amd64
+          mv k3d-linux-amd64 k3d
+          chmod +x k3d
+          export PATH=`pwd`:$PATH
+          k3d version
+          curl -sSL https://github.com/loft-sh/devspace/releases/download/${DEVSPACE_VERSION}/devspace-linux-amd64 -o ./devspace
+          chmod +x devspace
+          devspace version
+          tox -e stack.test.scenario.k8s
+
+      # todo: collect logs
+
+      - name: Cleanup k3d
+        if: steps.changes.outputs.stack == 'true' && failure()
+        shell: bash
+        run: |
+          export PATH=`pwd`:$PATH
+          k3d cluster delete syft-high || true
+          k3d cluster delete syft-low || true
+          k3d registry delete k3d-registry.localhost || true
diff --git a/.github/workflows/pr-tests-syft.yml b/.github/workflows/pr-tests-syft.yml
index 43439ffd095..cc5fd14f893 100644
--- a/.github/workflows/pr-tests-syft.yml
+++ b/.github/workflows/pr-tests-syft.yml
@@ -189,6 +189,7 @@ jobs:
           command: tox -e syft.test.notebook

   pr-tests-syft-scenario:
+    if: ${{ false }}
     strategy:
       max-parallel: 99
       matrix:
@@ -262,11 +263,6 @@
           restore-keys: |
             ${{ runner.os }}-uv-py${{ matrix.python-version }}-

-      - name: Install just
-        run: |
-          curl --proto '=https' --tlsv1.2 -sSf https://just.systems/install.sh | bash -s -- --to ~/.local/bin
-          echo "$HOME/.local/bin" >> $GITHUB_PATH
-
       - name: Run scenario tests
         if: steps.changes.outputs.syft == 'true' || steps.changes.outputs.notebooks_scenario == 'true'
         env:
diff --git a/tox.ini b/tox.ini
index c9d38fcded9..740cff2e501 100644
--- a/tox.ini
+++ b/tox.ini
@@ -21,7 +21,6 @@ envlist =
     syft.test.security
     syft.test.unit
     syft.test.scenario
-    stack.test.scenario.sync.k8s
     syft.test.notebook
     syft.test.notebook.scenario
     syft.test.notebook.scenario.sync
@@ -31,6 +30,7 @@
     stack.test.integration.k8s
     stack.test.notebook.scenario.k8s
     stack.test.notebook.scenario.k8s.sync
+    stack.test.scenario.k8s
     frontend.test.unit
     frontend.test.e2e
     frontend.generate.types
@@ -292,8 +292,8 @@ commands =
    bash -c 'ulimit -n 4096 || true'
    pytest -n auto --dist loadgroup --durations=20 --disable-warnings
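# Why the `bash -c` wrappers in the scenario envs below: tox runs each
# `commands` entry as a plain subprocess rather than through a shell, so a
# shell operator like `&&` is passed to the first program as a literal
# argument instead of chaining commands. Wrapping the whole pipeline in an
# allowlisted `bash -c` restores shell semantics. A minimal sketch of the
# pattern, with a hypothetical env name:
#
#   [testenv:example.scenario]
#   allowlist_externals = bash
#   commands =
#       bash -c "just deploy && pytest -s tests/"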
-description = Syft Scenario Tests +[testenv:stack.test.scenario.k8s] +description = Syft Scenario Tests on K8s deps = -e{toxinidir}/packages/syft[dev,data_science] pytest-asyncio @@ -303,15 +303,35 @@ allowlist_externals = bash uv just -changedir = {toxinidir}/tests/scenariosv2 setenv = commands = # l2 test with a k8s high cluster - bash -c "just delete-high start-high deploy-high && \ - pytest -s --disable-warnings l2_test.py" + bash -c "just delete-high" + bash -c "just start-high" + bash -c "just deploy-high" + + # wait for syft-high + bash packages/grid/scripts/wait_for.sh service postgres --context k3d-syft-high --namespace syft + bash packages/grid/scripts/wait_for.sh service backend --context k3d-syft-high --namespace syft + bash packages/grid/scripts/wait_for.sh service proxy --context k3d-syft-high --namespace syft + bash packages/grid/scripts/wait_for.sh service seaweedfs --context k3d-syft-high --namespace syft + bash packages/grid/scripts/wait_for.sh service frontend --context k3d-syft-high --namespace syft + + bash -c "pytest -s --disable-warnings tests/scenariosv2/l2_test.py" + # l0 test with a k8s high and low cluster - bash -c "just reset-high && just delete-low start-low deploy-low && \ - pytest -s --disable-warnings l0_test.py" + bash -c "just reset-high" + bash -c "just delete-low" + bash -c "just start-low deploy-low" + + # wait for syft-low + bash packages/grid/scripts/wait_for.sh service postgres --context k3d-syft-low --namespace syft + bash packages/grid/scripts/wait_for.sh service backend --context k3d-syft-low --namespace syft + bash packages/grid/scripts/wait_for.sh service proxy --context k3d-syft-low --namespace syft + bash packages/grid/scripts/wait_for.sh service seaweedfs --context k3d-syft-low --namespace syft + bash packages/grid/scripts/wait_for.sh service frontend --context k3d-syft-low --namespace syft + + bash -c "pytest -s --disable-warnings tests/scenariosv2/l0_test.py" [testenv:syft.test.notebook] description = Syft Notebook Tests From 5f30a84981ae508bb8549dd4961c8dade0ba2df8 Mon Sep 17 00:00:00 2001 From: khoaguin Date: Thu, 26 Sep 2024 16:49:15 +0700 Subject: [PATCH 31/78] [CI] checking python version --- tox.ini | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tox.ini b/tox.ini index 740cff2e501..eb6bb7bd0f7 100644 --- a/tox.ini +++ b/tox.ini @@ -298,11 +298,14 @@ deps = -e{toxinidir}/packages/syft[dev,data_science] pytest-asyncio pytest-timeout - anyio allowlist_externals = bash - uv just + pytest + devspace + kubectl + grep + k3d setenv = commands = # l2 test with a k8s high cluster From 6c1267b9e78cc0cf8b1a4eeee5be0c71a9c05296 Mon Sep 17 00:00:00 2001 From: Yash Gorana Date: Thu, 26 Sep 2024 21:24:58 +0530 Subject: [PATCH 32/78] drop scenarios --- tests/scenarios/bigquery/helpers/api.py | 172 --------- tests/scenarios/bigquery/helpers/asserts.py | 109 ------ tests/scenarios/bigquery/helpers/code.py | 91 ----- tests/scenarios/bigquery/helpers/events.py | 292 -------------- .../bigquery/helpers/fixtures_sync.py | 171 --------- tests/scenarios/bigquery/helpers/users.py | 66 ---- tests/scenarios/bigquery/helpers/workers.py | 83 ---- tests/scenarios/bigquery/level_0_test.py | 363 ------------------ .../scenarios/bigquery/level_2_basic_test.py | 226 ----------- 9 files changed, 1573 deletions(-) delete mode 100644 tests/scenarios/bigquery/helpers/api.py delete mode 100644 tests/scenarios/bigquery/helpers/asserts.py delete mode 100644 tests/scenarios/bigquery/helpers/code.py delete mode 100644 
tests/scenarios/bigquery/helpers/events.py delete mode 100644 tests/scenarios/bigquery/helpers/fixtures_sync.py delete mode 100644 tests/scenarios/bigquery/helpers/users.py delete mode 100644 tests/scenarios/bigquery/helpers/workers.py delete mode 100644 tests/scenarios/bigquery/level_0_test.py delete mode 100644 tests/scenarios/bigquery/level_2_basic_test.py diff --git a/tests/scenarios/bigquery/helpers/api.py b/tests/scenarios/bigquery/helpers/api.py deleted file mode 100644 index ce670e605ae..00000000000 --- a/tests/scenarios/bigquery/helpers/api.py +++ /dev/null @@ -1,172 +0,0 @@ -# third party -from helpers.events import unsync - -# syft absolute -import syft as sy -from syft import test_settings -from syft.util.test_helpers.apis import make_schema -from syft.util.test_helpers.apis import make_test_query - - -# Define any helper methods for our rate limiter -def is_within_rate_limit(context): - """Rate limiter for custom API calls made by users.""" - # stdlib - import datetime - - state = context.state - settings = context.settings - email = context.user.email - - current_time = datetime.datetime.now() - calls_last_min = [ - 1 if (current_time - call_time).seconds < 60 else 0 - for call_time in state[email] - ] - - return sum(calls_last_min) < settings["CALLS_PER_MIN"] - - -@unsync -async def create_endpoints_query(events, client, worker_pool_name: str, register: str): - private_query_function = make_test_query( - settings={ - "rate_limiter_enabled": False, - } - ) - mock_query_function = make_test_query( - settings={ - "rate_limiter_enabled": True, - "calls_per_min": 10, - } - ) - - new_endpoint = sy.TwinAPIEndpoint( - path="bigquery.test_query", - description="This endpoint allows to query Bigquery storage via SQL queries.", - private_function=private_query_function, - mock_function=mock_query_function, - worker_pool=worker_pool_name, - ) - - result = client.custom_api.add(endpoint=new_endpoint) - - if register: - if isinstance(result, sy.SyftSuccess): - events.register(register) - else: - print("Failed to add api endpoint") - - -@unsync -async def create_endpoints_schema(events, client, worker_pool_name: str, register: str): - schema_function = make_schema( - settings={ - "calls_per_min": 5, - }, - worker_pool=worker_pool_name, - ) - result = client.custom_api.add(endpoint=schema_function) - - if register: - if isinstance(result, sy.SyftSuccess): - events.register(register) - else: - print("Failed to add schema_function") - - -@unsync -async def create_endpoints_submit_query( - events, client, worker_pool_name: str, register: str -): - @sy.api_endpoint( - path="bigquery.submit_query", - description="API endpoint that allows you to submit SQL queries to run on the private data.", - worker_pool=worker_pool_name, - settings={"worker": worker_pool_name}, - ) - def submit_query( - context, - func_name: str, - query: str, - ) -> str: - # stdlib - import hashlib - - # syft absolute - import syft as sy - - hash_object = hashlib.new("sha256") - - hash_object.update(context.user.email.encode("utf-8")) - func_name = func_name + "_" + hash_object.hexdigest()[:6] - - @sy.syft_function( - name=func_name, - input_policy=sy.MixedInputPolicy( - endpoint=sy.Constant( - val=context.admin_client.api.services.bigquery.test_query - ), - query=sy.Constant(val=query), - client=context.admin_client, - ), - worker_pool_name=context.settings["worker"], - ) - def execute_query(query: str, endpoint): - res = endpoint(sql_query=query) - return res - - request = 
context.user_client.code.request_code_execution(execute_query) - if isinstance(request, sy.SyftError): - return request - context.admin_client.requests.set_tags(request, ["autosync"]) - - return f"Query submitted {request}. Use `client.code.{func_name}()` to run your query" - - result = client.custom_api.add(endpoint=submit_query) - - if register: - if isinstance(result, sy.SyftSuccess): - events.register(register) - else: - print("Failed to add api endpoint") - - -@unsync -async def set_endpoint_settings( - events, client, path, kwargs, after: str, register: str -): - if after: - await events.await_for(event_name=after) - - # Here, we update the endpoint to timeout after 100s (rather the default of 60s) - result = client.api.services.api.update(endpoint_path=path, **kwargs) - if isinstance(result, sy.SyftSuccess): - events.register(register) - - -def api_for_path(client, path): - root = client.api.services - for part in path.split("."): - if hasattr(root, part): - root = getattr(root, part) - return root - - -def run_api_path(client, path, **kwargs): - api_method = api_for_path(client, path) - result = api_method(**kwargs) - return result - - -def query_sql(): - dataset_2 = test_settings.get("dataset_2", default="dataset_2") - table_2 = test_settings.get("table_2", default="table_2") - table_2_col_id = test_settings.get("table_2_col_id", default="table_id") - table_2_col_score = test_settings.get("table_2_col_score", default="colname") - - query = f"SELECT {table_2_col_id}, AVG({table_2_col_score}) AS average_score \ - FROM {dataset_2}.{table_2} \ - GROUP BY {table_2_col_id} \ - LIMIT 10000" - return query diff --git a/tests/scenarios/bigquery/helpers/asserts.py b/tests/scenarios/bigquery/helpers/asserts.py deleted file mode 100644 index 734e02fd0f3..00000000000 --- a/tests/scenarios/bigquery/helpers/asserts.py +++ /dev/null @@ -1,109 +0,0 @@ -# stdlib -import importlib.util -import inspect -import subprocess -import sys - -# third party -import anyio - -# syft absolute -import syft as sy - - -class FailedAssert(Exception): - pass - - -async def has(expr, expects="", timeout=10, retry=1): - try: - with anyio.fail_after(timeout): - result = expr() - while not result: - print(f"> {expects} {expr}...not yet satisfied") - await anyio.sleep(retry) - except TimeoutError: - lambda_source = inspect.getsource(expr) - raise FailedAssert(f"{lambda_source} {expects}") - - -def check_import_exists(module_name: str): - # can pass . paths like google.cloud.bigquery - spec = importlib.util.find_spec(module_name) - return spec is not None - - -def install_package(package_name: str): - try: - subprocess.check_call([sys.executable, "-m", "pip", "install", package_name]) - except subprocess.CalledProcessError: - print(f"pip failed to install {package_name}. Trying uv pip...") - try: - subprocess.check_call(["uv", "pip", "install", package_name]) - except subprocess.CalledProcessError as e: - print( - f"An error occurred while trying to install {package_name} with uv pip: {e}" - ) - - -def ensure_package_installed(package_name, module_name): - if not check_import_exists(module_name): - print(f"{module_name} not found. 
Installing...") - install_package(package_name) - else: - print(f"{module_name} is already installed.") - - -async def result_is( - events, - expr, - matches: bool | str | type | object, - after: str | None = None, - register: str | None = None, -): - if after: - await events.await_for(event_name=after) - - lambda_source = inspect.getsource(expr) - try: - result = None - try: - result = expr() - except Exception as e: - if isinstance(e, sy.SyftException): - result = e - else: - raise e - - assertion = False - if isinstance(matches, bool): - assertion = result == matches - elif isinstance(matches, type): - assertion = isinstance(result, matches) - elif isinstance(matches, str): - message = matches.replace("*", "") - assertion = message in str(result) - else: - type_matches = isinstance(result, type(matches)) - message_matches = True - - message = None - if isinstance(matches, sy.service.response.SyftResponseMessage): - message = matches.message.replace("*", "") - elif isinstance(result, sy.SyftException): - message = matches.public_message.replace("*", "") - - if message: - if isinstance(result, sy.service.response.SyftResponseMessage): - message_matches = message in str(result) - elif isinstance(result, sy.SyftException): - message_matches = message in result.public_message - - assertion = type_matches and message_matches - if assertion and register: - events.register(event_name=register) - return assertion - except Exception as e: - print(f"insinstance({lambda_source}, {matches}). {e}") - - return False diff --git a/tests/scenarios/bigquery/helpers/code.py b/tests/scenarios/bigquery/helpers/code.py deleted file mode 100644 index ef63d8667cc..00000000000 --- a/tests/scenarios/bigquery/helpers/code.py +++ /dev/null @@ -1,91 +0,0 @@ -# stdlib -import asyncio - -# third party -from helpers.api import api_for_path -from helpers.events import unsync - -# syft absolute -from syft.service.code.user_code import UserCode -from syft.service.job.job_stash import Job - - -def get_approved(client): - results = [] - for request in client.requests: - if str(request.status) == "RequestStatus.APPROVED": - results.append(request) - return results - - -def run_code(client, method_name, **kwargs): - service_func_name = method_name - if "*" in method_name: - matcher = method_name.replace("*", "") - all_code = client.api.services.code.get_all() - for code in all_code: - if matcher in code.service_func_name: - service_func_name = code.service_func_name - break - - api_method = api_for_path(client, path=f"code.{service_func_name}") - # can raise - result = api_method(**kwargs) - return result - - -def approve_and_deposit(client, request_id): - request = client.requests.get_by_uid(uid=request_id) - code = request.code - - if not isinstance(code, UserCode): - return - - func_name = request.code.service_func_name - job = run_code(client, func_name, blocking=False) - if not isinstance(job, Job): - return None - - job.wait() - job_info = job.info(result=True) - result = request.deposit_result(job_info, approve=True) - return result - - -def get_pending(client): - results = [] - for request in client.requests: - if str(request.status) == "RequestStatus.PENDING": - results.append(request) - return results - - -@unsync -async def triage_requests(events, client, after, register, sleep=2): - if after: - await events.await_for(event_name=after) - while True: - await asyncio.sleep(sleep) - requests = get_pending(client) - for request in requests: - approve_and_deposit(client, request.id) - 
events.register(event_name=register) - - -@unsync -async def get_results(events, client, method_name, after, register): - method_name = method_name.replace("*", "") - if after: - await events.await_for(event_name=after) - while True: - await asyncio.sleep(1) - requests = get_approved(client) - for request in requests: - if method_name in request.code.service_func_name: - job = run_code(client, request.code.service_func_name, blocking=False) - if not isinstance(job, Job): - continue - else: - result = job.wait().get() - if hasattr(result, "__len__") and len(result) == 10000: - events.register(event_name=register) diff --git a/tests/scenarios/bigquery/helpers/events.py b/tests/scenarios/bigquery/helpers/events.py deleted file mode 100644 index 5abc4d2c836..00000000000 --- a/tests/scenarios/bigquery/helpers/events.py +++ /dev/null @@ -1,292 +0,0 @@ -# stdlib -import asyncio -from dataclasses import dataclass -from functools import wraps -import inspect -import json -import os -from threading import Lock -import time - -# third party -import unsync as unsync_lib - -__all__ = ["Event", "EventManager", "Scenario", "unsync"] - - -class Event: - USER_ADMIN_CREATED = "user_admin_created" - USERS_CREATED = "users_created" - DATASET_UPLOADED = "dataset_uploaded" - DATASET_MOCK_READABLE = "dataset_mock_readable" - PREBUILT_WORKER_IMAGE_BIGQUERY_CREATED = "prebuilt_worker_image_bigquery_created" - EXTERNAL_REGISTRY_BIGQUERY_CREATED = "external_registry_bigquery_created" - WORKER_POOL_CREATED = "worker_pool_created" - WORKER_POOL_SCALED = "worker_pool_scaled" - ALLOW_GUEST_SIGNUP_ENABLED = "allow_guest_signup_enabled" - ALLOW_GUEST_SIGNUP_DISABLED = "allow_guest_signup_disabled" - USERS_CREATED_CHECKED = "users_created_checked" - SCHEMA_ENDPOINT_CREATED = "schema_endpoint_created" - SUBMIT_QUERY_ENDPOINT_CREATED = "submit_query_endpoint_created" - SUBMIT_QUERY_ENDPOINT_CONFIGURED = "submit_query_endpoint_configured" - USERS_CAN_QUERY_MOCK = "users_can_query_mock" - USERS_CAN_SUBMIT_QUERY = "users_can_submit_query" - ADMIN_APPROVED_REQUEST = "admin_approved_request" - ADMIN_APPROVED_FIRST_REQUEST = "admin_approved_first_request" - USERS_CAN_GET_APPROVED_RESULT = "users_can_get_approved_result" - USERS_QUERY_NOT_READY = "users_query_not_ready" - QUERY_ENDPOINT_CREATED = "query_endpoint_created" - QUERY_ENDPOINT_CONFIGURED = "query_endpoint_configured" - - ADMIN_LOW_SIDE_WORKFLOW_COMPLETED = "admin_low_side_workflow_completed" - ADMIN_HIGH_SIDE_WORKFLOW_COMPLETED = "admin_high_side_workflow_completed" - ADMIN_SYNC_HIGH_TO_LOW = "admin_sync_high_to_low" - ADMIN_SYNC_LOW_TO_HIGH = "admin_sync_low_to_high" - - EXCEPTION_OCCURRED = "exception_occurred" - - -@dataclass -class Scenario: - name: str - events: list[str] - - def add_event(self, event: str): - self.events.append(event) - - -class EventManager: - def __init__( - self, - test_name: str | None = None, - test_dir: str | None = None, - reset: bool = True, - ): - self.start_time = time.time() - self.event_file = self._get_event_file(test_name, test_dir) - self.lock = Lock() - self._ensure_file_exists() - self.scenarios = {} - if reset: - self.clear_events() - - def add_scenario(self, scenario: Scenario): - with self.lock: - with open(self.event_file, "r+") as f: - events = json.load(f) - for event in scenario.events: - if event not in events: - events[event] = None - self.scenarios[scenario.name] = scenario.events - f.seek(0) - json.dump(events, f) - f.truncate() - - def wait_scenario( - self, scenario_name: str, timeout: float = 15.0, show: bool = True - 
) -> bool: - start_time = time.time() - while time.time() - start_time < timeout: - if self.scenario_completed(scenario_name): - return True - if show: - time_left = timeout - (time.time() - start_time) - print(f"wait_for_scenario: {scenario_name}. Time left: {time_left}") - - time.sleep(1) - return False - - async def await_scenario( - self, scenario_name: str, timeout: float = 15.0, show: bool = True - ) -> bool: - start_time = time.time() - while time.time() - start_time < timeout: - if self.scenario_completed(scenario_name): - return True - if show: - time_left = timeout - (time.time() - start_time) - print( - f"async await_for_scenario: {scenario_name}. Time left: {time_left}" - ) - await asyncio.sleep(1) - return False - - def scenario_completed(self, scenario_name: str) -> bool: - with self.lock: - with open(self.event_file) as f: - events = json.load(f) - scenario_events = self.scenarios.get(scenario_name, []) - incomplete_events = [ - event for event in scenario_events if events.get(event) is None - ] - if Event.EXCEPTION_OCCURRED in events: - msg = f"Scenario '{scenario_name}' failed due to an exception. Missing events: {incomplete_events}" - raise Exception(msg) - - if incomplete_events: - print( - f"Scenario '{scenario_name}' is incomplete. Missing events: {incomplete_events}" - ) - return False - return True - - def _get_event_file( - self, test_name: str | None = None, test_dir: str | None = None - ): - # Get the calling test function's name - if not test_name: - current_frame = inspect.currentframe() - caller_frame = current_frame.f_back - while caller_frame: - if caller_frame.f_code.co_name.startswith("test_"): - test_name = caller_frame.f_code.co_name - break - caller_frame = caller_frame.f_back - else: - test_name = "unknown_test" - - # Get the directory of the calling test file - if not test_dir: - current_frame = inspect.currentframe() - caller_frame = current_frame.f_back - caller_file = inspect.getfile(caller_frame) - test_dir = os.path.dirname(os.path.abspath(caller_file)) - - # Create a unique filename for this test - return os.path.join(test_dir, f"{test_name}_events.json.events") - - def _ensure_file_exists(self): - if not os.path.exists(self.event_file): - with open(self.event_file, "w") as f: - json.dump({}, f) - - def register(self, event_name: str): - with self.lock: - with open(self.event_file, "r+") as f: - now = time.time() - events = json.load(f) - events[event_name] = now - f.seek(0) - json.dump(events, f) - f.truncate() - print(f"> Event: {event_name} occured at: {now}") - - def wait_for( - self, - event_name: str | list[str] | tuple[str], - timeout: float = 15.0, - show: bool = True, - ) -> bool: - event_names = event_name - if isinstance(event_names, str): - event_names = [event_names] - - start_time = time.time() - while time.time() - start_time < timeout: - if all(self.happened(event_name) for event_name in event_names): - return True - if show: - time_left = timeout - (time.time() - start_time) - print(f"wait_for: {event_names}. 
Time left: {time_left}") - - time.sleep(1) - return False - - async def await_for( - self, - event_name: str | list[str] | tuple[str], - timeout: float = 15.0, - show: bool = True, - ) -> bool: - event_names = event_name - if isinstance(event_names, str): - event_names = [event_names] - - start_time = time.time() - while time.time() - start_time < timeout: - if all(self.happened(event_name) for event_name in event_names): - return True - if show: - time_left = timeout - (time.time() - start_time) - print(f"async await_for: {event_names}. Time left: {time_left}") - await asyncio.sleep(5) - return False - - def happened(self, event_name: str) -> bool: - try: - with self.lock: - with open(self.event_file) as f: - events = json.load(f) - if event_name in events: - return events[event_name] - except Exception as e: - print("e", e) - return False - - def get_event_time(self, event_name: str) -> float | None: - with self.lock: - with open(self.event_file) as f: - events = json.load(f) - return events.get(event_name) - - def clear_events(self): - with self.lock: - with open(self.event_file, "w") as f: - json.dump({}, f) - - @unsync_lib.unsync - async def monitor(self, period: float = 2): - while True: - await asyncio.sleep(period) - self.flush_monitor() - - def flush_monitor(self): - with self.lock: - with open(self.event_file) as f: - events = json.load(f) - if not events: - return - for event, timestamp in sorted(events.items(), key=lambda x: x[1]): - if timestamp: - now = time.time() - time_since_start = now - timestamp - print( - f"Event: {event} happened {time_since_start:.2f} seconds ago" - ) - else: - print( - f"Event: {event} is registered but has not happened yet. Pending..." - ) - - def __del__(self): - # Clean up the file when the EventManager is destroyed - # if os.path.exists(self.event_file): - # os.remove(self.event_file) - pass - - -def unsync_guard(): - "Make sure we exit early if an exception occurs" - - def decorator(func): - @wraps(func) - @unsync_lib.unsync - async def wrapper(*args, **kwargs): - try: - result = await func(*args, **kwargs) - return result - except Exception as e: - print(f"Exception occurred: {e}") - for arg in args: - if isinstance(arg, EventManager): - print("Registering exception event") - arg.register(Event.EXCEPTION_OCCURRED) - break - raise - - return wrapper - - return decorator - - -unsync = unsync_guard() diff --git a/tests/scenarios/bigquery/helpers/fixtures_sync.py b/tests/scenarios/bigquery/helpers/fixtures_sync.py deleted file mode 100644 index 80a21aa1cfc..00000000000 --- a/tests/scenarios/bigquery/helpers/fixtures_sync.py +++ /dev/null @@ -1,171 +0,0 @@ -# stdlib -from typing import Any - -# third party -from faker import Faker -from helpers.events import unsync -from helpers.users import TestUser -import pandas as pd - -# syft absolute -import syft as sy -from syft import autocache -from syft.service.user.user_roles import ServiceRole - - -def make_user( - name: str | None = None, - email: str | None = None, - password: str | None = None, - role: ServiceRole = ServiceRole.DATA_SCIENTIST, -) -> TestUser: - fake = Faker() - if name is None: - name = fake.name() - if email is None: - email = fake.email() - if password is None: - password = fake.password() - - return TestUser(name=name, email=email, password=password, role=role) - - -def make_admin(email="info@openmined.org", password="changethis"): - fake = Faker() - return make_user( - email=email, password=password, name=fake.name(), role=ServiceRole.ADMIN - ) - - -def trade_flow_df(): - 
canada_dataset_url = "https://github.com/OpenMined/datasets/blob/main/trade_flow/ca%20-%20feb%202021.csv?raw=True" - df = pd.read_csv(autocache(canada_dataset_url)) - return df - - -def trade_flow_df_mock(df): - return df[10:20] - - -def user_exists(root_client, email: str) -> bool: - users = root_client.api.services.user - for user in users: - if user.email == email: - return True - return False - - -def create_user(root_client, test_user): - if not user_exists(root_client, test_user.email): - fake = Faker() - root_client.register( - name=test_user.name, - email=test_user.email, - password=test_user.password, - password_verify=test_user.password, - institution=fake.company(), - website=fake.url(), - ) - else: - print("User already exists", test_user) - - -def dataset_exists(root_client, dataset_name: str) -> bool: - datasets = root_client.api.services.dataset - for dataset in datasets: - if dataset.name == dataset_name: - return True - return False - - -def upload_dataset(user_client, dataset): - if not dataset_exists(user_client, dataset): - user_client.upload_dataset(dataset) - else: - print("Dataset already exists") - - -def create_dataset(name: str): - df = trade_flow_df() - ca_data = df[0:10] - mock_ca_data = trade_flow_df_mock(df) - dataset = sy.Dataset(name=name) - dataset.set_description("Canada Trade Data Markdown Description") - dataset.set_summary("Canada Trade Data Short Summary") - dataset.add_citation("Person, place or thing") - dataset.add_url("https://github.com/OpenMined/datasets/tree/main/trade_flow") - dataset.add_contributor( - name="Andrew Trask", - email="andrew@openmined.org", - note="Andrew runs this datasite and prepared the dataset metadata.", - ) - dataset.add_contributor( - name="Madhava Jay", - email="madhava@openmined.org", - note="Madhava tweaked the description to add the URL because Andrew forgot.", - ) - ctf = sy.Asset(name="canada_trade_flow") - ctf.set_description( - "Canada trade flow represents export & import of different commodities to other countries" - ) - ctf.add_contributor( - name="Andrew Trask", - email="andrew@openmined.org", - note="Andrew runs this datasite and prepared the asset.", - ) - ctf.set_obj(ca_data) - ctf.set_shape(ca_data.shape) - ctf.set_mock(mock_ca_data, mock_is_real=False) - dataset.add_asset(ctf) - return dataset - - -def make_server(request: Any | None = None, server_name: str | None = None) -> Any: - # TODO: make it compatible with remote deployments - print("making server") - if server_name is None: - faker = Faker() - server_name = faker.name() - server = sy.orchestra.launch( - name=server_name, - port="auto", - dev_mode=True, - reset=True, - n_consumers=1, - create_producer=True, - ) - - def cleanup(): - print("landing server") - server.land() - - if not request: - print("WARNING: No pytest request supplied, no finalizer added") - else: - request.addfinalizer(cleanup) - return server - - -def make_client(url: str, email: str, password: str) -> Any: - return sy.login(url=url, email=email, password=password) - - -def make_guest_client(url: str) -> Any: - return sy.login_as_guest(url=url) - - -@unsync -async def sync_clients(events, from_client, to_client, event_name, after=None): - if after: - await events.await_for(event_name=after) - widget = sy.sync(from_client, to_client) - widget._share_all() - widget._sync_all() - events.register(event_name) - - -@unsync -async def create_users(root_client, events, users, event_name): - for test_user in users: - create_user(root_client, test_user) - events.register(event_name) diff 
--git a/tests/scenarios/bigquery/helpers/users.py b/tests/scenarios/bigquery/helpers/users.py deleted file mode 100644 index c90b3159c6f..00000000000 --- a/tests/scenarios/bigquery/helpers/users.py +++ /dev/null @@ -1,66 +0,0 @@ -# stdlib -from dataclasses import dataclass -from typing import Any - -# third party -from faker import Faker -from helpers.events import unsync - -# syft absolute -import syft as sy -from syft.service.user.user_roles import ServiceRole - - -@dataclass -class TestUser: - name: str - email: str - password: str - role: ServiceRole - server_cache: Any | None = None - - def client(self, server=None): - if server is None: - server = self.server_cache - else: - self.server_cache = server - - return server.login(email=self.email, password=self.password) - - -@unsync -async def set_settings_allow_guest_signup( - events, client, enabled, event_name: str | None = None -): - result = client.settings.allow_guest_signup(enable=enabled) - if event_name: - if isinstance(result, sy.SyftSuccess): - events.register(event_name) - - -@unsync -async def check_users_created(events, client, users, event_name, event_set): - expected_emails = {user.email for user in users} - found_emails = set() - await events.await_for(event_name=event_name) - user_results = client.api.services.user.get_all() - for user_result in user_results: - if user_result.email in expected_emails: - found_emails.add(user_result.email) - - if len(found_emails) == len(expected_emails): - events.register(event_set) - - -def guest_register(client, test_user): - guest_client = client.guest() - fake = Faker() - result = guest_client.register( - name=test_user.name, - email=test_user.email, - password=test_user.password, - password_verify=test_user.password, - institution=fake.company(), - website=fake.url(), - ) - return result diff --git a/tests/scenarios/bigquery/helpers/workers.py b/tests/scenarios/bigquery/helpers/workers.py deleted file mode 100644 index 66341d3503e..00000000000 --- a/tests/scenarios/bigquery/helpers/workers.py +++ /dev/null @@ -1,83 +0,0 @@ -# third party -from helpers.events import unsync - -# syft absolute -import syft as sy -from syft import test_settings - - -@unsync -async def get_prebuilt_worker_image(events, client, expected_tag, after): - await events.await_for(event_name=after, show=True) - worker_images = client.images.get_all() - for worker_image in worker_images: - if expected_tag in str(worker_image.image_identifier): - assert expected_tag in str(worker_image.image_identifier) - return worker_image - - -@unsync -async def create_prebuilt_worker_image(events, client, expected_tag, event_name): - external_registry = test_settings.get("external_registry", default="docker.io") - docker_config = sy.PrebuiltWorkerConfig(tag=f"{external_registry}/{expected_tag}") - result = client.api.services.worker_image.submit(worker_config=docker_config) - assert isinstance(result, sy.SyftSuccess) - events.register(event_name) - - -@unsync -async def add_external_registry(events, client, event_name): - external_registry = test_settings.get("external_registry", default="docker.io") - result = client.api.services.image_registry.add(external_registry) - assert isinstance(result, sy.SyftSuccess) - events.register(event_name) - - -@unsync -async def create_worker_pool( - events, - client, - worker_pool_name, - worker_pool_result, - event_name, -): - # block until this is available - worker_image = worker_pool_result.result(timeout=5) - - result = client.api.services.worker_pool.launch( - 
pool_name=worker_pool_name, - image_uid=worker_image.id, - num_workers=1, - ) - - if isinstance(result, list) and isinstance( - result[0], sy.service.worker.worker_pool.ContainerSpawnStatus - ): - events.register(event_name) - - -@unsync -async def check_worker_pool_exists(events, client, worker_pool_name, after): - timeout = 30 - await events.await_for(event_name=after, timeout=timeout) - pools = client.worker_pools.get_all() - for pool in pools: - if worker_pool_name == pool.name: - assert worker_pool_name == pool.name - return worker_pool_name == pool.name - - -@unsync -async def scale_worker_pool( - events, client, worker_pool_name, num_workers, event_name, after -): - if after: - await events.await_for(event_name=after) - - result = client.api.services.worker_pool.scale( - pool_name=worker_pool_name, - num_workers=num_workers, - ) - - assert isinstance(result, sy.SyftSuccess) - events.register(event_name) diff --git a/tests/scenarios/bigquery/level_0_test.py b/tests/scenarios/bigquery/level_0_test.py deleted file mode 100644 index 269e5e5abd4..00000000000 --- a/tests/scenarios/bigquery/level_0_test.py +++ /dev/null @@ -1,363 +0,0 @@ -# stdlib -import asyncio - -# third party -from helpers.api import create_endpoints_query -from helpers.api import create_endpoints_schema -from helpers.api import create_endpoints_submit_query -from helpers.api import query_sql -from helpers.api import run_api_path -from helpers.api import set_endpoint_settings -from helpers.asserts import result_is -from helpers.code import run_code -from helpers.events import Event -from helpers.events import EventManager -from helpers.events import Scenario -from helpers.events import unsync -from helpers.fixtures_sync import make_client -from helpers.fixtures_sync import make_user -from helpers.fixtures_sync import sync_clients -from helpers.workers import add_external_registry -from helpers.workers import check_worker_pool_exists -from helpers.workers import create_prebuilt_worker_image -from helpers.workers import create_worker_pool -from helpers.workers import get_prebuilt_worker_image -import pytest - -# syft absolute -import syft as sy - - -@unsync -async def user_low_side_activity(_, events, user, after=None): - if after: - await events.await_for(event_name=after) - - # login_user - user_client = user.client() - - # submit_code - submit_query_path = "bigquery.test_query" - await result_is( - events, - lambda: len(run_api_path(user_client, submit_query_path, sql_query=query_sql())) - == 10000, - matches=True, - after=[ - Event.QUERY_ENDPOINT_CONFIGURED, - Event.USERS_CREATED_CHECKED, - Event.ADMIN_SYNC_HIGH_TO_LOW, - ], - register=Event.USERS_CAN_QUERY_MOCK, - ) - - func_name = "test_func" - await result_is( - events, - lambda: run_api_path( - user_client, - submit_query_path, - func_name=func_name, - query=query_sql(), - ), - matches="*Query submitted*", - after=[Event.SUBMIT_QUERY_ENDPOINT_CONFIGURED, Event.USERS_CREATED_CHECKED], - register=Event.USERS_CAN_SUBMIT_QUERY, - ) - - # this should fail to complete because no work will be approved or denied - await result_is( - events, - lambda: run_code(user_client, method_name=f"{func_name}*"), - matches=sy.SyftException(public_message="*Your code is waiting for approval*"), - after=[Event.USERS_CAN_SUBMIT_QUERY], - register=Event.USERS_QUERY_NOT_READY, - ) - - # dump result in a file - - events.register(Event.USER_LOW_SIDE_WAITING_FOR_APPROVAL) - - -@unsync -async def root_sync_activity(_, events, after): - if after: - await events.await_for(event_name=after) - - 
# login to high side - admin_client_high = make_client( - url="http://localhost:8080", - email="info@openmined.org", - password="changethis", - ) - - admin_client_low = make_client( - url="http://localhost:8081", - email="info@openmined.org", - password="changethis", - ) - - while True: - await asyncio.sleep(3) - sync_clients( - events, - admin_client_low, - admin_client_high, - event_name=Event.ADMIN_SYNC_LOW_TO_HIGH, - ) - - -@unsync -async def admin_create_worker_pool( - _, - events, - admin_client, - worker_pool_name, - worker_docker_tag, -): - """ - Worker pool flow: - - Register custom image - - Launch worker pool - - Scale worker pool - """ - - create_prebuilt_worker_image( - events, - admin_client, - worker_docker_tag, - Event.PREBUILT_WORKER_IMAGE_BIGQUERY_CREATED, - ) - - worker_image_result = get_prebuilt_worker_image( - events, - admin_client, - worker_docker_tag, - after=Event.PREBUILT_WORKER_IMAGE_BIGQUERY_CREATED, - ) - - # todo - configure this manually?? - add_external_registry( - events, - admin_client, - Event.EXTERNAL_REGISTRY_BIGQUERY_CREATED, - ) - - create_worker_pool( - events, - admin_client, - worker_pool_name, - worker_image_result, - Event.WORKER_POOL_CREATED, - ) - - check_worker_pool_exists( - events, - admin_client, - worker_pool_name, - after=Event.WORKER_POOL_CREATED, - ) - - -@unsync -async def mark_completed(events, register, after): - if after: - await events.await_for(event_name=after) - events.register(register) - - -@unsync -async def admin_signup_users(_, events, admin_client, users, register): - for user in users: - print(f"Registering user {user.name} ({user.email})") - admin_client.register( - name=user.name, - email=user.email, - password=user.password, - password_verify=user.password, - ) - - events.register(register) - - -@unsync -async def admin_low_side_activity(_, events, users): - """ - Typical admin activity on low-side server - - Login to low-side server - - Create users - - Create a worker pool - """ - - worker_pool_name = "bigquery-pool" - worker_docker_tag = "openmined/worker-bigquery:0.9.1" - - # login to low side - admin_client = make_client( - url="http://localhost:8081", - email="info@openmined.org", - password="changethis", - ) - - admin_signup_users( - _, - events, - admin_client, - users, - register=Event.USERS_CREATED, - ) - - # create worker pool on low side - admin_create_worker_pool( - _, - events, - admin_client, - worker_pool_name, - worker_docker_tag, - ) - - mark_completed( - events, - register=Event.ADMIN_LOW_SIDE_WORKFLOW_COMPLETED, - after=Event.WORKER_POOL_CREATED, - ) - - -@unsync -async def admin_create_sync_api_endpoints( - _, - events, - admin_client_high, - admin_client_low, - worker_pool_name, - after=None, -): - if after: - await events.await_for(event_name=after) - - test_query_path = "bigquery.test_query" - submit_query_path = "bigquery.submit_query" - - create_endpoints_query( - events, - admin_client_high, - worker_pool_name=worker_pool_name, - register=Event.QUERY_ENDPOINT_CREATED, - ) - - set_endpoint_settings( - events, - admin_client_high, - path=test_query_path, - kwargs={"endpoint_timeout": 120, "hide_mock_definition": True}, - after=Event.QUERY_ENDPOINT_CREATED, - register=Event.QUERY_ENDPOINT_CONFIGURED, - ) - - create_endpoints_schema( - events, - admin_client_high, - worker_pool_name=worker_pool_name, - register=Event.SCHEMA_ENDPOINT_CREATED, - ) - - create_endpoints_submit_query( - events, - admin_client_high, - worker_pool_name=worker_pool_name, - register=Event.SUBMIT_QUERY_ENDPOINT_CREATED, - 
) - - set_endpoint_settings( - events, - admin_client_high, - path=submit_query_path, - kwargs={"hide_mock_definition": True}, - after=Event.SUBMIT_QUERY_ENDPOINT_CREATED, - register=Event.SUBMIT_QUERY_ENDPOINT_CONFIGURED, - ) - - sync_clients( - events, - admin_client_low, - admin_client_high, - event_name=Event.ADMIN_SYNC_HIGH_TO_LOW, - after=Event.SUBMIT_QUERY_ENDPOINT_CONFIGURED, - ) - - -@unsync -async def admin_high_side_activity(_, events): - # login - admin_client_high = make_client( - url="http://localhost:8080", - email="info@openmined.org", - password="changethis", - ) - admin_client_low = make_client( - url="http://localhost:8081", - email="info@openmined.org", - password="changethis", - ) - - worker_pool_name = "bigquery-pool" - worker_docker_tag = "openmined/worker-bigquery:0.9.1" - - admin_create_worker_pool( - _, - events, - admin_client_high, - worker_pool_name, - worker_docker_tag, - ) - - admin_create_sync_api_endpoints( - _, - events, - admin_client_high, - admin_client_low, - worker_pool_name, - after=None, - ) - - events.register(Event.ADMIN_HIGH_SIDE_WORKFLOW_COMPLETED) - - -@pytest.mark.asyncio -async def test_level_0_k8s(request): - """ - Goal - - Setup two datasites - high & low - - Root client of each datasite creates an multiple admin users - - """ - scenario = Scenario( - name="test_level_0_k8s", - events=[ - Event.ADMIN_LOW_SIDE_WORKFLOW_COMPLETED, - # Event.ADMIN_HIGH_SIDE_WORKFLOW_COMPLETED, - ], - ) - - events = EventManager() - events.add_scenario(scenario) - events.monitor() - - users = [make_user(password="password") for _ in range(2)] - - # start admin activity on low side - admin_low_side_activity(request, events, users) - - # todo - admin_high_side_activity(request, events) - - # todo - only start syncing after the root user created other admin users - # root_sync_activity(request, events, after=Event.USER_ADMIN_CREATED) - - # todo - [ - user_low_side_activity(request, events, user, after=Event.USERS_CREATED) - for user in users - ] - await events.await_scenario(scenario_name="test_level_0_k8s", timeout=30) - assert events.scenario_completed("test_level_0_k8s") diff --git a/tests/scenarios/bigquery/level_2_basic_test.py b/tests/scenarios/bigquery/level_2_basic_test.py deleted file mode 100644 index 34cfdcd073f..00000000000 --- a/tests/scenarios/bigquery/level_2_basic_test.py +++ /dev/null @@ -1,226 +0,0 @@ -# third party -from helpers.api import create_endpoints_query -from helpers.api import create_endpoints_schema -from helpers.api import create_endpoints_submit_query -from helpers.api import query_sql -from helpers.api import run_api_path -from helpers.api import set_endpoint_settings -from helpers.asserts import ensure_package_installed -from helpers.asserts import result_is -from helpers.code import get_results -from helpers.code import run_code -from helpers.code import triage_requests -from helpers.events import Event -from helpers.events import EventManager -from helpers.events import Scenario -from helpers.fixtures_sync import create_users -from helpers.fixtures_sync import make_admin -from helpers.fixtures_sync import make_server -from helpers.fixtures_sync import make_user -from helpers.users import check_users_created -from helpers.users import guest_register -from helpers.users import set_settings_allow_guest_signup -from helpers.workers import add_external_registry -from helpers.workers import check_worker_pool_exists -from helpers.workers import create_prebuilt_worker_image -from helpers.workers import create_worker_pool -from 
helpers.workers import get_prebuilt_worker_image -import pytest - -# syft absolute -import syft as sy - - -@pytest.mark.asyncio -async def test_level_2_basic_scenario(request): - ensure_package_installed("google-cloud-bigquery", "google.cloud.bigquery") - ensure_package_installed("db-dtypes", "db_dtypes") - - scenario = Scenario( - name="test_create_apis_and_triage_requests", - events=[ - Event.USER_ADMIN_CREATED, - Event.PREBUILT_WORKER_IMAGE_BIGQUERY_CREATED, - Event.EXTERNAL_REGISTRY_BIGQUERY_CREATED, - Event.WORKER_POOL_CREATED, - Event.ALLOW_GUEST_SIGNUP_DISABLED, - Event.USERS_CREATED, - Event.USERS_CREATED_CHECKED, - Event.QUERY_ENDPOINT_CREATED, - Event.QUERY_ENDPOINT_CONFIGURED, - Event.SCHEMA_ENDPOINT_CREATED, - Event.SUBMIT_QUERY_ENDPOINT_CREATED, - Event.SUBMIT_QUERY_ENDPOINT_CONFIGURED, - Event.USERS_CAN_QUERY_MOCK, - Event.USERS_CAN_SUBMIT_QUERY, - Event.USERS_QUERY_NOT_READY, - Event.ADMIN_APPROVED_FIRST_REQUEST, - Event.USERS_CAN_GET_APPROVED_RESULT, - ], - ) - - events = EventManager() - events.add_scenario(scenario) - events.monitor() - - server = make_server(request) - - admin = make_admin() - events.register(Event.USER_ADMIN_CREATED) - - await events.await_for(event_name=Event.USER_ADMIN_CREATED) - assert events.happened(Event.USER_ADMIN_CREATED) - - root_client = admin.client(server) - triage_requests( - events, - root_client, - after=Event.USER_ADMIN_CREATED, - register=Event.ADMIN_APPROVED_FIRST_REQUEST, - ) - - worker_pool_name = "bigquery-pool" - - worker_docker_tag = f"openmined/bigquery:{sy.__version__}" - - create_prebuilt_worker_image( - events, - root_client, - worker_docker_tag, - Event.PREBUILT_WORKER_IMAGE_BIGQUERY_CREATED, - ) - - worker_image_result = get_prebuilt_worker_image( - events, - root_client, - worker_docker_tag, - Event.PREBUILT_WORKER_IMAGE_BIGQUERY_CREATED, - ) - - add_external_registry(events, root_client, Event.EXTERNAL_REGISTRY_BIGQUERY_CREATED) - - create_worker_pool( - events, - root_client, - worker_pool_name, - worker_image_result, - Event.WORKER_POOL_CREATED, - ) - - check_worker_pool_exists( - events, root_client, worker_pool_name, Event.WORKER_POOL_CREATED - ) - - set_settings_allow_guest_signup( - events, root_client, False, Event.ALLOW_GUEST_SIGNUP_DISABLED - ) - - users = [make_user() for i in range(2)] - - create_users(root_client, events, users, Event.USERS_CREATED) - - check_users_created( - events, root_client, users, Event.USERS_CREATED, Event.USERS_CREATED_CHECKED - ) - - create_endpoints_query( - events, - root_client, - worker_pool_name=worker_pool_name, - register=Event.QUERY_ENDPOINT_CREATED, - ) - - test_query_path = "bigquery.test_query" - set_endpoint_settings( - events, - root_client, - path=test_query_path, - kwargs={"endpoint_timeout": 120, "hide_mock_definition": True}, - after=Event.QUERY_ENDPOINT_CREATED, - register=Event.QUERY_ENDPOINT_CONFIGURED, - ) - - create_endpoints_schema( - events, - root_client, - worker_pool_name=worker_pool_name, - register=Event.SCHEMA_ENDPOINT_CREATED, - ) - - create_endpoints_submit_query( - events, - root_client, - worker_pool_name=worker_pool_name, - register=Event.SUBMIT_QUERY_ENDPOINT_CREATED, - ) - - submit_query_path = "bigquery.submit_query" - set_endpoint_settings( - events, - root_client, - path=submit_query_path, - kwargs={"hide_mock_definition": True}, - after=Event.SUBMIT_QUERY_ENDPOINT_CREATED, - register=Event.SUBMIT_QUERY_ENDPOINT_CONFIGURED, - ) - - await result_is( - events, - lambda: len( - run_api_path( - users[0].client(server), test_query_path, 
sql_query=query_sql() - ) - ) - == 10000, - matches=True, - after=[Event.QUERY_ENDPOINT_CONFIGURED, Event.USERS_CREATED_CHECKED], - register=Event.USERS_CAN_QUERY_MOCK, - ) - - func_name = "test_func" - - await result_is( - events, - lambda: run_api_path( - users[0].client(server), - submit_query_path, - func_name=func_name, - query=query_sql(), - ), - matches="*Query submitted*", - after=[Event.SUBMIT_QUERY_ENDPOINT_CONFIGURED, Event.USERS_CREATED_CHECKED], - register=Event.USERS_CAN_SUBMIT_QUERY, - ) - - await result_is( - events, - lambda: run_code(users[0].client(server), method_name=f"{func_name}*"), - matches=sy.SyftException(public_message="*Your code is waiting for approval*"), - after=[Event.USERS_CAN_SUBMIT_QUERY], - register=Event.USERS_QUERY_NOT_READY, - ) - - get_results( - events, - users[0].client(server), - method_name=f"{func_name}*", - after=Event.USERS_QUERY_NOT_READY, - register=Event.USERS_CAN_GET_APPROVED_RESULT, - ) - - res = await result_is( - events, - lambda: guest_register(root_client, make_user()), - matches=sy.SyftException( - public_message="*You have no permission to create an account*" - ), - after=Event.ALLOW_GUEST_SIGNUP_DISABLED, - ) - - assert res is True - - await events.await_scenario( - scenario_name="test_create_apis_and_triage_requests", - timeout=30, - ) - assert events.scenario_completed("test_create_apis_and_triage_requests") From 74f66942f9374c6038d9f5933895d6cecbcd586c Mon Sep 17 00:00:00 2001 From: Yash Gorana Date: Thu, 26 Sep 2024 21:44:50 +0530 Subject: [PATCH 33/78] tweaked logging --- tests/scenariosv2/l0_test.py | 2 +- tests/scenariosv2/l2_test.py | 2 +- tests/scenariosv2/sim/core.py | 61 ++++++++++++++++++----------------- 3 files changed, 33 insertions(+), 32 deletions(-) diff --git a/tests/scenariosv2/l0_test.py b/tests/scenariosv2/l0_test.py index 86448445f7c..f691b8a4971 100644 --- a/tests/scenariosv2/l0_test.py +++ b/tests/scenariosv2/l0_test.py @@ -555,7 +555,7 @@ async def sim_l0_scenario(ctx: SimulatorContext): @pytest.mark.asyncio async def test_l0_scenario(request): - sim = Simulator() + sim = Simulator("l0_scenario") await sim.start( sim_l0_scenario, diff --git a/tests/scenariosv2/l2_test.py b/tests/scenariosv2/l2_test.py index af4f287bca0..28ace2990cf 100644 --- a/tests/scenariosv2/l2_test.py +++ b/tests/scenariosv2/l2_test.py @@ -155,7 +155,7 @@ async def sim_l2_scenario(ctx: SimulatorContext): @pytest.mark.asyncio async def test_l2_scenario(request): - sim = Simulator() + sim = Simulator("l2_scenario") await sim.start( sim_l2_scenario, diff --git a/tests/scenariosv2/sim/core.py b/tests/scenariosv2/sim/core.py index f9e0148ab92..618a8aeaa66 100644 --- a/tests/scenariosv2/sim/core.py +++ b/tests/scenariosv2/sim/core.py @@ -8,14 +8,7 @@ import random import time -TIMESTAMP = datetime.now().strftime("%Y%m%d_%H%M%S") -LOGS_DIR = Path(__file__).resolve().parents[1] / "logs" / TIMESTAMP -LOGS_DIR.mkdir(parents=True, exist_ok=True) - -EXECUTIONS_LOG = LOGS_DIR / "sim.executions.log" -EVENTS_LOG = LOGS_DIR / "sim.events.log" -ACTIVITY_LOG = LOGS_DIR / "sim.activity.log" - +LOGS_DIR = Path(__file__).resolve().parents[1] / ".logs" logging.Formatter.formatTime = ( lambda self, record, datefmt=None: datetime.fromtimestamp(record.created).isoformat( @@ -32,6 +25,24 @@ ) +def make_logger( + name: str, + instance: str, + formatter=DEFAULT_FORMATTER, + level=logging.INFO, +): + log_file = f"{int(time.time())}_{instance}" + log_path = Path(LOGS_DIR, log_file, name).with_suffix(".log") + log_path.parent.mkdir(parents=True, exist_ok=True) + + 
logger = logging.getLogger(name) + file_handler = logging.FileHandler(log_path, mode="w") + file_handler.setFormatter(formatter) + logger.addHandler(file_handler) + logger.setLevel(level) + return logger + + class TestFailure(Exception): """Custom exception to signal test failures""" @@ -45,14 +56,10 @@ class BaseEvent(Enum): class EventManager: - def __init__(self): + def __init__(self, name: str): + self.name = name self.events = {} - self.logger = logging.getLogger("events") - file_handler = logging.FileHandler(EVENTS_LOG, mode="w") - file_handler.setFormatter(EVENT_FORMATTER) - self.logger.addHandler(file_handler) - self.logger.setLevel(logging.INFO) - print(f"EvenManager initialized. Logs are saved in: {LOGS_DIR}") + self.logger = make_logger("events", instance=name, level=logging.INFO) async def wait_for(self, event: BaseEvent): if event not in self.events: @@ -72,22 +79,13 @@ def is_set(self, event: BaseEvent) -> bool: class SimulatorContext: - def __init__(self, random_wait=None): - self.events = EventManager() + def __init__(self, name: str, random_wait=None): + self.name = name + self.events = EventManager(name) self.random_wait = random_wait - self.logger = logging.getLogger("activity") - file_handler = logging.FileHandler(ACTIVITY_LOG, mode="w") - file_handler.setFormatter(DEFAULT_FORMATTER) - self.logger.addHandler(file_handler) - self.logger.setLevel(logging.INFO) - - # private logger - self._elogger = logging.getLogger("executions") - file_handler = logging.FileHandler(EXECUTIONS_LOG, mode="w") - file_handler.setFormatter(DEFAULT_FORMATTER) - self._elogger.addHandler(file_handler) - self._elogger.setLevel(logging.DEBUG) + self.logger = make_logger("activity", instance=name, level=logging.INFO) + self._elogger = make_logger("executions", instance=name, level=logging.DEBUG) def unfired_events(self, events: list[BaseEvent]): evts = filter(lambda e: not self.events.is_set(e), events) @@ -104,8 +102,11 @@ async def gather(*tasks): class Simulator: + def __init__(self, name: str): + self.name = name + async def start(self, *tasks, check_events=None, random_wait=None, timeout=60): - context = SimulatorContext(random_wait) + context = SimulatorContext(self.name, random_wait) results = None try: From 2ca51cf11609fcdd32cf488417e2d967f7d35994 Mon Sep 17 00:00:00 2001 From: Yash Gorana Date: Fri, 27 Sep 2024 00:05:22 +0530 Subject: [PATCH 34/78] attempt to make things common --- .pre-commit-config.yaml | 2 +- tests/scenariosv2/__init__.py | 0 tests/scenariosv2/flows/__init__.py | 0 tests/scenariosv2/flows/user_bigquery_api.py | 64 +++++++++++ tests/scenariosv2/l0_test.py | 108 ++++++------------- tests/scenariosv2/l2_test.py | 49 +++------ 6 files changed, 111 insertions(+), 112 deletions(-) create mode 100644 tests/scenariosv2/__init__.py create mode 100644 tests/scenariosv2/flows/__init__.py create mode 100644 tests/scenariosv2/flows/user_bigquery_api.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 45822b202d1..8943a32efc2 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -27,7 +27,7 @@ repos: always_run: true - id: name-tests-test always_run: true - exclude: ^(.*/tests/utils/)|^(.*fixtures.py)|^(tests/scenarios/bigquery/helpers)|^(tests/scenariosv2/sim) + exclude: ^(.*/tests/utils/)|^(.*fixtures.py)|^(tests/scenariosv2/(sim|flows)) - id: requirements-txt-fixer always_run: true - id: mixed-line-ending diff --git a/tests/scenariosv2/__init__.py b/tests/scenariosv2/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git 
a/tests/scenariosv2/flows/__init__.py b/tests/scenariosv2/flows/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/scenariosv2/flows/user_bigquery_api.py b/tests/scenariosv2/flows/user_bigquery_api.py
new file mode 100644
index 00000000000..be959ed21bf
--- /dev/null
+++ b/tests/scenariosv2/flows/user_bigquery_api.py
@@ -0,0 +1,64 @@
+# stdlib
+import random
+
+# syft absolute
+import syft as sy
+from syft import test_settings
+from syft.service.request.request import RequestStatus
+
+# relative
+from ..sim.core import SimulatorContext
+
+__all__ = ["bq_test_query", "bq_submit_query", "bq_submit_query_results"]
+
+
+def query_sql():
+    dataset_2 = test_settings.get("dataset_2", default="dataset_2")
+    table_2 = test_settings.get("table_2", default="table_2")
+    table_2_col_id = test_settings.get("table_2_col_id", default="table_id")
+    table_2_col_score = test_settings.get("table_2_col_score", default="colname")
+
+    query = f"SELECT {table_2_col_id}, AVG({table_2_col_score}) AS average_score \
+        FROM {dataset_2}.{table_2} \
+        GROUP BY {table_2_col_id} \
+        LIMIT 10000"
+    return query
+
+
+def bq_test_query(ctx: SimulatorContext, client: sy.DatasiteClient):
+    ctx.logger.info(
+        f"User: {client.logged_in_user} - Calling client.api.bigquery.test_query (mock)"
+    )
+    res = client.api.bigquery.test_query(sql_query=query_sql())
+    assert len(res) == 10000
+    ctx.logger.info(f"User: {client.logged_in_user} - Received {len(res)} rows")
+    return res
+
+
+def bq_submit_query(ctx: SimulatorContext, client: sy.DatasiteClient):
+    # Randomly choose a valid or an invalid function name to submit
+    func_name = "invalid_func" if random.random() < 0.5 else "test_query"
+
+    ctx.logger.info(
+        f"User: {client.logged_in_user} - Calling client.api.bigquery.submit_query func_name={func_name}"
+    )
+    res = client.api.bigquery.submit_query(
+        func_name=func_name,
+        query=query_sql(),
+    )
+    ctx.logger.info(f"User: {client.logged_in_user} - Received {res}")
+    return res
+
+
+def bq_submit_query_results(ctx: SimulatorContext, client: sy.DatasiteClient):
+    for request in client.requests:
+        if request.get_status() == RequestStatus.APPROVED:
+            job = request.code(blocking=False)
+            result = job.wait()
+            assert len(result) == 10000
+        if request.get_status() == RequestStatus.REJECTED:
+            ctx.logger.info(
+                f"User: {client.logged_in_user} - Request rejected {request.code.service_func_name}"
+            )
+
+    return True
diff --git a/tests/scenariosv2/l0_test.py b/tests/scenariosv2/l0_test.py
index f691b8a4971..6812fbd0e16 100644
--- a/tests/scenariosv2/l0_test.py
+++ b/tests/scenariosv2/l0_test.py
@@ -6,16 +6,9 @@
 # third party
 from faker import Faker
 import pytest
-from sim.core import BaseEvent
-from sim.core import Simulator
-from sim.core import SimulatorContext
-from sim.core import sim_activity
-from sim.core import sim_entrypoint

 # syft absolute
 import syft as sy
-from syft import test_settings
-from syft.client.client import SyftClient
 from syft.service.request.request import RequestStatus
 from syft.util.test_helpers.apis import make_schema
 from syft.util.test_helpers.apis import make_test_query
@@ -23,6 +16,16 @@
     build_and_launch_worker_pool_from_docker_str,
 )

+# relative
+from .flows.user_bigquery_api import bq_submit_query
+from .flows.user_bigquery_api import bq_submit_query_results
+from .flows.user_bigquery_api import bq_test_query
+from .sim.core import BaseEvent
+from .sim.core import Simulator
+from .sim.core import SimulatorContext
+from .sim.core import sim_activity
+from .sim.core import
sim_entrypoint + fake = Faker() NUM_USERS = 3 NUM_ENDPOINTS = 3 # test_query, submit_query, schema_query @@ -59,42 +62,13 @@ class Event(BaseEvent): # ------------------------------------------------------------------------------------------------ -def query_sql(): - dataset_2 = test_settings.get("dataset_2", default="dataset_2") - table_2 = test_settings.get("table_2", default="table_2") - table_2_col_id = test_settings.get("table_2_col_id", default="table_id") - table_2_col_score = test_settings.get("table_2_col_score", default="colname") - - query = f"SELECT {table_2_col_id}, AVG({table_2_col_score}) AS average_score \ - FROM {dataset_2}.{table_2} \ - GROUP BY {table_2_col_id} \ - LIMIT 10000" - return query - - -def get_code_from_msg(msg: str): - return str(msg.split("`")[1].replace("()", "").replace("client.", "")) - - -# ------------------------------------------------------------------------------------------------ - - @sim_activity( wait_for=Event.ADMIN_LOW_SIDE_ENDPOINTS_AVAILABLE, trigger=Event.USER_CAN_QUERY_TEST_ENDPOINT, ) async def user_query_test_endpoint(ctx: SimulatorContext, client: sy.DatasiteClient): """Run query on test endpoint""" - - user = client.logged_in_user - - def _query_endpoint(): - ctx.logger.info(f"User: {user} - Calling client.api.bigquery.test_query (mock)") - res = client.api.bigquery.test_query(sql_query=query_sql()) - assert len(res) == 10000 - ctx.logger.info(f"User: {user} - Received {len(res)} rows") - - await asyncio.to_thread(_query_endpoint) + await asyncio.to_thread(bq_test_query, ctx, client) @sim_activity( @@ -103,21 +77,7 @@ def _query_endpoint(): ) async def user_bq_submit(ctx: SimulatorContext, client: sy.DatasiteClient): """Submit query to be run on private data""" - user = client.logged_in_user - - def _submit_endpoint(): - func_name = "invalid_func" if random.random() < 0.5 else "test_query" - ctx.logger.info( - f"User: {user} - Calling client.api.services.bigquery.submit_query func_name={func_name}" - ) - - res = client.api.bigquery.submit_query( - func_name=func_name, - query=query_sql(), - ) - ctx.logger.info(f"User: {user} - Received {res}") - - await asyncio.to_thread(_submit_endpoint) + await asyncio.to_thread(bq_submit_query, ctx, client) @sim_activity( @@ -125,18 +85,7 @@ def _submit_endpoint(): trigger=Event.USER_CHECKED_RESULTS, ) async def user_checks_results(ctx: SimulatorContext, client: sy.DatasiteClient): - def _check_results(): - for request in client.requests: - if request.get_status() == RequestStatus.APPROVED: - job = request.code(blocking=False) - result = job.wait() - assert len(result) == 10000 - if request.get_status() == RequestStatus.REJECTED: - ctx.logger.info( - f"User: Request with function named {request.code.service_func_name} was rejected" - ) - - await asyncio.to_thread(_check_results) + await asyncio.to_thread(bq_submit_query_results, ctx, client) @sim_activity(wait_for=Event.GUEST_USERS_CREATED, trigger=Event.USER_FLOW_COMPLETED) @@ -148,6 +97,7 @@ async def user_flow(ctx: SimulatorContext, server_url_low: str, user: dict): ) ctx.logger.info(f"User: {client.logged_in_user} - logged in") + # this must be executed sequentially. 
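+    # (test and submit could run in parallel, but results can only be checked once the submit step has created a request on the server)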
await user_query_test_endpoint(ctx, client) await user_bq_submit(ctx, client) await user_checks_results(ctx, client) @@ -158,7 +108,7 @@ async def user_flow(ctx: SimulatorContext, server_url_low: str, user: dict): @sim_activity(trigger=Event.GUEST_USERS_CREATED) async def admin_signup_users( - ctx: SimulatorContext, admin_client: SyftClient, users: list[dict] + ctx: SimulatorContext, admin_client: sy.DatasiteClient, users: list[dict] ): for user in users: ctx.logger.info(f"Admin low: Creating guest user {user['email']}") @@ -173,7 +123,7 @@ async def admin_signup_users( @sim_activity(trigger=Event.ADMIN_BQ_SCHEMA_ENDPOINT_CREATED) async def admin_endpoint_bq_schema( ctx: SimulatorContext, - admin_client: SyftClient, + admin_client: sy.DatasiteClient, worker_pool: str | None = None, ): path = "bigquery.schema" @@ -195,7 +145,7 @@ async def admin_endpoint_bq_schema( @sim_activity(trigger=Event.ADMIN_BQ_TEST_ENDPOINT_CREATED) async def admin_endpoint_bq_test( ctx: SimulatorContext, - admin_client: SyftClient, + admin_client: sy.DatasiteClient, worker_pool: str | None = None, ): path = "bigquery.test_query" @@ -290,7 +240,7 @@ def execute_query(query: str, endpoint): @sim_activity(trigger=Event.ADMIN_ALL_ENDPOINTS_CREATED) -async def admin_create_endpoint(ctx: SimulatorContext, admin_client: SyftClient): +async def admin_create_endpoint(ctx: SimulatorContext, admin_client: sy.DatasiteClient): worker_pool = "biquery-pool" await asyncio.gather( @@ -308,7 +258,7 @@ async def admin_create_endpoint(ctx: SimulatorContext, admin_client: SyftClient) Event.ADMIN_LOWSIDE_WORKER_POOL_CREATED, ] ) -async def admin_watch_sync(ctx: SimulatorContext, admin_client: SyftClient): +async def admin_watch_sync(ctx: SimulatorContext, admin_client: sy.DatasiteClient): while True: await asyncio.sleep(random.uniform(5, 10)) @@ -340,7 +290,7 @@ async def admin_watch_sync(ctx: SimulatorContext, admin_client: SyftClient): # @sim_activity(trigger=Event.ADMIN_WORKER_POOL_CREATED) -async def admin_create_bq_pool(ctx: SimulatorContext, admin_client: SyftClient): +async def admin_create_bq_pool(ctx: SimulatorContext, admin_client: sy.DatasiteClient): worker_pool = "biquery-pool" base_image = admin_client.images.get_all()[0] @@ -375,12 +325,16 @@ async def admin_create_bq_pool(ctx: SimulatorContext, admin_client: SyftClient): @sim_activity(trigger=Event.ADMIN_HIGHSIDE_WORKER_POOL_CREATED) -async def admin_create_bq_pool_high(ctx: SimulatorContext, admin_client: SyftClient): +async def admin_create_bq_pool_high( + ctx: SimulatorContext, admin_client: sy.DatasiteClient +): await admin_create_bq_pool(ctx, admin_client) @sim_activity(trigger=Event.ADMIN_LOWSIDE_WORKER_POOL_CREATED) -async def admin_create_bq_pool_low(ctx: SimulatorContext, admin_client: SyftClient): +async def admin_create_bq_pool_low( + ctx: SimulatorContext, admin_client: sy.DatasiteClient +): await admin_create_bq_pool(ctx, admin_client) @@ -391,7 +345,9 @@ async def admin_create_bq_pool_low(ctx: SimulatorContext, admin_client: SyftClie ], trigger=Event.ADMIN_HIGHSIDE_FLOW_COMPLETED, ) -async def admin_triage_requests_high(ctx: SimulatorContext, admin_client: SyftClient): +async def admin_triage_requests_high( + ctx: SimulatorContext, admin_client: sy.DatasiteClient +): while True: await asyncio.sleep(random.uniform(5, 10)) @@ -452,9 +408,7 @@ async def admin_low_side(ctx: SimulatorContext, admin_auth, users): @sim_activity(trigger=Event.ADMIN_SYNC_COMPLETED) async def admin_sync_to_low_flow( - ctx: SimulatorContext, - admin_auth_high, - admin_auth_low, + ctx: 
SimulatorContext, admin_auth_high: dict, admin_auth_low: dict ): high_client = sy.login(**admin_auth_high) ctx.logger.info("Admin: logged in to high-side") @@ -485,7 +439,7 @@ async def admin_sync_to_low_flow( @sim_activity(trigger=Event.ADMIN_SYNC_COMPLETED) async def admin_sync_to_high_flow( - ctx: SimulatorContext, admin_auth_high, admin_auth_low + ctx: SimulatorContext, admin_auth_high: dict, admin_auth_low: dict ): high_client = sy.login(**admin_auth_high) ctx.logger.info("Admin low: logged in to high-side") diff --git a/tests/scenariosv2/l2_test.py b/tests/scenariosv2/l2_test.py index 28ace2990cf..b4cacc4490e 100644 --- a/tests/scenariosv2/l2_test.py +++ b/tests/scenariosv2/l2_test.py @@ -7,20 +7,22 @@ # third party from faker import Faker -from l0_test import Event -from l0_test import admin_create_bq_pool_high -from l0_test import admin_create_endpoint -from l0_test import admin_signup_users -from l0_test import query_sql import pytest -from sim.core import Simulator -from sim.core import SimulatorContext -from sim.core import sim_activity -from sim.core import sim_entrypoint # syft absolute import syft as sy -from syft.client.client import SyftClient + +# relative +from .flows.user_bigquery_api import bq_submit_query +from .flows.user_bigquery_api import bq_test_query +from .l0_test import Event +from .l0_test import admin_create_bq_pool_high +from .l0_test import admin_create_endpoint +from .l0_test import admin_signup_users +from .sim.core import Simulator +from .sim.core import SimulatorContext +from .sim.core import sim_activity +from .sim.core import sim_entrypoint fake = Faker() @@ -31,7 +33,7 @@ Event.USER_CAN_SUBMIT_QUERY, ] ) -async def admin_triage_requests(ctx: SimulatorContext, admin_client: SyftClient): +async def admin_triage_requests(ctx: SimulatorContext, admin_client: sy.DatasiteClient): while True: await asyncio.sleep(random.uniform(3, 5)) ctx.logger.info("Admin: Triaging requests") @@ -72,16 +74,7 @@ async def admin_flow( ) async def user_query_test_endpoint(ctx: SimulatorContext, client: sy.DatasiteClient): """Run query on test endpoint""" - - user = client.logged_in_user - - def _query_endpoint(): - ctx.logger.info(f"User: {user} - Calling client.api.bigquery.test_query (mock)") - res = client.api.bigquery.test_query(sql_query=query_sql()) - assert len(res) == 10000 - ctx.logger.info(f"User: {user} - Received {len(res)} rows") - - await asyncio.to_thread(_query_endpoint) + await asyncio.to_thread(bq_test_query, ctx, client) @sim_activity( @@ -93,19 +86,7 @@ def _query_endpoint(): ) async def user_bq_submit(ctx: SimulatorContext, client: sy.DatasiteClient): """Submit query to be run on private data""" - user = client.logged_in_user - - def _submit_endpoint(): - ctx.logger.info( - f"User: {user} - Calling client.api.services.bigquery.submit_query" - ) - res = client.api.bigquery.submit_query( - func_name="invalid_func", - query=query_sql(), - ) - ctx.logger.info(f"User: {user} - Received {res}") - - await asyncio.to_thread(_submit_endpoint) + await asyncio.to_thread(bq_submit_query, ctx, client) @sim_activity( From 14187564086a81f758fad005de6911432eda6636 Mon Sep 17 00:00:00 2001 From: Yash Gorana Date: Fri, 27 Sep 2024 00:33:41 +0530 Subject: [PATCH 35/78] rename bq api user flows --- tests/scenariosv2/flows/user_bigquery_api.py | 34 +++++++++++++------- tests/scenariosv2/l0_test.py | 18 +++++------ tests/scenariosv2/l2_test.py | 10 +++--- 3 files changed, 37 insertions(+), 25 deletions(-) diff --git a/tests/scenariosv2/flows/user_bigquery_api.py 
b/tests/scenariosv2/flows/user_bigquery_api.py index be959ed21bf..9f0fc7d68e7 100644 --- a/tests/scenariosv2/flows/user_bigquery_api.py +++ b/tests/scenariosv2/flows/user_bigquery_api.py @@ -9,7 +9,7 @@ # relative from ..sim.core import SimulatorContext -__all__ = ["bq_test_query", "bq_submit_query", "bq_submit_query_results"] +__all__ = ["bq_test_query", "bq_submit_query", "bq_check_query_results"] def query_sql(): @@ -26,39 +26,51 @@ def query_sql(): def bq_test_query(ctx: SimulatorContext, client: sy.DatasiteClient): - ctx.logger.info( - f"User: {client.logged_in_user} - Calling client.api.bigquery.test_query (mock)" - ) + user = client.logged_in_user + ctx.logger.info(f"User: {user} - Calling client.api.bigquery.test_query (mock)") res = client.api.bigquery.test_query(sql_query=query_sql()) assert len(res) == 10000 - ctx.logger.info(f"User: {client.logged_in_user} - Received {len(res)} rows") + ctx.logger.info(f"User: {user} - Received {len(res)} rows") return res def bq_submit_query(ctx: SimulatorContext, client: sy.DatasiteClient): + user = client.logged_in_user # Randomly define a func_name a function to call func_name = "invalid_func" if random.random() < 0.5 else "test_query" ctx.logger.info( - f"User: {client.logged_in_user} - Calling client.api.services.bigquery.submit_query func_name={func_name}" + f"User: {user} - Calling client.api.services.bigquery.submit_query func_name={func_name}" ) res = client.api.bigquery.submit_query( func_name=func_name, query=query_sql(), ) - ctx.logger.info(f"User: {client.logged_in_user} - Received {res}") + assert isinstance(res, sy.SyftSuccess), res + ctx.logger.info(f"User: {user} - Received {res}") return res -def bq_submit_query_results(ctx: SimulatorContext, client: sy.DatasiteClient): +def bq_check_query_results(ctx: SimulatorContext, client: sy.DatasiteClient): + user = client.logged_in_user + for request in client.requests: - if request.get_status() == RequestStatus.APPROVED: + status = request.get_status() + + if status == RequestStatus.APPROVED: job = request.code(blocking=False) result = job.wait() assert len(result) == 10000 - if request.get_status() == RequestStatus.REJECTED: ctx.logger.info( - f"User: {client.logged_in_user} - Request rejected {request.code.service_func_name}" + f"User: {user} - {request.code.service_func_name} - Request approved" + ) + elif status == RequestStatus.REJECTED: + ctx.logger.info( + f"User: {user} - {request.code.service_func_name} - Request rejected" + ) + else: + ctx.logger.info( + f"User: {user} - {request.code.service_func_name} - Request pending" ) return True diff --git a/tests/scenariosv2/l0_test.py b/tests/scenariosv2/l0_test.py index 6812fbd0e16..61499b7f82a 100644 --- a/tests/scenariosv2/l0_test.py +++ b/tests/scenariosv2/l0_test.py @@ -18,7 +18,7 @@ # relative from .flows.user_bigquery_api import bq_submit_query -from .flows.user_bigquery_api import bq_submit_query_results +from .flows.user_bigquery_api import bq_check_query_results from .flows.user_bigquery_api import bq_test_query from .sim.core import BaseEvent from .sim.core import Simulator @@ -66,16 +66,16 @@ class Event(BaseEvent): wait_for=Event.ADMIN_LOW_SIDE_ENDPOINTS_AVAILABLE, trigger=Event.USER_CAN_QUERY_TEST_ENDPOINT, ) -async def user_query_test_endpoint(ctx: SimulatorContext, client: sy.DatasiteClient): +async def user_bq_test_query(ctx: SimulatorContext, client: sy.DatasiteClient): """Run query on test endpoint""" await asyncio.to_thread(bq_test_query, ctx, client) @sim_activity( - wait_for=Event.USER_CAN_QUERY_TEST_ENDPOINT, 
+ wait_for=Event.ADMIN_LOW_SIDE_ENDPOINTS_AVAILABLE, trigger=Event.USER_CAN_SUBMIT_QUERY, ) -async def user_bq_submit(ctx: SimulatorContext, client: sy.DatasiteClient): +async def user_bq_submit_query(ctx: SimulatorContext, client: sy.DatasiteClient): """Submit query to be run on private data""" await asyncio.to_thread(bq_submit_query, ctx, client) @@ -84,8 +84,8 @@ async def user_bq_submit(ctx: SimulatorContext, client: sy.DatasiteClient): wait_for=Event.ADMIN_LOW_ALL_RESULTS_AVAILABLE, trigger=Event.USER_CHECKED_RESULTS, ) -async def user_checks_results(ctx: SimulatorContext, client: sy.DatasiteClient): - await asyncio.to_thread(bq_submit_query_results, ctx, client) +async def user_bq_results(ctx: SimulatorContext, client: sy.DatasiteClient): + await asyncio.to_thread(bq_check_query_results, ctx, client) @sim_activity(wait_for=Event.GUEST_USERS_CREATED, trigger=Event.USER_FLOW_COMPLETED) @@ -98,9 +98,9 @@ async def user_flow(ctx: SimulatorContext, server_url_low: str, user: dict): ctx.logger.info(f"User: {client.logged_in_user} - logged in") # this must be executed sequentially. - await user_query_test_endpoint(ctx, client) - await user_bq_submit(ctx, client) - await user_checks_results(ctx, client) + await user_bq_test_query(ctx, client) + await user_bq_submit_query(ctx, client) + await user_bq_results(ctx, client) # ------------------------------------------------------------------------------------------------ diff --git a/tests/scenariosv2/l2_test.py b/tests/scenariosv2/l2_test.py index b4cacc4490e..a04ed274f44 100644 --- a/tests/scenariosv2/l2_test.py +++ b/tests/scenariosv2/l2_test.py @@ -72,19 +72,19 @@ async def admin_flow( ], trigger=Event.USER_CAN_QUERY_TEST_ENDPOINT, ) -async def user_query_test_endpoint(ctx: SimulatorContext, client: sy.DatasiteClient): +async def user_bq_test_query(ctx: SimulatorContext, client: sy.DatasiteClient): """Run query on test endpoint""" await asyncio.to_thread(bq_test_query, ctx, client) @sim_activity( wait_for=[ - Event.USER_CAN_QUERY_TEST_ENDPOINT, + Event.ADMIN_ALL_ENDPOINTS_CREATED, Event.ADMIN_HIGHSIDE_WORKER_POOL_CREATED, ], trigger=Event.USER_CAN_SUBMIT_QUERY, ) -async def user_bq_submit(ctx: SimulatorContext, client: sy.DatasiteClient): +async def user_bq_submit_query(ctx: SimulatorContext, client: sy.DatasiteClient): """Submit query to be run on private data""" await asyncio.to_thread(bq_submit_query, ctx, client) @@ -101,8 +101,8 @@ async def user_flow(ctx: SimulatorContext, server_url: str, user: dict): ) ctx.logger.info(f"User: {client.logged_in_user} - logged in") - await user_query_test_endpoint(ctx, client) - await user_bq_submit(ctx, client) + await user_bq_test_query(ctx, client) + await user_bq_submit_query(ctx, client) # ---------------------------------- test ---------------------------------- From 2227c3adc2bfdf716968409ed93deeab6e1fa53f Mon Sep 17 00:00:00 2001 From: Yash Gorana Date: Fri, 27 Sep 2024 01:16:04 +0530 Subject: [PATCH 36/78] refactor bq_create_pool --- .../syft/util/test_helpers/worker_helpers.py | 5 +- .../scenariosv2/flows/admin_bigquery_pool.py | 44 ++++++++++++++ tests/scenariosv2/flows/utils.py | 2 + tests/scenariosv2/l0_test.py | 57 +++++-------------- 4 files changed, 65 insertions(+), 43 deletions(-) create mode 100644 tests/scenariosv2/flows/admin_bigquery_pool.py create mode 100644 tests/scenariosv2/flows/utils.py diff --git a/packages/syft/src/syft/util/test_helpers/worker_helpers.py b/packages/syft/src/syft/util/test_helpers/worker_helpers.py index 3c2667fecc8..1c4acecc7aa 100644 --- 
a/packages/syft/src/syft/util/test_helpers/worker_helpers.py +++ b/packages/syft/src/syft/util/test_helpers/worker_helpers.py @@ -50,7 +50,10 @@ def build_and_launch_worker_pool_from_docker_str( print(result) # assert 'success' in str(result.message) - if environment == "remote": + # scale_to > 1 is valid for scale up + # scale_to = 0 is valid for removing all pods + # scale_to < 0 should return error from server + if environment == "remote" and scale_to != 1: result = client.worker_pools.scale(number=scale_to, pool_name=worker_pool_name) print(result) diff --git a/tests/scenariosv2/flows/admin_bigquery_pool.py b/tests/scenariosv2/flows/admin_bigquery_pool.py new file mode 100644 index 00000000000..6dbba7e6702 --- /dev/null +++ b/tests/scenariosv2/flows/admin_bigquery_pool.py @@ -0,0 +1,44 @@ +# syft absolute +import syft as sy +from syft.util.test_helpers.worker_helpers import ( + build_and_launch_worker_pool_from_docker_str, +) + +# relative +from ..sim.core import SimulatorContext +from .utils import server_info + +__all__ = ["bq_create_pool"] + + +def bq_create_pool( + ctx: SimulatorContext, + admin_client: sy.DatasiteClient, + worker_pool="biquery-pool", + external_registry_url="k3d-registry.localhost:5800", +): + base_image = admin_client.images.get_all()[0] + worker_image_tag = str(base_image.image_identifier).replace( + "syft-backend", worker_pool + ) + + worker_dockerfile = ( + f"FROM {str(base_image.image_identifier)}\n" + f"RUN uv pip install db-dtypes google-cloud-bigquery" + ) + + msg = f"Admin: Worker Pool tag '{worker_image_tag}' on {server_info(admin_client)}" + + ctx.logger.info(f"{msg} - Creating") + build_and_launch_worker_pool_from_docker_str( + environment="remote", + client=admin_client, + worker_pool_name=worker_pool, + worker_dockerfile=worker_dockerfile, + external_registry=external_registry_url, + docker_tag=worker_image_tag, + custom_pool_pod_annotations=None, + custom_pool_pod_labels=None, + scale_to=1, + ) + ctx.logger.info(f"{msg} - Created") diff --git a/tests/scenariosv2/flows/utils.py b/tests/scenariosv2/flows/utils.py new file mode 100644 index 00000000000..9aa0cb5f39f --- /dev/null +++ b/tests/scenariosv2/flows/utils.py @@ -0,0 +1,2 @@ +def server_info(client) -> str: + return f"{client.name}=>{client.connection}" diff --git a/tests/scenariosv2/l0_test.py b/tests/scenariosv2/l0_test.py index 61499b7f82a..18039d0698b 100644 --- a/tests/scenariosv2/l0_test.py +++ b/tests/scenariosv2/l0_test.py @@ -12,13 +12,11 @@ from syft.service.request.request import RequestStatus from syft.util.test_helpers.apis import make_schema from syft.util.test_helpers.apis import make_test_query -from syft.util.test_helpers.worker_helpers import ( - build_and_launch_worker_pool_from_docker_str, -) # relative -from .flows.user_bigquery_api import bq_submit_query +from .flows.admin_bigquery_pool import bq_create_pool from .flows.user_bigquery_api import bq_check_query_results +from .flows.user_bigquery_api import bq_submit_query from .flows.user_bigquery_api import bq_test_query from .sim.core import BaseEvent from .sim.core import Simulator @@ -90,6 +88,17 @@ async def user_bq_results(ctx: SimulatorContext, client: sy.DatasiteClient): @sim_activity(wait_for=Event.GUEST_USERS_CREATED, trigger=Event.USER_FLOW_COMPLETED) async def user_flow(ctx: SimulatorContext, server_url_low: str, user: dict): + """ + Replicates user's flow on the low side + - User logs in + - User invokes the test query endpoint to get mock results - user_bq_test_query + - User submits a query to be run on the 
private data for approval - user_bq_submit_query + - User checks if request is approved and retrieves the results - user_bq_results + + The test -> submit -> results are typically done in sequence. + test & submit can be done in parallel but results can be checked only after submit is done. + """ + client = sy.login( url=server_url_low, email=user["email"], @@ -97,7 +106,6 @@ async def user_flow(ctx: SimulatorContext, server_url_low: str, user: dict): ) ctx.logger.info(f"User: {client.logged_in_user} - logged in") - # this must be executed sequentially. await user_bq_test_query(ctx, client) await user_bq_submit_query(ctx, client) await user_bq_results(ctx, client) @@ -289,53 +297,18 @@ async def admin_watch_sync(ctx: SimulatorContext, admin_client: sy.DatasiteClien ctx.logger.info(f"Admin low: Pending requests: {pending_requests}") -# @sim_activity(trigger=Event.ADMIN_WORKER_POOL_CREATED) -async def admin_create_bq_pool(ctx: SimulatorContext, admin_client: sy.DatasiteClient): - worker_pool = "biquery-pool" - - base_image = admin_client.images.get_all()[0] - - external_registry_url = "k3d-registry.localhost:5800" - worker_image_tag = str(base_image.image_identifier).replace( - "backend", "worker-bigquery" - ) - - worker_dockerfile = f""" - FROM {str(base_image.image_identifier)} - RUN uv pip install db-dtypes google-cloud-bigquery - """.strip() - - ctx.logger.info(f"Admin: Creating worker pool with tag='{worker_image_tag}'") - - # build_and_launch_worker_pool_from_docker_str is a blocking call - # so you just run it in a different thread. - await ctx.blocking_call( - build_and_launch_worker_pool_from_docker_str, - environment="remote", - client=admin_client, - worker_pool_name=worker_pool, - worker_dockerfile=worker_dockerfile, - external_registry=external_registry_url, - docker_tag=worker_image_tag, - custom_pool_pod_annotations=None, - custom_pool_pod_labels=None, - scale_to=1, - ) - ctx.logger.info(f"Admin: Worker pool created with tag='{worker_image_tag}'") - - @sim_activity(trigger=Event.ADMIN_HIGHSIDE_WORKER_POOL_CREATED) async def admin_create_bq_pool_high( ctx: SimulatorContext, admin_client: sy.DatasiteClient ): - await admin_create_bq_pool(ctx, admin_client) + await asyncio.to_thread(bq_create_pool, ctx, admin_client) @sim_activity(trigger=Event.ADMIN_LOWSIDE_WORKER_POOL_CREATED) async def admin_create_bq_pool_low( ctx: SimulatorContext, admin_client: sy.DatasiteClient ): - await admin_create_bq_pool(ctx, admin_client) + await asyncio.to_thread(bq_create_pool, ctx, admin_client) @sim_activity( From de4b749ad67e9ca221c25cf6a001e8cedc296ccd Mon Sep 17 00:00:00 2001 From: Yash Gorana Date: Fri, 27 Sep 2024 01:43:35 +0530 Subject: [PATCH 37/78] register users concurrently --- tests/scenariosv2/flows/admin_common.py | 20 ++++++++++++++++++++ tests/scenariosv2/flows/user_bigquery_api.py | 2 +- tests/scenariosv2/l0_test.py | 18 +++++++----------- 3 files changed, 28 insertions(+), 12 deletions(-) create mode 100644 tests/scenariosv2/flows/admin_common.py diff --git a/tests/scenariosv2/flows/admin_common.py b/tests/scenariosv2/flows/admin_common.py new file mode 100644 index 00000000000..fc03a123cb3 --- /dev/null +++ b/tests/scenariosv2/flows/admin_common.py @@ -0,0 +1,20 @@ +# syft absolute +import syft as sy + +# relative +from ..sim.core import SimulatorContext +from .utils import server_info + +__all__ = ["register_user"] + + +def register_user(ctx: SimulatorContext, admin_client: sy.DatasiteClient, user: dict): + msg = f"Admin: User {user['email']} on {server_info(admin_client)}" + 
ctx.logger.info(f"{msg} - Creating") + _ = admin_client.register( + name=user["name"], + email=user["email"], + password=user["password"], + password_verify=user["password"], + ) + ctx.logger.info(f"{msg} - Created") diff --git a/tests/scenariosv2/flows/user_bigquery_api.py b/tests/scenariosv2/flows/user_bigquery_api.py index 9f0fc7d68e7..4f80399b2f5 100644 --- a/tests/scenariosv2/flows/user_bigquery_api.py +++ b/tests/scenariosv2/flows/user_bigquery_api.py @@ -46,7 +46,7 @@ def bq_submit_query(ctx: SimulatorContext, client: sy.DatasiteClient): func_name=func_name, query=query_sql(), ) - assert isinstance(res, sy.SyftSuccess), res + assert "Query submitted" in str(res) ctx.logger.info(f"User: {user} - Received {res}") return res diff --git a/tests/scenariosv2/l0_test.py b/tests/scenariosv2/l0_test.py index 18039d0698b..329302a581f 100644 --- a/tests/scenariosv2/l0_test.py +++ b/tests/scenariosv2/l0_test.py @@ -15,6 +15,7 @@ # relative from .flows.admin_bigquery_pool import bq_create_pool +from .flows.admin_common import register_user from .flows.user_bigquery_api import bq_check_query_results from .flows.user_bigquery_api import bq_submit_query from .flows.user_bigquery_api import bq_test_query @@ -89,7 +90,7 @@ async def user_bq_results(ctx: SimulatorContext, client: sy.DatasiteClient): @sim_activity(wait_for=Event.GUEST_USERS_CREATED, trigger=Event.USER_FLOW_COMPLETED) async def user_flow(ctx: SimulatorContext, server_url_low: str, user: dict): """ - Replicates user's flow on the low side + User flow on low-side: - User logs in - User invokes the test query endpoint to get mock results - user_bq_test_query - User submits a query to be run on the private data for approval - user_bq_submit_query @@ -115,17 +116,12 @@ async def user_flow(ctx: SimulatorContext, server_url_low: str, user: dict): @sim_activity(trigger=Event.GUEST_USERS_CREATED) -async def admin_signup_users( +async def admin_register_users( ctx: SimulatorContext, admin_client: sy.DatasiteClient, users: list[dict] ): - for user in users: - ctx.logger.info(f"Admin low: Creating guest user {user['email']}") - admin_client.register( - name=user["name"], - email=user["email"], - password=user["password"], - password_verify=user["password"], - ) + await asyncio.gather( + *[asyncio.to_thread(register_user, ctx, admin_client, user) for user in users], + ) @sim_activity(trigger=Event.ADMIN_BQ_SCHEMA_ENDPOINT_CREATED) @@ -370,7 +366,7 @@ async def admin_low_side(ctx: SimulatorContext, admin_auth, users): ctx.logger.info("Admin low-side: logged in") await asyncio.gather( - admin_signup_users(ctx, admin_client, users), + admin_register_users(ctx, admin_client, users), admin_create_bq_pool_low(ctx, admin_client), admin_watch_sync(ctx, admin_client), ) From 4726bee119e93bf0da69b2fda3d60506c0104bb9 Mon Sep 17 00:00:00 2001 From: Yash Gorana Date: Fri, 27 Sep 2024 03:45:37 +0530 Subject: [PATCH 38/78] admin api concurrency --- .../util/test_helpers/apis/mock/schema.py | 8 +- tests/scenariosv2/flows/admin_bigquery_api.py | 130 +++++++++++++++++ tests/scenariosv2/flows/user_bigquery_api.py | 27 ++-- tests/scenariosv2/flows/utils.py | 9 +- tests/scenariosv2/l0_test.py | 132 +++--------------- tests/scenariosv2/l2_test.py | 8 +- 6 files changed, 180 insertions(+), 134 deletions(-) create mode 100644 tests/scenariosv2/flows/admin_bigquery_api.py diff --git a/packages/syft/src/syft/util/test_helpers/apis/mock/schema.py b/packages/syft/src/syft/util/test_helpers/apis/mock/schema.py index 0780cf7b8e8..f79281d2dc2 100644 --- 
a/packages/syft/src/syft/util/test_helpers/apis/mock/schema.py +++ b/packages/syft/src/syft/util/test_helpers/apis/mock/schema.py @@ -9,7 +9,11 @@ from .data import schema_dict -def make_schema(settings, worker_pool_name) -> Callable: +def make_schema( + settings, + worker_pool_name, + path="bigquery.schema", +) -> Callable: updated_settings = { "calls_per_min": 5, "rate_limiter_enabled": True, @@ -17,7 +21,7 @@ def make_schema(settings, worker_pool_name) -> Callable: } | settings @sy.api_endpoint( - path="bigquery.schema", + path=path, description="This endpoint allows for visualising the metadata of tables available in BigQuery.", settings=updated_settings, helper_functions=[is_within_rate_limit], diff --git a/tests/scenariosv2/flows/admin_bigquery_api.py b/tests/scenariosv2/flows/admin_bigquery_api.py new file mode 100644 index 00000000000..7ae68383a8b --- /dev/null +++ b/tests/scenariosv2/flows/admin_bigquery_api.py @@ -0,0 +1,130 @@ +# stdlib +from typing import Any + +# syft absolute +import syft as sy +from syft.util.test_helpers.apis import make_schema +from syft.util.test_helpers.apis import make_test_query + +# relative +from ..sim.core import SimulatorContext +from .utils import server_info + +__all__ = ["bq_schema_endpoint", "bq_test_endpoint", "bq_submit_endpoint"] + + +def bq_schema_endpoint( + ctx: SimulatorContext, + admin_client: sy.DatasiteClient, + worker_pool: str, + path: str = "bigquery.schema", +): + schema_function = make_schema( + settings={ + "calls_per_min": 5, + }, + worker_pool_name=worker_pool, + path=path, + ) + + # Call admin_client.custom_api.add + __create_endpoint(ctx, admin_client, schema_function, path) + + +def bq_test_endpoint( + ctx: SimulatorContext, + admin_client: sy.DatasiteClient, + worker_pool: str, + path="bigquery.test_query", +): + private_query_function = make_test_query( + settings={ + "rate_limiter_enabled": False, + } + ) + mock_query_function = make_test_query( + settings={ + "rate_limiter_enabled": True, + "calls_per_min": 10, + } + ) + + test_endpoint = sy.TwinAPIEndpoint( + path=path, + description="This endpoint allows to query Bigquery storage via SQL queries.", + private_function=private_query_function, + mock_function=mock_query_function, + worker_pool_name=worker_pool, + ) + + # Call admin_client.custom_api.add + __create_endpoint(ctx, admin_client, test_endpoint, path) + + +def bq_submit_endpoint( + ctx: SimulatorContext, + admin_client: sy.DatasiteClient, + worker_pool: str, + path="bigquery.submit_query", +): + @sy.api_endpoint( + path=path, + description="API endpoint that allows you to submit SQL queries to run on the private data.", + worker_pool_name=worker_pool, + settings={"worker": worker_pool}, + ) + def submit_query( + context, + func_name: str, + query: str, + ) -> str: + # stdlib + import hashlib + + # syft absolute + import syft as sy + + hash_object = hashlib.new("sha256") + hash_object.update(context.user.email.encode("utf-8")) + func_name = func_name + "_" + hash_object.hexdigest()[:6] + + @sy.syft_function( + name=func_name, + input_policy=sy.MixedInputPolicy( + endpoint=sy.Constant( + val=context.admin_client.api.services.bigquery.test_query + ), + query=sy.Constant(val=query), + client=context.admin_client, + ), + worker_pool_name=context.settings["worker"], + ) + def execute_query(query: str, endpoint): + res = endpoint(sql_query=query) + return res + + request = context.user_client.code.request_code_execution(execute_query) + if isinstance(request, sy.SyftError): + return request + 
context.admin_client.requests.set_tags(request, ["autosync"]) + + return f"Query submitted {request}. Use `client.code.{func_name}()` to run your query" + + # Call admin_client.custom_api.add + __create_endpoint(ctx, admin_client, submit_query, path) + + +def __create_endpoint( + ctx: SimulatorContext, + admin_client: sy.DatasiteClient, + endpoint: Any, + path: str, +): + msg = f"Admin: Endpoint '{path}' on {server_info(admin_client)}" + ctx.logger.info(f"{msg} - Creating") + + # Create the endpoint + result = admin_client.custom_api.add(endpoint=endpoint) + assert isinstance(result, sy.SyftSuccess), result + + ctx.logger.info(f"{msg} - Created") diff --git a/tests/scenariosv2/flows/user_bigquery_api.py b/tests/scenariosv2/flows/user_bigquery_api.py index 4f80399b2f5..deb8a9cbbbc 100644 --- a/tests/scenariosv2/flows/user_bigquery_api.py +++ b/tests/scenariosv2/flows/user_bigquery_api.py @@ -27,10 +27,12 @@ def query_sql(): def bq_test_query(ctx: SimulatorContext, client: sy.DatasiteClient): user = client.logged_in_user - ctx.logger.info(f"User: {user} - Calling client.api.bigquery.test_query (mock)") + + msg = f"User: {user} - bigquery.test_query" + ctx.logger.info(f"{msg} - Invoked") res = client.api.bigquery.test_query(sql_query=query_sql()) assert len(res) == 10000 - ctx.logger.info(f"User: {user} - Received {len(res)} rows") + ctx.logger.info(f"{msg} - Response - {len(res)} rows") return res @@ -39,15 +41,14 @@ def bq_submit_query(ctx: SimulatorContext, client: sy.DatasiteClient): # Randomly define a func_name a function to call func_name = "invalid_func" if random.random() < 0.5 else "test_query" - ctx.logger.info( - f"User: {user} - Calling client.api.services.bigquery.submit_query func_name={func_name}" - ) + msg = f"User: {user} - bigquery.submit_query(func_name={func_name})" + ctx.logger.info(f"{msg} - Calling") res = client.api.bigquery.submit_query( func_name=func_name, query=query_sql(), ) assert "Query submitted" in str(res) - ctx.logger.info(f"User: {user} - Received {res}") + ctx.logger.info(f"{msg} - Response - {res}") return res @@ -57,20 +58,16 @@ def bq_check_query_results(ctx: SimulatorContext, client: sy.DatasiteClient): for request in client.requests: status = request.get_status() + msg = f"User: {user} - Request {request.code.service_func_name}" + if status == RequestStatus.APPROVED: job = request.code(blocking=False) result = job.wait() assert len(result) == 10000 - ctx.logger.info( - f"User: {user} - {request.code.service_func_name} - Request approved" - ) + ctx.logger.info(f"{msg} - Approved") elif status == RequestStatus.REJECTED: - ctx.logger.info( - f"User: {user} - {request.code.service_func_name} - Request rejected" - ) + ctx.logger.info(f"{msg} - Rejected") else: - ctx.logger.info( - f"User: {user} - {request.code.service_func_name} - Request pending" - ) + ctx.logger.info(f"{msg} - Pending") return True diff --git a/tests/scenariosv2/flows/utils.py b/tests/scenariosv2/flows/utils.py index 9aa0cb5f39f..b339c148733 100644 --- a/tests/scenariosv2/flows/utils.py +++ b/tests/scenariosv2/flows/utils.py @@ -1,2 +1,7 @@ -def server_info(client) -> str: - return f"{client.name}=>{client.connection}" +# syft absolute +import syft as sy + + +def server_info(client: sy.DatasiteClient) -> str: + url = getattr(client.connection, "url", "python") + return f"{client.name}(url={url}, side={client.metadata.server_side_type})" diff --git a/tests/scenariosv2/l0_test.py b/tests/scenariosv2/l0_test.py index 329302a581f..7a01ffb793e 100644 --- a/tests/scenariosv2/l0_test.py +++ 
b/tests/scenariosv2/l0_test.py @@ -10,10 +10,11 @@ # syft absolute import syft as sy from syft.service.request.request import RequestStatus -from syft.util.test_helpers.apis import make_schema -from syft.util.test_helpers.apis import make_test_query # relative +from .flows.admin_bigquery_api import bq_schema_endpoint +from .flows.admin_bigquery_api import bq_submit_endpoint +from .flows.admin_bigquery_api import bq_test_endpoint from .flows.admin_bigquery_pool import bq_create_pool from .flows.admin_common import register_user from .flows.user_bigquery_api import bq_check_query_results @@ -125,132 +126,40 @@ async def admin_register_users( @sim_activity(trigger=Event.ADMIN_BQ_SCHEMA_ENDPOINT_CREATED) -async def admin_endpoint_bq_schema( - ctx: SimulatorContext, - admin_client: sy.DatasiteClient, - worker_pool: str | None = None, +async def admin_create_bq_schema_endpoint( + ctx: SimulatorContext, admin_client: sy.DatasiteClient, worker_pool: str ): - path = "bigquery.schema" - schema_function = make_schema( - settings={ - "calls_per_min": 5, - }, - worker_pool_name=worker_pool, - ) - - try: - ctx.logger.info(f"Admin high: Creating endpoint '{path}'") - result = admin_client.custom_api.add(endpoint=schema_function) - assert isinstance(result, sy.SyftSuccess), result - except sy.SyftException as e: - ctx.logger.error(f"Admin high: Failed to add api endpoint '{path}' - {e}") + await asyncio.to_thread(bq_schema_endpoint, ctx, admin_client, worker_pool) @sim_activity(trigger=Event.ADMIN_BQ_TEST_ENDPOINT_CREATED) -async def admin_endpoint_bq_test( +async def admin_create_bq_test_endpoint( ctx: SimulatorContext, admin_client: sy.DatasiteClient, - worker_pool: str | None = None, + worker_pool: str, ): - path = "bigquery.test_query" - - private_query_function = make_test_query( - settings={ - "rate_limiter_enabled": False, - } - ) - mock_query_function = make_test_query( - settings={ - "rate_limiter_enabled": True, - "calls_per_min": 10, - } - ) - - new_endpoint = sy.TwinAPIEndpoint( - path=path, - description="This endpoint allows to query Bigquery storage via SQL queries.", - private_function=private_query_function, - mock_function=mock_query_function, - worker_pool_name=worker_pool, - ) - - try: - ctx.logger.info(f"Admin high: Creating endpoint '{path}'") - result = admin_client.custom_api.add(endpoint=new_endpoint) - assert isinstance(result, sy.SyftSuccess), result - except sy.SyftException as e: - ctx.logger.error(f"Admin high: Failed to add api endpoint '{path}' - {e}") + await asyncio.to_thread(bq_test_endpoint, ctx, admin_client, worker_pool) @sim_activity(trigger=Event.ADMIN_BQ_SUBMIT_ENDPOINT_CREATED) -async def admin_endpoint_bq_submit( +async def admin_create_bq_submit_endpoint( ctx: SimulatorContext, admin_client: sy.DatasiteClient, - worker_pool: str | None = None, + worker_pool: str, ): - """Setup on Low Side""" - - path = "bigquery.submit_query" - - @sy.api_endpoint( - path=path, - description="API endpoint that allows you to submit SQL queries to run on the private data.", - worker_pool_name=worker_pool, - settings={"worker": worker_pool}, - ) - def submit_query( - context, - func_name: str, - query: str, - ) -> str: - # stdlib - import hashlib - - # syft absolute - import syft as sy - - hash_object = hashlib.new("sha256") - hash_object.update(context.user.email.encode("utf-8")) - func_name = func_name + "_" + hash_object.hexdigest()[:6] - - @sy.syft_function( - name=func_name, - input_policy=sy.MixedInputPolicy( - endpoint=sy.Constant( - 
val=context.admin_client.api.services.bigquery.test_query - ), - query=sy.Constant(val=query), - client=context.admin_client, - ), - worker_pool_name=context.settings["worker"], - ) - def execute_query(query: str, endpoint): - res = endpoint(sql_query=query) - return res - - request = context.user_client.code.request_code_execution(execute_query) - if isinstance(request, sy.SyftError): - return request - context.admin_client.requests.set_tags(request, ["autosync"]) - - return f"Query submitted {request}. Use `client.code.{func_name}()` to run your query" - - try: - ctx.logger.info(f"Admin high: Creating endpoint '{path}'") - result = admin_client.custom_api.add(endpoint=submit_query) - assert isinstance(result, sy.SyftSuccess), result - except sy.SyftException as e: - ctx.logger.error(f"Admin high: Failed to add api endpoint '{path}' - {e}") + await asyncio.to_thread(bq_submit_endpoint, ctx, admin_client, worker_pool) @sim_activity(trigger=Event.ADMIN_ALL_ENDPOINTS_CREATED) -async def admin_create_endpoint(ctx: SimulatorContext, admin_client: sy.DatasiteClient): +async def admin_create_endpoints( + ctx: SimulatorContext, admin_client: sy.DatasiteClient +): worker_pool = "biquery-pool" await asyncio.gather( - admin_endpoint_bq_test(ctx, admin_client, worker_pool=worker_pool), - admin_endpoint_bq_submit(ctx, admin_client, worker_pool=worker_pool), - admin_endpoint_bq_schema(ctx, admin_client, worker_pool=worker_pool), + admin_create_bq_test_endpoint(ctx, admin_client, worker_pool), + admin_create_bq_submit_endpoint(ctx, admin_client, worker_pool), + admin_create_bq_schema_endpoint(ctx, admin_client, worker_pool), ) ctx.logger.info("Admin high: Created all endpoints") @@ -355,7 +264,7 @@ async def admin_high_side(ctx: SimulatorContext, admin_auth): await asyncio.gather( admin_create_bq_pool_high(ctx, admin_client), - admin_create_endpoint(ctx, admin_client), + admin_create_endpoints(ctx, admin_client), admin_triage_requests_high(ctx, admin_client), ) @@ -419,12 +328,13 @@ async def admin_sync_to_high_flow( while not ctx.events.is_set(Event.ADMIN_HIGHSIDE_FLOW_COMPLETED): await asyncio.sleep(random.uniform(5, 10)) + ctx.logger.info("Admin low: Started sy.sync low->high") result = sy.sync(low_client, high_client) if isinstance(result, sy.SyftSuccess): ctx.logger.info("Admin low: Nothing to sync low->high") continue - ctx.logger.info(f"Admin low: Syncing low->high {result}") + ctx.logger.info(f"Admin low: sy.sync low->high result={result}") result._share_all() result._sync_all() diff --git a/tests/scenariosv2/l2_test.py b/tests/scenariosv2/l2_test.py index a04ed274f44..c52b6636248 100644 --- a/tests/scenariosv2/l2_test.py +++ b/tests/scenariosv2/l2_test.py @@ -17,8 +17,8 @@ from .flows.user_bigquery_api import bq_test_query from .l0_test import Event from .l0_test import admin_create_bq_pool_high -from .l0_test import admin_create_endpoint -from .l0_test import admin_signup_users +from .l0_test import admin_create_endpoints +from .l0_test import admin_register_users from .sim.core import Simulator from .sim.core import SimulatorContext from .sim.core import sim_activity @@ -57,9 +57,9 @@ async def admin_flow( ctx.logger.info("Admin: logged in") await asyncio.gather( - admin_signup_users(ctx, admin_client, users), + admin_register_users(ctx, admin_client, users), admin_create_bq_pool_high(ctx, admin_client), - admin_create_endpoint(ctx, admin_client), + admin_create_endpoints(ctx, admin_client), admin_triage_requests(ctx, admin_client), ) From c4dae92a03890da1b3123244e83a5d0cb20680d6 Mon Sep 17 
00:00:00 2001 From: Yash Gorana Date: Fri, 27 Sep 2024 03:46:00 +0530 Subject: [PATCH 39/78] de-nest sim_entrypoint wrapper --- tests/scenariosv2/l0_test.py | 2 +- tests/scenariosv2/l2_test.py | 4 +++- tests/scenariosv2/sim/core.py | 37 ++++++++++++++++------------------- 3 files changed, 21 insertions(+), 22 deletions(-) diff --git a/tests/scenariosv2/l0_test.py b/tests/scenariosv2/l0_test.py index 7a01ffb793e..a53d657ed95 100644 --- a/tests/scenariosv2/l0_test.py +++ b/tests/scenariosv2/l0_test.py @@ -349,7 +349,7 @@ async def admin_sync_to_high_flow( # ------------------------------------------------------------------------------------------------ -@sim_entrypoint() +@sim_entrypoint async def sim_l0_scenario(ctx: SimulatorContext): users = [ dict( # noqa: C408 diff --git a/tests/scenariosv2/l2_test.py b/tests/scenariosv2/l2_test.py index c52b6636248..6e9be43dc6b 100644 --- a/tests/scenariosv2/l2_test.py +++ b/tests/scenariosv2/l2_test.py @@ -106,7 +106,9 @@ async def user_flow(ctx: SimulatorContext, server_url: str, user: dict): # ---------------------------------- test ---------------------------------- -@sim_entrypoint() + + +@sim_entrypoint async def sim_l2_scenario(ctx: SimulatorContext): users = [ { diff --git a/tests/scenariosv2/sim/core.py b/tests/scenariosv2/sim/core.py index 618a8aeaa66..b79f4f7b038 100644 --- a/tests/scenariosv2/sim/core.py +++ b/tests/scenariosv2/sim/core.py @@ -131,25 +131,22 @@ async def start(self, *tasks, check_events=None, random_wait=None, timeout=60): return results -def sim_entrypoint(): - def decorator(func): - @wraps(func) - async def wrapper(ctx: SimulatorContext, *args, **kwargs): - try: - ctx._elogger.info(f"Started: {func.__name__}") - result = await func(ctx, *args, **kwargs) - ctx._elogger.info(f"Completed: {func.__name__}") - return result - except Exception: - ctx._elogger.error( - f"sim_entrypoint - {func.__name__} - Unhandled exception", - exc_info=True, - ) - raise - - return wrapper +def sim_entrypoint(func): + @wraps(func) + async def wrapper(ctx: SimulatorContext, *args, **kwargs): + try: + ctx._elogger.info(f"Started: {func.__name__}") + result = await func(ctx, *args, **kwargs) + ctx._elogger.info(f"Completed: {func.__name__}") + return result + except Exception: + ctx._elogger.error( + f"sim_entrypoint - {func.__name__} - Unhandled exception", + exc_info=True, + ) + raise - return decorator + return wrapper def sim_activity( @@ -189,11 +186,11 @@ async def wrapper(ctx: SimulatorContext, *args, **kwargs): ctx.events.trigger(_trigger) return result - except Exception: + except Exception as e: ctx._elogger.error( f"sim_activity - {fsig} - Unhandled exception", exc_info=True ) - raise + raise TestFailure(e) return wrapper From dae5bbea94adfdf264918f4ef5e694a10567e17c Mon Sep 17 00:00:00 2001 From: Yash Gorana Date: Fri, 27 Sep 2024 04:58:25 +0530 Subject: [PATCH 40/78] admin sync refactor --- tests/scenariosv2/l0_test.py | 222 ++++++++++++++++++----------------- tests/scenariosv2/l2_test.py | 8 +- 2 files changed, 116 insertions(+), 114 deletions(-) diff --git a/tests/scenariosv2/l0_test.py b/tests/scenariosv2/l0_test.py index a53d657ed95..d07559f14cb 100644 --- a/tests/scenariosv2/l0_test.py +++ b/tests/scenariosv2/l0_test.py @@ -2,6 +2,7 @@ import asyncio from enum import auto import random +from venv import logger # third party from faker import Faker @@ -89,7 +90,7 @@ async def user_bq_results(ctx: SimulatorContext, client: sy.DatasiteClient): @sim_activity(wait_for=Event.GUEST_USERS_CREATED, trigger=Event.USER_FLOW_COMPLETED) 
-async def user_flow(ctx: SimulatorContext, server_url_low: str, user: dict): +async def user_low_side_flow(ctx: SimulatorContext, server_url_low: str, user: dict): """ User flow on low-side: - User logs in @@ -151,7 +152,7 @@ async def admin_create_bq_submit_endpoint( @sim_activity(trigger=Event.ADMIN_ALL_ENDPOINTS_CREATED) -async def admin_create_endpoints( +async def admin_high_create_endpoints( ctx: SimulatorContext, admin_client: sy.DatasiteClient ): worker_pool = "biquery-pool" @@ -164,53 +165,60 @@ async def admin_create_endpoints( ctx.logger.info("Admin high: Created all endpoints") +def all_available(paths: list[str], expected: list[str]): + return set(expected).issubset(set(paths)) + + @sim_activity( - wait_for=[ - Event.ADMIN_SYNCED_HIGH_TO_LOW, - # endpoints work only after low side worker pool is created - Event.ADMIN_LOWSIDE_WORKER_POOL_CREATED, - ] + # endpoints work only after low side worker pool is created + wait_for=Event.ADMIN_LOWSIDE_WORKER_POOL_CREATED ) -async def admin_watch_sync(ctx: SimulatorContext, admin_client: sy.DatasiteClient): +async def admin_low_triage_requests( + ctx: SimulatorContext, admin_client: sy.DatasiteClient +): + expected_paths = [ + "bigquery.test_query", + "bigquery.submit_query", + "bigquery.schema", + ] + while True: await asyncio.sleep(random.uniform(5, 10)) - # Check if endpoints are available - endpoints = admin_client.custom_api.get_all() - if len(endpoints) == NUM_ENDPOINTS: - ctx.logger.info( - f"Admin low: All {NUM_ENDPOINTS} API endpoints are synced from high." - ) - ctx.logger.info(f"Endpoints: {endpoints}") - ctx.events.trigger(Event.ADMIN_LOW_SIDE_ENDPOINTS_AVAILABLE) + # check if endpoints are available + if not ctx.events.is_set(Event.ADMIN_LOW_SIDE_ENDPOINTS_AVAILABLE): + endpoints = admin_client.custom_api.get_all() + paths = [ep.path for ep in endpoints] + ctx.logger.debug(f"Admin low: API endpoints - {paths}") + + if all_available(paths, expected_paths): + ctx.logger.info("Admin low: All endpoints available") + ctx.events.trigger(Event.ADMIN_LOW_SIDE_ENDPOINTS_AVAILABLE) + else: + ctx.logger.info(f"Admin low: Waiting for all endpoints {paths}") # Check if all requests are approved or denied requests = admin_client.requests.get_all() - ctx.logger.info(f"Number of requests: {len(requests)}") - if len(requests) == NUM_USERS: # NOTE: currently hard coding this since - # each user in `user_flow` submits 1 query request - pending_requests = [] - for req in admin_client.requests: - if req.get_status() == RequestStatus.PENDING: - pending_requests.append(req) - if len(pending_requests) == 0: - ctx.logger.info("Admin low: All requests are approved / denined.") - ctx.logger.info(f"Requests: {requests}") - ctx.events.trigger(Event.ADMIN_LOW_ALL_RESULTS_AVAILABLE) - break - else: - ctx.logger.info(f"Admin low: Pending requests: {pending_requests}") + pending = [req for req in requests if req.status == RequestStatus.PENDING] + ctx.logger.info(f"Admin low: Requests={len(requests)} Pending={len(pending)}") + + # If all requests have been triaged, then exit + if len(requests) == NUM_USERS and len(pending) == 0: + ctx.events.trigger(Event.ADMIN_LOW_ALL_RESULTS_AVAILABLE) + break + + ctx.logger.info("Admin low: All requests triaged.") @sim_activity(trigger=Event.ADMIN_HIGHSIDE_WORKER_POOL_CREATED) -async def admin_create_bq_pool_high( +async def admin_high_create_bq_pool( ctx: SimulatorContext, admin_client: sy.DatasiteClient ): await asyncio.to_thread(bq_create_pool, ctx, admin_client) 
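+    # bq_create_pool wraps the blocking build_and_launch_worker_pool_from_docker_str helper, so it is pushed to a worker thread to keep the event loop free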
@sim_activity(trigger=Event.ADMIN_LOWSIDE_WORKER_POOL_CREATED) -async def admin_create_bq_pool_low( +async def admin_low_create_bq_pool( ctx: SimulatorContext, admin_client: sy.DatasiteClient ): await asyncio.to_thread(bq_create_pool, ctx, admin_client) @@ -223,127 +231,121 @@ async def admin_create_bq_pool_low( ], trigger=Event.ADMIN_HIGHSIDE_FLOW_COMPLETED, ) -async def admin_triage_requests_high( +async def admin_high_triage_requests( ctx: SimulatorContext, admin_client: sy.DatasiteClient ): - while True: + while not ctx.events.is_set(Event.ADMIN_LOW_ALL_RESULTS_AVAILABLE): await asyncio.sleep(random.uniform(5, 10)) # check if there are any requests # BUG: request that are executed request.code() are always in pending state - pending_requests = [ - req - for req in admin_client.requests - if req.get_status() == RequestStatus.PENDING - ] - ctx.logger.info(f"Admin high: Found {len(pending_requests)} pending requests") - for request in pending_requests: - ctx.logger.info(f"Admin high: Found request {request.__dict__}") - if getattr(request, "code", None): - if "invalid_func" in request.code.service_func_name: - ctx.logger.info(f"Admin high: Denying request {request}") - request.deny("You gave me an `invalid_func` function") - else: - ctx.logger.info( - f"Admin high: Approving request by executing {request}" - ) - job = request.code(blocking=False) - result = job.wait() - ctx.logger.info(f"Admin high: Request result {result}") - - if ctx.events.is_set(Event.ADMIN_LOW_ALL_RESULTS_AVAILABLE): - break + requests = admin_client.requests.get_all() + pending = [req for req in requests if req.status == RequestStatus.PENDING] + ctx.logger.info(f"Admin high: Requests={len(requests)} Pending={len(pending)}") + + for request in pending: + # ignore non-code requests + if not getattr(request, "code", None): + continue + + if "invalid_func" in request.code.service_func_name: + ctx.logger.info(f"Admin high: Denying request {request}") + request.deny("You gave me an `invalid_func` function") + else: + ctx.logger.info(f"Admin high: Approving request by executing {request}") + job = request.code(blocking=False) + result = job.wait() + ctx.logger.info(f"Admin high: Request result {result}") - ctx.logger.info("Admin high: Done approving / denying all requests") + ctx.logger.info("Admin high: All requests triaged.") @sim_activity(trigger=Event.ADMIN_HIGHSIDE_FLOW_COMPLETED) -async def admin_high_side(ctx: SimulatorContext, admin_auth): +async def admin_high_side_flow(ctx: SimulatorContext, admin_auth): admin_client = sy.login(**admin_auth) ctx.logger.info("Admin high-side: logged in") await asyncio.gather( - admin_create_bq_pool_high(ctx, admin_client), - admin_create_endpoints(ctx, admin_client), - admin_triage_requests_high(ctx, admin_client), + admin_high_create_bq_pool(ctx, admin_client), + admin_high_create_endpoints(ctx, admin_client), + admin_high_triage_requests(ctx, admin_client), ) @sim_activity(trigger=Event.ADMIN_LOWSIDE_FLOW_COMPLETED) -async def admin_low_side(ctx: SimulatorContext, admin_auth, users): +async def admin_low_side_flow(ctx: SimulatorContext, admin_auth, users): admin_client = sy.login(**admin_auth) ctx.logger.info("Admin low-side: logged in") await asyncio.gather( admin_register_users(ctx, admin_client, users), - admin_create_bq_pool_low(ctx, admin_client), - admin_watch_sync(ctx, admin_client), + admin_low_create_bq_pool(ctx, admin_client), + admin_low_triage_requests(ctx, admin_client), ) # 
------------------------------------------------------------------------------------------------ -@sim_activity(trigger=Event.ADMIN_SYNC_COMPLETED) -async def admin_sync_to_low_flow( - ctx: SimulatorContext, admin_auth_high: dict, admin_auth_low: dict +async def admin_sync( + ctx: SimulatorContext, + from_auth: dict, + to_auth: dict, + trigger: Event, + exit_after: Event, ): - high_client = sy.login(**admin_auth_high) - ctx.logger.info("Admin: logged in to high-side") + from_client = sy.login(**from_auth) + to_client = sy.login(**to_auth) - low_client = sy.login(**admin_auth_low) - ctx.logger.info("Admin: logged in to low-side") + from_ = from_client.metadata.server_side_type + to_ = to_client.metadata.server_side_type - while True: + while not ctx.events.is_set(exit_after): await asyncio.sleep(random.uniform(5, 10)) - result = sy.sync(high_client, low_client) + ctx.logger.info(f"Admin: Sync {from_}->{to_} - Checking") + result = sy.sync(from_client, to_client) if isinstance(result, sy.SyftSuccess): - ctx.logger.info("Admin high: Nothing to sync high->low") continue - ctx.logger.info(f"Admin high: Syncing high->low {result}") + ctx.logger.info(f"Admin: Sync {from_}->{to_} - Result={result}") result._share_all() result._sync_all() - # trigger an event so that guest users can start querying - ctx.events.trigger(Event.ADMIN_SYNCED_HIGH_TO_LOW) - ctx.logger.info("Admin high: Synced high->low") + ctx.events.trigger(trigger) + ctx.logger.info(f"Admin: Sync {from_}->{to_} - Synced") - if ctx.events.is_set(Event.ADMIN_HIGHSIDE_FLOW_COMPLETED): - ctx.logger.info("Admin high: Done syncing high->low") - break + ctx.logger.info(f"Admin: Sync {from_}->{to_} - Closed") @sim_activity(trigger=Event.ADMIN_SYNC_COMPLETED) -async def admin_sync_to_high_flow( +async def admin_sync_high_to_low_flow( ctx: SimulatorContext, admin_auth_high: dict, admin_auth_low: dict ): - high_client = sy.login(**admin_auth_high) - ctx.logger.info("Admin low: logged in to high-side") - - low_client = sy.login(**admin_auth_low) - ctx.logger.info("Admin low: logged in to low-side") - - while not ctx.events.is_set(Event.ADMIN_HIGHSIDE_FLOW_COMPLETED): - await asyncio.sleep(random.uniform(5, 10)) - - ctx.logger.info("Admin low: Started sy.sync low->high") - result = sy.sync(low_client, high_client) - if isinstance(result, sy.SyftSuccess): - ctx.logger.info("Admin low: Nothing to sync low->high") - continue - - ctx.logger.info(f"Admin low: sy.sync low->high result={result}") - result._share_all() - result._sync_all() + await admin_sync( + ctx, + # high -> low + from_auth=admin_auth_high, + to_auth=admin_auth_low, + trigger=Event.ADMIN_SYNCED_HIGH_TO_LOW, + # todo: see if we have a better exit clause + exit_after=Event.ADMIN_HIGHSIDE_FLOW_COMPLETED, + ) - ctx.events.trigger(Event.ADMIN_SYNCED_LOW_TO_HIGH) - ctx.logger.info("Admin low: Synced low->high") - if ctx.events.is_set(Event.ADMIN_HIGHSIDE_FLOW_COMPLETED): - ctx.logger.info("Admin high: Done syncing high->low") - break +@sim_activity(trigger=Event.ADMIN_SYNC_COMPLETED) +async def admin_sync_low_to_high_flow( + ctx: SimulatorContext, admin_auth_high: dict, admin_auth_low: dict +): + await admin_sync( + ctx, + # low -> high + from_auth=admin_auth_low, + to_auth=admin_auth_high, + trigger=Event.ADMIN_SYNCED_LOW_TO_HIGH, + # todo: see if we have a better exit clause + exit_after=Event.ADMIN_LOWSIDE_FLOW_COMPLETED, + ) # ------------------------------------------------------------------------------------------------ @@ -378,11 +380,11 @@ async def sim_l0_scenario(ctx: 
SimulatorContext): ctx.logger.info("--- Initializing L0 BigQuery Scenario Test ---") await asyncio.gather( - admin_low_side(ctx, admin_auth_low, users), - admin_high_side(ctx, admin_auth_high), - admin_sync_to_low_flow(ctx, admin_auth_high, admin_auth_low), - admin_sync_to_high_flow(ctx, admin_auth_high, admin_auth_low), - *[user_flow(ctx, server_url_low, user) for user in users], + admin_low_side_flow(ctx, admin_auth_low, users), + admin_high_side_flow(ctx, admin_auth_high), + admin_sync_high_to_low_flow(ctx, admin_auth_high, admin_auth_low), + admin_sync_low_to_high_flow(ctx, admin_auth_high, admin_auth_low), + *[user_low_side_flow(ctx, server_url_low, user) for user in users], ) diff --git a/tests/scenariosv2/l2_test.py b/tests/scenariosv2/l2_test.py index 6e9be43dc6b..2baf59fda4e 100644 --- a/tests/scenariosv2/l2_test.py +++ b/tests/scenariosv2/l2_test.py @@ -16,8 +16,8 @@ from .flows.user_bigquery_api import bq_submit_query from .flows.user_bigquery_api import bq_test_query from .l0_test import Event -from .l0_test import admin_create_bq_pool_high -from .l0_test import admin_create_endpoints +from .l0_test import admin_high_create_bq_pool +from .l0_test import admin_high_create_endpoints from .l0_test import admin_register_users from .sim.core import Simulator from .sim.core import SimulatorContext @@ -58,8 +58,8 @@ async def admin_flow( await asyncio.gather( admin_register_users(ctx, admin_client, users), - admin_create_bq_pool_high(ctx, admin_client), - admin_create_endpoints(ctx, admin_client), + admin_high_create_bq_pool(ctx, admin_client), + admin_high_create_endpoints(ctx, admin_client), admin_triage_requests(ctx, admin_client), ) From 7a98935678aff6d0c86236fd5556adde0103c26d Mon Sep 17 00:00:00 2001 From: Yash Gorana Date: Fri, 27 Sep 2024 04:58:46 +0530 Subject: [PATCH 41/78] add .logs ignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index c4b8b2ebb31..a20bb4e56bc 100644 --- a/.gitignore +++ b/.gitignore @@ -92,3 +92,6 @@ notebooks/scenarios/bigquery/sync/*.json notebooks/scenarios/bigquery/sync/*.json.lock notebooks/tutorials/version-upgrades/*.yaml notebooks/tutorials/version-upgrades/*.blob + +# logs dir generated by sim tests +.logs From b5e3f1265610ed0336c7c0d3733900d2264298cb Mon Sep 17 00:00:00 2001 From: Yash Gorana Date: Fri, 27 Sep 2024 05:09:22 +0530 Subject: [PATCH 42/78] fix merge conflicts --- packages/grid/devspace.yaml | 2 +- .../{gcp.nosync.yaml => gcp.bucketsync.yaml} | 52 ++++++++++++++++--- packages/grid/helm/examples/gcp/gcp.high.yaml | 51 +++--------------- 3 files changed, 53 insertions(+), 52 deletions(-) rename packages/grid/helm/examples/gcp/{gcp.nosync.yaml => gcp.bucketsync.yaml} (56%) diff --git a/packages/grid/devspace.yaml b/packages/grid/devspace.yaml index b9a31457d4e..bf796f8bc42 100644 --- a/packages/grid/devspace.yaml +++ b/packages/grid/devspace.yaml @@ -133,7 +133,7 @@ profiles: value: tracing: enabled: true - otlpEndpoint: "http://syft-signoz-otel-collector.platform:4317" + otlpEndpoint: "http://host.k3d.internal:4317" otelProtocol: "grpc" - name: bigquery-scenario-tests diff --git a/packages/grid/helm/examples/gcp/gcp.nosync.yaml b/packages/grid/helm/examples/gcp/gcp.bucketsync.yaml similarity index 56% rename from packages/grid/helm/examples/gcp/gcp.nosync.yaml rename to packages/grid/helm/examples/gcp/gcp.bucketsync.yaml index 8e622be5254..3a106824897 100644 --- a/packages/grid/helm/examples/gcp/gcp.nosync.yaml +++ b/packages/grid/helm/examples/gcp/gcp.bucketsync.yaml @@ -1,14 +1,16 @@ 
# ================================================================================= # Syft on GKE Cluster # -# Server side : high -# Automount : NO -# Ingress : gce +# Server side : high +# Automount : YES. 1 GCS bucket mounted to seaweedfs +# Ingress : gce # Extras: # - BackendConfig for increased timeout +# - Secret for seaweedfs mount # ================================================================================= server: + # Basic Server Config name: syft-gcp side: high @@ -16,7 +18,16 @@ server: # Useful when workload identity is setup useInternalRegistry: false - # Resources set inline with c3-standard-4 machine type + # Force backend to write results to this bucket + # should be same as mountApi.mounts.local_bucket + defaultBucketName: syft-bucket-high-gcs + + # For autopilot clusters with GKE 1.28+, uncomment this + # nodeSelector: + # cloud.google.com/compute-class: Performance + # cloud.google.com/machine-family: c3 + + # Pod resources set inline with c3-standard-4 machine type resources: requests: cpu: 2 @@ -28,9 +39,28 @@ server: # ================================================================================= seaweedfs: + # SeaweedFS PVC size storageSize: 100Gi - # Resources set inline with c3-standard-4 machine type + # For autopilot clusters with GKE 1.28+, uncomment this + # nodeSelector: + # cloud.google.com/compute-class: Performance + # cloud.google.com/machine-family: c3 + + # Automount Config + # -- Mounts GCS bucket "syft-bucket-high" to SeaweedFS bucket "syft-bucket-high-gcs" + # -- "gcs_creds.json" must exist in "seaweedfs-mount-secret" + # -- "seaweedfs-mount-secret" must be provisioned externally or in extraResources + mountApi: + mounts: + - local_bucket: syft-bucket-high-gcs + remote_bucket: + type: gcs + bucket_name: syft-bucket-high + creds: /run/secrets/mount/gcs_creds.json + secretKeyName: seaweedfs-mount-secret + + # Pod resources set inline with c3-standard-4 machine type resources: requests: cpu: 2 @@ -73,7 +103,7 @@ postgres: # ================================================================================= extraResources: - # Configure load balancer backend service + # Configure GCE load balancer backend # https://cloud.google.com/kubernetes-engine/docs/how-to/ingress-configuration#configuring_ingress_features_through_backendconfig_parameters - apiVersion: cloud.google.com/v1 kind: BackendConfig @@ -81,3 +111,13 @@ extraResources: name: custom-backend-config spec: timeoutSec: 1800 + + # Secret to mount GCS bucket in seaweedfs + - apiVersion: v1 + kind: Secret + metadata: + name: seaweedfs-mount-secret + type: Opaque + data: + # base 64 encoded value + gcs_creds.json: e30= diff --git a/packages/grid/helm/examples/gcp/gcp.high.yaml b/packages/grid/helm/examples/gcp/gcp.high.yaml index 2a430807fac..8e622be5254 100644 --- a/packages/grid/helm/examples/gcp/gcp.high.yaml +++ b/packages/grid/helm/examples/gcp/gcp.high.yaml @@ -1,16 +1,14 @@ # ================================================================================= # Syft on GKE Cluster # -# Server side : high -# Automount : YES. 
1 GCS bucket mounted to seaweedfs -# Ingress : gce +# Server side : high +# Automount : NO +# Ingress : gce # Extras: # - BackendConfig for increased timeout -# - Secret for seaweedfs mount # ================================================================================= server: - # Basic Server Config name: syft-gcp side: high @@ -18,16 +16,7 @@ server: # Useful when workload identity is setup useInternalRegistry: false - # Force backend to write results to this bucket - # should be same as mountApi.mounts.local_bucket - defaultBucketName: syft-bucket-high-gcs - - # For autopilot clusters with GKE 1.28+, uncomment this - # nodeSelector: - # cloud.google.com/compute-class: Performance - # cloud.google.com/machine-family: c3 - - # Pod resources set inline with c3-standard-4 machine type + # Resources set inline with c3-standard-4 machine type resources: requests: cpu: 2 @@ -39,28 +28,9 @@ server: # ================================================================================= seaweedfs: - # SeaweedFS PVC size storageSize: 100Gi - # For autopilot clusters with GKE 1.28+, uncomment this - # nodeSelector: - # cloud.google.com/compute-class: Performance - # cloud.google.com/machine-family: c3 - - # Automount Config - # -- Mounts GCS bucket "syft-bucket-high" to SeaweedFS bucket "syft-bucket-high-gcs" - # -- "gcs_creds.json" must exist in "seaweedfs-mount-secret" - # -- "seaweedfs-mount-secret" must be provisioned externally or in extraResources - mountApi: - mounts: - - local_bucket: syft-bucket-high-gcs - remote_bucket: - type: gcs - bucket_name: syft-bucket-high - creds: /run/secrets/mount/gcs_creds.json - secretKeyName: seaweedfs-mount-secret - - # Pod resources set inline with c3-standard-4 machine type + # Resources set inline with c3-standard-4 machine type resources: requests: cpu: 2 @@ -103,7 +73,7 @@ postgres: # ================================================================================= extraResources: - # Configure GCE load balancer backend + # Configure load balancer backend service # https://cloud.google.com/kubernetes-engine/docs/how-to/ingress-configuration#configuring_ingress_features_through_backendconfig_parameters - apiVersion: cloud.google.com/v1 kind: BackendConfig @@ -111,12 +81,3 @@ extraResources: name: custom-backend-config spec: timeoutSec: 1800 - - # Secret to mount GCS bucket in seaweedfs - - apiVersion: v1 - kind: Secret - metadata: - name: seaweedfs-mount-secret - type: Opaque - data: - gcs_creds.json: base64 encoded value From 2f9fe3dcd307de70df77f59885989ebaf00bd9a4 Mon Sep 17 00:00:00 2001 From: Yash Gorana Date: Fri, 27 Sep 2024 05:10:06 +0530 Subject: [PATCH 43/78] drop unsync --- packages/syft/setup.cfg | 1 - 1 file changed, 1 deletion(-) diff --git a/packages/syft/setup.cfg b/packages/syft/setup.cfg index 519063b3ddf..b2f5fed0484 100644 --- a/packages/syft/setup.cfg +++ b/packages/syft/setup.cfg @@ -142,7 +142,6 @@ test_plugins = pytest-asyncio pytest-timeout anyio - unsync [options.entry_points] console_scripts = From 2abe42048bcfefd6aa5a366e731bf49a2b9c8ec9 Mon Sep 17 00:00:00 2001 From: dk Date: Fri, 27 Sep 2024 10:10:04 +0700 Subject: [PATCH 44/78] remove unused import. 
upper case TODO --- tests/scenariosv2/l0_test.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/scenariosv2/l0_test.py b/tests/scenariosv2/l0_test.py index d07559f14cb..c53b85f055f 100644 --- a/tests/scenariosv2/l0_test.py +++ b/tests/scenariosv2/l0_test.py @@ -2,7 +2,6 @@ import asyncio from enum import auto import random -from venv import logger # third party from faker import Faker @@ -328,7 +327,7 @@ async def admin_sync_high_to_low_flow( from_auth=admin_auth_high, to_auth=admin_auth_low, trigger=Event.ADMIN_SYNCED_HIGH_TO_LOW, - # todo: see if we have a better exit clause + # TODO: see if we have a better exit clause exit_after=Event.ADMIN_HIGHSIDE_FLOW_COMPLETED, ) @@ -343,7 +342,7 @@ async def admin_sync_low_to_high_flow( from_auth=admin_auth_low, to_auth=admin_auth_high, trigger=Event.ADMIN_SYNCED_LOW_TO_HIGH, - # todo: see if we have a better exit clause + # TODO: see if we have a better exit clause exit_after=Event.ADMIN_LOWSIDE_FLOW_COMPLETED, ) From bb13be4203a7e2b3922f033c296dfdc8a2808e85 Mon Sep 17 00:00:00 2001 From: khoaguin Date: Fri, 27 Sep 2024 10:48:17 +0700 Subject: [PATCH 45/78] [tests/scenarios] always print side-type after "Admin" in logs --- tests/scenariosv2/flows/admin_bigquery_api.py | 2 +- tests/scenariosv2/flows/admin_bigquery_pool.py | 5 ++++- tests/scenariosv2/flows/admin_common.py | 2 +- tests/scenariosv2/l0_test.py | 12 ++++++------ 4 files changed, 12 insertions(+), 9 deletions(-) diff --git a/tests/scenariosv2/flows/admin_bigquery_api.py b/tests/scenariosv2/flows/admin_bigquery_api.py index 7ae68383a8b..aff811d4199 100644 --- a/tests/scenariosv2/flows/admin_bigquery_api.py +++ b/tests/scenariosv2/flows/admin_bigquery_api.py @@ -120,7 +120,7 @@ def __create_endpoint( endpoint: Any, path: str, ): - msg = f"Admin: Endpoint '{path}' on {server_info(admin_client)}" + msg = f"Admin {admin_client.metadata.server_side_type}: Endpoint '{path}' on {server_info(admin_client)}" ctx.logger.info(f"{msg} - Creating") # Create the endpoint diff --git a/tests/scenariosv2/flows/admin_bigquery_pool.py b/tests/scenariosv2/flows/admin_bigquery_pool.py index 6dbba7e6702..6f42d6a72b2 100644 --- a/tests/scenariosv2/flows/admin_bigquery_pool.py +++ b/tests/scenariosv2/flows/admin_bigquery_pool.py @@ -27,7 +27,10 @@ def bq_create_pool( f"RUN uv pip install db-dtypes google-cloud-bigquery" ) - msg = f"Admin: Worker Pool tag '{worker_image_tag}' on {server_info(admin_client)}" + msg = ( + f"Admin {admin_client.metadata.server_side_type}: " + f"Worker Pool tag '{worker_image_tag}' on {server_info(admin_client)}" + ) ctx.logger.info(f"{msg} - Creating") build_and_launch_worker_pool_from_docker_str( diff --git a/tests/scenariosv2/flows/admin_common.py b/tests/scenariosv2/flows/admin_common.py index fc03a123cb3..2e8454ff1f7 100644 --- a/tests/scenariosv2/flows/admin_common.py +++ b/tests/scenariosv2/flows/admin_common.py @@ -9,7 +9,7 @@ def register_user(ctx: SimulatorContext, admin_client: sy.DatasiteClient, user: dict): - msg = f"Admin: User {user['email']} on {server_info(admin_client)}" + msg = f"Admin {admin_client.metadata.server_side_type}: User {user['email']} on {server_info(admin_client)}" ctx.logger.info(f"{msg} - Creating") _ = admin_client.register( name=user["name"], diff --git a/tests/scenariosv2/l0_test.py b/tests/scenariosv2/l0_test.py index c53b85f055f..7aa086b043c 100644 --- a/tests/scenariosv2/l0_test.py +++ b/tests/scenariosv2/l0_test.py @@ -262,7 +262,7 @@ async def admin_high_triage_requests( 
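The hunks that follow apply one consistent convention: every admin-side log line is prefixed with the server side type ("high" or "low") read from `client.metadata.server_side_type`, so interleaved low/high logs stay readable. A self-contained sketch of the pattern — the stand-in client classes are illustrative, not the patch's own types:

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("sim")

class _FakeMetadata:
    server_side_type = "high"

class _FakeClient:
    metadata = _FakeMetadata()

def admin_log(client, msg: str) -> None:
    # "Admin high: ..." / "Admin low: ..." instead of a bare "Admin: ..."
    logger.info(f"Admin {client.metadata.server_side_type}: {msg}")

admin_log(_FakeClient(), "logged in")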
@sim_activity(trigger=Event.ADMIN_HIGHSIDE_FLOW_COMPLETED) async def admin_high_side_flow(ctx: SimulatorContext, admin_auth): admin_client = sy.login(**admin_auth) - ctx.logger.info("Admin high-side: logged in") + ctx.logger.info("Admin high: logged in") await asyncio.gather( admin_high_create_bq_pool(ctx, admin_client), @@ -274,7 +274,7 @@ async def admin_high_side_flow(ctx: SimulatorContext, admin_auth): @sim_activity(trigger=Event.ADMIN_LOWSIDE_FLOW_COMPLETED) async def admin_low_side_flow(ctx: SimulatorContext, admin_auth, users): admin_client = sy.login(**admin_auth) - ctx.logger.info("Admin low-side: logged in") + ctx.logger.info("Admin low: logged in") await asyncio.gather( admin_register_users(ctx, admin_client, users), @@ -302,19 +302,19 @@ async def admin_sync( while not ctx.events.is_set(exit_after): await asyncio.sleep(random.uniform(5, 10)) - ctx.logger.info(f"Admin: Sync {from_}->{to_} - Checking") + ctx.logger.info(f"Admin {from_}: Sync {from_}->{to_} - Checking") result = sy.sync(from_client, to_client) if isinstance(result, sy.SyftSuccess): continue - ctx.logger.info(f"Admin: Sync {from_}->{to_} - Result={result}") + ctx.logger.info(f"Admin {from_}: Sync {from_}->{to_} - Result={result}") result._share_all() result._sync_all() ctx.events.trigger(trigger) - ctx.logger.info(f"Admin: Sync {from_}->{to_} - Synced") + ctx.logger.info(f"Admin {from_}: Sync {from_}->{to_} - Synced") - ctx.logger.info(f"Admin: Sync {from_}->{to_} - Closed") + ctx.logger.info(f"Admin {from_}: Sync {from_}->{to_} - Closed") @sim_activity(trigger=Event.ADMIN_SYNC_COMPLETED) From 3a2a0ca7b4b2339ebf14dcf2d7feb38d736dd106 Mon Sep 17 00:00:00 2001 From: khoaguin Date: Fri, 27 Sep 2024 14:47:10 +0700 Subject: [PATCH 46/78] [tests/scenarios] adding a try catch in `admin_sync` --- tests/scenariosv2/l0_test.py | 26 ++++++++++++++++---------- tests/scenariosv2/sim/core.py | 1 + 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/tests/scenariosv2/l0_test.py b/tests/scenariosv2/l0_test.py index 7aa086b043c..c141fdd1039 100644 --- a/tests/scenariosv2/l0_test.py +++ b/tests/scenariosv2/l0_test.py @@ -300,19 +300,25 @@ async def admin_sync( to_ = to_client.metadata.server_side_type while not ctx.events.is_set(exit_after): - await asyncio.sleep(random.uniform(5, 10)) + try: + await asyncio.sleep(random.uniform(3, 5)) + + ctx.logger.info(f"Admin {from_}: Sync {from_}->{to_} - Checking") + result = sy.sync(from_client, to_client) + if isinstance(result, sy.SyftSuccess): + continue - ctx.logger.info(f"Admin {from_}: Sync {from_}->{to_} - Checking") - result = sy.sync(from_client, to_client) - if isinstance(result, sy.SyftSuccess): - continue + ctx.logger.info(f"Admin {from_}: Sync {from_}->{to_} - Result={result}") + result._share_all() + result._sync_all() - ctx.logger.info(f"Admin {from_}: Sync {from_}->{to_} - Result={result}") - result._share_all() - result._sync_all() + ctx.events.trigger(trigger) + ctx.logger.info(f"Admin {from_}: Sync {from_}->{to_} - Synced") - ctx.events.trigger(trigger) - ctx.logger.info(f"Admin {from_}: Sync {from_}->{to_} - Synced") + except Exception as e: + ctx.logger.error(f"Admin {from_}: Sync {from_}->{to_} - Error: {str(e)}") + ctx.logger.info(f"Admin {from_}: Sync {from_}->{to_} - Waiting a bit..") + await asyncio.sleep(random.uniform(2, 4)) ctx.logger.info(f"Admin {from_}: Sync {from_}->{to_} - Closed") diff --git a/tests/scenariosv2/sim/core.py b/tests/scenariosv2/sim/core.py index b79f4f7b038..341d799d9da 100644 --- a/tests/scenariosv2/sim/core.py +++ 
b/tests/scenariosv2/sim/core.py @@ -184,6 +184,7 @@ async def wrapper(ctx: SimulatorContext, *args, **kwargs): if _trigger: ctx.events.trigger(_trigger) + ctx.logger.info(f"Triggering event: {_trigger.name}") return result except Exception as e: From 36be022c6bf14e0fd2744db542d4333409084f1e Mon Sep 17 00:00:00 2001 From: khoaguin Date: Fri, 27 Sep 2024 15:28:03 +0700 Subject: [PATCH 47/78] [tox] trying to fix `No interpreter found for Python 3.12 in system toolchains` issue on CI --- .github/workflows/pr-tests-stack.yml | 1 + tox.ini | 6 +----- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/.github/workflows/pr-tests-stack.yml b/.github/workflows/pr-tests-stack.yml index 409393c7d75..4e937d7a7d2 100644 --- a/.github/workflows/pr-tests-stack.yml +++ b/.github/workflows/pr-tests-stack.yml @@ -989,6 +989,7 @@ jobs: - name: Add K3d Registry run: | sudo python ./scripts/patch_hosts.py --add-k3d-registry + - name: Free Disk Space (Ubuntu) uses: jlumbroso/free-disk-space@main with: diff --git a/tox.ini b/tox.ini index eb6bb7bd0f7..38de481dcad 100644 --- a/tox.ini +++ b/tox.ini @@ -294,6 +294,7 @@ commands = [testenv:stack.test.scenario.k8s] description = Syft Scenario Tests on K8s +changedir = {toxinidir} deps = -e{toxinidir}/packages/syft[dev,data_science] pytest-asyncio @@ -302,11 +303,6 @@ allowlist_externals = bash just pytest - devspace - kubectl - grep - k3d -setenv = commands = # l2 test with a k8s high cluster bash -c "just delete-high" From a3d2470636f8e110a0047a2c4218ab8d0eac8303 Mon Sep 17 00:00:00 2001 From: khoaguin Date: Fri, 27 Sep 2024 15:58:05 +0700 Subject: [PATCH 48/78] [CI] install just to `/usr/local/bin` --- .github/workflows/pr-tests-stack.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/pr-tests-stack.yml b/.github/workflows/pr-tests-stack.yml index 4e937d7a7d2..84868d2e90b 100644 --- a/.github/workflows/pr-tests-stack.yml +++ b/.github/workflows/pr-tests-stack.yml @@ -1050,8 +1050,7 @@ jobs: - name: Install just if: steps.changes.outputs.stack == 'true' run: | - curl --proto '=https' --tlsv1.2 -sSf https://just.systems/install.sh | bash -s -- --to ~/.local/bin - echo "$HOME/.local/bin" >> $GITHUB_PATH + curl --proto '=https' --tlsv1.2 -sSf https://just.systems/install.sh | bash -s -- --to /usr/local/bin - name: Run scenario tests if: steps.changes.outputs.syft == 'true' || steps.changes.outputs.notebooks_scenario == 'true' From 10de6ec2acd8cc42d852fdd3e7a4b4a433d2dd44 Mon Sep 17 00:00:00 2001 From: khoaguin Date: Fri, 27 Sep 2024 16:04:56 +0700 Subject: [PATCH 49/78] [tox] change `deps` for `stack.test.scenario.k8s` --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index 38de481dcad..042ead3a5e3 100644 --- a/tox.ini +++ b/tox.ini @@ -296,7 +296,7 @@ commands = description = Syft Scenario Tests on K8s changedir = {toxinidir} deps = - -e{toxinidir}/packages/syft[dev,data_science] + {[testenv:syft]deps} pytest-asyncio pytest-timeout allowlist_externals = From c063522a493054d22534a046ce4c672e9fe45249 Mon Sep 17 00:00:00 2001 From: khoaguin Date: Fri, 27 Sep 2024 16:25:10 +0700 Subject: [PATCH 50/78] [CI] update uv, tox and uv-tox version --- .github/workflows/pr-tests-stack.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/pr-tests-stack.yml b/.github/workflows/pr-tests-stack.yml index 84868d2e90b..ddbff963441 100644 --- a/.github/workflows/pr-tests-stack.yml +++ b/.github/workflows/pr-tests-stack.yml @@ -875,7 +875,7 @@ jobs: if: 
steps.changes.outputs.stack == 'true' run: | python -m pip install --upgrade pip - pip install uv==0.2.17 tox==4.16.0 tox-uv==1.9.0 + pip install uv==0.4.16 tox==4.16.0 tox-uv==1.9.0 uv --version - name: Get uv cache dir if: steps.changes.outputs.stack == 'true' @@ -957,7 +957,7 @@ jobs: k3d cluster delete bigquery-high || true k3d cluster delete bigquery-low || true - pr-tests-syft-scenario-k8s: + pr-tests-simulation-scenario-k8s: strategy: max-parallel: 99 matrix: @@ -1010,7 +1010,7 @@ jobs: if: steps.changes.outputs.stack == 'true' run: | python -m pip install --upgrade pip - pip install uv==0.2.17 tox==4.16.0 tox-uv==1.9.0 + pip install uv==0.4.16 tox==4.20.0 tox-uv==1.13.0 uv --version - name: Get uv cache dir From b0f7971394a7617081242d0325ce03b55329c7f0 Mon Sep 17 00:00:00 2001 From: khoaguin Date: Fri, 27 Sep 2024 16:32:43 +0700 Subject: [PATCH 51/78] [CI] update uv and tox to latest versions for all tasks --- .github/workflows/pr-tests-stack.yml | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/pr-tests-stack.yml b/.github/workflows/pr-tests-stack.yml index ddbff963441..9b6bbdc7eb5 100644 --- a/.github/workflows/pr-tests-stack.yml +++ b/.github/workflows/pr-tests-stack.yml @@ -53,7 +53,7 @@ jobs: if: steps.changes.outputs.stack == 'true' run: | python -m pip install --upgrade pip - pip install uv==0.4.1 tox==4.18.0 tox-uv==1.11.2 + pip install uv==0.4.16 tox==4.20.0 tox-uv==1.13.0 uv --version - name: Run syft backend base image building test @@ -96,7 +96,7 @@ jobs: if: steps.changes.outputs.stack == 'true' run: | python -m pip install --upgrade pip - pip install uv==0.4.1 tox==4.18.0 tox-uv==1.11.2 + pip install uv==0.4.16 tox==4.20.0 tox-uv==1.13.0 uv --version - name: Get uv cache dir @@ -179,7 +179,7 @@ jobs: if: steps.changes.outputs.stack == 'true' run: | python -m pip install --upgrade pip - pip install uv==0.4.1 tox==4.18.0 tox-uv==1.11.2 + pip install uv==0.4.16 tox==4.20.0 tox-uv==1.13.0 uv --version - name: Get uv cache dir @@ -328,7 +328,7 @@ jobs: if: steps.changes.outputs.stack == 'true' run: | python -m pip install --upgrade pip - pip install uv==0.4.1 tox==4.18.0 tox-uv==1.11.2 + pip install uv==0.4.16 tox==4.20.0 tox-uv==1.13.0 uv --version - name: Get uv cache dir @@ -471,7 +471,7 @@ jobs: if: steps.changes.outputs.stack == 'true' run: | python -m pip install --upgrade pip - pip install uv==0.4.1 tox==4.18.0 tox-uv==1.11.2 + pip install uv==0.4.16 tox==4.20.0 tox-uv==1.13.0 uv --version - name: Get uv cache dir @@ -597,7 +597,7 @@ jobs: if: steps.changes.outputs.syft == 'true' run: | python -m pip install --upgrade pip - pip install uv==0.4.1 tox==4.18.0 tox-uv==1.11.2 + pip install uv==0.4.16 tox==4.20.0 tox-uv==1.13.0 uv --version - name: Get uv cache dir @@ -657,7 +657,7 @@ jobs: if: steps.changes.outputs.syft == 'true' run: | python -m pip install --upgrade pip - pip install uv==0.4.1 tox==4.18.0 tox-uv==1.11.2 + pip install uv==0.4.16 tox==4.20.0 tox-uv==1.13.0 uv --version - name: Get uv cache dir @@ -734,7 +734,7 @@ jobs: if: steps.changes.outputs.stack == 'true' run: | python -m pip install --upgrade pip - pip install uv==0.4.1 tox==4.18.0 tox-uv==1.11.2 + pip install uv==0.4.16 tox==4.20.0 tox-uv==1.13.0 uv --version - name: Get uv cache dir @@ -875,7 +875,7 @@ jobs: if: steps.changes.outputs.stack == 'true' run: | python -m pip install --upgrade pip - pip install uv==0.4.16 tox==4.16.0 tox-uv==1.9.0 + pip install uv==0.4.16 tox==4.20.0 tox-uv==1.13.0 uv --version - name: Get uv cache dir if: 
steps.changes.outputs.stack == 'true' From 9185cf9be90bc9920dcceadff1c918601590fbe2 Mon Sep 17 00:00:00 2001 From: Yash Gorana Date: Sun, 29 Sep 2024 23:34:01 +0530 Subject: [PATCH 52/78] just wait-pods --- justfile | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/justfile b/justfile index ec1827e7415..43367c10391 100644 --- a/justfile +++ b/justfile @@ -119,6 +119,9 @@ reset-high: (reset-syft _ctx_high _ns_default) [group('highside')] cleanup-high: (yank-ns _ctx_high _ns_default) +[group('highside')] +wait-high: (wait-pods _ctx_high _ns_default) + # K9s into the Datasite High cluster [group('highside')] k9s-high: @@ -146,6 +149,9 @@ reset-low: (reset-syft _ctx_low _ns_default) [group('lowside')] cleanup-low: (yank-ns _ctx_low _ns_default) +[group('lowside')] +wait-low: (wait-pods _ctx_low _ns_default) + # K9s into the Datesite Low cluster [group('lowside')] k9s-low: @@ -173,6 +179,9 @@ reset-gw: (reset-syft _ctx_gw _ns_default) [group('gateway')] cleanup-gw: (yank-ns _ctx_gw _ns_default) +[group('gateway')] +wait-gw: (wait-pods _ctx_gw _ns_default) + # K9s into the Gateway cluster [group('gateway')] k9s-gw: @@ -522,3 +531,17 @@ yank-ns kube_context namespace: kubectl replace --context {{ kube_context }} --raw /api/v1/namespaces/{{ namespace }}/finalize -f - @echo "Done" + +# Wait for all pods to be ready in a namespace +[group('utils')] +@wait-pods kube_context namespace: + echo "Waiting for all pods to be ready in cluster={{ kube_context }} namespace={{ namespace }}" + kubectl wait --for=condition=ready pod --all --timeout=300s --context {{ kube_context }} --namespace {{ namespace }} + + # if the above doesn't wait as we expect the drop the above and use the below + # @bash packages/grid/scripts/wait_for.sh service proxy --context {{ kube_context }} --namespace {{ namespace }} + # @bash packages/grid/scripts/wait_for.sh service frontend --context {{ kube_context }} --namespace {{ namespace }} + # @bash packages/grid/scripts/wait_for.sh service postgres --context {{ kube_context }} --namespace {{ namespace }} + # @bash packages/grid/scripts/wait_for.sh service seaweedfs --context {{ kube_context }} --namespace {{ namespace }} + # @bash packages/grid/scripts/wait_for.sh service backend --context {{ kube_context }} --namespace {{ namespace }} + echo "All pods are ready" From c7ee5fe29d05daea91e278a9e4271e07eae2e0fd Mon Sep 17 00:00:00 2001 From: dk Date: Mon, 30 Sep 2024 09:10:49 +0700 Subject: [PATCH 53/78] [CI] reverted to use `uv==0.4.1 tox==4.18.0 tox-uv==1.11.2` --- .github/workflows/pr-tests-stack.yml | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/.github/workflows/pr-tests-stack.yml b/.github/workflows/pr-tests-stack.yml index 9b6bbdc7eb5..0ea38581fd9 100644 --- a/.github/workflows/pr-tests-stack.yml +++ b/.github/workflows/pr-tests-stack.yml @@ -53,7 +53,7 @@ jobs: if: steps.changes.outputs.stack == 'true' run: | python -m pip install --upgrade pip - pip install uv==0.4.16 tox==4.20.0 tox-uv==1.13.0 + pip install uv==0.4.1 tox==4.18.0 tox-uv==1.11.2 uv --version - name: Run syft backend base image building test @@ -96,7 +96,7 @@ jobs: if: steps.changes.outputs.stack == 'true' run: | python -m pip install --upgrade pip - pip install uv==0.4.16 tox==4.20.0 tox-uv==1.13.0 + pip install uv==0.4.1 tox==4.18.0 tox-uv==1.11.2 uv --version - name: Get uv cache dir @@ -179,7 +179,7 @@ jobs: if: steps.changes.outputs.stack == 'true' run: | python -m pip install --upgrade pip - pip install uv==0.4.16 tox==4.20.0 tox-uv==1.13.0 
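An aside on the `wait-pods` recipe added above: it delegates entirely to `kubectl wait`. When debugging the recipe outside `just`, roughly the same readiness check can be scripted as below — the context and namespace defaults are assumptions, and `kubectl` must be on PATH:

import subprocess

def wait_pods(context: str = "k3d-syft-high", namespace: str = "syft",
              timeout: str = "300s") -> None:
    # Same flags as the justfile recipe; raises if pods never become ready.
    subprocess.run(
        ["kubectl", "wait", "--for=condition=ready", "pod", "--all",
         f"--timeout={timeout}", "--context", context,
         "--namespace", namespace],
        check=True,
    )

if __name__ == "__main__":
    wait_pods()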
+ pip install uv==0.4.1 tox==4.18.0 tox-uv==1.11.2 uv --version - name: Get uv cache dir @@ -328,7 +328,7 @@ jobs: if: steps.changes.outputs.stack == 'true' run: | python -m pip install --upgrade pip - pip install uv==0.4.16 tox==4.20.0 tox-uv==1.13.0 + pip install uv==0.4.1 tox==4.18.0 tox-uv==1.11.2 uv --version - name: Get uv cache dir @@ -471,7 +471,7 @@ jobs: if: steps.changes.outputs.stack == 'true' run: | python -m pip install --upgrade pip - pip install uv==0.4.16 tox==4.20.0 tox-uv==1.13.0 + pip install uv==0.4.1 tox==4.18.0 tox-uv==1.11.2 uv --version - name: Get uv cache dir @@ -597,7 +597,7 @@ jobs: if: steps.changes.outputs.syft == 'true' run: | python -m pip install --upgrade pip - pip install uv==0.4.16 tox==4.20.0 tox-uv==1.13.0 + pip install uv==0.4.1 tox==4.18.0 tox-uv==1.11.2 uv --version - name: Get uv cache dir @@ -657,7 +657,7 @@ jobs: if: steps.changes.outputs.syft == 'true' run: | python -m pip install --upgrade pip - pip install uv==0.4.16 tox==4.20.0 tox-uv==1.13.0 + pip install uv==0.4.1 tox==4.18.0 tox-uv==1.11.2 uv --version - name: Get uv cache dir @@ -734,7 +734,7 @@ jobs: if: steps.changes.outputs.stack == 'true' run: | python -m pip install --upgrade pip - pip install uv==0.4.16 tox==4.20.0 tox-uv==1.13.0 + pip install uv==0.4.1 tox==4.18.0 tox-uv==1.11.2 uv --version - name: Get uv cache dir @@ -875,7 +875,7 @@ jobs: if: steps.changes.outputs.stack == 'true' run: | python -m pip install --upgrade pip - pip install uv==0.4.16 tox==4.20.0 tox-uv==1.13.0 + pip install uv==0.4.1 tox==4.18.0 tox-uv==1.11.2 uv --version - name: Get uv cache dir if: steps.changes.outputs.stack == 'true' @@ -1010,7 +1010,7 @@ jobs: if: steps.changes.outputs.stack == 'true' run: | python -m pip install --upgrade pip - pip install uv==0.4.16 tox==4.20.0 tox-uv==1.13.0 + pip install uv==0.4.1 tox==4.18.0 tox-uv==1.11.2 uv --version - name: Get uv cache dir From 9aa6732a80915b5d77d4baddae7565365bcd8421 Mon Sep 17 00:00:00 2001 From: khoaguin Date: Mon, 30 Sep 2024 09:56:11 +0700 Subject: [PATCH 54/78] [tox] separate simulation k8s test for L0 and L2 - use new `just` commands for everything [CI] separate simulation k8s test for L0 and L2 --- .github/workflows/pr-tests-stack.yml | 119 +++++++++++++++++++++++++-- tox.ini | 48 ++++++----- 2 files changed, 137 insertions(+), 30 deletions(-) diff --git a/.github/workflows/pr-tests-stack.yml b/.github/workflows/pr-tests-stack.yml index 0ea38581fd9..07171b8e992 100644 --- a/.github/workflows/pr-tests-stack.yml +++ b/.github/workflows/pr-tests-stack.yml @@ -1075,11 +1075,120 @@ jobs: # todo: collect logs - - name: Cleanup k3d - if: steps.changes.outputs.stack == 'true' && failure() + pr-tests-simulation-scenario-k8s-sync: + strategy: + max-parallel: 99 + matrix: + os: [ubuntu-latest] + python-version: ["3.12"] + fail-fast: false + + runs-on: ${{matrix.os}} + + steps: + - name: Permission to home directory + run: | + sudo chown -R $USER:$USER $HOME + - uses: actions/checkout@v4 + - name: Check for file changes + uses: dorny/paths-filter@v3 + id: changes + with: + base: ${{ github.ref }} + token: ${{ github.token }} + filters: .github/file-filters.yml + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + if: steps.changes.outputs.stack == 'true' + with: + python-version: ${{ matrix.python-version }} + + - name: Add K3d Registry + run: | + sudo python ./scripts/patch_hosts.py --add-k3d-registry + + - name: Free Disk Space (Ubuntu) + uses: jlumbroso/free-disk-space@main + with: + tool-cache: true + 
large-packages: false + + # free 10GB of space + - name: Remove unnecessary files + if: matrix.os == 'ubuntu-latest' + run: | + sudo rm -rf /usr/share/dotnet + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + docker image prune --all --force + docker builder prune --all --force + docker system prune --all --force + + - name: Install pip dependencies + if: steps.changes.outputs.stack == 'true' + run: | + python -m pip install --upgrade pip + pip install uv==0.4.1 tox==4.18.0 tox-uv==1.11.2 + uv --version + + - name: Get uv cache dir + if: steps.changes.outputs.stack == 'true' + id: pip-cache shell: bash run: | + echo "dir=$(uv cache dir)" >> $GITHUB_OUTPUT + + - name: Load github cache + uses: actions/cache@v4 + if: steps.changes.outputs.stack == 'true' + with: + path: ${{ steps.pip-cache.outputs.dir }} + key: ${{ runner.os }}-uv-py${{ matrix.python-version }} + restore-keys: | + ${{ runner.os }}-uv-py${{ matrix.python-version }} + + - name: Install kubectl + if: steps.changes.outputs.stack == 'true' + run: | + # cleanup apt version + sudo apt remove kubectl || true + # install kubectl 1.27 + curl -LO https://dl.k8s.io/release/v1.27.2/bin/linux/amd64/kubectl + chmod +x kubectl + sudo install kubectl /usr/local/bin; + + - name: Install helm + if: steps.changes.outputs.stack == 'true' + run: | + # install helm + curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 + chmod 700 get_helm.sh + ./get_helm.sh + + - name: Install just + if: steps.changes.outputs.stack == 'true' + run: | + curl --proto '=https' --tlsv1.2 -sSf https://just.systems/install.sh | bash -s -- --to /usr/local/bin + + - name: Run scenario tests + if: steps.changes.outputs.syft == 'true' || steps.changes.outputs.notebooks_scenario == 'true' + env: + ORCHESTRA_DEPLOYMENT_TYPE: "${{ matrix.deployment-type }}" + BUMP_VERSION: "${{ matrix.bump-version }}" + TOX_PYTHON: python${{ matrix.python-version }} + shell: bash + run: | + K3D_VERSION=v5.6.3 + DEVSPACE_VERSION=v6.3.12 + # install k3d + wget https://github.com/k3d-io/k3d/releases/download/${K3D_VERSION}/k3d-linux-amd64 + mv k3d-linux-amd64 k3d + chmod +x k3d export PATH=`pwd`:$PATH - k3d cluster delete syft-high || true - k3d cluster delete syft-low || true - k3d registry delete k3d-registry.localhost || true + k3d version + curl -sSL https://github.com/loft-sh/devspace/releases/download/${DEVSPACE_VERSION}/devspace-linux-amd64 -o ./devspace + chmod +x devspace + devspace version + tox -e stack.test.scenario.k8s.sync + + # todo: collect logs diff --git a/tox.ini b/tox.ini index 042ead3a5e3..8d58f378c4b 100644 --- a/tox.ini +++ b/tox.ini @@ -31,6 +31,7 @@ envlist = stack.test.notebook.scenario.k8s stack.test.notebook.scenario.k8s.sync stack.test.scenario.k8s + stack.test.scenario.k8s.sync frontend.test.unit frontend.test.e2e frontend.generate.types @@ -293,7 +294,7 @@ commands = pytest -n auto --dist loadgroup --durations=20 --disable-warnings [testenv:stack.test.scenario.k8s] -description = Syft Scenario Tests on K8s +description = BigQuery Scenario Tests on K8s (L2) changedir = {toxinidir} deps = {[testenv:syft]deps} @@ -303,34 +304,31 @@ allowlist_externals = bash just pytest +commands_pre = + just delete-all start-high deploy-high wait-high commands = - # l2 test with a k8s high cluster - bash -c "just delete-high" - bash -c "just start-high" - bash -c "just deploy-high" - - # wait for syft-high - bash packages/grid/scripts/wait_for.sh service postgres --context k3d-syft-high --namespace syft - bash packages/grid/scripts/wait_for.sh service 
backend --context k3d-syft-high --namespace syft - bash packages/grid/scripts/wait_for.sh service proxy --context k3d-syft-high --namespace syft - bash packages/grid/scripts/wait_for.sh service seaweedfs --context k3d-syft-high --namespace syft - bash packages/grid/scripts/wait_for.sh service frontend --context k3d-syft-high --namespace syft - bash -c "pytest -s --disable-warnings tests/scenariosv2/l2_test.py" +commands_post = + just delete-all - # l0 test with a k8s high and low cluster - bash -c "just reset-high" - bash -c "just delete-low" - bash -c "just start-low deploy-low" - - # wait for syft-low - bash packages/grid/scripts/wait_for.sh service postgres --context k3d-syft-low --namespace syft - bash packages/grid/scripts/wait_for.sh service backend --context k3d-syft-low --namespace syft - bash packages/grid/scripts/wait_for.sh service proxy --context k3d-syft-low --namespace syft - bash packages/grid/scripts/wait_for.sh service seaweedfs --context k3d-syft-low --namespace syft - bash packages/grid/scripts/wait_for.sh service frontend --context k3d-syft-low --namespace syft - +[testenv:stack.test.scenario.k8s.sync] +description = BigQuery Scenario Tests on K8s (L0) +changedir = {toxinidir} +deps = + {[testenv:syft]deps} + pytest-asyncio + pytest-timeout +allowlist_externals = + bash + just + pytest +commands_pre = + just delete-all start-high deploy-high wait-high + just start-low deploy-low wait-low +commands = bash -c "pytest -s --disable-warnings tests/scenariosv2/l0_test.py" +commands_post = + just delete-all [testenv:syft.test.notebook] description = Syft Notebook Tests From 53a4cf856774f74eeeb9e761b640c4c37d8e814c Mon Sep 17 00:00:00 2001 From: khoaguin Date: Mon, 30 Sep 2024 10:18:55 +0700 Subject: [PATCH 55/78] [CI] clean up k3d for simulation k8 tests --- .github/workflows/pr-tests-stack.yml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/.github/workflows/pr-tests-stack.yml b/.github/workflows/pr-tests-stack.yml index 07171b8e992..35f1186cf5f 100644 --- a/.github/workflows/pr-tests-stack.yml +++ b/.github/workflows/pr-tests-stack.yml @@ -1075,6 +1075,14 @@ jobs: # todo: collect logs + - name: Cleanup k3d + if: steps.changes.outputs.stack == 'true' && failure() + shell: bash + run: | + export PATH=`pwd`:$PATH + k3d cluster delete syft-low || true + k3d registry delete k3d-registry.localhost || true + pr-tests-simulation-scenario-k8s-sync: strategy: max-parallel: 99 @@ -1192,3 +1200,12 @@ jobs: tox -e stack.test.scenario.k8s.sync # todo: collect logs + + - name: Cleanup k3d + if: steps.changes.outputs.stack == 'true' && failure() + shell: bash + run: | + export PATH=`pwd`:$PATH + k3d cluster delete syft-low || true + k3d cluster delete syft-high || true + k3d registry delete k3d-registry.localhost || true From 5bb27eead553c263214df6a68a647d8ebf1a279d Mon Sep 17 00:00:00 2001 From: dk Date: Mon, 30 Sep 2024 11:43:08 +0700 Subject: [PATCH 56/78] [test/scenarios] launching python server if testing on python --- tests/scenariosv2/flows/utils.py | 18 ++++++++++++++++++ tests/scenariosv2/l0_test.py | 3 +++ tests/scenariosv2/l2_test.py | 9 +++++---- tox.ini | 4 ++++ 4 files changed, 30 insertions(+), 4 deletions(-) diff --git a/tests/scenariosv2/flows/utils.py b/tests/scenariosv2/flows/utils.py index b339c148733..efd53d8747a 100644 --- a/tests/scenariosv2/flows/utils.py +++ b/tests/scenariosv2/flows/utils.py @@ -1,7 +1,25 @@ +# stdlib +import os +from urllib.parse import urlparse + # syft absolute import syft as sy +from syft.orchestra import DeploymentType + 
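# Illustrative aside (not part of this diff): the launch_server helper
# added below only spins up an in-process server when
# ORCHESTRA_DEPLOYMENT_TYPE resolves to "python"; remote/k8s runs assume
# the servers already exist. A standalone sketch of that dispatch, with
# print standing in for sy.orchestra.launch:
import os
from urllib.parse import urlparse

def launch_if_python(server_url: str, server_name: str) -> None:
    if os.environ.get("ORCHESTRA_DEPLOYMENT_TYPE", "python") != "python":
        return  # remote deployment: nothing to launch locally
    port = urlparse(server_url).port  # 8080 from "http://localhost:8080"
    print(f"would launch '{server_name}' on port {port}")

launch_if_python("http://localhost:8080", "syft-high")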
+# relative +from ..sim.core import SimulatorContext def server_info(client: sy.DatasiteClient) -> str: url = getattr(client.connection, "url", "python") return f"{client.name}(url={url}, side={client.metadata.server_side_type})" + + +def launch_server(ctx: SimulatorContext, server_url: str, server_name: str): + deployment_type = os.environ.get("ORCHESTRA_DEPLOYMENT_TYPE", DeploymentType.PYTHON) + ctx.logger.info(f"Deployment type: {deployment_type}") + if deployment_type == DeploymentType.PYTHON: + ctx.logger.info(f"Launching python server '{server_name}' at {server_url}") + parsed_url = urlparse(server_url) + port = parsed_url.port + sy.orchestra.launch(name=server_name, reset=True, dev_mode=True, port=port) diff --git a/tests/scenariosv2/l0_test.py b/tests/scenariosv2/l0_test.py index c141fdd1039..e57c7618a6a 100644 --- a/tests/scenariosv2/l0_test.py +++ b/tests/scenariosv2/l0_test.py @@ -20,6 +20,7 @@ from .flows.user_bigquery_api import bq_check_query_results from .flows.user_bigquery_api import bq_submit_query from .flows.user_bigquery_api import bq_test_query +from .flows.utils import launch_server from .sim.core import BaseEvent from .sim.core import Simulator from .sim.core import SimulatorContext @@ -368,6 +369,7 @@ async def sim_l0_scenario(ctx: SimulatorContext): ] server_url_high = "http://localhost:8080" + launch_server(ctx, server_url_high, "syft-high") admin_auth_high = dict( # noqa: C408 url=server_url_high, email="info@openmined.org", @@ -375,6 +377,7 @@ async def sim_l0_scenario(ctx: SimulatorContext): ) server_url_low = "http://localhost:8081" + launch_server(ctx, server_url_low, "syft-low") admin_auth_low = dict( # noqa: C408 url=server_url_low, email="info@openmined.org", diff --git a/tests/scenariosv2/l2_test.py b/tests/scenariosv2/l2_test.py index 2baf59fda4e..9198338e12d 100644 --- a/tests/scenariosv2/l2_test.py +++ b/tests/scenariosv2/l2_test.py @@ -15,6 +15,7 @@ # relative from .flows.user_bigquery_api import bq_submit_query from .flows.user_bigquery_api import bq_test_query +from .flows.utils import launch_server from .l0_test import Event from .l0_test import admin_high_create_bq_pool from .l0_test import admin_high_create_endpoints @@ -110,6 +111,9 @@ async def user_flow(ctx: SimulatorContext, server_url: str, user: dict): @sim_entrypoint async def sim_l2_scenario(ctx: SimulatorContext): + ctx.events.trigger(Event.INIT) + ctx.logger.info("--- Initializing L2 BigQuery Scenario Test ---") + users = [ { "name": fake.name(), @@ -120,16 +124,13 @@ async def sim_l2_scenario(ctx: SimulatorContext): ] server_url = "http://localhost:8080" - + launch_server(ctx, server_url, "syft-high") admin_auth = { "url": server_url, "email": "info@openmined.org", "password": "changethis", } - ctx.events.trigger(Event.INIT) - ctx.logger.info("--- Initializing L2 BigQuery Scenario Test ---") - await asyncio.gather( admin_flow(ctx, admin_auth, users), *[user_flow(ctx, server_url, user) for user in users], diff --git a/tox.ini b/tox.ini index 8d58f378c4b..d51946d6826 100644 --- a/tox.ini +++ b/tox.ini @@ -296,6 +296,8 @@ commands = [testenv:stack.test.scenario.k8s] description = BigQuery Scenario Tests on K8s (L2) changedir = {toxinidir} +setenv = + ORCHESTRA_DEPLOYMENT_TYPE = {env:ORCHESTRA_DEPLOYMENT_TYPE:remote} deps = {[testenv:syft]deps} pytest-asyncio @@ -314,6 +316,8 @@ commands_post = [testenv:stack.test.scenario.k8s.sync] description = BigQuery Scenario Tests on K8s (L0) changedir = {toxinidir} +setenv = + ORCHESTRA_DEPLOYMENT_TYPE = {env:ORCHESTRA_DEPLOYMENT_TYPE:remote} deps 
= {[testenv:syft]deps} pytest-asyncio From ff6f1689d4d4de9ac6dc99c5b8bb474c32a9c2ca Mon Sep 17 00:00:00 2001 From: dk Date: Mon, 30 Sep 2024 12:09:54 +0700 Subject: [PATCH 57/78] [tests/scenarios] launching python nodes with producer and consumers [just] trying to fix `wait-pods` error: `no matching resources found` --- justfile | 3 +++ tests/scenariosv2/flows/utils.py | 9 ++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/justfile b/justfile index 43367c10391..ed7a5c6bb6d 100644 --- a/justfile +++ b/justfile @@ -536,6 +536,9 @@ yank-ns kube_context namespace: [group('utils')] @wait-pods kube_context namespace: echo "Waiting for all pods to be ready in cluster={{ kube_context }} namespace={{ namespace }}" + # Wait for at least one pod to appear (timeout after 5 minutes) + timeout 300 bash -c 'until kubectl get pods --context {{ kube_context }} --namespace {{ namespace }} 2>/dev/null | grep -q ""; do sleep 5; done' + kubectl wait --for=condition=ready pod --all --timeout=300s --context {{ kube_context }} --namespace {{ namespace }} # if the above doesn't wait as we expect the drop the above and use the below diff --git a/tests/scenariosv2/flows/utils.py b/tests/scenariosv2/flows/utils.py index efd53d8747a..8492ea1cec1 100644 --- a/tests/scenariosv2/flows/utils.py +++ b/tests/scenariosv2/flows/utils.py @@ -22,4 +22,11 @@ def launch_server(ctx: SimulatorContext, server_url: str, server_name: str): ctx.logger.info(f"Launching python server '{server_name}' at {server_url}") parsed_url = urlparse(server_url) port = parsed_url.port - sy.orchestra.launch(name=server_name, reset=True, dev_mode=True, port=port) + sy.orchestra.launch( + name=server_name, + reset=True, + dev_mode=True, + port=port, + create_producer=True, + n_consumers=1, + ) From 40c938776678051db0cd6e86531760579cf121fe Mon Sep 17 00:00:00 2001 From: khoaguin Date: Mon, 30 Sep 2024 14:51:05 +0700 Subject: [PATCH 58/78] [tests/scenario] l0 and l2 bigquery tests work with python servers --- .../scenariosv2/flows/admin_bigquery_pool.py | 9 ++++- tests/scenariosv2/flows/utils.py | 36 ++++++++++--------- tests/scenariosv2/l0_test.py | 25 +++++++++++-- tests/scenariosv2/l2_test.py | 14 +++++--- 4 files changed, 60 insertions(+), 24 deletions(-) diff --git a/tests/scenariosv2/flows/admin_bigquery_pool.py b/tests/scenariosv2/flows/admin_bigquery_pool.py index 6f42d6a72b2..11c75feb641 100644 --- a/tests/scenariosv2/flows/admin_bigquery_pool.py +++ b/tests/scenariosv2/flows/admin_bigquery_pool.py @@ -1,5 +1,9 @@ +# stdlib +import os + # syft absolute import syft as sy +from syft.orchestra import DeploymentType from syft.util.test_helpers.worker_helpers import ( build_and_launch_worker_pool_from_docker_str, ) @@ -33,8 +37,11 @@ def bq_create_pool( ) ctx.logger.info(f"{msg} - Creating") + + deployment_type = os.environ.get("ORCHESTRA_DEPLOYMENT_TYPE", DeploymentType.PYTHON) + build_and_launch_worker_pool_from_docker_str( - environment="remote", + environment=str(deployment_type), client=admin_client, worker_pool_name=worker_pool, worker_dockerfile=worker_dockerfile, diff --git a/tests/scenariosv2/flows/utils.py b/tests/scenariosv2/flows/utils.py index 8492ea1cec1..c96a346e3a8 100644 --- a/tests/scenariosv2/flows/utils.py +++ b/tests/scenariosv2/flows/utils.py @@ -1,10 +1,9 @@ # stdlib -import os from urllib.parse import urlparse # syft absolute import syft as sy -from syft.orchestra import DeploymentType +from syft.orchestra import ServerHandle # relative from ..sim.core import SimulatorContext @@ -15,18 +14,21 @@ def 
server_info(client: sy.DatasiteClient) -> str: return f"{client.name}(url={url}, side={client.metadata.server_side_type})" -def launch_server(ctx: SimulatorContext, server_url: str, server_name: str): - deployment_type = os.environ.get("ORCHESTRA_DEPLOYMENT_TYPE", DeploymentType.PYTHON) - ctx.logger.info(f"Deployment type: {deployment_type}") - if deployment_type == DeploymentType.PYTHON: - ctx.logger.info(f"Launching python server '{server_name}' at {server_url}") - parsed_url = urlparse(server_url) - port = parsed_url.port - sy.orchestra.launch( - name=server_name, - reset=True, - dev_mode=True, - port=port, - create_producer=True, - n_consumers=1, - ) +def launch_server( + ctx: SimulatorContext, + server_url: str, + server_name: str, + server_side_type: str | None = "high", +) -> ServerHandle | None: + ctx.logger.info(f"Launching python server '{server_name}' at {server_url}") + parsed_url = urlparse(server_url) + port = parsed_url.port + return sy.orchestra.launch( + name=server_name, + server_side_type=server_side_type, + reset=True, + dev_mode=True, + port=port, + create_producer=True, + n_consumers=1, + ) diff --git a/tests/scenariosv2/l0_test.py b/tests/scenariosv2/l0_test.py index e57c7618a6a..061cea97bfb 100644 --- a/tests/scenariosv2/l0_test.py +++ b/tests/scenariosv2/l0_test.py @@ -1,6 +1,7 @@ # stdlib import asyncio from enum import auto +import os import random # third party @@ -9,6 +10,7 @@ # syft absolute import syft as sy +from syft.orchestra import DeploymentType from syft.service.request.request import RequestStatus # relative @@ -368,8 +370,17 @@ async def sim_l0_scenario(ctx: SimulatorContext): for _ in range(NUM_USERS) ] + deployment_type = os.environ.get("ORCHESTRA_DEPLOYMENT_TYPE", DeploymentType.PYTHON) + ctx.logger.info(f"Deployment type: {deployment_type}") + server_url_high = "http://localhost:8080" - launch_server(ctx, server_url_high, "syft-high") + if deployment_type == DeploymentType.PYTHON: + server_high = launch_server( + ctx=ctx, + server_url=server_url_high, + server_name="syft-high", + server_side_type="high", + ) admin_auth_high = dict( # noqa: C408 url=server_url_high, email="info@openmined.org", @@ -377,7 +388,13 @@ async def sim_l0_scenario(ctx: SimulatorContext): ) server_url_low = "http://localhost:8081" - launch_server(ctx, server_url_low, "syft-low") + if deployment_type == DeploymentType.PYTHON: + server_low = launch_server( + ctx=ctx, + server_url=server_url_low, + server_name="syft-low", + server_side_type="low", + ) admin_auth_low = dict( # noqa: C408 url=server_url_low, email="info@openmined.org", @@ -395,6 +412,10 @@ async def sim_l0_scenario(ctx: SimulatorContext): *[user_low_side_flow(ctx, server_url_low, user) for user in users], ) + if deployment_type == DeploymentType.PYTHON: + server_high.land() + server_low.land() + @pytest.mark.asyncio async def test_l0_scenario(request): diff --git a/tests/scenariosv2/l2_test.py b/tests/scenariosv2/l2_test.py index 9198338e12d..20937eb30c3 100644 --- a/tests/scenariosv2/l2_test.py +++ b/tests/scenariosv2/l2_test.py @@ -1,8 +1,6 @@ -# RUN: just reset-high && pytest -s tests/scenariosv2/l2_test.py -## .logs files will be created in pwd - # stdlib import asyncio +import os import random # third party @@ -11,6 +9,7 @@ # syft absolute import syft as sy +from syft.orchestra import DeploymentType # relative from .flows.user_bigquery_api import bq_submit_query @@ -124,7 +123,11 @@ async def sim_l2_scenario(ctx: SimulatorContext): ] server_url = "http://localhost:8080" - launch_server(ctx, server_url, 
"syft-high") + deployment_type = os.environ.get("ORCHESTRA_DEPLOYMENT_TYPE", DeploymentType.PYTHON) + ctx.logger.info(f"Deployment type: {deployment_type}") + if deployment_type == DeploymentType.PYTHON: + server = launch_server(ctx, server_url, "syft-high") + admin_auth = { "url": server_url, "email": "info@openmined.org", @@ -136,6 +139,9 @@ async def sim_l2_scenario(ctx: SimulatorContext): *[user_flow(ctx, server_url, user) for user in users], ) + if deployment_type == DeploymentType.PYTHON: + server.land() + @pytest.mark.asyncio async def test_l2_scenario(request): From 6cdd3d6b80fcf01cd822fab9c23ead966b4b76e0 Mon Sep 17 00:00:00 2001 From: khoaguin Date: Mon, 30 Sep 2024 15:35:40 +0700 Subject: [PATCH 59/78] [CI] fix the ORCHESTRA_DEPLOYMENT_TYPE env variable set to wrong value --- .github/workflows/pr-tests-stack.yml | 3 -- .../api/0.8/11-container-images-k8s.ipynb | 7 ++++- .../0-prepare-migration-data.ipynb | 2 +- .../1-dump-database-to-file.ipynb | 2 +- .../2-migrate-from-file.ipynb | 2 +- tox.ini | 29 +++++++++++++++++++ 6 files changed, 38 insertions(+), 7 deletions(-) diff --git a/.github/workflows/pr-tests-stack.yml b/.github/workflows/pr-tests-stack.yml index 35f1186cf5f..58adf53caf2 100644 --- a/.github/workflows/pr-tests-stack.yml +++ b/.github/workflows/pr-tests-stack.yml @@ -1055,8 +1055,6 @@ jobs: - name: Run scenario tests if: steps.changes.outputs.syft == 'true' || steps.changes.outputs.notebooks_scenario == 'true' env: - ORCHESTRA_DEPLOYMENT_TYPE: "${{ matrix.deployment-type }}" - BUMP_VERSION: "${{ matrix.bump-version }}" TOX_PYTHON: python${{ matrix.python-version }} shell: bash run: | @@ -1181,7 +1179,6 @@ jobs: - name: Run scenario tests if: steps.changes.outputs.syft == 'true' || steps.changes.outputs.notebooks_scenario == 'true' env: - ORCHESTRA_DEPLOYMENT_TYPE: "${{ matrix.deployment-type }}" BUMP_VERSION: "${{ matrix.bump-version }}" TOX_PYTHON: python${{ matrix.python-version }} shell: bash diff --git a/notebooks/api/0.8/11-container-images-k8s.ipynb b/notebooks/api/0.8/11-container-images-k8s.ipynb index ef21f427a26..e7e2917f4aa 100644 --- a/notebooks/api/0.8/11-container-images-k8s.ipynb +++ b/notebooks/api/0.8/11-container-images-k8s.ipynb @@ -1394,6 +1394,11 @@ } ], "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, "language_info": { "codemirror_mode": { "name": "ipython", @@ -1404,7 +1409,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.5" + "version": "3.12.2" } }, "nbformat": 4, diff --git a/notebooks/tutorials/version-upgrades/0-prepare-migration-data.ipynb b/notebooks/tutorials/version-upgrades/0-prepare-migration-data.ipynb index 03d4e29cfc5..9c070a9734b 100644 --- a/notebooks/tutorials/version-upgrades/0-prepare-migration-data.ipynb +++ b/notebooks/tutorials/version-upgrades/0-prepare-migration-data.ipynb @@ -251,7 +251,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.13" + "version": "3.12.2" } }, "nbformat": 4, diff --git a/notebooks/tutorials/version-upgrades/1-dump-database-to-file.ipynb b/notebooks/tutorials/version-upgrades/1-dump-database-to-file.ipynb index bc1bd06f036..f093b7b458e 100644 --- a/notebooks/tutorials/version-upgrades/1-dump-database-to-file.ipynb +++ b/notebooks/tutorials/version-upgrades/1-dump-database-to-file.ipynb @@ -144,7 +144,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.13" + 
"version": "3.12.2" } }, "nbformat": 4, diff --git a/notebooks/tutorials/version-upgrades/2-migrate-from-file.ipynb b/notebooks/tutorials/version-upgrades/2-migrate-from-file.ipynb index 930e2212911..96c6e432764 100644 --- a/notebooks/tutorials/version-upgrades/2-migrate-from-file.ipynb +++ b/notebooks/tutorials/version-upgrades/2-migrate-from-file.ipynb @@ -309,7 +309,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.13" + "version": "3.12.2" } }, "nbformat": 4, diff --git a/tox.ini b/tox.ini index d51946d6826..cf189861ec2 100644 --- a/tox.ini +++ b/tox.ini @@ -21,6 +21,7 @@ envlist = syft.test.security syft.test.unit syft.test.scenario + syft.test.scenario.sync syft.test.notebook syft.test.notebook.scenario syft.test.notebook.scenario.sync @@ -293,6 +294,32 @@ commands = bash -c 'ulimit -n 4096 || true' pytest -n auto --dist loadgroup --durations=20 --disable-warnings +[testenv:syft.test.scenario] +description = BigQuery Scenario Tests on Python Servers (L2) +changedir = {toxinidir} +deps = + {[testenv:syft]deps} + pytest-asyncio + pytest-timeout +allowlist_externals = + bash + pytest +commands = + bash -c "pytest -s --disable-warnings tests/scenariosv2/l2_test.py" + +[testenv:syft.test.scenario.sync] +description = BigQuery Scenario Tests on Python Servers (L0) +changedir = {toxinidir} +deps = + {[testenv:syft]deps} + pytest-asyncio + pytest-timeout +allowlist_externals = + bash + pytest +commands = + bash -c "pytest -s --disable-warnings tests/scenariosv2/l0_test.py" + [testenv:stack.test.scenario.k8s] description = BigQuery Scenario Tests on K8s (L2) changedir = {toxinidir} @@ -309,6 +336,7 @@ allowlist_externals = commands_pre = just delete-all start-high deploy-high wait-high commands = + bash -c "echo Running BigQuery Scenario Tests on K8s (L2) with ORCHESTRA_DEPLOYMENT_TYPE=$ORCHESTRA_DEPLOYMENT_TYPE" bash -c "pytest -s --disable-warnings tests/scenariosv2/l2_test.py" commands_post = just delete-all @@ -330,6 +358,7 @@ commands_pre = just delete-all start-high deploy-high wait-high just start-low deploy-low wait-low commands = + bash -c "echo Running BigQuery Scenario Tests on K8s (L0) with ORCHESTRA_DEPLOYMENT_TYPE=$ORCHESTRA_DEPLOYMENT_TYPE" bash -c "pytest -s --disable-warnings tests/scenariosv2/l0_test.py" commands_post = just delete-all From 723d21b505668849591299220c8a5118f7682fab Mon Sep 17 00:00:00 2001 From: khoaguin Date: Mon, 30 Sep 2024 15:49:57 +0700 Subject: [PATCH 60/78] [CI] fix tox command for scenario k8s test --- tox.ini | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tox.ini b/tox.ini index cf189861ec2..d882afbb601 100644 --- a/tox.ini +++ b/tox.ini @@ -336,7 +336,7 @@ allowlist_externals = commands_pre = just delete-all start-high deploy-high wait-high commands = - bash -c "echo Running BigQuery Scenario Tests on K8s (L2) with ORCHESTRA_DEPLOYMENT_TYPE=$ORCHESTRA_DEPLOYMENT_TYPE" + bash -c "echo Running L2 BigQuery Scenario Tests on K8s with ORCHESTRA_DEPLOYMENT_TYPE=$ORCHESTRA_DEPLOYMENT_TYPE" bash -c "pytest -s --disable-warnings tests/scenariosv2/l2_test.py" commands_post = just delete-all @@ -358,7 +358,7 @@ commands_pre = just delete-all start-high deploy-high wait-high just start-low deploy-low wait-low commands = - bash -c "echo Running BigQuery Scenario Tests on K8s (L0) with ORCHESTRA_DEPLOYMENT_TYPE=$ORCHESTRA_DEPLOYMENT_TYPE" + bash -c "echo Running L0 BigQuery Scenario Tests on K8s with ORCHESTRA_DEPLOYMENT_TYPE=$ORCHESTRA_DEPLOYMENT_TYPE" bash -c "pytest -s 
--disable-warnings tests/scenariosv2/l0_test.py" commands_post = just delete-all From 53481ca6146e8d767ea42c11f6f81a8ae5bf7199 Mon Sep 17 00:00:00 2001 From: khoaguin Date: Mon, 30 Sep 2024 16:37:32 +0700 Subject: [PATCH 61/78] [CI] re-enable `test-syft-scenario` and add `test-syft-scenario-sync` task --- .github/workflows/pr-tests-syft.yml | 86 ++++++++++++++++++++++++++++- 1 file changed, 85 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pr-tests-syft.yml b/.github/workflows/pr-tests-syft.yml index cc5fd14f893..37e15e02cf8 100644 --- a/.github/workflows/pr-tests-syft.yml +++ b/.github/workflows/pr-tests-syft.yml @@ -189,7 +189,6 @@ jobs: command: tox -e syft.test.notebook pr-tests-syft-scenario: - if: ${{ false }} strategy: max-parallel: 99 matrix: @@ -273,6 +272,91 @@ jobs: run: | tox -e syft.test.scenario + pr-tests-syft-scenario-sync: + if: ${{ false }} + strategy: + max-parallel: 99 + matrix: + # TODO try enabling on other OS + os: [ubuntu-latest] + python-version: ["3.12"] + deployment-type: ["python"] + bump-version: ["False"] + include: + - python-version: "3.11" + os: "ubuntu-latest" + deployment-type: "python" + - python-version: "3.10" + os: "ubuntu-latest" + deployment-type: "python" + - python-version: "3.12" + os: "ubuntu-latest" + deployment-type: "python" + bump-version: "True" + + runs-on: ${{ matrix.os }} + steps: + # - name: Permission to home directory + # if: matrix.os == 'ubuntu-latest' + # run: | + # sudo chown -R $USER:$USER $HOME + - name: "clean .git/config" + if: matrix.os == 'windows-latest' + continue-on-error: true + shell: bash + run: | + echo "deleting ${GITHUB_WORKSPACE}/.git/config" + rm ${GITHUB_WORKSPACE}/.git/config + + - uses: actions/checkout@v4 + + - name: Check for file changes + uses: dorny/paths-filter@v3 + id: changes + with: + base: ${{ github.ref }} + token: ${{ github.token }} + filters: .github/file-filters.yml + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + if: steps.changes.outputs.syft == 'true' || steps.changes.outputs.notebooks_scenario == 'true' + with: + python-version: ${{ matrix.python-version }} + + - name: Install pip packages + if: steps.changes.outputs.syft == 'true' || steps.changes.outputs.notebooks_scenario == 'true' + run: | + python -m pip install --upgrade pip + pip install uv==0.4.1 tox==4.18.0 tox-uv==1.11.2 + uv --version + + - name: Get uv cache dir + id: pip-cache + if: steps.changes.outputs.syft == 'true' || steps.changes.outputs.notebooks_scenario == 'true' + shell: bash + run: | + echo "dir=$(uv cache dir)" >> $GITHUB_OUTPUT + + - name: Load github cache + uses: actions/cache@v4 + if: steps.changes.outputs.syft == 'true' || steps.changes.outputs.notebooks_scenario == 'true' + with: + path: ${{ steps.pip-cache.outputs.dir }} + key: ${{ runner.os }}-uv-py${{ matrix.python-version }}-${{ hashFiles('setup.cfg') }} + restore-keys: | + ${{ runner.os }}-uv-py${{ matrix.python-version }}- + + - name: Run scenario tests + if: steps.changes.outputs.syft == 'true' || steps.changes.outputs.notebooks_scenario == 'true' + env: + ORCHESTRA_DEPLOYMENT_TYPE: "${{ matrix.deployment-type }}" + BUMP_VERSION: "${{ matrix.bump-version }}" + TOX_PYTHON: python${{ matrix.python-version }} + shell: bash + run: | + tox -e syft.test.scenario.sync + pr-tests-syft-notebook-scenario: strategy: max-parallel: 99 From 32ea796037b9bfc9e89c0a2618846630dc2fd020 Mon Sep 17 00:00:00 2001 From: khoaguin Date: Mon, 30 Sep 2024 17:13:14 +0700 Subject: [PATCH 62/78] [tox] add necessary pip packages for 
syft scenario tests --- .github/workflows/pr-tests-syft.yml | 1 - tox.ini | 4 ++++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pr-tests-syft.yml b/.github/workflows/pr-tests-syft.yml index 37e15e02cf8..1157e351a81 100644 --- a/.github/workflows/pr-tests-syft.yml +++ b/.github/workflows/pr-tests-syft.yml @@ -273,7 +273,6 @@ jobs: tox -e syft.test.scenario pr-tests-syft-scenario-sync: - if: ${{ false }} strategy: max-parallel: 99 matrix: diff --git a/tox.ini b/tox.ini index d882afbb601..f9f8636116f 100644 --- a/tox.ini +++ b/tox.ini @@ -301,6 +301,8 @@ deps = {[testenv:syft]deps} pytest-asyncio pytest-timeout + db-dtypes + google-cloud-bigquery allowlist_externals = bash pytest @@ -314,6 +316,8 @@ deps = {[testenv:syft]deps} pytest-asyncio pytest-timeout + db-dtypes + google-cloud-bigquery allowlist_externals = bash pytest From dce3ce4466c0e251e87923a5014f6b039ba375e1 Mon Sep 17 00:00:00 2001 From: Yash Gorana Date: Mon, 30 Sep 2024 16:55:13 +0530 Subject: [PATCH 63/78] DeploymentType.REMOTE as default - k8s first --- tests/scenariosv2/l0_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/scenariosv2/l0_test.py b/tests/scenariosv2/l0_test.py index 061cea97bfb..fd72fe493d8 100644 --- a/tests/scenariosv2/l0_test.py +++ b/tests/scenariosv2/l0_test.py @@ -30,7 +30,7 @@ from .sim.core import sim_entrypoint fake = Faker() -NUM_USERS = 3 +NUM_USERS = 10 NUM_ENDPOINTS = 3 # test_query, submit_query, schema_query @@ -370,7 +370,7 @@ async def sim_l0_scenario(ctx: SimulatorContext): for _ in range(NUM_USERS) ] - deployment_type = os.environ.get("ORCHESTRA_DEPLOYMENT_TYPE", DeploymentType.PYTHON) + deployment_type = os.environ.get("ORCHESTRA_DEPLOYMENT_TYPE", DeploymentType.REMOTE) ctx.logger.info(f"Deployment type: {deployment_type}") server_url_high = "http://localhost:8080" From 23ac669dd835f6989326fa6e1c0a95fd7b34419f Mon Sep 17 00:00:00 2001 From: Yash Gorana Date: Mon, 30 Sep 2024 17:00:50 +0530 Subject: [PATCH 64/78] refactor orchestra stuff --- tests/scenariosv2/flows/utils.py | 5 --- tests/scenariosv2/l0_test.py | 55 ++++++++++++++++++++------------ 2 files changed, 35 insertions(+), 25 deletions(-) diff --git a/tests/scenariosv2/flows/utils.py b/tests/scenariosv2/flows/utils.py index c96a346e3a8..a114f48a59c 100644 --- a/tests/scenariosv2/flows/utils.py +++ b/tests/scenariosv2/flows/utils.py @@ -5,9 +5,6 @@ import syft as sy from syft.orchestra import ServerHandle -# relative -from ..sim.core import SimulatorContext - def server_info(client: sy.DatasiteClient) -> str: url = getattr(client.connection, "url", "python") @@ -15,12 +12,10 @@ def server_info(client: sy.DatasiteClient) -> str: def launch_server( - ctx: SimulatorContext, server_url: str, server_name: str, server_side_type: str | None = "high", ) -> ServerHandle | None: - ctx.logger.info(f"Launching python server '{server_name}' at {server_url}") parsed_url = urlparse(server_url) port = parsed_url.port return sy.orchestra.launch( diff --git a/tests/scenariosv2/l0_test.py b/tests/scenariosv2/l0_test.py index fd72fe493d8..e0c04a2288d 100644 --- a/tests/scenariosv2/l0_test.py +++ b/tests/scenariosv2/l0_test.py @@ -359,6 +359,38 @@ async def admin_sync_low_to_high_flow( # ------------------------------------------------------------------------------------------------ +def setup_servers(ctx: SimulatorContext, server_url_high, server_url_low): + deployment_type = os.environ.get("ORCHESTRA_DEPLOYMENT_TYPE", DeploymentType.REMOTE) + ctx.logger.info(f"Deployment type: 
{deployment_type}") + + if deployment_type == DeploymentType.REMOTE: + return None, None + + ctx.logger.info(f"Launching python server high side server on {server_url_high}") + server_high = launch_server( + server_url=server_url_high, + server_name="syft-high", + server_side_type="high", + ) + + ctx.logger.info(f"Launching python server low side server on {server_url_low}") + server_low = launch_server( + server_url=server_url_low, + server_name="syft-low", + server_side_type="low", + ) + + return server_high, server_low + + +def shutdown_servers(server_high, server_low): + if server_high: + server_high.land() + + if server_low: + server_low.land() + + @sim_entrypoint async def sim_l0_scenario(ctx: SimulatorContext): users = [ @@ -370,17 +402,7 @@ async def sim_l0_scenario(ctx: SimulatorContext): for _ in range(NUM_USERS) ] - deployment_type = os.environ.get("ORCHESTRA_DEPLOYMENT_TYPE", DeploymentType.REMOTE) - ctx.logger.info(f"Deployment type: {deployment_type}") - server_url_high = "http://localhost:8080" - if deployment_type == DeploymentType.PYTHON: - server_high = launch_server( - ctx=ctx, - server_url=server_url_high, - server_name="syft-high", - server_side_type="high", - ) admin_auth_high = dict( # noqa: C408 url=server_url_high, email="info@openmined.org", @@ -388,19 +410,14 @@ async def sim_l0_scenario(ctx: SimulatorContext): ) server_url_low = "http://localhost:8081" - if deployment_type == DeploymentType.PYTHON: - server_low = launch_server( - ctx=ctx, - server_url=server_url_low, - server_name="syft-low", - server_side_type="low", - ) admin_auth_low = dict( # noqa: C408 url=server_url_low, email="info@openmined.org", password="changethis", ) + server_high, server_low = setup_servers(ctx, server_url_high, server_url_low) + ctx.events.trigger(Event.INIT) ctx.logger.info("--- Initializing L0 BigQuery Scenario Test ---") @@ -412,9 +429,7 @@ async def sim_l0_scenario(ctx: SimulatorContext): *[user_low_side_flow(ctx, server_url_low, user) for user in users], ) - if deployment_type == DeploymentType.PYTHON: - server_high.land() - server_low.land() + shutdown_servers(server_high, server_low) @pytest.mark.asyncio From f243637afa72210a97e470f21317a68637a1ddf3 Mon Sep 17 00:00:00 2001 From: dk Date: Tue, 1 Oct 2024 09:42:17 +0700 Subject: [PATCH 65/78] [tests/scenario] fix small bug [tox] add `ORCHESTRA_DEPLOYMENT_TYPE` for scenario python tests --- tests/scenariosv2/l2_test.py | 4 ++-- tox.ini | 6 ++++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/scenariosv2/l2_test.py b/tests/scenariosv2/l2_test.py index 20937eb30c3..1d713249a57 100644 --- a/tests/scenariosv2/l2_test.py +++ b/tests/scenariosv2/l2_test.py @@ -123,10 +123,10 @@ async def sim_l2_scenario(ctx: SimulatorContext): ] server_url = "http://localhost:8080" - deployment_type = os.environ.get("ORCHESTRA_DEPLOYMENT_TYPE", DeploymentType.PYTHON) + deployment_type = os.environ.get("ORCHESTRA_DEPLOYMENT_TYPE", DeploymentType.REMOTE) ctx.logger.info(f"Deployment type: {deployment_type}") if deployment_type == DeploymentType.PYTHON: - server = launch_server(ctx, server_url, "syft-high") + server = launch_server(server_url, "syft-high") admin_auth = { "url": server_url, diff --git a/tox.ini b/tox.ini index f9f8636116f..1ca9a26fb82 100644 --- a/tox.ini +++ b/tox.ini @@ -297,6 +297,8 @@ commands = [testenv:syft.test.scenario] description = BigQuery Scenario Tests on Python Servers (L2) changedir = {toxinidir} +setenv = + ORCHESTRA_DEPLOYMENT_TYPE = {env:ORCHESTRA_DEPLOYMENT_TYPE:python} deps = 
{[testenv:syft]deps} pytest-asyncio @@ -307,11 +309,14 @@ allowlist_externals = bash pytest commands = + bash -c "echo Running L2 BigQuery Scenario Tests with ORCHESTRA_DEPLOYMENT_TYPE=$ORCHESTRA_DEPLOYMENT_TYPE" bash -c "pytest -s --disable-warnings tests/scenariosv2/l2_test.py" [testenv:syft.test.scenario.sync] description = BigQuery Scenario Tests on Python Servers (L0) changedir = {toxinidir} +setenv = + ORCHESTRA_DEPLOYMENT_TYPE = {env:ORCHESTRA_DEPLOYMENT_TYPE:python} deps = {[testenv:syft]deps} pytest-asyncio @@ -322,6 +327,7 @@ allowlist_externals = bash pytest commands = + bash -c "echo Running L0 BigQuery Scenario Tests with ORCHESTRA_DEPLOYMENT_TYPE=$ORCHESTRA_DEPLOYMENT_TYPE" bash -c "pytest -s --disable-warnings tests/scenariosv2/l0_test.py" [testenv:stack.test.scenario.k8s] From 1f1027333b0449cd9a1b86b05a7a4886c9d96cd5 Mon Sep 17 00:00:00 2001 From: dk Date: Tue, 1 Oct 2024 10:16:56 +0700 Subject: [PATCH 66/78] [CI] collecting logs for `tests-simulation-scenario-k8s` --- .github/workflows/pr-tests-stack.yml | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pr-tests-stack.yml b/.github/workflows/pr-tests-stack.yml index 58adf53caf2..dfffe4b57c7 100644 --- a/.github/workflows/pr-tests-stack.yml +++ b/.github/workflows/pr-tests-stack.yml @@ -1071,7 +1071,23 @@ jobs: devspace version tox -e stack.test.scenario.k8s - # todo: collect logs + - name: Collect logs + if: steps.changes.outputs.stack == 'true' && failure() + shell: bash + run: | + mkdir -p ./output-logs + if [ -d "tests/scenariosv2/.logs" ]; then + cp -R tests/scenariosv2/.logs/* ./output-logs/ + else + echo "Log directory not found" + fi + + - name: Upload logs to GitHub + uses: actions/upload-artifact@master + if: steps.changes.outputs.stack == 'true' && failure() + with: + name: simulation-scenario-k8s-logs-${{ matrix.os }}-${{ steps.date.outputs.date }} + path: ./output-logs/ - name: Cleanup k3d if: steps.changes.outputs.stack == 'true' && failure() From 9fb851222e13a9336adda905fd1ae336e64a24cd Mon Sep 17 00:00:00 2001 From: dk Date: Tue, 1 Oct 2024 10:37:36 +0700 Subject: [PATCH 67/78] [CI] collecting logs for `tests-simulation-scenario-sync-k8s` --- .github/workflows/pr-tests-stack.yml | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pr-tests-stack.yml b/.github/workflows/pr-tests-stack.yml index dfffe4b57c7..497175d8111 100644 --- a/.github/workflows/pr-tests-stack.yml +++ b/.github/workflows/pr-tests-stack.yml @@ -1212,7 +1212,23 @@ jobs: devspace version tox -e stack.test.scenario.k8s.sync - # todo: collect logs + - name: Collect logs + if: steps.changes.outputs.stack == 'true' && failure() + shell: bash + run: | + mkdir -p ./output-logs + if [ -d "tests/scenariosv2/.logs" ]; then + cp -R tests/scenariosv2/.logs/* ./output-logs/ + else + echo "Log directory not found" + fi + + - name: Upload logs to GitHub + uses: actions/upload-artifact@master + if: steps.changes.outputs.stack == 'true' && failure() + with: + name: simulation-scenario-k8s-sync-logs-${{ matrix.os }}-${{ steps.date.outputs.date }} + path: ./output-logs/ - name: Cleanup k3d if: steps.changes.outputs.stack == 'true' && failure() From e68ae99abc83a5b6583f847aed6ead25c0bc1f5d Mon Sep 17 00:00:00 2001 From: dk Date: Tue, 1 Oct 2024 11:16:38 +0700 Subject: [PATCH 68/78] [CI] collecting logs for syft-scenario and syft-scenario-sync tests --- .github/workflows/pr-tests-syft.yml | 36 +++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff 
--git a/.github/workflows/pr-tests-syft.yml b/.github/workflows/pr-tests-syft.yml index 1157e351a81..4edc006984d 100644 --- a/.github/workflows/pr-tests-syft.yml +++ b/.github/workflows/pr-tests-syft.yml @@ -272,6 +272,24 @@ jobs: run: | tox -e syft.test.scenario + - name: Collect logs + if: steps.changes.outputs.stack == 'true' && failure() + shell: bash + run: | + mkdir -p ./output-logs + if [ -d "tests/scenariosv2/.logs" ]; then + cp -R tests/scenariosv2/.logs/* ./output-logs/ + else + echo "Log directory not found" + fi + + - name: Upload logs to GitHub + uses: actions/upload-artifact@master + if: steps.changes.outputs.stack == 'true' && failure() + with: + name: simulation-scenario-logs-${{ matrix.os }}-${{ steps.date.outputs.date }} + path: ./output-logs/ + pr-tests-syft-scenario-sync: strategy: max-parallel: 99 @@ -356,6 +374,24 @@ jobs: run: | tox -e syft.test.scenario.sync + - name: Collect logs + if: steps.changes.outputs.stack == 'true' && failure() + shell: bash + run: | + mkdir -p ./output-logs + if [ -d "tests/scenariosv2/.logs" ]; then + cp -R tests/scenariosv2/.logs/* ./output-logs/ + else + echo "Log directory not found" + fi + + - name: Upload logs to GitHub + uses: actions/upload-artifact@master + if: steps.changes.outputs.stack == 'true' && failure() + with: + name: simulation-scenario-sync-logs-${{ matrix.os }}-${{ steps.date.outputs.date }} + path: ./output-logs/ + pr-tests-syft-notebook-scenario: strategy: max-parallel: 99 From 454ecbf3f5a82a44e3bc39d8de2d3d45a98ca40a Mon Sep 17 00:00:00 2001 From: Aziz Berkay Yesilyurt Date: Tue, 1 Oct 2024 15:15:53 +0200 Subject: [PATCH 69/78] fix ActionObject.get_sync_dependencies --- .../syft/src/syft/service/action/action_object.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/packages/syft/src/syft/service/action/action_object.py b/packages/syft/src/syft/service/action/action_object.py index 275767665c8..d3e952b8c9f 100644 --- a/packages/syft/src/syft/service/action/action_object.py +++ b/packages/syft/src/syft/service/action/action_object.py @@ -1171,16 +1171,11 @@ def syft_make_action_with_self( def get_sync_dependencies( self, context: AuthedServiceContext, **kwargs: dict - ) -> list[UID]: # type: ignore - # relative - from ..job.job_stash import Job - - job: Job | None = context.server.services.job.get_by_result_id( - context, self.id.id - ) # type: ignore - if job is not None: + ) -> list[UID]: + try: + job = context.server.services.job.get_by_result_id(context, self.id.id) return [job.id] - else: + except SyftException: return [] def syft_get_path(self) -> str: From 6681d8ddc4f60b7aa703a3017dda0bfe83384440 Mon Sep 17 00:00:00 2001 From: dk Date: Fri, 4 Oct 2024 09:10:35 +0700 Subject: [PATCH 70/78] [tests/scenarios] increase timeout for l0 --- tests/scenariosv2/l0_test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/scenariosv2/l0_test.py b/tests/scenariosv2/l0_test.py index e0c04a2288d..647f8b24d76 100644 --- a/tests/scenariosv2/l0_test.py +++ b/tests/scenariosv2/l0_test.py @@ -32,6 +32,7 @@ fake = Faker() NUM_USERS = 10 NUM_ENDPOINTS = 3 # test_query, submit_query, schema_query +TIMEOUT = 600 class Event(BaseEvent): @@ -456,5 +457,5 @@ async def test_l0_scenario(request): Event.USER_CHECKED_RESULTS, Event.USER_FLOW_COMPLETED, ], - timeout=300, + timeout=TIMEOUT, ) From a40534a84a06447758f81690eef9da37330a86fb Mon Sep 17 00:00:00 2001 From: Aziz Berkay Yesilyurt Date: Fri, 4 Oct 2024 09:41:35 +0200 Subject: [PATCH 71/78] test with 5 users --- 
tests/scenariosv2/l0_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/scenariosv2/l0_test.py b/tests/scenariosv2/l0_test.py index 647f8b24d76..9150099ef09 100644 --- a/tests/scenariosv2/l0_test.py +++ b/tests/scenariosv2/l0_test.py @@ -30,7 +30,7 @@ from .sim.core import sim_entrypoint fake = Faker() -NUM_USERS = 10 +NUM_USERS = 5 NUM_ENDPOINTS = 3 # test_query, submit_query, schema_query TIMEOUT = 600 From 6be9ea6b353b6d1512f580857659f5725540d0fe Mon Sep 17 00:00:00 2001 From: Aziz Berkay Yesilyurt Date: Sun, 6 Oct 2024 13:08:46 +0200 Subject: [PATCH 72/78] update code call --- tests/scenariosv2/flows/user_bigquery_api.py | 4 +++- tests/scenariosv2/l0_test.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/scenariosv2/flows/user_bigquery_api.py b/tests/scenariosv2/flows/user_bigquery_api.py index deb8a9cbbbc..eede3d2264b 100644 --- a/tests/scenariosv2/flows/user_bigquery_api.py +++ b/tests/scenariosv2/flows/user_bigquery_api.py @@ -61,7 +61,9 @@ def bq_check_query_results(ctx: SimulatorContext, client: sy.DatasiteClient): msg = f"User: {user} - Request {request.code.service_func_name}" if status == RequestStatus.APPROVED: - job = request.code(blocking=False) + func_name = request.code.service_func_name + api_func = getattr(client.code, func_name, None) + job = api_func(blocking=False) result = job.wait() assert len(result) == 10000 ctx.logger.info(f"{msg} - Approved") diff --git a/tests/scenariosv2/l0_test.py b/tests/scenariosv2/l0_test.py index 9150099ef09..e9ed65831b1 100644 --- a/tests/scenariosv2/l0_test.py +++ b/tests/scenariosv2/l0_test.py @@ -256,7 +256,9 @@ async def admin_high_triage_requests( request.deny("You gave me an `invalid_func` function") else: ctx.logger.info(f"Admin high: Approving request by executing {request}") - job = request.code(blocking=False) + func_name = request.code.service_func_name + api_func = getattr(admin_client.code, func_name, None) + job = api_func(blocking=False) result = job.wait() ctx.logger.info(f"Admin high: Request result {result}") From 45b9f1d53cd79f67c8350ef84809d59e3a55552f Mon Sep 17 00:00:00 2001 From: Aziz Berkay Yesilyurt Date: Sun, 6 Oct 2024 13:15:27 +0200 Subject: [PATCH 73/78] increase timeout --- tests/scenariosv2/flows/admin_bigquery_api.py | 2 ++ tests/scenariosv2/flows/admin_bigquery_pool.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/scenariosv2/flows/admin_bigquery_api.py b/tests/scenariosv2/flows/admin_bigquery_api.py index aff811d4199..1deb967a4b8 100644 --- a/tests/scenariosv2/flows/admin_bigquery_api.py +++ b/tests/scenariosv2/flows/admin_bigquery_api.py @@ -55,6 +55,7 @@ def bq_test_endpoint( private_function=private_query_function, mock_function=mock_query_function, worker_pool_name=worker_pool, + endpoint_timeout=120, ) # Call admin_client.custom_api.add @@ -72,6 +73,7 @@ def bq_submit_endpoint( description="API endpoint that allows you to submit SQL queries to run on the private data.", worker_pool_name=worker_pool, settings={"worker": worker_pool}, + endpoint_timeout=120, ) def submit_query( context, diff --git a/tests/scenariosv2/flows/admin_bigquery_pool.py b/tests/scenariosv2/flows/admin_bigquery_pool.py index 11c75feb641..91fda486e9c 100644 --- a/tests/scenariosv2/flows/admin_bigquery_pool.py +++ b/tests/scenariosv2/flows/admin_bigquery_pool.py @@ -49,6 +49,6 @@ def bq_create_pool( docker_tag=worker_image_tag, custom_pool_pod_annotations=None, custom_pool_pod_labels=None, - scale_to=1, + scale_to=3, ) ctx.logger.info(f"{msg} 
- Created") From b98382f7bbe98579604e404ad92d0a403cc91278 Mon Sep 17 00:00:00 2001 From: Aziz Berkay Yesilyurt Date: Sun, 6 Oct 2024 13:28:43 +0200 Subject: [PATCH 74/78] increase test timeout --- tests/scenariosv2/l0_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/scenariosv2/l0_test.py b/tests/scenariosv2/l0_test.py index e9ed65831b1..d394aafd2a0 100644 --- a/tests/scenariosv2/l0_test.py +++ b/tests/scenariosv2/l0_test.py @@ -32,7 +32,7 @@ fake = Faker() NUM_USERS = 5 NUM_ENDPOINTS = 3 # test_query, submit_query, schema_query -TIMEOUT = 600 +TIMEOUT = 900 class Event(BaseEvent): From f1fdba3419c5d3f0858d5271ea3429b32d46cfd4 Mon Sep 17 00:00:00 2001 From: Aziz Berkay Yesilyurt Date: Wed, 9 Oct 2024 11:22:00 +0200 Subject: [PATCH 75/78] fix --- packages/syft/src/syft/util/reset_server.py | 4 ++-- tests/scenariosv2/l0_test.py | 9 +++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/packages/syft/src/syft/util/reset_server.py b/packages/syft/src/syft/util/reset_server.py index ef8018f4689..da9f28d7f57 100644 --- a/packages/syft/src/syft/util/reset_server.py +++ b/packages/syft/src/syft/util/reset_server.py @@ -24,11 +24,11 @@ def make_copy(server: ServerHandle) -> None: original_dir = str(cfg.path.resolve()) copy_dir = f"{original_dir}{COPY_SUFFIX}" copy_tree(original_dir, copy_dir) - print(f"moved\n{original_dir}\nto\n{copy_dir}\n") + print(f"copied\n{original_dir}\nto\n{copy_dir}\n") def restore_copy(copy_dir: str) -> None: copy_dir_path = Path(copy_dir) original_dir = make_original_path(copy_dir_path) copy_tree(copy_dir_path, original_dir) - print(f"moved\n{copy_dir}\nto\n{original_dir}\n") + print(f"copied\n{copy_dir}\nto\n{original_dir}\n") diff --git a/tests/scenariosv2/l0_test.py b/tests/scenariosv2/l0_test.py index d394aafd2a0..1c42bbb687d 100644 --- a/tests/scenariosv2/l0_test.py +++ b/tests/scenariosv2/l0_test.py @@ -30,9 +30,9 @@ from .sim.core import sim_entrypoint fake = Faker() -NUM_USERS = 5 +NUM_USERS = 10 NUM_ENDPOINTS = 3 # test_query, submit_query, schema_query -TIMEOUT = 900 +TIMEOUT = 300 class Event(BaseEvent): @@ -206,7 +206,7 @@ async def admin_low_triage_requests( ctx.logger.info(f"Admin low: Requests={len(requests)} Pending={len(pending)}") # If all requests have been triaged, then exit - if len(requests) == NUM_USERS and len(pending) == 0: + if len(requests) == NUM_USERS: ctx.events.trigger(Event.ADMIN_LOW_ALL_RESULTS_AVAILABLE) break @@ -261,7 +261,8 @@ async def admin_high_triage_requests( job = api_func(blocking=False) result = job.wait() ctx.logger.info(f"Admin high: Request result {result}") - + if len(requests) == NUM_USERS: + break ctx.logger.info("Admin high: All requests triaged.") From 78d6c06f77da71067b473c9d7dca4a534bd4cd74 Mon Sep 17 00:00:00 2001 From: Aziz Berkay Yesilyurt Date: Wed, 9 Oct 2024 11:41:22 +0200 Subject: [PATCH 76/78] fix --- tests/scenariosv2/sim/core.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/scenariosv2/sim/core.py b/tests/scenariosv2/sim/core.py index 341d799d9da..4b1ad074b44 100644 --- a/tests/scenariosv2/sim/core.py +++ b/tests/scenariosv2/sim/core.py @@ -115,10 +115,12 @@ async def start(self, *tasks, check_events=None, random_wait=None, timeout=60): timeout=timeout, ) except asyncio.TimeoutError: + unfired_events = context.unfired_events(check_events) + if len(unfired_events) == 0: + # simulator timed out and all events fired + return results if check_events: - context._elogger.error( - f"Timed out. 
Unfired Events = {context.unfired_events(check_events)}" - ) + context._elogger.error(f"Timed out. Unfired Events = {unfired_events}") raise TestFailure( f"simulator timed out after {timeout}s. Please check logs at {LOGS_DIR} for more details." ) From 3a22442b849644a07168b6ba90f26915af759210 Mon Sep 17 00:00:00 2001 From: Aziz Berkay Yesilyurt Date: Wed, 9 Oct 2024 11:42:22 +0200 Subject: [PATCH 77/78] fix results --- tests/scenariosv2/l0_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/scenariosv2/l0_test.py b/tests/scenariosv2/l0_test.py index 1c42bbb687d..edf847d28ed 100644 --- a/tests/scenariosv2/l0_test.py +++ b/tests/scenariosv2/l0_test.py @@ -32,7 +32,7 @@ fake = Faker() NUM_USERS = 10 NUM_ENDPOINTS = 3 # test_query, submit_query, schema_query -TIMEOUT = 300 +TIMEOUT = 900 class Event(BaseEvent): From 3d9db2dd78bac6a95e3cf35b2a6899833adb9fe1 Mon Sep 17 00:00:00 2001 From: Aziz Berkay Yesilyurt Date: Wed, 9 Oct 2024 14:14:39 +0200 Subject: [PATCH 78/78] request.code.__call__ uses the same api as code.funcname --- .../syft/src/syft/service/code/user_code.py | 20 +------------------ 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/packages/syft/src/syft/service/code/user_code.py b/packages/syft/src/syft/service/code/user_code.py index 2e6edcde5d7..c20f2c58234 100644 --- a/packages/syft/src/syft/service/code/user_code.py +++ b/packages/syft/src/syft/service/code/user_code.py @@ -39,12 +39,9 @@ from ...abstract_server import ServerType from ...client.api import APIRegistry from ...client.api import ServerIdentity -from ...client.api import generate_remote_function from ...serde.deserialize import _deserialize from ...serde.serializable import serializable from ...serde.serialize import _serialize -from ...serde.signature import signature_remove_context -from ...serde.signature import signature_remove_self from ...server.credentials import SyftVerifyKey from ...store.linked_obj import LinkedObject from ...types.datetime import DateTime @@ -1113,22 +1110,7 @@ def show_code_cell(self) -> None: def __call__(self, *args: Any, **kwargs: Any) -> Any: api = self._get_api() - - signature = self.signature - signature = signature_remove_self(signature) - signature = signature_remove_context(signature) - remote_user_function = generate_remote_function( - api=api, - server_uid=self.server_uid, - signature=self.signature, - path="code.call", - make_call=api.make_call, - pre_kwargs={"uid": self.id}, - warning=None, - communication_protocol=api.communication_protocol, - unwrap_on_success=True, # TODO: look into this - ) - return remote_user_function(*args, **kwargs) + return getattr(api.code, self.service_func_name)(*args, **kwargs) class UserCodeUpdate(PartialSyftObject):
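
The pattern introduced in patch 72 and formalized in patch 78 — resolving the submitted function on the client's `code` API by its `service_func_name` and executing it as a non-blocking job — can be summarized in a small helper. A minimal sketch, assuming a logged-in `sy.DatasiteClient` and an approved `request`; the `execute_approved_request` name and the `RuntimeError` fallback are illustrative and not part of the patches:

# syft absolute
import syft as sy


def execute_approved_request(client: sy.DatasiteClient, request):
    # Resolve the user's code by its service name instead of calling
    # `request.code(...)` directly, mirroring the change in patch 78.
    func_name = request.code.service_func_name
    api_func = getattr(client.code, func_name, None)
    if api_func is None:
        raise RuntimeError(f"API function {func_name!r} not found on client")
    # Run as a non-blocking job, then block only when the result is needed.
    job = api_func(blocking=False)
    return job.wait()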
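
Patch 76 changes the simulator so that hitting the timeout is fatal only when some expected events never fired. A reduced, self-contained sketch of that shape, where `expected` and `fired` stand in for the simulator's event bookkeeping, `AssertionError` stands in for `TestFailure`, and returning `None` on the benign path is a simplification of returning the gathered results:

import asyncio


async def gather_with_grace(tasks, expected: set[str], fired: set[str], timeout: float = 60.0):
    try:
        return await asyncio.wait_for(asyncio.gather(*tasks), timeout=timeout)
    except asyncio.TimeoutError:
        unfired = expected - fired
        if not unfired:
            # Timed out, but every expected event fired -- treat as success.
            return None
        raise AssertionError(f"simulator timed out; unfired events = {sorted(unfired)}")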
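
Patch 75 also relaxes the triage loops' exit condition in l0_test.py from "all requests seen and none pending" to simply "the expected number of requests seen". A reduced sketch of that loop shape, with `get_requests`, `is_pending`, and `triage` as illustrative stand-ins for the client calls in the test:

import asyncio


async def triage_until_done(get_requests, is_pending, triage, expected: int, poll_interval: float = 1.0):
    # Poll for requests, triage whatever is pending, and stop once the
    # expected number of requests has been observed (patch 75's condition).
    while True:
        requests = get_requests()
        for request in filter(is_pending, requests):
            triage(request)
        if len(requests) >= expected:
            break
        await asyncio.sleep(poll_interval)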