diff --git a/.bandit b/.bandit deleted file mode 100644 index cf6c1fc..0000000 --- a/.bandit +++ /dev/null @@ -1,10 +0,0 @@ -skips: [B101,B105,B108,B113,B311,B314,B405,B404,B501,B506,B602,B604,B605,B607,B608] - -assert_used: - skips: ['*_test.py', '*test_*.py'] - -B311: - skips: ['*_test.py', '*test_*.py', '*mock*.py', '*mock_*.py'] - -B105: - skips: ['*_test.py', '*test_*.py'] diff --git a/.flake8 b/.flake8 deleted file mode 100644 index f77ad44..0000000 --- a/.flake8 +++ /dev/null @@ -1,3 +0,0 @@ -[flake8] -exclude = - "sumomongodbatlascollector/test_mock.py" diff --git a/.github/workflows/runtest.yaml b/.github/workflows/runtest.yaml index b52a789..b37c902 100644 --- a/.github/workflows/runtest.yaml +++ b/.github/workflows/runtest.yaml @@ -9,17 +9,17 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Set up Python 3.10 + - name: Set up Python 3.12 uses: actions/setup-python@v5 with: - python-version: '3.10' + python-version: '3.12' - name: Install dev dependencies run: python -m pip install -U pip flake8 bandit mypy - name: Run Security Tests run: | - bandit -c .bandit -r sumomongodbatlascollector/ -f custom + bandit -r sumomongodbatlascollector/ --skip B101,B105,B108,B113,B311,B314,B405,B404,B501,B506,B602,B604,B605,B607,B608 - name: Run Linting Tests run: | @@ -34,24 +34,49 @@ jobs: # mypy --install-types ./api.py # mypy ./api.py --disable-error-code=import-untyped + build-test: + name: "Run Build Tests" + runs-on: "ubuntu-latest" + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python 3.12 + uses: actions/setup-python@v5 + with: + python-version: 3.12 + + - name: Install build dependencies + run: | + python -m pip install -U pip wheel setuptools + - name: "Build" + run: | + python setup.py sdist bdist_wheel + + - name: "Install" + run: | + pip install dist/sumologic_mongodb_atlas*.whl + + run-test: - name: "Run Build and Install Tests" + name: "Run Tests" runs-on: "ubuntu-latest" steps: - uses: actions/checkout@v4 - - name: Set up Python 3.10 + 
- name: Set up Python 3.12 uses: actions/setup-python@v5 with: - python-version: '3.10' + python-version: '3.12' - name: Install build and test dependencies run: | python -m pip install --upgrade pip pip install -e . pip install pytest + pip install setuptools - name: "Run Pytest" run: | - pytest -vvv sumomongodbatlascollector/test_mongo_collector.py + pytest -vvv tests/test_mongo_collector.py diff --git a/INTERNAL_README.md b/INTERNAL_README.md index e359ad5..6ad5004 100644 --- a/INTERNAL_README.md +++ b/INTERNAL_README.md @@ -1,46 +1,59 @@ -We use sumoappclient for testing the lambda paackage. We can install it by: -``` -pip install sumologic-sdk -``` -Description: -``` -sumoappclient [-h] [-e {onprem,aws,gcp,azure}] [-d {prod,test,local}] [-c CONFIGPATH] [-pc PYTHON_CMD] [-g TARGET_FOLDER] -``` +### OnPrem -Deployment testing for PyPi: -1. For onprem pypi testing of this package, we perform these steps: - * Upgrade the version in the Version file, eg 1.0.10 -> 1.0.11 +1. For generating the build and running locally, we perform these steps: + + * Upgrade the version in the `VERSION` file, eg 1.0.10 -> 1.0.11 + * Run the `build.sh`. + * Create a `mongodbatlas.yaml` in home folder (for ex ~/sumo/mongodbatlas.yaml), refer the `sumomongodbatlascollector/mongodbatlas.yaml` file for instructions on each of the parameter. You can override any parameter in this file + ``` + SumoLogic: + HTTP_LOGS_ENDPOINT: + HTTP_METRICS_ENDPOINT: + + MongoDBAtlas: + ORGANIZATION_ID: + PROJECT_ID: + PRIVATE_API_KEY: + PUBLIC_API_KEY: + ``` + * Run the below command to start the collector + ``` + python -m sumomongodbatlascollector.main ~/sumo/mongodbatlas.yaml + ``` + +2. 
For deploying on test pypi account we perform these steps: * Run the following command: ``` - sumoappclient -d sumotest -c mongodbatlas.yaml -e onprem + python -m twine upload dist/* --repository sumotestpypi ``` * This deploys the package in the testing org of pypi via the credentials stored in the .pypirc file for the sumotestpypi section. You can find the file in the shared vault. -2. For onprem pypi production deployment, we perform these steps: - * Upgrade the version in the Version file, eg 1.0.10 -> 1.0.11 +3. For deploying on prod pypi account we perform these steps: * Run the following command: ``` - sumoappclient -d sumopypi -c mongodbatlas.yaml -e onprem + python -m twine upload dist/* --repository sumopypi ``` * This deploys the package in the production org of pypi via the credentials stored in the .pypirc file for the sumopypi section. You can find the file in the shared vault. -Deployment testing for AWS: -1. For testing of this package, we perform these steps: - * Update .aws file to use a personal aws account credentials - * Create a S3 bucket in the personal aws account - * Set two environment variables: - * export SAM_S3_BUCKET = 'name of the S3 Bucket created' - * export AWS_REGION = 'region of the deployment' - * Upgrade the SemanticVersion in the template.yaml file, eg 1.0.16 -> 1.0.17 - * Run the following command: - ``` - sumoappclient -d test -c mongodbatlas.yaml -e aws +### AWS + +1. For testing and deploying the lambda function, we perform these steps: + * Update .aws file to use a personal aws account credentials. + * Create a S3 bucket in the personal aws account. + * In samconfig.toml file update s3_bucket, region parameters. + * Generate credentials in Mongodb atlas portal and update parameter_overrides in samconfig.toml file. 
+ * Upgrade the SemanticVersion in the template.yaml file and s3_prefix in samconfig.toml file + * Run the following commands: + ``` + sam build + sam package + sam deploy ``` * This deploys the package via a personal aws account onto AWS Serverless Application Repository -2. For production deployment, we perform these steps: - * Update .aws file to use sumocontent aws account credentials - * Upgrade the SemanticVersion in the template.yaml file, eg 1.0.16 -> 1.0.17 + * After deploying go the lambda function created by above command and run the function by clicking on test button. +2. For publishing the sam application, we perform these steps: + * Update the s3_bucket parameter to appdevstore bucket * Run the following command: ``` - sumoappclient -d prod -c mongodbatlas.yaml -e aws + sam publish ``` * This deploys the package via the sumocontent aws account onto AWS Serverless Application Repository diff --git a/MANIFEST.in b/MANIFEST.in index 2d0ae02..d78457c 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,5 +1,6 @@ include requirements.txt include LICENSE include VERSION -include README +include README.md include sumomongodbatlascollector/mongodbatlas.yaml +recursive-exclude tests * diff --git a/build.sh b/build.sh new file mode 100644 index 0000000..da69d12 --- /dev/null +++ b/build.sh @@ -0,0 +1,3 @@ +python setup.py bdist_wheel +pip uninstall -y sumologic_mongodb_atlas +pip install dist/sumologic_mongodb_atlas*.whl diff --git a/packaged.yaml b/packaged.yaml index 0165487..1da3e02 100644 --- a/packaged.yaml +++ b/packaged.yaml @@ -15,10 +15,10 @@ Metadata: - atlas - serverless - MongoDBAtlas - LicenseUrl: s3://appdevstore/MongoDBAtlas/v1.0.17/5122657d5b9a0d3713e24d3a33eae431 + LicenseUrl: s3://appdevstore/MongoDBAtlas/v1.0.18/5122657d5b9a0d3713e24d3a33eae431 Name: sumologic-mongodb-atlas - ReadmeUrl: s3://appdevstore/MongoDBAtlas/v1.0.17/3088978c83fe207a640a2584aff2c79d - SemanticVersion: 1.0.17 + ReadmeUrl: 
s3://appdevstore/MongoDBAtlas/v1.0.18/3088978c83fe207a640a2584aff2c79d + SemanticVersion: 1.0.18 SourceCodeUrl: https://github.com/SumoLogic/sumologic-mongodb-atlas SpdxLicenseId: Apache-2.0 Outputs: @@ -28,6 +28,10 @@ Outputs: Fn::GetAtt: - MongoDBAtlasFunction - Arn + MongoDBAtlasTable: + Description: MongoDBAtlasTable DynamoDB Table Name + Value: + Ref: MongoDBAtlasTableResource Parameters: HttpLogsEndpoint: Type: String @@ -44,9 +48,11 @@ Parameters: Resources: MongoDBAtlasFunction: Properties: - CodeUri: s3://appdevstore/MongoDBAtlas/v1.0.17/56c4530782ed537044f71da21d57b605 + CodeUri: s3://appdevstore/MongoDBAtlas/v1.0.18/5c5c3a5bfa0831572f5452a56ebc0b20 Environment: Variables: + DBNAME: + Ref: MongoDBAtlasTableResource ENABLE_CONSOLE_LOG: 'false' ENVIRONMENT: aws HTTP_LOGS_ENDPOINT: @@ -69,10 +75,37 @@ Resources: Handler: main.main MemorySize: 256 Policies: - - AmazonDynamoDBFullAccess + - Statement: + - Action: + - dynamodb:PutItem + - dynamodb:UpdateItem + - dynamodb:GetItem + - dynamodb:Scan + - dynamodb:Query + - dynamodb:DescribeTable + Effect: Allow + Resource: + Fn::GetAtt: + - MongoDBAtlasTableResource + - Arn + Version: '2012-10-17' Runtime: python3.11 Timeout: 900 Type: AWS::Serverless::Function Metadata: SamResourceId: MongoDBAtlasFunction + MongoDBAtlasTableResource: + Properties: + AttributeDefinitions: + - AttributeName: key_col + AttributeType: S + KeySchema: + - AttributeName: key_col + KeyType: HASH + ProvisionedThroughput: + ReadCapacityUnits: 30 + WriteCapacityUnits: 20 + Type: AWS::DynamoDB::Table + Metadata: + SamResourceId: MongoDBAtlasTableResource Transform: AWS::Serverless-2016-10-31 diff --git a/requirements.txt b/requirements.txt index ec973db..93b4939 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ -sumologic-appclient-sdk==2.0.10 +sumologic-appclient-sdk>=2.0.10 requests>=2.32.3 -future-fstrings +future_fstrings==1.2.0 diff --git a/samconfig.toml b/samconfig.toml new file mode 100644 index 0000000..4c969da --- 
/dev/null +++ b/samconfig.toml @@ -0,0 +1,45 @@ +version = 0.1 + +[default.build] +[default.build.parameters] +cached = true +parallel = true +debug=true +use_container=true + +[default.package] +[default.package.parameters] +output_template_file="packaged.yaml" +s3_prefix="MongoDBAtlas/v1.0.18" +region="us-east-1" +s3_bucket="appdevstore-test" + +[default.deploy.parameters] +capabilities = "CAPABILITY_IAM" +confirm_changeset = true +stack_name="testingMongoDBAtlas" +parameter_overrides="HttpLogsEndpoint= HttpMetricsEndpoint= OrganizationId= PrivateApiKey= ProjectId= PublicApiKey=" +s3_prefix="MongoDBAtlas/v1.0.18" +region="us-east-1" +s3_bucket="appdevstore-test" + +[default.publish.parameters] +template="packaged.yaml" +region="us-east-1" + +[default.sync.parameters] +watch = true + +[default.local_start_api.parameters] +warm_containers = "EAGER" + +[default.local_invoke.parameters] +parameter_overrides="ENVIRONMENT=onprem HttpLogsEndpoint= HttpMetricsEndpoint= OrganizationId= PrivateApiKey= ProjectId= PublicApiKey=" + +# https://github.com/aws/aws-sam-cli/issues/5492 +# Allow specification of parameters that should apply to all (or multiple) environments in AWS SAM CLI configuration file (samconfig.toml) + +[prod] +[prod.sync] +[prod.sync.parameters] +watch = false diff --git a/setup.py b/setup.py index a3a07f9..c3c8155 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,8 @@ setup( name="sumologic-mongodb-atlas", version=__versionstr__, - packages=find_packages(), + packages=find_packages(exclude=["tests*"]), + package_data={"sumomongodbatlascollector": ["mongodbatlas.yaml"]}, install_requires=INSTALL_REQUIRES, extras_require={ "aws": ["boto3>=1.34.149", "botocore>=1.34.149"], @@ -42,7 +43,6 @@ keywords="sumologic python rest api log management analytics logreduce mongodb atlas agent security siem collector forwarder", url="https://github.com/SumoLogic/sumologic-mongodb-atlas", zip_safe=True, - include_package_data=True, classifiers=[ "Development Status :: 4 - 
Beta", "Intended Audience :: Developers", @@ -52,6 +52,7 @@ "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", "Operating System :: OS Independent", ], entry_points={ diff --git a/sumomongodbatlascollector/api.py b/sumomongodbatlascollector/api.py index 279e896..4a1fdd4 100644 --- a/sumomongodbatlascollector/api.py +++ b/sumomongodbatlascollector/api.py @@ -1,14 +1,9 @@ -# -*- coding: future_fstrings -*- - import gzip import json import os -# import psutil -# import tracemalloc from io import BytesIO import time from requests.auth import HTTPDigestAuth -# import dateutil from sumoappclient.sumoclient.base import BaseAPI from sumoappclient.sumoclient.factory import OutputHandlerFactory from sumoappclient.common.utils import ( @@ -69,21 +64,16 @@ def _replace_cluster_name(self, full_name_with_cluster, cluster_mapping): class FetchMixin(MongoDBAPI): def fetch(self): log_type = self.get_key() - with TimeAndMemoryTracker(activate=True) as tracker: - start_message = tracker.start("OutputHandlerFactory.get_handler") - self.log.info(start_message) + with TimeAndMemoryTracker(activate=self.collection_config.get("ACTIVATE_TIME_AND_MEMORY_TRACKING", False)) as tracker: output_handler = OutputHandlerFactory.get_handler( self.collection_config["OUTPUT_HANDLER"], path=self.pathname, config=self.config, ) - - end_message = tracker.end("OutputHandlerFactory.get_handler") - self.log.info(end_message) start_message = tracker.start("self.build_fetch_params") url, kwargs = self.build_fetch_params() end_message = tracker.end("self.build_fetch_params") - self.log.info(f'''Fetching LogType: {log_type} kwargs: {kwargs} url: {url} end_message: {end_message}''') + self.log.info(f'''Fetching LogType: {log_type} kwargs: {kwargs} url: {url} {start_message} {end_message}''') state = None payload = [] try: @@ -97,26 +87,23 @@ def fetch(self): 
BACKOFF_FACTOR=self.collection_config["BACKOFF_FACTOR"], **kwargs, ) - self.log.info(f'''Fetching LogType: {log_type} kwargs: {kwargs} url: {url} end_message: {start_message}''') end_message = tracker.end("ClientMixin.make_request") - self.log.info(end_message) + self.log.debug(f'''Fetched LogType: {log_type} kwargs: {kwargs} url: {url} {start_message} {end_message}''') if fetch_success and len(content) > 0: payload, state = self.transform_data(content) # Todo Make this atomic if after sending -> Ctrl - C happens then it fails to save state params = self.build_send_params() start_message = tracker.start("OutputHandler.send") - self.log.info(f'''Sending LogType: {self.get_key()} Data: {len(content)} url: {url} start_message: {start_message}''') send_success = output_handler.send(payload, **params) end_message = tracker.end("OutputHandler.send") - self.log.info(f'''Sending LogType: {self.get_key()} Data: {len(content)} kwargs: {kwargs} url: {url} end_message: {end_message}''') if send_success: self.save_state(**state) - self.log.info(f"""Successfully sent LogType: {self.get_key()} Data: {len(content)}""") + self.log.info(f"""Successfully sent LogType: {self.get_key()} Data: {len(content)} kwargs: {kwargs} url: {url} {start_message} {end_message}""") else: - self.log.error(f"""Failed to send LogType: {self.get_key()}""") + self.log.error(f"""Failed to send LogType: {self.get_key()} Data: {len(content)} kwargs: {kwargs} url: {url} {start_message} {end_message}""") elif fetch_success and len(content) == 0: self.log.info( - f"""No results window LogType: {log_type} kwargs: {kwargs} status: {fetch_success} url: {url}""" + f"""No results window LogType: {log_type} status: {fetch_success} kwargs: {kwargs} url: {url}""" ) is_move_fetch_window, new_state = self.check_move_fetch_window(kwargs) if is_move_fetch_window: @@ -136,22 +123,20 @@ def fetch(self): class PaginatedFetchMixin(MongoDBAPI): def fetch(self): current_state = self.get_state() - with 
TimeAndMemoryTracker(activate=True) as tracker: + with TimeAndMemoryTracker(activate=self.collection_config.get("ACTIVATE_TIME_AND_MEMORY_TRACKING", False)) as tracker: output_handler = OutputHandlerFactory.get_handler(self.collection_config["OUTPUT_HANDLER"], path=self.pathname, config=self.config) start_message = tracker.start("self.build_fetch_params") url, kwargs = self.build_fetch_params() end_message = tracker.end("self.build_fetch_params") log_type = self.get_key() - self.log.info(f'''Fetching LogType: {log_type} kwargs: {kwargs} url: {url} end_message: {end_message} ''') + self.log.info(f'''Fetching LogType: {log_type} kwargs: {kwargs} url: {url} {start_message} {end_message} ''') next_request = True count = 0 - start_message = tracker.start("ClientMixin.get_new_session") sess = ClientMixin.get_new_session() - end_message = tracker.end("ClientMixin.get_new_session") - self.log.info(f'''Fetching LogType: {log_type} kwargs: {kwargs} url: {url} end_message: {end_message} ''') try: while next_request: send_success = has_next_page = False + start_message = tracker.start("ClientMixin.make_request") status, data = ClientMixin.make_request( url, method="get", @@ -162,7 +147,10 @@ def fetch(self): BACKOFF_FACTOR=self.collection_config["BACKOFF_FACTOR"], **kwargs, ) + end_message = tracker.end("ClientMixin.make_request") fetch_success = status and "results" in data + if (count < 4) or (count % 5 == 0): + self.log.info(f'''Fetched LogType: {log_type} kwargs: {kwargs} url: {url} {start_message} {end_message}''') if fetch_success: has_next_page = len(data["results"]) > 0 if has_next_page: @@ -173,9 +161,10 @@ def fetch(self): end_message = tracker.end("OutputHandler.send") if send_success: count += 1 - self.log.debug( - f"""Successfully sent LogType: {log_type} Page: {kwargs['params']['pageNum']} Datalen: {len(payload)} starttime: {kwargs['params']['minDate']} endtime: {kwargs['params']['maxDate']} end_message: {end_message}""" - ) + if (count < 4) or (count % 5 == 0): 
+ self.log.info( + f"""Successfully sent LogType: {log_type} Page: {kwargs['params']['pageNum']} Datalen: {len(payload)} starttime: {kwargs['params']['minDate']} endtime: {kwargs['params']['maxDate']} {start_message} {end_message}""" + ) kwargs["params"]["pageNum"] += 1 # save and update last_time_epoch required for next invocation current_state.update(updated_state) @@ -198,7 +187,7 @@ def fetch(self): else: # show err unable to send save current state self.log.error( - f"""Failed to send LogType: {log_type} Page: {kwargs['params']['pageNum']} starttime: {kwargs['params']['minDate']} endtime: {kwargs['params']['maxDate']}""" + f"""Failed to send LogType: {log_type} Page: {kwargs['params']['pageNum']} starttime: {kwargs['params']['minDate']} endtime: {kwargs['params']['maxDate']} {start_message} {end_message}""" ) self.save_state( { @@ -407,7 +396,7 @@ def __init__(self, kvstore, process_id, config, cluster_mapping): self.cluster_mapping = cluster_mapping def get_key(self): - key = f"""{self.api_config['PROJECT_ID']}-{self.process_id}""" + key = f"""{self.api_config['PROJECT_ID']}-{self.process_id}-processmetrics""" return key def save_state(self, last_time_epoch): @@ -502,7 +491,7 @@ def __init__(self, kvstore, process_id, disk_name, config, cluster_mapping): self.cluster_mapping = cluster_mapping def get_key(self): - key = f"""{self.api_config['PROJECT_ID']}-{self.process_id}-{self.disk_name}""" + key = f"""{self.api_config['PROJECT_ID']}-{self.process_id}-{self.disk_name}-diskmetrics""" return key def save_state(self, last_time_epoch): @@ -597,7 +586,7 @@ def __init__(self, kvstore, process_id, database_name, config, cluster_mapping): self.cluster_mapping = cluster_mapping def get_key(self): - key = f"""{self.api_config['PROJECT_ID']}-{self.process_id}-{self.database_name}""" + key = f"""{self.api_config['PROJECT_ID']}-{self.process_id}-{self.database_name}-dbmetrics""" return key def save_state(self, last_time_epoch): @@ -939,6 +928,8 @@ def fetch(self): 
BACKOFF_FACTOR=self.collection_config["BACKOFF_FACTOR"], **kwargs, ) + if count < 4 or (count % 5 == 0): + self.log.info(f'''Fetched LogType: {log_type} kwargs: {kwargs} url: {url}''') fetch_success = status and "results" in data if fetch_success: has_next_page = len(data["results"]) > 0 @@ -949,9 +940,10 @@ def fetch(self): ) if send_success: count += 1 - self.log.debug( - f"""Successfully sent LogType: {log_type} Project: {self.api_config['PROJECT_ID']} Alerts Page: {kwargs['params']['pageNum']} Datalen: {len(payload)} """ - ) + if count < 4 or (count % 5 == 0): + self.log.info( + f"""Successfully sent LogType: {log_type} Project: {self.api_config['PROJECT_ID']} Alerts Page: {kwargs['params']['pageNum']} Datalen: {len(payload)} """ + ) current_state.update(updated_state) if current_state["last_page_offset"] == 0: # do not increase if num alerts < page limit diff --git a/sumomongodbatlascollector/main.py b/sumomongodbatlascollector/main.py index e29a698..ce1596b 100644 --- a/sumomongodbatlascollector/main.py +++ b/sumomongodbatlascollector/main.py @@ -1,9 +1,5 @@ -# -*- coding: future_fstrings -*- - import traceback import os -from concurrent import futures -from random import shuffle from requests.auth import HTTPDigestAuth from time_and_memory_tracker import TimeAndMemoryTracker @@ -26,6 +22,7 @@ class MongoDBAtlasCollector(BaseCollector): Design Doc: https://docs.google.com/document/d/15TgilyyuGTMjRIZUXVJa1UhpTu3wS-gMl-dDsXAV2gw/edit?usp=sharing """ + COLLECTOR_PROCESS_NAME = "sumomongodbatlascollector" SINGLE_PROCESS_LOCK_KEY = "is_mongodbatlascollector_running" CONFIG_FILENAME = "mongodbatlas.yaml" DATA_REFRESH_TIME = 60 * 60 * 1000 @@ -34,6 +31,7 @@ def __init__(self): self.project_dir = self.get_current_dir() super(MongoDBAtlasCollector, self).__init__(self.project_dir) self.api_config = self.config["MongoDBAtlas"] + self.digestauth = HTTPDigestAuth( username=self.api_config["PUBLIC_API_KEY"], password=self.api_config["PRIVATE_API_KEY"], @@ -42,6 +40,10 @@ 
def __init__(self): MAX_RETRY=self.collection_config["MAX_RETRY"], BACKOFF_FACTOR=self.collection_config["BACKOFF_FACTOR"], ) + # removing redundant handlers since AWS Lambda also sets up a handler, on the root logger + if self.collection_config["ENVIRONMENT"] == "aws": + for hdlr in self.log.handlers: + self.log.removeHandler(hdlr) def get_current_dir(self): cur_dir = os.path.dirname(__file__) @@ -100,19 +102,28 @@ def _get_all_processes_from_project(self): "params": {"itemsPerPage": self.api_config["PAGINATION_LIMIT"]}, } all_data = self.getpaginateddata(url, **kwargs) - process_ids = [obj["id"] for data in all_data for obj in data["results"]] - hostnames = [obj["hostname"] for data in all_data for obj in data["results"]] - # 'port': 27017, 'replicaSetName': 'M10AWSTestCluster-config-0', 'typeName': 'SHARD_CONFIG_PRIMARY' + all_cluster_aliases = list({self._get_cluster_name(obj["userAlias"]) for data in all_data for obj in data["results"]}) user_provided_clusters = self._get_user_provided_cluster_name() - cluster_mapping = {} - if len(user_provided_clusters) > 0: + + if len(all_cluster_aliases) > 0 and len(user_provided_clusters) > 0: + cluster_mapping = {} + process_ids = set() + hostnames = set() for obj in all_data: for obj in obj["results"]: - if obj["hostname"] in user_provided_clusters: - cluster_mapping[self._get_cluster_name(obj["hostname"])] = ( - self._get_cluster_name(obj["userAlias"]) - ) + cluster_alias = self._get_cluster_name(obj["userAlias"]) + if cluster_alias in user_provided_clusters: + cluster_mapping[self._get_cluster_name(obj["hostname"])] = cluster_alias + process_ids.add(obj['id']) + hostnames.add(obj['hostname']) + + if not cluster_mapping: + raise Exception(f"None of the user provided cluster matched the following cluster aliases: {','.join(all_cluster_aliases)}") + process_ids = list(process_ids) + hostnames = list(hostnames) else: + process_ids = list({obj["id"] for data in all_data for obj in data["results"]}) + hostnames = 
list({obj["hostname"] for data in all_data for obj in data["results"]}) cluster_mapping = { self._get_cluster_name(obj["hostname"]): self._get_cluster_name( obj["userAlias"] @@ -120,7 +131,7 @@ def _get_all_processes_from_project(self): for data in all_data for obj in data["results"] } - hostnames = list(set(hostnames)) + return process_ids, hostnames, cluster_mapping def _get_all_disks_from_host(self, process_ids): @@ -222,15 +233,9 @@ def _get_process_names(self): process_ids, hostnames = processes["process_ids"], processes["hostnames"] return process_ids, hostnames - def is_running(self): - self.log.debug("Acquiring single instance lock") - return self.kvstore.acquire_lock(self.SINGLE_PROCESS_LOCK_KEY) - - def stop_running(self): - self.log.debug("Releasing single instance lock") - return self.kvstore.release_lock(self.SINGLE_PROCESS_LOCK_KEY) - def build_task_params(self): + with TimeAndMemoryTracker(activate=self.collection_config.get("ACTIVATE_TIME_AND_MEMORY_TRACKING", False)) as tracker: + start_message = tracker.start("self.build_task_params") audit_files = ["mongodb-audit-log.gz", "mongos-audit-log.gz"] dblog_files = ["mongodb.gz", "mongos.gz"] filenames = [] @@ -301,57 +306,10 @@ def build_task_params(self): cluster_mapping, ) ) - self.log.info("%d Tasks Generated" % len(tasks)) - return tasks - def run(self): - if self.is_running(): - try: - self.log.info("Starting MongoDB Atlas Forwarder...") - with TimeAndMemoryTracker(activate=True) as tracker: - start_message = tracker.start("self.build_task_params") - task_params = self.build_task_params() - end_message = tracker.end("self.build_task_params") - self.log.info(f'''Building Task Params end_message: {end_message}''') - shuffle(task_params) - all_futures = {} - self.log.debug("spawning %d workers" % self.config["Collection"]["NUM_WORKERS"]) - with futures.ThreadPoolExecutor( - max_workers=self.config["Collection"]["NUM_WORKERS"] - ) as executor: - results = {executor.submit(apiobj.fetch): apiobj for 
apiobj in task_params} - all_futures.update(results) - for future in futures.as_completed(all_futures): - param = all_futures[future] - api_type = str(param) - try: - future.result() - obj = self.kvstore.get(api_type) - except Exception as exc: - self.log.error(f"API Type: {api_type} thread generated an exception: {exc}", exc_info=True,) - else: - self.log.info(f"API Type: {api_type} thread completed {obj}") - finally: - self.stop_running() - self.mongosess.close() - else: - if not self.is_process_running(["sumomongodbatlascollector"]): - self.kvstore.release_lock_on_expired_key(self.SINGLE_PROCESS_LOCK_KEY, expiry_min=10) - - # def execute_api_with_logging(self, apiobj): - # api_type = str(apiobj.__class__.__name__) - # result = apiobj.fetch() - # return result - - def test(self): - if self.is_running(): - task_params = self.build_task_params() - shuffle(task_params) - try: - for apiobj in task_params: - apiobj.fetch() - finally: - self.stop_running() + end_message = tracker.end("self.build_task_params") + self.log.info(f'''{len(tasks)} Tasks Generated {start_message} {end_message}''') + return tasks def main(*args, **kwargs): diff --git a/sumomongodbatlascollector/mongodbatlas.yaml b/sumomongodbatlascollector/mongodbatlas.yaml index be0ef82..35c74a5 100644 --- a/sumomongodbatlascollector/mongodbatlas.yaml +++ b/sumomongodbatlascollector/mongodbatlas.yaml @@ -1,7 +1,7 @@ MongoDBAtlas: BASE_URL: "https://cloud.mongodb.com/api/atlas/v1.0" FETCH_METHOD: get - PAGINATION_LIMIT: 500 + PAGINATION_LIMIT: 500 # Number of events to fetch in a single API call. ORGANIZATION_ID: null PROJECT_ID: null PUBLIC_API_KEY: null @@ -12,7 +12,7 @@ MongoDBAtlas: - EVENTS_PROJECT - EVENTS_ORG METRIC_TYPES: - PROCESS_METRICS: + PROCESS_METRICS: # Query Parameter m in API: https://www.mongodb.com/docs/atlas/reference/api-resources-spec/v1/#tag/Monitoring-and-Logs/operation/getHostMeasurements. Commented ones are not used in the app. 
If you do not want to collect any of the metrics then comment all of them and provide empty list [] as value. - CACHE_DIRTY_BYTES - CACHE_USED_BYTES - CONNECTIONS @@ -46,43 +46,172 @@ MongoDBAtlas: - SYSTEM_NORMALIZED_CPU_KERNEL - SYSTEM_NORMALIZED_CPU_STEAL - SYSTEM_NORMALIZED_CPU_USER - DISK_METRICS: + # - ASSERT_MSG + # - ASSERT_MSG + # - ASSERT_REGULAR + # - ASSERT_USER + # - ASSERT_WARNING + # - BACKGROUND_FLUSH_AVG + # - CACHE_BYTES_READ_INTO + # - CACHE_BYTES_WRITTEN_FROM + # - CACHE_FILL_RATIO + # - COMPUTED_MEMORY + # - DIRTY_FILL_RATIO + # - DOCUMENT_METRICS_DELETED + # - DOCUMENT_METRICS_INSERTED + # - DOCUMENT_METRICS_RETURNED + # - DOCUMENT_METRICS_UPDATED + # - FTS_DISK_UTILIZATION + # - FTS_MEMORY_MAPPED + # - FTS_MEMORY_RESIDENT + # - FTS_MEMORY_VIRTUAL + # - FTS_PROCESS_CPU_KERNEL + # - FTS_PROCESS_CPU_USER + # - FTS_PROCESS_NORMALIZED_CPU_KERNEL + # - FTS_PROCESS_NORMALIZED_CPU_USER + # - GLOBAL_ACCESSES_NOT_IN_MEMORY + # - GLOBAL_LOCK_CURRENT_QUEUE_READERS + # - GLOBAL_LOCK_CURRENT_QUEUE_WRITERS + # - GLOBAL_PAGE_FAULT_EXCEPTIONS_THROWN + # - INDEX_COUNTERS_BTREE_ACCESSES + # - INDEX_COUNTERS_BTREE_HITS + # - INDEX_COUNTERS_BTREE_MISS_RATIO + # - INDEX_COUNTERS_BTREE_MISSES + # - JOURNALING_COMMITS_IN_WRITE_LOCK + # - JOURNALING_MB + # - JOURNALING_WRITE_DATA_FILES_MB + # - MAX_PROCESS_CPU_CHILDREN_KERNEL + # - MAX_PROCESS_CPU_CHILDREN_USER + # - MAX_PROCESS_CPU_KERNEL + # - MAX_PROCESS_CPU_USER + # - MAX_PROCESS_NORMALIZED_CPU_CHILDREN_KERNEL + # - MAX_PROCESS_NORMALIZED_CPU_CHILDREN_USER + # - MAX_PROCESS_NORMALIZED_CPU_KERNEL + # - MAX_PROCESS_NORMALIZED_CPU_USER + # - MAX_SWAP_USAGE_FREE + # - MAX_SWAP_USAGE_USED + # - MAX_SYSTEM_CPU_GUEST + # - MAX_SYSTEM_CPU_IOWAIT + # - MAX_SYSTEM_CPU_IRQ + # - MAX_SYSTEM_CPU_KERNEL + # - MAX_SYSTEM_CPU_SOFTIRQ + # - MAX_SYSTEM_CPU_STEAL + # - MAX_SYSTEM_CPU_USER + # - MAX_SYSTEM_MEMORY_AVAILABLE + # - MAX_SYSTEM_MEMORY_FREE + # - MAX_SYSTEM_MEMORY_USED + # - MAX_SYSTEM_NETWORK_IN + # - 
MAX_SYSTEM_NETWORK_OUT + # - MAX_SYSTEM_NORMALIZED_CPU_GUEST + # - MAX_SYSTEM_NORMALIZED_CPU_IOWAIT + # - MAX_SYSTEM_NORMALIZED_CPU_IRQ + # - MAX_SYSTEM_NORMALIZED_CPU_KERNEL + # - MAX_SYSTEM_NORMALIZED_CPU_NICE + # - MAX_SYSTEM_NORMALIZED_CPU_SOFTIRQ + # - MAX_SYSTEM_NORMALIZED_CPU_STEAL + # - MAX_SYSTEM_NORMALIZED_CPU_USER + # - MEMORY_MAPPED + # - OPCOUNTER_REPL_CMD + # - OPCOUNTER_REPL_DELETE + # - OPCOUNTER_REPL_INSERT + # - OPCOUNTER_REPL_UPDATE + # - OPCOUNTER_TTL_DELETED + # - OPERATION_THROTTLING_REJECTED_OPERATIONS + # - OPERATIONS_QUERIES_KILLED + # - OPERATIONS_SCAN_AND_ORDER + # - OPLOG_MASTER_LAG_TIME_DIFF + # - OPLOG_MASTER_TIME + # - OPLOG_RATE_GB_PER_HOUR + # - OPLOG_REPLICATION_LAG + # - OPLOG_SLAVE_LAG_MASTER_TIME + # - PROCESS_CPU_CHILDREN_KERNEL + # - PROCESS_CPU_CHILDREN_USER + # - PROCESS_NORMALIZED_CPU_CHILDREN_KERNEL + # - PROCESS_NORMALIZED_CPU_CHILDREN_USER + # - PROCESS_NORMALIZED_CPU_KERNEL + # - PROCESS_NORMALIZED_CPU_USER + # - RESTARTS_IN_LAST_HOUR + # - SWAP_USAGE_FREE + # - SWAP_USAGE_USED + # - SYSTEM_CPU_GUEST + # - SYSTEM_CPU_IOWAIT + # - SYSTEM_CPU_IRQ + # - SYSTEM_CPU_KERNEL + # - SYSTEM_CPU_NICE + # - SYSTEM_CPU_SOFTIRQ + # - SYSTEM_CPU_STEAL + # - SYSTEM_CPU_USER + # - SYSTEM_MEMORY_AVAILABLE + # - SYSTEM_MEMORY_FREE + # - SYSTEM_MEMORY_USED + # - SYSTEM_NETWORK_IN + # - SYSTEM_NETWORK_OUT + # - SYSTEM_NORMALIZED_CPU_GUEST + # - SYSTEM_NORMALIZED_CPU_IOWAIT + # - SYSTEM_NORMALIZED_CPU_IRQ + # - SYSTEM_NORMALIZED_CPU_NICE + # - SYSTEM_NORMALIZED_CPU_SOFTIRQ + # - TICKETS_AVAILABLE_READS + # - TICKETS_AVAILABLE_WRITE + DISK_METRICS: # Query Parameter m in API: https://www.mongodb.com/docs/atlas/reference/api-resources-spec/v1/#tag/Monitoring-and-Logs/operation/listDiskMeasurements. Commented ones are not used in the app. If you do not want to collect any of the metrics then comment all of them and provide empty list [] as value. 
- DISK_PARTITION_IOPS_READ - DISK_PARTITION_IOPS_WRITE - DISK_PARTITION_LATENCY_READ - DISK_PARTITION_LATENCY_WRITE - DISK_PARTITION_SPACE_PERCENT_FREE - DISK_PARTITION_SPACE_PERCENT_USED - DATABASE_METRICS: + # - MAX_DISK_PARTITION_IOPS_READ + # - DISK_PARTITION_IOPS_TOTAL + # - MAX_DISK_PARTITION_IOPS_TOTAL + # - MAX_DISK_PARTITION_LATENCY_READ + # - MAX_DISK_PARTITION_LATENCY_WRITE + # - DISK_PARTITION_SPACE_FREE + # - MAX_DISK_PARTITION_SPACE_FREE + # - DISK_PARTITION_SPACE_USED + # - MAX_DISK_PARTITION_SPACE_USED + # - MAX_DISK_PARTITION_SPACE_PERCENT_FREE + # - MAX_DISK_PARTITION_SPACE_PERCENT_USED + # - DISK_PARTITION_THROUGHPUT_READ + # - DISK_PARTITION_THROUGHPUT_WRITE + # - DISK_QUEUE_DEPTH + DATABASE_METRICS: # Query Parameter m in API: https://www.mongodb.com/docs/atlas/reference/api-resources-spec/v1/#tag/Monitoring-and-Logs/operation/getDatabase. Commented ones are not used in the app. If you do not want to collect any of the metrics then comment all of them and provide empty list [] as value. - DATABASE_AVERAGE_OBJECT_SIZE + # - DATABASE_COLLECTION_COUNT + # - DATABASE_DATA_SIZE + # - DATABASE_STORAGE_SIZE + # - DATABASE_INDEX_SIZE + # - DATABASE_INDEX_COUNT + # - DATABASE_EXTENT_COUNT + # - DATABASE_OBJECT_COUNT + # - DATABASE_VIEW_COUNT Logging: - LOG_FORMAT: "%(levelname)s | %(asctime)s | %(threadName)s | %(name)s | %(message)s" + LOG_FORMAT: "%(levelname)s | %(asctime)s | %(threadName)s | %(name)s | %(message)s" # Log format used by the python logging module to write logs in a file. ROTATION_TYPE: D ROTATION_INTERVAL: 1 BACKUP_COUNT: 7 - ENABLE_CONSOLE_LOG: true - ENABLE_LOGFILE: true - LOG_FILEPATH: /tmp/mongodbatlascollector.log + ENABLE_CONSOLE_LOG: true # Enables printing logs in a console. + ENABLE_LOGFILE: true # Set to TRUE to write all logs and errors to a log file. + LOG_FILEPATH: /tmp/mongodbatlascollector.log # Path of the log file used when ENABLE_LOGFILE is set to TRUE. 
LOG_LEVEL: "INFO" Collection: ENVIRONMENT: onprem - NUM_WORKERS: 2 + NUM_WORKERS: 2 # Number of threads to spawn for API calls. OUTPUT_HANDLER: HTTP - MAX_RETRY: 5 - BACKOFF_FACTOR: 1 - TIMEOUT: 90 # Increase to 2 minutes if the sumo server response time is slow + MAX_RETRY: 3 # Number of retries to attempt in case of request failure. + BACKOFF_FACTOR: 1 # A backoff factor to apply between attempts after the second try. If the backoff_factor is 0.1, then sleep() will sleep for [0.0s, 0.2s, 0.4s, ...] between retries. + TIMEOUT: 90 # Increase to 2 minutes if the sumo server response time is slow. This timeout is used by the requests library. COMPRESSED: true - MAX_PAYLOAD_BYTESIZE: 4190208 #4MB - END_TIME_EPOCH_OFFSET_SECONDS: 120 - BACKFILL_DAYS: 0 # Determines the start_time of the collection - DBNAME: "mongodbatlas" - DB_DIR: ~/sumo - MIN_REQUEST_WINDOW_LENGTH: 60 # IN SECONDS - MAX_REQUEST_WINDOW_LENGTH: 900 # IN SECONDS - ACTIVATE_TIME_AND_MEMORY_TRACKING: False - # Clusters: ["cluster1-shard-00-00.abc123.mongodb.net"] + MAX_PAYLOAD_BYTESIZE: 4190208 # Maximum size (default is 4MB) of the chunk to be sent to Sumo Logic. + END_TIME_EPOCH_OFFSET_SECONDS: 120 # The collector assumes that all the log data will be available via the API by (now - 2 minutes). + BACKFILL_DAYS: 0 # Number of days before the event collection will start. If the value is 1, then events are fetched from yesterday to today. Atlas retains the last 30 days of log messages and system event audit messages. https://www.mongodb.com/docs/atlas/mongodb-logs/#view-and-download-mongodb-logs + DBNAME: "mongodbatlas" # Change the DBNAME so that the state (keys) maintained (bookkeeping) in the database (key value store) is not in conflict. + DB_DIR: ~/sumo # When running locally, the db is created in this directory. + MIN_REQUEST_WINDOW_LENGTH: 60 # Minimum window length for the request window in seconds. + MAX_REQUEST_WINDOW_LENGTH: 900 # Maximum window length for the request window in seconds.
+ ACTIVATE_TIME_AND_MEMORY_TRACKING: false # Set this to true to enable time and memory based logging. + # Clusters: ["cluster1"] # User-provided list of cluster aliases for selecting specific clusters. By default, it selects all the clusters. DeployMetaData: PACKAGENAME: "sumologic-mongodb-atlas" @@ -91,6 +220,7 @@ DeployMetaData: ENABLE_LAYER: false SumoLogic: - HTTP_LOGS_ENDPOINT: null - HTTP_METRICS_ENDPOINT: null + HTTP_LOGS_ENDPOINT: null # HTTP source endpoint URL created in Sumo Logic for ingesting Logs. + HTTP_METRICS_ENDPOINT: null # HTTP source endpoint URL created in Sumo Logic for ingesting Metrics. + diff --git a/sumomongodbatlascollector/time_and_memory_tracker.py b/sumomongodbatlascollector/time_and_memory_tracker.py index 6c6e1cf..3a4e232 100644 --- a/sumomongodbatlascollector/time_and_memory_tracker.py +++ b/sumomongodbatlascollector/time_and_memory_tracker.py @@ -16,49 +16,57 @@ def __exit__(self, exc_type, exc_val, exc_tb): self.end() def start(self, operation_name: str) -> str: - if not self.activate: - return "" + operation_name = operation_name.lower() entry = { - "operation_name": operation_name, - "start_time": time.time(), - "start_memory": psutil.Process().memory_info().rss, + "operation_name": operation_name } + if self.activate: + entry.update({ + "start_time": time.time(), + "start_memory": psutil.Process().memory_info().rss + }) + self._stack.append(entry) return self._format_start_message(entry) def end(self, operation_name: str = None) -> str: - if not self.activate: - return "" if self._stack: - exit_time = time.time() - exit_memory = psutil.Process().memory_info().rss - entry = self._stack[-1] + entry = self._stack[-1] if operation_name: operation_name = operation_name.lower() if entry["operation_name"] != operation_name: raise ValueError( f"Attempting to end '{operation_name}' but the current operation is '{entry['operation_name']}'" ) - self._stack.pop() - return self._format_end_message(entry, exit_time, exit_memory) - return
"" + if self.activate: + entry.update({ + "exit_time": time.time(), + "exit_memory": psutil.Process().memory_info().rss + }) + return self._format_end_message(entry) + else: + return "" def _format_start_message(self, entry: Dict[str, Any]) -> str: - return ( - f"Starting {entry['operation_name']} at {entry['start_time']:.2f}, " - f"initial memory: {entry['start_memory'] / 1024 / 1024:.2f} MB" - ) + if self.activate: + return ( + f"operation_name: {entry['operation_name']} start_time: {entry['start_time']:.3f} " + f"initial_memory_mb: {entry['start_memory'] / 1024 / 1024:.2f} " + ) + else: + return "" - def _format_end_message( - self, entry: Dict[str, Any], exit_time: float, exit_memory: int - ) -> str: - execution_time = exit_time - entry["start_time"] - memory_used = exit_memory - entry["start_memory"] - return ( - f"{entry['operation_name']} completed in {execution_time:.2f} seconds, " - f"used {memory_used / 1024 / 1024:.2f} MB, " - f"start time: {entry['start_time']:.2f}, end time: {exit_time:.2f}, final memory: {exit_memory / 1024 / 1024:.2f} MB" - ) + def _format_end_message(self, entry: Dict[str, Any]) -> str: + if self.activate: + execution_time = entry['exit_time'] - entry["start_time"] + memory_used = entry['exit_memory'] - entry["start_memory"] + return ( + f"operation_name: {entry['operation_name']} execution_seconds: {execution_time:.3f} " + f"memory_used_mb: {memory_used / 1024 / 1024:.2f} " + f"start_time: {entry['start_time']:.3f}, end_time: {entry['exit_time']:.3f}, final_memory_mb: {entry['exit_memory'] / 1024 / 1024:.2f} " + ) + else: + return "" diff --git a/template.yaml b/template.yaml index f0f536a..545527e 100644 --- a/template.yaml +++ b/template.yaml @@ -18,7 +18,7 @@ Metadata: LicenseUrl: ./LICENSE Name: sumologic-mongodb-atlas ReadmeUrl: ./README.md - SemanticVersion: 1.0.17 + SemanticVersion: 1.0.18 SourceCodeUrl: https://github.com/SumoLogic/sumologic-mongodb-atlas SpdxLicenseId: Apache-2.0 Outputs: @@ -28,9 +28,10 @@ Outputs: 
Fn::GetAtt: - MongoDBAtlasFunction - Arn - MongoDBAtlasTable: - Description: MongoDBAtlasTable DynamoDB Table Name - Value: !Ref MongoDBAtlasTable + MongoDBAtlasTable: + Description: MongoDBAtlasTable DynamoDB Table Name + Value: + Ref: MongoDBAtlasTableResource Parameters: HttpLogsEndpoint: Type: String @@ -47,9 +48,11 @@ Parameters: Resources: MongoDBAtlasFunction: Properties: - CodeUri: ./target/aws/build/ + CodeUri: ./sumomongodbatlascollector Environment: Variables: + DBNAME: + Ref: MongoDBAtlasTableResource ENABLE_CONSOLE_LOG: 'false' ENVIRONMENT: aws HTTP_LOGS_ENDPOINT: @@ -71,32 +74,34 @@ Resources: Type: Schedule Handler: main.main MemorySize: 256 - Policies: - - Version: "2012-10-17" - Statement: - - Effect: Allow - Action: - - dynamodb:PutItem + Policies: + - Statement: + - Action: + - dynamodb:PutItem - dynamodb:UpdateItem - dynamodb:GetItem - dynamodb:Scan - dynamodb:Query - Resource: !GetAtt MongoDBAtlasTableResource.Arn + - dynamodb:DescribeTable + Effect: Allow + Resource: + Fn::GetAtt: + - MongoDBAtlasTableResource + - Arn + Version: '2012-10-17' Runtime: python3.11 Timeout: 900 Type: AWS::Serverless::Function - - MongoDBAtlasTableResource: - Type: AWS::DynamoDB::Table - Properties: - TableName: !Sub ${AWS::StackName}-mongodbatlastable - AttributeDefinitions: - - AttributeName: id - AttributeType: S - KeySchema: - - AttributeName: id - KeyType: HASH - ProvisionedThroughput: - ReadCapacityUnits: 5 - WriteCapacityUnits: 5 + MongoDBAtlasTableResource: + Properties: + AttributeDefinitions: + - AttributeName: key_col + AttributeType: S + KeySchema: + - AttributeName: key_col + KeyType: HASH + ProvisionedThroughput: + ReadCapacityUnits: 30 + WriteCapacityUnits: 20 + Type: AWS::DynamoDB::Table Transform: AWS::Serverless-2016-10-31 diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/sumomongodbatlascollector/mock_mongo_server.py b/tests/mock_mongo_server.py similarity index 100% rename from 
sumomongodbatlascollector/mock_mongo_server.py rename to tests/mock_mongo_server.py diff --git a/sumomongodbatlascollector/mock_functions/process_metric_mock.py b/tests/process_metric_mock.py similarity index 100% rename from sumomongodbatlascollector/mock_functions/process_metric_mock.py rename to tests/process_metric_mock.py diff --git a/sumomongodbatlascollector/test_mock.py b/tests/test_mock.py similarity index 99% rename from sumomongodbatlascollector/test_mock.py rename to tests/test_mock.py index ba83e50..f72b88a 100644 --- a/sumomongodbatlascollector/test_mock.py +++ b/tests/test_mock.py @@ -5,7 +5,7 @@ import time from unittest.mock import Mock, patch -from api import ( +from sumomongodbatlascollector.api import ( # AlertsAPI, DatabaseMetricsAPI, DiskMetricsAPI, @@ -16,7 +16,7 @@ ProcessMetricsAPI, ProjectEventsAPI, ) -from main import MongoDBAtlasCollector +from sumomongodbatlascollector.main import MongoDBAtlasCollector @patch("sumoappclient.sumoclient.base.BaseAPI") diff --git a/sumomongodbatlascollector/test_mongo_collector.py b/tests/test_mongo_collector.py similarity index 95% rename from sumomongodbatlascollector/test_mongo_collector.py rename to tests/test_mongo_collector.py index 4b76e89..5c4819e 100644 --- a/sumomongodbatlascollector/test_mongo_collector.py +++ b/tests/test_mongo_collector.py @@ -3,9 +3,8 @@ import tempfile import os from unittest.mock import patch, MagicMock, call -from main import MongoDBAtlasCollector +from sumomongodbatlascollector.main import MongoDBAtlasCollector from sumoappclient.sumoclient.base import BaseCollector -# from sumoappclient.provider.factory import ProviderFactory from requests.auth import HTTPDigestAuth @@ -26,8 +25,8 @@ def mock_config(): "NUM_WORKERS": 2, "TIMEOUT": 30, "Clusters": [ - "cluster1-shard-00-00.abc123.mongodb.net", - "cluster2-shard-00-00.xyz789.mongodb.net", + "Cluster2", + "Cluster1", ], "DATA_REFRESH_TIME": 3600000, }, @@ -106,6 +105,7 @@ def test_mongodb_atlas_collector_init( assert 
hasattr(mongodb_atlas_collector, "mongosess") +@pytest.mark.skip() def test_get_current_dir(mongodb_atlas_collector): expected_dir = os.path.dirname(os.path.abspath(__file__)) assert mongodb_atlas_collector.get_current_dir() == expected_dir @@ -121,10 +121,10 @@ def test_get_cluster_name(mongodb_atlas_collector): def test_get_user_provided_cluster_name(mongodb_atlas_collector): - assert mongodb_atlas_collector._get_user_provided_cluster_name() == [ - "cluster1-shard-00-00.abc123.mongodb.net", - "cluster2-shard-00-00.xyz789.mongodb.net", - ] + assert set(mongodb_atlas_collector._get_user_provided_cluster_name()) == set([ + "Cluster2", + "Cluster1", + ]) mongodb_atlas_collector.collection_config.pop("Clusters") assert mongodb_atlas_collector._get_user_provided_cluster_name() == [] @@ -135,7 +135,7 @@ def test_getpaginateddata(mongodb_atlas_collector): url = "https://test.com/api" kwargs = {"auth": mongodb_atlas_collector.digestauth, "params": {"pageNum": 1}} - with patch("main.ClientMixin.make_request") as mock_make_request: + with patch("sumomongodbatlascollector.main.ClientMixin.make_request") as mock_make_request: mock_make_request.side_effect = [ (True, {"results": [{"id": 1}, {"id": 2}]}), (True, {"results": [{"id": 3}]}), @@ -188,7 +188,7 @@ def test_getpaginateddata(mongodb_atlas_collector): ) -@patch("main.MongoDBAtlasCollector.getpaginateddata") +@patch("sumomongodbatlascollector.main.MongoDBAtlasCollector.getpaginateddata") def test_get_all_processes_from_project(mock_getpaginateddata, mongodb_atlas_collector): mock_data = [ { @@ -212,11 +212,11 @@ def test_get_all_processes_from_project(mock_getpaginateddata, mongodb_atlas_col mongodb_atlas_collector._get_all_processes_from_project() ) - assert process_ids == ["process1", "process2"] - assert set(hostnames) == { + assert set(process_ids) == set(["process1", "process2"]) + assert set(hostnames) == set([ "cluster1-shard-00-00.abc123.mongodb.net", "cluster2-shard-00-00.xyz789.mongodb.net", - } + ]) assert 
cluster_mapping == {"cluster1": "Cluster1", "cluster2": "Cluster2"} expected_url = f"{mongodb_atlas_collector.api_config['BASE_URL']}/groups/{mongodb_atlas_collector.api_config['PROJECT_ID']}/processes" @@ -229,7 +229,7 @@ def test_get_all_processes_from_project(mock_getpaginateddata, mongodb_atlas_col mock_getpaginateddata.assert_called_once_with(expected_url, **expected_kwargs) -@patch("main.MongoDBAtlasCollector.getpaginateddata") +@patch("sumomongodbatlascollector.main.MongoDBAtlasCollector.getpaginateddata") def test_get_all_processes_from_project_with_user_provided_clusters( mock_getpaginateddata, mongodb_atlas_collector ): @@ -260,12 +260,11 @@ def test_get_all_processes_from_project_with_user_provided_clusters( mongodb_atlas_collector._get_all_processes_from_project() ) - assert process_ids == ["process1", "process2", "process3"] - assert set(hostnames) == { + assert set(process_ids) == set(["process1", "process2"]) + assert set(hostnames) == set([ "cluster1-shard-00-00.abc123.mongodb.net", - "cluster2-shard-00-00.xyz789.mongodb.net", - "cluster3-shard-00-00.def456.mongodb.net", - } + "cluster2-shard-00-00.xyz789.mongodb.net" + ]) assert cluster_mapping == {"cluster1": "Cluster1", "cluster2": "Cluster2"} expected_url = f"{mongodb_atlas_collector.api_config['BASE_URL']}/groups/{mongodb_atlas_collector.api_config['PROJECT_ID']}/processes" @@ -278,7 +277,7 @@ def test_get_all_processes_from_project_with_user_provided_clusters( mock_getpaginateddata.assert_called_once_with(expected_url, **expected_kwargs) -@patch("main.MongoDBAtlasCollector.getpaginateddata") +@patch("sumomongodbatlascollector.main.MongoDBAtlasCollector.getpaginateddata") def test_get_all_disks_from_host(mock_getpaginateddata, mongodb_atlas_collector): mock_data = [ { @@ -314,8 +313,8 @@ def test_get_all_disks_from_host(mock_getpaginateddata, mongodb_atlas_collector) mock_getpaginateddata.assert_has_calls(expected_calls) -@patch("main.MongoDBAtlasCollector._get_all_databases") 
-@patch("main.get_current_timestamp") +@patch("sumomongodbatlascollector.main.MongoDBAtlasCollector._get_all_databases") +@patch("sumomongodbatlascollector.main.get_current_timestamp") def test_set_database_names( mock_get_current_timestamp, mock_get_all_databases, mongodb_atlas_collector ): @@ -339,7 +338,7 @@ def test_set_database_names( @pytest.fixture def mock_get_current_timestamp(): - with patch("main.get_current_timestamp") as mock: + with patch("sumomongodbatlascollector.main.get_current_timestamp") as mock: mock.return_value = 1627776000000 # Example timestamp yield mock diff --git a/sumomongodbatlascollector/test_mongo_db_api.py b/tests/test_mongo_db_api.py similarity index 96% rename from sumomongodbatlascollector/test_mongo_db_api.py rename to tests/test_mongo_db_api.py index 499130f..ce8c467 100644 --- a/sumomongodbatlascollector/test_mongo_db_api.py +++ b/tests/test_mongo_db_api.py @@ -6,7 +6,7 @@ from sumoappclient.sumoclient.base import BaseAPI # from sumoappclient.common.utils import get_current_timestamp -from api import MongoDBAPI +from sumomongodbatlascollector.api import MongoDBAPI class ConcreteMongoDBAPI(MongoDBAPI): @@ -62,7 +62,7 @@ def test_init(mongodb_api): assert isinstance(mongodb_api.digestauth, HTTPDigestAuth) assert mongodb_api.digestauth.username == "public_key" assert mongodb_api.digestauth.password == "private_key" - assert mongodb_api.MAX_REQUEST_WINDOW_LENGTH == 3600 + assert mongodb_api.MAX_REQUEST_WINDOW_LENGTH == 900 assert mongodb_api.MIN_REQUEST_WINDOW_LENGTH == 60 assert isinstance(mongodb_api, MongoDBAPI) assert isinstance(mongodb_api, BaseAPI)