From 2e8bbc1ae6523cf429291bfea4dc24a56626b255 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Fri, 11 Sep 2020 00:01:52 -0400 Subject: [PATCH 001/172] Change schema to support multiple operations --- astrolabe/runner.py | 67 +++++++++++------------ tests/retryReads-resizeCluster.yaml | 27 +++++---- tests/retryReads-toggleServerSideJS.yaml | 28 +++++----- tests/retryWrites-resizeCluster.yaml | 27 +++++---- tests/retryWrites-toggleServerSideJS.yaml | 27 ++++----- 5 files changed, 91 insertions(+), 85 deletions(-) diff --git a/astrolabe/runner.py b/astrolabe/runner.py index c0117316..8b9be45f 100644 --- a/astrolabe/runner.py +++ b/astrolabe/runner.py @@ -70,7 +70,7 @@ def get_connection_string(self): if self.__connection_string is None: cluster = self.cluster_url.get().data prefix, suffix = cluster.srvAddress.split("//") - uri_options = self.spec.maintenancePlan.uriOptions.copy() + uri_options = self.spec.uriOptions.copy() # Boolean options must be converted to lowercase strings. for key, value in uri_options.items(): @@ -91,20 +91,16 @@ def is_cluster_state(self, goal_state): cluster_info = self.cluster_url.get().data return cluster_info.stateName.lower() == goal_state.lower() - def verify_cluster_configuration_matches(self, state): + def verify_cluster_configuration_matches(self, expected_configuration): """Verify that the cluster config is what we expect it to be (based on maintenance status). 
Raises AssertionError.""" - state = state.lower() - if state not in ("initial", "final"): - raise AstrolabeTestCaseError( - "State must be either 'initial' or 'final'.") cluster_config = self.cluster_url.get().data assert_subset( cluster_config, - self.spec.maintenancePlan[state].clusterConfiguration) + expected_configuration.clusterConfiguration) process_args = self.cluster_url.processArgs.get().data assert_subset( - process_args, self.spec.maintenancePlan[state].processArgs) + process_args, expected_configuration.processArgs) def initialize(self): """ @@ -113,7 +109,7 @@ def initialize(self): """ LOGGER.info("Initializing cluster {!r}".format(self.cluster_name)) - cluster_config = self.spec.maintenancePlan.initial.\ + cluster_config = self.spec.initialConfiguration.\ clusterConfiguration.copy() cluster_config["name"] = self.cluster_name try: @@ -130,7 +126,7 @@ def initialize(self): raise # Apply processArgs if provided. - process_args = self.spec.maintenancePlan.initial.processArgs + process_args = self.spec.initialConfiguration.processArgs if process_args: self.client.groups[self.project.id].\ clusters[self.cluster_name].processArgs.patch(**process_args) @@ -163,34 +159,35 @@ def run(self, persist_cluster=False, startup_time=1): driver_workload=self.spec.driverWorkload, startup_time=startup_time) - # Step-3: begin maintenance routine. - final_config = self.spec.maintenancePlan.final - cluster_config = final_config.clusterConfiguration - process_args = final_config.processArgs + for operation in self.spec.operations: + # Step-3: begin maintenance routine. 
+ final_config = operation.setClusterConfiguration + cluster_config = final_config.clusterConfiguration + process_args = final_config.processArgs - if not cluster_config and not process_args: - raise RuntimeError("invalid maintenance plan") + if not cluster_config and not process_args: + raise RuntimeError("invalid maintenance plan") - if cluster_config: - LOGGER.info("Pushing cluster configuration update") - self.cluster_url.patch(**cluster_config) + if cluster_config: + LOGGER.info("Pushing cluster configuration update") + self.cluster_url.patch(**cluster_config) - if process_args: - LOGGER.info("Pushing process arguments update") - self.cluster_url.processArgs.patch(**process_args) - - # Sleep before polling to give Atlas time to update cluster.stateName. - sleep(3) - - # Step-4: wait until maintenance completes (cluster is IDLE). - selector = BooleanCallablePoller( - frequency=self.config.polling_frequency, - timeout=self.config.polling_timeout) - LOGGER.info("Waiting for cluster maintenance to complete") - selector.poll([self], attribute="is_cluster_state", args=("IDLE",), - kwargs={}) - self.verify_cluster_configuration_matches("final") - LOGGER.info("Cluster maintenance complete") + if process_args: + LOGGER.info("Pushing process arguments update") + self.cluster_url.processArgs.patch(**process_args) + + # Sleep before polling to give Atlas time to update cluster.stateName. + sleep(3) + + # Step-4: wait until maintenance completes (cluster is IDLE). 
+ selector = BooleanCallablePoller( + frequency=self.config.polling_frequency, + timeout=self.config.polling_timeout) + LOGGER.info("Waiting for cluster maintenance to complete") + selector.poll([self], attribute="is_cluster_state", args=("IDLE",), + kwargs={}) + self.verify_cluster_configuration_matches(final_config) + LOGGER.info("Cluster maintenance complete") # Step-5: interrupt driver workload and capture streams stats = self.workload_runner.terminate() diff --git a/tests/retryReads-resizeCluster.yaml b/tests/retryReads-resizeCluster.yaml index 87d2805c..a1d1b48d 100644 --- a/tests/retryReads-resizeCluster.yaml +++ b/tests/retryReads-resizeCluster.yaml @@ -1,21 +1,24 @@ -maintenancePlan: - initial: - clusterConfiguration: - clusterType: REPLICASET - providerSettings: - providerName: AWS - regionName: US_WEST_1 - instanceSizeName: M10 - processArgs: {} - final: +initialConfiguration: + clusterConfiguration: + clusterType: REPLICASET + providerSettings: + providerName: AWS + regionName: US_WEST_1 + instanceSizeName: M10 + processArgs: {} + +operations: + setClusterConfiguration: clusterConfiguration: providerSettings: providerName: AWS regionName: US_WEST_1 instanceSizeName: M20 processArgs: {} - uriOptions: - retryReads: true + +uriOptions: + retryReads: true + driverWorkload: database: test_database collection: test_collection diff --git a/tests/retryReads-toggleServerSideJS.yaml b/tests/retryReads-toggleServerSideJS.yaml index cf641ee1..9cdcdd31 100644 --- a/tests/retryReads-toggleServerSideJS.yaml +++ b/tests/retryReads-toggleServerSideJS.yaml @@ -1,19 +1,21 @@ -maintenancePlan: - initial: - clusterConfiguration: - clusterType: REPLICASET - providerSettings: - providerName: AWS - regionName: US_WEST_1 - instanceSizeName: M10 - processArgs: - javascriptEnabled: false - final: +initialConfiguration: + clusterConfiguration: + clusterType: REPLICASET + providerSettings: + providerName: AWS + regionName: US_WEST_1 + instanceSizeName: M10 + processArgs: {} + 
+operations: + setClusterConfiguration: clusterConfiguration: {} processArgs: javascriptEnabled: true - uriOptions: - retryReads: true + +uriOptions: + retryReads: true + driverWorkload: database: test_database collection: test_collection diff --git a/tests/retryWrites-resizeCluster.yaml b/tests/retryWrites-resizeCluster.yaml index 91ff9152..2aaefdab 100644 --- a/tests/retryWrites-resizeCluster.yaml +++ b/tests/retryWrites-resizeCluster.yaml @@ -1,21 +1,24 @@ -maintenancePlan: - initial: - clusterConfiguration: - clusterType: REPLICASET - providerSettings: - providerName: AWS - regionName: US_WEST_1 - instanceSizeName: M10 - processArgs: {} - final: +initialConfiguration: + clusterConfiguration: + clusterType: REPLICASET + providerSettings: + providerName: AWS + regionName: US_WEST_1 + instanceSizeName: M10 + processArgs: {} + +operations: + setClusterConfiguration: clusterConfiguration: providerSettings: providerName: AWS regionName: US_WEST_1 instanceSizeName: M20 processArgs: {} - uriOptions: - retryWrites: true + +uriOptions: + retryWrites: true + driverWorkload: database: test_database collection: test_collection diff --git a/tests/retryWrites-toggleServerSideJS.yaml b/tests/retryWrites-toggleServerSideJS.yaml index fb17175c..ee23b11c 100644 --- a/tests/retryWrites-toggleServerSideJS.yaml +++ b/tests/retryWrites-toggleServerSideJS.yaml @@ -1,19 +1,20 @@ -maintenancePlan: - initial: - clusterConfiguration: - clusterType: REPLICASET - providerSettings: - providerName: AWS - regionName: US_WEST_1 - instanceSizeName: M10 - processArgs: - javascriptEnabled: false - final: +initialConfiguration: + clusterConfiguration: + clusterType: REPLICASET + providerSettings: + providerName: AWS + regionName: US_WEST_1 + instanceSizeName: M10 + processArgs: {} + +operations: clusterConfiguration: {} processArgs: javascriptEnabled: true - uriOptions: - retryWrites: true + +uriOptions: + retryWrites: true + driverWorkload: database: test_database collection: test_collection From 
46c16274e637c14f3cadbdcde43ff5e42cb3e42c Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Sat, 12 Sep 2020 05:33:36 -0400 Subject: [PATCH 002/172] use driver 2.13 --- .evergreen/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.evergreen/config.yml b/.evergreen/config.yml index 13d89d5f..08177b2a 100644 --- a/.evergreen/config.yml +++ b/.evergreen/config.yml @@ -184,7 +184,7 @@ axes: variables: DRIVER_DIRNAME: "ruby" DRIVER_REPOSITORY: "https://github.com/mongodb/mongo-ruby-driver" - DRIVER_REVISION: "master" + DRIVER_REVISION: "2.13-stable" - id: node-master display_name: "Node (master)" variables: From cac3bb8ecbd294a607a8207ad6705d4e80ba1693 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Sat, 12 Sep 2020 05:34:09 -0400 Subject: [PATCH 003/172] operations --- astrolabe/runner.py | 2 +- tests/retryReads-resizeCluster.yaml | 15 ++++++++------- tests/retryReads-toggleServerSideJS.yaml | 9 +++++---- tests/retryWrites-resizeCluster.yaml | 15 ++++++++------- tests/retryWrites-toggleServerSideJS.yaml | 1 + 5 files changed, 23 insertions(+), 19 deletions(-) diff --git a/astrolabe/runner.py b/astrolabe/runner.py index 8b9be45f..4b81f1a9 100644 --- a/astrolabe/runner.py +++ b/astrolabe/runner.py @@ -136,7 +136,7 @@ def run(self, persist_cluster=False, startup_time=1): self.id, self.cluster_name)) # Step-0: sanity-check the cluster configuration. - self.verify_cluster_configuration_matches("initial") + self.verify_cluster_configuration_matches(self.spec.initialConfiguration) # Start the test timer. 
timer = Timer() diff --git a/tests/retryReads-resizeCluster.yaml b/tests/retryReads-resizeCluster.yaml index a1d1b48d..387b4cb0 100644 --- a/tests/retryReads-resizeCluster.yaml +++ b/tests/retryReads-resizeCluster.yaml @@ -8,13 +8,14 @@ initialConfiguration: processArgs: {} operations: - setClusterConfiguration: - clusterConfiguration: - providerSettings: - providerName: AWS - regionName: US_WEST_1 - instanceSizeName: M20 - processArgs: {} + - + setClusterConfiguration: + clusterConfiguration: + providerSettings: + providerName: AWS + regionName: US_WEST_1 + instanceSizeName: M20 + processArgs: {} uriOptions: retryReads: true diff --git a/tests/retryReads-toggleServerSideJS.yaml b/tests/retryReads-toggleServerSideJS.yaml index 9cdcdd31..a5e84fad 100644 --- a/tests/retryReads-toggleServerSideJS.yaml +++ b/tests/retryReads-toggleServerSideJS.yaml @@ -8,10 +8,11 @@ initialConfiguration: processArgs: {} operations: - setClusterConfiguration: - clusterConfiguration: {} - processArgs: - javascriptEnabled: true + - + setClusterConfiguration: + clusterConfiguration: {} + processArgs: + javascriptEnabled: true uriOptions: retryReads: true diff --git a/tests/retryWrites-resizeCluster.yaml b/tests/retryWrites-resizeCluster.yaml index 2aaefdab..df729d60 100644 --- a/tests/retryWrites-resizeCluster.yaml +++ b/tests/retryWrites-resizeCluster.yaml @@ -8,13 +8,14 @@ initialConfiguration: processArgs: {} operations: - setClusterConfiguration: - clusterConfiguration: - providerSettings: - providerName: AWS - regionName: US_WEST_1 - instanceSizeName: M20 - processArgs: {} + - + setClusterConfiguration: + clusterConfiguration: + providerSettings: + providerName: AWS + regionName: US_WEST_1 + instanceSizeName: M20 + processArgs: {} uriOptions: retryWrites: true diff --git a/tests/retryWrites-toggleServerSideJS.yaml b/tests/retryWrites-toggleServerSideJS.yaml index ee23b11c..b706c1f8 100644 --- a/tests/retryWrites-toggleServerSideJS.yaml +++ b/tests/retryWrites-toggleServerSideJS.yaml 
@@ -8,6 +8,7 @@ initialConfiguration: processArgs: {} operations: + - clusterConfiguration: {} processArgs: javascriptEnabled: true From 64a0496e841314bbc2e4225bd7fe4dbde6a257bc Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Sat, 12 Sep 2020 05:34:24 -0400 Subject: [PATCH 004/172] metrics collection --- integrations/ruby/executor.rb | 41 ++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/integrations/ruby/executor.rb b/integrations/ruby/executor.rb index a809c356..706576d3 100644 --- a/integrations/ruby/executor.rb +++ b/integrations/ruby/executor.rb @@ -6,14 +6,48 @@ class UnknownOperation < StandardError; end class UnknownOperationConfiguration < StandardError; end +class MetricsCollector + def initialize + @operations = {} + @samples = [] + end + + attr_reader :samples + + def started(event) + @operations[event.operation_id] = event + end + + def succeeded(event) + started_event = @operations.delete(event.operation_id) + raise "Started event for #{event.operation_id} not found" unless started_event + @samples << { + command_name: started_event.command_name, + duration: event.duration, + } + end + + def failed(event) + started_event = @operations.delete(event.operation_id) + raise "Started event for #{event.operation_id} not found" unless started_event + @samples << { + command_name: started_event.command_name, + duration: event.duration, + failure: event.failure, + } + end +end + class Executor def initialize(uri, spec) @uri, @spec = uri, spec @operation_count = @failure_count = @error_count = 0 + @metrics_collector = MetricsCollector.new end attr_reader :uri, :spec attr_reader :operation_count, :failure_count, :error_count + attr_reader :metrics_collector def run set_signal_handler @@ -125,6 +159,9 @@ def write_result File.open('results.json', 'w') do |f| f << JSON.dump(result) end + File.open('metrics.json', 'w') do |f| + f << JSON.dump(metrics_collector.samples) + end end def collection @@ -132,6 +169,8 @@ def 
collection end def client - @client ||= Mongo::Client.new(uri) + @client ||= Mongo::Client.new(uri).tap do |client| + client.subscribe(Mongo::Monitoring::COMMAND, metrics_collector) + end end end From 0fe25eec4af1af920fc8a0f40b17d6f61a9f554e Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Sat, 12 Sep 2020 06:38:59 -0400 Subject: [PATCH 005/172] test failover implementation --- astrolabe/runner.py | 65 ++++++++++++++++++++++++++------------------- 1 file changed, 37 insertions(+), 28 deletions(-) diff --git a/astrolabe/runner.py b/astrolabe/runner.py index 4b81f1a9..844c0b73 100644 --- a/astrolabe/runner.py +++ b/astrolabe/runner.py @@ -160,34 +160,35 @@ def run(self, persist_cluster=False, startup_time=1): startup_time=startup_time) for operation in self.spec.operations: - # Step-3: begin maintenance routine. - final_config = operation.setClusterConfiguration - cluster_config = final_config.clusterConfiguration - process_args = final_config.processArgs - - if not cluster_config and not process_args: - raise RuntimeError("invalid maintenance plan") - - if cluster_config: - LOGGER.info("Pushing cluster configuration update") - self.cluster_url.patch(**cluster_config) - - if process_args: - LOGGER.info("Pushing process arguments update") - self.cluster_url.processArgs.patch(**process_args) - - # Sleep before polling to give Atlas time to update cluster.stateName. - sleep(3) - - # Step-4: wait until maintenance completes (cluster is IDLE). - selector = BooleanCallablePoller( - frequency=self.config.polling_frequency, - timeout=self.config.polling_timeout) - LOGGER.info("Waiting for cluster maintenance to complete") - selector.poll([self], attribute="is_cluster_state", args=("IDLE",), - kwargs={}) - self.verify_cluster_configuration_matches(final_config) - LOGGER.info("Cluster maintenance complete") + if hasattr(operation, 'setClusterConfiguration'): + # Step-3: begin maintenance routine. 
+ final_config = operation.setClusterConfiguration + cluster_config = final_config.clusterConfiguration + process_args = final_config.processArgs + + if not cluster_config and not process_args: + raise RuntimeError("invalid maintenance plan") + + if cluster_config: + LOGGER.info("Pushing cluster configuration update") + self.cluster_url.patch(**cluster_config) + + if process_args: + LOGGER.info("Pushing process arguments update") + self.cluster_url.processArgs.patch(**process_args) + + # Sleep before polling to give Atlas time to update cluster.stateName. + sleep(3) + + # Step-4: wait until maintenance completes (cluster is IDLE). + self.wait_for_idle() + self.verify_cluster_configuration_matches(final_config) + LOGGER.info("Cluster maintenance complete") + + if hasattr(operation, 'testFailover'): + self.cluster_url['restartPrimaries'].post() + + self.wait_for_idle() # Step-5: interrupt driver workload and capture streams stats = self.workload_runner.terminate() @@ -220,6 +221,14 @@ def run(self, persist_cluster=False, startup_time=1): self.cluster_name)) return junit_test + + def wait_for_idle(self): + selector = BooleanCallablePoller( + frequency=self.config.polling_frequency, + timeout=self.config.polling_timeout) + LOGGER.info("Waiting for cluster maintenance to complete") + selector.poll([self], attribute="is_cluster_state", args=("IDLE",), + kwargs={}) class SpecTestRunnerBase: From 8560798807504d4b5e375a33969f9a1ad1dbd315 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Sat, 12 Sep 2020 06:39:04 -0400 Subject: [PATCH 006/172] test failover test --- tests/retryReads-testFailover.yaml | 35 ++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 tests/retryReads-testFailover.yaml diff --git a/tests/retryReads-testFailover.yaml b/tests/retryReads-testFailover.yaml new file mode 100644 index 00000000..5ad7af88 --- /dev/null +++ b/tests/retryReads-testFailover.yaml @@ -0,0 +1,35 @@ +initialConfiguration: + clusterConfiguration: + 
clusterType: REPLICASET + providerSettings: + providerName: AWS + regionName: US_WEST_1 + instanceSizeName: M10 + processArgs: {} + +operations: + - + testFailover: true + +uriOptions: + retryReads: true + +driverWorkload: + database: test_database + collection: test_collection + testData: + - {_id: 1, x: 11} + - {_id: 2, x: 22} + - {_id: 3, x: 33} + operations: + - + object: collection + name: find + arguments: + filter: + _id: {$gt: 1} + sort: + _id: 1 + result: + - {_id: 2, x: 22} + - {_id: 3, x: 33} From 75144cf74c5f77d8c62d006d1ef00c9c173f9381 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Sun, 13 Sep 2020 10:33:28 -0400 Subject: [PATCH 007/172] priority takeover --- tests/retryReads-primaryTakeover.yaml | 62 +++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 tests/retryReads-primaryTakeover.yaml diff --git a/tests/retryReads-primaryTakeover.yaml b/tests/retryReads-primaryTakeover.yaml new file mode 100644 index 00000000..88be7a06 --- /dev/null +++ b/tests/retryReads-primaryTakeover.yaml @@ -0,0 +1,62 @@ +initialConfiguration: + clusterConfiguration: + clusterType: REPLICASET + providerSettings: + providerName: AWS + instanceSizeName: M10 + replicationSpecs: + - + id: '111111111111111111111111' + numShards: 1 + regionsConfig: + US_WEST_1: + electableNodes: 3 + priority: 7 + readOnlyNodes: 0 + processArgs: {} + +operations: + - + setClusterConfiguration: + clusterConfiguration: + clusterType: REPLICASET + providerSettings: + providerName: AWS + instanceSizeName: M10 + replicationSpecs: + - + id: '111111111111111111111111' + numShards: 1 + regionsConfig: + US_WEST_1: + electableNodes: 2 + priority: 6 + readOnlyNodes: 0 + US_EAST_1: + electableNodes: 1 + priority: 7 + readOnlyNodes: 0 + processArgs: {} + +uriOptions: + retryReads: true + +driverWorkload: + database: test_database + collection: test_collection + testData: + - {_id: 1, x: 11} + - {_id: 2, x: 22} + - {_id: 3, x: 33} + operations: + - + object: collection + name: find 
+ arguments: + filter: + _id: {$gt: 1} + sort: + _id: 1 + result: + - {_id: 2, x: 22} + - {_id: 3, x: 33} From 4a409fba503e8e50f050f59e01ae97c578c08414 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Wed, 16 Sep 2020 18:38:36 -0400 Subject: [PATCH 008/172] comparison diagnostics --- astrolabe/utils.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/astrolabe/utils.py b/astrolabe/utils.py index cc017bbd..334d7e2d 100644 --- a/astrolabe/utils.py +++ b/astrolabe/utils.py @@ -84,12 +84,15 @@ def assert_subset(dict1, dict2): """Utility that asserts that `dict2` is a subset of `dict1`, while accounting for nested fields.""" for key, value in dict2.items(): - if key not in dict1: - raise AssertionError("not a subset") - if isinstance(value, dict): - assert_subset(dict1[key], value) + if key == 'replicationSpecs': + pass else: - assert dict1[key] == value + if key not in dict1: + raise AssertionError("not a subset: '%s' from %s is not in %s" % (key, repr(dict2), repr(dict1))) + if isinstance(value, dict): + assert_subset(dict1[key], value) + else: + assert dict1[key] == value, "Different values for '%s':\nexpected '%s'\nactual '%s'" % (key, repr(dict2[key]), repr(dict1[key])) class Timer: From a0091d12a7699f4521ecb112d1b8076e98840762 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Wed, 16 Sep 2020 18:38:59 -0400 Subject: [PATCH 009/172] atlas descriptive details --- atlasclient/client.py | 1 + atlasclient/exceptions.py | 12 +++++++----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/atlasclient/client.py b/atlasclient/client.py index d9709638..b9b268af 100644 --- a/atlasclient/client.py +++ b/atlasclient/client.py @@ -248,6 +248,7 @@ def handle_response(method, response): kwargs = { 'response': response, 'request_method': method, + 'detail': data.get('detail'), 'error_code': data.get('errorCode')} if response.status_code == 400: diff --git a/atlasclient/exceptions.py b/atlasclient/exceptions.py index e6b9cd3e..dd16693b 
100644 --- a/atlasclient/exceptions.py +++ b/atlasclient/exceptions.py @@ -17,7 +17,7 @@ class AtlasApiBaseError(Exception): """Base Exception class for all ``atlasclient`` errors.""" - def __init__(self, msg, resource_url=None, request_method=None, + def __init__(self, msg, resource_url=None, request_method=None, detail=None, status_code=None, error_code=None, headers=None): self._msg = msg self.request_method = request_method @@ -25,12 +25,14 @@ def __init__(self, msg, resource_url=None, request_method=None, self.status_code = status_code self.error_code = error_code self.headers = headers + self.detail = detail + #import pdb;pdb.set_trace() def __str__(self): if self.request_method and self.resource_url: if self.error_code: - return '{} Error Code: {!r} ({} {})'.format( - self._msg, self.error_code, self.request_method, + return '{} Error Code: {!r}: {} ({} {})'.format( + self._msg, self.error_code, self.detail, self.request_method, self.resource_url) else: return '{} ({} {})'.format( @@ -43,9 +45,9 @@ class AtlasClientError(AtlasApiBaseError): class AtlasApiError(AtlasApiBaseError): - def __init__(self, msg, response=None, request_method=None, + def __init__(self, msg, response=None, request_method=None, detail=None, error_code=None): - kwargs = {'request_method': request_method, + kwargs = {'request_method': request_method, 'detail': detail, 'error_code': error_code} # Parse remaining fields from response object. 
From bf97152c0c0db6dd9b5cdc6a02a2897738f16121 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Fri, 18 Sep 2020 04:00:10 -0400 Subject: [PATCH 010/172] vm restart --- astrolabe/runner.py | 8 ++++++++ atlasclient/client.py | 11 ++++++++--- tests/retryReads-vmRestart.yaml | 35 +++++++++++++++++++++++++++++++++ 3 files changed, 51 insertions(+), 3 deletions(-) create mode 100644 tests/retryReads-vmRestart.yaml diff --git a/astrolabe/runner.py b/astrolabe/runner.py index 844c0b73..44984bd7 100644 --- a/astrolabe/runner.py +++ b/astrolabe/runner.py @@ -189,6 +189,14 @@ def run(self, persist_cluster=False, startup_time=1): self.cluster_url['restartPrimaries'].post() self.wait_for_idle() + + if hasattr(operation, 'restartVms'): + #import pdb;pdb.set_trace() + url = "/api/private/nds/groups/%s/clusters/%s/reboot" % (self.project.id, self.cluster_name) + self.client.request('POST', url) + #self.cluster_url['reboot'].post() + + self.wait_for_idle() # Step-5: interrupt driver workload and capture streams stats = self.workload_runner.terminate() diff --git a/atlasclient/client.py b/atlasclient/client.py index b9b268af..6058c43a 100644 --- a/atlasclient/client.py +++ b/atlasclient/client.py @@ -220,6 +220,8 @@ def request(self, method, path, **params): def construct_resource_url(self, path, api_version=None): url_template = "{base_url}/{version}/{resource_path}" + if path[0] == '/': + url_template = 'https://cloud.mongodb.com{resource_path}' return url_template.format( base_url=self.config.base_url, version=api_version or self.config.api_version, @@ -241,15 +243,18 @@ def handle_response(method, response): raise AtlasRateLimitError('Too many requests', response=response, request_method=method, error_code=429) - if data is None: + if data is None and False: raise AtlasApiError('Unable to decode JSON response.', response=response, request_method=method) kwargs = { 'response': response, 'request_method': method, - 'detail': data.get('detail'), - 'error_code': 
data.get('errorCode')} + } + + if data is not None: + kwargs['detail'] = data.get('detail') + kwargs['error_code'] = data.get('errorCode') if response.status_code == 400: raise AtlasApiError('400: Bad Request.', **kwargs) diff --git a/tests/retryReads-vmRestart.yaml b/tests/retryReads-vmRestart.yaml new file mode 100644 index 00000000..87b5f9a9 --- /dev/null +++ b/tests/retryReads-vmRestart.yaml @@ -0,0 +1,35 @@ +initialConfiguration: + clusterConfiguration: + clusterType: REPLICASET + providerSettings: + providerName: AWS + regionName: US_WEST_1 + instanceSizeName: M10 + processArgs: {} + +operations: + - + restartVms: true + +uriOptions: + retryReads: true + +driverWorkload: + database: test_database + collection: test_collection + testData: + - {_id: 1, x: 11} + - {_id: 2, x: 22} + - {_id: 3, x: 33} + operations: + - + object: collection + name: find + arguments: + filter: + _id: {$gt: 1} + sort: + _id: 1 + result: + - {_id: 2, x: 22} + - {_id: 3, x: 33} From c2ae81c9a8c6f90af3e216b4764a672136b537bb Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Fri, 18 Sep 2020 04:20:08 -0400 Subject: [PATCH 011/172] process restart --- tests/retryReads-processRestart.yaml | 43 ++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 tests/retryReads-processRestart.yaml diff --git a/tests/retryReads-processRestart.yaml b/tests/retryReads-processRestart.yaml new file mode 100644 index 00000000..eb079a95 --- /dev/null +++ b/tests/retryReads-processRestart.yaml @@ -0,0 +1,43 @@ +initialConfiguration: + clusterConfiguration: + clusterType: REPLICASET + providerSettings: + providerName: AWS + regionName: US_WEST_1 + instanceSizeName: M10 + mongoDBVersion: 4.2.0 + processArgs: {} + +operations: + - + clusterConfiguration: + clusterType: REPLICASET + providerSettings: + providerName: AWS + regionName: US_WEST_1 + instanceSizeName: M10 + mongoDBVersion: 4.2.1 + processArgs: {} + +uriOptions: + retryReads: true + +driverWorkload: + database: test_database + 
collection: test_collection + testData: + - {_id: 1, x: 11} + - {_id: 2, x: 22} + - {_id: 3, x: 33} + operations: + - + object: collection + name: find + arguments: + filter: + _id: {$gt: 1} + sort: + _id: 1 + result: + - {_id: 2, x: 22} + - {_id: 3, x: 33} From 257c4c2a3e9690911877f24d18cbfc58f8321f89 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Tue, 22 Sep 2020 23:22:25 -0400 Subject: [PATCH 012/172] fix subset checks --- astrolabe/utils.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/astrolabe/utils.py b/astrolabe/utils.py index 334d7e2d..68f8a89f 100644 --- a/astrolabe/utils.py +++ b/astrolabe/utils.py @@ -83,16 +83,21 @@ def create_click_option(option_spec, **kwargs): def assert_subset(dict1, dict2): """Utility that asserts that `dict2` is a subset of `dict1`, while accounting for nested fields.""" - for key, value in dict2.items(): - if key == 'replicationSpecs': - pass + for key, value2 in dict2.items(): + if key not in dict1: + raise AssertionError("not a subset: '%s' from %s is not in %s" % (key, repr(dict2), repr(dict1))) + value1 = dict1[key] + if isinstance(value2, dict): + assert_subset(value1, value2) + elif isinstance(value2, list): + assert len(value1) == len(value2) + for i in range(len(value2)): + if isinstance(value2[i], dict): + assert_subset(value1[i], value2[i]) + else: + assert value1[i] == value2[i] else: - if key not in dict1: - raise AssertionError("not a subset: '%s' from %s is not in %s" % (key, repr(dict2), repr(dict1))) - if isinstance(value, dict): - assert_subset(dict1[key], value) - else: - assert dict1[key] == value, "Different values for '%s':\nexpected '%s'\nactual '%s'" % (key, repr(dict2[key]), repr(dict1[key])) + assert value1 == value2, "Different values for '%s':\nexpected '%s'\nactual '%s'" % (key, repr(dict2[key]), repr(dict1[key])) class Timer: From 808b853a9ce7efe48142c4edd41a7d01d0c348ad Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Wed, 23 Sep 2020 10:06:49 -0400 
Subject: [PATCH 013/172] assert region --- astrolabe/runner.py | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/astrolabe/runner.py b/astrolabe/runner.py index 44984bd7..d6074802 100644 --- a/astrolabe/runner.py +++ b/astrolabe/runner.py @@ -14,9 +14,10 @@ import logging import os -from time import sleep +from time import sleep, monotonic from urllib.parse import urlencode +from pymongo import MongoClient from tabulate import tabulate import junitparser import yaml @@ -197,6 +198,29 @@ def run(self, persist_cluster=False, startup_time=1): #self.cluster_url['reboot'].post() self.wait_for_idle() + + if hasattr(operation, 'assertPrimaryRegion'): + region = operation['assertPrimaryRegion'] + + cluster_config = self.cluster_url.get().data + deadline = monotonic() + 20 + + while True: + mc = MongoClient(cluster_config['connectionStrings']['standard'], username='atlasuser', password='mypassword123') + rsc = mc.admin.command('replSetGetConfig') + member = [m for m in rsc['config']['members'] + if m['horizons']['PUBLIC'] == '%s:%s' % mc.primary][0] + member_region = member['tags']['region'] + mc.close() + + if region == member_region: + break + + if monotonic() > deadline: + raise Exception("Primary in cluster not in expected region '%s' (actual region '%s')" % (region, member_region)) + else: + sleep(5) + # Step-5: interrupt driver workload and capture streams stats = self.workload_runner.terminate() From a29c3049658c31f01ac97dbdc93c38ccbf0006ea Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Wed, 23 Sep 2020 10:06:58 -0400 Subject: [PATCH 014/172] assert region in primary takeover test --- tests/retryReads-primaryTakeover.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/retryReads-primaryTakeover.yaml b/tests/retryReads-primaryTakeover.yaml index 88be7a06..0a15097c 100644 --- a/tests/retryReads-primaryTakeover.yaml +++ b/tests/retryReads-primaryTakeover.yaml @@ -16,6 +16,8 @@ initialConfiguration: processArgs: 
{} operations: + - assertPrimaryRegion: US_WEST_1 + - setClusterConfiguration: clusterConfiguration: @@ -38,6 +40,9 @@ operations: readOnlyNodes: 0 processArgs: {} + - assertPrimaryRegion: US_EAST_1 + + uriOptions: retryReads: true From 64f401667dc888468c20b5c20b23212f8956c721 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Wed, 23 Sep 2020 12:35:18 -0400 Subject: [PATCH 015/172] process restart via tls args --- tests/retryReads-processRestart.yaml | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/retryReads-processRestart.yaml b/tests/retryReads-processRestart.yaml index eb079a95..8181a5a3 100644 --- a/tests/retryReads-processRestart.yaml +++ b/tests/retryReads-processRestart.yaml @@ -5,19 +5,19 @@ initialConfiguration: providerName: AWS regionName: US_WEST_1 instanceSizeName: M10 - mongoDBVersion: 4.2.0 - processArgs: {} + processArgs: + minimumEnabledTlsProtocol: TLS_1_1 operations: - - clusterConfiguration: - clusterType: REPLICASET - providerSettings: - providerName: AWS - regionName: US_WEST_1 - instanceSizeName: M10 - mongoDBVersion: 4.2.1 - processArgs: {} + setClusterConfiguration: + clusterConfiguration: + providerSettings: + providerName: AWS + regionName: US_WEST_1 + instanceSizeName: M10 + processArgs: + minimumEnabledTlsProtocol: TLS_1_2 uriOptions: retryReads: true From 20adcb3135e590bd509ea4904ac6e7718ac5111e Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Wed, 23 Sep 2020 12:35:23 -0400 Subject: [PATCH 016/172] primary removal --- tests/retryReads-primaryRemoval.yaml | 67 ++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 tests/retryReads-primaryRemoval.yaml diff --git a/tests/retryReads-primaryRemoval.yaml b/tests/retryReads-primaryRemoval.yaml new file mode 100644 index 00000000..483b674e --- /dev/null +++ b/tests/retryReads-primaryRemoval.yaml @@ -0,0 +1,67 @@ +initialConfiguration: + clusterConfiguration: + clusterType: REPLICASET + providerSettings: + providerName: 
AWS + instanceSizeName: M10 + replicationSpecs: + - + id: '111111111111111111111111' + numShards: 1 + regionsConfig: + US_WEST_1: + electableNodes: 2 + priority: 6 + readOnlyNodes: 0 + US_EAST_1: + electableNodes: 1 + priority: 7 + readOnlyNodes: 0 + processArgs: {} + +operations: + - assertPrimaryRegion: US_EAST_1 + + - + setClusterConfiguration: + clusterConfiguration: + clusterType: REPLICASET + providerSettings: + providerName: AWS + instanceSizeName: M10 + replicationSpecs: + - + id: '111111111111111111111111' + numShards: 1 + regionsConfig: + US_WEST_1: + electableNodes: 3 + priority: 7 + readOnlyNodes: 0 + processArgs: {} + + - assertPrimaryRegion: US_WEST_1 + + +uriOptions: + retryReads: true + +driverWorkload: + database: test_database + collection: test_collection + testData: + - {_id: 1, x: 11} + - {_id: 2, x: 22} + - {_id: 3, x: 33} + operations: + - + object: collection + name: find + arguments: + filter: + _id: {$gt: 1} + sort: + _id: 1 + result: + - {_id: 2, x: 22} + - {_id: 3, x: 33} From 96f8caf702a7df8daa424a31f67fab57eea25564 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Wed, 23 Sep 2020 12:35:35 -0400 Subject: [PATCH 017/172] primary takeover increase timeout --- astrolabe/runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/astrolabe/runner.py b/astrolabe/runner.py index d6074802..175fc7a8 100644 --- a/astrolabe/runner.py +++ b/astrolabe/runner.py @@ -203,7 +203,7 @@ def run(self, persist_cluster=False, startup_time=1): region = operation['assertPrimaryRegion'] cluster_config = self.cluster_url.get().data - deadline = monotonic() + 20 + deadline = monotonic() + 90 while True: mc = MongoClient(cluster_config['connectionStrings']['standard'], username='atlasuser', password='mypassword123') From ee208073a6dfc9e8472acc6bb32ed8b11e252fa2 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Mon, 28 Sep 2020 23:10:14 -0400 Subject: [PATCH 018/172] dev atlas --- astrolabe/cli.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 
deletion(-) diff --git a/astrolabe/cli.py b/astrolabe/cli.py index 830f7681..caa55939 100644 --- a/astrolabe/cli.py +++ b/astrolabe/cli.py @@ -14,7 +14,7 @@ import logging from pprint import pprint -import unittest +import unittest, os from urllib.parse import unquote_plus import click @@ -90,6 +90,8 @@ def cli(ctx, atlas_base_url, atlas_api_username, Astrolabe is a command-line application for running automated driver tests against a MongoDB Atlas cluster undergoing maintenance. """ + + atlas_base_url = 'https://cloud-dev.mongodb.com/api/atlas' # Create an atlasclient and attach it to the context. client = AtlasClient( base_url=atlas_base_url, @@ -98,6 +100,12 @@ def cli(ctx, atlas_base_url, atlas_api_username, timeout=http_timeout) ctx.obj = client + ctx.admin_client = AtlasClient( + base_url=atlas_base_url, + username=os.environ['ATLAS_ADMIN_API_USERNAME'], + password=os.environ['ATLAS_ADMIN_API_PASSWORD'], + timeout=http_timeout) + # Configure logging. loglevel = getattr(logging, log_level.upper()) logging.basicConfig( From c24f5837399f601d76abc01bbe6ff63b91937a37 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Mon, 28 Sep 2020 23:31:40 -0400 Subject: [PATCH 019/172] vm restart --- astrolabe/cli.py | 7 ++++--- astrolabe/runner.py | 13 +++++++------ atlasclient/client.py | 3 ++- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/astrolabe/cli.py b/astrolabe/cli.py index caa55939..230089b6 100644 --- a/astrolabe/cli.py +++ b/astrolabe/cli.py @@ -98,13 +98,13 @@ def cli(ctx, atlas_base_url, atlas_api_username, username=atlas_api_username, password=atlas_api_password, timeout=http_timeout) - ctx.obj = client - ctx.admin_client = AtlasClient( + admin_client = AtlasClient( base_url=atlas_base_url, username=os.environ['ATLAS_ADMIN_API_USERNAME'], password=os.environ['ATLAS_ADMIN_API_PASSWORD'], timeout=http_timeout) + ctx.obj = (client,admin_client) # Configure logging. 
loglevel = getattr(logging, log_level.upper()) @@ -396,7 +396,8 @@ def run_single_test(ctx, spec_test_file, workload_executor, LOGGER.info(tabulate_astrolabe_configuration(config)) # Step-1: create the Test-Runner. - runner = SingleTestRunner(client=ctx.obj, + runner = SingleTestRunner(client=ctx.obj[0], + admin_client=ctx.obj[1], test_locator_token=spec_test_file, configuration=config, xunit_output=xunit_output, diff --git a/astrolabe/runner.py b/astrolabe/runner.py index 175fc7a8..e23e2b39 100644 --- a/astrolabe/runner.py +++ b/astrolabe/runner.py @@ -38,10 +38,11 @@ class AtlasTestCase: - def __init__(self, *, client, test_name, cluster_name, specification, + def __init__(self, *, client, admin_client, test_name, cluster_name, specification, configuration): # Initialize. self.client = client + self.admin_client = admin_client self.id = test_name self.cluster_name = cluster_name self.spec = specification @@ -192,11 +193,10 @@ def run(self, persist_cluster=False, startup_time=1): self.wait_for_idle() if hasattr(operation, 'restartVms'): - #import pdb;pdb.set_trace() url = "/api/private/nds/groups/%s/clusters/%s/reboot" % (self.project.id, self.cluster_name) - self.client.request('POST', url) - #self.cluster_url['reboot'].post() + self.admin_client.request('POST', url) + sleep(5) self.wait_for_idle() if hasattr(operation, 'assertPrimaryRegion'): @@ -265,10 +265,11 @@ def wait_for_idle(self): class SpecTestRunnerBase: """Base class for spec test runners.""" - def __init__(self, *, client, test_locator_token, configuration, xunit_output, + def __init__(self, *, client, admin_client, test_locator_token, configuration, xunit_output, persist_clusters, workload_startup_time): self.cases = [] self.client = client + self.admin_client = admin_client self.config = configuration self.xunit_logger = SingleTestXUnitLogger(output_directory=xunit_output) self.persist_clusters = persist_clusters @@ -287,7 +288,7 @@ def __init__(self, *, client, test_locator_token, configuration, 
xunit_output, cluster_name = get_cluster_name(test_name, self.config.name_salt) self.cases.append( - AtlasTestCase(client=self.client, + AtlasTestCase(client=self.client, admin_client=self.admin_client, test_name=test_name, cluster_name=cluster_name, specification=test_spec, diff --git a/atlasclient/client.py b/atlasclient/client.py index 6058c43a..b537bba3 100644 --- a/atlasclient/client.py +++ b/atlasclient/client.py @@ -152,6 +152,7 @@ def __init__(self, *, username, password, - `timeout` (float, optional): time, in seconds, after which an HTTP request to the Atlas API should timeout. Default: 10.0. """ + self.username=username self.config = ClientConfiguration( base_url=base_url, api_version=api_version, timeout=timeout, auth=requests.auth.HTTPDigestAuth( @@ -221,7 +222,7 @@ def request(self, method, path, **params): def construct_resource_url(self, path, api_version=None): url_template = "{base_url}/{version}/{resource_path}" if path[0] == '/': - url_template = 'https://cloud.mongodb.com{resource_path}' + url_template = 'https://cloud-dev.mongodb.com{resource_path}' return url_template.format( base_url=self.config.base_url, version=api_version or self.config.api_version, From b151269310e1e09f15fcb1724b9ca21cd8193a3f Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Tue, 29 Sep 2020 03:56:12 -0400 Subject: [PATCH 020/172] reformat --- tests/retryReads-primaryTakeover.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/retryReads-primaryTakeover.yaml b/tests/retryReads-primaryTakeover.yaml index 0a15097c..5b979357 100644 --- a/tests/retryReads-primaryTakeover.yaml +++ b/tests/retryReads-primaryTakeover.yaml @@ -16,7 +16,8 @@ initialConfiguration: processArgs: {} operations: - - assertPrimaryRegion: US_WEST_1 + - + assertPrimaryRegion: US_WEST_1 - setClusterConfiguration: From f8ea7e1039137890b5fae03c36208ea859e5ef43 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Tue, 29 Sep 2020 22:31:07 -0400 Subject: [PATCH 021/172] fix process 
restart rs --- tests/retryReads-processRestart.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/retryReads-processRestart.yaml b/tests/retryReads-processRestart.yaml index 8181a5a3..63d2387c 100644 --- a/tests/retryReads-processRestart.yaml +++ b/tests/retryReads-processRestart.yaml @@ -6,7 +6,7 @@ initialConfiguration: regionName: US_WEST_1 instanceSizeName: M10 processArgs: - minimumEnabledTlsProtocol: TLS_1_1 + minimumEnabledTlsProtocol: TLS1_1 operations: - @@ -17,7 +17,7 @@ operations: regionName: US_WEST_1 instanceSizeName: M10 processArgs: - minimumEnabledTlsProtocol: TLS_1_2 + minimumEnabledTlsProtocol: TLS1_2 uriOptions: retryReads: true From 4154a61ead19c6e956a5e8c6e38259dfe45675ff Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Tue, 29 Sep 2020 22:31:18 -0400 Subject: [PATCH 022/172] process restart sharded --- tests/retryReads-processRestart-sharded.yaml | 43 ++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 tests/retryReads-processRestart-sharded.yaml diff --git a/tests/retryReads-processRestart-sharded.yaml b/tests/retryReads-processRestart-sharded.yaml new file mode 100644 index 00000000..cede599a --- /dev/null +++ b/tests/retryReads-processRestart-sharded.yaml @@ -0,0 +1,43 @@ +initialConfiguration: + clusterConfiguration: + clusterType: SHARDED + providerSettings: + providerName: AWS + regionName: US_WEST_1 + instanceSizeName: M10 + processArgs: + minimumEnabledTlsProtocol: TLS1_1 + +operations: + - + setClusterConfiguration: + clusterConfiguration: + providerSettings: + providerName: AWS + regionName: US_WEST_1 + instanceSizeName: M10 + processArgs: + minimumEnabledTlsProtocol: TLS1_2 + +uriOptions: + retryReads: true + +driverWorkload: + database: test_database + collection: test_collection + testData: + - {_id: 1, x: 11} + - {_id: 2, x: 22} + - {_id: 3, x: 33} + operations: + - + object: collection + name: find + arguments: + filter: + _id: {$gt: 1} + sort: + _id: 1 + result: + - {_id: 2, 
x: 22} + - {_id: 3, x: 33} From 0cdd9da255e5390c7fe5368ce9b761fd752979e6 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Tue, 29 Sep 2020 22:35:18 -0400 Subject: [PATCH 023/172] sharded tests --- tests/retryReads-testFailover-sharded.yaml | 35 ++++++++++++++++++++++ tests/retryReads-vmRestart-sharded.yaml | 35 ++++++++++++++++++++++ 2 files changed, 70 insertions(+) create mode 100644 tests/retryReads-testFailover-sharded.yaml create mode 100644 tests/retryReads-vmRestart-sharded.yaml diff --git a/tests/retryReads-testFailover-sharded.yaml b/tests/retryReads-testFailover-sharded.yaml new file mode 100644 index 00000000..53495d45 --- /dev/null +++ b/tests/retryReads-testFailover-sharded.yaml @@ -0,0 +1,35 @@ +initialConfiguration: + clusterConfiguration: + clusterType: SHARDED + providerSettings: + providerName: AWS + regionName: US_WEST_1 + instanceSizeName: M10 + processArgs: {} + +operations: + - + testFailover: true + +uriOptions: + retryReads: true + +driverWorkload: + database: test_database + collection: test_collection + testData: + - {_id: 1, x: 11} + - {_id: 2, x: 22} + - {_id: 3, x: 33} + operations: + - + object: collection + name: find + arguments: + filter: + _id: {$gt: 1} + sort: + _id: 1 + result: + - {_id: 2, x: 22} + - {_id: 3, x: 33} diff --git a/tests/retryReads-vmRestart-sharded.yaml b/tests/retryReads-vmRestart-sharded.yaml new file mode 100644 index 00000000..239b8d45 --- /dev/null +++ b/tests/retryReads-vmRestart-sharded.yaml @@ -0,0 +1,35 @@ +initialConfiguration: + clusterConfiguration: + clusterType: SHARDED + providerSettings: + providerName: AWS + regionName: US_WEST_1 + instanceSizeName: M10 + processArgs: {} + +operations: + - + restartVms: true + +uriOptions: + retryReads: true + +driverWorkload: + database: test_database + collection: test_collection + testData: + - {_id: 1, x: 11} + - {_id: 2, x: 22} + - {_id: 3, x: 33} + operations: + - + object: collection + name: find + arguments: + filter: + _id: {$gt: 1} + sort: + _id: 1 + 
result: + - {_id: 2, x: 22} + - {_id: 3, x: 33} From 7956354d1fe40433cc3e8095036babdc007aba2a Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Wed, 30 Sep 2020 16:14:37 -0400 Subject: [PATCH 024/172] report how long polling was done for --- astrolabe/poller.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/astrolabe/poller.py b/astrolabe/poller.py index b88668ab..22977855 100644 --- a/astrolabe/poller.py +++ b/astrolabe/poller.py @@ -53,7 +53,7 @@ def poll(self, objects, *, attribute, args, kwargs): LOGGER.debug("Waiting {:.2f} seconds before retrying".format( self.interval)) sleep(self.interval) - raise PollingTimeoutError + raise PollingTimeoutError("Polling timed out after %s seconds" % self.timeout) class BooleanCallablePoller(PollerBase): From f647ae963bffe3bd89bc0e873057b653c22a7d89 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Wed, 30 Sep 2020 16:15:45 -0400 Subject: [PATCH 025/172] increase polling interval for sharded clusters --- astrolabe/configuration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/astrolabe/configuration.py b/astrolabe/configuration.py index 35bd4b60..fc2773f0 100644 --- a/astrolabe/configuration.py +++ b/astrolabe/configuration.py @@ -36,7 +36,7 @@ 'help': 'Maximum time (in s) to poll API endpoints.', 'cliopt': '--polling-timeout', 'envvar': 'ATLAS_POLLING_TIMEOUT', - 'default': 1200.0}, + 'default': 3600.0}, 'ATLAS_POLLING_FREQUENCY': { 'type': click.FLOAT, 'help': 'Frequency (in Hz) at which to poll API endpoints.', From c39861f983600c234bf6e37eba16027414afa290 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Wed, 30 Sep 2020 17:15:26 -0400 Subject: [PATCH 026/172] vm move tests --- tests/retryReads-move-sharded.yaml | 41 ++++++++++++++++++++++++++++++ tests/retryReads-move.yaml | 41 ++++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+) create mode 100644 tests/retryReads-move-sharded.yaml create mode 100644 tests/retryReads-move.yaml diff --git 
a/tests/retryReads-move-sharded.yaml b/tests/retryReads-move-sharded.yaml new file mode 100644 index 00000000..df53fc8f --- /dev/null +++ b/tests/retryReads-move-sharded.yaml @@ -0,0 +1,41 @@ +initialConfiguration: + clusterConfiguration: + clusterType: SHARDED + providerSettings: + providerName: AWS + regionName: US_EAST_1 + instanceSizeName: M10 + processArgs: {} + +operations: + - + setClusterConfiguration: + clusterConfiguration: + providerSettings: + providerName: AWS + regionName: US_EAST_1 + instanceSizeName: M20 + processArgs: {} + +uriOptions: + retryReads: true + +driverWorkload: + database: test_database + collection: test_collection + testData: + - {_id: 1, x: 11} + - {_id: 2, x: 22} + - {_id: 3, x: 33} + operations: + - + object: collection + name: find + arguments: + filter: + _id: {$gt: 1} + sort: + _id: 1 + result: + - {_id: 2, x: 22} + - {_id: 3, x: 33} diff --git a/tests/retryReads-move.yaml b/tests/retryReads-move.yaml new file mode 100644 index 00000000..387b4cb0 --- /dev/null +++ b/tests/retryReads-move.yaml @@ -0,0 +1,41 @@ +initialConfiguration: + clusterConfiguration: + clusterType: REPLICASET + providerSettings: + providerName: AWS + regionName: US_WEST_1 + instanceSizeName: M10 + processArgs: {} + +operations: + - + setClusterConfiguration: + clusterConfiguration: + providerSettings: + providerName: AWS + regionName: US_WEST_1 + instanceSizeName: M20 + processArgs: {} + +uriOptions: + retryReads: true + +driverWorkload: + database: test_database + collection: test_collection + testData: + - {_id: 1, x: 11} + - {_id: 2, x: 22} + - {_id: 3, x: 33} + operations: + - + object: collection + name: find + arguments: + filter: + _id: {$gt: 1} + sort: + _id: 1 + result: + - {_id: 2, x: 22} + - {_id: 3, x: 33} From ed088944d7a24091bc4bd6c199666aa9d81c3128 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Thu, 8 Oct 2020 22:18:42 -0400 Subject: [PATCH 027/172] no create flag --- astrolabe/cli.py | 8 +++++++- astrolabe/runner.py | 10 +++++++--- 2 
files changed, 14 insertions(+), 4 deletions(-) diff --git a/astrolabe/cli.py b/astrolabe/cli.py index 230089b6..5b51761d 100644 --- a/astrolabe/cli.py +++ b/astrolabe/cli.py @@ -74,6 +74,10 @@ 'the test run. Useful when a test will be run multiple times with ' 'the same cluster name salt.')) +NOCREATE_FLAG = click.option( + '--no-create', is_flag=True, default=False, + help=('Do not create clusters at the beginning of the run, assume they have already been provisioned by a previous run.')) + @click.group() @create_click_option(CONFIGOPTS.ATLAS_API_BASE_URL) @@ -372,12 +376,13 @@ def spec_tests(): @POLLINGFREQUENCY_OPTION @XUNITOUTPUT_OPTION @NODELETE_FLAG +@NOCREATE_FLAG @EXECUTORSTARTUPTIME_OPTION @click.pass_context def run_single_test(ctx, spec_test_file, workload_executor, db_username, db_password, org_name, project_name, cluster_name_salt, polling_timeout, polling_frequency, - xunit_output, no_delete, startup_time): + xunit_output, no_delete, no_create, startup_time): """ Runs one APM test. This is the main entry point for running APM tests in headless environments. @@ -402,6 +407,7 @@ def run_single_test(ctx, spec_test_file, workload_executor, configuration=config, xunit_output=xunit_output, persist_clusters=no_delete, + no_create=no_create, workload_startup_time=startup_time) # Step-2: run the tests. diff --git a/astrolabe/runner.py b/astrolabe/runner.py index e23e2b39..824b40cd 100644 --- a/astrolabe/runner.py +++ b/astrolabe/runner.py @@ -104,11 +104,14 @@ def verify_cluster_configuration_matches(self, expected_configuration): assert_subset( process_args, expected_configuration.processArgs) - def initialize(self): + def initialize(self, no_create=False): """ Initialize a cluster with the configuration required by the test specification. 
""" + if no_create: + return + LOGGER.info("Initializing cluster {!r}".format(self.cluster_name)) cluster_config = self.spec.initialConfiguration.\ @@ -266,13 +269,14 @@ def wait_for_idle(self): class SpecTestRunnerBase: """Base class for spec test runners.""" def __init__(self, *, client, admin_client, test_locator_token, configuration, xunit_output, - persist_clusters, workload_startup_time): + persist_clusters, no_create, workload_startup_time): self.cases = [] self.client = client self.admin_client = admin_client self.config = configuration self.xunit_logger = SingleTestXUnitLogger(output_directory=xunit_output) self.persist_clusters = persist_clusters + self.no_create = no_create self.workload_startup_time = workload_startup_time for full_path in self.find_spec_tests(test_locator_token): @@ -348,7 +352,7 @@ def run(self): # Step-1: initialize tests clusters for case in self.cases: - case.initialize() + case.initialize(no_create=self.no_create) # Step-2: run tests round-robin until all have been run. remaining_test_cases = self.cases.copy() From 06200912d845d597a5d687ca8d5ecb6498bc0634 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Thu, 8 Oct 2020 22:41:32 -0400 Subject: [PATCH 028/172] fake unified format impl --- astrolabe/runner.py | 4 +- astrolabe/utils.py | 11 ++-- integrations/ruby/executor.rb | 112 +++++++++++++++++----------------- 3 files changed, 65 insertions(+), 62 deletions(-) diff --git a/astrolabe/runner.py b/astrolabe/runner.py index 824b40cd..91cceacf 100644 --- a/astrolabe/runner.py +++ b/astrolabe/runner.py @@ -148,8 +148,8 @@ def run(self, persist_cluster=False, startup_time=1): timer.start() # Step-1: load test data. 
- test_data = self.spec.driverWorkload.get('testData') - if test_data: + test_datas = self.spec.driverWorkload.get('initialData') + if test_datas: LOGGER.info("Loading test data on cluster {!r}".format( self.cluster_name)) connection_string = self.get_connection_string() diff --git a/astrolabe/utils.py b/astrolabe/utils.py index 68f8a89f..d8c8b664 100644 --- a/astrolabe/utils.py +++ b/astrolabe/utils.py @@ -178,11 +178,12 @@ def load_test_data(connection_string, driver_workload): kwargs['tlsCAFile'] = certifi.where() client = MongoClient(connection_string, **kwargs) - coll = client.get_database( - driver_workload.database).get_collection( - driver_workload.collection) - coll.drop() - coll.insert_many(driver_workload.testData) + for spec in driver_workload.initialData: + coll = client.get_database( + spec.databaseName).get_collection( + spec.collectionName) + coll.drop() + coll.insert_many(spec.documents) class DriverWorkloadSubprocessRunner: diff --git a/integrations/ruby/executor.rb b/integrations/ruby/executor.rb index 706576d3..cccf2314 100644 --- a/integrations/ruby/executor.rb +++ b/integrations/ruby/executor.rb @@ -78,70 +78,72 @@ def load_data end def perform_operations - spec['operations'].each do |op_spec| - begin - case op_spec['name'] - when 'find' - unless op_spec['object'] == 'collection' - raise UnknownOperationConfiguration, "Can only find on a collection" - end + spec['tests'].each do |test| + test['operations'].each do |op_spec| + begin + case op_spec['name'] + when 'find' + unless op_spec['object'] == 'collection0' + raise UnknownOperationConfiguration, "Can only find on a collection" + end - args = op_spec['arguments'].dup - op = collection.find(args.delete('filter') || {}) - if sort = args.delete('sort') - op = op.sort(sort) - end - unless args.empty? 
- raise UnknownOperationConfiguration, "Unhandled keys in args: #{args}" - end + args = op_spec['arguments'].dup + op = collection.find(args.delete('filter') || {}) + if sort = args.delete('sort') + op = op.sort(sort) + end + unless args.empty? + raise UnknownOperationConfiguration, "Unhandled keys in args: #{args}" + end - docs = op.to_a + docs = op.to_a - if expected_docs = op_spec['result'] - if expected_docs != docs - puts "Failure" - @failure_count += 1 + if expected_docs = op_spec['expectResult'] + if expected_docs != docs + puts "Failure" + @failure_count += 1 + end + end + when 'insertOne' + unless op_spec['object'] == 'collection0' + raise UnknownOperationConfiguration, "Can only find on a collection" end - end - when 'insertOne' - unless op_spec['object'] == 'collection' - raise UnknownOperationConfiguration, "Can only find on a collection" - end - args = op_spec['arguments'].dup - document = args.delete('document') - unless args.empty? - raise UnknownOperationConfiguration, "Unhandled keys in args: #{args}" - end + args = op_spec['arguments'].dup + document = args.delete('document') + unless args.empty? + raise UnknownOperationConfiguration, "Unhandled keys in args: #{args}" + end - collection.insert_one(document) - when 'updateOne' - unless op_spec['object'] == 'collection' - raise UnknownOperationConfiguration, "Can only find on a collection" - end + collection.insert_one(document) + when 'updateOne' + unless op_spec['object'] == 'collection0' + raise UnknownOperationConfiguration, "Can only find on a collection" + end - args = op_spec['arguments'].dup - scope = collection - if filter = args.delete('filter') - scope = collection.find(filter) - end - if update = args.delete('update') - scope.update_one(update) - end - unless args.empty? 
- raise UnknownOperationConfiguration, "Unhandled keys in args: #{args}" + args = op_spec['arguments'].dup + scope = collection + if filter = args.delete('filter') + scope = collection.find(filter) + end + if update = args.delete('update') + scope.update_one(update) + end + unless args.empty? + raise UnknownOperationConfiguration, "Unhandled keys in args: #{args}" + end + else + raise UnknownOperation, "Unhandled operation #{op_spec['name']}" end - else - raise UnknownOperation, "Unhandled operation #{op_spec['name']}" + #rescue Mongo::Error => e + # The validator intentionally gives us invalid operations, figure out + # how to handle this requirement while maintaining diagnostics. + rescue => e + STDERR.puts "Error: #{e.class}: #{e}" + @error_count += 1 end - #rescue Mongo::Error => e - # The validator intentionally gives us invalid operations, figure out - # how to handle this requirement while maintaining diagnostics. - rescue => e - STDERR.puts "Error: #{e.class}: #{e}" - @error_count += 1 + @operation_count += 1 end - @operation_count += 1 end end @@ -165,7 +167,7 @@ def write_result end def collection - @collection ||= client.use(spec['database'])[spec['collection']] + @collection ||= client.use('database0')['collection0'] end def client From cffff62b38da023fd0ed9909185843561916ff52 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Thu, 8 Oct 2020 22:41:48 -0400 Subject: [PATCH 029/172] change read tests to unified format --- tests/retryReads-move-sharded.yaml | 53 +++++++---- tests/retryReads-move.yaml | 57 ++++++++---- tests/retryReads-primaryRemoval.yaml | 53 +++++++---- tests/retryReads-primaryTakeover.yaml | 53 +++++++---- tests/retryReads-processRestart-sharded.yaml | 53 +++++++---- tests/retryReads-processRestart.yaml | 53 +++++++---- tests/retryReads-resizeCluster.yaml | 53 +++++++---- tests/retryReads-testFailover-sharded.yaml | 53 +++++++---- tests/retryReads-testFailover.yaml | 53 +++++++---- tests/retryReads-toggleServerSideJS.yaml | 53 
+++++++---- tests/retryReads-vmRestart-sharded.yaml | 53 +++++++---- tests/retryReads-vmRestart.yaml | 94 ++++++++++++++++---- 12 files changed, 487 insertions(+), 194 deletions(-) diff --git a/tests/retryReads-move-sharded.yaml b/tests/retryReads-move-sharded.yaml index df53fc8f..a2b31bbd 100644 --- a/tests/retryReads-move-sharded.yaml +++ b/tests/retryReads-move-sharded.yaml @@ -21,21 +21,42 @@ uriOptions: retryReads: true driverWorkload: - database: test_database - collection: test_collection - testData: - - {_id: 1, x: 11} - - {_id: 2, x: 22} - - {_id: 3, x: 33} - operations: - - - object: collection - name: find - arguments: - filter: - _id: {$gt: 1} - sort: - _id: 1 - result: + description: "Find" + + schemaVersion: "1.0" + + createEntities: + - client: + id: &client0 client0 + - database: + id: &database0 database0 + client: *client0 + databaseName: &database0Name dat + - collection: + id: &collection0 collection0 + database: *database0 + collectionName: &collection0Name dat + + initialData: + - collectionName: *collection0Name + databaseName: *database0Name + documents: + - {_id: 1, x: 11} - {_id: 2, x: 22} - {_id: 3, x: 33} + + tests: + - description: "Find one" + operations: + - name: find + object: *collection0 + arguments: + filter: { _id: { $gt: 1 }} + sort: { _id: 1 } + expectResult: + - + _id: 2 + x: 22 + - + _id: 3 + x: 33 diff --git a/tests/retryReads-move.yaml b/tests/retryReads-move.yaml index 387b4cb0..d3f3c217 100644 --- a/tests/retryReads-move.yaml +++ b/tests/retryReads-move.yaml @@ -4,7 +4,7 @@ initialConfiguration: providerSettings: providerName: AWS regionName: US_WEST_1 - instanceSizeName: M10 + instanceSizeName: M20 processArgs: {} operations: @@ -14,28 +14,49 @@ operations: providerSettings: providerName: AWS regionName: US_WEST_1 - instanceSizeName: M20 + instanceSizeName: M10 processArgs: {} uriOptions: retryReads: true driverWorkload: - database: test_database - collection: test_collection - testData: - - {_id: 1, x: 11} - - {_id: 2, 
x: 22} - - {_id: 3, x: 33} - operations: - - - object: collection - name: find - arguments: - filter: - _id: {$gt: 1} - sort: - _id: 1 - result: + description: "Find" + + schemaVersion: "1.0" + + createEntities: + - client: + id: &client0 client0 + - database: + id: &database0 database0 + client: *client0 + databaseName: &database0Name dat + - collection: + id: &collection0 collection0 + database: *database0 + collectionName: &collection0Name dat + + initialData: + - collectionName: *collection0Name + databaseName: *database0Name + documents: + - {_id: 1, x: 11} - {_id: 2, x: 22} - {_id: 3, x: 33} + + tests: + - description: "Find one" + operations: + - name: find + object: *collection0 + arguments: + filter: { _id: { $gt: 1 }} + sort: { _id: 1 } + expectResult: + - + _id: 2 + x: 22 + - + _id: 3 + x: 33 diff --git a/tests/retryReads-primaryRemoval.yaml b/tests/retryReads-primaryRemoval.yaml index 483b674e..391c92d8 100644 --- a/tests/retryReads-primaryRemoval.yaml +++ b/tests/retryReads-primaryRemoval.yaml @@ -47,21 +47,42 @@ uriOptions: retryReads: true driverWorkload: - database: test_database - collection: test_collection - testData: - - {_id: 1, x: 11} - - {_id: 2, x: 22} - - {_id: 3, x: 33} - operations: - - - object: collection - name: find - arguments: - filter: - _id: {$gt: 1} - sort: - _id: 1 - result: + description: "Find" + + schemaVersion: "1.0" + + createEntities: + - client: + id: &client0 client0 + - database: + id: &database0 database0 + client: *client0 + databaseName: &database0Name dat + - collection: + id: &collection0 collection0 + database: *database0 + collectionName: &collection0Name dat + + initialData: + - collectionName: *collection0Name + databaseName: *database0Name + documents: + - {_id: 1, x: 11} - {_id: 2, x: 22} - {_id: 3, x: 33} + + tests: + - description: "Find one" + operations: + - name: find + object: *collection0 + arguments: + filter: { _id: { $gt: 1 }} + sort: { _id: 1 } + expectResult: + - + _id: 2 + x: 22 + - + _id: 3 + x: 
33 diff --git a/tests/retryReads-primaryTakeover.yaml b/tests/retryReads-primaryTakeover.yaml index 5b979357..d1edbc3a 100644 --- a/tests/retryReads-primaryTakeover.yaml +++ b/tests/retryReads-primaryTakeover.yaml @@ -48,21 +48,42 @@ uriOptions: retryReads: true driverWorkload: - database: test_database - collection: test_collection - testData: - - {_id: 1, x: 11} - - {_id: 2, x: 22} - - {_id: 3, x: 33} - operations: - - - object: collection - name: find - arguments: - filter: - _id: {$gt: 1} - sort: - _id: 1 - result: + description: "Find" + + schemaVersion: "1.0" + + createEntities: + - client: + id: &client0 client0 + - database: + id: &database0 database0 + client: *client0 + databaseName: &database0Name dat + - collection: + id: &collection0 collection0 + database: *database0 + collectionName: &collection0Name dat + + initialData: + - collectionName: *collection0Name + databaseName: *database0Name + documents: + - {_id: 1, x: 11} - {_id: 2, x: 22} - {_id: 3, x: 33} + + tests: + - description: "Find one" + operations: + - name: find + object: *collection0 + arguments: + filter: { _id: { $gt: 1 }} + sort: { _id: 1 } + expectResult: + - + _id: 2 + x: 22 + - + _id: 3 + x: 33 diff --git a/tests/retryReads-processRestart-sharded.yaml b/tests/retryReads-processRestart-sharded.yaml index cede599a..4e64fa24 100644 --- a/tests/retryReads-processRestart-sharded.yaml +++ b/tests/retryReads-processRestart-sharded.yaml @@ -23,21 +23,42 @@ uriOptions: retryReads: true driverWorkload: - database: test_database - collection: test_collection - testData: - - {_id: 1, x: 11} - - {_id: 2, x: 22} - - {_id: 3, x: 33} - operations: - - - object: collection - name: find - arguments: - filter: - _id: {$gt: 1} - sort: - _id: 1 - result: + description: "Find" + + schemaVersion: "1.0" + + createEntities: + - client: + id: &client0 client0 + - database: + id: &database0 database0 + client: *client0 + databaseName: &database0Name dat + - collection: + id: &collection0 collection0 + 
database: *database0 + collectionName: &collection0Name dat + + initialData: + - collectionName: *collection0Name + databaseName: *database0Name + documents: + - {_id: 1, x: 11} - {_id: 2, x: 22} - {_id: 3, x: 33} + + tests: + - description: "Find one" + operations: + - name: find + object: *collection0 + arguments: + filter: { _id: { $gt: 1 }} + sort: { _id: 1 } + expectResult: + - + _id: 2 + x: 22 + - + _id: 3 + x: 33 diff --git a/tests/retryReads-processRestart.yaml b/tests/retryReads-processRestart.yaml index 63d2387c..9607e394 100644 --- a/tests/retryReads-processRestart.yaml +++ b/tests/retryReads-processRestart.yaml @@ -23,21 +23,42 @@ uriOptions: retryReads: true driverWorkload: - database: test_database - collection: test_collection - testData: - - {_id: 1, x: 11} - - {_id: 2, x: 22} - - {_id: 3, x: 33} - operations: - - - object: collection - name: find - arguments: - filter: - _id: {$gt: 1} - sort: - _id: 1 - result: + description: "Find" + + schemaVersion: "1.0" + + createEntities: + - client: + id: &client0 client0 + - database: + id: &database0 database0 + client: *client0 + databaseName: &database0Name dat + - collection: + id: &collection0 collection0 + database: *database0 + collectionName: &collection0Name dat + + initialData: + - collectionName: *collection0Name + databaseName: *database0Name + documents: + - {_id: 1, x: 11} - {_id: 2, x: 22} - {_id: 3, x: 33} + + tests: + - description: "Find one" + operations: + - name: find + object: *collection0 + arguments: + filter: { _id: { $gt: 1 }} + sort: { _id: 1 } + expectResult: + - + _id: 2 + x: 22 + - + _id: 3 + x: 33 diff --git a/tests/retryReads-resizeCluster.yaml b/tests/retryReads-resizeCluster.yaml index 387b4cb0..fb77d56a 100644 --- a/tests/retryReads-resizeCluster.yaml +++ b/tests/retryReads-resizeCluster.yaml @@ -21,21 +21,42 @@ uriOptions: retryReads: true driverWorkload: - database: test_database - collection: test_collection - testData: - - {_id: 1, x: 11} - - {_id: 2, x: 22} - - {_id: 
3, x: 33} - operations: - - - object: collection - name: find - arguments: - filter: - _id: {$gt: 1} - sort: - _id: 1 - result: + description: "Find" + + schemaVersion: "1.0" + + createEntities: + - client: + id: &client0 client0 + - database: + id: &database0 database0 + client: *client0 + databaseName: &database0Name dat + - collection: + id: &collection0 collection0 + database: *database0 + collectionName: &collection0Name dat + + initialData: + - collectionName: *collection0Name + databaseName: *database0Name + documents: + - {_id: 1, x: 11} - {_id: 2, x: 22} - {_id: 3, x: 33} + + tests: + - description: "Find one" + operations: + - name: find + object: *collection0 + arguments: + filter: { _id: { $gt: 1 }} + sort: { _id: 1 } + expectResult: + - + _id: 2 + x: 22 + - + _id: 3 + x: 33 diff --git a/tests/retryReads-testFailover-sharded.yaml b/tests/retryReads-testFailover-sharded.yaml index 53495d45..c06aeb99 100644 --- a/tests/retryReads-testFailover-sharded.yaml +++ b/tests/retryReads-testFailover-sharded.yaml @@ -15,21 +15,42 @@ uriOptions: retryReads: true driverWorkload: - database: test_database - collection: test_collection - testData: - - {_id: 1, x: 11} - - {_id: 2, x: 22} - - {_id: 3, x: 33} - operations: - - - object: collection - name: find - arguments: - filter: - _id: {$gt: 1} - sort: - _id: 1 - result: + description: "Find" + + schemaVersion: "1.0" + + createEntities: + - client: + id: &client0 client0 + - database: + id: &database0 database0 + client: *client0 + databaseName: &database0Name dat + - collection: + id: &collection0 collection0 + database: *database0 + collectionName: &collection0Name dat + + initialData: + - collectionName: *collection0Name + databaseName: *database0Name + documents: + - {_id: 1, x: 11} - {_id: 2, x: 22} - {_id: 3, x: 33} + + tests: + - description: "Find one" + operations: + - name: find + object: *collection0 + arguments: + filter: { _id: { $gt: 1 }} + sort: { _id: 1 } + expectResult: + - + _id: 2 + x: 22 + - + _id: 
3 + x: 33 diff --git a/tests/retryReads-testFailover.yaml b/tests/retryReads-testFailover.yaml index 5ad7af88..089c96e6 100644 --- a/tests/retryReads-testFailover.yaml +++ b/tests/retryReads-testFailover.yaml @@ -15,21 +15,42 @@ uriOptions: retryReads: true driverWorkload: - database: test_database - collection: test_collection - testData: - - {_id: 1, x: 11} - - {_id: 2, x: 22} - - {_id: 3, x: 33} - operations: - - - object: collection - name: find - arguments: - filter: - _id: {$gt: 1} - sort: - _id: 1 - result: + description: "Find" + + schemaVersion: "1.0" + + createEntities: + - client: + id: &client0 client0 + - database: + id: &database0 database0 + client: *client0 + databaseName: &database0Name dat + - collection: + id: &collection0 collection0 + database: *database0 + collectionName: &collection0Name dat + + initialData: + - collectionName: *collection0Name + databaseName: *database0Name + documents: + - {_id: 1, x: 11} - {_id: 2, x: 22} - {_id: 3, x: 33} + + tests: + - description: "Find one" + operations: + - name: find + object: *collection0 + arguments: + filter: { _id: { $gt: 1 }} + sort: { _id: 1 } + expectResult: + - + _id: 2 + x: 22 + - + _id: 3 + x: 33 diff --git a/tests/retryReads-toggleServerSideJS.yaml b/tests/retryReads-toggleServerSideJS.yaml index a5e84fad..3a9df6c1 100644 --- a/tests/retryReads-toggleServerSideJS.yaml +++ b/tests/retryReads-toggleServerSideJS.yaml @@ -18,21 +18,42 @@ uriOptions: retryReads: true driverWorkload: - database: test_database - collection: test_collection - testData: - - {_id: 1, x: 11} - - {_id: 2, x: 22} - - {_id: 3, x: 33} - operations: - - - object: collection - name: find - arguments: - filter: - _id: {$gt: 1} - sort: - _id: 1 - result: + description: "Find" + + schemaVersion: "1.0" + + createEntities: + - client: + id: &client0 client0 + - database: + id: &database0 database0 + client: *client0 + databaseName: &database0Name dat + - collection: + id: &collection0 collection0 + database: *database0 + 
collectionName: &collection0Name dat + + initialData: + - collectionName: *collection0Name + databaseName: *database0Name + documents: + - {_id: 1, x: 11} - {_id: 2, x: 22} - {_id: 3, x: 33} + + tests: + - description: "Find one" + operations: + - name: find + object: *collection0 + arguments: + filter: { _id: { $gt: 1 }} + sort: { _id: 1 } + expectResult: + - + _id: 2 + x: 22 + - + _id: 3 + x: 33 diff --git a/tests/retryReads-vmRestart-sharded.yaml b/tests/retryReads-vmRestart-sharded.yaml index 239b8d45..3fd6f311 100644 --- a/tests/retryReads-vmRestart-sharded.yaml +++ b/tests/retryReads-vmRestart-sharded.yaml @@ -15,21 +15,42 @@ uriOptions: retryReads: true driverWorkload: - database: test_database - collection: test_collection - testData: - - {_id: 1, x: 11} - - {_id: 2, x: 22} - - {_id: 3, x: 33} - operations: - - - object: collection - name: find - arguments: - filter: - _id: {$gt: 1} - sort: - _id: 1 - result: + description: "Find" + + schemaVersion: "1.0" + + createEntities: + - client: + id: &client0 client0 + - database: + id: &database0 database0 + client: *client0 + databaseName: &database0Name dat + - collection: + id: &collection0 collection0 + database: *database0 + collectionName: &collection0Name dat + + initialData: + - collectionName: *collection0Name + databaseName: *database0Name + documents: + - {_id: 1, x: 11} - {_id: 2, x: 22} - {_id: 3, x: 33} + + tests: + - description: "Find one" + operations: + - name: find + object: *collection0 + arguments: + filter: { _id: { $gt: 1 }} + sort: { _id: 1 } + expectResult: + - + _id: 2 + x: 22 + - + _id: 3 + x: 33 diff --git a/tests/retryReads-vmRestart.yaml b/tests/retryReads-vmRestart.yaml index 87b5f9a9..6685ad12 100644 --- a/tests/retryReads-vmRestart.yaml +++ b/tests/retryReads-vmRestart.yaml @@ -1,6 +1,47 @@ initialConfiguration: clusterConfiguration: clusterType: REPLICASET +driverWorkload: + description: "Find" + + schemaVersion: "1.0" + + createEntities: + - client: + id: &client0 client0 + - 
database: + id: &database0 database0 + client: *client0 + databaseName: &database0Name dat + - collection: + id: &collection0 collection0 + database: *database0 + collectionName: &collection0Name dat + + initialData: + - collectionName: *collection0Name + databaseName: *database0Name + documents: + - {_id: 1, x: 11} + - {_id: 2, x: 22} + - {_id: 3, x: 33} + + tests: + - description: "Find one" + operations: + - name: find + object: *collection0 + arguments: + filter: { _id: { $gt: 1 }} + sort: { _id: 1 } + expectResult: + - + _id: 2 + x: 22 + - + _id: 3 + x: 33 + providerSettings: providerName: AWS regionName: US_WEST_1 @@ -15,21 +56,42 @@ uriOptions: retryReads: true driverWorkload: - database: test_database - collection: test_collection - testData: - - {_id: 1, x: 11} - - {_id: 2, x: 22} - - {_id: 3, x: 33} - operations: - - - object: collection - name: find - arguments: - filter: - _id: {$gt: 1} - sort: - _id: 1 - result: + description: "Find" + + schemaVersion: "1.0" + + createEntities: + - client: + id: &client0 client0 + - database: + id: &database0 database0 + client: *client0 + databaseName: &database0Name dat + - collection: + id: &collection0 collection0 + database: *database0 + collectionName: &collection0Name dat + + initialData: + - collectionName: *collection0Name + databaseName: *database0Name + documents: + - {_id: 1, x: 11} - {_id: 2, x: 22} - {_id: 3, x: 33} + + tests: + - description: "Find one" + operations: + - name: find + object: *collection0 + arguments: + filter: { _id: { $gt: 1 }} + sort: { _id: 1 } + expectResult: + - + _id: 2 + x: 22 + - + _id: 3 + x: 33 From 9288746051895a9000553d4c74349a29c8d6a21c Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Thu, 8 Oct 2020 22:42:46 -0400 Subject: [PATCH 030/172] change write tests to unified format --- tests/retryWrites-resizeCluster.yaml | 30 +++++++++++++++++------ tests/retryWrites-toggleServerSideJS.yaml | 30 +++++++++++++++++------ 2 files changed, 44 insertions(+), 16 deletions(-) diff 
--git a/tests/retryWrites-resizeCluster.yaml b/tests/retryWrites-resizeCluster.yaml index df729d60..425b2347 100644 --- a/tests/retryWrites-resizeCluster.yaml +++ b/tests/retryWrites-resizeCluster.yaml @@ -21,11 +21,25 @@ uriOptions: retryWrites: true driverWorkload: - database: test_database - collection: test_collection - operations: - - - object: collection - name: insertOne - arguments: - document: {data: 100} + description: "Insert" + + schemaVersion: "1.0" + + createEntities: + - client: + id: &client0 client0 + - database: + id: &database0 database0 + client: *client0 + databaseName: &database0Name dat + - collection: + id: &collection0 collection0 + database: *database0 + collectionName: &collection0Name dat + + tests: + - description: "Insert one" + operations: + - name: insertOne + object: *collection0 + arguments: { data: 100 } diff --git a/tests/retryWrites-toggleServerSideJS.yaml b/tests/retryWrites-toggleServerSideJS.yaml index b706c1f8..b73887bb 100644 --- a/tests/retryWrites-toggleServerSideJS.yaml +++ b/tests/retryWrites-toggleServerSideJS.yaml @@ -17,11 +17,25 @@ uriOptions: retryWrites: true driverWorkload: - database: test_database - collection: test_collection - operations: - - - object: collection - name: insertOne - arguments: - document: {data: 100} + description: "Insert" + + schemaVersion: "1.0" + + createEntities: + - client: + id: &client0 client0 + - database: + id: &database0 database0 + client: *client0 + databaseName: &database0Name dat + - collection: + id: &collection0 collection0 + database: *database0 + collectionName: &collection0Name dat + + tests: + - description: "Insert one" + operations: + - name: insertOne + object: *collection0 + arguments: { data: 100 } From ba675ff16c0e31af1e6d6cf40da6581f7b66bfe1 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Fri, 9 Oct 2020 00:41:52 -0400 Subject: [PATCH 031/172] failure diagnostics --- integrations/ruby/executor.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/integrations/ruby/executor.rb b/integrations/ruby/executor.rb index cccf2314..3253559b 100644 --- a/integrations/ruby/executor.rb +++ b/integrations/ruby/executor.rb @@ -100,7 +100,7 @@ def perform_operations if expected_docs = op_spec['expectResult'] if expected_docs != docs - puts "Failure" + puts "Failure: expected docs (#{expected_docs.inspect}) != actual docs (#{docs.inspect})" @failure_count += 1 end end From 93f9775c020323f3ecc4d493694751096e86babd Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Fri, 9 Oct 2020 00:42:02 -0400 Subject: [PATCH 032/172] unified test format --- integrations/ruby/executor.rb | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/integrations/ruby/executor.rb b/integrations/ruby/executor.rb index 3253559b..c271190c 100644 --- a/integrations/ruby/executor.rb +++ b/integrations/ruby/executor.rb @@ -167,7 +167,13 @@ def write_result end def collection - @collection ||= client.use('database0')['collection0'] + db_name = spec['createEntities'].detect { |entity| + entity['database']&.[]('id') == 'database0' + }['database'].fetch('databaseName') + collection_name = spec['createEntities'].detect { |entity| + entity['collection']&.[]('id') == 'collection0' + }['collection'].fetch('collectionName') + @collection ||= client.use(db_name)[collection_name] end def client From b85f487f6dddcc955c709e19417bb22a81a66bb2 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Wed, 14 Oct 2020 13:45:47 -0400 Subject: [PATCH 033/172] atlas log retrieval --- astrolabe/runner.py | 67 +++++++++++++++++++++- atlasclient/client.py | 1 + tests/retryReads-testFailover-sharded.yaml | 4 ++ tests/retryReads-testFailover.yaml | 4 ++ 4 files changed, 74 insertions(+), 2 deletions(-) diff --git a/astrolabe/runner.py b/astrolabe/runner.py index 91cceacf..1baf7141 100644 --- a/astrolabe/runner.py +++ b/astrolabe/runner.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import logging -import os +import logging, datetime, time as _time, gzip +import os, io from time import sleep, monotonic from urllib.parse import urlencode @@ -109,6 +109,9 @@ def initialize(self, no_create=False): Initialize a cluster with the configuration required by the test specification. """ + + self.start_time = _time.time() + if no_create: return @@ -195,6 +198,12 @@ def run(self, persist_cluster=False, startup_time=1): self.wait_for_idle() + if hasattr(operation, 'sleep'): + _time.sleep(operation['sleep']) + + if hasattr(operation, 'waitForIdle'): + self.wait_for_idle() + if hasattr(operation, 'restartVms'): url = "/api/private/nds/groups/%s/clusters/%s/reboot" % (self.project.id, self.cluster_name) self.admin_client.request('POST', url) @@ -247,6 +256,11 @@ def run(self, persist_cluster=False, startup_time=1): # is only visible for failed tests. LOGGER.info("Workload Statistics: {}".format(stats)) + + LOGGER.info("Waiting 5 minutes for Atlas logs to become available") + sleep(5*60) + + self.retrieve_logs() # Step 7: download logs asynchronously and delete cluster. # TODO: https://github.com/mongodb-labs/drivers-atlas-testing/issues/4 @@ -264,6 +278,55 @@ def wait_for_idle(self): LOGGER.info("Waiting for cluster maintenance to complete") selector.poll([self], attribute="is_cluster_state", args=("IDLE",), kwargs={}) + + def retrieve_logs(self): + # There is no straightforward facility in Atlas to retrieve logs + # for a cluster. See https://jira.mongodb.org/browse/PRODTRIAGE-968. + # Atlas provides the "cluster start" time, added in + # https://jira.mongodb.org/browse/CLOUDP-73874. This is however + # not the time when any process started, but appears to be roughly + # the time when cluster creation began. 
Since a cluster can take + # anywhere from 6 to 30 minutes to provision depending on the type, + # simply retrieving logs from the "cluster start" time would result in + # several of the intervals retrieving the exact same data from when + # a process really started. + # Because of this, figure out the times the hard way: + # - Retrieve the first log starting with the "cluster start" time. + # - Read the first log line. + # - Use the time in that line as the actual node start time. + # - Step forward in 5 minute increments to get the entire log, + # hopefully in a complete and correct manner. See + # https://jira.mongodb.org/browse/PRODTRIAGE-1030 for why + # using end time (or simply using the API as documented) doesn't work. + + cluster_config = self.cluster_url.get().data + data = self.client.request('GET', 'groups/%s/processes' % self.project.id).data + for hostinfo in data['results']: + hostname = hostinfo['hostname'] + + log_names = {'mongodb.gz': 'mongod.log'} + if cluster_config['clusterType'] == 'SHARDED': + log_names['mongos.gz'] = 'mongos.log' + + for api_log_name, log_name in log_names.items(): + + time = int(self.start_time) + while time < _time.time(): + fn = '%s_%s_%s.gz' % (hostname, log_name, datetime.datetime.fromtimestamp(time).strftime('%Y%m%dT%H:%M:%SZ')) + LOGGER.info('Retrieving %s' % fn) + resp = self.client.request('GET', 'groups/%s/clusters/%s/logs/%s' % (self.project.id, hostname, api_log_name), startDate=time) + with open(fn, 'wb') as f: + f.write(resp.response.content) + + time += 5*60 + + def iso8601_to_timestamp(self, time_str): + if time_str.endswith('Z'): + format = '%Y-%m-%dT%H:%M:%SZ' + else: + format = '%Y-%m-%dT%H:%M:%S.%f+0000' + t = datetime.datetime.strptime(time_str, format) + return int(_time.mktime(t.timetuple())) class SpecTestRunnerBase: diff --git a/atlasclient/client.py b/atlasclient/client.py index b537bba3..830a40c4 100644 --- a/atlasclient/client.py +++ b/atlasclient/client.py @@ -77,6 +77,7 @@ def get_path(self): 
class _ApiResponse: """Private wrapper class for processing HTTP responses.""" def __init__(self, response, request_method, json_data): + self.response = response self.resource_url = response.url self.headers = response.headers self.status_code = response.status_code diff --git a/tests/retryReads-testFailover-sharded.yaml b/tests/retryReads-testFailover-sharded.yaml index c06aeb99..b1e8845e 100644 --- a/tests/retryReads-testFailover-sharded.yaml +++ b/tests/retryReads-testFailover-sharded.yaml @@ -10,6 +10,10 @@ initialConfiguration: operations: - testFailover: true + - + sleep: 10 + - + waitForIdle: true uriOptions: retryReads: true diff --git a/tests/retryReads-testFailover.yaml b/tests/retryReads-testFailover.yaml index 089c96e6..f879535e 100644 --- a/tests/retryReads-testFailover.yaml +++ b/tests/retryReads-testFailover.yaml @@ -10,6 +10,10 @@ initialConfiguration: operations: - testFailover: true + - + sleep: 10 + - + waitForIdle: true uriOptions: retryReads: true From 6b6f70eedf082f0348c3f14c5afba6eb3b22b10b Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Wed, 14 Oct 2020 14:28:41 -0400 Subject: [PATCH 034/172] fix end time --- astrolabe/runner.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/astrolabe/runner.py b/astrolabe/runner.py index 1baf7141..44684e42 100644 --- a/astrolabe/runner.py +++ b/astrolabe/runner.py @@ -299,6 +299,7 @@ def retrieve_logs(self): # https://jira.mongodb.org/browse/PRODTRIAGE-1030 for why # using end time (or simply using the API as documented) doesn't work. 
+ end_time = _time.time() cluster_config = self.cluster_url.get().data data = self.client.request('GET', 'groups/%s/processes' % self.project.id).data for hostinfo in data['results']: @@ -311,7 +312,7 @@ def retrieve_logs(self): for api_log_name, log_name in log_names.items(): time = int(self.start_time) - while time < _time.time(): + while time < end_time: fn = '%s_%s_%s.gz' % (hostname, log_name, datetime.datetime.fromtimestamp(time).strftime('%Y%m%dT%H:%M:%SZ')) LOGGER.info('Retrieving %s' % fn) resp = self.client.request('GET', 'groups/%s/clusters/%s/logs/%s' % (self.project.id, hostname, api_log_name), startDate=time) From 61423586c526c0141eaae3ba03ec9d81ce1c9355 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Wed, 14 Oct 2020 22:56:28 -0400 Subject: [PATCH 035/172] logs redone --- astrolabe/runner.py | 78 +++++++++++++++++++-------------------------- 1 file changed, 32 insertions(+), 46 deletions(-) diff --git a/astrolabe/runner.py b/astrolabe/runner.py index 44684e42..030f2a53 100644 --- a/astrolabe/runner.py +++ b/astrolabe/runner.py @@ -257,8 +257,8 @@ def run(self, persist_cluster=False, startup_time=1): LOGGER.info("Workload Statistics: {}".format(stats)) - LOGGER.info("Waiting 5 minutes for Atlas logs to become available") - sleep(5*60) + #LOGGER.info("Waiting 5 minutes for Atlas logs to become available") + #sleep(5*60) self.retrieve_logs() @@ -280,54 +280,40 @@ def wait_for_idle(self): kwargs={}) def retrieve_logs(self): - # There is no straightforward facility in Atlas to retrieve logs - # for a cluster. See https://jira.mongodb.org/browse/PRODTRIAGE-968. - # Atlas provides the "cluster start" time, added in - # https://jira.mongodb.org/browse/CLOUDP-73874. This is however - # not the time when any process started, but appears to be roughly - # the time when cluster creation began. 
Since a cluster can take - # anywhere from 6 to 30 minutes to provision depending on the type, - # simply retrieving logs from the "cluster start" time would result in - # several of the intervals retrieving the exact same data from when - # a process really started. - # Because of this, figure out the times the hard way: - # - Retrieve the first log starting with the "cluster start" time. - # - Read the first log line. - # - Use the time in that line as the actual node start time. - # - Step forward in 5 minute increments to get the entire log, - # hopefully in a complete and correct manner. See - # https://jira.mongodb.org/browse/PRODTRIAGE-1030 for why - # using end time (or simply using the API as documented) doesn't work. + data = self.admin_client.request('GET', '/api/private/nds/groups/%s/clusters/%s' % (self.project.id, self.cluster_name)).data - end_time = _time.time() - cluster_config = self.cluster_url.get().data - data = self.client.request('GET', 'groups/%s/processes' % self.project.id).data - for hostinfo in data['results']: - hostname = hostinfo['hostname'] - - log_names = {'mongodb.gz': 'mongod.log'} - if cluster_config['clusterType'] == 'SHARDED': - log_names['mongos.gz'] = 'mongos.log' + if data['clusterType'] == 'SHARDED': + rtype = 'CLUSTER' + rname = self.cluster_name + else: + rtype = 'REPLICASET' + rname = data['deploymentItemName'] - for api_log_name, log_name in log_names.items(): + params = dict( + resourceName=rname, + resourceType=rtype, + redacted=True, + logTypes=['FTDC','MONGODB'],#,'AUTOMATION_AGENT','MONITORING_AGENT','BACKUP_AGENT'], + sizeRequestedPerFileBytes=100000000, + ) + data = self.admin_client.request('POST', 'groups/%s/logCollectionJobs' % (self.project.id,), **params).data + job_id = data['id'] - time = int(self.start_time) - while time < end_time: - fn = '%s_%s_%s.gz' % (hostname, log_name, datetime.datetime.fromtimestamp(time).strftime('%Y%m%dT%H:%M:%SZ')) - LOGGER.info('Retrieving %s' % fn) - resp = 
self.client.request('GET', 'groups/%s/clusters/%s/logs/%s' % (self.project.id, hostname, api_log_name), startDate=time) - with open(fn, 'wb') as f: - f.write(resp.response.content) - - time += 5*60 + while True: + LOGGER.debug('Poll job %s' % job_id) + data = self.admin_client.request('GET', 'groups/%s/logCollectionJobs/%s' % (self.project.id, job_id)).data + if data['status'] == 'IN_PROGRESS': + sleep(1) + elif data['status'] == 'SUCCESS': + break + else: + raise Exception("Unexpected log collection job status %s" % data['status']) - def iso8601_to_timestamp(self, time_str): - if time_str.endswith('Z'): - format = '%Y-%m-%dT%H:%M:%SZ' - else: - format = '%Y-%m-%dT%H:%M:%S.%f+0000' - t = datetime.datetime.strptime(time_str, format) - return int(_time.mktime(t.timetuple())) + url = data['downloadUrl'].replace('https://cloud-dev.mongodb.com', '') + LOGGER.info('Retrieving %s' % url) + resp = self.admin_client.request('GET', url) + with open('logs.tar.gz', 'wb') as f: + f.write(resp.response.content) class SpecTestRunnerBase: From 88e12d8b9c9f9c630cd3acbc985a183111123828 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Fri, 16 Oct 2020 13:49:41 -0400 Subject: [PATCH 036/172] Evergreen changes - Ruby to the top - Run weekly --- .evergreen/config.yml | 149 ++++++++++++++++++++---------------------- 1 file changed, 72 insertions(+), 77 deletions(-) diff --git a/.evergreen/config.yml b/.evergreen/config.yml index 08177b2a..a4731e13 100644 --- a/.evergreen/config.yml +++ b/.evergreen/config.yml @@ -80,6 +80,8 @@ functions: ATLAS_API_PASSWORD: ${atlas_secret} ATLAS_API_BASE_URL: ${atlas_url} ATLAS_ORGANIZATION_NAME: ${atlas_organization} + ATLAS_ADMIN_API_USERNAME: ${atlas_admin_api_username} + ATLAS_ADMIN_API_PASSWORD: ${atlas_admin_api_password} add_expansions_to_env: true command: | astrolabevenv/${PYTHON_BIN_DIR}/astrolabe spec-tests run-one tests/${TEST_NAME}.yaml -e integrations/${DRIVER_DIRNAME}/workload-executor @@ -112,6 +114,8 @@ functions: ATLAS_API_PASSWORD: 
${atlas_secret} ATLAS_API_BASE_URL: ${atlas_url} ATLAS_ORGANIZATION_NAME: ${atlas_organization} + ATLAS_ADMIN_API_USERNAME: ${atlas_admin_api_username} + ATLAS_ADMIN_API_PASSWORD: ${atlas_admin_api_password} add_expansions_to_env: true command: | astrolabevenv/${PYTHON_BIN_DIR}/astrolabe spec-tests delete-cluster tests/${TEST_NAME}.yaml @@ -140,30 +144,20 @@ tasks: commands: - func: "validate executor" # One test-case per task. - - name: retryReads-resizeCluster - tags: ["all", "retryReads", "resizeCluster"] - commands: - - func: "run test" - vars: - TEST_NAME: retryReads-resizeCluster - - name: retryReads-toggleServerSideJS - tags: ["all", "retryReads", "toggleServerSideJS"] - commands: - - func: "run test" - vars: - TEST_NAME: retryReads-toggleServerSideJS - - name: retryWrites-resizeCluster - tags: ["all", "retryWrites", "resizeCluster"] + - name: retryReads-processRestart + cron: '@weekly' + tags: ["all"] commands: - func: "run test" vars: - TEST_NAME: retryWrites-resizeCluster - - name: retryWrites-toggleServerSideJS - tags: ["all", "retryWrites", "toggleServerSideJS"] + TEST_NAME: retryReads-processRestart + - name: retryReads-processRestart-sharded + cron: '@weekly' + tags: ["all"] commands: - func: "run test" vars: - TEST_NAME: retryWrites-toggleServerSideJS + TEST_NAME: retryReads-processRestart-sharded axes: # The 'driver' axis specifies the driver to be tested (including driver version). 
@@ -311,22 +305,6 @@ axes: GOPATH: /home/ubuntu/go buildvariants: -- matrix_name: "tests-python" - matrix_spec: - driver: ["pymongo-master"] - platform: ["ubuntu-18.04"] - runtime: ["python27", "python38"] - display_name: "${driver} ${platform} ${runtime}" - tasks: - - ".all" -- matrix_name: "tests-python-windows" - matrix_spec: - driver: ["pymongo-master"] - platform: ["windows-64"] - runtime: ["python37-windows"] - display_name: "${driver} ${platform} ${runtime}" - tasks: - - ".all" - matrix_name: "tests-ruby" matrix_spec: driver: ["ruby-master"] @@ -335,46 +313,63 @@ buildvariants: display_name: "${driver} ${platform} ${runtime}" tasks: - ".all" -- matrix_name: tests-node - matrix_spec: - driver: - - node-master - platform: - - ubuntu-18.04 - runtime: - - node-dubnium - - node-erbium - display_name: "${driver} ${platform} ${runtime}" - tasks: - - .all -- matrix_name: "tests-java" - matrix_spec: - driver: ["java-master"] - platform: ["ubuntu-18.04"] - runtime: ["java11"] - display_name: "${driver} ${platform} ${runtime}" - tasks: - - ".all" -- matrix_name: "tests-dotnet-windows" - matrix_spec: - driver: ["dotnet-master"] - platform: ["windows-64"] - runtime: - - "dotnet-async-netcoreapp2.1" - - "dotnet-sync-netcoreapp2.1" - - "dotnet-async-netcoreapp1.1" - - "dotnet-sync-netcoreapp1.1" - - "dotnet-async-net452" - - "dotnet-sync-net452" - display_name: "${driver} ${platform} ${runtime}" - tasks: - - ".all" -- matrix_name: "tests-go" - matrix_spec: - driver: ["go-master"] - platform: ubuntu-18.04 - runtime: go-13 - display_name: "${driver} ${platform} ${runtime}" - tasks: - - ".all" - +#- matrix_name: "tests-python" +# matrix_spec: +# driver: ["pymongo-master"] +# platform: ["ubuntu-18.04"] +# runtime: ["python27", "python38"] +# display_name: "${driver} ${platform} ${runtime}" +# tasks: +# - ".all" +#- matrix_name: "tests-python-windows" +# matrix_spec: +# driver: ["pymongo-master"] +# platform: ["windows-64"] +# runtime: ["python37-windows"] +# display_name: 
"${driver} ${platform} ${runtime}" +# tasks: +# - ".all" +#- matrix_name: tests-node +# matrix_spec: +# driver: +# - node-master +# platform: +# - ubuntu-18.04 +# runtime: +# - node-dubnium +# - node-erbium +# display_name: "${driver} ${platform} ${runtime}" +# tasks: +# - .all +#- matrix_name: "tests-java" +# matrix_spec: +# driver: ["java-master"] +# platform: ["ubuntu-18.04"] +# runtime: ["java11"] +# display_name: "${driver} ${platform} ${runtime}" +# tasks: +# - ".all" +#- matrix_name: "tests-dotnet-windows" +# matrix_spec: +# driver: ["dotnet-master"] +# platform: ["windows-64"] +# runtime: +# - "dotnet-async-netcoreapp2.1" +# - "dotnet-sync-netcoreapp2.1" +# - "dotnet-async-netcoreapp1.1" +# - "dotnet-sync-netcoreapp1.1" +# - "dotnet-async-net452" +# - "dotnet-sync-net452" +# display_name: "${driver} ${platform} ${runtime}" +# tasks: +# - ".all" +#- matrix_name: "tests-go" +# matrix_spec: +# driver: ["go-master"] +# platform: ubuntu-18.04 +# runtime: go-13 +# display_name: "${driver} ${platform} ${runtime}" +# tasks: +# - ".all" +# +# From ea66890a9d6e417572dc399fe35d6f3258e7b423 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Fri, 16 Oct 2020 17:14:49 -0400 Subject: [PATCH 037/172] admin config --- astrolabe/cli.py | 9 ++++++--- astrolabe/configuration.py | 10 ++++++++++ docs/source/faq.rst | 10 +++++++--- docs/source/installing-running-locally.rst | 2 ++ 4 files changed, 25 insertions(+), 6 deletions(-) diff --git a/astrolabe/cli.py b/astrolabe/cli.py index 5b51761d..7160a76d 100644 --- a/astrolabe/cli.py +++ b/astrolabe/cli.py @@ -83,12 +83,15 @@ @create_click_option(CONFIGOPTS.ATLAS_API_BASE_URL) @create_click_option(CONFIGOPTS.ATLAS_API_USERNAME) @create_click_option(CONFIGOPTS.ATLAS_API_PASSWORD) +@create_click_option(CONFIGOPTS.ATLAS_ADMIN_API_USERNAME) +@create_click_option(CONFIGOPTS.ATLAS_ADMIN_API_PASSWORD) @create_click_option(CONFIGOPTS.ATLAS_HTTP_TIMEOUT) @create_click_option(CONFIGOPTS.ASTROLABE_LOGLEVEL) @click.version_option() 
@click.pass_context def cli(ctx, atlas_base_url, atlas_api_username, - atlas_api_password, http_timeout, log_level): + atlas_api_password, atlas_admin_api_username, atlas_admin_api_password, + http_timeout, log_level): """ Astrolabe is a command-line application for running automated driver @@ -105,8 +108,8 @@ def cli(ctx, atlas_base_url, atlas_api_username, admin_client = AtlasClient( base_url=atlas_base_url, - username=os.environ['ATLAS_ADMIN_API_USERNAME'], - password=os.environ['ATLAS_ADMIN_API_PASSWORD'], + username=atlas_admin_api_username, + password=atlas_admin_api_password, timeout=http_timeout) ctx.obj = (client,admin_client) diff --git a/astrolabe/configuration.py b/astrolabe/configuration.py index fc2773f0..cdcc1f2b 100644 --- a/astrolabe/configuration.py +++ b/astrolabe/configuration.py @@ -53,6 +53,16 @@ 'cliopt': '--atlas-api-password', 'envvar': 'ATLAS_API_PASSWORD', 'default': None}, + 'ATLAS_ADMIN_API_USERNAME': { + 'help': 'HTTP-Digest username (Atlas API public-key).', + 'cliopt': '--atlas-admin-api-username', + 'envvar': 'ATLAS_ADMIN_API_USERNAME', + 'default': None}, + 'ATLAS_ADMIN_API_PASSWORD': { + 'help': 'HTTP-Digest password (Atlas API private-key).', + 'cliopt': '--atlas-admin-api-password', + 'envvar': 'ATLAS_ADMIN_API_PASSWORD', + 'default': None}, 'ATLAS_DB_USERNAME': { 'help': 'Database username on the MongoDB instance.', 'cliopt': '--db-username', diff --git a/docs/source/faq.rst b/docs/source/faq.rst index 2574e7c8..71c7573f 100644 --- a/docs/source/faq.rst +++ b/docs/source/faq.rst @@ -11,15 +11,19 @@ not provided an API key, or that the API key that you have provided is has expir see the `MongoDB Atlas API `_ documentation for instructions on how to create programmatic API keys. +You also need a set of API keys with Atlas global operator permissions, +referred to as admin credentials. 
+ ``astrolabe`` can be configured to use API keys in one of 2 ways: * Using the `-u/--username` and `-p/--password` command options:: - $ astrolabe -u -p check-connection + $ astrolabe -u -p --atlas-admin-api-username --atlas-admin-api-password check-connection -* Using the ``ATLAS_API_USERNAME`` and ``ATLAS_API_PASSWORD`` environment variables:: +* Using the ``ATLAS_API_USERNAME``, ``ATLAS_API_PASSWORD``, +``ATLAS_ADMIN_API_USERNAME``, ``ATLAS_ADMIN_API_PASSWORD`` environment variables:: - $ ATLAS_API_USERNAME= ATLAS_API_PASSWORD= astrolabe check-connection + $ ATLAS_API_USERNAME= ATLAS_API_PASSWORD= ATLAS_ADMIN_API_USERNAME= ATLAS_ADMIN_API_PASSWORD= astrolabe check-connection .. _faq-why-custom-distro: diff --git a/docs/source/installing-running-locally.rst b/docs/source/installing-running-locally.rst index 254ca81a..aad3378a 100644 --- a/docs/source/installing-running-locally.rst +++ b/docs/source/installing-running-locally.rst @@ -51,6 +51,8 @@ Finally, declare the following variables to configure ``astrolabe``:: $ export ATLAS_ORGANIZATION_NAME= $ export ATLAS_API_USERNAME= $ export ATLAS_API_PASSWORD= + $ export ATLAS_ADMIN_API_USERNAME= + $ export ATLAS_ADMIN_API_PASSWORD= Finally, use the ``check-connection`` command to confirm that ``astrolabe`` is able to connect to and authenticate with the Atlas API:: From e4c8c4528e6a0837697b06acbf47a4d94562ad4f Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Fri, 16 Oct 2020 17:55:45 -0400 Subject: [PATCH 038/172] update validator for unified test format --- astrolabe/validator.py | 14 ++++++-------- atlasclient/utils.py | 2 +- tests/validator.yaml | 26 ++++++++++++++++++++++++++ 3 files changed, 33 insertions(+), 9 deletions(-) create mode 100644 tests/validator.yaml diff --git a/astrolabe/validator.py b/astrolabe/validator.py index ebb327ac..77d0a6d6 100644 --- a/astrolabe/validator.py +++ b/astrolabe/validator.py @@ -18,18 +18,16 @@ from unittest import TestCase from pymongo import MongoClient +import yaml from 
atlasclient import JSONObject from astrolabe.exceptions import WorkloadExecutorError from astrolabe.utils import DriverWorkloadSubprocessRunner, load_test_data -DRIVER_WORKLOAD = JSONObject.from_dict({ - 'database': 'validation_db', - 'collection': 'validation_coll', - 'testData': [{'_id': 'validation_sentinel', 'count': 0}], - 'operations': [] -}) +DRIVER_WORKLOAD = JSONObject.from_dict( + yaml.load(open('tests/validator.yaml').read(), Loader=yaml.FullLoader)['driverWorkload'] +) class ValidateWorkloadExecutor(TestCase): @@ -40,8 +38,8 @@ class ValidateWorkloadExecutor(TestCase): def setUp(self): self.client = MongoClient(self.CONNECTION_STRING, w='majority') self.coll = self.client.get_database( - DRIVER_WORKLOAD['database']).get_collection( - DRIVER_WORKLOAD['collection']) + [e for e in DRIVER_WORKLOAD['createEntities'] if 'database' in e][0]['database']['databaseName']).get_collection( + [e for e in DRIVER_WORKLOAD['createEntities'] if 'collection' in e][0]['collection']['collectionName']) load_test_data(self.CONNECTION_STRING, DRIVER_WORKLOAD) def run_test(self, driver_workload): diff --git a/atlasclient/utils.py b/atlasclient/utils.py index ca004ce3..d5de3cdf 100644 --- a/atlasclient/utils.py +++ b/atlasclient/utils.py @@ -23,7 +23,7 @@ def __getattr__(self, name): if name in self: return self[name] raise AttributeError('{} has no property named {}.'.format( - self.__class__.__name__, name)) + self, name)) @classmethod def from_dict(cls, raw_dict): diff --git a/tests/validator.yaml b/tests/validator.yaml new file mode 100644 index 00000000..2ae415f5 --- /dev/null +++ b/tests/validator.yaml @@ -0,0 +1,26 @@ +operations: [] + +driverWorkload: + description: "Validator" + + schemaVersion: "1.0" + + createEntities: + - client: + id: &client0 client0 + - database: + id: &database0 database0 + client: *client0 + databaseName: &database0Name dat + - collection: + id: &collection0 collection0 + database: *database0 + collectionName: &collection0Name dat + + 
initialData: + - collectionName: *collection0Name + databaseName: *database0Name + documents: + - + _id: validation_sentinel + count: 0 From 176e12c8c7bfe9134b3be734d8375c03fb509fdb Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Fri, 16 Oct 2020 17:57:58 -0400 Subject: [PATCH 039/172] reduce timeout --- .evergreen/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.evergreen/config.yml b/.evergreen/config.yml index a4731e13..d5e9261f 100644 --- a/.evergreen/config.yml +++ b/.evergreen/config.yml @@ -100,7 +100,7 @@ functions: working_dir: astrolabe-src add_expansions_to_env: true command: | - astrolabevenv/${PYTHON_BIN_DIR}/astrolabe spec-tests validate-workload-executor -e integrations/${DRIVER_DIRNAME}/workload-executor --connection-string "mongodb://localhost:27017/" + astrolabevenv/${PYTHON_BIN_DIR}/astrolabe spec-tests validate-workload-executor -e integrations/${DRIVER_DIRNAME}/workload-executor --connection-string "mongodb://localhost:27017/?serverselectiontimeoutms=10000" "delete test cluster": # Delete the cluster that was used to run the test. 
From c84be0a97f185ca759ccd8a518654ea391f31a41 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Fri, 16 Oct 2020 22:49:11 -0400 Subject: [PATCH 040/172] validation w/unified test format done --- astrolabe/validator.py | 41 +++++-------------- tests/validator-numErrors.yml | 41 +++++++++++++++++++ .../{validator.yaml => validator-simple.yml} | 13 +++++- 3 files changed, 64 insertions(+), 31 deletions(-) create mode 100644 tests/validator-numErrors.yml rename tests/{validator.yaml => validator-simple.yml} (65%) diff --git a/astrolabe/validator.py b/astrolabe/validator.py index 77d0a6d6..6fe7188e 100644 --- a/astrolabe/validator.py +++ b/astrolabe/validator.py @@ -25,11 +25,6 @@ from astrolabe.utils import DriverWorkloadSubprocessRunner, load_test_data -DRIVER_WORKLOAD = JSONObject.from_dict( - yaml.load(open('tests/validator.yaml').read(), Loader=yaml.FullLoader)['driverWorkload'] -) - - class ValidateWorkloadExecutor(TestCase): WORKLOAD_EXECUTOR = None CONNECTION_STRING = None @@ -37,12 +32,13 @@ class ValidateWorkloadExecutor(TestCase): def setUp(self): self.client = MongoClient(self.CONNECTION_STRING, w='majority') - self.coll = self.client.get_database( - [e for e in DRIVER_WORKLOAD['createEntities'] if 'database' in e][0]['database']['databaseName']).get_collection( - [e for e in DRIVER_WORKLOAD['createEntities'] if 'collection' in e][0]['collection']['collectionName']) - load_test_data(self.CONNECTION_STRING, DRIVER_WORKLOAD) def run_test(self, driver_workload): + self.coll = self.client.get_database( + [e for e in driver_workload['createEntities'] if 'database' in e][0]['database']['databaseName']).get_collection( + [e for e in driver_workload['createEntities'] if 'collection' in e][0]['collection']['collectionName']) + load_test_data(self.CONNECTION_STRING, driver_workload) + subprocess = DriverWorkloadSubprocessRunner() try: subprocess.spawn(workload_executor=self.WORKLOAD_EXECUTOR, @@ -77,15 +73,9 @@ def run_test(self, driver_workload): return stats def 
test_simple(self): - operations = [ - {'object': 'collection', - 'name': 'updateOne', - 'arguments': { - 'filter': {'_id': 'validation_sentinel'}, - 'update': {'$inc': {'count': 1}}}}] - driver_workload = deepcopy(DRIVER_WORKLOAD) - driver_workload['operations'] = operations - driver_workload = JSONObject.from_dict(driver_workload) + driver_workload = JSONObject.from_dict( + yaml.load(open('tests/validator-simple.yml').read(), Loader=yaml.FullLoader)['driverWorkload'] + ) stats = self.run_test(driver_workload) @@ -104,18 +94,9 @@ def test_simple(self): "or didn't execute them appropriately.") def test_num_errors(self): - operations = [ - {'object': 'collection', - 'name': 'updateOne', - 'arguments': { - 'filter': {'_id': 'validation_sentinel'}, - 'update': {'$inc': {'count': 1}}}}, - {'object': 'collection', - 'name': 'doesNotExist', - 'arguments': {'foo': 'bar'}}] - driver_workload = deepcopy(DRIVER_WORKLOAD) - driver_workload['operations'] = operations - driver_workload = JSONObject.from_dict(driver_workload) + driver_workload = JSONObject.from_dict( + yaml.load(open('tests/validator-numErrors.yml').read(), Loader=yaml.FullLoader)['driverWorkload'] + ) stats = self.run_test(driver_workload) diff --git a/tests/validator-numErrors.yml b/tests/validator-numErrors.yml new file mode 100644 index 00000000..2e440724 --- /dev/null +++ b/tests/validator-numErrors.yml @@ -0,0 +1,41 @@ +operations: [] + +driverWorkload: + description: "Validator - num errors" + + schemaVersion: "1.0" + + createEntities: + - client: + id: &client0 client0 + - database: + id: &database0 database0 + client: *client0 + databaseName: &database0Name dat + - collection: + id: &collection0 collection0 + database: *database0 + collectionName: &collection0Name dat + + initialData: + - collectionName: *collection0Name + databaseName: *database0Name + documents: + - + _id: validation_sentinel + count: 0 + + tests: + - description: "updateOne" + operations: + - name: updateOne + object: *collection0 + 
arguments: + filter: { _id: validation_sentinel} + update: + $inc: + count: 1 + - name: doesNotExist + object: *collection0 + arguments: + foo: bar diff --git a/tests/validator.yaml b/tests/validator-simple.yml similarity index 65% rename from tests/validator.yaml rename to tests/validator-simple.yml index 2ae415f5..514758b5 100644 --- a/tests/validator.yaml +++ b/tests/validator-simple.yml @@ -1,7 +1,7 @@ operations: [] driverWorkload: - description: "Validator" + description: "Validator - simple" schemaVersion: "1.0" @@ -24,3 +24,14 @@ driverWorkload: - _id: validation_sentinel count: 0 + + tests: + - description: "updateOne" + operations: + - name: updateOne + object: *collection0 + arguments: + filter: { _id: validation_sentinel} + update: + $inc: + count: 1 From a96a9cc9d963f16b14749e056e61b98fc75b4f32 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Fri, 16 Oct 2020 23:56:55 -0400 Subject: [PATCH 041/172] use deployment item name --- astrolabe/runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/astrolabe/runner.py b/astrolabe/runner.py index 030f2a53..7a9e0e86 100644 --- a/astrolabe/runner.py +++ b/astrolabe/runner.py @@ -284,7 +284,7 @@ def retrieve_logs(self): if data['clusterType'] == 'SHARDED': rtype = 'CLUSTER' - rname = self.cluster_name + rname = data['deploymentItemName'] else: rtype = 'REPLICASET' rname = data['deploymentItemName'] From f838abd11efe0aa402f4ddf89ad867e35b8fe785 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Sat, 17 Oct 2020 02:30:54 -0400 Subject: [PATCH 042/172] upload server logs --- .evergreen/config.yml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/.evergreen/config.yml b/.evergreen/config.yml index d5e9261f..73b8055a 100644 --- a/.evergreen/config.yml +++ b/.evergreen/config.yml @@ -126,6 +126,18 @@ functions: params: file: "astrolabe-src/xunit-output/*.xml" + "upload server logs": + - command: s3.put + params: + aws_key: ${aws_key} + aws_secret: ${aws_secret} + local_file: 
logs.tar.gz + remote_file: ${UPLOAD_BUCKET}/${build_variant}/${revision}/${version_id}/${build_id}/logs/${task_id}-${execution}-server-logs.tar.gz + bucket: mciuploads + permissions: public-read + content_type: ${content_type|application/x-gzip} + display_name: "mongodb-logs.tar.gz" + # Functions to run before the test. pre: - func: "install astrolabe" @@ -135,6 +147,7 @@ pre: post: - func: "delete test cluster" - func: "upload test results" + - func: "upload server logs" tasks: # Workload executor validation task (patch-only). From 29e0554a9f4fb564896363e2103b53130d6d379c Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Sat, 17 Oct 2020 03:37:27 -0400 Subject: [PATCH 043/172] cleanup fix --- astrolabe/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/astrolabe/cli.py b/astrolabe/cli.py index 7160a76d..0afe19a2 100644 --- a/astrolabe/cli.py +++ b/astrolabe/cli.py @@ -442,7 +442,7 @@ def delete_test_cluster(ctx, spec_test_file, org_name, project_name, # Step-2: delete the cluster. 
organization = cmd.get_one_organization_by_name( - client=ctx.obj, organization_name=org_name) + client=ctx.obj[0], organization_name=org_name) project = cmd.ensure_project( client=ctx.obj, project_name=project_name, organization_id=organization.id) try: From ec3bb721fcf9eaa5b4d252137be11cf84fa100a6 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Sat, 17 Oct 2020 12:58:08 -0400 Subject: [PATCH 044/172] cmap events --- integrations/ruby/executor.rb | 42 +++++++++++++++++++++++++++-------- 1 file changed, 33 insertions(+), 9 deletions(-) diff --git a/integrations/ruby/executor.rb b/integrations/ruby/executor.rb index c271190c..14d47688 100644 --- a/integrations/ruby/executor.rb +++ b/integrations/ruby/executor.rb @@ -9,33 +9,53 @@ class UnknownOperationConfiguration < StandardError; end class MetricsCollector def initialize @operations = {} - @samples = [] + @command_events = [] + @connection_events = [] end - attr_reader :samples + attr_reader :command_events, :connection_events def started(event) - @operations[event.operation_id] = event + @operations[event.operation_id] = [event, Time.now] end def succeeded(event) - started_event = @operations.delete(event.operation_id) + started_event, started_at = @operations.delete(event.operation_id) raise "Started event for #{event.operation_id} not found" unless started_event - @samples << { + @command_events << { command_name: started_event.command_name, duration: event.duration, + start_time: started_at.to_f, + address: started_event.address.seed, } end def failed(event) - started_event = @operations.delete(event.operation_id) + started_event, started_at = @operations.delete(event.operation_id) raise "Started event for #{event.operation_id} not found" unless started_event - @samples << { + @command_events << { command_name: started_event.command_name, duration: event.duration, failure: event.failure, + start_time: started_at.to_f, + address: started_event.address.seed, } end + + def published(event) + @connection_events 
<< { + name: event.class.name.sub(/.*::/, ''), + time: Time.now.to_f, + address: event.address.seed, + }.tap do |entry| + if event.respond_to?(:connection_id) + entry[:connection_id] = event.connection_id + end + if event.respond_to?(:reason) + entry[:reason] = event.reason + end + end + end end class Executor @@ -161,8 +181,11 @@ def write_result File.open('results.json', 'w') do |f| f << JSON.dump(result) end - File.open('metrics.json', 'w') do |f| - f << JSON.dump(metrics_collector.samples) + File.open('events.json', 'w') do |f| + f << JSON.dump( + commands: metrics_collector.command_events, + connections: metrics_collector.connection_events, + ) end end @@ -179,6 +202,7 @@ def collection def client @client ||= Mongo::Client.new(uri).tap do |client| client.subscribe(Mongo::Monitoring::COMMAND, metrics_collector) + client.subscribe(Mongo::Monitoring::CONNECTION_POOL, metrics_collector) end end end From 39c6739e3cd53c32b64064c5c0845443458f02c8 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Sat, 17 Oct 2020 13:00:18 -0400 Subject: [PATCH 045/172] upload event logs --- .evergreen/config.yml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/.evergreen/config.yml b/.evergreen/config.yml index 73b8055a..a2edd4ca 100644 --- a/.evergreen/config.yml +++ b/.evergreen/config.yml @@ -138,6 +138,25 @@ functions: content_type: ${content_type|application/x-gzip} display_name: "mongodb-logs.tar.gz" + "upload event logs": + - command: subprocess.exec + params: + working_dir: astrolabe-src + continue_on_err: true # Because script may not exist OR platform may not be Windows. 
+ add_expansions_to_env: true + command: | + gzip events.json.gz + - command: s3.put + params: + aws_key: ${aws_key} + aws_secret: ${aws_secret} + local_file: events.json.gz + remote_file: ${UPLOAD_BUCKET}/${build_variant}/${revision}/${version_id}/${build_id}/logs/${task_id}-${execution}-events.json.gz + bucket: mciuploads + permissions: public-read + content_type: ${content_type|application/x-gzip} + display_name: "events.json.gz" + # Functions to run before the test. pre: - func: "install astrolabe" From c3bf87ec47342bf0a134edcec6539d92b7f76a2b Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Sat, 17 Oct 2020 13:21:47 -0400 Subject: [PATCH 046/172] fix deletion more --- astrolabe/cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/astrolabe/cli.py b/astrolabe/cli.py index 0afe19a2..c75751c9 100644 --- a/astrolabe/cli.py +++ b/astrolabe/cli.py @@ -444,9 +444,9 @@ def delete_test_cluster(ctx, spec_test_file, org_name, project_name, organization = cmd.get_one_organization_by_name( client=ctx.obj[0], organization_name=org_name) project = cmd.ensure_project( - client=ctx.obj, project_name=project_name, organization_id=organization.id) + client=ctx.obj[0], project_name=project_name, organization_id=organization.id) try: - ctx.obj.groups[project.id].clusters[cluster_name].delete() + ctx.obj[0].groups[project.id].clusters[cluster_name].delete() except AtlasApiBaseError: pass From a936029c1ad95deb5de7cb246a6cbc73892dbf41 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Sat, 17 Oct 2020 13:22:12 -0400 Subject: [PATCH 047/172] upload event logs --- .evergreen/config.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.evergreen/config.yml b/.evergreen/config.yml index a2edd4ca..53237644 100644 --- a/.evergreen/config.yml +++ b/.evergreen/config.yml @@ -167,6 +167,7 @@ post: - func: "delete test cluster" - func: "upload test results" - func: "upload server logs" + - func: "upload event logs" tasks: # Workload executor validation task 
(patch-only). From b20e8a945edb0f7b5781c3d689b193bd7532c307 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Sat, 17 Oct 2020 13:24:23 -0400 Subject: [PATCH 048/172] collect errors --- integrations/ruby/executor.rb | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/integrations/ruby/executor.rb b/integrations/ruby/executor.rb index 14d47688..8123f344 100644 --- a/integrations/ruby/executor.rb +++ b/integrations/ruby/executor.rb @@ -11,9 +11,10 @@ def initialize @operations = {} @command_events = [] @connection_events = [] + @errors = [] end - attr_reader :command_events, :connection_events + attr_reader :command_events, :connection_events, :errors def started(event) @operations[event.operation_id] = [event, Time.now] @@ -160,6 +161,10 @@ def perform_operations # how to handle this requirement while maintaining diagnostics. rescue => e STDERR.puts "Error: #{e.class}: #{e}" + metrics_collector.errors << { + error: "#{e.class}: #{e}", + time: Time.now.to_f, + } @error_count += 1 end @operation_count += 1 @@ -185,6 +190,7 @@ def write_result f << JSON.dump( commands: metrics_collector.command_events, connections: metrics_collector.connection_events, + errors: metrics_collector.errors, ) end end From f7d27c5b502f1467cacf70dbfef61ee878e49a60 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Sat, 17 Oct 2020 13:27:44 -0400 Subject: [PATCH 049/172] ignore artifacts --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 5af7bee3..910c5594 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,5 @@ xunit-output/ *.iml integrations/go/go.sum integrations/go/executor +events.json +logs.tar.gz From 9616d3400e0dbd8e4498b5cb84488000c9596b7e Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Sun, 18 Oct 2020 14:04:48 -0400 Subject: [PATCH 050/172] fix uploads --- .evergreen/config.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.evergreen/config.yml b/.evergreen/config.yml index 
53237644..9e70a42f 100644 --- a/.evergreen/config.yml +++ b/.evergreen/config.yml @@ -131,7 +131,7 @@ functions: params: aws_key: ${aws_key} aws_secret: ${aws_secret} - local_file: logs.tar.gz + local_file: astrolabe-src/logs.tar.gz remote_file: ${UPLOAD_BUCKET}/${build_variant}/${revision}/${version_id}/${build_id}/logs/${task_id}-${execution}-server-logs.tar.gz bucket: mciuploads permissions: public-read @@ -150,7 +150,7 @@ functions: params: aws_key: ${aws_key} aws_secret: ${aws_secret} - local_file: events.json.gz + local_file: astrolabe-src/events.json.gz remote_file: ${UPLOAD_BUCKET}/${build_variant}/${revision}/${version_id}/${build_id}/logs/${task_id}-${execution}-events.json.gz bucket: mciuploads permissions: public-read From bb21bb1961d408a8dcb03d14f8494d46ca8ceb3e Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Sun, 18 Oct 2020 16:48:40 -0400 Subject: [PATCH 051/172] shorten --- .evergreen/config.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.evergreen/config.yml b/.evergreen/config.yml index 9e70a42f..583fb6fc 100644 --- a/.evergreen/config.yml +++ b/.evergreen/config.yml @@ -132,7 +132,7 @@ functions: aws_key: ${aws_key} aws_secret: ${aws_secret} local_file: astrolabe-src/logs.tar.gz - remote_file: ${UPLOAD_BUCKET}/${build_variant}/${revision}/${version_id}/${build_id}/logs/${task_id}-${execution}-server-logs.tar.gz + remote_file: ${UPLOAD_BUCKET}/${version_id}/${build_id}-${task_id}-${execution}/server-logs.tar.gz bucket: mciuploads permissions: public-read content_type: ${content_type|application/x-gzip} @@ -151,7 +151,7 @@ functions: aws_key: ${aws_key} aws_secret: ${aws_secret} local_file: astrolabe-src/events.json.gz - remote_file: ${UPLOAD_BUCKET}/${build_variant}/${revision}/${version_id}/${build_id}/logs/${task_id}-${execution}-events.json.gz + remote_file: ${UPLOAD_BUCKET}/${version_id}/${build_id}-${task_id}-${execution}/events.json.gz bucket: mciuploads permissions: public-read content_type: 
${content_type|application/x-gzip} From ec9834095a791b2a342d74044710029ebcd449bb Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Sun, 18 Oct 2020 16:51:06 -0400 Subject: [PATCH 052/172] use shell --- .evergreen/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.evergreen/config.yml b/.evergreen/config.yml index 583fb6fc..f34da476 100644 --- a/.evergreen/config.yml +++ b/.evergreen/config.yml @@ -139,7 +139,7 @@ functions: display_name: "mongodb-logs.tar.gz" "upload event logs": - - command: subprocess.exec + - command: shell.exec params: working_dir: astrolabe-src continue_on_err: true # Because script may not exist OR platform may not be Windows. From 38dcab354fe36a19b756d1056ae441c08008fb4c Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Fri, 23 Oct 2020 11:57:09 -0400 Subject: [PATCH 053/172] aggregate connection counts --- astrolabe/cli.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/astrolabe/cli.py b/astrolabe/cli.py index c75751c9..c80ac8fa 100644 --- a/astrolabe/cli.py +++ b/astrolabe/cli.py @@ -13,9 +13,11 @@ # limitations under the License. 
import logging +import json from pprint import pprint import unittest, os from urllib.parse import unquote_plus +from collections import defaultdict import click @@ -524,5 +526,33 @@ def validate_workload_executor(workload_executor, startup_time, exit(1) +@spec_tests.command('stats') +@click.pass_context +def stats(ctx): + with open('results.json', 'r') as fp: + stats = json.load(fp) + with open('events.json', 'r') as fp: + events = json.load(fp) + + import numpy + + conn_events = events['connections'] + counts = defaultdict(lambda: 0) + max_counts = defaultdict(lambda: 0) + conn_count = max_conn_count = 0 + for e in conn_events: + if e['name'] == 'ConnectionCreated': + counts[e['address']] += 1 + elif e['name'] == 'ConnectionClosed': + counts[e['address']] -= 1 + if counts[e['address']] > max_counts[e['address']]: + max_counts[e['address']] = counts[e['address']] + + stats['maxConnectionCounts'] = max_counts + + with open('stats.json', 'w') as fp: + json.dump(stats, fp) + + if __name__ == '__main__': cli() From 1d066486c468c761879b39817fc8ec0b5a70c4dc Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Fri, 23 Oct 2020 13:23:05 -0400 Subject: [PATCH 054/172] ignore stats --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 910c5594..116f1d5b 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,4 @@ integrations/go/go.sum integrations/go/executor events.json logs.tar.gz +stats.json From 5f542b1ccb5951f577c0b248c55b73b3a5ecaa52 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Fri, 23 Oct 2020 13:36:31 -0400 Subject: [PATCH 055/172] aggregate command times --- astrolabe/cli.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/astrolabe/cli.py b/astrolabe/cli.py index c80ac8fa..bafdade9 100644 --- a/astrolabe/cli.py +++ b/astrolabe/cli.py @@ -536,6 +536,12 @@ def stats(ctx): import numpy + command_events = events['commands'] + command_times = [c['duration'] for c in command_events] + stats['avgCommandTime'] = 
numpy.average(command_times) + stats['p95CommandTime'] = numpy.percentile(command_times, 95) + stats['p99CommandTime'] = numpy.percentile(command_times, 99) + conn_events = events['connections'] counts = defaultdict(lambda: 0) max_counts = defaultdict(lambda: 0) From b7ac6196c622de6272a4c74ddee41171fbc2adc8 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Fri, 23 Oct 2020 13:58:05 -0400 Subject: [PATCH 056/172] use camel case --- integrations/ruby/executor.rb | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/integrations/ruby/executor.rb b/integrations/ruby/executor.rb index 8123f344..11f7318f 100644 --- a/integrations/ruby/executor.rb +++ b/integrations/ruby/executor.rb @@ -24,9 +24,9 @@ def succeeded(event) started_event, started_at = @operations.delete(event.operation_id) raise "Started event for #{event.operation_id} not found" unless started_event @command_events << { - command_name: started_event.command_name, + commandName: started_event.command_name, duration: event.duration, - start_time: started_at.to_f, + startTime: started_at.to_f, address: started_event.address.seed, } end @@ -35,10 +35,10 @@ def failed(event) started_event, started_at = @operations.delete(event.operation_id) raise "Started event for #{event.operation_id} not found" unless started_event @command_events << { - command_name: started_event.command_name, + commandName: started_event.command_name, duration: event.duration, failure: event.failure, - start_time: started_at.to_f, + startTime: started_at.to_f, address: started_event.address.seed, } end @@ -50,7 +50,7 @@ def published(event) address: event.address.seed, }.tap do |entry| if event.respond_to?(:connection_id) - entry[:connection_id] = event.connection_id + entry[:connectionId] = event.connection_id end if event.respond_to?(:reason) entry[:reason] = event.reason From 3a5303c3230efba92019ba62496fa50d92699292 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Sat, 12 Dec 2020 10:45:54 -0500 Subject: [PATCH 
057/172] undo dev override --- astrolabe/cli.py | 1 - 1 file changed, 1 deletion(-) diff --git a/astrolabe/cli.py b/astrolabe/cli.py index bafdade9..3ad015ac 100644 --- a/astrolabe/cli.py +++ b/astrolabe/cli.py @@ -100,7 +100,6 @@ def cli(ctx, atlas_base_url, atlas_api_username, tests against a MongoDB Atlas cluster undergoing maintenance. """ - atlas_base_url = 'https://cloud-dev.mongodb.com/api/atlas' # Create an atlasclient and attach it to the context. client = AtlasClient( base_url=atlas_base_url, From ae8485928fc601a50f30858eff75126feee7abe5 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Sun, 13 Dec 2020 20:53:53 -0500 Subject: [PATCH 058/172] fix up context use --- astrolabe/cli.py | 74 ++++++++++++++++++++++++++---------------------- 1 file changed, 40 insertions(+), 34 deletions(-) diff --git a/astrolabe/cli.py b/astrolabe/cli.py index 3ad015ac..9faae6b3 100644 --- a/astrolabe/cli.py +++ b/astrolabe/cli.py @@ -81,6 +81,12 @@ help=('Do not create clusters at the beginning of the run, assume they have already been provisioned by a previous run.')) +class ContextStore: + def __init__(self, client, admin_client): + self.client = client + self.admin_client = admin_client + + @click.group() @create_click_option(CONFIGOPTS.ATLAS_API_BASE_URL) @create_click_option(CONFIGOPTS.ATLAS_API_USERNAME) @@ -112,7 +118,7 @@ def cli(ctx, atlas_base_url, atlas_api_username, username=atlas_admin_api_username, password=atlas_admin_api_password, timeout=http_timeout) - ctx.obj = (client,admin_client) + ctx.obj = ContextStore(client, admin_client) # Configure logging. 
loglevel = getattr(logging, log_level.upper()) @@ -133,7 +139,7 @@ def cli(ctx, atlas_base_url, atlas_api_username, @click.pass_context def check_connection(ctx): """Command to verify validity of Atlas API credentials.""" - pprint(ctx.obj.root.get().data) + pprint(ctx.obj.client.root.get().data) @cli.group('organizations') @@ -146,7 +152,7 @@ def atlas_organizations(): @click.pass_context def list_all_organizations(ctx): """List all Atlas Organizations (limited to first 100).""" - pprint(ctx.obj.orgs.get().data) + pprint(ctx.obj.client.orgs.get().data) @atlas_organizations.command('get-one') @@ -156,7 +162,7 @@ def get_one_organization_by_name(ctx, org_name): """Get one Atlas Organization by name. Prints "None" if no organization bearing the given name exists.""" pprint(cmd.get_one_organization_by_name( - client=ctx.obj, organization_name=org_name)) + client=ctx.obj.client, organization_name=org_name)) @cli.group('projects') @@ -172,16 +178,16 @@ def atlas_projects(): def create_project_if_necessary(ctx, org_name, project_name, ): """Ensure that the given Atlas Project exists.""" org = cmd.get_one_organization_by_name( - client=ctx.obj, organization_name=org_name) + client=ctx.obj.client, organization_name=org_name) pprint(cmd.ensure_project( - client=ctx.obj, project_name=project_name, organization_id=org.id)) + client=ctx.obj.client, project_name=project_name, organization_id=org.id)) @atlas_projects.command('list') @click.pass_context def list_projects(ctx): """List all Atlas Projects (limited to first 100).""" - pprint(ctx.obj.groups.get().data) + pprint(ctx.obj.client.groups.get().data) @atlas_projects.command('get-one') @@ -189,7 +195,7 @@ def list_projects(ctx): @click.pass_context def get_one_project_by_name(ctx, project_name): """Get one Atlas Project.""" - pprint(ctx.obj.groups.byName[project_name].get().data) + pprint(ctx.obj.client.groups.byName[project_name].get().data) @atlas_projects.command('enable-anywhere-access') @@ -197,8 +203,8 @@ def 
get_one_project_by_name(ctx, project_name): @click.pass_context def enable_project_access_from_anywhere(ctx, project_name): """Add 0.0.0.0/0 to the IP whitelist of the Atlas Project.""" - project = ctx.obj.groups.byName[project_name].get().data - cmd.ensure_connect_from_anywhere(client=ctx.obj, project_id=project.id) + project = ctx.obj.client.groups.byName[project_name].get().data + cmd.ensure_connect_from_anywhere(client=ctx.obj.client, project_id=project.id) @cli.group('users') @@ -215,9 +221,9 @@ def atlas_users(): def create_user(ctx, db_username, db_password, project_name): """Create an Atlas User with admin privileges. Modifies user permissions, if the user already exists.""" - project = ctx.obj.groups.byName[project_name].get().data + project = ctx.obj.client.groups.byName[project_name].get().data user = cmd.ensure_admin_user( - client=ctx.obj, project_id=project.id, username=db_username, + client=ctx.obj.client, project_id=project.id, username=db_username, password=db_password) pprint(user) @@ -227,8 +233,8 @@ def create_user(ctx, db_username, db_password, project_name): @click.pass_context def list_users(ctx, project_name): """List all Atlas Users.""" - project = ctx.obj.groups.byName[project_name].get().data - pprint(ctx.obj.groups[project.id].databaseUsers.get().data) + project = ctx.obj.client.groups.byName[project_name].get().data + pprint(ctx.obj.client.groups[project.id].databaseUsers.get().data) @cli.group('clusters') @@ -246,7 +252,7 @@ def atlas_clusters(): @click.pass_context def create_cluster(ctx, project_name, cluster_name, instance_size_name): """Create a new dedicated-tier Atlas Cluster.""" - project = ctx.obj.groups.byName[project_name].get().data + project = ctx.obj.client.groups.byName[project_name].get().data cluster_config = { 'name': cluster_name, @@ -256,7 +262,7 @@ def create_cluster(ctx, project_name, cluster_name, instance_size_name): 'regionName': 'US_WEST_1', 'instanceSizeName': instance_size_name}} - cluster = 
ctx.obj.groups[project.id].clusters.post(**cluster_config) + cluster = ctx.obj.client.groups[project.id].clusters.post(**cluster_config) pprint(cluster.data) @@ -266,8 +272,8 @@ def create_cluster(ctx, project_name, cluster_name, instance_size_name): @click.pass_context def get_one_cluster_by_name(ctx, cluster_name, project_name): """Get one Atlas Cluster.""" - project = ctx.obj.groups.byName[project_name].get().data - cluster = ctx.obj.groups[project.id].clusters[cluster_name].get() + project = ctx.obj.client.groups.byName[project_name].get().data + cluster = ctx.obj.client.groups[project.id].clusters[cluster_name].get() pprint(cluster.data) @@ -280,7 +286,7 @@ def get_one_cluster_by_name(ctx, cluster_name, project_name): @click.pass_context def resize_cluster(ctx, project_name, cluster_name, instance_size_name): """Resize an existing dedicated-tier Atlas Cluster.""" - project = ctx.obj.groups.byName[project_name].get().data + project = ctx.obj.client.groups.byName[project_name].get().data new_cluster_config = { 'clusterType': 'REPLICASET', @@ -289,7 +295,7 @@ def resize_cluster(ctx, project_name, cluster_name, instance_size_name): 'regionName': 'US_WEST_1', 'instanceSizeName': instance_size_name}} - cluster = ctx.obj.groups[project.id].clusters[cluster_name].patch( + cluster = ctx.obj.client.groups[project.id].clusters[cluster_name].patch( **new_cluster_config) pprint(cluster.data) @@ -300,10 +306,10 @@ def resize_cluster(ctx, project_name, cluster_name, instance_size_name): @click.pass_context def toggle_cluster_javascript(ctx, project_name, cluster_name): """Enable/disable server-side javascript for an existing Atlas Cluster.""" - project = ctx.obj.groups.byName[project_name].get().data + project = ctx.obj.client.groups.byName[project_name].get().data # Alias to reduce verbosity. 
- pargs = ctx.obj.groups[project.id].clusters[cluster_name].processArgs + pargs = ctx.obj.client.groups[project.id].clusters[cluster_name].processArgs initial_process_args = pargs.get() target_js_value = not initial_process_args.data.javascriptEnabled @@ -317,8 +323,8 @@ def toggle_cluster_javascript(ctx, project_name, cluster_name): @click.pass_context def list_clusters(ctx, project_name): """List all Atlas Clusters.""" - project = ctx.obj.groups.byName[project_name].get().data - clusters = ctx.obj.groups[project.id].clusters.get() + project = ctx.obj.client.groups.byName[project_name].get().data + clusters = ctx.obj.client.groups[project.id].clusters.get() pprint(clusters.data) @@ -328,8 +334,8 @@ def list_clusters(ctx, project_name): @click.pass_context def isready_cluster(ctx, project_name, cluster_name): """Check if the Atlas Cluster is 'IDLE'.""" - project = ctx.obj.groups.byName[project_name].get().data - state = ctx.obj.groups[project.id].clusters[cluster_name].get().data.stateName + project = ctx.obj.client.groups.byName[project_name].get().data + state = ctx.obj.client.groups[project.id].clusters[cluster_name].get().data.stateName if state == "IDLE": click.echo("True") @@ -344,8 +350,8 @@ def isready_cluster(ctx, project_name, cluster_name): @click.pass_context def delete_cluster(ctx, project_name, cluster_name): """Delete the Atlas Cluster.""" - project = ctx.obj.groups.byName[project_name].get().data - ctx.obj.groups[project.id].clusters[cluster_name].delete().data + project = ctx.obj.client.groups.byName[project_name].get().data + ctx.obj.client.groups[project.id].clusters[cluster_name].delete().data click.echo("DONE!") @@ -405,8 +411,8 @@ def run_single_test(ctx, spec_test_file, workload_executor, LOGGER.info(tabulate_astrolabe_configuration(config)) # Step-1: create the Test-Runner. 
- runner = SingleTestRunner(client=ctx.obj[0], - admin_client=ctx.obj[1], + runner = SingleTestRunner(client=ctx.obj.client, + admin_client=ctx.obj.admin_client, test_locator_token=spec_test_file, configuration=config, xunit_output=xunit_output, @@ -443,11 +449,11 @@ def delete_test_cluster(ctx, spec_test_file, org_name, project_name, # Step-2: delete the cluster. organization = cmd.get_one_organization_by_name( - client=ctx.obj[0], organization_name=org_name) + client=ctx.obj.client, organization_name=org_name) project = cmd.ensure_project( - client=ctx.obj[0], project_name=project_name, organization_id=organization.id) + client=ctx.obj.client, project_name=project_name, organization_id=organization.id) try: - ctx.obj[0].groups[project.id].clusters[cluster_name].delete() + ctx.obj.client.groups[project.id].clusters[cluster_name].delete() except AtlasApiBaseError: pass @@ -489,7 +495,7 @@ def run_headless(ctx, spec_tests_directory, workload_executor, db_username, LOGGER.info(tabulate_astrolabe_configuration(config)) # Step-1: create the Test-Runner. 
- runner = MultiTestRunner(client=ctx.obj, + runner = MultiTestRunner(client=ctx.obj.client, test_locator_token=spec_tests_directory, configuration=config, xunit_output=xunit_output, From 07113050fc96c63b6f42421449991c611c85ba90 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Mon, 14 Dec 2020 17:48:43 -0500 Subject: [PATCH 059/172] docs --- .gitignore | 1 + docs/source/installing-running-locally.rst | 29 +++++++++++++++++++--- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index 116f1d5b..e92e6ece 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,4 @@ integrations/go/executor events.json logs.tar.gz stats.json +/docs/build diff --git a/docs/source/installing-running-locally.rst b/docs/source/installing-running-locally.rst index aad3378a..cbdf12cb 100644 --- a/docs/source/installing-running-locally.rst +++ b/docs/source/installing-running-locally.rst @@ -43,16 +43,37 @@ Before you can start using ``astrolabe``, you must configure it to give it acces If you haven't done so already, create a `MongoDB Atlas Organization `_ (this can -only be done via the Atlas UI). Make a note of the name of the Atlas organization. You will also need -a `Programmatic API Key ` for this Atlas Organization with -"Organization Owner" permissions. The API key will consist of 2 parts - a public key and a private key. -Finally, declare the following variables to configure ``astrolabe``:: +only be done via the Atlas UI). Make a note of the name of the Atlas organization. + +Depending on the test scenario being executed, you will need either one +or two sets of a `Programmatic API Keys +`_: a regular +key for Atlas Organization you created with "Organization Owner" permissions, +and potentially a key with Atlas Global Operator permissions (hereafter +referred to as the "admin key"). 
The admin key generally must be created by +a Cloud team member and would typically be issued for the development environment +of Atlas (`https://cloud-dev.mongodb.com `_), +meaning the organization and projects must also be created in the development +environment. + +Each API key consists of 2 parts - a public key and a private key. + +To configure ``astrolabe`` to use production Atlas and specify only a regular +API key, declare the following variables:: + + $ export ATLAS_ORGANIZATION_NAME= + $ export ATLAS_API_USERNAME= + $ export ATLAS_API_PASSWORD= + +To configure ``astrolabe`` to use development Atlas and specify two sets of +API keys, declare the following variables:: $ export ATLAS_ORGANIZATION_NAME= $ export ATLAS_API_USERNAME= $ export ATLAS_API_PASSWORD= $ export ATLAS_ADMIN_API_USERNAME= $ export ATLAS_ADMIN_API_PASSWORD= + $ export ATLAS_API_BASE_URL=https://cloud-dev.mongodb.com/api/atlas Finally, use the ``check-connection`` command to confirm that ``astrolabe`` is able to connect to and authenticate with the Atlas API:: From 518c935760b368beb86763b769550a076bc1faea Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Mon, 14 Dec 2020 17:58:17 -0500 Subject: [PATCH 060/172] add admin client to check_connection --- astrolabe/cli.py | 16 +++++++++++----- atlasclient/client.py | 2 +- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/astrolabe/cli.py b/astrolabe/cli.py index 9faae6b3..870bdf0d 100644 --- a/astrolabe/cli.py +++ b/astrolabe/cli.py @@ -113,11 +113,15 @@ def cli(ctx, atlas_base_url, atlas_api_username, password=atlas_api_password, timeout=http_timeout) - admin_client = AtlasClient( - base_url=atlas_base_url, - username=atlas_admin_api_username, - password=atlas_admin_api_password, - timeout=http_timeout) + if atlas_admin_api_username: + admin_client = AtlasClient( + base_url=atlas_base_url, + username=atlas_admin_api_username, + password=atlas_admin_api_password, + timeout=http_timeout) + else: + admin_client = None + ctx.obj = 
ContextStore(client, admin_client) # Configure logging. @@ -140,6 +144,8 @@ def cli(ctx, atlas_base_url, atlas_api_username, def check_connection(ctx): """Command to verify validity of Atlas API credentials.""" pprint(ctx.obj.client.root.get().data) + if ctx.obj.admin_client: + pprint(ctx.obj.admin_client.root.get().data) @cli.group('organizations') diff --git a/atlasclient/client.py b/atlasclient/client.py index 830a40c4..7d2d5558 100644 --- a/atlasclient/client.py +++ b/atlasclient/client.py @@ -222,7 +222,7 @@ def request(self, method, path, **params): def construct_resource_url(self, path, api_version=None): url_template = "{base_url}/{version}/{resource_path}" - if path[0] == '/': + if path and path[0] == '/': url_template = 'https://cloud-dev.mongodb.com{resource_path}' return url_template.format( base_url=self.config.base_url, From 82011c00a1416ba5cb41939c5a32ba1376f509e0 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Mon, 14 Dec 2020 17:59:56 -0500 Subject: [PATCH 061/172] fix faq --- docs/source/faq.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/faq.rst b/docs/source/faq.rst index 71c7573f..22877c6d 100644 --- a/docs/source/faq.rst +++ b/docs/source/faq.rst @@ -21,7 +21,7 @@ referred to as admin credentials. 
$ astrolabe -u -p --atlas-admin-api-username --atlas-admin-api-password check-connection * Using the ``ATLAS_API_USERNAME``, ``ATLAS_API_PASSWORD``, -``ATLAS_ADMIN_API_USERNAME``, ``ATLAS_ADMIN_API_PASSWORD`` environment variables:: + ``ATLAS_ADMIN_API_USERNAME``, ``ATLAS_ADMIN_API_PASSWORD`` environment variables:: $ ATLAS_API_USERNAME= ATLAS_API_PASSWORD= ATLAS_ADMIN_API_USERNAME= ATLAS_ADMIN_API_PASSWORD= astrolabe check-connection From 5833c7d49631706125cfeb3c934cc4a34a45e440 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Mon, 14 Dec 2020 18:27:00 -0500 Subject: [PATCH 062/172] workload executor requirements --- docs/source/spec-workload-executor.rst | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/docs/source/spec-workload-executor.rst b/docs/source/spec-workload-executor.rst index 9c892e81..7f3d16fc 100644 --- a/docs/source/spec-workload-executor.rst +++ b/docs/source/spec-workload-executor.rst @@ -47,18 +47,27 @@ After accepting the inputs, the workload executor: to run the operations described therein in accordance with the :ref:`test-scenario-format-specification`. Note that the workload executor: - * MUST ignore the ``testData`` array. ``astrolabe`` is responsible for initializing the cluster with + * MUST ignore the ``initialData`` array. ``astrolabe`` is responsible for initializing the cluster with this data *before* starting the workload executor. - * MUST run operations sequentially and in the order in which they appear in the ``operations`` array. - * MUST repeat the entire set of specified operations indefinitely, until the **termination signal** from + * MUST run the tests, and the operations in each test, sequentially + and in the order in which they appear in the ``tests`` and ``operations`` array. + * MUST repeat the entire set of specified tests and operations indefinitely, until the **termination signal** from ``astrolabe`` is received. 
- * MUST keep count of the number of operations failures (``numFailures``) that are encountered while running - operations. An operation failure is when the actual return value of an operation does not match its + * MUST keep count of the number of operation failures + (``numFailures``) that are encountered. An operation failure is when + the actual return value of an operation does not match its expected return value (as defined in the ``result`` field of the ``driverWorkload``). * MUST keep count of the number of operation errors (``numErrors``) that are encountered while running operations. An operation error is when running an operation unexpectedly raises an error. Workload executors implementations should try to be as resilient as possible to these kinds of operation errors. * MUST keep count of the number of operations that are run successfully (``numSuccesses``). + * MUST record all errors encountered while running operations. + * MUST use `command monitoring `_ + to record started, succeeded and failed events for each operation sent to + MongoDB. + * MUST use `CMAP `_ + to record all connection pool and connection-related events published + during the course of scenario execution. #. MUST set a signal handler for handling the termination signal that is sent by ``astrolabe``. The termination signal is used by ``astrolabe`` to communicate to the workload executor that it should stop running operations. 
Upon From 31c7bdfd18a8afc265b53b3c7398475134189f0b Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Mon, 14 Dec 2020 18:40:13 -0500 Subject: [PATCH 063/172] remove debug --- atlasclient/exceptions.py | 1 - 1 file changed, 1 deletion(-) diff --git a/atlasclient/exceptions.py b/atlasclient/exceptions.py index dd16693b..cac93d0f 100644 --- a/atlasclient/exceptions.py +++ b/atlasclient/exceptions.py @@ -26,7 +26,6 @@ def __init__(self, msg, resource_url=None, request_method=None, detail=None, self.error_code = error_code self.headers = headers self.detail = detail - #import pdb;pdb.set_trace() def __str__(self): if self.request_method and self.resource_url: From 3223c2219ad444532e3785854dad5313da20d37c Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Mon, 14 Dec 2020 20:36:34 -0500 Subject: [PATCH 064/172] add requirements.txt --- requirements.txt | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..245942b5 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,7 @@ +click>=7,<8 +requests>=2,<3 +pymongo>=3.10,<4 +dnspython>=1.16,<2 +pyyaml>=5,<6 +tabulate>=0.8,<0.9 +junitparser>=1,<2 From b53da3219406adbcf10e7ca8fac35d31edfdfe8c Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Mon, 14 Dec 2020 21:12:13 -0500 Subject: [PATCH 065/172] update test format --- docs/source/spec-test-format.rst | 72 ++++++++++++++++++++------------ 1 file changed, 45 insertions(+), 27 deletions(-) diff --git a/docs/source/spec-test-format.rst b/docs/source/spec-test-format.rst index fd11fb36..c5402447 100644 --- a/docs/source/spec-test-format.rst +++ b/docs/source/spec-test-format.rst @@ -10,19 +10,23 @@ The YAML file format described herein is used to define platform-independent *At YAML-formatted *Test Scenario Files*. Each Test Scenario File describes exactly one Atlas Planned Maintenance Test. 
A Test Scenario File has the following keys: -* maintenancePlan (document): a *Planned Maintenance Scenario* object. Each object has the following keys: +* initialConfiguration (document): Description of *Cluster Configuration Options* to be used for initializing the + test cluster. This document MUST contain the following keys: - * initialConfiguration (document): Description of *Cluster Configuration Options* to be used for initializing the - test cluster. This document MUST contain the following keys: + * clusterConfiguration (document): Document containing initial *Basic Configuration Options* values. + This document MUST, at a minimum, have all fields **required** by the + `Create One Cluster `_ endpoint. + * processArgs (document): Document containing initial *Advanced Configuration Option* values. This MAY be an empty + document if the maintenance plan does not require modifying the Advanced Configuration Options. - * clusterConfiguration (document): Document containing initial *Basic Configuration Options* values. - This document MUST, at a minimum, have all fields **required** by the - `Create One Cluster `_ endpoint. - * processArgs (document): Document containing initial *Advanced Configuration Option* values. This MAY be an empty - document if the maintenance plan does not require modifying the Advanced Configuration Options. - - * finalConfiguration (document): Description of **new** *Cluster Configuration Options* to be applied to the - test cluster. This document MUST contain the following keys (note that at least one of these fields MUST be +* operations (array): List of operations to be performed, representing the + maintenance event. Each operation is a document containing one key which is + the name of the operation. The possible operations are: + + * setClusterConfiguration: set the cluster configuration to the specified + *Cluster Configuration Options* as defined in initialConfiguration. 
+ The value must be the *Cluster Configuration Options* which MUST contain + the following keys (note that at least one of these fields MUST be a non-empty document): * clusterConfiguration (document): Document containing final *Basic Configuration Option* values. @@ -32,24 +36,38 @@ A Test Scenario File has the following keys: * processArgs (document): Document containing final *Advanced Configuration Option* values. This MAY be an empty document if the maintenance plan does not require modifying the Advanced Configuration Options. - * uriOptions (document): Document containing ``key: value`` pairs of URI options that must be included in the - connection string passed to the workload executor by the *Test Orchestrator*. - -* driverWorkload (document): Object describing a *Driver Workload*. Has the following keys: + * testFailover: trigger an election in the cluster using the "test failover" + API endpoint. The value MUST be ``true``. + + testFailover SHOULD be followed by sleep and waitForIdle operations + because it does not update maintenance state synchronously (see + `PRODTRUAGE-1232 `_). + + * restartVms: perform a rolling restart of all nodes in the cluster. + This operation requires Atlas Global Operator API key to be set when + invoking ``astrolabe``. The value MUST be ``true``. + + * assertPrimaryRegion: assert that the primary in the deployment is in the + specified region. The value MUST be the region name as defined in Atlas API, + e.g. ``US_WEST_1``. This operation is undefined and MUST NOT be used when + the deployment is a sharded cluster. + + * sleep: do nothing for the specified duration. The value MUST be the duration + to sleep for, in seconds. + + * waitForIdle: wait for cluster maintenance state to become "idle". + The value MUST be ``true``. - * collection (string): Name of the collection to use for running test operations. - * database (string): Name of the database to use for running test operations. 
- * testData (array, optional): Array of documents to be inserted into the ``database.collection`` namespace before - starting the test run. Test data insertion is performed by the *Test Orchestrator* and this field MUST be ignored - by the *Workload Executor*. - * operations (array): Array of Operation objects, each describing an operation to be executed. The operations are run - sequentially and repeatedly until the maintenance completes. Each object has the following keys: +* uriOptions (document): Document containing ``key: value`` pairs of URI options that must be included in the + connection string passed to the workload executor by the *Test Orchestrator*. - * object (string): The entity on which to perform the operation. Can be "database" or "collection". - * name (string): name of the operation. - * arguments (document): the names and values of arguments to be passed to the operation. - * result (optional, multiple types): The result of executing the operation. This will correspond to the operation's - return value. +* driverWorkload (document): Description of the driver workload to execute + The document must be a complete test as defined by the + `Unified Test Format specification `_. + + Note that the ``initialData`` (and, by necessity, ``createEntities``) + field of this document is interpreted and executed by ``astrolabe``, while + the remaining fields are interpreted and executed by the workload executor. 
------- Changes From f7385b4138c71175e60ffb4dad65a14d65f8c53c Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Mon, 21 Dec 2020 10:53:53 -0500 Subject: [PATCH 066/172] add examples to operations --- docs/source/spec-test-format.rst | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/docs/source/spec-test-format.rst b/docs/source/spec-test-format.rst index c5402447..cc8817c5 100644 --- a/docs/source/spec-test-format.rst +++ b/docs/source/spec-test-format.rst @@ -35,6 +35,16 @@ A Test Scenario File has the following keys: `Modify One Cluster `_ endpoint. * processArgs (document): Document containing final *Advanced Configuration Option* values. This MAY be an empty document if the maintenance plan does not require modifying the Advanced Configuration Options. + + Example:: + + setClusterConfiguration: + clusterConfiguration: + providerSettings: + providerName: AWS + regionName: US_WEST_1 + instanceSizeName: M10 + processArgs: {} * testFailover: trigger an election in the cluste rusing the "test failover" API endpoint. The value MUST be ``true``. @@ -42,22 +52,42 @@ A Test Scenario File has the following keys: testFailover SHOULD be followed by sleep and waitForIdle operations because it does not update maintenance state synchronously (see `PRODTRUAGE-1232 `_). + + Example:: + testFailover: true + * restartVms: perform a rolling restart of all nodes in the cluster. This operation requires Atlas Global Operator API key to be set when invoking ``astrolabe``. The value MUST be ``true``. - + + Example:: + + restartVms: true + * assertPrimaryRegion: assert that the primary in the deployment is in the specified region. The value MUST be the region name as defined in Atlas API, e.g. ``US_WEST_1``. This operation is undefined and MUST NOT be used when the deployment is a sharded cluster. + + Example:: + + assertPrimaryRegion: US_WEST_1 * sleep: do nothing for the specified duration. 
The value MUST be the duration to sleep for, in seconds. + + Example:: + + sleep: 10 * waitForIdle: wait for cluster maintenance state to become "idle". The value MUST be ``true``. + Example:: + + waitForIdle: true + * uriOptions (document): Document containing ``key: value`` pairs of URI options that must be included in the connection string passed to the workload executor by the *Test Orchestrator*. From 42a1d3bde95cab636ffcd6b4e72c9ebb235e5096 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Wed, 30 Dec 2020 17:24:09 -0500 Subject: [PATCH 067/172] docs --- docs/source/spec-workload-executor.rst | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/docs/source/spec-workload-executor.rst b/docs/source/spec-workload-executor.rst index 7f3d16fc..138518ab 100644 --- a/docs/source/spec-workload-executor.rst +++ b/docs/source/spec-workload-executor.rst @@ -43,7 +43,18 @@ After accepting the inputs, the workload executor: * MUST NOT override any of the URI options specified in the incoming connection string. * MUST NOT augment the incoming connection string with any additional URI options. -#. MUST parse the incoming the ``driverWorkload`` document and use the ``MongoClient`` instance from the previous step +#. MUST parse the incoming the ``driverWorkload`` document and set up + the driver's unified test runner to execute the provided workload, with + the following deviations from the unified test runner specification: + + - Any `MongoClients `_ + that are instantiated by the workload executor MUST use the input + connection string as provided to the workload executor, and MUST + apply URI options specified in the particular test, if any, over the + provided connection string. + + + use the ``MongoClient`` instance from the previous step to run the operations described therein in accordance with the :ref:`test-scenario-format-specification`. 
Note that the workload executor: From 47079bae098354b9c968ef46e6366ba655ce6016 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Sun, 3 Jan 2021 13:39:06 -0500 Subject: [PATCH 068/172] dev requirements --- requirements-dev.txt | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 requirements-dev.txt diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 00000000..3a7574b2 --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,2 @@ +-r requirements.txt +sphinx From d32dc2972169dda7c7ec9212fff709200ef0e1e3 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Sun, 3 Jan 2021 14:04:20 -0500 Subject: [PATCH 069/172] document --no-create --- docs/source/installing-running-locally.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/source/installing-running-locally.rst b/docs/source/installing-running-locally.rst index cbdf12cb..2ff24716 100644 --- a/docs/source/installing-running-locally.rst +++ b/docs/source/installing-running-locally.rst @@ -141,6 +141,14 @@ Using this flag with a given test file and static ``--cluster-name-salt`` value times between successive test runs (you will still need to wait for the cluster to be reconfigured to the initial configuration). +``astrolabe`` also provides the ``--no-create`` flag which makes it skip +cluster initialization. This flag may be used to further speed up the test +runs, but it can only be used for scenarios where the cluster configuration +does not change from the initial one (otherwise the test would start with the +wrong configuration) and only after a previous run with ``--no-delete`` +successfully created the cluster. 
+ + Debugging --------- From cf769045b8b91bcc106bc947b4972fccc2dd621b Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Sun, 3 Jan 2021 14:26:11 -0500 Subject: [PATCH 070/172] document events --- docs/source/spec-workload-executor.rst | 55 +++++++++++++++++++++----- 1 file changed, 46 insertions(+), 9 deletions(-) diff --git a/docs/source/spec-workload-executor.rst b/docs/source/spec-workload-executor.rst index 138518ab..87410b4f 100644 --- a/docs/source/spec-workload-executor.rst +++ b/docs/source/spec-workload-executor.rst @@ -54,12 +54,20 @@ After accepting the inputs, the workload executor: provided connection string. - use the ``MongoClient`` instance from the previous step - to run the operations described therein in accordance with the :ref:`test-scenario-format-specification`. + Then, the workload executor MUST use the ``MongoClient`` instance + from the previous step to run the operations described in the + scenario in accordance with the `Unified Test Format specification + `_. Note that the workload executor: * MUST ignore the ``initialData`` array. ``astrolabe`` is responsible for initializing the cluster with this data *before* starting the workload executor. + * MUST set up `command monitoring `_ + event listeners on all MongoClients to record started, succeeded and failed events for each operation sent to + MongoDB in the course of scenario execution. + * MUST set up `CMAP `_ + event listeners on all MongoClients to record all connection pool and connection-related events published + during the course of scenario execution. * MUST run the tests, and the operations in each test, sequentially and in the order in which they appear in the ``tests`` and ``operations`` array. * MUST repeat the entire set of specified tests and operations indefinitely, until the **termination signal** from @@ -73,19 +81,48 @@ After accepting the inputs, the workload executor: implementations should try to be as resilient as possible to these kinds of operation errors. 
* MUST keep count of the number of operations that are run successfully (``numSuccesses``). * MUST record all errors encountered while running operations. - * MUST use `command monitoring `_ - to record started, succeeded and failed events for each operation sent to - MongoDB. - * MUST use `CMAP `_ - to record all connection pool and connection-related events published - during the course of scenario execution. #. MUST set a signal handler for handling the termination signal that is sent by ``astrolabe``. The termination signal is used by ``astrolabe`` to communicate to the workload executor that it should stop running operations. Upon receiving the termination signal, the workload executor: * MUST stop running driver operations and exit soon. - * MUST dump collected workload statistics as a JSON file named ``results.json`` in the current working directory + * MUST write the collected events and errors into a JSON file named + ``events.json`` in the current directory + (i.e. the directory from where the workload executor is being executed). + The data written MUST be a map with the following fields: + + * ``commands``: an array of command events published during scenario + execution. Each command event MUST be a map with the following fields: + + * ``commandName``: the name of the command, e.g. ``insert``. + * ``duration``: the time, in (floating-point) seconds, it took for the command to execute. + * ``failure``: if the command succeeded, this field MUST not be set. + If the command failed, this field MUST contain a textual description + of the error encountered while executing the command. + * ``startTime``: the (floating-point) number of seconds since the Unix epoch when the + command began executing. + * ``address``: the address of the server to which the command + was sent, e.g. ``localhost:27017``. + * ``connections``: an array of CMAP events published during scenario + execution. 
Each event MUST be a map with the following fields: + + * ``name``: the name of the event, e.g. ``PoolCreated``. + * ``time``: the (floating-point) number of seconds since the Unix epoch + when the event was published. + * ``address``: the address of the server that the command was + published for, e.g. ``localhost:27017``. + * ``errors``: an array of errors encountered during scenario execution. + Each error MUST be a map with the following fields: + + * ``error``: textual description of the error. + * ``time``: the (floating-point) number of seconds since the Unix epoch + when the error occurred. + + The number of errors MUST be reported as ``numErrors`` in ``results.json``, + as described below. + + * MUST write the collected workload statistics into a JSON file named ``results.json`` in the current working directory (i.e. the directory from where the workload executor is being executed). Workload statistics MUST contain the following fields (drivers MAY report additional statistics using field names of their choice): From 429b7833e66cc2c9c0a57a3d4bb693608bfba154 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Sun, 3 Jan 2021 14:58:34 -0500 Subject: [PATCH 071/172] shorten lines --- docs/source/spec-workload-executor.rst | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/docs/source/spec-workload-executor.rst b/docs/source/spec-workload-executor.rst index 87410b4f..3d94cdb5 100644 --- a/docs/source/spec-workload-executor.rst +++ b/docs/source/spec-workload-executor.rst @@ -153,8 +153,10 @@ Pseudocode Implementation # targetDriver is the driver to be tested. import { MongoClient } from "targetDriver" - # The workloadRunner function accepts a connection string and a stringified JSON blob describing the driver workload. - # This function will be invoked with arguments parsed from the command-line invocation of the workload executor script. 
+ # The workloadRunner function accepts a connection string and a + # stringified JSON blob describing the driver workload. + # This function will be invoked with arguments parsed from the + # command-line invocation of the workload executor script. function workloadRunner(connectionString: string, driverWorkload: object): void { # Use the MongoClient of the driver to be tested to connect to the Atlas Cluster. @@ -169,8 +171,10 @@ Pseudocode Implementation var num_failures = 0; var num_successes = 0; - # Run the workload - operations are run sequentially, repeatedly until the termination signal is received. - # Do not attempt to initialize the cluster with the contents of ``testData`` - astrolabe takes care of this. + # Run the workload - operations are run sequentially, repeatedly + # until the termination signal is received. + # Do not attempt to initialize the cluster with the contents of + # ``testData`` - astrolabe takes care of this. try { while (True) { for (let operation in workloadSpec.operations) { From 2489cae63a994b63a48661d3c34da1a12d8141f3 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Sun, 3 Jan 2021 15:13:25 -0500 Subject: [PATCH 072/172] make ruby executor more easy to use standalone --- integrations/ruby/executor.rb | 17 +++++++---------- integrations/ruby/workload-executor | 29 ++++++++++++++++++++++++++--- 2 files changed, 33 insertions(+), 13 deletions(-) diff --git a/integrations/ruby/executor.rb b/integrations/ruby/executor.rb index 11f7318f..e3994e3a 100644 --- a/integrations/ruby/executor.rb +++ b/integrations/ruby/executor.rb @@ -72,9 +72,6 @@ def initialize(uri, spec) def run set_signal_handler - # Normally, the orchestrator loads test data. - # If the executor is run by itself, uncomment the next line. 
- #load_data while true break if @stop perform_operations @@ -83,6 +80,13 @@ def run write_result end + def load_data + collection.delete_many + spec['initialData']&.each do |s| + collection.insert_many(s.fetch('documents')) + end + end + private def set_signal_handler @@ -91,13 +95,6 @@ def set_signal_handler end end - def load_data - collection.delete_many - if data = spec['testData'] - collection.insert_many(data) - end - end - def perform_operations spec['tests'].each do |test| test['operations'].each do |op_spec| diff --git a/integrations/ruby/workload-executor b/integrations/ruby/workload-executor index 03e79036..18119894 100755 --- a/integrations/ruby/workload-executor +++ b/integrations/ruby/workload-executor @@ -8,14 +8,37 @@ $: << File.dirname(__FILE__) $: << File.join(File.dirname(__FILE__), '../../mongo-ruby-driver/lib') require 'executor' +require 'optparse' + +options = {} +OptionParser.new do |opts| + opts.banner = "Usage: workload-executor URI SPEC\n" << + " workload-executor -s SCENARIO-PATH URI" + + opts.on("-s", "--scenario=PATH", "Specify scenario path") do |v| + options[:scenario_path] = v + end + opts.on('-i', '--insert', 'Insert scenario data') do + options[:insert] = true + end +end.parse! uri, spec = ARGV -if spec.nil? +if spec.nil? 
&& !options[:scenario_path] raise "Usage: executor.rb URI SPEC" end -spec = JSON.load(spec) +if options[:scenario_path] + scenario = YAML.load(File.read(options[:scenario_path])) + spec = scenario.fetch('driverWorkload') +else + spec = JSON.load(spec) +end executor = Executor.new(uri, spec) -executor.run +if options[:insert] + executor.load_data +else + executor.run +end From d106f741abf9ece401d6f30a563d1d43eeeabf2d Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Sun, 3 Jan 2021 21:43:21 -0500 Subject: [PATCH 073/172] unified test runner integration --- integrations/ruby/executor.rb | 115 +++++++--------------------- integrations/ruby/workload-executor | 28 +++++++ 2 files changed, 54 insertions(+), 89 deletions(-) diff --git a/integrations/ruby/executor.rb b/integrations/ruby/executor.rb index e3994e3a..aa17ec53 100644 --- a/integrations/ruby/executor.rb +++ b/integrations/ruby/executor.rb @@ -1,5 +1,6 @@ require 'json' require 'mongo' +require 'runners/unified' Mongo::Logger.logger.level = Logger::WARN @@ -72,6 +73,9 @@ def initialize(uri, spec) def run set_signal_handler + unified_tests.each do |test| + test.create_entities + end while true break if @stop perform_operations @@ -81,9 +85,8 @@ def run end def load_data - collection.delete_many - spec['initialData']&.each do |s| - collection.insert_many(s.fetch('documents')) + unified_tests.each do |test| + test.set_initial_data end end @@ -95,77 +98,28 @@ def set_signal_handler end end + def unified_group + @unified_group ||= Unified::TestGroup.new(spec) + end + + def unified_tests + @tests ||= unified_group.tests + end + def perform_operations - spec['tests'].each do |test| - test['operations'].each do |op_spec| - begin - case op_spec['name'] - when 'find' - unless op_spec['object'] == 'collection0' - raise UnknownOperationConfiguration, "Can only find on a collection" - end - - args = op_spec['arguments'].dup - op = collection.find(args.delete('filter') || {}) - if sort = args.delete('sort') - op = 
op.sort(sort) - end - unless args.empty? - raise UnknownOperationConfiguration, "Unhandled keys in args: #{args}" - end - - docs = op.to_a - - if expected_docs = op_spec['expectResult'] - if expected_docs != docs - puts "Failure: expected docs (#{expected_docs.inspect}) != actual docs (#{docs.inspect})" - @failure_count += 1 - end - end - when 'insertOne' - unless op_spec['object'] == 'collection0' - raise UnknownOperationConfiguration, "Can only find on a collection" - end - - args = op_spec['arguments'].dup - document = args.delete('document') - unless args.empty? - raise UnknownOperationConfiguration, "Unhandled keys in args: #{args}" - end - - collection.insert_one(document) - when 'updateOne' - unless op_spec['object'] == 'collection0' - raise UnknownOperationConfiguration, "Can only find on a collection" - end - - args = op_spec['arguments'].dup - scope = collection - if filter = args.delete('filter') - scope = collection.find(filter) - end - if update = args.delete('update') - scope.update_one(update) - end - unless args.empty? - raise UnknownOperationConfiguration, "Unhandled keys in args: #{args}" - end - else - raise UnknownOperation, "Unhandled operation #{op_spec['name']}" - end - #rescue Mongo::Error => e - # The validator intentionally gives us invalid operations, figure out - # how to handle this requirement while maintaining diagnostics. 
- rescue => e - STDERR.puts "Error: #{e.class}: #{e}" - metrics_collector.errors << { - error: "#{e.class}: #{e}", - time: Time.now.to_f, - } - @error_count += 1 - end - @operation_count += 1 + unified_tests.each do |test| + begin + test.run + rescue => e + raise + STDERR.puts "Error: #{e.class}: #{e}" + metrics_collector.errors << { + error: "#{e.class}: #{e}", + time: Time.now.to_f, + } + @error_count += 1 end + @operation_count += 1 end end @@ -191,21 +145,4 @@ def write_result ) end end - - def collection - db_name = spec['createEntities'].detect { |entity| - entity['database']&.[]('id') == 'database0' - }['database'].fetch('databaseName') - collection_name = spec['createEntities'].detect { |entity| - entity['collection']&.[]('id') == 'collection0' - }['collection'].fetch('collectionName') - @collection ||= client.use(db_name)[collection_name] - end - - def client - @client ||= Mongo::Client.new(uri).tap do |client| - client.subscribe(Mongo::Monitoring::COMMAND, metrics_collector) - client.subscribe(Mongo::Monitoring::CONNECTION_POOL, metrics_collector) - end - end end diff --git a/integrations/ruby/workload-executor b/integrations/ruby/workload-executor index 18119894..4abd2f95 100755 --- a/integrations/ruby/workload-executor +++ b/integrations/ruby/workload-executor @@ -6,9 +6,13 @@ puts ([$0] + ARGV.map { |arg| Shellwords.shellescape(arg) }).join(' ') $: << File.dirname(__FILE__) $: << File.join(File.dirname(__FILE__), '../../mongo-ruby-driver/lib') +$: << File.join(File.dirname(__FILE__), '../../mongo-ruby-driver/spec') +$: << File.join(File.dirname(__FILE__), '../../../ruby-driver/lib') +$: << File.join(File.dirname(__FILE__), '../../../ruby-driver/spec') require 'executor' require 'optparse' +autoload :Byebug, 'byebug' options = {} OptionParser.new do |opts| @@ -36,9 +40,33 @@ else spec = JSON.load(spec) end +$uri = uri + +class ClientRegistry + def self.instance + new + end + + def global_client(which) + $global_client ||= Mongo::Client.new($uri) + end 
+end + +module Unified + class Test + def create_client(**opts) + Mongo::Client.new($uri, **opts).tap do |client| + client.subscribe(Mongo::Monitoring::COMMAND, $metrics_collector) + client.subscribe(Mongo::Monitoring::CONNECTION_POOL, $metrics_collector) + end + end + end +end + executor = Executor.new(uri, spec) if options[:insert] executor.load_data else + $metrics_collector = executor.metrics_collector executor.run end From 09e05a01623ae690047598edc01e77780214f8e0 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Mon, 4 Jan 2021 11:48:31 -0500 Subject: [PATCH 074/172] workload executor spec --- docs/source/spec-workload-executor.rst | 57 +++++++++++++++----------- 1 file changed, 34 insertions(+), 23 deletions(-) diff --git a/docs/source/spec-workload-executor.rst b/docs/source/spec-workload-executor.rst index 3d94cdb5..64cd1ad5 100644 --- a/docs/source/spec-workload-executor.rst +++ b/docs/source/spec-workload-executor.rst @@ -53,34 +53,45 @@ After accepting the inputs, the workload executor: apply URI options specified in the particular test, if any, over the provided connection string. + - Each MongoClient MUST be set up to publish `command monitoring + `_ + events. The workload executor MUST record all events published + in the course of scenario execution. - Then, the workload executor MUST use the ``MongoClient`` instance - from the previous step to run the operations described in the - scenario in accordance with the `Unified Test Format specification - `_. - Note that the workload executor: - - * MUST ignore the ``initialData`` array. ``astrolabe`` is responsible for initializing the cluster with + - Each MongoClient MUST be set up to publish `CMAP + `_ + events. The workload executor MUST record all events published + in the course of scenario execution. + + - The ``initialData`` array in the scenario MUST be ignored by the + unified test runner (and by the workload executor). 
+ ``astrolabe`` is responsible for initializing the cluster with this data *before* starting the workload executor. - * MUST set up `command monitoring `_ - event listeners on all MongoClients to record started, succeeded and failed events for each operation sent to - MongoDB in the course of scenario execution. - * MUST set up `CMAP `_ - event listeners on all MongoClients to record all connection pool and connection-related events published - during the course of scenario execution. - * MUST run the tests, and the operations in each test, sequentially - and in the order in which they appear in the ``tests`` and ``operations`` array. - * MUST repeat the entire set of specified tests and operations indefinitely, until the **termination signal** from - ``astrolabe`` is received. - * MUST keep count of the number of the number of operation failures + +#. MUST use the driver's unified test runner to execute the tests in the + scenario, and the operations in each test, sequentially and in the order + in which they appear in the ``tests`` and ``operations`` arrays, + with the following deviations from the unified test runner specification: + + * The workload executor MUST repeat execution of the entire set of + specified tests and operations indefinitely, until the + **termination signal** from ``astrolabe`` is received. + + * The workload executor MUST keep count of the number of the number of operation failures (``numFailures``) that are encountered. An operation failure is when the actual return value of an operation does not match its expected return value (as defined in the ``result`` field of the ``driverWorkload``). - * MUST keep count of the number of operation errors (``numErrors``) that are encountered while running - operations. An operation error is when running an operation unexpectedly raises an error. Workload executors - implementations should try to be as resilient as possible to these kinds of operation errors. 
- * MUST keep count of the number of operations that are run successfully (``numSuccesses``). - * MUST record all errors encountered while running operations. + + * The workload executor MUST record all errors encountered while running the scenario. + An operation error is any error that is propagated out of the unified test runner. + Workload executor implementations should try to be as resilient + as possible to these kinds of operation errors. + + * The workload executor MUST keep count of the number of operation errors (``numErrors``) that + are encountered while running the scenario. + + * The workload executor MUST keep count of the number of invocations of the scenario that + did not result in an error (``numSuccesses``). #. MUST set a signal handler for handling the termination signal that is sent by ``astrolabe``. The termination signal is used by ``astrolabe`` to communicate to the workload executor that it should stop running operations. Upon From f5634e93203424a4b9039215ae27ff253749ff15 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Tue, 5 Jan 2021 16:09:54 -0500 Subject: [PATCH 075/172] handle failures --- docs/source/spec-workload-executor.rst | 9 ++++--- integrations/ruby/executor.rb | 10 ++++++- integrations/ruby/workload-executor | 12 ++++++--- tests/validator-numErrors.yml | 3 +++ tests/validator-numFailures.yml | 36 ++++++++++++++++++++++++++ 5 files changed, 62 insertions(+), 8 deletions(-) create mode 100644 tests/validator-numFailures.yml diff --git a/docs/source/spec-workload-executor.rst b/docs/source/spec-workload-executor.rst index 64cd1ad5..63d81878 100644 --- a/docs/source/spec-workload-executor.rst +++ b/docs/source/spec-workload-executor.rst @@ -77,10 +77,11 @@ After accepting the inputs, the workload executor: specified tests and operations indefinitely, until the **termination signal** from ``astrolabe`` is received. 
- * The workload executor MUST keep count of the number of the number of operation failures - (``numFailures``) that are encountered. An operation failure is when - the actual return value of an operation does not match its - expected return value (as defined in the ``result`` field of the ``driverWorkload``). + * The workload executor MUST keep count of the number of failures + (``numFailures``) that are encountered. A failure is when + the actual return value of an operation or a published event does not match + the respective expected return value or event (as these expectations + are defined in the unified test format). * The workload executor MUST record all errors encountered while running the scenario. An operation error is any error that is propagated out of the unified test runner. diff --git a/integrations/ruby/executor.rb b/integrations/ruby/executor.rb index aa17ec53..5b872101 100644 --- a/integrations/ruby/executor.rb +++ b/integrations/ruby/executor.rb @@ -13,9 +13,10 @@ def initialize @command_events = [] @connection_events = [] @errors = [] + @failures = [] end - attr_reader :command_events, :connection_events, :errors + attr_reader :command_events, :connection_events, :errors, :failures def started(event) @operations[event.operation_id] = [event, Time.now] @@ -110,6 +111,13 @@ def perform_operations unified_tests.each do |test| begin test.run + rescue Unified::Error => e + STDERR.puts "Failure: #{e.class}: #{e}" + metrics_collector.failures << { + failure: "#{e.class}: #{e}", + time: Time.now.to_f, + } + @failure_count += 1 rescue => e raise STDERR.puts "Error: #{e.class}: #{e}" diff --git a/integrations/ruby/workload-executor b/integrations/ruby/workload-executor index 4abd2f95..9805a15d 100755 --- a/integrations/ruby/workload-executor +++ b/integrations/ruby/workload-executor @@ -16,8 +16,8 @@ autoload :Byebug, 'byebug' options = {} OptionParser.new do |opts| - opts.banner = "Usage: workload-executor URI SPEC\n" << - " workload-executor -s 
SCENARIO-PATH URI" + opts.banner = $usage_banner = "Usage: workload-executor URI SPEC\n" << + " workload-executor -s SCENARIO-PATH [-i] [-u URI]" opts.on("-s", "--scenario=PATH", "Specify scenario path") do |v| options[:scenario_path] = v @@ -25,12 +25,18 @@ OptionParser.new do |opts| opts.on('-i', '--insert', 'Insert scenario data') do options[:insert] = true end + opts.on("-u", "--uri=URI", "Specify MongoDB server URI") do |v| + options[:uri] = v + end end.parse! uri, spec = ARGV +uri ||= options[:uri] +uri ||= 'mongodb://localhost' if spec.nil? && !options[:scenario_path] - raise "Usage: executor.rb URI SPEC" + STDERR.puts $usage_banner + exit 1 end if options[:scenario_path] diff --git a/tests/validator-numErrors.yml b/tests/validator-numErrors.yml index 2e440724..3cbbd458 100644 --- a/tests/validator-numErrors.yml +++ b/tests/validator-numErrors.yml @@ -1,3 +1,6 @@ +# This file intentionally causes the workload executor to produce an error +# on each execution. + operations: [] driverWorkload: diff --git a/tests/validator-numFailures.yml b/tests/validator-numFailures.yml new file mode 100644 index 00000000..a595d633 --- /dev/null +++ b/tests/validator-numFailures.yml @@ -0,0 +1,36 @@ +# This file intentionally causes the workload executor to produce a failure +# on each execution. 
+ +operations: [] + +driverWorkload: + description: "Validator - num failures" + + schemaVersion: "1.0" + + createEntities: + - client: + id: &client0 client0 + - database: + id: &database0 database0 + client: *client0 + databaseName: &database0Name dat + - collection: + id: &collection0 collection0 + database: *database0 + collectionName: &collection0Name dat + + initialData: [] + + tests: + - description: "Find one" + operations: + - name: find + object: *collection0 + arguments: + filter: { _id: { $gt: 1 }} + sort: { _id: 1 } + expectResult: + - + _id: 2 + x: 42 From d14357fe464504be26c874e661e8bae78b1b63aa Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Tue, 5 Jan 2021 17:42:34 -0500 Subject: [PATCH 076/172] use revised unified runner --- integrations/ruby/executor.rb | 2 +- integrations/ruby/workload-executor | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/integrations/ruby/executor.rb b/integrations/ruby/executor.rb index 5b872101..08bdc5b6 100644 --- a/integrations/ruby/executor.rb +++ b/integrations/ruby/executor.rb @@ -100,7 +100,7 @@ def set_signal_handler end def unified_group - @unified_group ||= Unified::TestGroup.new(spec) + @unified_group ||= Unified::TestGroup.new(spec, client_args: uri) end def unified_tests diff --git a/integrations/ruby/workload-executor b/integrations/ruby/workload-executor index 9805a15d..2d93380f 100755 --- a/integrations/ruby/workload-executor +++ b/integrations/ruby/workload-executor @@ -60,8 +60,9 @@ end module Unified class Test + alias :create_client_without_events :create_client def create_client(**opts) - Mongo::Client.new($uri, **opts).tap do |client| + create_client_without_events(**opts).tap do |client| client.subscribe(Mongo::Monitoring::COMMAND, $metrics_collector) client.subscribe(Mongo::Monitoring::CONNECTION_POOL, $metrics_collector) end From 80ef2ba6122492c5b0061ac5f987a167c29bf4b8 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Tue, 5 Jan 2021 17:49:05 -0500 Subject: [PATCH 077/172] 
fix up error expectations --- integrations/ruby/executor.rb | 1 - tests/validator-numErrors.yml | 3 +++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/integrations/ruby/executor.rb b/integrations/ruby/executor.rb index 08bdc5b6..00f7c8b8 100644 --- a/integrations/ruby/executor.rb +++ b/integrations/ruby/executor.rb @@ -119,7 +119,6 @@ def perform_operations } @failure_count += 1 rescue => e - raise STDERR.puts "Error: #{e.class}: #{e}" metrics_collector.errors << { error: "#{e.class}: #{e}", diff --git a/tests/validator-numErrors.yml b/tests/validator-numErrors.yml index 3cbbd458..8d6b93b9 100644 --- a/tests/validator-numErrors.yml +++ b/tests/validator-numErrors.yml @@ -38,6 +38,9 @@ driverWorkload: update: $inc: count: 1 + + - description: "error" + operations: - name: doesNotExist object: *collection0 arguments: From 66f4cddcc2820a12a258d4e1437703e4ba5c91c7 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Tue, 5 Jan 2021 17:54:59 -0500 Subject: [PATCH 078/172] validate failure handling --- astrolabe/validator.py | 17 +++++++++++++++++ tests/validator-numFailures.yml | 20 +++++++++++++++++++- 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/astrolabe/validator.py b/astrolabe/validator.py index 6fe7188e..c218a4a2 100644 --- a/astrolabe/validator.py +++ b/astrolabe/validator.py @@ -118,6 +118,23 @@ def test_num_errors(self): "to be reported, got {} instead.".format( num_reported_updates, num_reported_errors)) + def test_num_failures(self): + driver_workload = JSONObject.from_dict( + yaml.load(open('tests/validator-numFailures.yml').read(), Loader=yaml.FullLoader)['driverWorkload'] + ) + + stats = self.run_test(driver_workload) + + num_reported_finds = stats['numSuccesses'] + + num_reported_failures = stats['numFailures'] + if abs(num_reported_failures - num_reported_finds) > 1: + self.fail( + "The workload executor reported inconsistent execution " + "statistics. 
Expected approximately {} errored operations " + "to be reported, got {} instead.".format( + num_reported_finds, num_reported_failures)) + def validator_factory(workload_executor, connection_string, startup_time): ValidateWorkloadExecutor.WORKLOAD_EXECUTOR = workload_executor diff --git a/tests/validator-numFailures.yml b/tests/validator-numFailures.yml index a595d633..2fe85b39 100644 --- a/tests/validator-numFailures.yml +++ b/tests/validator-numFailures.yml @@ -20,10 +20,28 @@ driverWorkload: database: *database0 collectionName: &collection0Name dat - initialData: [] + initialData: + - collectionName: *collection0Name + databaseName: *database0Name + documents: + - + _id: 2 + x: 2 tests: - description: "Find one" + operations: + - name: find + object: *collection0 + arguments: + filter: { _id: { $gt: 1 }} + sort: { _id: 1 } + expectResult: + - + _id: 2 + x: 2 + + - description: "Find one - failure" operations: - name: find object: *collection0 From 86b010845d4527cd886d0f0085338a999e59119c Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Tue, 5 Jan 2021 21:24:41 -0500 Subject: [PATCH 079/172] test Ruby driver master --- .evergreen/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.evergreen/config.yml b/.evergreen/config.yml index f34da476..29378e9a 100644 --- a/.evergreen/config.yml +++ b/.evergreen/config.yml @@ -211,7 +211,7 @@ axes: variables: DRIVER_DIRNAME: "ruby" DRIVER_REPOSITORY: "https://github.com/mongodb/mongo-ruby-driver" - DRIVER_REVISION: "2.13-stable" + DRIVER_REVISION: "master" - id: node-master display_name: "Node (master)" variables: From 9d5768cb28d5c04217cf59d52731ae8c1733954a Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev <39304720+p-mongo@users.noreply.github.com> Date: Wed, 20 Jan 2021 13:31:57 -0500 Subject: [PATCH 080/172] Update astrolabe/runner.py Co-authored-by: Andreas Braun --- astrolabe/runner.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/astrolabe/runner.py b/astrolabe/runner.py index 
7a9e0e86..7b30d61e 100644 --- a/astrolabe/runner.py +++ b/astrolabe/runner.py @@ -257,9 +257,6 @@ def run(self, persist_cluster=False, startup_time=1): LOGGER.info("Workload Statistics: {}".format(stats)) - #LOGGER.info("Waiting 5 minutes for Atlas logs to become available") - #sleep(5*60) - self.retrieve_logs() # Step 7: download logs asynchronously and delete cluster. From b741789a05345dbd78986e9a4f140955b7186f05 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Thu, 21 Jan 2021 20:16:01 -0500 Subject: [PATCH 081/172] move statistics aggregation into command module --- astrolabe/cli.py | 31 +----------------------------- astrolabe/commands.py | 44 +++++++++++++++++++++++++++++++++++++++++++ requirements.txt | 1 + 3 files changed, 46 insertions(+), 30 deletions(-) diff --git a/astrolabe/cli.py b/astrolabe/cli.py index 870bdf0d..16e04dde 100644 --- a/astrolabe/cli.py +++ b/astrolabe/cli.py @@ -13,7 +13,6 @@ # limitations under the License. import logging -import json from pprint import pprint import unittest, os from urllib.parse import unquote_plus @@ -540,35 +539,7 @@ def validate_workload_executor(workload_executor, startup_time, @spec_tests.command('stats') @click.pass_context def stats(ctx): - with open('results.json', 'r') as fp: - stats = json.load(fp) - with open('events.json', 'r') as fp: - events = json.load(fp) - - import numpy - - command_events = events['commands'] - command_times = [c['duration'] for c in command_events] - stats['avgCommandTime'] = numpy.average(command_times) - stats['p95CommandTime'] = numpy.percentile(command_times, 95) - stats['p99CommandTime'] = numpy.percentile(command_times, 99) - - conn_events = events['connections'] - counts = defaultdict(lambda: 0) - max_counts = defaultdict(lambda: 0) - conn_count = max_conn_count = 0 - for e in conn_events: - if e['name'] == 'ConnectionCreated': - counts[e['address']] += 1 - elif e['name'] == 'ConnectionClosed': - counts[e['address']] -= 1 - if counts[e['address']] > 
max_counts[e['address']]: - max_counts[e['address']] = counts[e['address']] - - stats['maxConnectionCounts'] = max_counts - - with open('stats.json', 'w') as fp: - json.dump(stats, fp) + cmd.aggregate_statistics() if __name__ == '__main__': diff --git a/astrolabe/commands.py b/astrolabe/commands.py index 8eb63cdc..a32c5a15 100644 --- a/astrolabe/commands.py +++ b/astrolabe/commands.py @@ -13,6 +13,7 @@ # limitations under the License. import logging +import json from atlasclient import AtlasApiError @@ -88,3 +89,46 @@ def ensure_connect_from_anywhere(*, client, project_id, ): ip_details_list = [{"cidrBlock": "0.0.0.0/0"}] resp = client.groups[project_id].whitelist.post(json=ip_details_list).data LOGGER.debug("Project whitelist details: {}".format(resp)) + + +def aggregate_statistics(): + '''Read the results.json and events.json files, aggregate the events into + statistics and write the statistics into stats.json. + + Statistics calculated: + + - Average command execution time + - 95th percentile command execution time + - 99th percentile command execution time + - Peak number of open connections + ''' + + with open('results.json', 'r') as fp: + stats = json.load(fp) + with open('events.json', 'r') as fp: + events = json.load(fp) + + import numpy + + command_events = events['commands'] + command_times = [c['duration'] for c in command_events] + stats['avgCommandTime'] = numpy.average(command_times) + stats['p95CommandTime'] = numpy.percentile(command_times, 95) + stats['p99CommandTime'] = numpy.percentile(command_times, 99) + + conn_events = events['connections'] + counts = defaultdict(lambda: 0) + max_counts = defaultdict(lambda: 0) + conn_count = max_conn_count = 0 + for e in conn_events: + if e['name'] == 'ConnectionCreated': + counts[e['address']] += 1 + elif e['name'] == 'ConnectionClosed': + counts[e['address']] -= 1 + if counts[e['address']] > max_counts[e['address']]: + max_counts[e['address']] = counts[e['address']] + + stats['maxConnectionCounts'] = 
max_counts + + with open('stats.json', 'w') as fp: + json.dump(stats, fp) diff --git a/requirements.txt b/requirements.txt index 245942b5..85aa1cd1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,3 +5,4 @@ dnspython>=1.16,<2 pyyaml>=5,<6 tabulate>=0.8,<0.9 junitparser>=1,<2 +numpy From 49a0853fe59e46c9c7676781ce4c60e2b93f0af7 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Thu, 21 Jan 2021 20:17:32 -0500 Subject: [PATCH 082/172] Remove unneeded start time retrieval --- astrolabe/runner.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/astrolabe/runner.py b/astrolabe/runner.py index 7b30d61e..6773771e 100644 --- a/astrolabe/runner.py +++ b/astrolabe/runner.py @@ -110,8 +110,6 @@ def initialize(self, no_create=False): specification. """ - self.start_time = _time.time() - if no_create: return From eeedc87e9203fe608f5da235f197182c52d15377 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Thu, 21 Jan 2021 20:27:21 -0500 Subject: [PATCH 083/172] Create clusters when --no-create is given but they do not exist --- astrolabe/cli.py | 2 +- astrolabe/runner.py | 10 +++++++++- docs/source/installing-running-locally.rst | 13 ++++++++----- 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/astrolabe/cli.py b/astrolabe/cli.py index 16e04dde..a51864e5 100644 --- a/astrolabe/cli.py +++ b/astrolabe/cli.py @@ -77,7 +77,7 @@ NOCREATE_FLAG = click.option( '--no-create', is_flag=True, default=False, - help=('Do not create clusters at the beginning of the run, assume they have already been provisioned by a previous run.')) + help=('Do not create and configure clusters at the beginning of the run if they already exist, assume they have already been provisioned by a previous run.')) class ContextStore: diff --git a/astrolabe/runner.py b/astrolabe/runner.py index 6773771e..0f6cebb1 100644 --- a/astrolabe/runner.py +++ b/astrolabe/runner.py @@ -111,7 +111,15 @@ def initialize(self, no_create=False): """ if no_create: - return + try: + self.cluster_url.get().data 
+ # If --no-create was specified and the cluster exists, skip + # initialization. If the cluster does not exist, continue + # with normal creation. + return + except AtlasApiError as exc: + if exc.error_code != 'CLUSTER_NOT_FOUND': + LOGGER.warn('Get cluster failed with unexpected error: %s. Will attempt to create the cluster.' % exc) LOGGER.info("Initializing cluster {!r}".format(self.cluster_name)) diff --git a/docs/source/installing-running-locally.rst b/docs/source/installing-running-locally.rst index 2ff24716..91ec0717 100644 --- a/docs/source/installing-running-locally.rst +++ b/docs/source/installing-running-locally.rst @@ -142,11 +142,14 @@ times between successive test runs (you will still need to wait for the cluster configuration). ``astrolabe`` also provides the ``--no-create`` flag which makes it skip -cluster initialization. This flag may be used to further speed up the test -runs, but it can only be used for scenarios where the cluster configuration -does not change from the initial one (otherwise the test would start with the -wrong configuration) and only after a previous run with ``--no-delete`` -successfully created the cluster. +cluster initialization if the cluster already exists. This flag may be used +to further speed up the test runs, but it can only be used for scenarios +where the cluster configuration does not change from the initial one +(otherwise the test would start with the wrong configuration). Using +``--no-delete`` is recommended with ``--no-create``, otherwise each run will +delete the cluster upon completion. 
**If the cluster exists but has the wrong +configuration, ``astrolabe`` will use it as is and the results of the test +may be incorrect.** Debugging From ca6bf532d0deb511c1e1833573892874aabd29df Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Thu, 21 Jan 2021 20:32:44 -0500 Subject: [PATCH 084/172] refactor operation processing --- astrolabe/runner.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/astrolabe/runner.py b/astrolabe/runner.py index 0f6cebb1..6ea0f879 100644 --- a/astrolabe/runner.py +++ b/astrolabe/runner.py @@ -174,9 +174,14 @@ def run(self, persist_cluster=False, startup_time=1): startup_time=startup_time) for operation in self.spec.operations: - if hasattr(operation, 'setClusterConfiguration'): + if len(operation) != 1: + raise ValueError("Operation must have exactly one key: %s" % operation) + + op_name, op_spec = next(iteritems(operation)) + + if op_name == 'setClusterConfiguration': # Step-3: begin maintenance routine. - final_config = operation.setClusterConfiguration + final_config = op_spec cluster_config = final_config.clusterConfiguration process_args = final_config.processArgs @@ -199,26 +204,26 @@ def run(self, persist_cluster=False, startup_time=1): self.verify_cluster_configuration_matches(final_config) LOGGER.info("Cluster maintenance complete") - if hasattr(operation, 'testFailover'): + if op_name == 'testFailover': self.cluster_url['restartPrimaries'].post() self.wait_for_idle() - if hasattr(operation, 'sleep'): - _time.sleep(operation['sleep']) + if op_name == 'sleep': + _time.sleep(op_spec) - if hasattr(operation, 'waitForIdle'): + if op_name == 'waitForIdle': self.wait_for_idle() - if hasattr(operation, 'restartVms'): + if op_name == 'restartVms': url = "/api/private/nds/groups/%s/clusters/%s/reboot" % (self.project.id, self.cluster_name) self.admin_client.request('POST', url) sleep(5) self.wait_for_idle() - if hasattr(operation, 'assertPrimaryRegion'): - region = 
operation['assertPrimaryRegion'] + if op_name == 'assertPrimaryRegion': + region = op_spec cluster_config = self.cluster_url.get().data deadline = monotonic() + 90 From 90b5adc43f99999c4dcd816c59eb633cbfe608d1 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Thu, 21 Jan 2021 20:33:46 -0500 Subject: [PATCH 085/172] remove the wait for state check --- astrolabe/runner.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/astrolabe/runner.py b/astrolabe/runner.py index 6ea0f879..8cb1bbfd 100644 --- a/astrolabe/runner.py +++ b/astrolabe/runner.py @@ -196,9 +196,6 @@ def run(self, persist_cluster=False, startup_time=1): LOGGER.info("Pushing process arguments update") self.cluster_url.processArgs.patch(**process_args) - # Sleep before polling to give Atlas time to update cluster.stateName. - sleep(3) - # Step-4: wait until maintenance completes (cluster is IDLE). self.wait_for_idle() self.verify_cluster_configuration_matches(final_config) From 36570af602b41c7fd1ae22414d608b030800fe63 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Thu, 21 Jan 2021 20:37:22 -0500 Subject: [PATCH 086/172] move sleep after vm restart into scenario specifications --- astrolabe/runner.py | 1 - docs/source/spec-test-format.rst | 3 +++ tests/retryReads-vmRestart-sharded.yaml | 4 ++++ tests/retryReads-vmRestart.yaml | 4 ++++ 4 files changed, 11 insertions(+), 1 deletion(-) diff --git a/astrolabe/runner.py b/astrolabe/runner.py index 8cb1bbfd..0c7d9a7f 100644 --- a/astrolabe/runner.py +++ b/astrolabe/runner.py @@ -216,7 +216,6 @@ def run(self, persist_cluster=False, startup_time=1): url = "/api/private/nds/groups/%s/clusters/%s/reboot" % (self.project.id, self.cluster_name) self.admin_client.request('POST', url) - sleep(5) self.wait_for_idle() if op_name == 'assertPrimaryRegion': diff --git a/docs/source/spec-test-format.rst b/docs/source/spec-test-format.rst index cc8817c5..adafd596 100644 --- a/docs/source/spec-test-format.rst +++ b/docs/source/spec-test-format.rst @@ -61,6 +61,9 @@ A Test 
Scenario File has the following keys: This operation requires Atlas Global Operator API key to be set when invoking ``astrolabe``. The value MUST be ``true``. + testFailover SHOULD be followed by sleep and waitForIdle operations + because it does not update maintenance state synchronously. + Example:: restartVms: true diff --git a/tests/retryReads-vmRestart-sharded.yaml b/tests/retryReads-vmRestart-sharded.yaml index 3fd6f311..fa7617a1 100644 --- a/tests/retryReads-vmRestart-sharded.yaml +++ b/tests/retryReads-vmRestart-sharded.yaml @@ -10,6 +10,10 @@ initialConfiguration: operations: - restartVms: true + - + sleep: 10 + - + waitForIdle: true uriOptions: retryReads: true diff --git a/tests/retryReads-vmRestart.yaml b/tests/retryReads-vmRestart.yaml index 6685ad12..dce35119 100644 --- a/tests/retryReads-vmRestart.yaml +++ b/tests/retryReads-vmRestart.yaml @@ -51,6 +51,10 @@ driverWorkload: operations: - restartVms: true + - + sleep: 10 + - + waitForIdle: true uriOptions: retryReads: true From 72099fbef199ff3f5bfd3b233d13d448bfd1cab1 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Thu, 21 Jan 2021 21:29:43 -0500 Subject: [PATCH 087/172] allow region timeout to be user-configurable --- astrolabe/runner.py | 11 ++++++----- docs/source/spec-test-format.rst | 17 +++++++++++++---- tests/retryReads-primaryRemoval.yaml | 3 ++- tests/retryReads-primaryTakeover.yaml | 4 +++- 4 files changed, 24 insertions(+), 11 deletions(-) diff --git a/astrolabe/runner.py b/astrolabe/runner.py index 0c7d9a7f..4ba7026c 100644 --- a/astrolabe/runner.py +++ b/astrolabe/runner.py @@ -14,7 +14,7 @@ import logging, datetime, time as _time, gzip import os, io -from time import sleep, monotonic +from time import sleep from urllib.parse import urlencode from pymongo import MongoClient @@ -219,10 +219,12 @@ def run(self, persist_cluster=False, startup_time=1): self.wait_for_idle() if op_name == 'assertPrimaryRegion': - region = op_spec + region = op_spec['region'] cluster_config = 
self.cluster_url.get().data - deadline = monotonic() + 90 + timer = Timer() + timer.start() + timeout = op_spec.get('timeout', 90) while True: mc = MongoClient(cluster_config['connectionStrings']['standard'], username='atlasuser', password='mypassword123') @@ -235,11 +237,10 @@ def run(self, persist_cluster=False, startup_time=1): if region == member_region: break - if monotonic() > deadline: + if timer.elapsed > timeout: raise Exception("Primary in cluster not in expected region '%s' (actual region '%s')" % (region, member_region)) else: sleep(5) - # Step-5: interrupt driver workload and capture streams stats = self.workload_runner.terminate() diff --git a/docs/source/spec-test-format.rst b/docs/source/spec-test-format.rst index adafd596..63919cda 100644 --- a/docs/source/spec-test-format.rst +++ b/docs/source/spec-test-format.rst @@ -69,13 +69,22 @@ A Test Scenario File has the following keys: restartVms: true * assertPrimaryRegion: assert that the primary in the deployment is in the - specified region. The value MUST be the region name as defined in Atlas API, - e.g. ``US_WEST_1``. This operation is undefined and MUST NOT be used when - the deployment is a sharded cluster. + specified region. The value MUST be a hash with the following keys: + + * region (string, required): the region name as defined in Atlas API, + e.g. ``US_WEST_1``. + * timeout (floating-point number, optional): the maximum time, in + seconds, to wait for the region to become the expected one. + Default is 90 seconds. + + This operation is undefined and MUST NOT be used when the deployment is + a sharded cluster. Example:: - assertPrimaryRegion: US_WEST_1 + assertPrimaryRegion: + region: US_WEST_1 + timeout: 15 * sleep: do nothing for the specified duration. The value MUST be the duration to sleep for, in seconds. 
diff --git a/tests/retryReads-primaryRemoval.yaml b/tests/retryReads-primaryRemoval.yaml index 391c92d8..d2f160d8 100644 --- a/tests/retryReads-primaryRemoval.yaml +++ b/tests/retryReads-primaryRemoval.yaml @@ -40,7 +40,8 @@ operations: readOnlyNodes: 0 processArgs: {} - - assertPrimaryRegion: US_WEST_1 + - assertPrimaryRegion: + region: US_WEST_1 uriOptions: diff --git a/tests/retryReads-primaryTakeover.yaml b/tests/retryReads-primaryTakeover.yaml index d1edbc3a..aebe5131 100644 --- a/tests/retryReads-primaryTakeover.yaml +++ b/tests/retryReads-primaryTakeover.yaml @@ -41,7 +41,9 @@ operations: readOnlyNodes: 0 processArgs: {} - - assertPrimaryRegion: US_EAST_1 + - assertPrimaryRegion: + region: US_EAST_1 + timeout: 60 uriOptions: From 8c1bfed2c25d5d8ba8ab914a288bd30487e17707 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Thu, 21 Jan 2021 21:37:32 -0500 Subject: [PATCH 088/172] dry MongoClient construction --- astrolabe/runner.py | 11 +++++------ astrolabe/utils.py | 22 ++++++++++++++-------- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/astrolabe/runner.py b/astrolabe/runner.py index 4ba7026c..5500f158 100644 --- a/astrolabe/runner.py +++ b/astrolabe/runner.py @@ -227,12 +227,11 @@ def run(self, persist_cluster=False, startup_time=1): timeout = op_spec.get('timeout', 90) while True: - mc = MongoClient(cluster_config['connectionStrings']['standard'], username='atlasuser', password='mypassword123') - rsc = mc.admin.command('replSetGetConfig') - member = [m for m in rsc['config']['members'] - if m['horizons']['PUBLIC'] == '%s:%s' % mc.primary][0] - member_region = member['tags']['region'] - mc.close() + with mongo_client(self.get_connection_string()) as mc: + rsc = mc.admin.command('replSetGetConfig') + member = [m for m in rsc['config']['members'] + if m['horizons']['PUBLIC'] == '%s:%s' % mc.primary][0] + member_region = member['tags']['region'] if region == member_region: break diff --git a/astrolabe/utils.py b/astrolabe/utils.py index 
d8c8b664..52445cdf 100644 --- a/astrolabe/utils.py +++ b/astrolabe/utils.py @@ -19,6 +19,7 @@ import subprocess import sys from hashlib import sha256 +from contextlib import closing from time import monotonic, sleep import click @@ -167,8 +168,7 @@ def get_cluster_name(test_name, name_salt): return name_hash.hexdigest()[:10] -def load_test_data(connection_string, driver_workload): - """Insert the test data into the cluster.""" +def mongo_client(connection_string): kwargs = {'w': "majority"} # TODO: remove this if...else block after BUILD-10841 is done. @@ -177,13 +177,19 @@ def load_test_data(connection_string, driver_workload): import certifi kwargs['tlsCAFile'] = certifi.where() client = MongoClient(connection_string, **kwargs) + + return closing(client) + - for spec in driver_workload.initialData: - coll = client.get_database( - spec.databaseName).get_collection( - spec.collectionName) - coll.drop() - coll.insert_many(spec.documents) +def load_test_data(connection_string, driver_workload): + """Insert the test data into the cluster.""" + with mongo_client(connection_string) as client: + for spec in driver_workload.initialData: + coll = client.get_database( + spec.databaseName).get_collection( + spec.collectionName) + coll.drop() + coll.insert_many(spec.documents) class DriverWorkloadSubprocessRunner: From 6a86057d52dde53812e8e41a2c3497147adf3bab Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Fri, 22 Jan 2021 15:43:45 -0500 Subject: [PATCH 089/172] fix non-sharded vm restart test --- tests/retryReads-vmRestart.yaml | 43 +-------------------------------- 1 file changed, 1 insertion(+), 42 deletions(-) diff --git a/tests/retryReads-vmRestart.yaml b/tests/retryReads-vmRestart.yaml index dce35119..fb95e513 100644 --- a/tests/retryReads-vmRestart.yaml +++ b/tests/retryReads-vmRestart.yaml @@ -1,54 +1,13 @@ initialConfiguration: clusterConfiguration: clusterType: REPLICASET -driverWorkload: - description: "Find" - - schemaVersion: "1.0" - - createEntities: - - 
client: - id: &client0 client0 - - database: - id: &database0 database0 - client: *client0 - databaseName: &database0Name dat - - collection: - id: &collection0 collection0 - database: *database0 - collectionName: &collection0Name dat - - initialData: - - collectionName: *collection0Name - databaseName: *database0Name - documents: - - {_id: 1, x: 11} - - {_id: 2, x: 22} - - {_id: 3, x: 33} - - tests: - - description: "Find one" - operations: - - name: find - object: *collection0 - arguments: - filter: { _id: { $gt: 1 }} - sort: { _id: 1 } - expectResult: - - - _id: 2 - x: 22 - - - _id: 3 - x: 33 - providerSettings: providerName: AWS regionName: US_WEST_1 instanceSizeName: M10 processArgs: {} -operations: + operations: - restartVms: true - From 6884512ac9929410b73e20628acabaa39915d924 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Fri, 22 Jan 2021 15:51:48 -0500 Subject: [PATCH 090/172] use the dotted atlas client api --- astrolabe/runner.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/astrolabe/runner.py b/astrolabe/runner.py index 5500f158..8ef0e72e 100644 --- a/astrolabe/runner.py +++ b/astrolabe/runner.py @@ -213,8 +213,8 @@ def run(self, persist_cluster=False, startup_time=1): self.wait_for_idle() if op_name == 'restartVms': - url = "/api/private/nds/groups/%s/clusters/%s/reboot" % (self.project.id, self.cluster_name) - self.admin_client.request('POST', url) + rv = self.admin_client.api.private.nds.groups[self.project.id].clusters[self.cluster_name].reboot.post() + import pdb;pdb.set_trace() self.wait_for_idle() @@ -284,7 +284,7 @@ def wait_for_idle(self): kwargs={}) def retrieve_logs(self): - data = self.admin_client.request('GET', '/api/private/nds/groups/%s/clusters/%s' % (self.project.id, self.cluster_name)).data + data = self.admin_client.api.private.nds.groups[self.project.id].clusters[self.cluster_name].get().data if data['clusterType'] == 'SHARDED': rtype = 'CLUSTER' @@ -300,12 +300,12 @@ def retrieve_logs(self): 
logTypes=['FTDC','MONGODB'],#,'AUTOMATION_AGENT','MONITORING_AGENT','BACKUP_AGENT'], sizeRequestedPerFileBytes=100000000, ) - data = self.admin_client.request('POST', 'groups/%s/logCollectionJobs' % (self.project.id,), **params).data + data = self.admin_client.groups[self.project.id].logCollectionJobs.post(**params).data job_id = data['id'] while True: LOGGER.debug('Poll job %s' % job_id) - data = self.admin_client.request('GET', 'groups/%s/logCollectionJobs/%s' % (self.project.id, job_id)).data + data = self.admin_client.groups[self.project.id].logCollectionJobs[job_id].get().data if data['status'] == 'IN_PROGRESS': sleep(1) elif data['status'] == 'SUCCESS': break From 2a7bfbce5047a481b8e91020f2e27983672e1792 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Fri, 22 Jan 2021 15:55:36 -0500 Subject: [PATCH 091/172] check status code --- astrolabe/runner.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/astrolabe/runner.py b/astrolabe/runner.py index 8ef0e72e..4cbe4b68 100644 --- a/astrolabe/runner.py +++ b/astrolabe/runner.py @@ -316,6 +316,8 @@ def retrieve_logs(self): url = data['downloadUrl'].replace('https://cloud-dev.mongodb.com', '') LOGGER.info('Retrieving %s' % url) resp = self.admin_client.request('GET', url) + if resp.status_code != 200: + raise RuntimeError('Request to %s failed: %s' % (url, resp.status_code)) with open('logs.tar.gz', 'wb') as f: f.write(resp.response.content) From b59a58f2b52f21c3aabed2d7b075b28a1cc1c63e Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Fri, 22 Jan 2021 15:57:24 -0500 Subject: [PATCH 092/172] stop hardcoding cloud-dev --- astrolabe/runner.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/astrolabe/runner.py b/astrolabe/runner.py index 4cbe4b68..0eee4c72 100644 --- a/astrolabe/runner.py +++ b/astrolabe/runner.py @@ -313,7 +313,10 @@ def retrieve_logs(self): else: raise Exception("Unexpected log collection job status %s" % data['status']) - url = 
data['downloadUrl'].replace('https://cloud-dev.mongodb.com', '') + LOGGER.info('Log download URL: %s' % data['downloadUrl']) + # Assume the URL uses the same host as the other API requests, and + # remove it so that we just have the path. + url = re.sub(r'\w+://[^/]+', '', data['downloadUrl']) LOGGER.info('Retrieving %s' % url) resp = self.admin_client.request('GET', url) if resp.status_code != 200: From 19d754f22792951b2708c5ee74c1e0009332a31e Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Fri, 22 Jan 2021 15:58:29 -0500 Subject: [PATCH 093/172] stop hardcoding cloud-dev --- atlasclient/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/atlasclient/client.py b/atlasclient/client.py index 7d2d5558..85279a0e 100644 --- a/atlasclient/client.py +++ b/atlasclient/client.py @@ -223,7 +223,7 @@ def request(self, method, path, **params): def construct_resource_url(self, path, api_version=None): url_template = "{base_url}/{version}/{resource_path}" if path and path[0] == '/': - url_template = 'https://cloud-dev.mongodb.com{resource_path}' + url_template = '{base_url}{resource_path}' return url_template.format( base_url=self.config.base_url, version=api_version or self.config.api_version, From 529a6846cad50ab774055f8881e898fe02e4c7ba Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Fri, 22 Jan 2021 16:00:04 -0500 Subject: [PATCH 094/172] use api_version=private keyword arg --- astrolabe/runner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/astrolabe/runner.py b/astrolabe/runner.py index 0eee4c72..21780bfe 100644 --- a/astrolabe/runner.py +++ b/astrolabe/runner.py @@ -213,7 +213,7 @@ def run(self, persist_cluster=False, startup_time=1): self.wait_for_idle() if op_name == 'restartVms': - rv = self.admin_client.api.private.nds.groups[self.project.id].clusters[self.cluster_name].reboot.post() + rv = self.admin_client.nds.groups[self.project.id].clusters[self.cluster_name].reboot.post(api_version='private') import 
pdb;pdb.set_trace() self.wait_for_idle() @@ -284,7 +284,7 @@ def wait_for_idle(self): kwargs={}) def retrieve_logs(self): - data = self.admin_client.api.private.nds.groups[self.project.id].clusters[self.cluster_name].get().data + data = self.admin_client.nds.groups[self.project.id].clusters[self.cluster_name].get(api_version='private').data if data['clusterType'] == 'SHARDED': rtype = 'CLUSTER' From 50e811c1647e986727b4dda549ac1d5527bb9197 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Fri, 22 Jan 2021 16:00:56 -0500 Subject: [PATCH 095/172] remove unused import --- astrolabe/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/astrolabe/cli.py b/astrolabe/cli.py index a51864e5..3c8326cc 100644 --- a/astrolabe/cli.py +++ b/astrolabe/cli.py @@ -14,7 +14,7 @@ import logging from pprint import pprint -import unittest, os +import unittest from urllib.parse import unquote_plus from collections import defaultdict From 5c264ce3fbc50f351b31124a6f4433e79ba35f32 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Fri, 22 Jan 2021 16:01:42 -0500 Subject: [PATCH 096/172] remove stats --- astrolabe/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/astrolabe/cli.py b/astrolabe/cli.py index 3c8326cc..03fa6311 100644 --- a/astrolabe/cli.py +++ b/astrolabe/cli.py @@ -536,7 +536,7 @@ def validate_workload_executor(workload_executor, startup_time, exit(1) -@spec_tests.command('stats') +@spec_tests.command() @click.pass_context def stats(ctx): cmd.aggregate_statistics() From 73f5abb540b674beec6d22f32c7e149315121ebb Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Fri, 22 Jan 2021 16:07:40 -0500 Subject: [PATCH 097/172] use one client instance for all iterations --- astrolabe/runner.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/astrolabe/runner.py b/astrolabe/runner.py index 21780bfe..504ff078 100644 --- a/astrolabe/runner.py +++ b/astrolabe/runner.py @@ -226,20 +226,20 @@ def run(self, 
persist_cluster=False, startup_time=1): timer.start() timeout = op_spec.get('timeout', 90) - while True: - with mongo_client(self.get_connection_string()) as mc: + with mongo_client(self.get_connection_string()) as mc: + while True: rsc = mc.admin.command('replSetGetConfig') member = [m for m in rsc['config']['members'] if m['horizons']['PUBLIC'] == '%s:%s' % mc.primary][0] member_region = member['tags']['region'] - if region == member_region: - break - - if timer.elapsed > timeout: - raise Exception("Primary in cluster not in expected region '%s' (actual region '%s')" % (region, member_region)) - else: - sleep(5) + if region == member_region: + break + + if timer.elapsed > timeout: + raise Exception("Primary in cluster not in expected region '%s' (actual region '%s')" % (region, member_region)) + else: + sleep(5) # Step-5: interrupt driver workload and capture streams stats = self.workload_runner.terminate() From ea69fcda96a5c5dc54d4f5e29efe5d56b2cd0b68 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Fri, 22 Jan 2021 16:38:33 -0500 Subject: [PATCH 098/172] add numpy to setup.py --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index f66e3035..c3e32146 100644 --- a/setup.py +++ b/setup.py @@ -27,6 +27,7 @@ 'click>=7,<8', 'requests>=2,<3', 'pymongo>=3.10,<4', 'dnspython>=1.16,<2', 'pyyaml>=5,<6', 'tabulate>=0.8,<0.9', + 'numpy', 'junitparser>=1,<2'] if sys.platform == 'win32': install_requires.append('certifi') From 77a6bf576caffca6cab0985aca2debe287e5b3c9 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Fri, 22 Jan 2021 16:45:08 -0500 Subject: [PATCH 099/172] run all tests --- .evergreen/config.yml | 89 +++++++++++++++++++++++++++++++++++- .evergreen/generate-tasks.sh | 16 +++++++ 2 files changed, 103 insertions(+), 2 deletions(-) create mode 100755 .evergreen/generate-tasks.sh diff --git a/.evergreen/config.yml b/.evergreen/config.yml index 29378e9a..1cd86ab6 100644 --- a/.evergreen/config.yml +++ b/.evergreen/config.yml @@ 
-177,13 +177,35 @@ tasks: commands: - func: "validate executor" # One test-case per task. - - name: retryReads-processRestart + # Use .evergreen/generate-tasks.sh to generate this list. + - name: retryReads-move-sharded cron: '@weekly' tags: ["all"] commands: - func: "run test" vars: - TEST_NAME: retryReads-processRestart + TEST_NAME: retryReads-move-sharded + - name: retryReads-move + cron: '@weekly' + tags: ["all"] + commands: + - func: "run test" + vars: + TEST_NAME: retryReads-move + - name: retryReads-primaryRemoval + cron: '@weekly' + tags: ["all"] + commands: + - func: "run test" + vars: + TEST_NAME: retryReads-primaryRemoval + - name: retryReads-primaryTakeover + cron: '@weekly' + tags: ["all"] + commands: + - func: "run test" + vars: + TEST_NAME: retryReads-primaryTakeover - name: retryReads-processRestart-sharded cron: '@weekly' tags: ["all"] @@ -191,6 +213,69 @@ tasks: - func: "run test" vars: TEST_NAME: retryReads-processRestart-sharded + - name: retryReads-processRestart + cron: '@weekly' + tags: ["all"] + commands: + - func: "run test" + vars: + TEST_NAME: retryReads-processRestart + - name: retryReads-resizeCluster + cron: '@weekly' + tags: ["all"] + commands: + - func: "run test" + vars: + TEST_NAME: retryReads-resizeCluster + - name: retryReads-testFailover-sharded + cron: '@weekly' + tags: ["all"] + commands: + - func: "run test" + vars: + TEST_NAME: retryReads-testFailover-sharded + - name: retryReads-testFailover + cron: '@weekly' + tags: ["all"] + commands: + - func: "run test" + vars: + TEST_NAME: retryReads-testFailover + - name: retryReads-toggleServerSideJS + cron: '@weekly' + tags: ["all"] + commands: + - func: "run test" + vars: + TEST_NAME: retryReads-toggleServerSideJS + - name: retryReads-vmRestart-sharded + cron: '@weekly' + tags: ["all"] + commands: + - func: "run test" + vars: + TEST_NAME: retryReads-vmRestart-sharded + - name: retryReads-vmRestart + cron: '@weekly' + tags: ["all"] + commands: + - func: "run test" + vars: + 
TEST_NAME: retryReads-vmRestart + - name: retryWrites-resizeCluster + cron: '@weekly' + tags: ["all"] + commands: + - func: "run test" + vars: + TEST_NAME: retryWrites-resizeCluster + - name: retryWrites-toggleServerSideJS + cron: '@weekly' + tags: ["all"] + commands: + - func: "run test" + vars: + TEST_NAME: retryWrites-toggleServerSideJS axes: # The 'driver' axis specifies the driver to be tested (including driver version). diff --git a/.evergreen/generate-tasks.sh b/.evergreen/generate-tasks.sh new file mode 100755 index 00000000..22288a17 --- /dev/null +++ b/.evergreen/generate-tasks.sh @@ -0,0 +1,16 @@ +#!/bin/sh + +for f in tests/*.yaml; do + task=`basename $f |sed -e s/.yaml//` + +cat <<-EOT + - name: $task + cron: '@weekly' + tags: ["all"] + commands: + - func: "run test" + vars: + TEST_NAME: $task +EOT + +done From b56ab0f873ca217eb2b8e2ed6d67110f336ce3d7 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Fri, 22 Jan 2021 16:50:07 -0500 Subject: [PATCH 100/172] redo self.coll assignment as suggested --- astrolabe/validator.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/astrolabe/validator.py b/astrolabe/validator.py index c218a4a2..d573f93d 100644 --- a/astrolabe/validator.py +++ b/astrolabe/validator.py @@ -34,9 +34,24 @@ def setUp(self): self.client = MongoClient(self.CONNECTION_STRING, w='majority') def run_test(self, driver_workload): - self.coll = self.client.get_database( - [e for e in driver_workload['createEntities'] if 'database' in e][0]['database']['databaseName']).get_collection( - [e for e in driver_workload['createEntities'] if 'collection' in e][0]['collection']['collectionName']) + # Set self.coll for future use of the validator, such that it can + # read the data inserted into the collection. + # Actual insertion of initial data isn't done via this object. 
+ dbname = None + collname = None + for e in driver_workload['createEntities']: + if dbname and collname: + break + if dbname is None and 'database' in e: + dbname = e['database']['databaseName'] + elif collname is None and 'collection' in e: + collname = e['collection']['collectionName'] + + if not (dbname and collname): + self.fail('Invalid scenario: executor validator test cases must provide database and collection entities') + + self.coll = self.client.get_database(dbname).get_collection(collname) + load_test_data(self.CONNECTION_STRING, driver_workload) subprocess = DriverWorkloadSubprocessRunner() From a73285724b4cbf85f42efed73a2592c7beba9ad2 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Mon, 25 Jan 2021 09:03:28 -0500 Subject: [PATCH 101/172] fix cmap link --- docs/source/spec-workload-executor.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/spec-workload-executor.rst b/docs/source/spec-workload-executor.rst index 63d81878..c95b1689 100644 --- a/docs/source/spec-workload-executor.rst +++ b/docs/source/spec-workload-executor.rst @@ -59,7 +59,7 @@ After accepting the inputs, the workload executor: in the course of scenario execution. - Each MongoClient MUST be set up to publish `CMAP - `_ + `_ events. The workload executor MUST record all events published in the course of scenario execution. From 18cf8c0d0c1d16db6c19f63f9a1183a5271cb3e1 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Mon, 25 Jan 2021 11:16:57 -0500 Subject: [PATCH 102/172] check that cluster is configured as expected, otherwise recreate --- astrolabe/runner.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/astrolabe/runner.py b/astrolabe/runner.py index 504ff078..13af9edd 100644 --- a/astrolabe/runner.py +++ b/astrolabe/runner.py @@ -112,14 +112,17 @@ def initialize(self, no_create=False): if no_create: try: - self.cluster_url.get().data # If --no-create was specified and the cluster exists, skip # initialization. 
If the cluster does not exist, continue # with normal creation. + self.cluster_url.get().data + self.verify_cluster_configuration_matches(self.spec.initialConfiguration) return except AtlasApiError as exc: if exc.error_code != 'CLUSTER_NOT_FOUND': - LOGGER.warn('Get cluster failed with unexpected error: %s. Will attempt to create the cluster.' % exc) + LOGGER.warn('Unexpected error getting cluster: %s. Will attempt to recreate the cluster.' % exc) + except AssertionError as exc: + LOGGER.warn('Configuration did not match: %s. Recreating the cluster' % exc) LOGGER.info("Initializing cluster {!r}".format(self.cluster_name)) From a5a95627dc32a1d0c76deae4928e4df862c8d520 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Mon, 25 Jan 2021 11:17:03 -0500 Subject: [PATCH 103/172] fix iteritems not existing in python 3 --- astrolabe/runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/astrolabe/runner.py b/astrolabe/runner.py index 13af9edd..e1722cf8 100644 --- a/astrolabe/runner.py +++ b/astrolabe/runner.py @@ -180,7 +180,7 @@ def run(self, persist_cluster=False, startup_time=1): if len(operation) != 1: raise ValueError("Operation must have exactly one key: %s" % operation) - op_name, op_spec = next(iteritems(operation)) + op_name, op_spec = list(operation.items())[0] if op_name == 'setClusterConfiguration': # Step-3: begin maintenance routine. From 8287f722fb16858a4b9eb3aac5a0640bacbc5559 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Mon, 25 Jan 2021 11:51:38 -0500 Subject: [PATCH 104/172] troubleshooting note --- docs/source/integration-guide.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/source/integration-guide.rst b/docs/source/integration-guide.rst index f6219a77..a89aa5fe 100644 --- a/docs/source/integration-guide.rst +++ b/docs/source/integration-guide.rst @@ -272,3 +272,13 @@ in the Evergreen configuration file: .. note:: Users are asked to be extra cautious while dealing with environment variables that contain sensitive secrets. 
Using these variables in a script that sets ``-xtrace`` can, for instance, result in leaking these secrets into Evergreen's log output. + +--------------- +Troubleshooting +--------------- + +When using ``cloud-dev``, be aware that operational issues within Atlas are +not being monitored and solved with a particular SLA. If builds are failing +and the failure appears to be caused by Atlas rather than the tests themselves, +the driver being tested or ``astrolabe``, inquiring in ``cloud-non-prod-ops`` +Slack channel is the next suggested troubleshooting step. From 26963fc75120f61f70358dc0916eb07456122cc5 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Mon, 25 Jan 2021 19:57:54 -0500 Subject: [PATCH 105/172] fix logs retrieval --- .evergreen/config.yml | 2 +- astrolabe/cli.py | 31 ++++++++++++++++++++++++ astrolabe/runner.py | 46 +++--------------------------------- astrolabe/utils.py | 42 ++++++++++++++++++++++++++++++++ atlasclient/client.py | 6 ++++- atlasclient/configuration.py | 4 ++-- 6 files changed, 84 insertions(+), 47 deletions(-) diff --git a/.evergreen/config.yml b/.evergreen/config.yml index 1cd86ab6..6a3eb6b8 100644 --- a/.evergreen/config.yml +++ b/.evergreen/config.yml @@ -78,7 +78,7 @@ functions: CLUSTER_NAME_SALT: ${build_id} ATLAS_API_USERNAME: ${atlas_key} ATLAS_API_PASSWORD: ${atlas_secret} - ATLAS_API_BASE_URL: ${atlas_url} + ATLAS_API_BASE_URL: https://cloud-dev.mongodb.com/api ATLAS_ORGANIZATION_NAME: ${atlas_organization} ATLAS_ADMIN_API_USERNAME: ${atlas_admin_api_username} ATLAS_ADMIN_API_PASSWORD: ${atlas_admin_api_password} diff --git a/astrolabe/cli.py b/astrolabe/cli.py index 03fa6311..ca14298a 100644 --- a/astrolabe/cli.py +++ b/astrolabe/cli.py @@ -29,6 +29,7 @@ from astrolabe.configuration import ( CONFIGURATION_OPTIONS as CONFIGOPTS, TestCaseConfiguration) from astrolabe.utils import ( + get_logs, create_click_option, get_cluster_name, get_test_name_from_spec_file, ClickLogHandler) from astrolabe.validator import validator_factory 
@@ -434,6 +435,36 @@ def run_single_test(ctx, spec_test_file, workload_executor, exit(0) +@spec_tests.command('get-logs') +@click.argument("spec_test_file", type=click.Path( + exists=True, file_okay=True, dir_okay=False, resolve_path=True)) +@ATLASORGANIZATIONNAME_OPTION +@ATLASPROJECTNAME_OPTION +@CLUSTERNAMESALT_OPTION +@POLLINGTIMEOUT_OPTION +@POLLINGFREQUENCY_OPTION +@click.pass_context +def get_logs_cmd(ctx, spec_test_file, org_name, project_name, + cluster_name_salt, polling_timeout, polling_frequency, + ): + """ + Retrieves logs for the cluster. + """ + + # Step-1: determine the cluster name for the given test. + cluster_name = get_cluster_name(get_test_name_from_spec_file( + spec_test_file), cluster_name_salt) + + organization = cmd.get_one_organization_by_name( + client=ctx.obj.client, + organization_name=org_name) + project = cmd.ensure_project( + client=ctx.obj.client, project_name=project_name, + organization_id=organization.id) + get_logs(admin_client=ctx.obj.admin_client, + project=project, cluster_name=cluster_name) + + @spec_tests.command('delete-cluster') @click.argument("spec_test_file", type=click.Path( exists=True, file_okay=True, dir_okay=False, resolve_path=True)) diff --git a/astrolabe/runner.py b/astrolabe/runner.py index e1722cf8..e737dbf5 100644 --- a/astrolabe/runner.py +++ b/astrolabe/runner.py @@ -31,7 +31,7 @@ from astrolabe.utils import ( assert_subset, get_cluster_name, get_test_name_from_spec_file, load_test_data, DriverWorkloadSubprocessRunner, SingleTestXUnitLogger, - Timer) + get_logs, Timer) LOGGER = logging.getLogger(__name__) @@ -267,7 +267,8 @@ def run(self, persist_cluster=False, startup_time=1): LOGGER.info("Workload Statistics: {}".format(stats)) - self.retrieve_logs() + get_logs(admin_client=self.admin_client, + project=self.project, cluster_name=self.cluster_name) # Step 7: download logs asynchronously and delete cluster. 
# TODO: https://github.com/mongodb-labs/drivers-atlas-testing/issues/4 @@ -285,47 +286,6 @@ def wait_for_idle(self): LOGGER.info("Waiting for cluster maintenance to complete") selector.poll([self], attribute="is_cluster_state", args=("IDLE",), kwargs={}) - - def retrieve_logs(self): - data = self.admin_client.nds.groups[self.project.id].clusters[self.cluster_name].get(api_version='private').data - - if data['clusterType'] == 'SHARDED': - rtype = 'CLUSTER' - rname = data['deploymentItemName'] - else: - rtype = 'REPLICASET' - rname = data['deploymentItemName'] - - params = dict( - resourceName=rname, - resourceType=rtype, - redacted=True, - logTypes=['FTDC','MONGODB'],#,'AUTOMATION_AGENT','MONITORING_AGENT','BACKUP_AGENT'], - sizeRequestedPerFileBytes=100000000, - ) - data = self.admin_client.groups[self.project.id].logCollectionJobs.post(**params).data - job_id = data['id'] - - while True: - LOGGER.debug('Poll job %s' % job_id) - data = self.admin_client.groups[self.project.id].logCollectionJobs[job_id].get().data - if data['status'] == 'IN_PROGRESS': - sleep(1) - elif data['status'] == 'SUCCESS': - break - else: - raise Exception("Unexpected log collection job status %s" % data['status']) - - LOGGER.info('Log download URL: %s' % data['downloadUrl']) - # Assume the URL uses the same host as the other API requests, and - # remove it so that we just have the path. 
- url = re.sub(r'\w+://[^/]+', '', data['downloadUrl']) - LOGGER.info('Retrieving %s' % url) - resp = self.admin_client.request('GET', url) - if resp.status_code != 200: - raise RuntimeError('Request to %s failed: %s' % url, resp.status_code) - with open('logs.tar.gz', 'wb') as f: - f.write(resp.response.content) class SpecTestRunnerBase: diff --git a/astrolabe/utils.py b/astrolabe/utils.py index 52445cdf..af3ebaae 100644 --- a/astrolabe/utils.py +++ b/astrolabe/utils.py @@ -287,3 +287,45 @@ def terminate(self): stats = self._PLACEHOLDER_EXECUTION_STATISTICS return stats + + +def get_logs(admin_client, project, cluster_name): + data = admin_client.nds.groups[project.id].clusters[cluster_name].get(api_version='private').data + + if data['clusterType'] == 'SHARDED': + rtype = 'CLUSTER' + rname = data['deploymentItemName'] + else: + rtype = 'REPLICASET' + rname = data['deploymentItemName'] + + params = dict( + resourceName=rname, + resourceType=rtype, + redacted=True, + logTypes=['FTDC','MONGODB'],#,'AUTOMATION_AGENT','MONITORING_AGENT','BACKUP_AGENT'], + sizeRequestedPerFileBytes=100000000, + ) + data = admin_client.groups[project.id].logCollectionJobs.post(**params).data + job_id = data['id'] + + while True: + LOGGER.debug('Poll job %s' % job_id) + data = admin_client.groups[project.id].logCollectionJobs[job_id].get().data + if data['status'] == 'IN_PROGRESS': + sleep(1) + elif data['status'] == 'SUCCESS': + break + else: + raise Exception("Unexpected log collection job status %s" % data['status']) + + LOGGER.info('Log download URL: %s' % data['downloadUrl']) + # Assume the URL uses the same host as the other API requests, and + # remove it so that we just have the path. 
+ url = re.sub(r'\w+://[^/]+', '', data['downloadUrl']) + LOGGER.info('Retrieving %s' % url) + resp = admin_client.request('GET', url) + if resp.status_code != 200: + raise RuntimeError('Request to %s failed: %s' % url, resp.status_code) + with open('logs.tar.gz', 'wb') as f: + f.write(resp.response.content) diff --git a/atlasclient/client.py b/atlasclient/client.py index 85279a0e..c9647055 100644 --- a/atlasclient/client.py +++ b/atlasclient/client.py @@ -224,8 +224,12 @@ def construct_resource_url(self, path, api_version=None): url_template = "{base_url}/{version}/{resource_path}" if path and path[0] == '/': url_template = '{base_url}{resource_path}' + base_url = self.config.base_url + # Allow trailing slash like https://cloud-dev.mongodb.com/ in the base URL + while base_url.endswith('/'): + base_url = base_url[:-1] return url_template.format( - base_url=self.config.base_url, + base_url=base_url, version=api_version or self.config.api_version, resource_path=path) diff --git a/atlasclient/configuration.py b/atlasclient/configuration.py index 20de46b3..7b0fb377 100644 --- a/atlasclient/configuration.py +++ b/atlasclient/configuration.py @@ -27,5 +27,5 @@ # Default configuration values. 
CONFIG_DEFAULTS = JSONObject.from_dict({ "ATLAS_HTTP_TIMEOUT": 10.0, - "ATLAS_API_VERSION": "v1.0", - "ATLAS_API_BASE_URL": "https://cloud.mongodb.com/api/atlas"}) + "ATLAS_API_VERSION": "atlas/v1.0", + "ATLAS_API_BASE_URL": "https://cloud.mongodb.com/api"}) From d9cc79953ccad87039dae0cda2cdef77e2608edc Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Mon, 25 Jan 2021 20:00:54 -0500 Subject: [PATCH 106/172] move all uri options into client definitions --- tests/retryReads-move-sharded.yaml | 5 ++--- tests/retryReads-move.yaml | 5 ++--- tests/retryReads-primaryRemoval.yaml | 5 ++--- tests/retryReads-primaryTakeover.yaml | 5 ++--- tests/retryReads-processRestart-sharded.yaml | 5 ++--- tests/retryReads-processRestart.yaml | 5 ++--- tests/retryReads-resizeCluster.yaml | 5 ++--- tests/retryReads-testFailover-sharded.yaml | 5 ++--- tests/retryReads-testFailover.yaml | 5 ++--- tests/retryReads-toggleServerSideJS.yaml | 5 ++--- tests/retryReads-vmRestart-sharded.yaml | 5 ++--- tests/retryReads-vmRestart.yaml | 5 ++--- tests/retryWrites-resizeCluster.yaml | 5 ++--- tests/retryWrites-toggleServerSideJS.yaml | 5 ++--- 14 files changed, 28 insertions(+), 42 deletions(-) diff --git a/tests/retryReads-move-sharded.yaml b/tests/retryReads-move-sharded.yaml index a2b31bbd..ec7e51af 100644 --- a/tests/retryReads-move-sharded.yaml +++ b/tests/retryReads-move-sharded.yaml @@ -17,9 +17,6 @@ operations: instanceSizeName: M20 processArgs: {} -uriOptions: - retryReads: true - driverWorkload: description: "Find" @@ -28,6 +25,8 @@ driverWorkload: createEntities: - client: id: &client0 client0 + uriOptions: + retryReads: true - database: id: &database0 database0 client: *client0 diff --git a/tests/retryReads-move.yaml b/tests/retryReads-move.yaml index d3f3c217..3e1bc318 100644 --- a/tests/retryReads-move.yaml +++ b/tests/retryReads-move.yaml @@ -17,9 +17,6 @@ operations: instanceSizeName: M10 processArgs: {} -uriOptions: - retryReads: true - driverWorkload: description: "Find" @@ -28,6 
+25,8 @@ driverWorkload: createEntities: - client: id: &client0 client0 + uriOptions: + retryReads: true - database: id: &database0 database0 client: *client0 diff --git a/tests/retryReads-primaryRemoval.yaml b/tests/retryReads-primaryRemoval.yaml index d2f160d8..65cf849d 100644 --- a/tests/retryReads-primaryRemoval.yaml +++ b/tests/retryReads-primaryRemoval.yaml @@ -44,9 +44,6 @@ operations: region: US_WEST_1 -uriOptions: - retryReads: true - driverWorkload: description: "Find" @@ -55,6 +52,8 @@ driverWorkload: createEntities: - client: id: &client0 client0 + uriOptions: + retryReads: true - database: id: &database0 database0 client: *client0 diff --git a/tests/retryReads-primaryTakeover.yaml b/tests/retryReads-primaryTakeover.yaml index aebe5131..b27a2b75 100644 --- a/tests/retryReads-primaryTakeover.yaml +++ b/tests/retryReads-primaryTakeover.yaml @@ -46,9 +46,6 @@ operations: timeout: 60 -uriOptions: - retryReads: true - driverWorkload: description: "Find" @@ -57,6 +54,8 @@ driverWorkload: createEntities: - client: id: &client0 client0 + uriOptions: + retryReads: true - database: id: &database0 database0 client: *client0 diff --git a/tests/retryReads-processRestart-sharded.yaml b/tests/retryReads-processRestart-sharded.yaml index 4e64fa24..9c5e793a 100644 --- a/tests/retryReads-processRestart-sharded.yaml +++ b/tests/retryReads-processRestart-sharded.yaml @@ -19,9 +19,6 @@ operations: processArgs: minimumEnabledTlsProtocol: TLS1_2 -uriOptions: - retryReads: true - driverWorkload: description: "Find" @@ -30,6 +27,8 @@ driverWorkload: createEntities: - client: id: &client0 client0 + uriOptions: + retryReads: true - database: id: &database0 database0 client: *client0 diff --git a/tests/retryReads-processRestart.yaml b/tests/retryReads-processRestart.yaml index 9607e394..b05377bb 100644 --- a/tests/retryReads-processRestart.yaml +++ b/tests/retryReads-processRestart.yaml @@ -19,9 +19,6 @@ operations: processArgs: minimumEnabledTlsProtocol: TLS1_2 -uriOptions: - 
retryReads: true - driverWorkload: description: "Find" @@ -30,6 +27,8 @@ driverWorkload: createEntities: - client: id: &client0 client0 + uriOptions: + retryReads: true - database: id: &database0 database0 client: *client0 diff --git a/tests/retryReads-resizeCluster.yaml b/tests/retryReads-resizeCluster.yaml index fb77d56a..48179fbe 100644 --- a/tests/retryReads-resizeCluster.yaml +++ b/tests/retryReads-resizeCluster.yaml @@ -17,9 +17,6 @@ operations: instanceSizeName: M20 processArgs: {} -uriOptions: - retryReads: true - driverWorkload: description: "Find" @@ -28,6 +25,8 @@ driverWorkload: createEntities: - client: id: &client0 client0 + uriOptions: + retryReads: true - database: id: &database0 database0 client: *client0 diff --git a/tests/retryReads-testFailover-sharded.yaml b/tests/retryReads-testFailover-sharded.yaml index b1e8845e..d82b52cc 100644 --- a/tests/retryReads-testFailover-sharded.yaml +++ b/tests/retryReads-testFailover-sharded.yaml @@ -15,9 +15,6 @@ operations: - waitForIdle: true -uriOptions: - retryReads: true - driverWorkload: description: "Find" @@ -26,6 +23,8 @@ driverWorkload: createEntities: - client: id: &client0 client0 + uriOptions: + retryReads: true - database: id: &database0 database0 client: *client0 diff --git a/tests/retryReads-testFailover.yaml b/tests/retryReads-testFailover.yaml index f879535e..8eb4f04e 100644 --- a/tests/retryReads-testFailover.yaml +++ b/tests/retryReads-testFailover.yaml @@ -15,9 +15,6 @@ operations: - waitForIdle: true -uriOptions: - retryReads: true - driverWorkload: description: "Find" @@ -26,6 +23,8 @@ driverWorkload: createEntities: - client: id: &client0 client0 + uriOptions: + retryReads: true - database: id: &database0 database0 client: *client0 diff --git a/tests/retryReads-toggleServerSideJS.yaml b/tests/retryReads-toggleServerSideJS.yaml index 3a9df6c1..006e8191 100644 --- a/tests/retryReads-toggleServerSideJS.yaml +++ b/tests/retryReads-toggleServerSideJS.yaml @@ -14,9 +14,6 @@ operations: 
processArgs: javascriptEnabled: true -uriOptions: - retryReads: true - driverWorkload: description: "Find" @@ -25,6 +22,8 @@ driverWorkload: createEntities: - client: id: &client0 client0 + uriOptions: + retryReads: true - database: id: &database0 database0 client: *client0 diff --git a/tests/retryReads-vmRestart-sharded.yaml b/tests/retryReads-vmRestart-sharded.yaml index fa7617a1..95d83fab 100644 --- a/tests/retryReads-vmRestart-sharded.yaml +++ b/tests/retryReads-vmRestart-sharded.yaml @@ -15,9 +15,6 @@ operations: - waitForIdle: true -uriOptions: - retryReads: true - driverWorkload: description: "Find" @@ -26,6 +23,8 @@ driverWorkload: createEntities: - client: id: &client0 client0 + uriOptions: + retryReads: true - database: id: &database0 database0 client: *client0 diff --git a/tests/retryReads-vmRestart.yaml b/tests/retryReads-vmRestart.yaml index fb95e513..487040f9 100644 --- a/tests/retryReads-vmRestart.yaml +++ b/tests/retryReads-vmRestart.yaml @@ -15,9 +15,6 @@ initialConfiguration: - waitForIdle: true -uriOptions: - retryReads: true - driverWorkload: description: "Find" @@ -26,6 +23,8 @@ driverWorkload: createEntities: - client: id: &client0 client0 + uriOptions: + retryReads: true - database: id: &database0 database0 client: *client0 diff --git a/tests/retryWrites-resizeCluster.yaml b/tests/retryWrites-resizeCluster.yaml index 425b2347..6c971589 100644 --- a/tests/retryWrites-resizeCluster.yaml +++ b/tests/retryWrites-resizeCluster.yaml @@ -17,9 +17,6 @@ operations: instanceSizeName: M20 processArgs: {} -uriOptions: - retryWrites: true - driverWorkload: description: "Insert" @@ -28,6 +25,8 @@ driverWorkload: createEntities: - client: id: &client0 client0 + uriOptions: + retryWrites: true - database: id: &database0 database0 client: *client0 diff --git a/tests/retryWrites-toggleServerSideJS.yaml b/tests/retryWrites-toggleServerSideJS.yaml index b73887bb..abec9ba7 100644 --- a/tests/retryWrites-toggleServerSideJS.yaml +++ 
b/tests/retryWrites-toggleServerSideJS.yaml @@ -13,9 +13,6 @@ operations: processArgs: javascriptEnabled: true -uriOptions: - retryWrites: true - driverWorkload: description: "Insert" @@ -24,6 +21,8 @@ driverWorkload: createEntities: - client: id: &client0 client0 + uriOptions: + retryWrites: true - database: id: &database0 database0 client: *client0 From c831b78870755f8f62880714554296810a60f044 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Mon, 25 Jan 2021 20:08:37 -0500 Subject: [PATCH 107/172] stop manually managing test client uri options --- astrolabe/runner.py | 14 +------------- docs/source/spec-test-format.rst | 8 +++++--- 2 files changed, 6 insertions(+), 16 deletions(-) diff --git a/astrolabe/runner.py b/astrolabe/runner.py index e737dbf5..2beefe04 100644 --- a/astrolabe/runner.py +++ b/astrolabe/runner.py @@ -71,19 +71,7 @@ def cluster_url(self): def get_connection_string(self): if self.__connection_string is None: cluster = self.cluster_url.get().data - prefix, suffix = cluster.srvAddress.split("//") - uri_options = self.spec.uriOptions.copy() - - # Boolean options must be converted to lowercase strings. 
- for key, value in uri_options.items(): - if isinstance(value, bool): - uri_options[key] = str(value).lower() - - connection_string = (prefix + "//" + self.config.database_username - + ":" + self.config.database_password + "@" - + suffix + "/?") - connection_string += urlencode(uri_options) - self.__connection_string = connection_string + self.__connection_string = cluster.srvAddress return self.__connection_string def __repr__(self): diff --git a/docs/source/spec-test-format.rst b/docs/source/spec-test-format.rst index 63919cda..be27f46b 100644 --- a/docs/source/spec-test-format.rst +++ b/docs/source/spec-test-format.rst @@ -100,9 +100,6 @@ A Test Scenario File has the following keys: waitForIdle: true -* uriOptions (document): Document containing ``key: value`` pairs of URI options that must be included in the - connection string passed to the workload executor by the *Test Orchestrator*. - * driverWorkload (document): Description of the driver workload to execute The document must be a complete test as defined by the `Unified Test Format specification `_. @@ -111,6 +108,11 @@ A Test Scenario File has the following keys: field of this document is interpreted and executed by ``astrolabe``, while the remaining fields are interpreted and executed by the workload executor. +.. note:: A previous version of this document specified a top-level + ``uriOptions`` for specifying URI options for the MongoClient under test. + In the current version, options can be specified using the ``uriOptions`` + key of the unified test format when creating a client entity. 
+ ------- Changes ------- From 57d33d6f8343c5271060b5251d0c9b3a4ed78644 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Mon, 25 Jan 2021 20:14:58 -0500 Subject: [PATCH 108/172] revise init documentation --- docs/source/spec-workload-executor.rst | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/docs/source/spec-workload-executor.rst b/docs/source/spec-workload-executor.rst index c95b1689..39154edd 100644 --- a/docs/source/spec-workload-executor.rst +++ b/docs/source/spec-workload-executor.rst @@ -37,8 +37,9 @@ Behavioral Description After accepting the inputs, the workload executor: -#. MUST use the input connection string to instantiate the ``MongoClient`` of the driver that is to be tested. - Note that the workload executor: +#. MUST use the input connection string to `instantiate the + unified test runner `_ + of the driver being tested. Note that the workload executor: * MUST NOT override any of the URI options specified in the incoming connection string. * MUST NOT augment the incoming connection string with any additional URI options. @@ -47,12 +48,6 @@ After accepting the inputs, the workload executor: the driver's unified test runner to execute the provided workload, with the following deviations from the unified test runner specification: - - Any `MongoClients `_ - that are instantiated by the workload executor MUST use the input - connection string as provided to the workload executor, and MUST - apply URI options specified in the particular test, if any, over the - provided connection string. - - Each MongoClient MUST be set up to publish `command monitoring `_ events. 
The workload executor MUST record all events published From eed4aaefcb7e2984ed614e9d7635683bab3b9dfa Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Mon, 25 Jan 2021 20:20:05 -0500 Subject: [PATCH 109/172] fix log retrieval again --- astrolabe/utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/astrolabe/utils.py b/astrolabe/utils.py index af3ebaae..42a30ae5 100644 --- a/astrolabe/utils.py +++ b/astrolabe/utils.py @@ -18,6 +18,7 @@ import signal import subprocess import sys +import re from hashlib import sha256 from contextlib import closing from time import monotonic, sleep @@ -323,6 +324,8 @@ def get_logs(admin_client, project, cluster_name): # Assume the URL uses the same host as the other API requests, and # remove it so that we just have the path. url = re.sub(r'\w+://[^/]+', '', data['downloadUrl']) + if url.startswith('/api'): + url = url[4:] LOGGER.info('Retrieving %s' % url) resp = admin_client.request('GET', url) if resp.status_code != 200: From 0b2a8bf31c650b438b9064e5f4e0c7de933e2170 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Mon, 25 Jan 2021 20:23:30 -0500 Subject: [PATCH 110/172] credentials need to be added to the uri --- astrolabe/runner.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/astrolabe/runner.py b/astrolabe/runner.py index 2beefe04..35e204ad 100644 --- a/astrolabe/runner.py +++ b/astrolabe/runner.py @@ -13,7 +13,7 @@ # limitations under the License. 
import logging, datetime, time as _time, gzip -import os, io +import os, io, re from time import sleep from urllib.parse import urlencode @@ -71,7 +71,10 @@ def cluster_url(self): def get_connection_string(self): if self.__connection_string is None: cluster = self.cluster_url.get().data - self.__connection_string = cluster.srvAddress + uri = re.sub(r'://', + '://%s:%s@' % (self.config.database_username, self.config.database_password), + cluster.srvAddress) + self.__connection_string = uri return self.__connection_string def __repr__(self): From b73db85213da5b6f26aa4804b4450022f1fbbe66 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Mon, 25 Jan 2021 20:28:05 -0500 Subject: [PATCH 111/172] add todo note --- .evergreen/config.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.evergreen/config.yml b/.evergreen/config.yml index 6a3eb6b8..49d98fdd 100644 --- a/.evergreen/config.yml +++ b/.evergreen/config.yml @@ -431,6 +431,8 @@ buildvariants: display_name: "${driver} ${platform} ${runtime}" tasks: - ".all" +# TODO: re-enable language builds once workload executors have been +# re-implemented to work with the new format #- matrix_name: "tests-python" # matrix_spec: # driver: ["pymongo-master"] From 5424854f757b3e20b1fe4da98db5a35c09289e7d Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Mon, 25 Jan 2021 20:28:36 -0500 Subject: [PATCH 112/172] remove debug --- astrolabe/runner.py | 1 - 1 file changed, 1 deletion(-) diff --git a/astrolabe/runner.py b/astrolabe/runner.py index 35e204ad..4531510a 100644 --- a/astrolabe/runner.py +++ b/astrolabe/runner.py @@ -208,7 +208,6 @@ def run(self, persist_cluster=False, startup_time=1): if op_name == 'restartVms': rv = self.admin_client.nds.groups[self.project.id].clusters[self.cluster_name].reboot.post(api_version='private') - import pdb;pdb.set_trace() self.wait_for_idle() From d97a23344db23a045a824005ad23066d74941753 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Mon, 25 Jan 2021 20:29:45 -0500 Subject: [PATCH 113/172] 
version limit numpy --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index c3e32146..af36d251 100644 --- a/setup.py +++ b/setup.py @@ -27,7 +27,7 @@ 'click>=7,<8', 'requests>=2,<3', 'pymongo>=3.10,<4', 'dnspython>=1.16,<2', 'pyyaml>=5,<6', 'tabulate>=0.8,<0.9', - 'numpy', + 'numpy<2', 'junitparser>=1,<2'] if sys.platform == 'win32': install_requires.append('certifi') From c2f70034977f4a41218399c71663f85b6e119a3f Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Tue, 26 Jan 2021 08:52:02 -0500 Subject: [PATCH 114/172] looping --- integrations/ruby/executor.rb | 3 +++ tests/retryReads-testFailover.yaml | 26 +++++++++++++++----------- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/integrations/ruby/executor.rb b/integrations/ruby/executor.rb index 00f7c8b8..84ea4ea3 100644 --- a/integrations/ruby/executor.rb +++ b/integrations/ruby/executor.rb @@ -96,6 +96,9 @@ def load_data def set_signal_handler Signal.trap('INT') do @stop = true + unified_tests.each do |test| + test.stop! 
+ end end end diff --git a/tests/retryReads-testFailover.yaml b/tests/retryReads-testFailover.yaml index 8eb4f04e..36093c7f 100644 --- a/tests/retryReads-testFailover.yaml +++ b/tests/retryReads-testFailover.yaml @@ -45,15 +45,19 @@ driverWorkload: tests: - description: "Find one" operations: - - name: find - object: *collection0 + - name: loop + object: testRunner arguments: - filter: { _id: { $gt: 1 }} - sort: { _id: 1 } - expectResult: - - - _id: 2 - x: 22 - - - _id: 3 - x: 33 + operations: + - name: find + object: *collection0 + arguments: + filter: { _id: { $gt: 1 }} + sort: { _id: 1 } + expectResult: + - + _id: 2 + x: 22 + - + _id: 3 + x: 33 From 49e35a3b1705b907a41ad2c6d1d8ce9613d40403 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Tue, 26 Jan 2021 12:02:14 -0500 Subject: [PATCH 115/172] fix vmrestart --- tests/retryReads-vmRestart.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/retryReads-vmRestart.yaml b/tests/retryReads-vmRestart.yaml index 487040f9..4c9c0e6f 100644 --- a/tests/retryReads-vmRestart.yaml +++ b/tests/retryReads-vmRestart.yaml @@ -7,7 +7,7 @@ initialConfiguration: instanceSizeName: M10 processArgs: {} - operations: +operations: - restartVms: true - From 9bc8248f5fa4e43a3f58fc14ab3ba34e240b7653 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Tue, 26 Jan 2021 21:27:39 -0500 Subject: [PATCH 116/172] store events as entities --- integrations/ruby/executor.rb | 15 ++++++++++----- tests/retryReads-testFailover.yaml | 15 +++++++++++++++ 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/integrations/ruby/executor.rb b/integrations/ruby/executor.rb index 84ea4ea3..8e2ae307 100644 --- a/integrations/ruby/executor.rb +++ b/integrations/ruby/executor.rb @@ -147,12 +147,17 @@ def write_result File.open('results.json', 'w') do |f| f << JSON.dump(result) end + result = { + errors: metrics_collector.errors, + } + unified_tests.map do |test| + test.entities[:event_list].each do |name, events| + result[name] ||= 
[] + result[name] += events + end + end File.open('events.json', 'w') do |f| - f << JSON.dump( - commands: metrics_collector.command_events, - connections: metrics_collector.connection_events, - errors: metrics_collector.errors, - ) + f << JSON.dump(result) end end end diff --git a/tests/retryReads-testFailover.yaml b/tests/retryReads-testFailover.yaml index 36093c7f..f7f7aabb 100644 --- a/tests/retryReads-testFailover.yaml +++ b/tests/retryReads-testFailover.yaml @@ -25,6 +25,21 @@ driverWorkload: id: &client0 client0 uriOptions: retryReads: true + storeEventsAsEntities: + CommandStartedEvent: command + CommandSucceededEvent: command + CommandFailedEvent: command + PoolCreatedEvent: connection + PoolReadyEvent: connection + PoolClearedEvent: connection + PoolClosedEvent: connection + ConnectionCreatedEvent: connection + ConnectionReadyEvent: connection + ConnectionClosedEvent: connection + ConnectionCheckOutStartedEvent: connection + ConnectionCheckOutFailedEvent: connection + ConnectionCheckedOutEvent: connection + ConnectionCheckedInEvent: connection - database: id: &database0 database0 client: *client0 From 32c4ebf9e274d06009af1a04647624a09a876a54 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Tue, 26 Jan 2021 21:40:54 -0500 Subject: [PATCH 117/172] improved integration --- integrations/ruby/executor.rb | 89 +++++------------------------ integrations/ruby/workload-executor | 24 -------- tests/retryReads-testFailover.yaml | 2 + 3 files changed, 17 insertions(+), 98 deletions(-) diff --git a/integrations/ruby/executor.rb b/integrations/ruby/executor.rb index 8e2ae307..7b51767b 100644 --- a/integrations/ruby/executor.rb +++ b/integrations/ruby/executor.rb @@ -7,65 +7,10 @@ class UnknownOperation < StandardError; end class UnknownOperationConfiguration < StandardError; end -class MetricsCollector - def initialize - @operations = {} - @command_events = [] - @connection_events = [] - @errors = [] - @failures = [] - end - - attr_reader :command_events, 
:connection_events, :errors, :failures - - def started(event) - @operations[event.operation_id] = [event, Time.now] - end - - def succeeded(event) - started_event, started_at = @operations.delete(event.operation_id) - raise "Started event for #{event.operation_id} not found" unless started_event - @command_events << { - commandName: started_event.command_name, - duration: event.duration, - startTime: started_at.to_f, - address: started_event.address.seed, - } - end - - def failed(event) - started_event, started_at = @operations.delete(event.operation_id) - raise "Started event for #{event.operation_id} not found" unless started_event - @command_events << { - commandName: started_event.command_name, - duration: event.duration, - failure: event.failure, - startTime: started_at.to_f, - address: started_event.address.seed, - } - end - - def published(event) - @connection_events << { - name: event.class.name.sub(/.*::/, ''), - time: Time.now.to_f, - address: event.address.seed, - }.tap do |entry| - if event.respond_to?(:connection_id) - entry[:connectionId] = event.connection_id - end - if event.respond_to?(:reason) - entry[:reason] = event.reason - end - end - end -end - class Executor def initialize(uri, spec) @uri, @spec = uri, spec @operation_count = @failure_count = @error_count = 0 - @metrics_collector = MetricsCollector.new end attr_reader :uri, :spec @@ -116,20 +61,13 @@ def perform_operations test.run rescue Unified::Error => e STDERR.puts "Failure: #{e.class}: #{e}" - metrics_collector.failures << { - failure: "#{e.class}: #{e}", - time: Time.now.to_f, - } @failure_count += 1 rescue => e STDERR.puts "Error: #{e.class}: #{e}" - metrics_collector.errors << { - error: "#{e.class}: #{e}", - time: Time.now.to_f, - } @error_count += 1 end - @operation_count += 1 + @operation_count += test.entities.get(:iteration_count, 'iterations') + @error_count += test.entities.get(:error_list, 'errors').length end end @@ -147,17 +85,20 @@ def write_result 
File.open('results.json', 'w') do |f| f << JSON.dump(result) end - result = { - errors: metrics_collector.errors, - } - unified_tests.map do |test| - test.entities[:event_list].each do |name, events| - result[name] ||= [] - result[name] += events + {}.tap do |event_result| + unified_tests.map do |test| + test.entities[:event_list]&.each do |name, events| + event_result[name] ||= [] + event_result[name] += events + end + test.entities[:error_list]&.each do |name, errors| + event_result[name] ||= [] + event_result[name] += errors + end + end + File.open('events.json', 'w') do |f| + f << JSON.dump(event_result) end - end - File.open('events.json', 'w') do |f| - f << JSON.dump(result) end end end diff --git a/integrations/ruby/workload-executor b/integrations/ruby/workload-executor index 2d93380f..6cf9d704 100755 --- a/integrations/ruby/workload-executor +++ b/integrations/ruby/workload-executor @@ -46,30 +46,6 @@ else spec = JSON.load(spec) end -$uri = uri - -class ClientRegistry - def self.instance - new - end - - def global_client(which) - $global_client ||= Mongo::Client.new($uri) - end -end - -module Unified - class Test - alias :create_client_without_events :create_client - def create_client(**opts) - create_client_without_events(**opts).tap do |client| - client.subscribe(Mongo::Monitoring::COMMAND, $metrics_collector) - client.subscribe(Mongo::Monitoring::CONNECTION_POOL, $metrics_collector) - end - end - end -end - executor = Executor.new(uri, spec) if options[:insert] executor.load_data diff --git a/tests/retryReads-testFailover.yaml b/tests/retryReads-testFailover.yaml index f7f7aabb..b5d8799e 100644 --- a/tests/retryReads-testFailover.yaml +++ b/tests/retryReads-testFailover.yaml @@ -63,6 +63,8 @@ driverWorkload: - name: loop object: testRunner arguments: + storeErrorsAsEntity: errors + storeIterationsAsEntity: iterations operations: - name: find object: *collection0 From 446caaf3671bcf083d7374d0d11d7cf22a59a5ad Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev 
Date: Wed, 27 Jan 2021 20:50:16 -0500 Subject: [PATCH 118/172] update workload executor spec --- docs/source/spec-workload-executor.rst | 212 ++++++++++++++----------- 1 file changed, 115 insertions(+), 97 deletions(-) diff --git a/docs/source/spec-workload-executor.rst b/docs/source/spec-workload-executor.rst index 39154edd..24b75cec 100644 --- a/docs/source/spec-workload-executor.rst +++ b/docs/source/spec-workload-executor.rst @@ -44,112 +44,130 @@ After accepting the inputs, the workload executor: * MUST NOT override any of the URI options specified in the incoming connection string. * MUST NOT augment the incoming connection string with any additional URI options. -#. MUST parse the incoming the ``driverWorkload`` document and set up - the driver's unified test runner to execute the provided workload, with - the following deviations from the unified test runner specification: +#. MUST parse the incoming ``driverWorkload`` document and set up + the driver's unified test runner to execute the provided workload. + + .. note:: + + The workload SHOULD include a ``loop`` operation, as described in the + unified test format, but the workload executor SHOULD NOT validate that + this is the case. + +#. MUST set a signal handler for handling the termination signal that is + sent by ``astrolabe``. The termination signal is used by ``astrolabe`` + to communicate to the workload executor, and ultimately the unified test + runner, that they should stop running operations. + +#. MUST invoke the unified test runner to execute the workload. + If the workload includes a ``loop`` operation, the workload will run until + terminated by the workload executor; otherwise, the workload will terminate + when the unified test runner finishes executing all of the operations. + The workload executor MUST handle the case of a non-looping workload and + it MUST terminate when the workload terminates. - - Each MongoClient MUST be set up to publish `command monitoring - `_ - events. 
The workload executor MUST record all events published - in the course of scenario execution. + If the unified test runner raises an error while executing the workload, + the error MUST be reported using the same format as errors handled by the + unified test runner, as described in the unified test runner specification + under the ``loop`` operation. Errors handled by the workload + executor MUST be included in the calculated (and reported) error count. - - Each MongoClient MUST be set up to publish `CMAP - `_ - events. The workload executor MUST record all events published - in the course of scenario execution. + If the unified test runner reports a failure while executing the workload, + the failure MUST be reported using the same format as failures handled by the + unified test runner, as described in the unified test runner specification + under the ``loop`` operation. Failures handled by the workload + executor MUST be included in the calculated (and reported) failure count. + If the driver's unified test runner is intended to handle all failures + internally, failures that propagate out of the unified test runner MAY + be treated as errors by the workload executor. + +#. Upon receipt of the termination signal, MUST instruct the + unified test runner to stop running the ``loop`` operation, if one + is currently running. If the unified test runner is not currently running + any ``loop`` operations, the workload executor MUST instruct the + unified test runner to terminate when the next ``loop`` operation is + encountered. The workload executor MAY attempt to terminate the + unified test runner sooner (such as instructing the unified test runner + to terminate after completing the current operation). + The workload executor SHOULD terminate the unified test runner gracefully, + such that in-progress operations are completed to their natural outcome + (success or failure). + +#. 
MUST wait for the unified test runner to terminate, either due to the + receipt of the termination signal or due to completely executing all of + the operations if they do not include loops. - - The ``initialData`` array in the scenario MUST be ignored by the - unified test runner (and by the workload executor). - ``astrolabe`` is responsible for initializing the cluster with - this data *before* starting the workload executor. - -#. MUST use the driver's unified test runner to execute the tests in the - scenario, and the operations in each test, sequentially and in the order - in which they appear in the ``tests`` and ``operations`` arrays, - with the following deviations from the unified test runner specification: +#. MUST use the driver's unified test runner to retrieve the following + entities from the entity map, if they are set: - * The workload executor MUST repeat execution of the entire set of - specified tests and operations indefinitely, until the - **termination signal** from ``astrolabe`` is received. + * iteration count: the number of iterations that the workload executor + performed over the looped operations. - * The workload executor MUST keep count of the number of failures - (``numFailures``) that are encountered. A failure is when - the actual return value of an operation or a published event does not match - the respective expected return value or event (as these expectations - are defined in the unified test format). + * error lists: arrays of documents describing the errors that occurred + while the workload executor was executing the operations. Each client + entity may report errors to a separate error list, or the same + error list may be used by multiple client entities. - * The workload executor MUST record all errors encountered while running the scenario. - An operation error is any error that is propagated out of the unified test runner. 
- Workload executor implementations should try to be as resilient - as possible to these kinds of operation errors. + * failure lists: arrays of documents describing the failures that occurred + while the workload executor was executing the operations. Each client + entity may report errors to a separate failure list, or the same + failure list may be used by multiple client entities. - * The workload executor MUST keep count of the number of operation errors (``numErrors``) that - are encountered while running the scenario. + * event lists: arrays of documents describing the events that occurred + while the workload executor was executing the operations. Each client + entity may report events to a separate event list, or the same + event list may be used by multiple client entities. + +#. MUST calculate the aggregate counts of errors (``numErrors``) and failures + (``numFailures``) from the error and failure lists. + +#. MUST write the collected events, errors and failures into a JSON file named + ``events.json`` in the current directory + (i.e. the directory from where the workload executor is being executed). + The data written MUST be a map with the following fields: - * The workload executor MUST keep count of the number of invocations of the scenario that - did not result in an error (``numSuccesses``). - -#. MUST set a signal handler for handling the termination signal that is sent by ``astrolabe``. The termination signal - is used by ``astrolabe`` to communicate to the workload executor that it should stop running operations. Upon - receiving the termination signal, the workload executor: - - * MUST stop running driver operations and exit soon. - * MUST write the collected events and errors into a JSON file named - ``events.json`` in the current directory - (i.e. the directory from where the workload executor is being executed). 
- The data written MUST be a map with the following fields: - - * ``commands``: an array of command events published during scenario - execution. Each command event MUST be a map with the following fields: - - * ``commandName``: the name of the command, e.g. ``insert``. - * ``duration``: the time, in (floating-point) seconds, it took for the command to execute. - * ``failure``: if the command succeeded, this field MUST not be set. - If the command failed, this field MUST contain a textual description - of the error encountered while executing the command. - * ``startTime``: the (floating-point) number of seconds since the Unix epoch when the - command began executing. - * ``address``: the address of the server to which the command - was sent, e.g. ``localhost:27017``. - * ``connections``: an array of CMAP events published during scenario - execution. Each event MUST be a map with the following fields: - - * ``name``: the name of the event, e.g. ``PoolCreated``. - * ``time``: the (floating-point) number of seconds since the Unix epoch - when the event was published. - * ``address``: the address of the server that the command was - published for, e.g. ``localhost:27017``. - * ``errors``: an array of errors encountered during scenario execution. - Each error MUST be a map with the following fields: - - * ``error``: textual description of the error. - * ``time``: the (floating-point) number of seconds since the Unix epoch - when the error occurred. - - The number of errors MUST be reported as ``numErrors`` in ``results.json``, - as described below. + - For each event list entity, the name of the entity MUST become a key and the + documents stored in the entity MUST become the respective value. + + - For each error list entity, the name of the entity MUST become a key and the + documents stored in the entity MUST become the respective value. + + - The errors that the workload executor handles MUST be stored using the + ``errors`` key. 
+ + - For each failure list entity, the name of the entity MUST become a key and the + documents stored in the entity MUST become the respective value. + + - The failures that the workload executor handles MUST be stored using the + ``failures`` key. - * MUST write the collected workload statistics into a JSON file named ``results.json`` in the current working directory - (i.e. the directory from where the workload executor is being executed). Workload statistics MUST contain the - following fields (drivers MAY report additional statistics using field names of their choice): - - * ``numErrors``: the number of operation errors that were encountered during the test. - * ``numFailures``: the number of operation failures that were encountered during the test. - * ``numSuccesses``: the number of operations executed successfully during the test. - - .. note:: The values of ``numErrors`` and ``numFailures`` are used by ``astrolabe`` to determine the overall - success or failure of a driver workload execution. A non-zero value for either of these fields is construed - as a sign that something went wrong while executing the workload and the test is marked as a failure. - The workload executor's exit code is **not** used for determining success/failure and is ignored. - - .. note:: If ``astrolabe`` encounters an error in parsing the workload statistics dumped to ``results.json`` - (caused, for example, by malformed JSON), ``numErrors``, ``numFailures``, and ``numSuccesses`` - will be set to ``-1`` and the test run will be assumed to have failed. - - .. note:: The choice of termination signal used by ``astrolabe`` varies by platform. ``SIGINT`` [#f1]_ is used as - the termination signal on Linux and OSX, while ``CTRL_BREAK_EVENT`` [#f2]_ is used on Windows. - - .. note:: On Windows systems, the workload executor is invoked via Cygwin Bash. +#. MUST write the collected workload statistics into a JSON file named + ``results.json`` in the current working directory (i.e. 
the directory + from where the workload executor is being executed). Workload statistics + MUST contain the following fields (drivers MAY report additional statistics + using field names of their choice): + + * ``numErrors``: the number of operation errors that were encountered + during the test. This includes errors handled by the workload executor + and errors handled by the unified test runner. + * ``numFailures``: the number of operation failures that were encountered + during the test. This includes failures handled by the workload executor + and failures handled by the unified test runner. + * ``numSuccesses``: the number of successful loop iterations executed + during the test. + + .. note:: The values of ``numErrors`` and ``numFailures`` are used by ``astrolabe`` to determine the overall + success or failure of a driver workload execution. A non-zero value for either of these fields is construed + as a sign that something went wrong while executing the workload and the test is marked as a failure. + The workload executor's exit code is **not** used for determining success/failure and is ignored. + +.. note:: If ``astrolabe`` encounters an error in parsing the workload statistics dumped to ``results.json`` + (caused, for example, by malformed JSON), ``numErrors``, ``numFailures``, and ``numSuccesses`` + will be set to ``-1`` and the test run will be assumed to have failed. + +.. note:: The choice of termination signal used by ``astrolabe`` varies by platform. ``SIGINT`` [#f1]_ is used as + the termination signal on Linux and OSX, while ``CTRL_BREAK_EVENT`` [#f2]_ is used on Windows. + +.. note:: On Windows systems, the workload executor is invoked via Cygwin Bash. 
Pseudocode Implementation From 986f1a3b2047afcbbd74ce5a04677298324b463c Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Thu, 28 Jan 2021 08:29:56 -0500 Subject: [PATCH 119/172] remove remaining initial data handling --- astrolabe/runner.py | 14 ++------------ astrolabe/utils.py | 11 ----------- docs/source/spec-test-format.rst | 14 +++++++++++--- 3 files changed, 13 insertions(+), 26 deletions(-) diff --git a/astrolabe/runner.py b/astrolabe/runner.py index 4531510a..681ad3f1 100644 --- a/astrolabe/runner.py +++ b/astrolabe/runner.py @@ -30,7 +30,7 @@ from astrolabe.poller import BooleanCallablePoller from astrolabe.utils import ( assert_subset, get_cluster_name, get_test_name_from_spec_file, - load_test_data, DriverWorkloadSubprocessRunner, SingleTestXUnitLogger, + DriverWorkloadSubprocessRunner, SingleTestXUnitLogger, get_logs, Timer) @@ -143,23 +143,13 @@ def run(self, persist_cluster=False, startup_time=1): LOGGER.info("Running test {!r} on cluster {!r}".format( self.id, self.cluster_name)) - # Step-0: sanity-check the cluster configuration. + # Step-1: sanity-check the cluster configuration. self.verify_cluster_configuration_matches(self.spec.initialConfiguration) # Start the test timer. timer = Timer() timer.start() - # Step-1: load test data. - test_datas = self.spec.driverWorkload.get('initialData') - if test_datas: - LOGGER.info("Loading test data on cluster {!r}".format( - self.cluster_name)) - connection_string = self.get_connection_string() - load_test_data(connection_string, self.spec.driverWorkload) - LOGGER.info("Successfully loaded test data on cluster {!r}".format( - self.cluster_name)) - # Step-2: run driver workload. 
self.workload_runner.spawn( workload_executor=self.config.workload_executor, diff --git a/astrolabe/utils.py b/astrolabe/utils.py index 42a30ae5..267c926e 100644 --- a/astrolabe/utils.py +++ b/astrolabe/utils.py @@ -182,17 +182,6 @@ def mongo_client(connection_string): return closing(client) -def load_test_data(connection_string, driver_workload): - """Insert the test data into the cluster.""" - with mongo_client(connection_string) as client: - for spec in driver_workload.initialData: - coll = client.get_database( - spec.databaseName).get_collection( - spec.collectionName) - coll.drop() - coll.insert_many(spec.documents) - - class DriverWorkloadSubprocessRunner: """Convenience wrapper to run a workload executor in a subprocess.""" _PLACEHOLDER_EXECUTION_STATISTICS = { diff --git a/docs/source/spec-test-format.rst b/docs/source/spec-test-format.rst index be27f46b..fa5cef04 100644 --- a/docs/source/spec-test-format.rst +++ b/docs/source/spec-test-format.rst @@ -104,9 +104,17 @@ A Test Scenario File has the following keys: The document must be a complete test as defined by the `Unified Test Format specification `_. - Note that the ``initialData`` (and, by necessity, ``createEntities``) - field of this document is interpreted and executed by ``astrolabe``, while - the remaining fields are interpreted and executed by the workload executor. + The workload SHOULD use the ``loop`` unified test format operation to + define the server operations to execute during maintenance. There SHOULD + be exactly one ``loop`` operation per scenario, and it SHOULD be the last + operation in the scenario. The scenario SHOULD use + ``storeErrorsAsEntity``, ``storeFailuresAsEntity`` and + ``storeIterationsAsEntity`` operation arguments to allow the workload + executor to retrieve errors and failures that occur during these operations. 
+ + The scenario MAY use ``storeEventsAsEntities`` operation argument + when defining MongoClients to record events published during maintenance + and store these events as Evergreen artifacts. .. note:: A previous version of this document specified a top-level ``uriOptions`` for specifying URI options for the MongoClient under test. From da8d62f6cce5b8a07a6a921c1ca992300ba24bee Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Thu, 28 Jan 2021 13:00:51 -0500 Subject: [PATCH 120/172] rename test files to .yml --- .../{retryReads-move-sharded.yaml => retryReads-move-sharded.yml} | 0 tests/{retryReads-move.yaml => retryReads-move.yml} | 0 ...tryReads-primaryRemoval.yaml => retryReads-primaryRemoval.yml} | 0 ...yReads-primaryTakeover.yaml => retryReads-primaryTakeover.yml} | 0 ...Restart-sharded.yaml => retryReads-processRestart-sharded.yml} | 0 ...tryReads-processRestart.yaml => retryReads-processRestart.yml} | 0 ...retryReads-resizeCluster.yaml => retryReads-resizeCluster.yml} | 0 ...tFailover-sharded.yaml => retryReads-testFailover-sharded.yml} | 0 .../{retryReads-testFailover.yaml => retryReads-testFailover.yml} | 0 ...-toggleServerSideJS.yaml => retryReads-toggleServerSideJS.yml} | 0 ...ds-vmRestart-sharded.yaml => retryReads-vmRestart-sharded.yml} | 0 tests/{retryReads-vmRestart.yaml => retryReads-vmRestart.yml} | 0 ...tryWrites-resizeCluster.yaml => retryWrites-resizeCluster.yml} | 0 ...toggleServerSideJS.yaml => retryWrites-toggleServerSideJS.yml} | 0 14 files changed, 0 insertions(+), 0 deletions(-) rename tests/{retryReads-move-sharded.yaml => retryReads-move-sharded.yml} (100%) rename tests/{retryReads-move.yaml => retryReads-move.yml} (100%) rename tests/{retryReads-primaryRemoval.yaml => retryReads-primaryRemoval.yml} (100%) rename tests/{retryReads-primaryTakeover.yaml => retryReads-primaryTakeover.yml} (100%) rename tests/{retryReads-processRestart-sharded.yaml => retryReads-processRestart-sharded.yml} (100%) rename tests/{retryReads-processRestart.yaml 
=> retryReads-processRestart.yml} (100%) rename tests/{retryReads-resizeCluster.yaml => retryReads-resizeCluster.yml} (100%) rename tests/{retryReads-testFailover-sharded.yaml => retryReads-testFailover-sharded.yml} (100%) rename tests/{retryReads-testFailover.yaml => retryReads-testFailover.yml} (100%) rename tests/{retryReads-toggleServerSideJS.yaml => retryReads-toggleServerSideJS.yml} (100%) rename tests/{retryReads-vmRestart-sharded.yaml => retryReads-vmRestart-sharded.yml} (100%) rename tests/{retryReads-vmRestart.yaml => retryReads-vmRestart.yml} (100%) rename tests/{retryWrites-resizeCluster.yaml => retryWrites-resizeCluster.yml} (100%) rename tests/{retryWrites-toggleServerSideJS.yaml => retryWrites-toggleServerSideJS.yml} (100%) diff --git a/tests/retryReads-move-sharded.yaml b/tests/retryReads-move-sharded.yml similarity index 100% rename from tests/retryReads-move-sharded.yaml rename to tests/retryReads-move-sharded.yml diff --git a/tests/retryReads-move.yaml b/tests/retryReads-move.yml similarity index 100% rename from tests/retryReads-move.yaml rename to tests/retryReads-move.yml diff --git a/tests/retryReads-primaryRemoval.yaml b/tests/retryReads-primaryRemoval.yml similarity index 100% rename from tests/retryReads-primaryRemoval.yaml rename to tests/retryReads-primaryRemoval.yml diff --git a/tests/retryReads-primaryTakeover.yaml b/tests/retryReads-primaryTakeover.yml similarity index 100% rename from tests/retryReads-primaryTakeover.yaml rename to tests/retryReads-primaryTakeover.yml diff --git a/tests/retryReads-processRestart-sharded.yaml b/tests/retryReads-processRestart-sharded.yml similarity index 100% rename from tests/retryReads-processRestart-sharded.yaml rename to tests/retryReads-processRestart-sharded.yml diff --git a/tests/retryReads-processRestart.yaml b/tests/retryReads-processRestart.yml similarity index 100% rename from tests/retryReads-processRestart.yaml rename to tests/retryReads-processRestart.yml diff --git 
a/tests/retryReads-resizeCluster.yaml b/tests/retryReads-resizeCluster.yml similarity index 100% rename from tests/retryReads-resizeCluster.yaml rename to tests/retryReads-resizeCluster.yml diff --git a/tests/retryReads-testFailover-sharded.yaml b/tests/retryReads-testFailover-sharded.yml similarity index 100% rename from tests/retryReads-testFailover-sharded.yaml rename to tests/retryReads-testFailover-sharded.yml diff --git a/tests/retryReads-testFailover.yaml b/tests/retryReads-testFailover.yml similarity index 100% rename from tests/retryReads-testFailover.yaml rename to tests/retryReads-testFailover.yml diff --git a/tests/retryReads-toggleServerSideJS.yaml b/tests/retryReads-toggleServerSideJS.yml similarity index 100% rename from tests/retryReads-toggleServerSideJS.yaml rename to tests/retryReads-toggleServerSideJS.yml diff --git a/tests/retryReads-vmRestart-sharded.yaml b/tests/retryReads-vmRestart-sharded.yml similarity index 100% rename from tests/retryReads-vmRestart-sharded.yaml rename to tests/retryReads-vmRestart-sharded.yml diff --git a/tests/retryReads-vmRestart.yaml b/tests/retryReads-vmRestart.yml similarity index 100% rename from tests/retryReads-vmRestart.yaml rename to tests/retryReads-vmRestart.yml diff --git a/tests/retryWrites-resizeCluster.yaml b/tests/retryWrites-resizeCluster.yml similarity index 100% rename from tests/retryWrites-resizeCluster.yaml rename to tests/retryWrites-resizeCluster.yml diff --git a/tests/retryWrites-toggleServerSideJS.yaml b/tests/retryWrites-toggleServerSideJS.yml similarity index 100% rename from tests/retryWrites-toggleServerSideJS.yaml rename to tests/retryWrites-toggleServerSideJS.yml From def442ff0b395067911168f15a0c8b4a2a55d991 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Thu, 28 Jan 2021 13:02:05 -0500 Subject: [PATCH 121/172] rename test files to .yml --- .evergreen/config.yml | 4 ++-- .evergreen/generate-tasks.sh | 4 ++-- docs/source/installing-running-locally.rst | 4 ++-- 
docs/source/technical-design.rst | 4 ++-- tests/README.rst | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.evergreen/config.yml b/.evergreen/config.yml index 49d98fdd..9e9e8054 100644 --- a/.evergreen/config.yml +++ b/.evergreen/config.yml @@ -84,7 +84,7 @@ functions: ATLAS_ADMIN_API_PASSWORD: ${atlas_admin_api_password} add_expansions_to_env: true command: | - astrolabevenv/${PYTHON_BIN_DIR}/astrolabe spec-tests run-one tests/${TEST_NAME}.yaml -e integrations/${DRIVER_DIRNAME}/workload-executor + astrolabevenv/${PYTHON_BIN_DIR}/astrolabe spec-tests run-one tests/${TEST_NAME}.yml -e integrations/${DRIVER_DIRNAME}/workload-executor "validate executor": # Run a MongoDB instance locally. @@ -118,7 +118,7 @@ functions: ATLAS_ADMIN_API_PASSWORD: ${atlas_admin_api_password} add_expansions_to_env: true command: | - astrolabevenv/${PYTHON_BIN_DIR}/astrolabe spec-tests delete-cluster tests/${TEST_NAME}.yaml + astrolabevenv/${PYTHON_BIN_DIR}/astrolabe spec-tests delete-cluster tests/${TEST_NAME}.yml "upload test results": # Upload the xunit-format test results. diff --git a/.evergreen/generate-tasks.sh b/.evergreen/generate-tasks.sh index 22288a17..dd4d379e 100755 --- a/.evergreen/generate-tasks.sh +++ b/.evergreen/generate-tasks.sh @@ -1,7 +1,7 @@ #!/bin/sh -for f in tests/*.yaml; do - task=`basename $f |sed -e s/.yaml//` +for f in tests/*.yml; do + task=`basename $f |sed -e s/.yml//` cat <<-EOT - name: $task diff --git a/docs/source/installing-running-locally.rst b/docs/source/installing-running-locally.rst index 91ec0717..c07147c3 100644 --- a/docs/source/installing-running-locally.rst +++ b/docs/source/installing-running-locally.rst @@ -110,11 +110,11 @@ Running Atlas Planned Maintenance Tests The ``spec-tests`` command-group is used for Atlas Planned Maintenance (APM) tests. 
To run a single APM test, do:: - $ astrolabe spec-tests run-one -e --project-name --cluster-name-salt + $ astrolabe spec-tests run-one -e --project-name --cluster-name-salt where: -* ```` is the absolute or relative path to a test scenario file in the +* ```` is the absolute or relative path to a test scenario file in the :ref:`test-scenario-format-specification`, * ```` is the absolute or relative path to the workload executor of the driver to be tested, * ```` is the name of the Atlas Project under which the test cluster used for the test will be created, diff --git a/docs/source/technical-design.rst b/docs/source/technical-design.rst index cd226567..bd1dfbca 100644 --- a/docs/source/technical-design.rst +++ b/docs/source/technical-design.rst @@ -164,13 +164,13 @@ User-Facing API The Test Orchestrator MUST be an executable that supports the following invocation pattern:: - ./test-orchestrator spec-tests run-one path/to/workload-spec.yaml -e path/to/workload-executor + ./test-orchestrator spec-tests run-one path/to/workload-spec.yml -e path/to/workload-executor where: * ``test-orchestrator`` is the Test Orchestrator executable, * ``spec-tests run-one`` is the name of the command issued to this executable, -* ``path/to/workload-spec.yaml`` is the path to a test scenario file, +* ``path/to/workload-spec.yml`` is the path to a test scenario file, * ``-e`` is a flag indicating that the following argument is the workload executor binary, and * ``path/to/workload-executor`` is the path to the workload executor binary that is to be used to run the Driver Workload. diff --git a/tests/README.rst b/tests/README.rst index e5be0781..81615aee 100644 --- a/tests/README.rst +++ b/tests/README.rst @@ -12,7 +12,7 @@ Test File Naming Convention The names of test files serve as the names of the tests themselves (as displayed in the Evergreen UI). 
Consequently, it is recommended that file names observe the following naming convention:: - -.yaml + -.yml Use of ``camelCase`` is recommended for specifying the driver workload and maintenance plan names. Ideally, these names should be descriptive enough to be self-explanatory though this might not be possible for more complex workloads From 006ea2353c1d5c541b215407cc1bbb248466ae9d Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Thu, 28 Jan 2021 13:07:03 -0500 Subject: [PATCH 122/172] update all scenarios for new syntax --- tests/retryReads-move-sharded.yml | 43 +++++++++++++++------ tests/retryReads-move.yml | 43 +++++++++++++++------ tests/retryReads-primaryRemoval.yml | 43 +++++++++++++++------ tests/retryReads-primaryTakeover.yml | 43 +++++++++++++++------ tests/retryReads-processRestart-sharded.yml | 43 +++++++++++++++------ tests/retryReads-processRestart.yml | 43 +++++++++++++++------ tests/retryReads-resizeCluster.yml | 43 +++++++++++++++------ tests/retryReads-testFailover-sharded.yml | 43 +++++++++++++++------ tests/retryReads-toggleServerSideJS.yml | 43 +++++++++++++++------ tests/retryReads-vmRestart-sharded.yml | 43 +++++++++++++++------ tests/retryReads-vmRestart.yml | 43 +++++++++++++++------ tests/retryWrites-resizeCluster.yml | 27 +++++++++++-- tests/retryWrites-toggleServerSideJS.yml | 27 +++++++++++-- 13 files changed, 400 insertions(+), 127 deletions(-) diff --git a/tests/retryReads-move-sharded.yml b/tests/retryReads-move-sharded.yml index ec7e51af..8ee979ef 100644 --- a/tests/retryReads-move-sharded.yml +++ b/tests/retryReads-move-sharded.yml @@ -27,6 +27,21 @@ driverWorkload: id: &client0 client0 uriOptions: retryReads: true + storeEventsAsEntities: + CommandStartedEvent: command + CommandSucceededEvent: command + CommandFailedEvent: command + PoolCreatedEvent: connection + PoolReadyEvent: connection + PoolClearedEvent: connection + PoolClosedEvent: connection + ConnectionCreatedEvent: connection + ConnectionReadyEvent: connection + 
ConnectionClosedEvent: connection + ConnectionCheckOutStartedEvent: connection + ConnectionCheckOutFailedEvent: connection + ConnectionCheckedOutEvent: connection + ConnectionCheckedInEvent: connection - database: id: &database0 database0 client: *client0 @@ -47,15 +62,21 @@ driverWorkload: tests: - description: "Find one" operations: - - name: find - object: *collection0 + - name: loop + object: testRunner arguments: - filter: { _id: { $gt: 1 }} - sort: { _id: 1 } - expectResult: - - - _id: 2 - x: 22 - - - _id: 3 - x: 33 + storeErrorsAsEntity: errors + storeIterationsAsEntity: iterations + operations: + - name: find + object: *collection0 + arguments: + filter: { _id: { $gt: 1 }} + sort: { _id: 1 } + expectResult: + - + _id: 2 + x: 22 + - + _id: 3 + x: 33 diff --git a/tests/retryReads-move.yml b/tests/retryReads-move.yml index 3e1bc318..073e6078 100644 --- a/tests/retryReads-move.yml +++ b/tests/retryReads-move.yml @@ -27,6 +27,21 @@ driverWorkload: id: &client0 client0 uriOptions: retryReads: true + storeEventsAsEntities: + CommandStartedEvent: command + CommandSucceededEvent: command + CommandFailedEvent: command + PoolCreatedEvent: connection + PoolReadyEvent: connection + PoolClearedEvent: connection + PoolClosedEvent: connection + ConnectionCreatedEvent: connection + ConnectionReadyEvent: connection + ConnectionClosedEvent: connection + ConnectionCheckOutStartedEvent: connection + ConnectionCheckOutFailedEvent: connection + ConnectionCheckedOutEvent: connection + ConnectionCheckedInEvent: connection - database: id: &database0 database0 client: *client0 @@ -47,15 +62,21 @@ driverWorkload: tests: - description: "Find one" operations: - - name: find - object: *collection0 + - name: loop + object: testRunner arguments: - filter: { _id: { $gt: 1 }} - sort: { _id: 1 } - expectResult: - - - _id: 2 - x: 22 - - - _id: 3 - x: 33 + storeErrorsAsEntity: errors + storeIterationsAsEntity: iterations + operations: + - name: find + object: *collection0 + arguments: + filter: 
{ _id: { $gt: 1 }} + sort: { _id: 1 } + expectResult: + - + _id: 2 + x: 22 + - + _id: 3 + x: 33 diff --git a/tests/retryReads-primaryRemoval.yml b/tests/retryReads-primaryRemoval.yml index 65cf849d..98673538 100644 --- a/tests/retryReads-primaryRemoval.yml +++ b/tests/retryReads-primaryRemoval.yml @@ -54,6 +54,21 @@ driverWorkload: id: &client0 client0 uriOptions: retryReads: true + storeEventsAsEntities: + CommandStartedEvent: command + CommandSucceededEvent: command + CommandFailedEvent: command + PoolCreatedEvent: connection + PoolReadyEvent: connection + PoolClearedEvent: connection + PoolClosedEvent: connection + ConnectionCreatedEvent: connection + ConnectionReadyEvent: connection + ConnectionClosedEvent: connection + ConnectionCheckOutStartedEvent: connection + ConnectionCheckOutFailedEvent: connection + ConnectionCheckedOutEvent: connection + ConnectionCheckedInEvent: connection - database: id: &database0 database0 client: *client0 @@ -74,15 +89,21 @@ driverWorkload: tests: - description: "Find one" operations: - - name: find - object: *collection0 + - name: loop + object: testRunner arguments: - filter: { _id: { $gt: 1 }} - sort: { _id: 1 } - expectResult: - - - _id: 2 - x: 22 - - - _id: 3 - x: 33 + storeErrorsAsEntity: errors + storeIterationsAsEntity: iterations + operations: + - name: find + object: *collection0 + arguments: + filter: { _id: { $gt: 1 }} + sort: { _id: 1 } + expectResult: + - + _id: 2 + x: 22 + - + _id: 3 + x: 33 diff --git a/tests/retryReads-primaryTakeover.yml b/tests/retryReads-primaryTakeover.yml index b27a2b75..238ceded 100644 --- a/tests/retryReads-primaryTakeover.yml +++ b/tests/retryReads-primaryTakeover.yml @@ -56,6 +56,21 @@ driverWorkload: id: &client0 client0 uriOptions: retryReads: true + storeEventsAsEntities: + CommandStartedEvent: command + CommandSucceededEvent: command + CommandFailedEvent: command + PoolCreatedEvent: connection + PoolReadyEvent: connection + PoolClearedEvent: connection + PoolClosedEvent: connection + 
ConnectionCreatedEvent: connection + ConnectionReadyEvent: connection + ConnectionClosedEvent: connection + ConnectionCheckOutStartedEvent: connection + ConnectionCheckOutFailedEvent: connection + ConnectionCheckedOutEvent: connection + ConnectionCheckedInEvent: connection - database: id: &database0 database0 client: *client0 @@ -76,15 +91,21 @@ driverWorkload: tests: - description: "Find one" operations: - - name: find - object: *collection0 + - name: loop + object: testRunner arguments: - filter: { _id: { $gt: 1 }} - sort: { _id: 1 } - expectResult: - - - _id: 2 - x: 22 - - - _id: 3 - x: 33 + storeErrorsAsEntity: errors + storeIterationsAsEntity: iterations + operations: + - name: find + object: *collection0 + arguments: + filter: { _id: { $gt: 1 }} + sort: { _id: 1 } + expectResult: + - + _id: 2 + x: 22 + - + _id: 3 + x: 33 diff --git a/tests/retryReads-processRestart-sharded.yml b/tests/retryReads-processRestart-sharded.yml index 9c5e793a..558f58dc 100644 --- a/tests/retryReads-processRestart-sharded.yml +++ b/tests/retryReads-processRestart-sharded.yml @@ -29,6 +29,21 @@ driverWorkload: id: &client0 client0 uriOptions: retryReads: true + storeEventsAsEntities: + CommandStartedEvent: command + CommandSucceededEvent: command + CommandFailedEvent: command + PoolCreatedEvent: connection + PoolReadyEvent: connection + PoolClearedEvent: connection + PoolClosedEvent: connection + ConnectionCreatedEvent: connection + ConnectionReadyEvent: connection + ConnectionClosedEvent: connection + ConnectionCheckOutStartedEvent: connection + ConnectionCheckOutFailedEvent: connection + ConnectionCheckedOutEvent: connection + ConnectionCheckedInEvent: connection - database: id: &database0 database0 client: *client0 @@ -49,15 +64,21 @@ driverWorkload: tests: - description: "Find one" operations: - - name: find - object: *collection0 + - name: loop + object: testRunner arguments: - filter: { _id: { $gt: 1 }} - sort: { _id: 1 } - expectResult: - - - _id: 2 - x: 22 - - - _id: 3 - x: 
33 + storeErrorsAsEntity: errors + storeIterationsAsEntity: iterations + operations: + - name: find + object: *collection0 + arguments: + filter: { _id: { $gt: 1 }} + sort: { _id: 1 } + expectResult: + - + _id: 2 + x: 22 + - + _id: 3 + x: 33 diff --git a/tests/retryReads-processRestart.yml b/tests/retryReads-processRestart.yml index b05377bb..12f19861 100644 --- a/tests/retryReads-processRestart.yml +++ b/tests/retryReads-processRestart.yml @@ -29,6 +29,21 @@ driverWorkload: id: &client0 client0 uriOptions: retryReads: true + storeEventsAsEntities: + CommandStartedEvent: command + CommandSucceededEvent: command + CommandFailedEvent: command + PoolCreatedEvent: connection + PoolReadyEvent: connection + PoolClearedEvent: connection + PoolClosedEvent: connection + ConnectionCreatedEvent: connection + ConnectionReadyEvent: connection + ConnectionClosedEvent: connection + ConnectionCheckOutStartedEvent: connection + ConnectionCheckOutFailedEvent: connection + ConnectionCheckedOutEvent: connection + ConnectionCheckedInEvent: connection - database: id: &database0 database0 client: *client0 @@ -49,15 +64,21 @@ driverWorkload: tests: - description: "Find one" operations: - - name: find - object: *collection0 + - name: loop + object: testRunner arguments: - filter: { _id: { $gt: 1 }} - sort: { _id: 1 } - expectResult: - - - _id: 2 - x: 22 - - - _id: 3 - x: 33 + storeErrorsAsEntity: errors + storeIterationsAsEntity: iterations + operations: + - name: find + object: *collection0 + arguments: + filter: { _id: { $gt: 1 }} + sort: { _id: 1 } + expectResult: + - + _id: 2 + x: 22 + - + _id: 3 + x: 33 diff --git a/tests/retryReads-resizeCluster.yml b/tests/retryReads-resizeCluster.yml index 48179fbe..4f964b41 100644 --- a/tests/retryReads-resizeCluster.yml +++ b/tests/retryReads-resizeCluster.yml @@ -27,6 +27,21 @@ driverWorkload: id: &client0 client0 uriOptions: retryReads: true + storeEventsAsEntities: + CommandStartedEvent: command + CommandSucceededEvent: command + 
CommandFailedEvent: command + PoolCreatedEvent: connection + PoolReadyEvent: connection + PoolClearedEvent: connection + PoolClosedEvent: connection + ConnectionCreatedEvent: connection + ConnectionReadyEvent: connection + ConnectionClosedEvent: connection + ConnectionCheckOutStartedEvent: connection + ConnectionCheckOutFailedEvent: connection + ConnectionCheckedOutEvent: connection + ConnectionCheckedInEvent: connection - database: id: &database0 database0 client: *client0 @@ -47,15 +62,21 @@ driverWorkload: tests: - description: "Find one" operations: - - name: find - object: *collection0 + - name: loop + object: testRunner arguments: - filter: { _id: { $gt: 1 }} - sort: { _id: 1 } - expectResult: - - - _id: 2 - x: 22 - - - _id: 3 - x: 33 + storeErrorsAsEntity: errors + storeIterationsAsEntity: iterations + operations: + - name: find + object: *collection0 + arguments: + filter: { _id: { $gt: 1 }} + sort: { _id: 1 } + expectResult: + - + _id: 2 + x: 22 + - + _id: 3 + x: 33 diff --git a/tests/retryReads-testFailover-sharded.yml b/tests/retryReads-testFailover-sharded.yml index d82b52cc..697dc592 100644 --- a/tests/retryReads-testFailover-sharded.yml +++ b/tests/retryReads-testFailover-sharded.yml @@ -25,6 +25,21 @@ driverWorkload: id: &client0 client0 uriOptions: retryReads: true + storeEventsAsEntities: + CommandStartedEvent: command + CommandSucceededEvent: command + CommandFailedEvent: command + PoolCreatedEvent: connection + PoolReadyEvent: connection + PoolClearedEvent: connection + PoolClosedEvent: connection + ConnectionCreatedEvent: connection + ConnectionReadyEvent: connection + ConnectionClosedEvent: connection + ConnectionCheckOutStartedEvent: connection + ConnectionCheckOutFailedEvent: connection + ConnectionCheckedOutEvent: connection + ConnectionCheckedInEvent: connection - database: id: &database0 database0 client: *client0 @@ -45,15 +60,21 @@ driverWorkload: tests: - description: "Find one" operations: - - name: find - object: *collection0 + - 
name: loop + object: testRunner arguments: - filter: { _id: { $gt: 1 }} - sort: { _id: 1 } - expectResult: - - - _id: 2 - x: 22 - - - _id: 3 - x: 33 + storeErrorsAsEntity: errors + storeIterationsAsEntity: iterations + operations: + - name: find + object: *collection0 + arguments: + filter: { _id: { $gt: 1 }} + sort: { _id: 1 } + expectResult: + - + _id: 2 + x: 22 + - + _id: 3 + x: 33 diff --git a/tests/retryReads-toggleServerSideJS.yml b/tests/retryReads-toggleServerSideJS.yml index 006e8191..20589e9f 100644 --- a/tests/retryReads-toggleServerSideJS.yml +++ b/tests/retryReads-toggleServerSideJS.yml @@ -24,6 +24,21 @@ driverWorkload: id: &client0 client0 uriOptions: retryReads: true + storeEventsAsEntities: + CommandStartedEvent: command + CommandSucceededEvent: command + CommandFailedEvent: command + PoolCreatedEvent: connection + PoolReadyEvent: connection + PoolClearedEvent: connection + PoolClosedEvent: connection + ConnectionCreatedEvent: connection + ConnectionReadyEvent: connection + ConnectionClosedEvent: connection + ConnectionCheckOutStartedEvent: connection + ConnectionCheckOutFailedEvent: connection + ConnectionCheckedOutEvent: connection + ConnectionCheckedInEvent: connection - database: id: &database0 database0 client: *client0 @@ -44,15 +59,21 @@ driverWorkload: tests: - description: "Find one" operations: - - name: find - object: *collection0 + - name: loop + object: testRunner arguments: - filter: { _id: { $gt: 1 }} - sort: { _id: 1 } - expectResult: - - - _id: 2 - x: 22 - - - _id: 3 - x: 33 + storeErrorsAsEntity: errors + storeIterationsAsEntity: iterations + operations: + - name: find + object: *collection0 + arguments: + filter: { _id: { $gt: 1 }} + sort: { _id: 1 } + expectResult: + - + _id: 2 + x: 22 + - + _id: 3 + x: 33 diff --git a/tests/retryReads-vmRestart-sharded.yml b/tests/retryReads-vmRestart-sharded.yml index 95d83fab..02ad19d6 100644 --- a/tests/retryReads-vmRestart-sharded.yml +++ b/tests/retryReads-vmRestart-sharded.yml @@ -25,6 
+25,21 @@ driverWorkload: id: &client0 client0 uriOptions: retryReads: true + storeEventsAsEntities: + CommandStartedEvent: command + CommandSucceededEvent: command + CommandFailedEvent: command + PoolCreatedEvent: connection + PoolReadyEvent: connection + PoolClearedEvent: connection + PoolClosedEvent: connection + ConnectionCreatedEvent: connection + ConnectionReadyEvent: connection + ConnectionClosedEvent: connection + ConnectionCheckOutStartedEvent: connection + ConnectionCheckOutFailedEvent: connection + ConnectionCheckedOutEvent: connection + ConnectionCheckedInEvent: connection - database: id: &database0 database0 client: *client0 @@ -45,15 +60,21 @@ driverWorkload: tests: - description: "Find one" operations: - - name: find - object: *collection0 + - name: loop + object: testRunner arguments: - filter: { _id: { $gt: 1 }} - sort: { _id: 1 } - expectResult: - - - _id: 2 - x: 22 - - - _id: 3 - x: 33 + storeErrorsAsEntity: errors + storeIterationsAsEntity: iterations + operations: + - name: find + object: *collection0 + arguments: + filter: { _id: { $gt: 1 }} + sort: { _id: 1 } + expectResult: + - + _id: 2 + x: 22 + - + _id: 3 + x: 33 diff --git a/tests/retryReads-vmRestart.yml b/tests/retryReads-vmRestart.yml index 4c9c0e6f..8e4cd8c9 100644 --- a/tests/retryReads-vmRestart.yml +++ b/tests/retryReads-vmRestart.yml @@ -25,6 +25,21 @@ driverWorkload: id: &client0 client0 uriOptions: retryReads: true + storeEventsAsEntities: + CommandStartedEvent: command + CommandSucceededEvent: command + CommandFailedEvent: command + PoolCreatedEvent: connection + PoolReadyEvent: connection + PoolClearedEvent: connection + PoolClosedEvent: connection + ConnectionCreatedEvent: connection + ConnectionReadyEvent: connection + ConnectionClosedEvent: connection + ConnectionCheckOutStartedEvent: connection + ConnectionCheckOutFailedEvent: connection + ConnectionCheckedOutEvent: connection + ConnectionCheckedInEvent: connection - database: id: &database0 database0 client: *client0 @@ 
-45,15 +60,21 @@ driverWorkload: tests: - description: "Find one" operations: - - name: find - object: *collection0 + - name: loop + object: testRunner arguments: - filter: { _id: { $gt: 1 }} - sort: { _id: 1 } - expectResult: - - - _id: 2 - x: 22 - - - _id: 3 - x: 33 + storeErrorsAsEntity: errors + storeIterationsAsEntity: iterations + operations: + - name: find + object: *collection0 + arguments: + filter: { _id: { $gt: 1 }} + sort: { _id: 1 } + expectResult: + - + _id: 2 + x: 22 + - + _id: 3 + x: 33 diff --git a/tests/retryWrites-resizeCluster.yml b/tests/retryWrites-resizeCluster.yml index 6c971589..6e6e3e12 100644 --- a/tests/retryWrites-resizeCluster.yml +++ b/tests/retryWrites-resizeCluster.yml @@ -27,6 +27,21 @@ driverWorkload: id: &client0 client0 uriOptions: retryWrites: true + storeEventsAsEntities: + CommandStartedEvent: command + CommandSucceededEvent: command + CommandFailedEvent: command + PoolCreatedEvent: connection + PoolReadyEvent: connection + PoolClearedEvent: connection + PoolClosedEvent: connection + ConnectionCreatedEvent: connection + ConnectionReadyEvent: connection + ConnectionClosedEvent: connection + ConnectionCheckOutStartedEvent: connection + ConnectionCheckOutFailedEvent: connection + ConnectionCheckedOutEvent: connection + ConnectionCheckedInEvent: connection - database: id: &database0 database0 client: *client0 @@ -39,6 +54,12 @@ driverWorkload: tests: - description: "Insert one" operations: - - name: insertOne - object: *collection0 - arguments: { data: 100 } + - name: loop + object: testRunner + arguments: + storeErrorsAsEntity: errors + storeIterationsAsEntity: iterations + operations: + - name: insertOne + object: *collection0 + arguments: { data: 100 } diff --git a/tests/retryWrites-toggleServerSideJS.yml b/tests/retryWrites-toggleServerSideJS.yml index abec9ba7..496d22b0 100644 --- a/tests/retryWrites-toggleServerSideJS.yml +++ b/tests/retryWrites-toggleServerSideJS.yml @@ -23,6 +23,21 @@ driverWorkload: id: &client0 client0 
uriOptions: retryWrites: true + storeEventsAsEntities: + CommandStartedEvent: command + CommandSucceededEvent: command + CommandFailedEvent: command + PoolCreatedEvent: connection + PoolReadyEvent: connection + PoolClearedEvent: connection + PoolClosedEvent: connection + ConnectionCreatedEvent: connection + ConnectionReadyEvent: connection + ConnectionClosedEvent: connection + ConnectionCheckOutStartedEvent: connection + ConnectionCheckOutFailedEvent: connection + ConnectionCheckedOutEvent: connection + ConnectionCheckedInEvent: connection - database: id: &database0 database0 client: *client0 @@ -35,6 +50,12 @@ driverWorkload: tests: - description: "Insert one" operations: - - name: insertOne - object: *collection0 - arguments: { data: 100 } + - name: loop + object: testRunner + arguments: + storeErrorsAsEntity: errors + storeIterationsAsEntity: iterations + operations: + - name: insertOne + object: *collection0 + arguments: { data: 100 } From b64ed107bc0591309c3323ee0df88c3e18bb26f0 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Fri, 29 Jan 2021 08:26:15 -0500 Subject: [PATCH 123/172] fix the validator --- astrolabe/validator.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/astrolabe/validator.py b/astrolabe/validator.py index d573f93d..e9b1c016 100644 --- a/astrolabe/validator.py +++ b/astrolabe/validator.py @@ -22,7 +22,7 @@ from atlasclient import JSONObject from astrolabe.exceptions import WorkloadExecutorError -from astrolabe.utils import DriverWorkloadSubprocessRunner, load_test_data +from astrolabe.utils import DriverWorkloadSubprocessRunner class ValidateWorkloadExecutor(TestCase): @@ -52,8 +52,6 @@ def run_test(self, driver_workload): self.coll = self.client.get_database(dbname).get_collection(collname) - load_test_data(self.CONNECTION_STRING, driver_workload) - subprocess = DriverWorkloadSubprocessRunner() try: subprocess.spawn(workload_executor=self.WORKLOAD_EXECUTOR, From c07e6225ba21cd8e62e0c5eb02a72dde4252bce8 Mon Sep 17 
00:00:00 2001 From: Oleg Pudeyev Date: Fri, 29 Jan 2021 08:43:37 -0500 Subject: [PATCH 124/172] remove looping from Ruby runner --- integrations/ruby/executor.rb | 31 +++++-------------------------- 1 file changed, 5 insertions(+), 26 deletions(-) diff --git a/integrations/ruby/executor.rb b/integrations/ruby/executor.rb index 7b51767b..8120c7e7 100644 --- a/integrations/ruby/executor.rb +++ b/integrations/ruby/executor.rb @@ -21,21 +21,16 @@ def run set_signal_handler unified_tests.each do |test| test.create_entities - end - while true - break if @stop - perform_operations + test.set_initial_data + test.run + test.assert_outcome + test.assert_events + test.cleanup end puts "Result: #{result.inspect}" write_result end - def load_data - unified_tests.each do |test| - test.set_initial_data - end - end - private def set_signal_handler @@ -55,22 +50,6 @@ def unified_tests @tests ||= unified_group.tests end - def perform_operations - unified_tests.each do |test| - begin - test.run - rescue Unified::Error => e - STDERR.puts "Failure: #{e.class}: #{e}" - @failure_count += 1 - rescue => e - STDERR.puts "Error: #{e.class}: #{e}" - @error_count += 1 - end - @operation_count += test.entities.get(:iteration_count, 'iterations') - @error_count += test.entities.get(:error_list, 'errors').length - end - end - def result { numOperations: @operation_count, From 48da544745ea273ddab5290f7756e223320d376d Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Fri, 29 Jan 2021 08:44:33 -0500 Subject: [PATCH 125/172] reinstate client registry --- integrations/ruby/workload-executor | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/integrations/ruby/workload-executor b/integrations/ruby/workload-executor index 6cf9d704..7b37c0c4 100755 --- a/integrations/ruby/workload-executor +++ b/integrations/ruby/workload-executor @@ -46,6 +46,18 @@ else spec = JSON.load(spec) end +$uri = uri + +class ClientRegistry + def self.instance + new + end + + def global_client(which) + $global_client 
||= Mongo::Client.new($uri) + end +end + executor = Executor.new(uri, spec) if options[:insert] executor.load_data From 55ed29909aa096360ac4a44ca2735d44dbdde0a6 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Fri, 29 Jan 2021 09:09:26 -0500 Subject: [PATCH 126/172] fix base url on deletion side --- .evergreen/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.evergreen/config.yml b/.evergreen/config.yml index 9e9e8054..04cf56c9 100644 --- a/.evergreen/config.yml +++ b/.evergreen/config.yml @@ -112,7 +112,7 @@ functions: CLUSTER_NAME_SALT: ${build_id} ATLAS_API_USERNAME: ${atlas_key} ATLAS_API_PASSWORD: ${atlas_secret} - ATLAS_API_BASE_URL: ${atlas_url} + ATLAS_API_BASE_URL: https://cloud-dev.mongodb.com/api ATLAS_ORGANIZATION_NAME: ${atlas_organization} ATLAS_ADMIN_API_USERNAME: ${atlas_admin_api_username} ATLAS_ADMIN_API_PASSWORD: ${atlas_admin_api_password} From c934f1a4d9cbe0044c65398d4290deb95fb2701e Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Fri, 29 Jan 2021 09:29:25 -0500 Subject: [PATCH 127/172] retrieve results from unified runner --- integrations/ruby/executor.rb | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/integrations/ruby/executor.rb b/integrations/ruby/executor.rb index 8120c7e7..241dde75 100644 --- a/integrations/ruby/executor.rb +++ b/integrations/ruby/executor.rb @@ -27,8 +27,8 @@ def run test.assert_events test.cleanup end - puts "Result: #{result.inspect}" write_result + puts "Result: #{result.inspect}" end private @@ -61,23 +61,30 @@ def result end def write_result - File.open('results.json', 'w') do |f| - f << JSON.dump(result) - end {}.tap do |event_result| unified_tests.map do |test| + @operation_count += test.entities.get(:iteration_count, 'iterations') test.entities[:event_list]&.each do |name, events| event_result[name] ||= [] event_result[name] += events end test.entities[:error_list]&.each do |name, errors| + @error_count += errors.length event_result[name] ||= [] 
event_result[name] += errors end + test.entities[:failure_list]&.each do |name, failures| + @failure_count += failures.length + event_result[name] ||= [] + event_result[name] += failures + end end File.open('events.json', 'w') do |f| f << JSON.dump(event_result) end end + File.open('results.json', 'w') do |f| + f << JSON.dump(result) + end end end From 017257d29a26972c2884f79580419ee5c071ea95 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Mon, 1 Feb 2021 01:22:25 -0500 Subject: [PATCH 128/172] update pseudocode --- docs/source/spec-workload-executor.rst | 64 ++++++++++++-------------- 1 file changed, 30 insertions(+), 34 deletions(-) diff --git a/docs/source/spec-workload-executor.rst b/docs/source/spec-workload-executor.rst index 24b75cec..f4e288fc 100644 --- a/docs/source/spec-workload-executor.rst +++ b/docs/source/spec-workload-executor.rst @@ -184,46 +184,42 @@ Pseudocode Implementation # command-line invocation of the workload executor script. function workloadRunner(connectionString: string, driverWorkload: object): void { - # Use the MongoClient of the driver to be tested to connect to the Atlas Cluster. - const client = MongoClient(connectionString); - - # Create objects which will be used to run operations. - const db = client.db(driverWorkload.database); - const collection = db.collection(driverWorkload.collection); - - # Initialize counters. - var num_errors = 0; - var num_failures = 0; - var num_successes = 0; - - # Run the workload - operations are run sequentially, repeatedly - # until the termination signal is received. - # Do not attempt to initialize the cluster with the contents of - # ``testData`` - astrolabe takes care of this. + # Use the driver's unified test runner to run the workload. + const runner = UnifiedTestRunner(connectionString); + try { - while (True) { - for (let operation in workloadSpec.operations) { - try { - # The runOperation method runs operations as per the test format. 
- # The method return False if the actual return value of the operation does match the expected. - var was_succesful = runOperation(db, collection, operation); - if (was_successful) { - num_successes += 1; - } else { - num_errors += 1; - } - } catch (operationError) { - # We end up here if runOperation raises an unexpected error. - num_failures += 1; - } - } - } + runner.executeScenario(); } catch (terminationSignal) { # The workloadExecutor MUST handle the termination signal gracefully. # The termination signal will be used by astrolabe to terminate drivers operations that otherwise run ad infinitum. # The workload statistics must be written to a file named results.json in the current working directory. - fs.writeFile('results.json', JSON.stringify({‘numErrors’: num_errors, 'numFailures': num_failures, 'numSuccesses': num_successes})); } + + let results = {}; + let numSuccesses = runner.entityMap.get('iterationCount'); + let numErrors = 0; + let numFailures = 0; + for (name, events in runner.entityMap.get('events')) { + results[name] ||= []; + results[name].concat(events); + } + for (name, errors in runner.entityMap.get('errors')) { + results[name] ||= []; + results[name].concat(errors); + numErrors += errors.length; + } + for (name, failures in runner.entityMap.get('failures')) { + results[name] ||= []; + results[name].concat(failures); + numFailures += failures.length; + } + fs.writeFile('events.json', JSON.stringify(results); + + fs.writeFile('results.json', JSON.stringify({ + ‘numErrors’: numErrors, + 'numFailures': numFailures, + 'numSuccesses': numSuccesses, + })); } Reference Implementation From d819387c0eaf9f689b8de501f91c8107f382ffa9 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Mon, 1 Feb 2021 01:22:33 -0500 Subject: [PATCH 129/172] note cross-region network permission limit --- docs/source/integration-guide.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs/source/integration-guide.rst b/docs/source/integration-guide.rst index 
a89aa5fe..e647f8c1 100644 --- a/docs/source/integration-guide.rst +++ b/docs/source/integration-guide.rst @@ -282,3 +282,10 @@ not being monitored and solved with a particular SLA. If builds are failing and the failure appears to be caused by Atlas rather than the tests themselves, the driver being tested or ``astrolabe``, inquiring in ``cloud-non-prod-ops`` Slack channel is the next suggested troubleshooting step. + +Atlas has a limit of 40 "cross-region network permissions" by default. +This means a project can have no more than 40 nodes across all of its +clusters if any of its clusters employ multiple regions. The primary +takeover and primary removal tests use multi-region clusters; running +these tests alongside other tests may exceed the 40 node limit. A +request to the Cloud team is required to raise the limit. From 318ece84fd5433264ba3d974cd86fb0d3f4c646a Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Mon, 1 Feb 2021 02:19:04 -0500 Subject: [PATCH 130/172] typo fix --- astrolabe/validator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/astrolabe/validator.py b/astrolabe/validator.py index e9b1c016..086eb03c 100644 --- a/astrolabe/validator.py +++ b/astrolabe/validator.py @@ -44,7 +44,7 @@ def run_test(self, driver_workload): break if dbname is None and 'database' in e: dbname = e['database']['databaseName'] - elif collanme is None and 'collection' in e: + elif collname is None and 'collection' in e: collname = e['collection']['collectionName'] if not (dbname and collname): From 08d62c9a032a3e86a02664e6db843bd28716d2cc Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Mon, 1 Feb 2021 21:10:36 -0500 Subject: [PATCH 131/172] do not kill sessions, this is not allowed --- integrations/ruby/executor.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/integrations/ruby/executor.rb b/integrations/ruby/executor.rb index 241dde75..df25e1f8 100644 --- a/integrations/ruby/executor.rb +++ b/integrations/ruby/executor.rb @@ 
-43,7 +43,8 @@ def set_signal_handler end def unified_group - @unified_group ||= Unified::TestGroup.new(spec, client_args: uri) + @unified_group ||= Unified::TestGroup.new(spec, + client_args: uri, kill_sessions: false) end def unified_tests From 477f65774a61ccd7c80f18398a2a4df97854964c Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Tue, 2 Feb 2021 09:04:46 -0500 Subject: [PATCH 132/172] fix validation --- astrolabe/validator.py | 24 ++++++++++ integrations/ruby/executor.rb | 11 +++-- tests/validator-numErrors.yml | 32 +++++++------ tests/validator-numFailures-as-errors.yml | 58 +++++++++++++++++++++++ tests/validator-numFailures.yml | 46 ++++++++++-------- tests/validator-simple.yml | 18 ++++--- 6 files changed, 145 insertions(+), 44 deletions(-) create mode 100644 tests/validator-numFailures-as-errors.yml diff --git a/astrolabe/validator.py b/astrolabe/validator.py index 086eb03c..792f8f92 100644 --- a/astrolabe/validator.py +++ b/astrolabe/validator.py @@ -148,6 +148,30 @@ def test_num_failures(self): "to be reported, got {} instead.".format( num_reported_finds, num_reported_failures)) + def test_num_failures_as_errors(self): + driver_workload = JSONObject.from_dict( + yaml.load(open('tests/validator-numFailures-as-errors.yml').read(), Loader=yaml.FullLoader)['driverWorkload'] + ) + + stats = self.run_test(driver_workload) + + num_reported_finds = stats['numSuccesses'] + + num_reported_errors = stats['numErrors'] + num_reported_failures = stats['numFailures'] + if abs(num_reported_errors - num_reported_finds) > 1: + self.fail( + "The workload executor reported inconsistent execution " + "statistics. Expected approximately {} errored operations " + "to be reported, got {} instead.".format( + num_reported_finds, num_reported_failures)) + if num_reported_failures > 0: + self.fail( + "The workload executor reported unexpected execution " + "statistics. 
Expected all failures to be reported as errors, " + "got {} failures instead.".format( + num_reported_failures)) + def validator_factory(workload_executor, connection_string, startup_time): ValidateWorkloadExecutor.WORKLOAD_EXECUTOR = workload_executor diff --git a/integrations/ruby/executor.rb b/integrations/ruby/executor.rb index df25e1f8..57d28d14 100644 --- a/integrations/ruby/executor.rb +++ b/integrations/ruby/executor.rb @@ -10,7 +10,7 @@ class UnknownOperationConfiguration < StandardError; end class Executor def initialize(uri, spec) @uri, @spec = uri, spec - @operation_count = @failure_count = @error_count = 0 + @operation_count = @success_count = @failure_count = @error_count = 0 end attr_reader :uri, :spec @@ -54,8 +54,8 @@ def unified_tests def result { numOperations: @operation_count, - numSuccessfulOperations: @operation_count-@error_count-@failure_count, - numSuccesses: @operation_count-@error_count-@failure_count, + numSuccessfulOperations: @success_count, + numSuccesses: @success_count, numErrors: @error_count, numFailures: @failure_count, } @@ -65,6 +65,11 @@ def write_result {}.tap do |event_result| unified_tests.map do |test| @operation_count += test.entities.get(:iteration_count, 'iterations') + @success_count += test.entities.get(:success_count, 'successes') + test.entities[:event_list]&.each do |name, events| + event_result[name] ||= [] + event_result[name] += events + end test.entities[:event_list]&.each do |name, events| event_result[name] ||= [] event_result[name] += events diff --git a/tests/validator-numErrors.yml b/tests/validator-numErrors.yml index 8d6b93b9..7959ea69 100644 --- a/tests/validator-numErrors.yml +++ b/tests/validator-numErrors.yml @@ -29,19 +29,23 @@ driverWorkload: count: 0 tests: - - description: "updateOne" + - description: "updateOne & error" operations: - - name: updateOne - object: *collection0 + - name: loop + object: testRunner arguments: - filter: { _id: validation_sentinel} - update: - $inc: - count: 1 - - - 
description: "error" - operations: - - name: doesNotExist - object: *collection0 - arguments: - foo: bar + storeIterationsAsEntity: iterations + storeSuccessesAsEntity: successes + storeErrorsAsEntity: errors + operations: + - name: updateOne + object: *collection0 + arguments: + filter: { _id: validation_sentinel} + update: + $inc: + count: 1 + - name: doesNotExist + object: *collection0 + arguments: + foo: bar diff --git a/tests/validator-numFailures-as-errors.yml b/tests/validator-numFailures-as-errors.yml new file mode 100644 index 00000000..f5c96ced --- /dev/null +++ b/tests/validator-numFailures-as-errors.yml @@ -0,0 +1,58 @@ +# This file intentionally causes the workload executor to produce a failure +# on each execution. + +operations: [] + +driverWorkload: + description: "Validator - num failures" + + schemaVersion: "1.0" + + createEntities: + - client: + id: &client0 client0 + - database: + id: &database0 database0 + client: *client0 + databaseName: &database0Name dat + - collection: + id: &collection0 collection0 + database: *database0 + collectionName: &collection0Name dat + + initialData: + - collectionName: *collection0Name + databaseName: *database0Name + documents: + - + _id: 2 + x: 2 + + tests: + - description: "Find one & failure" + operations: + - name: loop + object: testRunner + arguments: + storeIterationsAsEntity: iterations + storeSuccessesAsEntity: successes + storeErrorsAsEntity: errors + operations: + - name: find + object: *collection0 + arguments: + filter: { _id: { $gt: 1 }} + sort: { _id: 1 } + expectResult: + - + _id: 2 + x: 2 + - name: find + object: *collection0 + arguments: + filter: { _id: { $gt: 1 }} + sort: { _id: 1 } + expectResult: + - + _id: 2 + x: 42 diff --git a/tests/validator-numFailures.yml b/tests/validator-numFailures.yml index 2fe85b39..76da3575 100644 --- a/tests/validator-numFailures.yml +++ b/tests/validator-numFailures.yml @@ -29,26 +29,30 @@ driverWorkload: x: 2 tests: - - description: "Find one" + - 
description: "Find one & failure" operations: - - name: find - object: *collection0 + - name: loop + object: testRunner arguments: - filter: { _id: { $gt: 1 }} - sort: { _id: 1 } - expectResult: - - - _id: 2 - x: 2 - - - description: "Find one - failure" - operations: - - name: find - object: *collection0 - arguments: - filter: { _id: { $gt: 1 }} - sort: { _id: 1 } - expectResult: - - - _id: 2 - x: 42 + storeIterationsAsEntity: iterations + storeSuccessesAsEntity: successes + storeFailuresAsEntity: errors + operations: + - name: find + object: *collection0 + arguments: + filter: { _id: { $gt: 1 }} + sort: { _id: 1 } + expectResult: + - + _id: 2 + x: 2 + - name: find + object: *collection0 + arguments: + filter: { _id: { $gt: 1 }} + sort: { _id: 1 } + expectResult: + - + _id: 2 + x: 42 diff --git a/tests/validator-simple.yml b/tests/validator-simple.yml index 514758b5..ae25991f 100644 --- a/tests/validator-simple.yml +++ b/tests/validator-simple.yml @@ -28,10 +28,16 @@ driverWorkload: tests: - description: "updateOne" operations: - - name: updateOne - object: *collection0 + - name: loop + object: testRunner arguments: - filter: { _id: validation_sentinel} - update: - $inc: - count: 1 + storeIterationsAsEntity: iterations + storeSuccessesAsEntity: successes + operations: + - name: updateOne + object: *collection0 + arguments: + filter: { _id: validation_sentinel} + update: + $inc: + count: 1 From be8dbbfbb987f0abde4b2b0df7b61a1a64805435 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Tue, 2 Feb 2021 09:11:50 -0500 Subject: [PATCH 133/172] report number of successful operations separately from number of iterations done --- astrolabe/validator.py | 24 ++++++++++++++++++++++++ docs/source/spec-test-format.rst | 4 ++-- docs/source/spec-workload-executor.rst | 6 +++++- integrations/ruby/executor.rb | 8 ++++---- 4 files changed, 35 insertions(+), 7 deletions(-) diff --git a/astrolabe/validator.py b/astrolabe/validator.py index 792f8f92..62677a2b 100644 --- 
a/astrolabe/validator.py +++ b/astrolabe/validator.py @@ -101,6 +101,12 @@ def test_simple(self): "statistics. Expected {} successful " "updates to be reported, got {} instead.".format( update_count, num_reported_updates)) + if abs(stats['numIterations'] - update_count) > 1: + self.fail( + "The workload executor reported inconsistent execution " + "statistics. Expected {} iterations " + "to be reported, got {} instead.".format( + update_count, stats['numIterations'])) if update_count == 0: self.fail( "The workload executor didn't execute any operations " @@ -130,6 +136,12 @@ def test_num_errors(self): "statistics. Expected approximately {} errored operations " "to be reported, got {} instead.".format( num_reported_updates, num_reported_errors)) + if abs(stats['numIterations'] - update_count) > 1: + self.fail( + "The workload executor reported inconsistent execution " + "statistics. Expected {} iterations " + "to be reported, got {} instead.".format( + update_count, stats['numIterations'])) def test_num_failures(self): driver_workload = JSONObject.from_dict( @@ -147,6 +159,12 @@ def test_num_failures(self): "statistics. Expected approximately {} errored operations " "to be reported, got {} instead.".format( num_reported_finds, num_reported_failures)) + if abs(stats['numIterations'] - num_reported_finds) > 1: + self.fail( + "The workload executor reported inconsistent execution " + "statistics. Expected {} iterations " + "to be reported, got {} instead.".format( + num_reported_finds, stats['numIterations'])) def test_num_failures_as_errors(self): driver_workload = JSONObject.from_dict( @@ -171,6 +189,12 @@ def test_num_failures_as_errors(self): "statistics. Expected all failures to be reported as errors, " "got {} failures instead.".format( num_reported_failures)) + if abs(stats['numIterations'] - num_reported_finds) > 1: + self.fail( + "The workload executor reported inconsistent execution " + "statistics. 
Expected {} iterations " + "to be reported, got {} instead.".format( + num_reported_finds, stats['numIterations'])) def validator_factory(workload_executor, connection_string, startup_time): diff --git a/docs/source/spec-test-format.rst b/docs/source/spec-test-format.rst index fa5cef04..e78c15f3 100644 --- a/docs/source/spec-test-format.rst +++ b/docs/source/spec-test-format.rst @@ -108,8 +108,8 @@ A Test Scenario File has the following keys: define the server operations to execute during maintenance. There SHOULD be exactly one ``loop`` operation per scenario, and it SHOULD be the last operation in the scenario. The scenario SHOULD use - ``storeErrorsAsEntity``, ``storeFailuresAsEntity`` and - ``storeIterationsAsEntity`` operation arguments to allow the workload + ``storeErrorsAsEntity``, ``storeFailuresAsEntity``, ``storeSuccesesAsEntity`` + and ``storeIterationsAsEntity`` operation arguments to allow the workload executor to retrieve errors and failures that occur during these operations. The scenario MAY use ``storeEventsAsEntities`` operation argument diff --git a/docs/source/spec-workload-executor.rst b/docs/source/spec-workload-executor.rst index f4e288fc..547eba05 100644 --- a/docs/source/spec-workload-executor.rst +++ b/docs/source/spec-workload-executor.rst @@ -102,6 +102,9 @@ After accepting the inputs, the workload executor: * iteration count: the number of iterations that the workload executor performed over the looped operations. + * success count: the number of successful operations that the workload + executor performed over the looped operations. + * error lists: arrays of documents describing the errors that occurred while the workload executor was executing the operations. Each client entity may report errors to a separate error list, or the same @@ -152,8 +155,9 @@ After accepting the inputs, the workload executor: * ``numFailures``: the number of operation failures that were encountered during the test. 
This includes failures handled by the workload executor and failures handled by the unified test runner. - * ``numSuccesses``: the number of successful loop iterations executed + * ``numSuccesses``: the number of successful operations executed during the test. + * ``numIterations``: the number of loop iterations executed during the test. .. note:: The values of ``numErrors`` and ``numFailures`` are used by ``astrolabe`` to determine the overall success or failure of a driver workload execution. A non-zero value for either of these fields is construed diff --git a/integrations/ruby/executor.rb b/integrations/ruby/executor.rb index 57d28d14..93ed731e 100644 --- a/integrations/ruby/executor.rb +++ b/integrations/ruby/executor.rb @@ -10,11 +10,11 @@ class UnknownOperationConfiguration < StandardError; end class Executor def initialize(uri, spec) @uri, @spec = uri, spec - @operation_count = @success_count = @failure_count = @error_count = 0 + @iteration_count = @success_count = @failure_count = @error_count = 0 end attr_reader :uri, :spec - attr_reader :operation_count, :failure_count, :error_count + attr_reader :iteration_count, :failure_count, :error_count attr_reader :metrics_collector def run @@ -53,7 +53,7 @@ def unified_tests def result { - numOperations: @operation_count, + numIterations: @iteration_count, numSuccessfulOperations: @success_count, numSuccesses: @success_count, numErrors: @error_count, @@ -64,7 +64,7 @@ def result def write_result {}.tap do |event_result| unified_tests.map do |test| - @operation_count += test.entities.get(:iteration_count, 'iterations') + @iteration_count += test.entities.get(:iteration_count, 'iterations') @success_count += test.entities.get(:success_count, 'successes') test.entities[:event_list]&.each do |name, events| event_result[name] ||= [] From a3e3c9b37f95bab592d3de9f06a08d9f90fb01fe Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Tue, 2 Feb 2021 18:49:35 -0500 Subject: [PATCH 134/172] instantiate the test runner before 
setting up signal handler --- integrations/ruby/executor.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/integrations/ruby/executor.rb b/integrations/ruby/executor.rb index 93ed731e..d9b0b7dd 100644 --- a/integrations/ruby/executor.rb +++ b/integrations/ruby/executor.rb @@ -18,6 +18,8 @@ def initialize(uri, spec) attr_reader :metrics_collector def run + unified_tests + set_signal_handler unified_tests.each do |test| test.create_entities From 3a18de3d36f1bbc2ce83798aab44e1a550e039b1 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Wed, 3 Feb 2021 01:13:24 -0500 Subject: [PATCH 135/172] allow 60 seconds for workload executor to terminate --- astrolabe/utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/astrolabe/utils.py b/astrolabe/utils.py index 267c926e..f1cc7f3c 100644 --- a/astrolabe/utils.py +++ b/astrolabe/utils.py @@ -251,7 +251,9 @@ def terminate(self): else: os.kill(self.workload_subprocess.pid, signal.CTRL_BREAK_EVENT) - t_wait = 10 + # Since the default server selection timeout is 30 seconds, + # allow up to 60 seconds for the workload executor to terminate. 
+ t_wait = 60 try: self.workload_subprocess.wait(timeout=t_wait) LOGGER.info("Stopped workload executor [PID: {}]".format(self.pid)) From 0250677d9025b4f9dde1c0b13625998489c58678 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Wed, 3 Feb 2021 01:15:44 -0500 Subject: [PATCH 136/172] fix assertPrimaryRegion syntax --- tests/retryReads-primaryRemoval.yml | 3 ++- tests/retryReads-primaryTakeover.yml | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/retryReads-primaryRemoval.yml b/tests/retryReads-primaryRemoval.yml index 98673538..a26af78d 100644 --- a/tests/retryReads-primaryRemoval.yml +++ b/tests/retryReads-primaryRemoval.yml @@ -20,7 +20,8 @@ initialConfiguration: processArgs: {} operations: - - assertPrimaryRegion: US_EAST_1 + - assertPrimaryRegion: + region: US_EAST_1 - setClusterConfiguration: diff --git a/tests/retryReads-primaryTakeover.yml b/tests/retryReads-primaryTakeover.yml index 238ceded..180596cd 100644 --- a/tests/retryReads-primaryTakeover.yml +++ b/tests/retryReads-primaryTakeover.yml @@ -16,8 +16,8 @@ initialConfiguration: processArgs: {} operations: - - - assertPrimaryRegion: US_WEST_1 + - assertPrimaryRegion: + region: US_WEST_1 - setClusterConfiguration: From c5c13236cad8f969f13543dee86f2c5a24509671 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Wed, 3 Feb 2021 01:20:01 -0500 Subject: [PATCH 137/172] mandate ignoring values for boolean-valued operations --- docs/source/spec-test-format.rst | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/docs/source/spec-test-format.rst b/docs/source/spec-test-format.rst index e78c15f3..4ce875f5 100644 --- a/docs/source/spec-test-format.rst +++ b/docs/source/spec-test-format.rst @@ -46,9 +46,13 @@ A Test Scenario File has the following keys: instanceSizeName: M10 processArgs: {} - * testFailover: trigger an election in the cluste rusing the "test failover" + * testFailover: trigger an election in the cluster using the "test failover" API endpoint. 
The value MUST be ``true``. + The workload executor MUST ignore the value of this key, so that + the value can be changed to a hash in the future to provide options + to the operation. + testFailover SHOULD be followed by sleep and waitForIdle operations because it does not update maintenance state synchronously (see `PRODTRUAGE-1232 `_). @@ -60,6 +64,10 @@ A Test Scenario File has the following keys: * restartVms: perform a rolling restart of all nodes in the cluster. This operation requires Atlas Global Operator API key to be set when invoking ``astrolabe``. The value MUST be ``true``. + + The workload executor MUST ignore the value of this key, so that + the value can be changed to a hash in the future to provide options + to the operation. testFailover SHOULD be followed by sleep and waitForIdle operations because it does not update maintenance state synchronously. @@ -95,6 +103,10 @@ A Test Scenario File has the following keys: * waitForIdle: wait for cluster maintenance state to become "idle". The value MUST be ``true``. + + The workload executor MUST ignore the value of this key, so that + the value can be changed to a hash in the future to provide options + to the operation. 
Example:: From cc48c295192c5b402004ce061171d167f2b194ed Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Wed, 3 Feb 2021 01:25:06 -0500 Subject: [PATCH 138/172] update test files for changed event syntax --- tests/retryReads-move-sharded.yml | 26 ++++++++++----------- tests/retryReads-move.yml | 26 ++++++++++----------- tests/retryReads-primaryRemoval.yml | 26 ++++++++++----------- tests/retryReads-primaryTakeover.yml | 26 ++++++++++----------- tests/retryReads-processRestart-sharded.yml | 26 ++++++++++----------- tests/retryReads-processRestart.yml | 26 ++++++++++----------- tests/retryReads-resizeCluster.yml | 26 ++++++++++----------- tests/retryReads-testFailover-sharded.yml | 26 ++++++++++----------- tests/retryReads-testFailover.yml | 26 ++++++++++----------- tests/retryReads-toggleServerSideJS.yml | 26 ++++++++++----------- tests/retryReads-vmRestart-sharded.yml | 26 ++++++++++----------- tests/retryReads-vmRestart.yml | 26 ++++++++++----------- tests/retryWrites-resizeCluster.yml | 26 ++++++++++----------- tests/retryWrites-toggleServerSideJS.yml | 26 ++++++++++----------- 14 files changed, 168 insertions(+), 196 deletions(-) diff --git a/tests/retryReads-move-sharded.yml b/tests/retryReads-move-sharded.yml index 8ee979ef..fff86e42 100644 --- a/tests/retryReads-move-sharded.yml +++ b/tests/retryReads-move-sharded.yml @@ -28,20 +28,18 @@ driverWorkload: uriOptions: retryReads: true storeEventsAsEntities: - CommandStartedEvent: command - CommandSucceededEvent: command - CommandFailedEvent: command - PoolCreatedEvent: connection - PoolReadyEvent: connection - PoolClearedEvent: connection - PoolClosedEvent: connection - ConnectionCreatedEvent: connection - ConnectionReadyEvent: connection - ConnectionClosedEvent: connection - ConnectionCheckOutStartedEvent: connection - ConnectionCheckOutFailedEvent: connection - ConnectionCheckedOutEvent: connection - ConnectionCheckedInEvent: connection + connection: + - PoolCreatedEvent + - PoolReadyEvent + - 
PoolClearedEvent + - PoolClosedEvent + - ConnectionCreatedEvent + - ConnectionReadyEvent + - ConnectionClosedEvent + - ConnectionCheckOutStartedEvent + - ConnectionCheckOutFailedEvent + - ConnectionCheckedOutEvent + - ConnectionCheckedInEvent - database: id: &database0 database0 client: *client0 diff --git a/tests/retryReads-move.yml b/tests/retryReads-move.yml index 073e6078..37108563 100644 --- a/tests/retryReads-move.yml +++ b/tests/retryReads-move.yml @@ -28,20 +28,18 @@ driverWorkload: uriOptions: retryReads: true storeEventsAsEntities: - CommandStartedEvent: command - CommandSucceededEvent: command - CommandFailedEvent: command - PoolCreatedEvent: connection - PoolReadyEvent: connection - PoolClearedEvent: connection - PoolClosedEvent: connection - ConnectionCreatedEvent: connection - ConnectionReadyEvent: connection - ConnectionClosedEvent: connection - ConnectionCheckOutStartedEvent: connection - ConnectionCheckOutFailedEvent: connection - ConnectionCheckedOutEvent: connection - ConnectionCheckedInEvent: connection + connection: + - PoolCreatedEvent + - PoolReadyEvent + - PoolClearedEvent + - PoolClosedEvent + - ConnectionCreatedEvent + - ConnectionReadyEvent + - ConnectionClosedEvent + - ConnectionCheckOutStartedEvent + - ConnectionCheckOutFailedEvent + - ConnectionCheckedOutEvent + - ConnectionCheckedInEvent - database: id: &database0 database0 client: *client0 diff --git a/tests/retryReads-primaryRemoval.yml b/tests/retryReads-primaryRemoval.yml index a26af78d..587e9044 100644 --- a/tests/retryReads-primaryRemoval.yml +++ b/tests/retryReads-primaryRemoval.yml @@ -56,20 +56,18 @@ driverWorkload: uriOptions: retryReads: true storeEventsAsEntities: - CommandStartedEvent: command - CommandSucceededEvent: command - CommandFailedEvent: command - PoolCreatedEvent: connection - PoolReadyEvent: connection - PoolClearedEvent: connection - PoolClosedEvent: connection - ConnectionCreatedEvent: connection - ConnectionReadyEvent: connection - ConnectionClosedEvent: 
connection - ConnectionCheckOutStartedEvent: connection - ConnectionCheckOutFailedEvent: connection - ConnectionCheckedOutEvent: connection - ConnectionCheckedInEvent: connection + connection: + - PoolCreatedEvent + - PoolReadyEvent + - PoolClearedEvent + - PoolClosedEvent + - ConnectionCreatedEvent + - ConnectionReadyEvent + - ConnectionClosedEvent + - ConnectionCheckOutStartedEvent + - ConnectionCheckOutFailedEvent + - ConnectionCheckedOutEvent + - ConnectionCheckedInEvent - database: id: &database0 database0 client: *client0 diff --git a/tests/retryReads-primaryTakeover.yml b/tests/retryReads-primaryTakeover.yml index 180596cd..064495f9 100644 --- a/tests/retryReads-primaryTakeover.yml +++ b/tests/retryReads-primaryTakeover.yml @@ -57,20 +57,18 @@ driverWorkload: uriOptions: retryReads: true storeEventsAsEntities: - CommandStartedEvent: command - CommandSucceededEvent: command - CommandFailedEvent: command - PoolCreatedEvent: connection - PoolReadyEvent: connection - PoolClearedEvent: connection - PoolClosedEvent: connection - ConnectionCreatedEvent: connection - ConnectionReadyEvent: connection - ConnectionClosedEvent: connection - ConnectionCheckOutStartedEvent: connection - ConnectionCheckOutFailedEvent: connection - ConnectionCheckedOutEvent: connection - ConnectionCheckedInEvent: connection + connection: + - PoolCreatedEvent + - PoolReadyEvent + - PoolClearedEvent + - PoolClosedEvent + - ConnectionCreatedEvent + - ConnectionReadyEvent + - ConnectionClosedEvent + - ConnectionCheckOutStartedEvent + - ConnectionCheckOutFailedEvent + - ConnectionCheckedOutEvent + - ConnectionCheckedInEvent - database: id: &database0 database0 client: *client0 diff --git a/tests/retryReads-processRestart-sharded.yml b/tests/retryReads-processRestart-sharded.yml index 558f58dc..3b97090c 100644 --- a/tests/retryReads-processRestart-sharded.yml +++ b/tests/retryReads-processRestart-sharded.yml @@ -30,20 +30,18 @@ driverWorkload: uriOptions: retryReads: true storeEventsAsEntities: - 
CommandStartedEvent: command - CommandSucceededEvent: command - CommandFailedEvent: command - PoolCreatedEvent: connection - PoolReadyEvent: connection - PoolClearedEvent: connection - PoolClosedEvent: connection - ConnectionCreatedEvent: connection - ConnectionReadyEvent: connection - ConnectionClosedEvent: connection - ConnectionCheckOutStartedEvent: connection - ConnectionCheckOutFailedEvent: connection - ConnectionCheckedOutEvent: connection - ConnectionCheckedInEvent: connection + connection: + - PoolCreatedEvent + - PoolReadyEvent + - PoolClearedEvent + - PoolClosedEvent + - ConnectionCreatedEvent + - ConnectionReadyEvent + - ConnectionClosedEvent + - ConnectionCheckOutStartedEvent + - ConnectionCheckOutFailedEvent + - ConnectionCheckedOutEvent + - ConnectionCheckedInEvent - database: id: &database0 database0 client: *client0 diff --git a/tests/retryReads-processRestart.yml b/tests/retryReads-processRestart.yml index 12f19861..9b9c429a 100644 --- a/tests/retryReads-processRestart.yml +++ b/tests/retryReads-processRestart.yml @@ -30,20 +30,18 @@ driverWorkload: uriOptions: retryReads: true storeEventsAsEntities: - CommandStartedEvent: command - CommandSucceededEvent: command - CommandFailedEvent: command - PoolCreatedEvent: connection - PoolReadyEvent: connection - PoolClearedEvent: connection - PoolClosedEvent: connection - ConnectionCreatedEvent: connection - ConnectionReadyEvent: connection - ConnectionClosedEvent: connection - ConnectionCheckOutStartedEvent: connection - ConnectionCheckOutFailedEvent: connection - ConnectionCheckedOutEvent: connection - ConnectionCheckedInEvent: connection + connection: + - PoolCreatedEvent + - PoolReadyEvent + - PoolClearedEvent + - PoolClosedEvent + - ConnectionCreatedEvent + - ConnectionReadyEvent + - ConnectionClosedEvent + - ConnectionCheckOutStartedEvent + - ConnectionCheckOutFailedEvent + - ConnectionCheckedOutEvent + - ConnectionCheckedInEvent - database: id: &database0 database0 client: *client0 diff --git 
a/tests/retryReads-resizeCluster.yml b/tests/retryReads-resizeCluster.yml index 4f964b41..b02789fb 100644 --- a/tests/retryReads-resizeCluster.yml +++ b/tests/retryReads-resizeCluster.yml @@ -28,20 +28,18 @@ driverWorkload: uriOptions: retryReads: true storeEventsAsEntities: - CommandStartedEvent: command - CommandSucceededEvent: command - CommandFailedEvent: command - PoolCreatedEvent: connection - PoolReadyEvent: connection - PoolClearedEvent: connection - PoolClosedEvent: connection - ConnectionCreatedEvent: connection - ConnectionReadyEvent: connection - ConnectionClosedEvent: connection - ConnectionCheckOutStartedEvent: connection - ConnectionCheckOutFailedEvent: connection - ConnectionCheckedOutEvent: connection - ConnectionCheckedInEvent: connection + connection: + - PoolCreatedEvent + - PoolReadyEvent + - PoolClearedEvent + - PoolClosedEvent + - ConnectionCreatedEvent + - ConnectionReadyEvent + - ConnectionClosedEvent + - ConnectionCheckOutStartedEvent + - ConnectionCheckOutFailedEvent + - ConnectionCheckedOutEvent + - ConnectionCheckedInEvent - database: id: &database0 database0 client: *client0 diff --git a/tests/retryReads-testFailover-sharded.yml b/tests/retryReads-testFailover-sharded.yml index 697dc592..aff98095 100644 --- a/tests/retryReads-testFailover-sharded.yml +++ b/tests/retryReads-testFailover-sharded.yml @@ -26,20 +26,18 @@ driverWorkload: uriOptions: retryReads: true storeEventsAsEntities: - CommandStartedEvent: command - CommandSucceededEvent: command - CommandFailedEvent: command - PoolCreatedEvent: connection - PoolReadyEvent: connection - PoolClearedEvent: connection - PoolClosedEvent: connection - ConnectionCreatedEvent: connection - ConnectionReadyEvent: connection - ConnectionClosedEvent: connection - ConnectionCheckOutStartedEvent: connection - ConnectionCheckOutFailedEvent: connection - ConnectionCheckedOutEvent: connection - ConnectionCheckedInEvent: connection + connection: + - PoolCreatedEvent + - PoolReadyEvent + - 
PoolClearedEvent + - PoolClosedEvent + - ConnectionCreatedEvent + - ConnectionReadyEvent + - ConnectionClosedEvent + - ConnectionCheckOutStartedEvent + - ConnectionCheckOutFailedEvent + - ConnectionCheckedOutEvent + - ConnectionCheckedInEvent - database: id: &database0 database0 client: *client0 diff --git a/tests/retryReads-testFailover.yml b/tests/retryReads-testFailover.yml index b5d8799e..95c6b341 100644 --- a/tests/retryReads-testFailover.yml +++ b/tests/retryReads-testFailover.yml @@ -26,20 +26,18 @@ driverWorkload: uriOptions: retryReads: true storeEventsAsEntities: - CommandStartedEvent: command - CommandSucceededEvent: command - CommandFailedEvent: command - PoolCreatedEvent: connection - PoolReadyEvent: connection - PoolClearedEvent: connection - PoolClosedEvent: connection - ConnectionCreatedEvent: connection - ConnectionReadyEvent: connection - ConnectionClosedEvent: connection - ConnectionCheckOutStartedEvent: connection - ConnectionCheckOutFailedEvent: connection - ConnectionCheckedOutEvent: connection - ConnectionCheckedInEvent: connection + connection: + - PoolCreatedEvent + - PoolReadyEvent + - PoolClearedEvent + - PoolClosedEvent + - ConnectionCreatedEvent + - ConnectionReadyEvent + - ConnectionClosedEvent + - ConnectionCheckOutStartedEvent + - ConnectionCheckOutFailedEvent + - ConnectionCheckedOutEvent + - ConnectionCheckedInEvent - database: id: &database0 database0 client: *client0 diff --git a/tests/retryReads-toggleServerSideJS.yml b/tests/retryReads-toggleServerSideJS.yml index 20589e9f..5dec61f7 100644 --- a/tests/retryReads-toggleServerSideJS.yml +++ b/tests/retryReads-toggleServerSideJS.yml @@ -25,20 +25,18 @@ driverWorkload: uriOptions: retryReads: true storeEventsAsEntities: - CommandStartedEvent: command - CommandSucceededEvent: command - CommandFailedEvent: command - PoolCreatedEvent: connection - PoolReadyEvent: connection - PoolClearedEvent: connection - PoolClosedEvent: connection - ConnectionCreatedEvent: connection - 
ConnectionReadyEvent: connection - ConnectionClosedEvent: connection - ConnectionCheckOutStartedEvent: connection - ConnectionCheckOutFailedEvent: connection - ConnectionCheckedOutEvent: connection - ConnectionCheckedInEvent: connection + connection: + - PoolCreatedEvent + - PoolReadyEvent + - PoolClearedEvent + - PoolClosedEvent + - ConnectionCreatedEvent + - ConnectionReadyEvent + - ConnectionClosedEvent + - ConnectionCheckOutStartedEvent + - ConnectionCheckOutFailedEvent + - ConnectionCheckedOutEvent + - ConnectionCheckedInEvent - database: id: &database0 database0 client: *client0 diff --git a/tests/retryReads-vmRestart-sharded.yml b/tests/retryReads-vmRestart-sharded.yml index 02ad19d6..0dbd0b57 100644 --- a/tests/retryReads-vmRestart-sharded.yml +++ b/tests/retryReads-vmRestart-sharded.yml @@ -26,20 +26,18 @@ driverWorkload: uriOptions: retryReads: true storeEventsAsEntities: - CommandStartedEvent: command - CommandSucceededEvent: command - CommandFailedEvent: command - PoolCreatedEvent: connection - PoolReadyEvent: connection - PoolClearedEvent: connection - PoolClosedEvent: connection - ConnectionCreatedEvent: connection - ConnectionReadyEvent: connection - ConnectionClosedEvent: connection - ConnectionCheckOutStartedEvent: connection - ConnectionCheckOutFailedEvent: connection - ConnectionCheckedOutEvent: connection - ConnectionCheckedInEvent: connection + connection: + - PoolCreatedEvent + - PoolReadyEvent + - PoolClearedEvent + - PoolClosedEvent + - ConnectionCreatedEvent + - ConnectionReadyEvent + - ConnectionClosedEvent + - ConnectionCheckOutStartedEvent + - ConnectionCheckOutFailedEvent + - ConnectionCheckedOutEvent + - ConnectionCheckedInEvent - database: id: &database0 database0 client: *client0 diff --git a/tests/retryReads-vmRestart.yml b/tests/retryReads-vmRestart.yml index 8e4cd8c9..fea98662 100644 --- a/tests/retryReads-vmRestart.yml +++ b/tests/retryReads-vmRestart.yml @@ -26,20 +26,18 @@ driverWorkload: uriOptions: retryReads: true 
storeEventsAsEntities: - CommandStartedEvent: command - CommandSucceededEvent: command - CommandFailedEvent: command - PoolCreatedEvent: connection - PoolReadyEvent: connection - PoolClearedEvent: connection - PoolClosedEvent: connection - ConnectionCreatedEvent: connection - ConnectionReadyEvent: connection - ConnectionClosedEvent: connection - ConnectionCheckOutStartedEvent: connection - ConnectionCheckOutFailedEvent: connection - ConnectionCheckedOutEvent: connection - ConnectionCheckedInEvent: connection + connection: + - PoolCreatedEvent + - PoolReadyEvent + - PoolClearedEvent + - PoolClosedEvent + - ConnectionCreatedEvent + - ConnectionReadyEvent + - ConnectionClosedEvent + - ConnectionCheckOutStartedEvent + - ConnectionCheckOutFailedEvent + - ConnectionCheckedOutEvent + - ConnectionCheckedInEvent - database: id: &database0 database0 client: *client0 diff --git a/tests/retryWrites-resizeCluster.yml b/tests/retryWrites-resizeCluster.yml index 6e6e3e12..c397f577 100644 --- a/tests/retryWrites-resizeCluster.yml +++ b/tests/retryWrites-resizeCluster.yml @@ -28,20 +28,18 @@ driverWorkload: uriOptions: retryWrites: true storeEventsAsEntities: - CommandStartedEvent: command - CommandSucceededEvent: command - CommandFailedEvent: command - PoolCreatedEvent: connection - PoolReadyEvent: connection - PoolClearedEvent: connection - PoolClosedEvent: connection - ConnectionCreatedEvent: connection - ConnectionReadyEvent: connection - ConnectionClosedEvent: connection - ConnectionCheckOutStartedEvent: connection - ConnectionCheckOutFailedEvent: connection - ConnectionCheckedOutEvent: connection - ConnectionCheckedInEvent: connection + connection: + - PoolCreatedEvent + - PoolReadyEvent + - PoolClearedEvent + - PoolClosedEvent + - ConnectionCreatedEvent + - ConnectionReadyEvent + - ConnectionClosedEvent + - ConnectionCheckOutStartedEvent + - ConnectionCheckOutFailedEvent + - ConnectionCheckedOutEvent + - ConnectionCheckedInEvent - database: id: &database0 database0 client: 
*client0 diff --git a/tests/retryWrites-toggleServerSideJS.yml b/tests/retryWrites-toggleServerSideJS.yml index 496d22b0..516d1ebe 100644 --- a/tests/retryWrites-toggleServerSideJS.yml +++ b/tests/retryWrites-toggleServerSideJS.yml @@ -24,20 +24,18 @@ driverWorkload: uriOptions: retryWrites: true storeEventsAsEntities: - CommandStartedEvent: command - CommandSucceededEvent: command - CommandFailedEvent: command - PoolCreatedEvent: connection - PoolReadyEvent: connection - PoolClearedEvent: connection - PoolClosedEvent: connection - ConnectionCreatedEvent: connection - ConnectionReadyEvent: connection - ConnectionClosedEvent: connection - ConnectionCheckOutStartedEvent: connection - ConnectionCheckOutFailedEvent: connection - ConnectionCheckedOutEvent: connection - ConnectionCheckedInEvent: connection + connection: + - PoolCreatedEvent + - PoolReadyEvent + - PoolClearedEvent + - PoolClosedEvent + - ConnectionCreatedEvent + - ConnectionReadyEvent + - ConnectionClosedEvent + - ConnectionCheckOutStartedEvent + - ConnectionCheckOutFailedEvent + - ConnectionCheckedOutEvent + - ConnectionCheckedInEvent - database: id: &database0 database0 client: *client0 From aca6b5c0053e300ecda1f115a20e9cd78f47fbf7 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Wed, 3 Feb 2021 01:44:00 -0500 Subject: [PATCH 139/172] validate events --- astrolabe/validator.py | 19 +++++++++++++++++++ tests/validator-simple.yml | 13 +++++++++++++ 2 files changed, 32 insertions(+) diff --git a/astrolabe/validator.py b/astrolabe/validator.py index 62677a2b..40de844b 100644 --- a/astrolabe/validator.py +++ b/astrolabe/validator.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import os, os.path from copy import deepcopy from subprocess import TimeoutExpired from time import sleep @@ -89,6 +90,9 @@ def test_simple(self): driver_workload = JSONObject.from_dict( yaml.load(open('tests/validator-simple.yml').read(), Loader=yaml.FullLoader)['driverWorkload'] ) + + if os.path.exists('events.json'): + os.unlink('events.json') stats = self.run_test(driver_workload) @@ -111,6 +115,21 @@ def test_simple(self): self.fail( "The workload executor didn't execute any operations " "or didn't execute them appropriately.") + + events = yaml.load(open('events.json').read()) + if 'connection' not in events: + self.fail( + "The workload executor didn't record connection events as expected.") + for event in events['connection']: + if 'name' not in event: + self.fail( + "The workload executor didn't record event name as expected.") + if not event['name'].endswith('Event'): + self.fail( + "The workload executor didn't record event name as expected.") + if 'observedAt' not in event: + self.fail( + "The workload executor didn't record observation time as expected.") def test_num_errors(self): driver_workload = JSONObject.from_dict( diff --git a/tests/validator-simple.yml b/tests/validator-simple.yml index ae25991f..49f5dd6b 100644 --- a/tests/validator-simple.yml +++ b/tests/validator-simple.yml @@ -8,6 +8,19 @@ driverWorkload: createEntities: - client: id: &client0 client0 + storeEventsAsEntities: + connection: + - PoolCreatedEvent + - PoolReadyEvent + - PoolClearedEvent + - PoolClosedEvent + - ConnectionCreatedEvent + - ConnectionReadyEvent + - ConnectionClosedEvent + - ConnectionCheckOutStartedEvent + - ConnectionCheckOutFailedEvent + - ConnectionCheckedOutEvent + - ConnectionCheckedInEvent - database: id: &database0 database0 client: *client0 From b7c7a74ffa4f17cfd29bd909d8b3678b69883064 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Mon, 8 Feb 2021 08:16:42 -0500 Subject: [PATCH 140/172] get rid of metrics collector --- integrations/ruby/executor.rb | 1 
- integrations/ruby/workload-executor | 1 - 2 files changed, 2 deletions(-) diff --git a/integrations/ruby/executor.rb b/integrations/ruby/executor.rb index d9b0b7dd..2d1cbb9f 100644 --- a/integrations/ruby/executor.rb +++ b/integrations/ruby/executor.rb @@ -15,7 +15,6 @@ def initialize(uri, spec) attr_reader :uri, :spec attr_reader :iteration_count, :failure_count, :error_count - attr_reader :metrics_collector def run unified_tests diff --git a/integrations/ruby/workload-executor b/integrations/ruby/workload-executor index 7b37c0c4..d4956b95 100755 --- a/integrations/ruby/workload-executor +++ b/integrations/ruby/workload-executor @@ -62,6 +62,5 @@ executor = Executor.new(uri, spec) if options[:insert] executor.load_data else - $metrics_collector = executor.metrics_collector executor.run end From 58ce52db2a7e9af34b0b155c2541f0c8c8c54069 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Mon, 8 Feb 2021 08:18:59 -0500 Subject: [PATCH 141/172] spelling fix --- docs/source/spec-test-format.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/spec-test-format.rst b/docs/source/spec-test-format.rst index 4ce875f5..87202dd5 100644 --- a/docs/source/spec-test-format.rst +++ b/docs/source/spec-test-format.rst @@ -55,7 +55,7 @@ A Test Scenario File has the following keys: testFailover SHOULD be followed by sleep and waitForIdle operations because it does not update maintenance state synchronously (see - `PRODTRUAGE-1232 `_). + `PRODTRIAGE-1232 `_). 
Example:: From 7317796be12869f75b062052f1bb1bf178913476 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Mon, 8 Feb 2021 08:22:06 -0500 Subject: [PATCH 142/172] update test format spec for unified spec changes --- docs/source/spec-test-format.rst | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/source/spec-test-format.rst b/docs/source/spec-test-format.rst index 87202dd5..2a721be5 100644 --- a/docs/source/spec-test-format.rst +++ b/docs/source/spec-test-format.rst @@ -117,7 +117,7 @@ A Test Scenario File has the following keys: `Unified Test Format specification `_. The workload SHOULD use the ``loop`` unified test format operation to - define the server operations to execute during maintenance. There SHOULD + define the MongoDB operations to execute during maintenance. There MUST be exactly one ``loop`` operation per scenario, and it SHOULD be the last operation in the scenario. The scenario SHOULD use ``storeErrorsAsEntity``, ``storeFailuresAsEntity``, ``storeSuccesesAsEntity`` @@ -125,8 +125,10 @@ A Test Scenario File has the following keys: executor to retrieve errors and failures that occur during these operations. The scenario MAY use ``storeEventsAsEntities`` operation argument - when defining MongoClients to record events published during maintenance - and store these events as Evergreen artifacts. + when defining MongoClients to record CMAP events published during maintenance. + The entity name for ``storeEventsAsEntities`` argument MUST be ``events``. + If this option is used, ``astrolabe`` will retrieve the collected events + and store them as an Evergreen build artifact. .. note:: A previous version of this document specified a top-level ``uriOptions`` for specifying URI options for the MongoClient under test. 
From 8d8113870bd4c85b5ead9d5123c5d6cbe30a4fc2 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Mon, 8 Feb 2021 08:35:16 -0500 Subject: [PATCH 143/172] update workload executor spec --- docs/source/spec-test-format.rst | 16 ++++-- docs/source/spec-workload-executor.rst | 80 +++++++++++--------------- 2 files changed, 46 insertions(+), 50 deletions(-) diff --git a/docs/source/spec-test-format.rst b/docs/source/spec-test-format.rst index 2a721be5..cd3af4da 100644 --- a/docs/source/spec-test-format.rst +++ b/docs/source/spec-test-format.rst @@ -119,10 +119,18 @@ A Test Scenario File has the following keys: The workload SHOULD use the ``loop`` unified test format operation to define the MongoDB operations to execute during maintenance. There MUST be exactly one ``loop`` operation per scenario, and it SHOULD be the last - operation in the scenario. The scenario SHOULD use - ``storeErrorsAsEntity``, ``storeFailuresAsEntity``, ``storeSuccesesAsEntity`` - and ``storeIterationsAsEntity`` operation arguments to allow the workload - executor to retrieve errors and failures that occur during these operations. + operation in the scenario. + + The scenario SHOULD use ``storeErrorsAsEntity``, ``storeFailuresAsEntity``, + ``storeSuccessesAsEntity`` and ``storeIterationsAsEntity`` operation arguments + to allow the workload executor to retrieve errors, failures and operation + counts for the executed workload. The entity names for these options MUST + be as follows: + + - ``storeErrorsAsEntity``: ``errors`` + - ``storeFailuresAsEntity``: ``failures`` + - ``storeSuccessesAsEntity``: ``successes`` + - ``storeIterationsAsEntity``: ``iterations`` The scenario MAY use ``storeEventsAsEntities`` operation argument when defining MongoClients to record CMAP events published during maintenance. 
diff --git a/docs/source/spec-workload-executor.rst b/docs/source/spec-workload-executor.rst index 547eba05..8568ef94 100644 --- a/docs/source/spec-workload-executor.rst +++ b/docs/source/spec-workload-executor.rst @@ -63,7 +63,8 @@ After accepting the inputs, the workload executor: terminated by the workload executor; otherwise, the workload will terminate when the unified test runner finishes executing all of the operations. The workload executor MUST handle the case of a non-looping workload and - it MUST terminate when the workload terminates. + it MUST terminate if the unified test runner completely executes the + specified workload. If the unified test runner raises an error while executing the workload, the error MUST be reported using the same format as errors handled by the @@ -81,68 +82,55 @@ After accepting the inputs, the workload executor: be treated as errors by the workload executor. #. Upon receipt of the termination signal, MUST instruct the - unified test runner to stop running the ``loop`` operation, if one - is currently running. If the unified test runner is not currently running - any ``loop`` operations, the workload executor MUST instruct the - unified test runner to terminate when the next ``loop`` operation is - encountered. The workload executor MAY attempt to terminate the - unified test runner sooner (such as instructing the unified test runner - to terminate after completing the current operation). - The workload executor SHOULD terminate the unified test runner gracefully, - such that in-progress operations are completed to their natural outcome - (success or failure). - -#. MUST wait for the unified test runner to terminate, either due to the - receipt of the termination signal or due to completely executing all of - the operations if they do not include loops. + unified test runner to stop looping, as defined in the unified test format. + +#. MUST wait for the unified test runner to finish executing. -#. 
MUST use the driver's unified test runner to retrieve the following - entities from the entity map, if they are set: +#. MUST use the unified test runner to retrieve the following + entities by name from the entity map, if they are set: - * iteration count: the number of iterations that the workload executor - performed over the looped operations. + * ``iterations``: the number of iterations that the workload executor + performed over the looped operations. If the iteration count was not + reported by the test runner, such as because the respective option was + not specified in the test scenario, the workload executor MUST use + ``-1`` as the number of iterations. - * success count: the number of successful operations that the workload - executor performed over the looped operations. + * ``successes``: the number of successful operations that the workload + executor performed over the looped operations. If the success count + was not reported by the test runner, such as because the respective + option was not specified in the test scenario, the workload executor + MUST use ``-1`` as the number of successes. - * error lists: arrays of documents describing the errors that occurred - while the workload executor was executing the operations. Each client - entity may report errors to a separate error list, or the same - error list may be used by multiple client entities. + * ``errors``: array of documents describing the errors that occurred - while the workload executor was executing the operations. - * failure lists: arrays of documents describing the failures that occurred - while the workload executor was executing the operations. Each client - entity may report errors to a separate failure list, or the same - failure list may be used by multiple client entities. + * ``failures``: array of documents describing the failures that occurred + while the workload executor was executing the operations. 
- * event lists: arrays of documents describing the events that occurred - while the workload executor was executing the operations. Each client - entity may report events to a separate event list, or the same - event list may be used by multiple client entities. + * ``events``: array of documents describing the CMAP events that occurred + while the workload executor was executing the operations. #. MUST calculate the aggregate counts of errors (``numErrors``) and failures - (``numFailures``) from the error and failure lists. + (``numFailures``) from the error and failure lists. If the errors or + failures were not reported by the test runner, such as because the + respective options were not specified in the test scenario, the workload + executor MUST use ``-1`` as the value for the respective counts. #. MUST write the collected events, errors and failures into a JSON file named ``events.json`` in the current directory (i.e. the directory from where the workload executor is being executed). The data written MUST be a map with the following fields: - - For each event list entity, the name of the entity MUST become a key and the - documents stored in the entity MUST become the respective value. - - - For each error list entity, the name of the entity MUST become a key and the - documents stored in the entity MUST become the respective value. + - ``events``: the collected CMAP events. - - The errors that the workload executor handles MUST be stored using the - ``errors`` key. + - ``errors``: the reported errors. - - For each failure list entity, the name of the entity MUST become a key and the - documents stored in the entity MUST become the respective value. + - ``failures``: the reported failures. 
- + If events, errors or failures were not reported by the unified test runner, + such as because the scenario did not specify the corresponding options, + the workload executor MUST write empty arrays into ``events.json``. + #. MUST write the collected workload statistics into a JSON file named ``results.json`` in the current working directory (i.e. the directory from where the workload executor is being executed). Workload statistics From 566caa581f8d876a85e1d833d320acec79e62f5c Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Mon, 8 Feb 2021 08:43:13 -0500 Subject: [PATCH 144/172] update entity retrieval --- integrations/ruby/executor.rb | 24 ++++++--------------- tests/retryReads-move-sharded.yml | 3 ++- tests/retryReads-move.yml | 3 ++- tests/retryReads-primaryRemoval.yml | 3 ++- tests/retryReads-primaryTakeover.yml | 3 ++- tests/retryReads-processRestart-sharded.yml | 3 ++- tests/retryReads-processRestart.yml | 3 ++- tests/retryReads-resizeCluster.yml | 3 ++- tests/retryReads-testFailover-sharded.yml | 3 ++- tests/retryReads-testFailover.yml | 3 ++- tests/retryReads-toggleServerSideJS.yml | 3 ++- tests/retryReads-vmRestart-sharded.yml | 3 ++- tests/retryReads-vmRestart.yml | 3 ++- tests/retryWrites-resizeCluster.yml | 3 ++- tests/retryWrites-toggleServerSideJS.yml | 3 ++- tests/validator-simple.yml | 2 +- 16 files changed, 35 insertions(+), 33 deletions(-) diff --git a/integrations/ruby/executor.rb b/integrations/ruby/executor.rb index 2d1cbb9f..5820a021 100644 --- a/integrations/ruby/executor.rb +++ b/integrations/ruby/executor.rb @@ -64,27 +64,15 @@ def result def write_result {}.tap do |event_result| + @events = [] + @errors = [] + @failures = [] unified_tests.map do |test| @iteration_count += test.entities.get(:iteration_count, 'iterations') @success_count += test.entities.get(:success_count, 'successes') - test.entities[:event_list]&.each do |name, events| - event_result[name] ||= [] - event_result[name] += events - end - 
test.entities[:event_list]&.each do |name, events| - event_result[name] ||= [] - event_result[name] += events - end - test.entities[:error_list]&.each do |name, errors| - @error_count += errors.length - event_result[name] ||= [] - event_result[name] += errors - end - test.entities[:failure_list]&.each do |name, failures| - @failure_count += failures.length - event_result[name] ||= [] - event_result[name] += failures - end + @events += test.entities.get(:event_list, 'events') + @errors += test.entities.get(:error_list, 'errors') + @failures += test.entities.get(:failure_list, 'failures') end File.open('events.json', 'w') do |f| f << JSON.dump(event_result) diff --git a/tests/retryReads-move-sharded.yml b/tests/retryReads-move-sharded.yml index fff86e42..0abe6b72 100644 --- a/tests/retryReads-move-sharded.yml +++ b/tests/retryReads-move-sharded.yml @@ -28,7 +28,7 @@ driverWorkload: uriOptions: retryReads: true storeEventsAsEntities: - connection: + events: - PoolCreatedEvent - PoolReadyEvent - PoolClearedEvent @@ -65,6 +65,7 @@ driverWorkload: arguments: storeErrorsAsEntity: errors storeIterationsAsEntity: iterations + storeSuccessesAsEntity: successes operations: - name: find object: *collection0 diff --git a/tests/retryReads-move.yml b/tests/retryReads-move.yml index 37108563..7b32af62 100644 --- a/tests/retryReads-move.yml +++ b/tests/retryReads-move.yml @@ -28,7 +28,7 @@ driverWorkload: uriOptions: retryReads: true storeEventsAsEntities: - connection: + events: - PoolCreatedEvent - PoolReadyEvent - PoolClearedEvent @@ -65,6 +65,7 @@ driverWorkload: arguments: storeErrorsAsEntity: errors storeIterationsAsEntity: iterations + storeSuccessesAsEntity: successes operations: - name: find object: *collection0 diff --git a/tests/retryReads-primaryRemoval.yml b/tests/retryReads-primaryRemoval.yml index 587e9044..d2db39a3 100644 --- a/tests/retryReads-primaryRemoval.yml +++ b/tests/retryReads-primaryRemoval.yml @@ -56,7 +56,7 @@ driverWorkload: uriOptions: retryReads: true 
storeEventsAsEntities: - connection: + events: - PoolCreatedEvent - PoolReadyEvent - PoolClearedEvent @@ -93,6 +93,7 @@ driverWorkload: arguments: storeErrorsAsEntity: errors storeIterationsAsEntity: iterations + storeSuccessesAsEntity: successes operations: - name: find object: *collection0 diff --git a/tests/retryReads-primaryTakeover.yml b/tests/retryReads-primaryTakeover.yml index 064495f9..0a168370 100644 --- a/tests/retryReads-primaryTakeover.yml +++ b/tests/retryReads-primaryTakeover.yml @@ -57,7 +57,7 @@ driverWorkload: uriOptions: retryReads: true storeEventsAsEntities: - connection: + events: - PoolCreatedEvent - PoolReadyEvent - PoolClearedEvent @@ -94,6 +94,7 @@ driverWorkload: arguments: storeErrorsAsEntity: errors storeIterationsAsEntity: iterations + storeSuccessesAsEntity: successes operations: - name: find object: *collection0 diff --git a/tests/retryReads-processRestart-sharded.yml b/tests/retryReads-processRestart-sharded.yml index 3b97090c..234bb2dd 100644 --- a/tests/retryReads-processRestart-sharded.yml +++ b/tests/retryReads-processRestart-sharded.yml @@ -30,7 +30,7 @@ driverWorkload: uriOptions: retryReads: true storeEventsAsEntities: - connection: + events: - PoolCreatedEvent - PoolReadyEvent - PoolClearedEvent @@ -67,6 +67,7 @@ driverWorkload: arguments: storeErrorsAsEntity: errors storeIterationsAsEntity: iterations + storeSuccessesAsEntity: successes operations: - name: find object: *collection0 diff --git a/tests/retryReads-processRestart.yml b/tests/retryReads-processRestart.yml index 9b9c429a..f4b5cf1e 100644 --- a/tests/retryReads-processRestart.yml +++ b/tests/retryReads-processRestart.yml @@ -30,7 +30,7 @@ driverWorkload: uriOptions: retryReads: true storeEventsAsEntities: - connection: + events: - PoolCreatedEvent - PoolReadyEvent - PoolClearedEvent @@ -67,6 +67,7 @@ driverWorkload: arguments: storeErrorsAsEntity: errors storeIterationsAsEntity: iterations + storeSuccessesAsEntity: successes operations: - name: find object: 
*collection0 diff --git a/tests/retryReads-resizeCluster.yml b/tests/retryReads-resizeCluster.yml index b02789fb..34caca22 100644 --- a/tests/retryReads-resizeCluster.yml +++ b/tests/retryReads-resizeCluster.yml @@ -28,7 +28,7 @@ driverWorkload: uriOptions: retryReads: true storeEventsAsEntities: - connection: + events: - PoolCreatedEvent - PoolReadyEvent - PoolClearedEvent @@ -65,6 +65,7 @@ driverWorkload: arguments: storeErrorsAsEntity: errors storeIterationsAsEntity: iterations + storeSuccessesAsEntity: successes operations: - name: find object: *collection0 diff --git a/tests/retryReads-testFailover-sharded.yml b/tests/retryReads-testFailover-sharded.yml index aff98095..86424a25 100644 --- a/tests/retryReads-testFailover-sharded.yml +++ b/tests/retryReads-testFailover-sharded.yml @@ -26,7 +26,7 @@ driverWorkload: uriOptions: retryReads: true storeEventsAsEntities: - connection: + events: - PoolCreatedEvent - PoolReadyEvent - PoolClearedEvent @@ -63,6 +63,7 @@ driverWorkload: arguments: storeErrorsAsEntity: errors storeIterationsAsEntity: iterations + storeSuccessesAsEntity: successes operations: - name: find object: *collection0 diff --git a/tests/retryReads-testFailover.yml b/tests/retryReads-testFailover.yml index 95c6b341..1c1c271b 100644 --- a/tests/retryReads-testFailover.yml +++ b/tests/retryReads-testFailover.yml @@ -26,7 +26,7 @@ driverWorkload: uriOptions: retryReads: true storeEventsAsEntities: - connection: + events: - PoolCreatedEvent - PoolReadyEvent - PoolClearedEvent @@ -63,6 +63,7 @@ driverWorkload: arguments: storeErrorsAsEntity: errors storeIterationsAsEntity: iterations + storeSuccessesAsEntity: successes operations: - name: find object: *collection0 diff --git a/tests/retryReads-toggleServerSideJS.yml b/tests/retryReads-toggleServerSideJS.yml index 5dec61f7..2653f109 100644 --- a/tests/retryReads-toggleServerSideJS.yml +++ b/tests/retryReads-toggleServerSideJS.yml @@ -25,7 +25,7 @@ driverWorkload: uriOptions: retryReads: true 
storeEventsAsEntities: - connection: + events: - PoolCreatedEvent - PoolReadyEvent - PoolClearedEvent @@ -62,6 +62,7 @@ driverWorkload: arguments: storeErrorsAsEntity: errors storeIterationsAsEntity: iterations + storeSuccessesAsEntity: successes operations: - name: find object: *collection0 diff --git a/tests/retryReads-vmRestart-sharded.yml b/tests/retryReads-vmRestart-sharded.yml index 0dbd0b57..6a2b9ddc 100644 --- a/tests/retryReads-vmRestart-sharded.yml +++ b/tests/retryReads-vmRestart-sharded.yml @@ -26,7 +26,7 @@ driverWorkload: uriOptions: retryReads: true storeEventsAsEntities: - connection: + events: - PoolCreatedEvent - PoolReadyEvent - PoolClearedEvent @@ -63,6 +63,7 @@ driverWorkload: arguments: storeErrorsAsEntity: errors storeIterationsAsEntity: iterations + storeSuccessesAsEntity: successes operations: - name: find object: *collection0 diff --git a/tests/retryReads-vmRestart.yml b/tests/retryReads-vmRestart.yml index fea98662..e18e1a16 100644 --- a/tests/retryReads-vmRestart.yml +++ b/tests/retryReads-vmRestart.yml @@ -26,7 +26,7 @@ driverWorkload: uriOptions: retryReads: true storeEventsAsEntities: - connection: + events: - PoolCreatedEvent - PoolReadyEvent - PoolClearedEvent @@ -63,6 +63,7 @@ driverWorkload: arguments: storeErrorsAsEntity: errors storeIterationsAsEntity: iterations + storeSuccessesAsEntity: successes operations: - name: find object: *collection0 diff --git a/tests/retryWrites-resizeCluster.yml b/tests/retryWrites-resizeCluster.yml index c397f577..795e599c 100644 --- a/tests/retryWrites-resizeCluster.yml +++ b/tests/retryWrites-resizeCluster.yml @@ -28,7 +28,7 @@ driverWorkload: uriOptions: retryWrites: true storeEventsAsEntities: - connection: + events: - PoolCreatedEvent - PoolReadyEvent - PoolClearedEvent @@ -57,6 +57,7 @@ driverWorkload: arguments: storeErrorsAsEntity: errors storeIterationsAsEntity: iterations + storeSuccessesAsEntity: successes operations: - name: insertOne object: *collection0 diff --git 
a/tests/retryWrites-toggleServerSideJS.yml b/tests/retryWrites-toggleServerSideJS.yml index 516d1ebe..670c7d2d 100644 --- a/tests/retryWrites-toggleServerSideJS.yml +++ b/tests/retryWrites-toggleServerSideJS.yml @@ -24,7 +24,7 @@ driverWorkload: uriOptions: retryWrites: true storeEventsAsEntities: - connection: + events: - PoolCreatedEvent - PoolReadyEvent - PoolClearedEvent @@ -53,6 +53,7 @@ driverWorkload: arguments: storeErrorsAsEntity: errors storeIterationsAsEntity: iterations + storeSuccessesAsEntity: successes operations: - name: insertOne object: *collection0 diff --git a/tests/validator-simple.yml b/tests/validator-simple.yml index 49f5dd6b..1bfe3062 100644 --- a/tests/validator-simple.yml +++ b/tests/validator-simple.yml @@ -9,7 +9,7 @@ driverWorkload: - client: id: &client0 client0 storeEventsAsEntities: - connection: + events: - PoolCreatedEvent - PoolReadyEvent - PoolClearedEvent From 45d963c8f69b252c0811bb7e00ae2c0feb12c812 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Mon, 8 Feb 2021 08:57:50 -0500 Subject: [PATCH 145/172] account for missing entities --- docs/source/spec-test-format.rst | 7 +++++-- docs/source/spec-workload-executor.rst | 3 ++- integrations/ruby/executor.rb | 27 +++++++++++++++++++++----- 3 files changed, 29 insertions(+), 8 deletions(-) diff --git a/docs/source/spec-test-format.rst b/docs/source/spec-test-format.rst index cd3af4da..fb3b3c7b 100644 --- a/docs/source/spec-test-format.rst +++ b/docs/source/spec-test-format.rst @@ -116,12 +116,15 @@ A Test Scenario File has the following keys: The document must be a complete test as defined by the `Unified Test Format specification `_. - The workload SHOULD use the ``loop`` unified test format operation to + The workload MUST use a single test, as defined in the unified test format + specification. + + The workload MUST use the ``loop`` unified test format operation to define the MongoDB operations to execute during maintenance. 
There MUST be exactly one ``loop`` operation per scenario, and it SHOULD be the last operation in the scenario. - The scenario SHOULD use ``storeErrorsAsEntity``, ``storeFailuresAsEntity``, + The scenario MUST use ``storeErrorsAsEntity``, ``storeFailuresAsEntity``, ``storeSuccesesAsEntity`` and ``storeIterationsAsEntity`` operation arguments to allow the workload executor to retrieve errors, failures and operation counts for the executed workload. The entity names for these options MUST diff --git a/docs/source/spec-workload-executor.rst b/docs/source/spec-workload-executor.rst index 8568ef94..db950672 100644 --- a/docs/source/spec-workload-executor.rst +++ b/docs/source/spec-workload-executor.rst @@ -188,7 +188,8 @@ Pseudocode Implementation } let results = {}; - let numSuccesses = runner.entityMap.get('iterationCount'); + let numIterations = runner.entityMap.get('iterations'); + let numSuccesses = runner.entityMap.get('successes'); let numErrors = 0; let numFailures = 0; for (name, events in runner.entityMap.get('events')) { diff --git a/integrations/ruby/executor.rb b/integrations/ruby/executor.rb index 5820a021..8d585fd1 100644 --- a/integrations/ruby/executor.rb +++ b/integrations/ruby/executor.rb @@ -64,15 +64,32 @@ def result def write_result {}.tap do |event_result| + @iteration_count = -1 + @success_count = -1 @events = [] @errors = [] @failures = [] unified_tests.map do |test| - @iteration_count += test.entities.get(:iteration_count, 'iterations') - @success_count += test.entities.get(:success_count, 'successes') - @events += test.entities.get(:event_list, 'events') - @errors += test.entities.get(:error_list, 'errors') - @failures += test.entities.get(:failure_list, 'failures') + begin + @iteration_count += test.entities.get(:iteration_count, 'iterations') + rescue Unified::Error::EntityMissing + end + begin + @success_count += test.entities.get(:success_count, 'successes') + rescue Unified::Error::EntityMissing + end + begin + @events += 
test.entities.get(:event_list, 'events') + rescue Unified::Error::EntityMissing + end + begin + @errors += test.entities.get(:error_list, 'errors') + rescue Unified::Error::EntityMissing + end + begin + @failures += test.entities.get(:failure_list, 'failures') + rescue Unified::Error::EntityMissing + end end File.open('events.json', 'w') do |f| f << JSON.dump(event_result) From 4e06e9092459e82474cb8c8a4bf0a164cd2a24dc Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Mon, 8 Feb 2021 09:03:23 -0500 Subject: [PATCH 146/172] update pseudocode for workload executor --- docs/source/spec-workload-executor.rst | 47 +++++++++++++++++--------- integrations/ruby/executor.rb | 6 +++- 2 files changed, 36 insertions(+), 17 deletions(-) diff --git a/docs/source/spec-workload-executor.rst b/docs/source/spec-workload-executor.rst index db950672..0cb293a2 100644 --- a/docs/source/spec-workload-executor.rst +++ b/docs/source/spec-workload-executor.rst @@ -188,26 +188,41 @@ Pseudocode Implementation } let results = {}; - let numIterations = runner.entityMap.get('iterations'); - let numSuccesses = runner.entityMap.get('successes'); - let numErrors = 0; - let numFailures = 0; - for (name, events in runner.entityMap.get('events')) { - results[name] ||= []; - results[name].concat(events); + try { + numIterations = runner.entityMap.get('iterations'); + } catch { + numIterations = -1; + } + try { + numSuccesses = runner.entityMap.get('successes'); + } catch { + numSuccesses = -1; + } + try { + errors = runner.entityMap.get('errors'); + numErrors = errors.length; + } catch { + errors = []; + numErrors = -1; } - for (name, errors in runner.entityMap.get('errors')) { - results[name] ||= []; - results[name].concat(errors); - numErrors += errors.length; + try { + failures = runner.entityMap.get('failures'); + numFailures = failures.length; + } catch { + failures = []; + numFailures = -1; } - for (name, failures in runner.entityMap.get('failures')) { - results[name] ||= []; - 
results[name].concat(failures); - numFailures += failures.length; + try { + events = runner.entityMap.get('events'); + } catch { + events = []; } - fs.writeFile('events.json', JSON.stringify(results); + fs.writeFile('events.json', JSON.stringify({ + events: events, + errors: errors, + failures: failures, + })); fs.writeFile('results.json', JSON.stringify({ ‘numErrors’: numErrors, 'numFailures': numFailures, diff --git a/integrations/ruby/executor.rb b/integrations/ruby/executor.rb index 8d585fd1..56354fc4 100644 --- a/integrations/ruby/executor.rb +++ b/integrations/ruby/executor.rb @@ -92,7 +92,11 @@ def write_result end end File.open('events.json', 'w') do |f| - f << JSON.dump(event_result) + f << JSON.dump( + errors: @errors, + failures: @failures, + events: @events, + ) end end File.open('results.json', 'w') do |f| From d748119c930143ecc1a5505546cd47df146faa98 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Mon, 8 Feb 2021 09:04:04 -0500 Subject: [PATCH 147/172] Ruby is reference implementation --- docs/source/spec-workload-executor.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/spec-workload-executor.rst b/docs/source/spec-workload-executor.rst index 0cb293a2..e1191276 100644 --- a/docs/source/spec-workload-executor.rst +++ b/docs/source/spec-workload-executor.rst @@ -233,7 +233,7 @@ Pseudocode Implementation Reference Implementation ------------------------ -`PyMongo's workload executor `_ +`Ruby's workload executor `_ serves as the reference implementation of the script described by this specification. 
From e8f90ebe865c2e667560ec268c7c05d22120b81d Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Mon, 8 Feb 2021 09:10:33 -0500 Subject: [PATCH 148/172] workload executor must set both errors and failures --- docs/source/spec-workload-executor.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/source/spec-workload-executor.rst b/docs/source/spec-workload-executor.rst index e1191276..171b0a82 100644 --- a/docs/source/spec-workload-executor.rst +++ b/docs/source/spec-workload-executor.rst @@ -110,6 +110,10 @@ After accepting the inputs, the workload executor: * ``events``: array of documents describing the CMAP events that occurred while the workload executor was executing the operations. + If the driver's unified test format does not distinguish between errors + and failures, and reports one but not the other, the workload executor MUST + set the non-reported entry to the empty array. + #. MUST calculate the aggregate counts of errors (``numErrors``) and failures (``numFailures``) from the error and failure lists. 
If the errors or failures were not reported by the test runner, such as because the From 78fb5e6f4ff83ba93c718ba0863d051fe7cb6fa0 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Thu, 11 Feb 2021 09:14:34 -0500 Subject: [PATCH 149/172] record command events --- tests/retryReads-move-sharded.yml | 3 +++ tests/retryReads-move.yml | 3 +++ tests/retryReads-primaryRemoval.yml | 3 +++ tests/retryReads-primaryTakeover.yml | 3 +++ tests/retryReads-processRestart-sharded.yml | 3 +++ tests/retryReads-processRestart.yml | 3 +++ tests/retryReads-resizeCluster.yml | 3 +++ tests/retryReads-testFailover-sharded.yml | 3 +++ tests/retryReads-testFailover.yml | 3 +++ tests/retryReads-toggleServerSideJS.yml | 3 +++ tests/retryReads-vmRestart-sharded.yml | 3 +++ tests/retryReads-vmRestart.yml | 3 +++ tests/retryWrites-resizeCluster.yml | 3 +++ tests/retryWrites-toggleServerSideJS.yml | 3 +++ tests/validator-simple.yml | 3 +++ 15 files changed, 45 insertions(+) diff --git a/tests/retryReads-move-sharded.yml b/tests/retryReads-move-sharded.yml index 0abe6b72..cb435189 100644 --- a/tests/retryReads-move-sharded.yml +++ b/tests/retryReads-move-sharded.yml @@ -40,6 +40,9 @@ driverWorkload: - ConnectionCheckOutFailedEvent - ConnectionCheckedOutEvent - ConnectionCheckedInEvent + - CommandStartedEvent + - CommandSucceededEvent + - CommandFailedEvent - database: id: &database0 database0 client: *client0 diff --git a/tests/retryReads-move.yml b/tests/retryReads-move.yml index 7b32af62..69c250dc 100644 --- a/tests/retryReads-move.yml +++ b/tests/retryReads-move.yml @@ -40,6 +40,9 @@ driverWorkload: - ConnectionCheckOutFailedEvent - ConnectionCheckedOutEvent - ConnectionCheckedInEvent + - CommandStartedEvent + - CommandSucceededEvent + - CommandFailedEvent - database: id: &database0 database0 client: *client0 diff --git a/tests/retryReads-primaryRemoval.yml b/tests/retryReads-primaryRemoval.yml index d2db39a3..41949cf7 100644 --- a/tests/retryReads-primaryRemoval.yml +++ 
b/tests/retryReads-primaryRemoval.yml @@ -68,6 +68,9 @@ driverWorkload: - ConnectionCheckOutFailedEvent - ConnectionCheckedOutEvent - ConnectionCheckedInEvent + - CommandStartedEvent + - CommandSucceededEvent + - CommandFailedEvent - database: id: &database0 database0 client: *client0 diff --git a/tests/retryReads-primaryTakeover.yml b/tests/retryReads-primaryTakeover.yml index 0a168370..64d0a6db 100644 --- a/tests/retryReads-primaryTakeover.yml +++ b/tests/retryReads-primaryTakeover.yml @@ -69,6 +69,9 @@ driverWorkload: - ConnectionCheckOutFailedEvent - ConnectionCheckedOutEvent - ConnectionCheckedInEvent + - CommandStartedEvent + - CommandSucceededEvent + - CommandFailedEvent - database: id: &database0 database0 client: *client0 diff --git a/tests/retryReads-processRestart-sharded.yml b/tests/retryReads-processRestart-sharded.yml index 234bb2dd..58e8cce4 100644 --- a/tests/retryReads-processRestart-sharded.yml +++ b/tests/retryReads-processRestart-sharded.yml @@ -42,6 +42,9 @@ driverWorkload: - ConnectionCheckOutFailedEvent - ConnectionCheckedOutEvent - ConnectionCheckedInEvent + - CommandStartedEvent + - CommandSucceededEvent + - CommandFailedEvent - database: id: &database0 database0 client: *client0 diff --git a/tests/retryReads-processRestart.yml b/tests/retryReads-processRestart.yml index f4b5cf1e..96647bfd 100644 --- a/tests/retryReads-processRestart.yml +++ b/tests/retryReads-processRestart.yml @@ -42,6 +42,9 @@ driverWorkload: - ConnectionCheckOutFailedEvent - ConnectionCheckedOutEvent - ConnectionCheckedInEvent + - CommandStartedEvent + - CommandSucceededEvent + - CommandFailedEvent - database: id: &database0 database0 client: *client0 diff --git a/tests/retryReads-resizeCluster.yml b/tests/retryReads-resizeCluster.yml index 34caca22..60bbfb80 100644 --- a/tests/retryReads-resizeCluster.yml +++ b/tests/retryReads-resizeCluster.yml @@ -40,6 +40,9 @@ driverWorkload: - ConnectionCheckOutFailedEvent - ConnectionCheckedOutEvent - ConnectionCheckedInEvent + - 
CommandStartedEvent + - CommandSucceededEvent + - CommandFailedEvent - database: id: &database0 database0 client: *client0 diff --git a/tests/retryReads-testFailover-sharded.yml b/tests/retryReads-testFailover-sharded.yml index 86424a25..bbeea866 100644 --- a/tests/retryReads-testFailover-sharded.yml +++ b/tests/retryReads-testFailover-sharded.yml @@ -38,6 +38,9 @@ driverWorkload: - ConnectionCheckOutFailedEvent - ConnectionCheckedOutEvent - ConnectionCheckedInEvent + - CommandStartedEvent + - CommandSucceededEvent + - CommandFailedEvent - database: id: &database0 database0 client: *client0 diff --git a/tests/retryReads-testFailover.yml b/tests/retryReads-testFailover.yml index 1c1c271b..29f242aa 100644 --- a/tests/retryReads-testFailover.yml +++ b/tests/retryReads-testFailover.yml @@ -38,6 +38,9 @@ driverWorkload: - ConnectionCheckOutFailedEvent - ConnectionCheckedOutEvent - ConnectionCheckedInEvent + - CommandStartedEvent + - CommandSucceededEvent + - CommandFailedEvent - database: id: &database0 database0 client: *client0 diff --git a/tests/retryReads-toggleServerSideJS.yml b/tests/retryReads-toggleServerSideJS.yml index 2653f109..42ece6a5 100644 --- a/tests/retryReads-toggleServerSideJS.yml +++ b/tests/retryReads-toggleServerSideJS.yml @@ -37,6 +37,9 @@ driverWorkload: - ConnectionCheckOutFailedEvent - ConnectionCheckedOutEvent - ConnectionCheckedInEvent + - CommandStartedEvent + - CommandSucceededEvent + - CommandFailedEvent - database: id: &database0 database0 client: *client0 diff --git a/tests/retryReads-vmRestart-sharded.yml b/tests/retryReads-vmRestart-sharded.yml index 6a2b9ddc..1a85f70b 100644 --- a/tests/retryReads-vmRestart-sharded.yml +++ b/tests/retryReads-vmRestart-sharded.yml @@ -38,6 +38,9 @@ driverWorkload: - ConnectionCheckOutFailedEvent - ConnectionCheckedOutEvent - ConnectionCheckedInEvent + - CommandStartedEvent + - CommandSucceededEvent + - CommandFailedEvent - database: id: &database0 database0 client: *client0 diff --git 
a/tests/retryReads-vmRestart.yml b/tests/retryReads-vmRestart.yml index e18e1a16..bd031696 100644 --- a/tests/retryReads-vmRestart.yml +++ b/tests/retryReads-vmRestart.yml @@ -38,6 +38,9 @@ driverWorkload: - ConnectionCheckOutFailedEvent - ConnectionCheckedOutEvent - ConnectionCheckedInEvent + - CommandStartedEvent + - CommandSucceededEvent + - CommandFailedEvent - database: id: &database0 database0 client: *client0 diff --git a/tests/retryWrites-resizeCluster.yml b/tests/retryWrites-resizeCluster.yml index 795e599c..0a8df9c3 100644 --- a/tests/retryWrites-resizeCluster.yml +++ b/tests/retryWrites-resizeCluster.yml @@ -40,6 +40,9 @@ driverWorkload: - ConnectionCheckOutFailedEvent - ConnectionCheckedOutEvent - ConnectionCheckedInEvent + - CommandStartedEvent + - CommandSucceededEvent + - CommandFailedEvent - database: id: &database0 database0 client: *client0 diff --git a/tests/retryWrites-toggleServerSideJS.yml b/tests/retryWrites-toggleServerSideJS.yml index 670c7d2d..a45e85b1 100644 --- a/tests/retryWrites-toggleServerSideJS.yml +++ b/tests/retryWrites-toggleServerSideJS.yml @@ -36,6 +36,9 @@ driverWorkload: - ConnectionCheckOutFailedEvent - ConnectionCheckedOutEvent - ConnectionCheckedInEvent + - CommandStartedEvent + - CommandSucceededEvent + - CommandFailedEvent - database: id: &database0 database0 client: *client0 diff --git a/tests/validator-simple.yml b/tests/validator-simple.yml index 1bfe3062..1a75bab8 100644 --- a/tests/validator-simple.yml +++ b/tests/validator-simple.yml @@ -21,6 +21,9 @@ driverWorkload: - ConnectionCheckOutFailedEvent - ConnectionCheckedOutEvent - ConnectionCheckedInEvent + - CommandStartedEvent + - CommandSucceededEvent + - CommandFailedEvent - database: id: &database0 database0 client: *client0 From 5d68eda84a5302d416ed0849bdaf6fcb3d88f046 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Thu, 11 Feb 2021 09:27:16 -0500 Subject: [PATCH 150/172] correlate command events --- astrolabe/commands.py | 24 +++++++++++++++++++++--- 1 file 
changed, 21 insertions(+), 3 deletions(-) diff --git a/astrolabe/commands.py b/astrolabe/commands.py index a32c5a15..3a43c29c 100644 --- a/astrolabe/commands.py +++ b/astrolabe/commands.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +from collections import defaultdict import logging import json @@ -110,13 +111,30 @@ def aggregate_statistics(): import numpy - command_events = events['commands'] - command_times = [c['duration'] for c in command_events] + command_events = [ + event for event in events['events'] + if event['name'].startswith('Command') + ] + map = {} + correlated_events = [] + for event in command_events: + if event['name'] == 'CommandStartedEvent': + map[event['requestId']] = event + else: + started_event = map[event['requestId']] + del map[event['requestId']] + _event = dict(started_event) + _event.update(event) + correlated_events.append(_event) + command_times = [c['duration'] for c in correlated_events] stats['avgCommandTime'] = numpy.average(command_times) stats['p95CommandTime'] = numpy.percentile(command_times, 95) stats['p99CommandTime'] = numpy.percentile(command_times, 99) - conn_events = events['connections'] + conn_events = [ + event for event in events['events'] + if event['name'].startswith('Connection') or event['name'].startswith('Pool') + ] counts = defaultdict(lambda: 0) max_counts = defaultdict(lambda: 0) conn_count = max_conn_count = 0 From 83285fe2994704fc378545120475d99843c15642 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Thu, 11 Feb 2021 09:27:47 -0500 Subject: [PATCH 151/172] fix event name --- astrolabe/commands.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/astrolabe/commands.py b/astrolabe/commands.py index 3a43c29c..2dce9e1a 100644 --- a/astrolabe/commands.py +++ b/astrolabe/commands.py @@ -139,9 +139,9 @@ def aggregate_statistics(): max_counts = defaultdict(lambda: 0) conn_count = max_conn_count = 0 for e in 
conn_events: - if e['name'] == 'ConnectionCreated': + if e['name'] == 'ConnectionCreatedEvent': counts[e['address']] += 1 - elif e['name'] == 'ConnectionClosed': + elif e['name'] == 'ConnectionClosedEvent': counts[e['address']] -= 1 if counts[e['address']] > max_counts[e['address']]: max_counts[e['address']] = counts[e['address']] From 7c2d0aa35cd99580d34cf1c46295628169a5e1b4 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Thu, 11 Feb 2021 14:22:42 -0500 Subject: [PATCH 152/172] fix dependency --- astrolabe/runner.py | 1 + 1 file changed, 1 insertion(+) diff --git a/astrolabe/runner.py b/astrolabe/runner.py index 681ad3f1..214c7a68 100644 --- a/astrolabe/runner.py +++ b/astrolabe/runner.py @@ -22,6 +22,7 @@ import junitparser import yaml +from .utils import mongo_client from atlasclient import AtlasApiError, JSONObject from astrolabe.commands import ( get_one_organization_by_name, ensure_project, ensure_admin_user, From da1448f576d637b23e2f6b16a5cb18e98bf69989 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Thu, 11 Feb 2021 16:05:17 -0500 Subject: [PATCH 153/172] load yaml safely --- astrolabe/runner.py | 2 +- astrolabe/validator.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/astrolabe/runner.py b/astrolabe/runner.py index 214c7a68..a4f6e660 100644 --- a/astrolabe/runner.py +++ b/astrolabe/runner.py @@ -286,7 +286,7 @@ def __init__(self, *, client, admin_client, test_locator_token, configuration, x # Step-1: load test specification. with open(full_path, 'r') as spec_file: test_spec = JSONObject.from_dict( - yaml.load(spec_file, Loader=yaml.FullLoader)) + yaml.safe_load(spec_file)) # Step-2: generate test name. 
test_name = get_test_name_from_spec_file(full_path) diff --git a/astrolabe/validator.py b/astrolabe/validator.py index 40de844b..9e9178c7 100644 --- a/astrolabe/validator.py +++ b/astrolabe/validator.py @@ -88,7 +88,7 @@ def run_test(self, driver_workload): def test_simple(self): driver_workload = JSONObject.from_dict( - yaml.load(open('tests/validator-simple.yml').read(), Loader=yaml.FullLoader)['driverWorkload'] + yaml.safe_load(open('tests/validator-simple.yml').read())['driverWorkload'] ) if os.path.exists('events.json'): @@ -116,7 +116,7 @@ def test_simple(self): "The workload executor didn't execute any operations " "or didn't execute them appropriately.") - events = yaml.load(open('events.json').read()) + events = yaml.safe_load(open('events.json').read()) if 'connection' not in events: self.fail( "The workload executor didn't record connection events as expected.") @@ -133,7 +133,7 @@ def test_simple(self): def test_num_errors(self): driver_workload = JSONObject.from_dict( - yaml.load(open('tests/validator-numErrors.yml').read(), Loader=yaml.FullLoader)['driverWorkload'] + yaml.safe_load(open('tests/validator-numErrors.yml').read())['driverWorkload'] ) stats = self.run_test(driver_workload) @@ -164,7 +164,7 @@ def test_num_errors(self): def test_num_failures(self): driver_workload = JSONObject.from_dict( - yaml.load(open('tests/validator-numFailures.yml').read(), Loader=yaml.FullLoader)['driverWorkload'] + yaml.safe_load(open('tests/validator-numFailures.yml').read())['driverWorkload'] ) stats = self.run_test(driver_workload) @@ -187,7 +187,7 @@ def test_num_failures(self): def test_num_failures_as_errors(self): driver_workload = JSONObject.from_dict( - yaml.load(open('tests/validator-numFailures-as-errors.yml').read(), Loader=yaml.FullLoader)['driverWorkload'] + yaml.safe_load(open('tests/validator-numFailures-as-errors.yml').read())['driverWorkload'] ) stats = self.run_test(driver_workload) From 32caa8a2375886bd8f8a1e59bec470e0968088f5 Mon Sep 17 00:00:00 
2001 From: Oleg Pudeyev Date: Thu, 11 Feb 2021 16:09:51 -0500 Subject: [PATCH 154/172] update validator --- astrolabe/validator.py | 41 +++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/astrolabe/validator.py b/astrolabe/validator.py index 9e9178c7..753357e8 100644 --- a/astrolabe/validator.py +++ b/astrolabe/validator.py @@ -116,20 +116,37 @@ def test_simple(self): "The workload executor didn't execute any operations " "or didn't execute them appropriately.") - events = yaml.safe_load(open('events.json').read()) - if 'connection' not in events: + _events = yaml.safe_load(open('events.json').read()) + if 'events' not in _events: + self.fail( + "The workload executor didn't record events as expected.") + events = _events['events'] + connection_events = [event for event in events + if event['name'].startswith('Connection')] + if not connection_events: self.fail( "The workload executor didn't record connection events as expected.") - for event in events['connection']: - if 'name' not in event: - self.fail( - "The workload executor didn't record event name as expected.") - if not event['name'].endswith('Event'): - self.fail( - "The workload executor didn't record event name as expected.") - if 'observedAt' not in event: - self.fail( - "The workload executor didn't record observation time as expected.") + pool_events = [event for event in events + if event['name'].startswith('Pool')] + if not pool_events: + self.fail( + "The workload executor didn't record connection pool events as expected.") + command_events = [event for event in events + if event['name'].startswith('Command')] + if not command_events: + self.fail( + "The workload executor didn't record command events as expected.") + for event_list in [connection_events, pool_events, command_events]: + for event in event_list: + if 'name' not in event: + self.fail( + "The workload executor didn't record event name as expected.") + if not 
event['name'].endswith('Event'): + self.fail( + "The workload executor didn't record event name as expected.") + if 'observedAt' not in event: + self.fail( + "The workload executor didn't record observation time as expected.") def test_num_errors(self): driver_workload = JSONObject.from_dict( From 29232506842177e3b501c2d4e4b96561ec1143b2 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Thu, 11 Feb 2021 16:12:35 -0500 Subject: [PATCH 155/172] fix error and failure count reporting --- integrations/ruby/executor.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/integrations/ruby/executor.rb b/integrations/ruby/executor.rb index 56354fc4..4f774a47 100644 --- a/integrations/ruby/executor.rb +++ b/integrations/ruby/executor.rb @@ -91,6 +91,8 @@ def write_result rescue Unified::Error::EntityMissing end end + @error_count += @errors.length + @failure_count += @failures.length File.open('events.json', 'w') do |f| f << JSON.dump( errors: @errors, From 96778b1e12a83fd8aa6e8ca063289a29bc4a5fef Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Thu, 11 Feb 2021 16:17:29 -0500 Subject: [PATCH 156/172] fix failure validator --- tests/validator-numFailures.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/validator-numFailures.yml b/tests/validator-numFailures.yml index 76da3575..c8a4d391 100644 --- a/tests/validator-numFailures.yml +++ b/tests/validator-numFailures.yml @@ -36,7 +36,7 @@ driverWorkload: arguments: storeIterationsAsEntity: iterations storeSuccessesAsEntity: successes - storeFailuresAsEntity: errors + storeFailuresAsEntity: failures operations: - name: find object: *collection0 From 18a4ff5a60ac975662b23feb9fe9807e81c440f6 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Thu, 11 Feb 2021 16:35:28 -0500 Subject: [PATCH 157/172] add polling diagnostics --- astrolabe/poller.py | 2 +- astrolabe/runner.py | 31 ++++++++++++++++++------------- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/astrolabe/poller.py 
b/astrolabe/poller.py index 22977855..293076bb 100644 --- a/astrolabe/poller.py +++ b/astrolabe/poller.py @@ -45,7 +45,7 @@ def poll(self, objects, *, attribute, args, kwargs): timer.start() while timer.elapsed < self.timeout: logmsg = "Polling {} [elapsed: {:.2f} seconds]" - LOGGER.debug(logmsg.format(objects, timer.elapsed)) + LOGGER.info(logmsg.format(objects, timer.elapsed)) for obj in objects: return_value = self._check_ready(obj, attribute, args, kwargs) if return_value: diff --git a/astrolabe/runner.py b/astrolabe/runner.py index a4f6e660..eaddee2f 100644 --- a/astrolabe/runner.py +++ b/astrolabe/runner.py @@ -261,12 +261,22 @@ def run(self, persist_cluster=False, startup_time=1): return junit_test def wait_for_idle(self): - selector = BooleanCallablePoller( - frequency=self.config.polling_frequency, - timeout=self.config.polling_timeout) - LOGGER.info("Waiting for cluster maintenance to complete") - selector.poll([self], attribute="is_cluster_state", args=("IDLE",), - kwargs={}) + LOGGER.info("Waiting for cluster %s to become idle" % self.cluster_name) + timer = Timer() + timer.start() + ok = False + timeout = self.config.polling_timeout + wanted_state = 'idle' + while timer.elapsed < timeout: + cluster_info = self.cluster_url.get().data + actual_state = cluster_info.stateName.lower() + if actual_state == wanted_state: + ok = True + break + LOGGER.info("Cluster %s: current state: %s; wanted state: %s; waited for %.1f sec" % (self.cluster_name, actual_state, wanted_state, timer.elapsed)) + sleep(1.0 / self.config.polling_frequency) + if not ok: + raise PollingTimeoutError("Polling timed out after %s seconds" % timeout) class SpecTestRunnerBase: @@ -360,15 +370,10 @@ def run(self): # Step-2: run tests round-robin until all have been run. 
remaining_test_cases = self.cases.copy() while remaining_test_cases: - selector = BooleanCallablePoller( - frequency=self.config.polling_frequency, - timeout=self.config.polling_timeout) + active_case = remaining_test_cases[0] # Select a case whose cluster is ready. - LOGGER.info("Waiting for a test cluster to become ready") - active_case = selector.poll( - remaining_test_cases, attribute="is_cluster_state", - args=("IDLE",), kwargs={}) + active_case.wait_for_idle() LOGGER.info("Test cluster {!r} is ready".format( active_case.cluster_name)) From f4f51501c3ccaabe39ba61427af2999aedca2891 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Thu, 11 Feb 2021 16:42:05 -0500 Subject: [PATCH 158/172] give workload executors 10 more seconds of runtime --- astrolabe/runner.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/astrolabe/runner.py b/astrolabe/runner.py index eaddee2f..ce146402 100644 --- a/astrolabe/runner.py +++ b/astrolabe/runner.py @@ -225,6 +225,10 @@ def run(self, persist_cluster=False, startup_time=1): else: sleep(5) + # Wait 10 seconds to ensure that the driver is not experiencing any + # errors after the maintenance has concluded. 
+ sleep(10) + # Step-5: interrupt driver workload and capture streams stats = self.workload_runner.terminate() From 01a68db34e9c5d880a12cde132d807ccbda4e8d4 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Thu, 11 Feb 2021 18:52:41 -0500 Subject: [PATCH 159/172] fix write scenarios --- tests/retryWrites-resizeCluster.yml | 3 ++- tests/retryWrites-toggleServerSideJS.yml | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/retryWrites-resizeCluster.yml b/tests/retryWrites-resizeCluster.yml index 0a8df9c3..c280957a 100644 --- a/tests/retryWrites-resizeCluster.yml +++ b/tests/retryWrites-resizeCluster.yml @@ -64,4 +64,5 @@ driverWorkload: operations: - name: insertOne object: *collection0 - arguments: { data: 100 } + arguments: + document: { data: 100 } diff --git a/tests/retryWrites-toggleServerSideJS.yml b/tests/retryWrites-toggleServerSideJS.yml index a45e85b1..1a927749 100644 --- a/tests/retryWrites-toggleServerSideJS.yml +++ b/tests/retryWrites-toggleServerSideJS.yml @@ -60,4 +60,5 @@ driverWorkload: operations: - name: insertOne object: *collection0 - arguments: { data: 100 } + arguments: + document: { data: 100 } From 55837640e4e2345fbd9136bb7a132e951ed75b55 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Thu, 11 Feb 2021 22:12:11 -0500 Subject: [PATCH 160/172] temp --- .evergreen/config.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.evergreen/config.yml b/.evergreen/config.yml index 04cf56c9..7560739c 100644 --- a/.evergreen/config.yml +++ b/.evergreen/config.yml @@ -30,6 +30,11 @@ functions: working_dir: astrolabe-src command: | ${PYTHON3_BINARY} -m pip install virtualenv + - command: subprocess.exec + params: + working_dir: astrolabe-src + command: | + cat /etc/resolv.conf # Create virtualenv using a CPython 3.5+ binary. 
- command: subprocess.exec params: From 1f7529ab451dfbfa4b904d92fe21243d4190a9c4 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Fri, 12 Feb 2021 09:20:11 -0500 Subject: [PATCH 161/172] reinstate command monitoring --- docs/source/spec-test-format.rst | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/docs/source/spec-test-format.rst b/docs/source/spec-test-format.rst index fb3b3c7b..c1b6047a 100644 --- a/docs/source/spec-test-format.rst +++ b/docs/source/spec-test-format.rst @@ -135,11 +135,13 @@ A Test Scenario File has the following keys: - ``storeSuccessesAsEntity``: ``successes`` - ``storeIterationsAsEntity``: ``iterations`` - The scenario MAY use ``storeEventsAsEntities`` operation argument - when defining MongoClients to record CMAP events published during maintenance. - The entity name for ``storeEventsAsEntities`` argument MUST be ``events``. - If this option is used, ``astrolabe`` will retrieve the collected events - and store them as an Evergreen build artifact. + The scenario MUST use ``storeEventsAsEntities`` operation argument + when defining MongoClients to record CMAP and command events published + during maintenance. The entity name for ``storeEventsAsEntities`` argument + MUST be ``events``. When this option is used, ``astrolabe`` will retrieve + the collected events and store them as an Evergreen build artifact, and + will also calculate statistics for command execution time and connection + counts. .. note:: A previous version of this document specified a top-level ``uriOptions`` for specifying URI options for the MongoClient under test. 
From afa7f0c6bee65753fcb730b5092b03b342118bee Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Tue, 16 Feb 2021 09:20:01 -0500 Subject: [PATCH 162/172] command events were reinstated --- docs/source/spec-workload-executor.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/source/spec-workload-executor.rst b/docs/source/spec-workload-executor.rst index 171b0a82..d64d6f4c 100644 --- a/docs/source/spec-workload-executor.rst +++ b/docs/source/spec-workload-executor.rst @@ -107,8 +107,8 @@ After accepting the inputs, the workload executor: * ``failures``: array of documents describing the failures that occurred while the workload executor was executing the operations. - * ``events``: array of documents describing the CMAP events that occurred - while the workload executor was executing the operations. + * ``events``: array of documents describing the command and CMAP events + that occurred while the workload executor was executing the operations. If the driver's unified test format does not distinguish between errors and failures, and reports one but not the other, the workload executor MUST @@ -125,7 +125,7 @@ After accepting the inputs, the workload executor: (i.e. the directory from where the workload executor is being executed). The data written MUST be a map with the following fields: - - ``events``: the collected CMAP events. + - ``events``: the collected command and CMAP events. - ``errors``: the reported errors. 
From ed4bafacef0e10230a2868fea824218afa39bab1 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Wed, 17 Feb 2021 18:30:05 -0500 Subject: [PATCH 163/172] fix space --- docs/source/spec-workload-executor.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/spec-workload-executor.rst b/docs/source/spec-workload-executor.rst index d64d6f4c..f5e4e84a 100644 --- a/docs/source/spec-workload-executor.rst +++ b/docs/source/spec-workload-executor.rst @@ -47,7 +47,7 @@ After accepting the inputs, the workload executor: #. MUST parse the incoming ``driverWorkload`` document and set up the driver's unified test runner to execute the provided workload. - .. note:: + .. note:: The workload SHOULD include a ``loop`` operation, as described in the unified test format, but the workload executor SHOULD NOT validate that From 9504d1bce3dbe4891927ac0713608f1c8286f7a6 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Wed, 17 Feb 2021 18:36:51 -0500 Subject: [PATCH 164/172] comments --- astrolabe/cli.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/astrolabe/cli.py b/astrolabe/cli.py index ca14298a..8f9fae71 100644 --- a/astrolabe/cli.py +++ b/astrolabe/cli.py @@ -16,7 +16,6 @@ from pprint import pprint import unittest from urllib.parse import unquote_plus -from collections import defaultdict import click @@ -78,7 +77,9 @@ NOCREATE_FLAG = click.option( '--no-create', is_flag=True, default=False, - help=('Do not create and configure clusters at the beginning of the run if they already exist, assume they have already been provisioned by a previous run.')) + help=('Do not create and configure clusters at the beginning of the run ' + 'if they already exist, assume they have already been provisioned by ' + 'a previous run.')) class ContextStore: @@ -105,7 +106,6 @@ def cli(ctx, atlas_base_url, atlas_api_username, Astrolabe is a command-line application for running automated driver tests against a MongoDB Atlas cluster undergoing 
maintenance. """ - # Create an atlasclient and attach it to the context. client = AtlasClient( base_url=atlas_base_url, From 9f417ecf015b79bb7dd6c68b0ce49cb5a31e192b Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Wed, 17 Feb 2021 18:39:10 -0500 Subject: [PATCH 165/172] unrecognized operation --- astrolabe/runner.py | 13 ++++++++----- tests/retryWrites-toggleServerSideJS.yml | 7 ++++--- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/astrolabe/runner.py b/astrolabe/runner.py index ce146402..d0245434 100644 --- a/astrolabe/runner.py +++ b/astrolabe/runner.py @@ -186,23 +186,23 @@ def run(self, persist_cluster=False, startup_time=1): self.verify_cluster_configuration_matches(final_config) LOGGER.info("Cluster maintenance complete") - if op_name == 'testFailover': + elif op_name == 'testFailover': self.cluster_url['restartPrimaries'].post() self.wait_for_idle() - if op_name == 'sleep': + elif op_name == 'sleep': _time.sleep(op_spec) - if op_name == 'waitForIdle': + elif op_name == 'waitForIdle': self.wait_for_idle() - if op_name == 'restartVms': + elif op_name == 'restartVms': rv = self.admin_client.nds.groups[self.project.id].clusters[self.cluster_name].reboot.post(api_version='private') self.wait_for_idle() - if op_name == 'assertPrimaryRegion': + elif op_name == 'assertPrimaryRegion': region = op_spec['region'] cluster_config = self.cluster_url.get().data @@ -224,6 +224,9 @@ def run(self, persist_cluster=False, startup_time=1): raise Exception("Primary in cluster not in expected region '%s' (actual region '%s')" % (region, member_region)) else: sleep(5) + + else: + raise Exception('Unrecognized operation %s' % op_name) # Wait 10 seconds to ensure that the driver is not experiencing any # errors after the maintenance has concluded. 
diff --git a/tests/retryWrites-toggleServerSideJS.yml b/tests/retryWrites-toggleServerSideJS.yml index 1a927749..18133b5a 100644 --- a/tests/retryWrites-toggleServerSideJS.yml +++ b/tests/retryWrites-toggleServerSideJS.yml @@ -9,9 +9,10 @@ initialConfiguration: operations: - - clusterConfiguration: {} - processArgs: - javascriptEnabled: true + setClusterConfiguration: + clusterConfiguration: {} + processArgs: + javascriptEnabled: true driverWorkload: description: "Insert" From f8271f98f710ae9f51f920ed593f1a25c4cae0eb Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Wed, 17 Feb 2021 18:40:13 -0500 Subject: [PATCH 166/172] use rstrip --- atlasclient/client.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/atlasclient/client.py b/atlasclient/client.py index c9647055..8b4e4406 100644 --- a/atlasclient/client.py +++ b/atlasclient/client.py @@ -226,8 +226,7 @@ def construct_resource_url(self, path, api_version=None): url_template = '{base_url}{resource_path}' base_url = self.config.base_url # Allow trailing slash like https://cloud-dev.mongodb.com/ in the base URL - while base_url.endswith('/'): - base_url = base_url[:-1] + base_url = base_url.rstrip('/') return url_template.format( base_url=base_url, version=api_version or self.config.api_version, From a71d6f4616bc68f674ced765cdff66902d6e6065 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Wed, 17 Feb 2021 18:41:07 -0500 Subject: [PATCH 167/172] comment --- .evergreen/generate-tasks.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.evergreen/generate-tasks.sh b/.evergreen/generate-tasks.sh index dd4d379e..1ee661b8 100755 --- a/.evergreen/generate-tasks.sh +++ b/.evergreen/generate-tasks.sh @@ -1,5 +1,7 @@ #!/bin/sh +# Use this script to generate the task list for config.yml. 
+ for f in tests/*.yml; do task=`basename $f |sed -e s/.yml//` From 5b03146603fe424045663d71f8bb5ef99a26b298 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Wed, 17 Feb 2021 18:42:18 -0500 Subject: [PATCH 168/172] logs destination --- astrolabe/cli.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/astrolabe/cli.py b/astrolabe/cli.py index 8f9fae71..d4cc43fd 100644 --- a/astrolabe/cli.py +++ b/astrolabe/cli.py @@ -448,7 +448,8 @@ def get_logs_cmd(ctx, spec_test_file, org_name, project_name, cluster_name_salt, polling_timeout, polling_frequency, ): """ - Retrieves logs for the cluster. + Retrieves logs for the cluster and saves them in logs.tar.gz in the + current working directory. """ # Step-1: determine the cluster name for the given test. From 887a336aad49568ed2c5a0f7ae619aec51449943 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Wed, 17 Feb 2021 18:43:05 -0500 Subject: [PATCH 169/172] delete obsolete --- docs/source/installing-running-locally.rst | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/docs/source/installing-running-locally.rst b/docs/source/installing-running-locally.rst index c07147c3..386ac934 100644 --- a/docs/source/installing-running-locally.rst +++ b/docs/source/installing-running-locally.rst @@ -147,9 +147,7 @@ to further speed up the test runs, but it can only be used for scenarios where the cluster configuration does not change from the initial one (otherwise the test would start with the wrong configuration). Using ``--no-delete`` is recommended with ``--no-create``, otherwise each run will -delete the cluster upon completion. **If the cluster exists but has the wrong -configuration, ``astrolabe`` will use it as is and the results of the test -may be incorrect.** +delete the cluster upon completion. 
Debugging From 28ea6afa7feb234d630f968af92e0d8aad8f60fc Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Wed, 17 Feb 2021 19:02:17 -0500 Subject: [PATCH 170/172] note wait for idle --- astrolabe/runner.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/astrolabe/runner.py b/astrolabe/runner.py index d0245434..d4eaad1c 100644 --- a/astrolabe/runner.py +++ b/astrolabe/runner.py @@ -268,6 +268,15 @@ def run(self, persist_cluster=False, startup_time=1): return junit_test def wait_for_idle(self): + # Small delay to account for Atlas not updating cluster state + # synchronously potentially in all maintenance operations + # (https://jira.mongodb.org/browse/PRODTRIAGE-1232). + # VM restarts in sharded clusters require a much longer wait + # (30+ seconds in some circumstances); scenarios that perform + # VM restarts in sharded clusters should use explicit sleep operations + # after the restarts until this is fixed. + LOGGER.info("Waiting to wait for cluster %s to become idle" % self.cluster_name) + sleep(5) LOGGER.info("Waiting for cluster %s to become idle" % self.cluster_name) timer = Timer() timer.start() From 244f00bc0a9e8fd06bc1e855522864141c6abd3f Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Wed, 17 Feb 2021 19:05:07 -0500 Subject: [PATCH 171/172] note wait for idle --- docs/source/spec-test-format.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/source/spec-test-format.rst b/docs/source/spec-test-format.rst index c1b6047a..3c0ff349 100644 --- a/docs/source/spec-test-format.rst +++ b/docs/source/spec-test-format.rst @@ -112,6 +112,12 @@ A Test Scenario File has the following keys: waitForIdle: true + For all maintenance operations other than ``sleep``, after the maintenance + operation is performed, ``astrolabe`` will wait for cluster state to become + idle. 
When performing a VM restart in a sharded cluster, due to the state + not being updated for a potentially long time, the test SHOULD add an + explicit ``sleep`` operation for at least 30 seconds. + * driverWorkload (document): Description of the driver workload to execute The document must be a complete test as defined by the `Unified Test Format specification `_. From c25c664d0ed1b41c7cd4c83af62ea31f9ee74863 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Wed, 17 Feb 2021 23:28:54 -0500 Subject: [PATCH 172/172] syntax fix --- astrolabe/runner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/astrolabe/runner.py b/astrolabe/runner.py index d4eaad1c..4406db8d 100644 --- a/astrolabe/runner.py +++ b/astrolabe/runner.py @@ -225,8 +225,8 @@ def run(self, persist_cluster=False, startup_time=1): else: sleep(5) - else: - raise Exception('Unrecognized operation %s' % op_name) + else: + raise Exception('Unrecognized operation %s' % op_name) # Wait 10 seconds to ensure that the driver is not experiencing any # errors after the maintenance has concluded.