Skip to content

Commit

Permalink
reducing get calls rate to avoid errors
Browse files Browse the repository at this point in the history
ERR: Failed to get cluster 'mu-terr-0002': Too Many Requests
  • Loading branch information
mukrishn committed Nov 2, 2023
1 parent bb85fc8 commit 0731b5b
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 12 deletions.
18 changes: 8 additions & 10 deletions libs/platforms/rosa/terraform/terraform.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,6 @@ def apply_tf_template(self, platform):
self.logging.info(f"Trying to install clusters with TF template {tf_name} looping {loop_counter + 1} with {cluster_workers} workers up to 5 times using terraform provider")
trying = 0
while trying <= 5:
cluster_start_time = int(datetime.datetime.utcnow().timestamp())
if self.utils.force_terminate:
self.logging.error(f"Exiting clusters creation for {tf_name} looping {loop_counter + 1} after capturing Ctrl-C")
return 0
Expand Down Expand Up @@ -190,7 +189,6 @@ def destroy_tf_template(self, platform, tf_module="cluster"):
myenv["TF_VAR_clusters_per_apply"] = str(self.environment['clusters_per_apply'])
myenv["TF_VAR_loop_factor"] = str((loop_counter * self.environment['clusters_per_apply']))

cluster_start_time = int(datetime.datetime.utcnow().timestamp())
if tf_module == "oidc":
# additional env for oidc_provider template
myenv["TF_VAR_managed"] = "true"
Expand Down Expand Up @@ -235,7 +233,7 @@ def delete_cluster(self, platform, cluster_name):
cluster_info["total_count"] = platform.environment['cluster_count']
self.logging.info(f"Checking uninstall log for cluster {cluster_name}")

while retry_loop <= 600: # 1hr timeout
while retry_loop <= 600: # 1hr timeout
retry_loop += 1
cluster_delete_start_time = int(datetime.datetime.utcnow().timestamp())
watch_code, watch_out, watch_err = self.utils.subprocess_exec("rosa logs uninstall -c " + cluster_name + " --watch", cluster_info["path"] + "/cleanup.log", {'preexec_fn': self.utils.disable_signals})
Expand Down Expand Up @@ -312,30 +310,30 @@ def create_cluster(self, platform, cluster_name):
cluster_info["uuid"] = self.environment["uuid"]
cluster_info["install_method"] = "terraform"
cluster_info["per_template_count"] = platform.environment['clusters_per_apply']
cluster_info["tf_count"] = platform.environment['clusters_per_apply_count']
cluster_info["tf_count"] = platform.environment['clusters_per_apply_count']
cluster_info["total_count"] = platform.environment['cluster_count']
self.logging.info(f"Creating cluster {cluster_info['index']} on ROSA with name {cluster_name} and {cluster_info['workers']} workers")
cluster_info["path"] = platform.environment["path"] + "/" + cluster_name
os.mkdir(cluster_info["path"])
self.logging.debug("Output directory set to %s" % cluster_info["path"])

while retry_loop <= 1800: # 1hr timeout
while retry_loop <= 600: # 1hr timeout
retry_loop += 1
cluster_start_time = int(datetime.datetime.utcnow().timestamp())
status_code, status_out, status_err = self.utils.subprocess_exec("rosa describe cluster -c " + cluster_name + " -o json", extra_params={"universal_newlines": True})
if status_code != 0:
if retry_loop <= 1800:
if retry_loop <= 600:
self.logging.debug(f"ROSA cluster {cluster_name} is not available yet, retrying..")
self.logging.debug(status_out)
time.sleep(2)
time.sleep(6)
else:
cluster_info['status'] = "not ready"
self.logging.debug(watch_out)
self.logging.error(watch_err)
self.logging.debug(status_out)
self.logging.error(status_err)
return 1
else:
preflight_ch = self._preflight_wait(cluster_name, cluster_name)
cluster_info["preflight_checks"] = preflight_ch.result()
cluster_info["preflight_checks"] = preflight_ch
break

watch_code, watch_out, watch_err = self.utils.subprocess_exec("rosa logs install -c " + cluster_name + " --watch", cluster_info["path"] + "/installation.log", {'preexec_fn': self.utils.disable_signals})
Expand Down
5 changes: 3 additions & 2 deletions libs/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ def cleanup_scheduler(self, platform):
time.sleep(platform.environment["delay_between_cleanup"])
if platform.environment["subplatform"] and platform.environment["subplatform"] == "terraform":
if platform.destroy_tf_template(platform) != 0:
return 1
return 1
return delete_cluster_thread_list

# To form the cluster_info dict for cleanup functions
Expand Down Expand Up @@ -201,9 +201,10 @@ def install_scheduler(self, platform):
cluster_thread_list.append(thread)
thread.start()
self.logging.debug("Number of alive threads %d" % threading.active_count())
time.sleep(1)
if platform.environment["subplatform"] and platform.environment["subplatform"] == "terraform":
if platform.apply_tf_template(platform) != 0:
return 1
return 1
except Exception as err:
self.logging.error(err)
self.logging.error("Thread creation failed")
Expand Down

0 comments on commit 0731b5b

Please sign in to comment.