From 8536fde3485f68e6b0e2359c5b4dd25e2209f4fb Mon Sep 17 00:00:00 2001 From: Nicholas Nezis Date: Tue, 1 Mar 2022 23:52:27 -0500 Subject: [PATCH 01/82] Fix for missing physical plan in UI --- heron/tools/ui/resources/static/js/exceptions.js | 6 +++--- heron/tools/ui/resources/static/js/physical-plan.js | 2 +- heron/tools/ui/resources/static/js/plan-controller.js | 2 +- heron/tools/ui/resources/static/js/topologies.js | 4 ++-- heron/tools/ui/src/python/main.py | 8 ++++---- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/heron/tools/ui/resources/static/js/exceptions.js b/heron/tools/ui/resources/static/js/exceptions.js index baea39fa225..92081541bc9 100644 --- a/heron/tools/ui/resources/static/js/exceptions.js +++ b/heron/tools/ui/resources/static/js/exceptions.js @@ -170,9 +170,9 @@ var InstanceExceptionLogs = React.createClass({ } if (instanceInfo) { var logfile = instanceInfo.logfile; - var stmgrId = instanceInfo.stmgrId; - var jobUrl = pplan.stmgrs[stmgrId].joburl; - var host = "http://" + pplan.stmgrs[stmgrId].host + ":1338"; + var stmgr_id = instanceInfo.stmgr_id; + var jobUrl = pplan.stmgrs[stmgr_id].joburl; + var host = "http://" + pplan.stmgrs[stmgr_id].host + ":1338"; mainLinks = mainLinks.concat([['Logs', logfile], ['Aurora', jobUrl], ['Host', host]]); } } diff --git a/heron/tools/ui/resources/static/js/physical-plan.js b/heron/tools/ui/resources/static/js/physical-plan.js index cf43232074b..16438c096fd 100644 --- a/heron/tools/ui/resources/static/js/physical-plan.js +++ b/heron/tools/ui/resources/static/js/physical-plan.js @@ -52,7 +52,7 @@ for (var wk in instances) { var worker = instances[wk]; if (worker.name === comp) { - containers[worker.stmgrId].children.push(worker); + containers[worker.stmgr_id].children.push(worker); } } } diff --git a/heron/tools/ui/resources/static/js/plan-controller.js b/heron/tools/ui/resources/static/js/plan-controller.js index cbb7e138519..df3df2acd57 100644 --- 
a/heron/tools/ui/resources/static/js/plan-controller.js +++ b/heron/tools/ui/resources/static/js/plan-controller.js @@ -45,7 +45,7 @@ function PlanController(baseUrl, cluster, environ, toponame, physicalPlan, logic var containers = {}, instances = {}; d3instances.each(function (d) { if (!d3.select(this).classed('fade')) { - containers[d.stmgrId] = true; + containers[d.stmgr_id] = true; instances[d.id] = true; } }); diff --git a/heron/tools/ui/resources/static/js/topologies.js b/heron/tools/ui/resources/static/js/topologies.js index a9a104a6d0e..7ed9b8adb0c 100644 --- a/heron/tools/ui/resources/static/js/topologies.js +++ b/heron/tools/ui/resources/static/js/topologies.js @@ -1480,8 +1480,8 @@ var InstanceCounters = React.createClass({ } } if (instanceInfo) { - var stmgrId = instanceInfo.stmgrId; - var container = stmgrId.split("-")[1] + var stmgr_id = instanceInfo.stmgr_id; + var container = stmgr_id.split("-")[1] var topologyParams = this.props.info.cluster + '/' + this.props.info.environ + '/' + this.props.info.topology var instanceParams = topologyParams + '/' + instanceInfo.id diff --git a/heron/tools/ui/src/python/main.py b/heron/tools/ui/src/python/main.py index 6cee080ea4e..8ff6c25828f 100644 --- a/heron/tools/ui/src/python/main.py +++ b/heron/tools/ui/src/python/main.py @@ -462,7 +462,7 @@ def pid_snippet( ) -> Response: """Render a HTML snippet containing topology output of container.""" physical_plan = tracker.get_physical_plan(cluster, environment, topology) - host = physical_plan["stmgrs"][physical_plan["instances"][instance]["stmgrId"]][ + host = physical_plan["stmgrs"][physical_plan["instances"][instance]["stmgr_id"]][ "host" ] info = tracker.get_instance_pid(cluster, environment, topology, instance) @@ -492,7 +492,7 @@ def jstack_snippet( ) -> HTMLResponse: """Render a HTML snippet containing jstack output of container.""" physical_plan = tracker.get_physical_plan(cluster, environment, topology) - host = 
physical_plan["stmgrs"][physical_plan["instances"][instance]["stmgrId"]][ + host = physical_plan["stmgrs"][physical_plan["instances"][instance]["stmgr_id"]][ "host" ] info = tracker.get_instance_jstack(cluster, environment, topology, instance) @@ -521,7 +521,7 @@ def jmap_snippet( ) -> HTMLResponse: """Render a HTML snippet containing jmap output of container.""" physical_plan = tracker.get_physical_plan(cluster, environment, topology) - host = physical_plan["stmgrs"][physical_plan["instances"][instance]["stmgrId"]][ + host = physical_plan["stmgrs"][physical_plan["instances"][instance]["stmgr_id"]][ "host" ] info = tracker.run_instance_jmap(cluster, environment, topology, instance) @@ -558,7 +558,7 @@ def histogram_snippet( """Render a HTML snippet containing jmap histogram output of container.""" # use a function to DRY up these container API methods physical_plan = tracker.get_physical_plan(cluster, environment, topology) - host = physical_plan["stmgrs"][physical_plan["instances"][instance]["stmgrId"]][ + host = physical_plan["stmgrs"][physical_plan["instances"][instance]["stmgr_id"]][ "host" ] info = tracker.get_instance_mem_histogram( From 9cd8e237846fef7a60e0542137ec06c9cdcaed06 Mon Sep 17 00:00:00 2001 From: Nicholas Nezis Date: Tue, 1 Mar 2022 23:57:24 -0500 Subject: [PATCH 02/82] Fix the Heron UI timeline metrics --- heron/tools/tracker/src/python/routers/metrics.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/heron/tools/tracker/src/python/routers/metrics.py b/heron/tools/tracker/src/python/routers/metrics.py index 59abed6bc63..941ffd8ff46 100644 --- a/heron/tools/tracker/src/python/routers/metrics.py +++ b/heron/tools/tracker/src/python/routers/metrics.py @@ -119,7 +119,7 @@ async def get_metrics( # pylint: disable=too-many-arguments ) -@router.get("/metricstimeline", response_model=metricstimeline.MetricsTimeline) +@router.get("/metrics/timeline", response_model=metricstimeline.MetricsTimeline) async def get_metrics_timeline( # 
pylint: disable=too-many-arguments cluster: str, role: Optional[str], @@ -158,7 +158,7 @@ class MetricsQueryResponse(BaseModel): # pylint: disable=too-few-public-methods ) -@router.get("/metricsquery", response_model=MetricsQueryResponse) +@router.get("/metrics/query", response_model=MetricsQueryResponse) async def get_metrics_query( # pylint: disable=too-many-arguments cluster: str, role: Optional[str], From 6c84aa8c8f57a8c2ad0d93b5d315b6914c2e7de1 Mon Sep 17 00:00:00 2001 From: Nicholas Nezis Date: Thu, 3 Mar 2022 08:16:52 -0500 Subject: [PATCH 03/82] Reverting the tracker API back to previous calls --- heron/tools/tracker/src/python/routers/metrics.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/heron/tools/tracker/src/python/routers/metrics.py b/heron/tools/tracker/src/python/routers/metrics.py index 941ffd8ff46..59abed6bc63 100644 --- a/heron/tools/tracker/src/python/routers/metrics.py +++ b/heron/tools/tracker/src/python/routers/metrics.py @@ -119,7 +119,7 @@ async def get_metrics( # pylint: disable=too-many-arguments ) -@router.get("/metrics/timeline", response_model=metricstimeline.MetricsTimeline) +@router.get("/metricstimeline", response_model=metricstimeline.MetricsTimeline) async def get_metrics_timeline( # pylint: disable=too-many-arguments cluster: str, role: Optional[str], @@ -158,7 +158,7 @@ class MetricsQueryResponse(BaseModel): # pylint: disable=too-few-public-methods ) -@router.get("/metrics/query", response_model=MetricsQueryResponse) +@router.get("/metricsquery", response_model=MetricsQueryResponse) async def get_metrics_query( # pylint: disable=too-many-arguments cluster: str, role: Optional[str], From dd23309cba06853e495f378f5d2cf2c50706a41e Mon Sep 17 00:00:00 2001 From: Nicholas Nezis Date: Thu, 3 Mar 2022 08:17:51 -0500 Subject: [PATCH 04/82] More cleanup to make the UI metricstimeline work --- heron/tools/ui/resources/static/js/plan-stats.js | 2 +- heron/tools/ui/resources/static/js/stat-trendlines.js | 2 +- 
heron/tools/ui/src/python/main.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/heron/tools/ui/resources/static/js/plan-stats.js b/heron/tools/ui/resources/static/js/plan-stats.js index d3607fb7301..f690ab97dd0 100644 --- a/heron/tools/ui/resources/static/js/plan-stats.js +++ b/heron/tools/ui/resources/static/js/plan-stats.js @@ -375,7 +375,7 @@ } function createMetricsUrl(metric, component, instance, start, end) { - var url = baseUrl + '/topologies/metrics/timeline?'; + var url = baseUrl + '/topologies/metricstimeline?'; return [ url + 'cluster=' + cluster, 'environ=' + environ, diff --git a/heron/tools/ui/resources/static/js/stat-trendlines.js b/heron/tools/ui/resources/static/js/stat-trendlines.js index 8b845f8e1aa..fc19e727784 100644 --- a/heron/tools/ui/resources/static/js/stat-trendlines.js +++ b/heron/tools/ui/resources/static/js/stat-trendlines.js @@ -259,7 +259,7 @@ function StatTrendlines(baseUrl, cluster, environ, toponame, physicalPlan, logic executeMetricsQuery(); function executeMetricsQuery() { - var u = baseUrl + '/topologies/metrics/timeline?' + var u = baseUrl + '/topologies/metricstimeline?' 
var request = [ u + 'cluster=' + cluster, 'environ=' + environ, diff --git a/heron/tools/ui/src/python/main.py b/heron/tools/ui/src/python/main.py index 8ff6c25828f..7c4c8f84093 100644 --- a/heron/tools/ui/src/python/main.py +++ b/heron/tools/ui/src/python/main.py @@ -190,7 +190,7 @@ def metrics( return result query_handler = tracker.HeronQueryHandler() -@topologies_router.get("/metrics/timeline") +@topologies_router.get("/metricstimeline") def timeline( cluster: str, environ: str, From 7e3eeb48884d06561b43c4c4696b73d9b5672b47 Mon Sep 17 00:00:00 2001 From: Nicholas Nezis Date: Thu, 3 Mar 2022 23:21:29 -0500 Subject: [PATCH 05/82] Cleaning up the REST API structure --- .../sensors/TrackerMetricsProvider.java | 2 +- heron/tools/common/src/python/clients/tracker.py | 10 +++++----- .../tracker/src/python/routers/container.py | 6 +++--- .../tools/tracker/src/python/routers/metrics.py | 4 ++-- heron/tools/ui/resources/static/js/plan-stats.js | 2 +- .../ui/resources/static/js/stat-trendlines.js | 2 +- heron/tools/ui/src/python/main.py | 10 +++++----- .../user-manuals-tracker-rest.md | 16 ++++++++-------- 8 files changed, 26 insertions(+), 26 deletions(-) diff --git a/heron/healthmgr/src/java/org/apache/heron/healthmgr/sensors/TrackerMetricsProvider.java b/heron/healthmgr/src/java/org/apache/heron/healthmgr/sensors/TrackerMetricsProvider.java index 742f6182c73..e2cb9af3fe2 100644 --- a/heron/healthmgr/src/java/org/apache/heron/healthmgr/sensors/TrackerMetricsProvider.java +++ b/heron/healthmgr/src/java/org/apache/heron/healthmgr/sensors/TrackerMetricsProvider.java @@ -63,7 +63,7 @@ public TrackerMetricsProvider(@Named(CONF_METRICS_SOURCE_URL) String trackerURL, Client client = ClientBuilder.newClient(); this.baseTarget = client.target(trackerURL) - .path("topologies/metricstimeline") + .path("topologies/metrics/timeline") .queryParam("cluster", cluster) .queryParam("environ", environ) .queryParam("topology", topologyName); diff --git 
a/heron/tools/common/src/python/clients/tracker.py b/heron/tools/common/src/python/clients/tracker.py index 7a034cde8b5..eb11ec8137c 100644 --- a/heron/tools/common/src/python/clients/tracker.py +++ b/heron/tools/common/src/python/clients/tracker.py @@ -54,8 +54,8 @@ SCHEDULER_LOCATION_URL_FMT = "%s/schedulerlocation" % TOPOLOGIES_URL_FMT METRICS_URL_FMT = "%s/metrics" % TOPOLOGIES_URL_FMT -METRICS_QUERY_URL_FMT = "%s/metricsquery" % TOPOLOGIES_URL_FMT -METRICS_TIMELINE_URL_FMT = "%s/metricstimeline" % TOPOLOGIES_URL_FMT +METRICS_QUERY_URL_FMT = "%s/metrics/query" % TOPOLOGIES_URL_FMT +METRICS_TIMELINE_URL_FMT = "%s/metrics/timeline" % TOPOLOGIES_URL_FMT EXCEPTIONS_URL_FMT = "%s/exceptions" % TOPOLOGIES_URL_FMT EXCEPTION_SUMMARY_URL_FMT = "%s/exceptionsummary" % TOPOLOGIES_URL_FMT @@ -66,9 +66,9 @@ JMAP_URL_FMT = "%s/jmap" % TOPOLOGIES_URL_FMT HISTOGRAM_URL_FMT = "%s/histo" % TOPOLOGIES_URL_FMT -FILE_DATA_URL_FMT = "%s/containerfiledata" % TOPOLOGIES_URL_FMT -FILE_DOWNLOAD_URL_FMT = "%s/containerfiledownload" % TOPOLOGIES_URL_FMT -FILESTATS_URL_FMT = "%s/containerfilestats" % TOPOLOGIES_URL_FMT +FILE_DATA_URL_FMT = "%s/container/filedata" % TOPOLOGIES_URL_FMT +FILE_DOWNLOAD_URL_FMT = "%s/container/filedownload" % TOPOLOGIES_URL_FMT +FILESTATS_URL_FMT = "%s/container/filestats" % TOPOLOGIES_URL_FMT def strip_whitespace(s): diff --git a/heron/tools/tracker/src/python/routers/container.py b/heron/tools/tracker/src/python/routers/container.py index 284ccbf50ce..f556da3cb29 100644 --- a/heron/tools/tracker/src/python/routers/container.py +++ b/heron/tools/tracker/src/python/routers/container.py @@ -37,7 +37,7 @@ router = EnvelopingAPIRouter() -@router.get("/containerfiledata") +@router.get("/container/filedata") async def get_container_file_slice( # pylint: disable=too-many-arguments cluster: str, environ: str, @@ -64,7 +64,7 @@ async def get_container_file_slice( # pylint: disable=too-many-arguments return response.json() -@router.get("/containerfiledownload", 
response_class=StreamingResponse) +@router.get("/container/filedownload", response_class=StreamingResponse) async def get_container_file( # pylint: disable=too-many-arguments cluster: str, environ: str, @@ -86,7 +86,7 @@ async def get_container_file( # pylint: disable=too-many-arguments ) -@router.get("/containerfilestats") +@router.get("/container/filestats") async def get_container_file_listing( # pylint: disable=too-many-arguments cluster: str, environ: str, diff --git a/heron/tools/tracker/src/python/routers/metrics.py b/heron/tools/tracker/src/python/routers/metrics.py index 59abed6bc63..941ffd8ff46 100644 --- a/heron/tools/tracker/src/python/routers/metrics.py +++ b/heron/tools/tracker/src/python/routers/metrics.py @@ -119,7 +119,7 @@ async def get_metrics( # pylint: disable=too-many-arguments ) -@router.get("/metricstimeline", response_model=metricstimeline.MetricsTimeline) +@router.get("/metrics/timeline", response_model=metricstimeline.MetricsTimeline) async def get_metrics_timeline( # pylint: disable=too-many-arguments cluster: str, role: Optional[str], @@ -158,7 +158,7 @@ class MetricsQueryResponse(BaseModel): # pylint: disable=too-few-public-methods ) -@router.get("/metricsquery", response_model=MetricsQueryResponse) +@router.get("/metrics/query", response_model=MetricsQueryResponse) async def get_metrics_query( # pylint: disable=too-many-arguments cluster: str, role: Optional[str], diff --git a/heron/tools/ui/resources/static/js/plan-stats.js b/heron/tools/ui/resources/static/js/plan-stats.js index f690ab97dd0..d3607fb7301 100644 --- a/heron/tools/ui/resources/static/js/plan-stats.js +++ b/heron/tools/ui/resources/static/js/plan-stats.js @@ -375,7 +375,7 @@ } function createMetricsUrl(metric, component, instance, start, end) { - var url = baseUrl + '/topologies/metricstimeline?'; + var url = baseUrl + '/topologies/metrics/timeline?'; return [ url + 'cluster=' + cluster, 'environ=' + environ, diff --git 
a/heron/tools/ui/resources/static/js/stat-trendlines.js b/heron/tools/ui/resources/static/js/stat-trendlines.js index fc19e727784..8b845f8e1aa 100644 --- a/heron/tools/ui/resources/static/js/stat-trendlines.js +++ b/heron/tools/ui/resources/static/js/stat-trendlines.js @@ -259,7 +259,7 @@ function StatTrendlines(baseUrl, cluster, environ, toponame, physicalPlan, logic executeMetricsQuery(); function executeMetricsQuery() { - var u = baseUrl + '/topologies/metricstimeline?' + var u = baseUrl + '/topologies/metrics/timeline?' var request = [ u + 'cluster=' + cluster, 'environ=' + environ, diff --git a/heron/tools/ui/src/python/main.py b/heron/tools/ui/src/python/main.py index 7c4c8f84093..6cee080ea4e 100644 --- a/heron/tools/ui/src/python/main.py +++ b/heron/tools/ui/src/python/main.py @@ -190,7 +190,7 @@ def metrics( return result query_handler = tracker.HeronQueryHandler() -@topologies_router.get("/metricstimeline") +@topologies_router.get("/metrics/timeline") def timeline( cluster: str, environ: str, @@ -462,7 +462,7 @@ def pid_snippet( ) -> Response: """Render a HTML snippet containing topology output of container.""" physical_plan = tracker.get_physical_plan(cluster, environment, topology) - host = physical_plan["stmgrs"][physical_plan["instances"][instance]["stmgr_id"]][ + host = physical_plan["stmgrs"][physical_plan["instances"][instance]["stmgrId"]][ "host" ] info = tracker.get_instance_pid(cluster, environment, topology, instance) @@ -492,7 +492,7 @@ def jstack_snippet( ) -> HTMLResponse: """Render a HTML snippet containing jstack output of container.""" physical_plan = tracker.get_physical_plan(cluster, environment, topology) - host = physical_plan["stmgrs"][physical_plan["instances"][instance]["stmgr_id"]][ + host = physical_plan["stmgrs"][physical_plan["instances"][instance]["stmgrId"]][ "host" ] info = tracker.get_instance_jstack(cluster, environment, topology, instance) @@ -521,7 +521,7 @@ def jmap_snippet( ) -> HTMLResponse: """Render a HTML snippet 
containing jmap output of container.""" physical_plan = tracker.get_physical_plan(cluster, environment, topology) - host = physical_plan["stmgrs"][physical_plan["instances"][instance]["stmgr_id"]][ + host = physical_plan["stmgrs"][physical_plan["instances"][instance]["stmgrId"]][ "host" ] info = tracker.run_instance_jmap(cluster, environment, topology, instance) @@ -558,7 +558,7 @@ def histogram_snippet( """Render a HTML snippet containing jmap histogram output of container.""" # use a function to DRY up these container API methods physical_plan = tracker.get_physical_plan(cluster, environment, topology) - host = physical_plan["stmgrs"][physical_plan["instances"][instance]["stmgr_id"]][ + host = physical_plan["stmgrs"][physical_plan["instances"][instance]["stmgrId"]][ "host" ] info = tracker.get_instance_mem_histogram( diff --git a/website2/website/versioned_docs/version-0.20.0-incubating/user-manuals-tracker-rest.md b/website2/website/versioned_docs/version-0.20.0-incubating/user-manuals-tracker-rest.md index 8dd511d0b9c..564e0f8d109 100644 --- a/website2/website/versioned_docs/version-0.20.0-incubating/user-manuals-tracker-rest.md +++ b/website2/website/versioned_docs/version-0.20.0-incubating/user-manuals-tracker-rest.md @@ -46,10 +46,10 @@ All Heron Tracker endpoints return a JSON object with the following information: * [`/topologies/executionstate`](#topologies_executionstate) * [`/topologies/schedulerlocation`](#topologies_schedulerlocation) * [`/topologies/metrics`](#topologies_metrics) -* [`/topologies/metricstimeline`](#topologies_metricstimeline) -* [`/topologies/metricsquery`](#topologies_metricsquery) -* [`/topologies/containerfiledata`](#topologies_containerfiledata) -* [`/topologies/containerfilestats`](#topologies_containerfilestats) +* [`/topologies/metrics/timeline`](#topologies_metricstimeline) +* [`/topologies/metrics/query`](#topologies_metricsquery) +* [`/topologies/container/filedata`](#topologies_containerfiledata) +* 
[`/topologies/container/filestats`](#topologies_containerfilestats) * [`/topologies/exceptions`](#topologies_exceptions) * [`/topologies/exceptionsummary`](#topologies_exceptionsummary) * [`/topologies/pid`](#topologies_pid) @@ -237,7 +237,7 @@ port, and the heron-shell port that it exposes. --- -### /topologies/containerfilestats +### /topologies/container/filestats Returns the file stats for a container. This is the output of the command `ls -lh` when run in the directory where the heron-controller launched all the processes. @@ -255,7 +255,7 @@ This endpoint is mainly used by ui for exploring files in a container. --- -### /topologies/containerfiledata +### /topologies/container/filedata Returns the file data for a file of a container. @@ -302,7 +302,7 @@ Returns a JSON map of instances of the topology to their respective metrics time To filter instances returned use the `instance` parameter discussed below. The difference between this and `/metrics` endpoint above, is that `/metrics` will report -cumulative value over the period of `interval` provided. On the other hand, `/metricstimeline` +cumulative value over the period of `interval` provided. On the other hand, `/metrics/timeline` endpoint will report minutely values for each metricname for each instance. Note that these metrics come from TManager, which only holds metrics @@ -321,7 +321,7 @@ is older than 3 hours ago, those minutes would not be part of the response. and greater than `starttime`) * `instance` (optional) --- IDs of the instances. If not present, return for all the instances. -### /topologies/metricsquery +### /topologies/metrics/query Executes the metrics query for the topology and returns the result in form of minutely timeseries. A detailed description of query language is given [below](#metricsquery). 
From feb4fe6f94e71ec8b39d34aadaa8ce4fc26fdf1a Mon Sep 17 00:00:00 2001 From: Nicholas Nezis Date: Sun, 6 Mar 2022 09:20:42 -0500 Subject: [PATCH 06/82] Various heron-tracker fixes --- heron/tools/tracker/src/python/metricstimeline.py | 4 ++-- .../tools/tracker/src/python/routers/container.py | 14 +++++++------- heron/tools/tracker/src/python/routers/metrics.py | 6 +++--- heron/tools/tracker/src/python/tracker.py | 2 +- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/heron/tools/tracker/src/python/metricstimeline.py b/heron/tools/tracker/src/python/metricstimeline.py index 75b33c8f1b8..eb0c21b52de 100644 --- a/heron/tools/tracker/src/python/metricstimeline.py +++ b/heron/tools/tracker/src/python/metricstimeline.py @@ -73,11 +73,11 @@ async def get_metrics_timeline( # Form and send the http request. url = f"http://{tmanager.host}:{tmanager.stats_port}/stats" - with httpx.AsyncClient() as client: + async with httpx.AsyncClient() as client: result = await client.post(url, data=request_parameters.SerializeToString()) # Check the response code - error if it is in 400s or 500s - if result.code >= 400: + if result.status_code >= 400: message = f"Error in getting metrics from Tmanager, code: {result.code}" raise Exception(message) diff --git a/heron/tools/tracker/src/python/routers/container.py b/heron/tools/tracker/src/python/routers/container.py index f556da3cb29..51f1ffed062 100644 --- a/heron/tools/tracker/src/python/routers/container.py +++ b/heron/tools/tracker/src/python/routers/container.py @@ -68,9 +68,9 @@ async def get_container_file_slice( # pylint: disable=too-many-arguments async def get_container_file( # pylint: disable=too-many-arguments cluster: str, environ: str, - role: Optional[str], container: str, path: str, + role: Optional[str] = None, topology_name: str = Query(..., alias="topology"), ): """Return a given raw file.""" @@ -90,9 +90,9 @@ async def get_container_file( # pylint: disable=too-many-arguments async def 
get_container_file_listing( # pylint: disable=too-many-arguments cluster: str, environ: str, - role: Optional[str], container: str, path: str, + role: Optional[str] = None, topology_name: str = Query(..., alias="topology"), ): """Return the stats for a given directory.""" @@ -186,9 +186,9 @@ async def _get_exception_log_response( @router.get("/exceptions", response_model=List[ExceptionLog]) async def get_exceptions( # pylint: disable=too-many-arguments cluster: str, - role: Optional[str], environ: str, component: str, + role: Optional[str] = None, instances: List[str] = Query(..., alias="instance"), topology_name: str = Query(..., alias="topology"), ): @@ -220,9 +220,9 @@ class ExceptionSummaryItem(BaseModel): @router.get("/exceptionsummary", response_model=List[ExceptionSummaryItem]) async def get_exceptions_summary( # pylint: disable=too-many-arguments cluster: str, - role: Optional[str], environ: str, component: str, + role: Optional[str] = None, instances: List[str] = Query(..., alias="instance"), topology_name: str = Query(..., alias="topology"), ): @@ -269,9 +269,9 @@ async def get_container_heron_pid( @router.get("/jstack", response_model=ShellResponse) async def get_container_heron_jstack( cluster: str, - role: Optional[str], environ: str, instance: str, + role: Optional[str] = None, topology_name: str = Query(..., alias="topology"), ): """Get jstack output for the heron process.""" @@ -289,9 +289,9 @@ async def get_container_heron_jstack( @router.get("/jmap", response_model=ShellResponse) async def get_container_heron_jmap( cluster: str, - role: Optional[str], environ: str, instance: str, + role: Optional[str] = None, topology_name: str = Query(..., alias="topology"), ): """Get jmap output for the heron process.""" @@ -309,9 +309,9 @@ async def get_container_heron_jmap( @router.get("/histo", response_model=ShellResponse) async def get_container_heron_memory_histogram( cluster: str, - role: Optional[str], environ: str, instance: str, + role: Optional[str] = 
None, topology_name: str = Query(..., alias="topology"), ): """Get memory usage histogram the heron process. This uses the ouput of the last jmap run.""" diff --git a/heron/tools/tracker/src/python/routers/metrics.py b/heron/tools/tracker/src/python/routers/metrics.py index 941ffd8ff46..d407993622b 100644 --- a/heron/tools/tracker/src/python/routers/metrics.py +++ b/heron/tools/tracker/src/python/routers/metrics.py @@ -100,9 +100,9 @@ async def get_component_metrics( @router.get("/metrics", response_model=ComponentMetrics) async def get_metrics( # pylint: disable=too-many-arguments cluster: str, - role: Optional[str], environ: str, component: str, + role: Optional[str] = None, topology_name: str = Query(..., alias="topology"), metric_names: Optional[List[str]] = Query(None, alias="metricname"), instances: Optional[List[str]] = Query(None, alias="instance"), @@ -122,11 +122,11 @@ async def get_metrics( # pylint: disable=too-many-arguments @router.get("/metrics/timeline", response_model=metricstimeline.MetricsTimeline) async def get_metrics_timeline( # pylint: disable=too-many-arguments cluster: str, - role: Optional[str], environ: str, component: str, start_time: int, end_time: int, + role: Optional[str] = None, topology_name: str = Query(..., alias="topology"), metric_names: Optional[List[str]] = Query(None, alias="metricname"), instances: Optional[List[str]] = Query(None, alias="instance"), @@ -161,9 +161,9 @@ class MetricsQueryResponse(BaseModel): # pylint: disable=too-few-public-methods @router.get("/metrics/query", response_model=MetricsQueryResponse) async def get_metrics_query( # pylint: disable=too-many-arguments cluster: str, - role: Optional[str], environ: str, query: str, + role: Optional[str] = None, start_time: int = Query(..., alias="starttime"), end_time: int = Query(..., alias="endtime"), topology_name: str = Query(..., alias="topology"), diff --git a/heron/tools/tracker/src/python/tracker.py b/heron/tools/tracker/src/python/tracker.py index 
5fb2fce4167..756f3c76b2f 100644 --- a/heron/tools/tracker/src/python/tracker.py +++ b/heron/tools/tracker/src/python/tracker.py @@ -87,9 +87,9 @@ def stop_sync(self) -> None: def get_topology( self, cluster: str, - role: Optional[str], environ: str, topology_name: str, + role: Optional[str] = None, ) -> Any: """ Find and return the topology given its cluster, environ, topology name, and From 32d339e273d09df8f3e3b631d79604a1289789e5 Mon Sep 17 00:00:00 2001 From: Nicholas Nezis Date: Mon, 7 Mar 2022 01:33:34 -0500 Subject: [PATCH 07/82] More fixes --- .../src/python/handlers/downloadhandler.py | 62 ++++++++++--------- .../tracker/src/python/metricstimeline.py | 8 ++- .../tracker/src/python/routers/container.py | 45 +++++++------- heron/tools/tracker/src/python/tracker.py | 4 +- .../ui/resources/static/js/topologies.js | 7 ++- 5 files changed, 66 insertions(+), 60 deletions(-) diff --git a/heron/shell/src/python/handlers/downloadhandler.py b/heron/shell/src/python/handlers/downloadhandler.py index 351a8b3bbe2..3186253941c 100644 --- a/heron/shell/src/python/handlers/downloadhandler.py +++ b/heron/shell/src/python/handlers/downloadhandler.py @@ -20,19 +20,19 @@ ''' downloadhandler.py ''' -import mimetypes import os import logging -from tornado import web, iostream, gen +import tornado.web import anticrlf from heron.shell.src.python import utils -class DownloadHandler(web.RequestHandler): +class DownloadHandler(tornado.web.RequestHandler): """ Responsible for downloading the files. """ - async def get(self, path): + @tornado.web.asynchronous + def get(self, path): """ get method """ handler = logging.StreamHandler() @@ -43,37 +43,41 @@ async def get(self, path): logger.debug("request to download: %s", path) + # If the file is large, we want to abandon downloading + # if user cancels the requests. 
+ # pylint: disable=attribute-defined-outside-init + self.connection_closed = False + self.set_header("Content-Disposition", "attachment") if not utils.check_path(path): + self.write("Only relative paths are allowed") self.set_status(403) - await self.finish("Only relative paths are allowed") + self.finish() return if path is None or not os.path.isfile(path): + self.write("File %s not found" % path) self.set_status(404) - await self.finish("File %s not found" % path) + self.finish() return - chunk_size = int(4 * 1024 * 1024) - content_type = mimetypes.guess_type(path) - self.set_header("Content-Type", content_type[0]) - with open(path, 'rb') as f: - while True: - chunk = f.read(chunk_size) - if not chunk: - break - try: - self.write(chunk) # write the chunk to response - await self.flush() # send the chunk to client - except iostream.StreamCloseError: - # this means the client has closed the connection - # so break the loop - break - finally: - # deleting the chunk is very important because - # if many client are downloading files at the - # same time, the chunks in memory will keep - # increasing and will eat up the RAM - del chunk - # pause the coroutine so other handlers can run - await gen.sleep(0.000000001) # 1 nanosecond + length = int(4 * 1024 * 1024) + offset = int(0) + while True: + data = utils.read_chunk(path, offset=offset, length=length, escape_data=False) + if self.connection_closed or 'data' not in data or len(data['data']) < length: + break + offset += length + self.write(data['data']) + self.flush() + + if 'data' in data: + self.write(data['data']) + self.finish() + +def on_connection_close(self): + ''' + :return: + ''' + # pylint: disable=attribute-defined-outside-init + self.connection_closed = True diff --git a/heron/tools/tracker/src/python/metricstimeline.py b/heron/tools/tracker/src/python/metricstimeline.py index eb0c21b52de..337724f349b 100644 --- a/heron/tools/tracker/src/python/metricstimeline.py +++ 
b/heron/tools/tracker/src/python/metricstimeline.py @@ -85,9 +85,9 @@ async def get_metrics_timeline( response_data = tmanager_pb2.MetricResponse() response_data.ParseFromString(result.content) - if response_data.status.status == common_pb2.NOTOK: - if response_data.status.HasField("message"): - Log.warn("Received response from Tmanager: %s", response_data.status.message) + # if response_data.status.status == common_pb2.NOTOK: + # if response_data.status.HasField("message"): + # Log.warn("Received response from Tmanager: %s", response_data.status.message) timeline = {} # Loop through all the metrics @@ -99,6 +99,8 @@ async def get_metrics_timeline( # Loop through all individual metrics. for im in metric.metric: metricname = im.name + if metricname not in timeline: + timeline.setdefault(metricname, {}) if instance not in timeline[metricname]: timeline.setdefault(metricname, {})[instance] = {} diff --git a/heron/tools/tracker/src/python/routers/container.py b/heron/tools/tracker/src/python/routers/container.py index 51f1ffed062..de0a21f03c9 100644 --- a/heron/tools/tracker/src/python/routers/container.py +++ b/heron/tools/tracker/src/python/routers/container.py @@ -41,11 +41,11 @@ async def get_container_file_slice( # pylint: disable=too-many-arguments cluster: str, environ: str, - role: Optional[str], container: str, path: str, offset: int, length: int, + role: Optional[str] = None, topology_name: str = Query(..., alias="topology"), ): """ @@ -55,11 +55,11 @@ async def get_container_file_slice( # pylint: disable=too-many-arguments """ topology = state.tracker.get_topology(cluster, role, environ, topology_name) - stmgr = state.tracker.pb2_to_api(topology)["physical_plan"]["stmgrs"][f"stmgr-{container}"] - url = f"http://{stmgr['host']}:{stmgr['shell_port']}/filedata/{path}" + stmgr = topology.info.physical_plan.stmgrs[f"stmgr-{container}"] + url = f"http://{stmgr.host}:{stmgr.shell_port}/filedata/{path}" params = {"offset": offset, "length": length} - with 
httpx.AsyncClient() as client: + async with httpx.AsyncClient() as client: response = await client.get(url, params=params) return response.json() @@ -75,8 +75,8 @@ async def get_container_file( # pylint: disable=too-many-arguments ): """Return a given raw file.""" topology = state.tracker.get_topology(cluster, role, environ, topology_name) - stmgr = state.tracker.pb2_to_api(topology)["physical_plan"]["stmgrs"][f"stmgr-{container}"] - url = f"http://{stmgr['host']}:{stmgr['shell_port']}/download/{path}" + stmgr = topology.info.physical_plan.stmgrs[f"stmgr-{container}"] + url = f"http://{stmgr.host}:{stmgr.shell_port}/download/{path}" _, _, filename = path.rpartition("/") with httpx.stream("GET", url) as response: @@ -85,7 +85,6 @@ async def get_container_file( # pylint: disable=too-many-arguments headers={"Content-Disposition": f"attachment; filename={filename}"}, ) - @router.get("/container/filestats") async def get_container_file_listing( # pylint: disable=too-many-arguments cluster: str, @@ -97,9 +96,9 @@ async def get_container_file_listing( # pylint: disable=too-many-arguments ): """Return the stats for a given directory.""" topology = state.tracker.get_topology(cluster, role, environ, topology_name) - stmgr = state.tracker.pb2_to_api(topology)["physical_plan"]["stmgrs"][f"stmgr-{container}"] - url = utils.make_shell_filestats_url(stmgr["host"], stmgr["shell_port"], path) - with httpx.AsyncClient() as client: + stmgr = topology.info.physical_plan.stmgrs[f"stmgr-{container}"] + url = utils.make_shell_filestats_url(stmgr.host, stmgr.shell_port, path) + async with httpx.AsyncClient() as client: response = await client.get(url) return response.json() @@ -107,8 +106,8 @@ async def get_container_file_listing( # pylint: disable=too-many-arguments @router.get("/runtimestate") async def get_container_runtime_state( cluster: str, - role: Optional[str], environ: str, + role: Optional[str] = None, topology_name: str = Query(..., alias="topology"), ): """Return the runtime 
state.""" @@ -120,7 +119,7 @@ async def get_container_runtime_state( if not (tmanager and tmanager.host and tmanager.stats_port): raise ValueError("TManager not set yet") url = f"http://{tmanager.host}:{tmanager.stats_port}/stmgrsregistrationsummary" - with httpx.AsyncClient() as client: + async with httpx.AsyncClient() as client: response = await client.post( url, data=tmanager_pb2.StmgrsRegistrationSummaryRequest().SerializeToString(), @@ -167,7 +166,7 @@ async def _get_exception_log_response( exception_request.instances.extend(instances) url_suffix = "ummary" if summary else "" url = f"http://{tmanager.host}:{tmanager.stats_port}/exceptions{url_suffix}" - with httpx.AsyncClient() as client: + async with httpx.AsyncClient() as client: response = await client.post(url, data=exception_request.SerializeToString()) response.raise_for_status() @@ -188,9 +187,9 @@ async def get_exceptions( # pylint: disable=too-many-arguments cluster: str, environ: str, component: str, - role: Optional[str] = None, instances: List[str] = Query(..., alias="instance"), topology_name: str = Query(..., alias="topology"), + role: Optional[str] = None, ): """Return info about exceptions that have occurred per instance.""" exception_response = await _get_exception_log_response( @@ -253,16 +252,16 @@ class ShellResponse(BaseModel): # pylint: disable=too-few-public-methods @router.get("/pid", response_model=ShellResponse) async def get_container_heron_pid( cluster: str, - role: Optional[str], environ: str, instance: str, + role: Optional[str] = None, topology_name: str = Query(..., alias="topology"), ): """Get the PId of the heron process.""" topology = state.tracker.get_topology(cluster, role, environ, topology_name) - base_url = utils.make_shell_endpoint(state.tracker.pb2_to_api(topology), instance) + base_url = utils.make_shell_endpoint(topology, instance) url = f"{base_url}/pid/{instance}" - with httpx.AsyncClient() as client: + async with httpx.AsyncClient() as client: return await 
client.get(url).json() @@ -280,9 +279,9 @@ async def get_container_heron_jstack( pid_response = await get_container_heron_pid(cluster, role, environ, instance, topology_name) pid = pid_response["stdout"].strip() - base_url = utils.make_shell_endpoint(state.tracker.pb2_to_api(topology), instance) + base_url = utils.make_shell_endpoint(topology, instance) url = f"{base_url}/jstack/{pid}" - with httpx.AsyncClient() as client: + async with httpx.AsyncClient() as client: return await client.get(url).json() @@ -300,9 +299,9 @@ async def get_container_heron_jmap( pid_response = await get_container_heron_pid(cluster, role, environ, instance, topology_name) pid = pid_response["stdout"].strip() - base_url = utils.make_shell_endpoint(state.tracker.pb2_to_api(topology), instance) + base_url = utils.make_shell_endpoint(topology, instance) url = f"{base_url}/jmap/{pid}" - with httpx.AsyncClient() as client: + async with httpx.AsyncClient() as client: return await client.get(url).json() @@ -320,7 +319,7 @@ async def get_container_heron_memory_histogram( pid_response = await get_container_heron_pid(cluster, role, environ, instance, topology_name) pid = pid_response["stdout"].strip() - base_url = utils.make_shell_endpoint(state.tracker.pb2_to_api(topology), instance) + base_url = utils.make_shell_endpoint(topology, instance) url = f"{base_url}/histo/{pid}" - with httpx.AsyncClient() as client: + async with httpx.AsyncClient() as client: return await client.get(url).json() diff --git a/heron/tools/tracker/src/python/tracker.py b/heron/tools/tracker/src/python/tracker.py index 756f3c76b2f..7f3004de1ba 100644 --- a/heron/tools/tracker/src/python/tracker.py +++ b/heron/tools/tracker/src/python/tracker.py @@ -87,9 +87,9 @@ def stop_sync(self) -> None: def get_topology( self, cluster: str, + role: Optional[str], environ: str, - topology_name: str, - role: Optional[str] = None, + topology_name: str, ) -> Any: """ Find and return the topology given its cluster, environ, topology name, and 
diff --git a/heron/tools/ui/resources/static/js/topologies.js b/heron/tools/ui/resources/static/js/topologies.js index 7ed9b8adb0c..1f2c930e39c 100644 --- a/heron/tools/ui/resources/static/js/topologies.js +++ b/heron/tools/ui/resources/static/js/topologies.js @@ -37,8 +37,9 @@ var AllExceptions = React.createClass({ }, fetchExceptionSummary: function() { var compName = this.props.info.comp_name ? this.props.info.comp_name : 'All'; + var instance = this.props.info.instance; var fetchUrl = './' + this.props.info.topology - + '/' + compName + '/exceptionsummary.json' + + '/' + compName + '/' + instance + '/exceptionsummary.json' console.log('fetching url ' + fetchUrl); $.ajax({ url: fetchUrl, @@ -1480,8 +1481,8 @@ var InstanceCounters = React.createClass({ } } if (instanceInfo) { - var stmgr_id = instanceInfo.stmgr_id; - var container = stmgr_id.split("-")[1] + var stmgrId = instanceInfo.stmgrId; + var container = stmgrId.split("-")[1] var topologyParams = this.props.info.cluster + '/' + this.props.info.environ + '/' + this.props.info.topology var instanceParams = topologyParams + '/' + instanceInfo.id From c7dcdb2f072fbfd58ca5f56c10896dad4365a582 Mon Sep 17 00:00:00 2001 From: Nicholas Nezis Date: Mon, 7 Mar 2022 01:39:19 -0500 Subject: [PATCH 08/82] Rolling back a change --- heron/tools/ui/resources/static/js/topologies.js | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/heron/tools/ui/resources/static/js/topologies.js b/heron/tools/ui/resources/static/js/topologies.js index 1f2c930e39c..a9a104a6d0e 100644 --- a/heron/tools/ui/resources/static/js/topologies.js +++ b/heron/tools/ui/resources/static/js/topologies.js @@ -37,9 +37,8 @@ var AllExceptions = React.createClass({ }, fetchExceptionSummary: function() { var compName = this.props.info.comp_name ? 
this.props.info.comp_name : 'All'; - var instance = this.props.info.instance; var fetchUrl = './' + this.props.info.topology - + '/' + compName + '/' + instance + '/exceptionsummary.json' + + '/' + compName + '/exceptionsummary.json' console.log('fetching url ' + fetchUrl); $.ajax({ url: fetchUrl, From c95e92672f646e9d1dd846a4af6749202d592561 Mon Sep 17 00:00:00 2001 From: Nicholas Nezis Date: Mon, 7 Mar 2022 01:44:50 -0500 Subject: [PATCH 09/82] Typo fix --- .../version-0.20.0-incubating/user-manuals-tracker-rest.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website2/website/versioned_docs/version-0.20.0-incubating/user-manuals-tracker-rest.md b/website2/website/versioned_docs/version-0.20.0-incubating/user-manuals-tracker-rest.md index 564e0f8d109..0cde4d3ea3a 100644 --- a/website2/website/versioned_docs/version-0.20.0-incubating/user-manuals-tracker-rest.md +++ b/website2/website/versioned_docs/version-0.20.0-incubating/user-manuals-tracker-rest.md @@ -296,7 +296,7 @@ is greater than `10800` seconds, the values will be for all-time metrics. --- -### /topologies/metricstimeline +### /topologies/metrics/timeline Returns a JSON map of instances of the topology to their respective metrics timeline. To filter instances returned use the `instance` parameter discussed below. 
From 26725756d4b3591936e53351aa6e270b2195f197 Mon Sep 17 00:00:00 2001 From: choi se Date: Tue, 8 Mar 2022 18:48:27 +0900 Subject: [PATCH 10/82] typo --- heron/tools/tracker/src/python/metricstimeline.py | 6 +++--- heron/tools/tracker/src/python/query_operators.py | 4 ++-- heron/tools/tracker/src/python/routers/metrics.py | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/heron/tools/tracker/src/python/metricstimeline.py b/heron/tools/tracker/src/python/metricstimeline.py index 337724f349b..4e65b19e13b 100644 --- a/heron/tools/tracker/src/python/metricstimeline.py +++ b/heron/tools/tracker/src/python/metricstimeline.py @@ -32,9 +32,9 @@ class MetricsTimeline(BaseModel): component: str - start_time: int = Field(..., alias="starttime") - end_time: int = Field(..., alias="enddtime") - timeline: Dict[str, Dict[str, Dict[int, int]]] = Field( + starttime: int = Field(..., alias="starttime") + endtime: int = Field(..., alias="endtime") + timeline: Dict[str, Dict[str, Dict[int, str]]] = Field( ..., description="map of (metric name, instance, start) to metric value", ) diff --git a/heron/tools/tracker/src/python/query_operators.py b/heron/tools/tracker/src/python/query_operators.py index 8475e123f5d..c5290e1a06c 100644 --- a/heron/tools/tracker/src/python/query_operators.py +++ b/heron/tools/tracker/src/python/query_operators.py @@ -150,11 +150,11 @@ async def execute( raise Exception(metrics["message"]) # Put a blank timeline. 
- if not metrics.get("timeline"): + if not metrics.timeline: metrics["timeline"] = { self.metric_name: {} } - timelines = metrics["timeline"][self.metric_name] + timelines = metrics.timeline[self.metric_name] all_metrics = [ Metrics(self.component, self.metric_name, instance, start, end, { k: float(v) diff --git a/heron/tools/tracker/src/python/routers/metrics.py b/heron/tools/tracker/src/python/routers/metrics.py index d407993622b..2132b59021d 100644 --- a/heron/tools/tracker/src/python/routers/metrics.py +++ b/heron/tools/tracker/src/python/routers/metrics.py @@ -151,8 +151,8 @@ class TimelinePoint(BaseModel): # pylint: disable=too-few-public-methods class MetricsQueryResponse(BaseModel): # pylint: disable=too-few-public-methods """A metrics timeline over an interval.""" - start_time: int = Field(..., alias="starttime") - end_time: int = Field(..., alias="endtime") + starttime: int = Field(..., alias="starttime") + endtime: int = Field(..., alias="endtime") timeline: List[TimelinePoint] = Field( ..., description="list of timeline point objects", ) @@ -180,7 +180,7 @@ async def get_metrics_query( # pylint: disable=too-many-arguments ] return MetricsQueryResponse( - startime=start_time, + starttime=start_time, endtime=end_time, timeline=timeline, ) From ab144e3463e88d800ed70b51645f02c040e07207 Mon Sep 17 00:00:00 2001 From: choi se Date: Tue, 8 Mar 2022 19:22:12 +0900 Subject: [PATCH 11/82] Add check eliments --- .../tracker/src/python/query_operators.py | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/heron/tools/tracker/src/python/query_operators.py b/heron/tools/tracker/src/python/query_operators.py index c5290e1a06c..1c3e0a8eeec 100644 --- a/heron/tools/tracker/src/python/query_operators.py +++ b/heron/tools/tracker/src/python/query_operators.py @@ -151,7 +151,7 @@ async def execute( # Put a blank timeline. 
if not metrics.timeline: - metrics["timeline"] = { + metrics.timeline = { self.metric_name: {} } timelines = metrics.timeline[self.metric_name] @@ -460,14 +460,15 @@ async def execute(self, tracker, tmanager: TManagerLocation, start: int, end: in for key, metric in metrics2.items(): # Initialize with first metrics timeline, but second metric's instance # because that is multivariate - met = Metrics(None, None, metric.instance, start, end, metrics[""].timeline.copy()) - for timestamp in list(met.timeline.keys()): - v = self._f(met.timeline[timestamp], metric.timeline.get(timestamp)) - if v is None: - met.timeline.pop(timestamp, None) - else: - met.timeline[timestamp] = v - all_metrics.append(met) + if metrics: + met = Metrics(None, None, metric.instance, start, end, metrics[""].timeline.copy()) + for timestamp in list(met.timeline.keys()): + v = self._f(met.timeline[timestamp], metric.timeline[timestamp]) + if v is None: + met.timeline.pop(timestamp, None) + else: + met.timeline[timestamp] = v + all_metrics.append(met) return all_metrics # If second is univariate @@ -476,7 +477,7 @@ async def execute(self, tracker, tmanager: TManagerLocation, start: int, end: in # Initialize with first metrics timeline and its instance met = Metrics(None, None, metric.instance, start, end, metric.timeline.copy()) for timestamp in list(met.timeline.keys()): - v = self._f(met.timeline[timestamp], metrics2[""].timeline.get(timestamp)) + v = self._f(met.timeline[timestamp], metrics2[""].timeline[timestamp]) if v is None: met.timeline.pop(timestamp, None) else: From a67e4f28e150753f694ef766000846d816589738 Mon Sep 17 00:00:00 2001 From: choi se <357785+thinker0@users.noreply.github.com> Date: Tue, 8 Mar 2022 22:05:43 +0900 Subject: [PATCH 12/82] Fix typos (#3788) --- .../tracker/src/python/metricstimeline.py | 6 ++--- .../tracker/src/python/query_operators.py | 25 ++++++++++--------- .../tracker/src/python/routers/metrics.py | 6 ++--- 3 files changed, 19 insertions(+), 18 deletions(-) 
diff --git a/heron/tools/tracker/src/python/metricstimeline.py b/heron/tools/tracker/src/python/metricstimeline.py index 337724f349b..4e65b19e13b 100644 --- a/heron/tools/tracker/src/python/metricstimeline.py +++ b/heron/tools/tracker/src/python/metricstimeline.py @@ -32,9 +32,9 @@ class MetricsTimeline(BaseModel): component: str - start_time: int = Field(..., alias="starttime") - end_time: int = Field(..., alias="enddtime") - timeline: Dict[str, Dict[str, Dict[int, int]]] = Field( + starttime: int = Field(..., alias="starttime") + endtime: int = Field(..., alias="endtime") + timeline: Dict[str, Dict[str, Dict[int, str]]] = Field( ..., description="map of (metric name, instance, start) to metric value", ) diff --git a/heron/tools/tracker/src/python/query_operators.py b/heron/tools/tracker/src/python/query_operators.py index 8475e123f5d..1c3e0a8eeec 100644 --- a/heron/tools/tracker/src/python/query_operators.py +++ b/heron/tools/tracker/src/python/query_operators.py @@ -150,11 +150,11 @@ async def execute( raise Exception(metrics["message"]) # Put a blank timeline. 
- if not metrics.get("timeline"): - metrics["timeline"] = { + if not metrics.timeline: + metrics.timeline = { self.metric_name: {} } - timelines = metrics["timeline"][self.metric_name] + timelines = metrics.timeline[self.metric_name] all_metrics = [ Metrics(self.component, self.metric_name, instance, start, end, { k: float(v) @@ -460,14 +460,15 @@ async def execute(self, tracker, tmanager: TManagerLocation, start: int, end: in for key, metric in metrics2.items(): # Initialize with first metrics timeline, but second metric's instance # because that is multivariate - met = Metrics(None, None, metric.instance, start, end, metrics[""].timeline.copy()) - for timestamp in list(met.timeline.keys()): - v = self._f(met.timeline[timestamp], metric.timeline.get(timestamp)) - if v is None: - met.timeline.pop(timestamp, None) - else: - met.timeline[timestamp] = v - all_metrics.append(met) + if metrics: + met = Metrics(None, None, metric.instance, start, end, metrics[""].timeline.copy()) + for timestamp in list(met.timeline.keys()): + v = self._f(met.timeline[timestamp], metric.timeline[timestamp]) + if v is None: + met.timeline.pop(timestamp, None) + else: + met.timeline[timestamp] = v + all_metrics.append(met) return all_metrics # If second is univariate @@ -476,7 +477,7 @@ async def execute(self, tracker, tmanager: TManagerLocation, start: int, end: in # Initialize with first metrics timeline and its instance met = Metrics(None, None, metric.instance, start, end, metric.timeline.copy()) for timestamp in list(met.timeline.keys()): - v = self._f(met.timeline[timestamp], metrics2[""].timeline.get(timestamp)) + v = self._f(met.timeline[timestamp], metrics2[""].timeline[timestamp]) if v is None: met.timeline.pop(timestamp, None) else: diff --git a/heron/tools/tracker/src/python/routers/metrics.py b/heron/tools/tracker/src/python/routers/metrics.py index d407993622b..2132b59021d 100644 --- a/heron/tools/tracker/src/python/routers/metrics.py +++ 
b/heron/tools/tracker/src/python/routers/metrics.py @@ -151,8 +151,8 @@ class TimelinePoint(BaseModel): # pylint: disable=too-few-public-methods class MetricsQueryResponse(BaseModel): # pylint: disable=too-few-public-methods """A metrics timeline over an interval.""" - start_time: int = Field(..., alias="starttime") - end_time: int = Field(..., alias="endtime") + starttime: int = Field(..., alias="starttime") + endtime: int = Field(..., alias="endtime") timeline: List[TimelinePoint] = Field( ..., description="list of timeline point objects", ) @@ -180,7 +180,7 @@ async def get_metrics_query( # pylint: disable=too-many-arguments ] return MetricsQueryResponse( - startime=start_time, + starttime=start_time, endtime=end_time, timeline=timeline, ) From 6b9338411ca7aa027df2b78adbde89ed45b29d2f Mon Sep 17 00:00:00 2001 From: Nicholas Nezis Date: Tue, 8 Mar 2022 21:49:20 -0500 Subject: [PATCH 13/82] Fix name mismatch --- heron/tools/ui/resources/static/js/topologies.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/heron/tools/ui/resources/static/js/topologies.js b/heron/tools/ui/resources/static/js/topologies.js index a9a104a6d0e..a658a99225c 100644 --- a/heron/tools/ui/resources/static/js/topologies.js +++ b/heron/tools/ui/resources/static/js/topologies.js @@ -1480,7 +1480,7 @@ var InstanceCounters = React.createClass({ } } if (instanceInfo) { - var stmgrId = instanceInfo.stmgrId; + var stmgrId = instanceInfo.stmgr_id; var container = stmgrId.split("-")[1] var topologyParams = this.props.info.cluster + '/' + this.props.info.environ + '/' + this.props.info.topology From 3542351445ee972d9b5201cae6aaa8651212e816 Mon Sep 17 00:00:00 2001 From: Nicholas Nezis Date: Wed, 9 Mar 2022 01:44:52 -0500 Subject: [PATCH 14/82] Removed EnvelopeAPI Router which was not working --- .../common/src/python/clients/tracker.py | 16 +++--- heron/tools/tracker/src/python/app.py | 32 +++--------- .../tracker/src/python/routers/container.py | 6 +-- 
.../tracker/src/python/routers/metrics.py | 5 +- .../tracker/src/python/routers/topologies.py | 5 +- heron/tools/tracker/src/python/utils.py | 50 +------------------ .../ui/resources/static/js/topologies.js | 4 +- 7 files changed, 28 insertions(+), 90 deletions(-) diff --git a/heron/tools/common/src/python/clients/tracker.py b/heron/tools/common/src/python/clients/tracker.py index eb11ec8137c..984cd5d129d 100644 --- a/heron/tools/common/src/python/clients/tracker.py +++ b/heron/tools/common/src/python/clients/tracker.py @@ -133,16 +133,14 @@ def api_get(url: str, params=None) -> dict: return None end = time.time() data = response.json() - if data["status"] != "success": - Log.error("error from tracker: %s", data["message"]) + if response.status_code != requests.codes.ok: + Log.error("error from tracker: %s", response.status_code) return None - execution = data["executiontime"] * 1000 duration = (end - start) * 1000 - Log.debug(f"URL fetch took {execution:.2}ms server time for {url}") Log.debug(f"URL fetch took {duration:.2}ms round trip time for {url}") - return data["result"] + return data def create_url(fmt: str) -> str: @@ -233,6 +231,7 @@ def get_component_exceptionsummary( environ: str, topology: str, component: str, + instance: str, role: Optional[str]=None, ) -> Any: """Get summary of exception for a component.""" @@ -243,15 +242,18 @@ def get_component_exceptionsummary( "topology": topology, "role": role, "component": component, + "instance": instance, + "summary": True, } return api_get(base_url, params) -def get_component_exceptions( +def get_comp_instance_exceptions( cluster: str, environ: str, topology: str, component: str, + instance: str, role: Optional[str]=None, ) -> Any: """Get exceptions for 'component' for 'topology'.""" @@ -262,6 +264,8 @@ def get_component_exceptions( "topology": topology, "role": role, "component": component, + "instance": instance, + "summary": False, } return api_get(base_url, params) diff --git 
a/heron/tools/tracker/src/python/app.py b/heron/tools/tracker/src/python/app.py index 572854a5d8a..4908697ecbc 100644 --- a/heron/tools/tracker/src/python/app.py +++ b/heron/tools/tracker/src/python/app.py @@ -26,7 +26,6 @@ from typing import Dict, List, Optional from heron.tools.tracker.src.python import constants, state, query -from heron.tools.tracker.src.python.utils import ResponseEnvelope from heron.tools.tracker.src.python.routers import topologies, container, metrics from fastapi import FastAPI, Query @@ -80,35 +79,23 @@ async def shutdown_event(): """Stop recieving topology updates.""" state.tracker.stop_sync() - @app.exception_handler(Exception) async def handle_exception(_, exc: Exception): - payload = ResponseEnvelope[str]( - result=None, - execution_time=0.0, - message=f"request failed: {exc}", - status=constants.RESPONSE_STATUS_FAILURE - ) + message=f"request failed: {exc}" status_code = 500 if isinstance(exc, StarletteHTTPException): status_code = exc.status_code if isinstance(exc, RequestValidationError): status_code = 400 - return JSONResponse(content=payload.dict(), status_code=status_code) - + return JSONResponse(content=message, status_code=status_code) -@app.get("/clusters", response_model=ResponseEnvelope[List[str]]) +@app.get("/clusters") async def clusters() -> List[str]: - return ResponseEnvelope[List[str]]( - execution_time=0.0, - message="ok", - status="success", - result=[s.name for s in state.tracker.state_managers], - ) - + return (s.name for s in state.tracker.state_managers) + @app.get( "/machines", - response_model=ResponseEnvelope[Dict[str, Dict[str, Dict[str, List[str]]]]], + response_model=Dict[str, Dict[str, Dict[str, List[str]]]], ) async def get_machines( cluster_names: Optional[List[str]] = Query(None, alias="cluster"), @@ -134,9 +121,4 @@ async def get_machines( topology.name ] = topology.get_machines() - return ResponseEnvelope[Dict[str, Dict[str, Dict[str, List[str]]]]]( - execution_time=0.0, - result=response, - 
status="success", - message="ok", - ) + return response diff --git a/heron/tools/tracker/src/python/routers/container.py b/heron/tools/tracker/src/python/routers/container.py index de0a21f03c9..23b5688d904 100644 --- a/heron/tools/tracker/src/python/routers/container.py +++ b/heron/tools/tracker/src/python/routers/container.py @@ -26,15 +26,15 @@ from heron.proto import common_pb2, tmanager_pb2 from heron.tools.tracker.src.python import state, utils -from heron.tools.tracker.src.python.utils import EnvelopingAPIRouter import httpx -from fastapi import Query +# from fastapi import Query +from fastapi import Query, APIRouter from pydantic import BaseModel, Field from starlette.responses import StreamingResponse -router = EnvelopingAPIRouter() +router = APIRouter() @router.get("/container/filedata") diff --git a/heron/tools/tracker/src/python/routers/metrics.py b/heron/tools/tracker/src/python/routers/metrics.py index 2132b59021d..db12de0135f 100644 --- a/heron/tools/tracker/src/python/routers/metrics.py +++ b/heron/tools/tracker/src/python/routers/metrics.py @@ -28,14 +28,15 @@ from heron.proto import tmanager_pb2 from heron.tools.tracker.src.python import metricstimeline, state from heron.tools.tracker.src.python.query import Query as TManagerQuery -from heron.tools.tracker.src.python.utils import EnvelopingAPIRouter, BadRequest +from heron.tools.tracker.src.python.utils import BadRequest import httpx from fastapi import Query +from fastapi import Query, APIRouter from pydantic import BaseModel, Field -router = EnvelopingAPIRouter() +router = APIRouter() class ComponentMetrics(BaseModel): interval: int diff --git a/heron/tools/tracker/src/python/routers/topologies.py b/heron/tools/tracker/src/python/routers/topologies.py index d3411050e61..37868876aeb 100644 --- a/heron/tools/tracker/src/python/routers/topologies.py +++ b/heron/tools/tracker/src/python/routers/topologies.py @@ -37,11 +37,10 @@ TopologyInfoPhysicalPlan, TopologyInfoSchedulerLocation, ) -from 
heron.tools.tracker.src.python.utils import EnvelopingAPIRouter -from fastapi import Query +from fastapi import Query, APIRouter -router = EnvelopingAPIRouter() +router = APIRouter() @router.get("", response_model=Dict[str, Dict[str, Dict[str, List[str]]]]) diff --git a/heron/tools/tracker/src/python/utils.py b/heron/tools/tracker/src/python/utils.py index 2db2b7644e0..786d963d2b3 100644 --- a/heron/tools/tracker/src/python/utils.py +++ b/heron/tools/tracker/src/python/utils.py @@ -31,7 +31,7 @@ from asyncio import iscoroutinefunction from functools import wraps from pathlib import Path -from typing import Any, Generic, Literal, Optional, TypeVar +from typing import Any, Optional, TypeVar from heron.common.src.python.utils.log import Log from heron.tools.tracker.src.python import constants @@ -52,59 +52,11 @@ ResultType = TypeVar("ResultType") - -class ResponseEnvelope(GenericModel, Generic[ResultType]): - execution_time: float = Field(0.0, alias="executiontime") - message: str - result: Optional[ResultType] = None - status: Literal[ - constants.RESPONSE_STATUS_FAILURE, constants.RESPONSE_STATUS_SUCCESS - ] - tracker_version: str = constants.API_VERSION - class BadRequest(HTTPException): """Raised when bad input is recieved.""" def __init__(self, detail: str = None) -> None: super().__init__(400, detail) -class EnvelopingAPIRouter(APIRouter): - """Router which wraps response_models with ResponseEnvelope.""" - - def api_route(self, response_model=None, **kwargs): - """This provides the decorator used by router..""" - if not response_model: - return super().api_route(response_model=response_model, **kwargs) - - wrapped_response_model = ResponseEnvelope[response_model] - decorator = super().api_route(response_model=wrapped_response_model, **kwargs) - - @wraps(decorator) - def new_decorator(f): - if iscoroutinefunction(f): - @wraps(f) - async def envelope(*args, **kwargs): - result = await f(*args, **kwargs) - return wrapped_response_model( - result=result, - 
execution_time=0.0, - message="ok", - status="success", - ) - else: - @wraps(f) - def envelope(*args, **kwargs): - result = f(*args, **kwargs) - return wrapped_response_model( - result=result, - execution_time=0.0, - message="ok", - status="success", - ) - return decorator(envelope) - - return new_decorator - - def make_shell_endpoint(topology_info: dict, instance_id: int) -> str: """ Makes the http endpoint for the heron shell diff --git a/heron/tools/ui/resources/static/js/topologies.js b/heron/tools/ui/resources/static/js/topologies.js index a658a99225c..7ed9b8adb0c 100644 --- a/heron/tools/ui/resources/static/js/topologies.js +++ b/heron/tools/ui/resources/static/js/topologies.js @@ -1480,8 +1480,8 @@ var InstanceCounters = React.createClass({ } } if (instanceInfo) { - var stmgrId = instanceInfo.stmgr_id; - var container = stmgrId.split("-")[1] + var stmgr_id = instanceInfo.stmgr_id; + var container = stmgr_id.split("-")[1] var topologyParams = this.props.info.cluster + '/' + this.props.info.environ + '/' + this.props.info.topology var instanceParams = topologyParams + '/' + instanceInfo.id From 0560c8ed022b9a42b9e2e1460d838ee1b58f1f57 Mon Sep 17 00:00:00 2001 From: Nicholas Nezis Date: Mon, 14 Mar 2022 02:24:29 -0400 Subject: [PATCH 15/82] More fixes --- .../src/python/handlers/downloadhandler.py | 5 +- heron/shell/src/python/handlers/pidhandler.py | 9 ++- heron/shell/src/python/main.py | 2 +- heron/shell/src/python/utils.py | 15 ----- .../common/src/python/clients/tracker.py | 11 +--- heron/tools/tracker/src/python/main.py | 3 +- .../tracker/src/python/routers/container.py | 62 ++++++++++--------- heron/tools/tracker/src/python/topology.py | 2 +- heron/tools/tracker/src/python/utils.py | 8 +-- heron/tools/ui/src/python/main.py | 40 ++++++++---- 10 files changed, 80 insertions(+), 77 deletions(-) diff --git a/heron/shell/src/python/handlers/downloadhandler.py b/heron/shell/src/python/handlers/downloadhandler.py index 3186253941c..e4f3fb5ad69 100644 --- 
a/heron/shell/src/python/handlers/downloadhandler.py +++ b/heron/shell/src/python/handlers/downloadhandler.py @@ -31,8 +31,7 @@ class DownloadHandler(tornado.web.RequestHandler): """ Responsible for downloading the files. """ - @tornado.web.asynchronous - def get(self, path): + async def get(self, path): """ get method """ handler = logging.StreamHandler() @@ -64,7 +63,7 @@ def get(self, path): length = int(4 * 1024 * 1024) offset = int(0) while True: - data = utils.read_chunk(path, offset=offset, length=length, escape_data=False) + data = await utils.read_chunk(path, offset=offset, length=length, escape_data=False) if self.connection_closed or 'data' not in data or len(data['data']) < length: break offset += length diff --git a/heron/shell/src/python/handlers/pidhandler.py b/heron/shell/src/python/handlers/pidhandler.py index 1652b2e04e2..ba4564f8035 100644 --- a/heron/shell/src/python/handlers/pidhandler.py +++ b/heron/shell/src/python/handlers/pidhandler.py @@ -22,6 +22,7 @@ ''' pidhandler.py ''' import json import tornado.web +import subprocess from heron.shell.src.python import utils @@ -33,5 +34,9 @@ class PidHandler(tornado.web.RequestHandler): # pylint: disable=attribute-defined-outside-init async def get(self, instance_id): ''' get method ''' - self.content_type = 'application/json' - await self.finish(json.dumps(utils.chain([['cat', "%s.pid" % instance_id]])).strip()) + pid = subprocess.run(['cat', "%s.pid" % instance_id], capture_output=True, text=True) + await self.finish({ + 'command': ' '.join(pid.args), + 'stdout': pid.stdout, + 'stderr': pid.stderr, + }) diff --git a/heron/shell/src/python/main.py b/heron/shell/src/python/main.py index 9705e7cd536..b5c1fee6634 100644 --- a/heron/shell/src/python/main.py +++ b/heron/shell/src/python/main.py @@ -59,7 +59,7 @@ def run(url_to_handlers=default_handlers): AsyncHTTPClient.configure(None, defaults=dict(request_timeout=120.0)) app = tornado.web.Application(url_to_handlers) app.listen(options.port) - 
tornado.ioloop.IOLoop.instance().start() + tornado.ioloop.IOLoop.current().start() if __name__ == '__main__': run() diff --git a/heron/shell/src/python/utils.py b/heron/shell/src/python/utils.py index 8223d79a45e..6312ff7474f 100644 --- a/heron/shell/src/python/utils.py +++ b/heron/shell/src/python/utils.py @@ -177,21 +177,6 @@ def str_cmd(cmd, cwd, env): stdout, stderr = stdout_builder.result(), stderr_builder.result() return {'command': ' '.join(cmd), 'stderr': stderr, 'stdout': stdout} -# pylint: disable=unnecessary-lambda -def chain(cmd_list): - """ - Feed output of one command to the next and return final output - Returns string output of chained application of commands. - """ - command = ' | '.join([' '.join(x) for x in cmd_list]) - chained_proc = functools.reduce(pipe, [None] + cmd_list) - stdout_builder = proc.async_stdout_builder(chained_proc) - chained_proc.wait() - return { - 'command': command, - 'stdout': stdout_builder.result() - } - def get_container_id(instance_id): ''' get container id ''' return instance_id.split('_')[1] # Format: container__component_name_ diff --git a/heron/tools/common/src/python/clients/tracker.py b/heron/tools/common/src/python/clients/tracker.py index 984cd5d129d..49ee408aecf 100644 --- a/heron/tools/common/src/python/clients/tracker.py +++ b/heron/tools/common/src/python/clients/tracker.py @@ -126,10 +126,11 @@ def api_get(url: str, params=None) -> dict: """Make a GET request to a tracker URL and return the result.""" start = time.time() try: + Log.debug(f"Requesting URL: {url} with params: {params}") response = requests.get(url, params) response.raise_for_status() except Exception as e: - Log.error(f"Unable to get response from {url}: {e}") + Log.error(f"Unable to get response from {url} with params {params}: {e}") return None end = time.time() data = response.json() @@ -231,7 +232,6 @@ def get_component_exceptionsummary( environ: str, topology: str, component: str, - instance: str, role: Optional[str]=None, ) -> Any: 
"""Get summary of exception for a component.""" @@ -242,18 +242,15 @@ def get_component_exceptionsummary( "topology": topology, "role": role, "component": component, - "instance": instance, - "summary": True, } return api_get(base_url, params) -def get_comp_instance_exceptions( +def get_component_exceptions( cluster: str, environ: str, topology: str, component: str, - instance: str, role: Optional[str]=None, ) -> Any: """Get exceptions for 'component' for 'topology'.""" @@ -264,8 +261,6 @@ def get_comp_instance_exceptions( "topology": topology, "role": role, "component": component, - "instance": instance, - "summary": False, } return api_get(base_url, params) diff --git a/heron/tools/tracker/src/python/main.py b/heron/tools/tracker/src/python/main.py index a0560810209..8218e10f749 100644 --- a/heron/tools/tracker/src/python/main.py +++ b/heron/tools/tracker/src/python/main.py @@ -37,7 +37,7 @@ import uvicorn Log = log.Log - +Log.setLevel(logging.DEBUG) def create_tracker_config(config_file: str, stmgr_override: dict) -> dict: # try to parse the config file if we find one @@ -125,6 +125,7 @@ def cli( log_level = logging.DEBUG if verbose else logging.INFO log.configure(log_level) + Log = log.Log stmgr_override = { "type": stmgr_type, diff --git a/heron/tools/tracker/src/python/routers/container.py b/heron/tools/tracker/src/python/routers/container.py index 23b5688d904..cea6286553a 100644 --- a/heron/tools/tracker/src/python/routers/container.py +++ b/heron/tools/tracker/src/python/routers/container.py @@ -22,7 +22,10 @@ topology, particularly data about heron containers. 
""" +from asyncio import constants +from subprocess import CompletedProcess from typing import List, Optional +from heron.common.src.python.utils.log import Log from heron.proto import common_pb2, tmanager_pb2 from heron.tools.tracker.src.python import state, utils @@ -45,8 +48,8 @@ async def get_container_file_slice( # pylint: disable=too-many-arguments path: str, offset: int, length: int, - role: Optional[str] = None, topology_name: str = Query(..., alias="topology"), + role: Optional[str] = None, ): """ Return a range of bytes for the given file wrapped in JSON. @@ -70,8 +73,8 @@ async def get_container_file( # pylint: disable=too-many-arguments environ: str, container: str, path: str, - role: Optional[str] = None, topology_name: str = Query(..., alias="topology"), + role: Optional[str] = None, ): """Return a given raw file.""" topology = state.tracker.get_topology(cluster, role, environ, topology_name) @@ -79,8 +82,8 @@ async def get_container_file( # pylint: disable=too-many-arguments url = f"http://{stmgr.host}:{stmgr.shell_port}/download/{path}" _, _, filename = path.rpartition("/") - with httpx.stream("GET", url) as response: - return StreamingResponse( + async with httpx.stream("GET", url) as response: + return await StreamingResponse( content=response.iter_bytes(), headers={"Content-Disposition": f"attachment; filename={filename}"}, ) @@ -91,8 +94,8 @@ async def get_container_file_listing( # pylint: disable=too-many-arguments environ: str, container: str, path: str, - role: Optional[str] = None, topology_name: str = Query(..., alias="topology"), + role: Optional[str] = None, ): """Return the stats for a given directory.""" topology = state.tracker.get_topology(cluster, role, environ, topology_name) @@ -142,8 +145,8 @@ class ExceptionLog(BaseModel): hostname: str instance_id: str stack_trace: str = Field(..., alias="stacktrace") - last_time: int = Field(..., alias="lasttime") - first_time: int = Field(..., alias="firsttime") + last_time: str = Field(..., 
alias="lasttime") + first_time: str = Field(..., alias="firsttime") count: str = Field(..., description="number of occurances during collection interval") logging: str = Field(..., description="additional text logged with exception") @@ -152,8 +155,8 @@ async def _get_exception_log_response( role: Optional[str], environ: str, component: str, - instances: List[str] = Query(..., alias="instance"), - topology_name: str = Query(..., alias="topology"), + instances: Optional[List[str]], + topology_name: str, summary: bool = False, ) -> List[tmanager_pb2.ExceptionLogResponse]: topology = state.tracker.get_topology(cluster, role, environ, topology_name) @@ -163,7 +166,8 @@ async def _get_exception_log_response( raise ValueError("TManager not set yet") exception_request = tmanager_pb2.ExceptionLogRequest() exception_request.component_name = component - exception_request.instances.extend(instances) + if instances is not None and len(instances) > 0: + exception_request.instances.extend(instances) url_suffix = "ummary" if summary else "" url = f"http://{tmanager.host}:{tmanager.stats_port}/exceptions{url_suffix}" async with httpx.AsyncClient() as client: @@ -187,7 +191,7 @@ async def get_exceptions( # pylint: disable=too-many-arguments cluster: str, environ: str, component: str, - instances: List[str] = Query(..., alias="instance"), + instances: Optional[List[str]] = Query(None, alias="instance"), topology_name: str = Query(..., alias="topology"), role: Optional[str] = None, ): @@ -221,13 +225,13 @@ async def get_exceptions_summary( # pylint: disable=too-many-arguments cluster: str, environ: str, component: str, - role: Optional[str] = None, - instances: List[str] = Query(..., alias="instance"), + instances: Optional[List[str]] = Query(None, alias="instance"), topology_name: str = Query(..., alias="topology"), + role: Optional[str] = None, ): """Return info about exceptions that have occurred.""" exception_response = await _get_exception_log_response( - cluster, role, environ, 
component, instances, topology_name, summary=False + cluster, role, environ, component, instances, topology_name, summary=True ) return [ @@ -254,15 +258,15 @@ async def get_container_heron_pid( cluster: str, environ: str, instance: str, - role: Optional[str] = None, topology_name: str = Query(..., alias="topology"), + role: Optional[str] = None, ): """Get the PId of the heron process.""" topology = state.tracker.get_topology(cluster, role, environ, topology_name) - base_url = utils.make_shell_endpoint(topology, instance) + base_url = utils.make_shell_endpoint(topology.info, instance) url = f"{base_url}/pid/{instance}" async with httpx.AsyncClient() as client: - return await client.get(url).json() + return (await client.get(url)).json() @router.get("/jstack", response_model=ShellResponse) @@ -270,19 +274,19 @@ async def get_container_heron_jstack( cluster: str, environ: str, instance: str, - role: Optional[str] = None, topology_name: str = Query(..., alias="topology"), + role: Optional[str] = None, ): """Get jstack output for the heron process.""" topology = state.tracker.get_topology(cluster, role, environ, topology_name) - pid_response = await get_container_heron_pid(cluster, role, environ, instance, topology_name) + pid_response = await get_container_heron_pid(cluster, environ, instance, topology_name, role) pid = pid_response["stdout"].strip() - base_url = utils.make_shell_endpoint(topology, instance) + base_url = utils.make_shell_endpoint(topology.info, instance) url = f"{base_url}/jstack/{pid}" async with httpx.AsyncClient() as client: - return await client.get(url).json() + return (await client.get(url)).json() @router.get("/jmap", response_model=ShellResponse) @@ -290,19 +294,19 @@ async def get_container_heron_jmap( cluster: str, environ: str, instance: str, - role: Optional[str] = None, topology_name: str = Query(..., alias="topology"), + role: Optional[str] = None, ): """Get jmap output for the heron process.""" topology = 
state.tracker.get_topology(cluster, role, environ, topology_name) - pid_response = await get_container_heron_pid(cluster, role, environ, instance, topology_name) + pid_response = await get_container_heron_pid(cluster, environ, instance, topology_name, role) pid = pid_response["stdout"].strip() - base_url = utils.make_shell_endpoint(topology, instance) + base_url = utils.make_shell_endpoint(topology.info, instance) url = f"{base_url}/jmap/{pid}" async with httpx.AsyncClient() as client: - return await client.get(url).json() + return (await client.get(url)).json() @router.get("/histo", response_model=ShellResponse) @@ -310,16 +314,16 @@ async def get_container_heron_memory_histogram( cluster: str, environ: str, instance: str, - role: Optional[str] = None, topology_name: str = Query(..., alias="topology"), + role: Optional[str] = None, ): """Get memory usage histogram the heron process. This uses the ouput of the last jmap run.""" topology = state.tracker.get_topology(cluster, role, environ, topology_name) - pid_response = await get_container_heron_pid(cluster, role, environ, instance, topology_name) + pid_response = await get_container_heron_pid(cluster, environ, instance, topology_name, role) pid = pid_response["stdout"].strip() - base_url = utils.make_shell_endpoint(topology, instance) + base_url = utils.make_shell_endpoint(topology.info, instance) url = f"{base_url}/histo/{pid}" async with httpx.AsyncClient() as client: - return await client.get(url).json() + return (await client.get(url)).json() diff --git a/heron/tools/tracker/src/python/topology.py b/heron/tools/tracker/src/python/topology.py index cd71d2830be..15c4de612df 100644 --- a/heron/tools/tracker/src/python/topology.py +++ b/heron/tools/tracker/src/python/topology.py @@ -258,7 +258,7 @@ def _render_extra_links(extra_links, topology, execution_state: ExecutionState_p link[EXTRA_LINK_URL_KEY] = string.Template(link[EXTRA_LINK_FORMATTER_KEY]).substitute(subs) def _rebuild_info(self, t_state: 
TopologyState) -> Optional[TopologyInfo]: - # Execution state is the most basic info. If returnecution state, just return + # Execution state is the most basic info. If return execution state, just return # as the rest of the things don't matter. execution_state = t_state.execution_state if not execution_state: diff --git a/heron/tools/tracker/src/python/utils.py b/heron/tools/tracker/src/python/utils.py index 786d963d2b3..2de17317018 100644 --- a/heron/tools/tracker/src/python/utils.py +++ b/heron/tools/tracker/src/python/utils.py @@ -64,10 +64,10 @@ def make_shell_endpoint(topology_info: dict, instance_id: int) -> str: """ # Format: container__ - pplan = topology_info["physical_plan"] - stmgrId = pplan["instances"][instance_id]["stmgrId"] - host = pplan["stmgrs"][stmgrId]["host"] - shell_port = pplan["stmgrs"][stmgrId]["shell_port"] + pplan = topology_info.physical_plan + stmgrId = pplan.instances[instance_id].stmgr_id + host = pplan.stmgrs[stmgrId].host + shell_port = pplan.stmgrs[stmgrId].shell_port return f"http://{host}:{shell_port}" def make_shell_job_url(host: str, shell_port: int, _) -> Optional[str]: diff --git a/heron/tools/ui/src/python/main.py b/heron/tools/ui/src/python/main.py index 6cee080ea4e..3d0c45fa79d 100644 --- a/heron/tools/ui/src/python/main.py +++ b/heron/tools/ui/src/python/main.py @@ -58,6 +58,9 @@ base_url = DEFAULT_BASE_URL tracker_url = DEFAULT_TRACKER_URL +Log = log.Log +Log.setLevel(logging.DEBUG) + app = FastAPI(title="Heron UI", version=VERSION) templates = Jinja2Templates( @@ -105,7 +108,7 @@ def config_page( ) -@topologies_router.get("/{cluster}/{environment}/{topology}/{instance}/{component}/exceptions") +@topologies_router.get("/{cluster}/{environment}/{topology}/{component}/{instance}/exceptions") def exceptions_page( cluster: str, environment: str, topology: str, component: str, instance: str, request: Request @@ -316,17 +319,26 @@ class ApiEnvelope(pydantic.BaseModel): executiontime: int result: dict - def 
api_topology_json(method: Callable[[], dict]) -> ApiEnvelope: """Wrap the output of a method with a response envelope.""" started = time.time() result = method() - return ApiEnvelope( - status="success", - message="", + print(f"NICK: API Topology result: {type(result)}: {result}") + Log.debug(f"Api topology: {result}") + if type(result) is None: + return ApiEnvelope( + status="failure", + message="No topology found", executiontime=time.time() - started, - result=result, - ) + result={}, + ) + else: + return ApiEnvelope( + status="success", + message="", + executiontime=time.time() - started, + result=result, + ) @topologies_router.get("/list.json") def topologies_json() -> dict: @@ -392,7 +404,7 @@ def execution_state_json(cluster: str, environment: str, topology: str) -> ApiEn ) def scheduler_location_json(cluster: str, environment: str, topology: str) -> ApiEnvelope: """Unimplemented method which is currently a duplicate of execution state.""" - return api_topology_json(lambda: tracker.get_execution_state( + return api_topology_json(lambda: tracker.get_scheduler_location( cluster, environment, topology, )) @@ -462,7 +474,7 @@ def pid_snippet( ) -> Response: """Render a HTML snippet containing topology output of container.""" physical_plan = tracker.get_physical_plan(cluster, environment, topology) - host = physical_plan["stmgrs"][physical_plan["instances"][instance]["stmgrId"]][ + host = physical_plan["stmgrs"][physical_plan["instances"][instance]["stmgr_id"]][ "host" ] info = tracker.get_instance_pid(cluster, environment, topology, instance) @@ -492,7 +504,7 @@ def jstack_snippet( ) -> HTMLResponse: """Render a HTML snippet containing jstack output of container.""" physical_plan = tracker.get_physical_plan(cluster, environment, topology) - host = physical_plan["stmgrs"][physical_plan["instances"][instance]["stmgrId"]][ + host = physical_plan["stmgrs"][physical_plan["instances"][instance]["stmgr_id"]][ "host" ] info = tracker.get_instance_jstack(cluster, 
environment, topology, instance) @@ -521,7 +533,7 @@ def jmap_snippet( ) -> HTMLResponse: """Render a HTML snippet containing jmap output of container.""" physical_plan = tracker.get_physical_plan(cluster, environment, topology) - host = physical_plan["stmgrs"][physical_plan["instances"][instance]["stmgrId"]][ + host = physical_plan["stmgrs"][physical_plan["instances"][instance]["stmgr_id"]][ "host" ] info = tracker.run_instance_jmap(cluster, environment, topology, instance) @@ -558,7 +570,7 @@ def histogram_snippet( """Render a HTML snippet containing jmap histogram output of container.""" # use a function to DRY up these container API methods physical_plan = tracker.get_physical_plan(cluster, environment, topology) - host = physical_plan["stmgrs"][physical_plan["instances"][instance]["stmgrId"]][ + host = physical_plan["stmgrs"][physical_plan["instances"][instance]["stmgr_id"]][ "host" ] info = tracker.get_instance_mem_histogram( @@ -616,7 +628,9 @@ def cli( """Start a web UI for heron which renders information from the tracker.""" global base_url, tracker_url base_url = base_url_option - log.configure(level=logging.DEBUG if verbose else logging.INFO) + log_level = logging.DEBUG if verbose else logging.INFO + log.configure(log_level) + Log = log.Log tracker.tracker_url = tracker_url_option uvicorn.run(app, host=host, port=port, log_config=None) From 5dc7d2e48797e6a478ee2710a0beb505f3e0f33e Mon Sep 17 00:00:00 2001 From: Nicholas Nezis Date: Mon, 14 Mar 2022 02:28:26 -0400 Subject: [PATCH 16/82] Updated AckingTopology to not burn so much CPU --- .../apache/heron/examples/api/AckingTopology.java | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/examples/src/java/org/apache/heron/examples/api/AckingTopology.java b/examples/src/java/org/apache/heron/examples/api/AckingTopology.java index 12bdfadbd4e..92a88aac67c 100644 --- a/examples/src/java/org/apache/heron/examples/api/AckingTopology.java +++ 
b/examples/src/java/org/apache/heron/examples/api/AckingTopology.java @@ -20,6 +20,7 @@ package org.apache.heron.examples.api; +import java.time.Duration; import java.util.Map; import java.util.Random; @@ -36,6 +37,7 @@ import org.apache.heron.api.tuple.Fields; import org.apache.heron.api.tuple.Tuple; import org.apache.heron.api.tuple.Values; +import org.apache.heron.common.basics.SysUtils; /** * This is a basic example of a Heron topology with acking enable. @@ -53,7 +55,7 @@ public static void main(String[] args) throws Exception { int spouts = 2; int bolts = 2; - builder.setSpout("word", new AckingTestWordSpout(), spouts); + builder.setSpout("word", new AckingTestWordSpout(Duration.ofMillis(200)), spouts); builder.setBolt("exclaim1", new ExclamationBolt(), bolts) .shuffleGrouping("word"); @@ -97,8 +99,10 @@ public static class AckingTestWordSpout extends BaseRichSpout { private SpoutOutputCollector collector; private String[] words; private Random rand; + private final Duration throttleDuration; - public AckingTestWordSpout() { + public AckingTestWordSpout(Duration throttleDuration) { + this.throttleDuration = throttleDuration; } @SuppressWarnings("rawtypes") @@ -116,7 +120,9 @@ public void close() { public void nextTuple() { final String word = words[rand.nextInt(words.length)]; - + if (!throttleDuration.isZero()) { + SysUtils.sleep(throttleDuration); // sleep to throttle back CPU usage + } // To enable acking, we need to emit each tuple with a MessageId, which is an Object. 
// Each new message emitted needs to be annotated with a unique ID, which allows // the spout to keep track of which messages should be acked back to the producer or From 59ce8b419ed6531df571cbffadb6eab983b9f232 Mon Sep 17 00:00:00 2001 From: Nicholas Nezis Date: Tue, 15 Mar 2022 02:47:06 -0400 Subject: [PATCH 17/82] More potential fixes --- .../common/src/python/clients/tracker.py | 2 +- heron/tools/tracker/src/python/BUILD | 2 +- .../tracker/src/python/routers/container.py | 27 +++++++------- .../ui/resources/static/js/exceptions.js | 8 ++--- heron/tools/ui/src/python/BUILD | 2 +- heron/tools/ui/src/python/main.py | 36 ++++++++++++++++--- 6 files changed, 52 insertions(+), 25 deletions(-) diff --git a/heron/tools/common/src/python/clients/tracker.py b/heron/tools/common/src/python/clients/tracker.py index 49ee408aecf..fdafa693f4b 100644 --- a/heron/tools/common/src/python/clients/tracker.py +++ b/heron/tools/common/src/python/clients/tracker.py @@ -122,7 +122,7 @@ def strip_whitespace(s): backpressure=backpressure ) -def api_get(url: str, params=None) -> dict: +def api_get(url: str, params=None) -> Any: """Make a GET request to a tracker URL and return the result.""" start = time.time() try: diff --git a/heron/tools/tracker/src/python/BUILD b/heron/tools/tracker/src/python/BUILD index 6a2daaaf907..86a6503a5e2 100644 --- a/heron/tools/tracker/src/python/BUILD +++ b/heron/tools/tracker/src/python/BUILD @@ -8,7 +8,7 @@ pex_library( ), reqs = [ "click==7.1.2", - "fastapi==0.62.0", + "fastapi==0.75.0", "httpx==0.16.1", "javaobj-py3==0.4.1", "networkx==2.5", diff --git a/heron/tools/tracker/src/python/routers/container.py b/heron/tools/tracker/src/python/routers/container.py index cea6286553a..061441b1677 100644 --- a/heron/tools/tracker/src/python/routers/container.py +++ b/heron/tools/tracker/src/python/routers/container.py @@ -199,19 +199,20 @@ async def get_exceptions( # pylint: disable=too-many-arguments exception_response = await _get_exception_log_response( 
cluster, role, environ, component, instances, topology_name, summary=False ) - - return [ - ExceptionLog( - hostname=exception_log.hostname, - instance_id=exception_log.instance_id, - stack_trace=exception_log.stacktrace, - lasttime=exception_log.lasttime, - firsttime=exception_log.firsttime, - count=str(exception_log.count), - logging=exception_log.logging, - ) - for exception_log in exception_response.exceptions - ] + print(f"NICK: exception response: {exception_response}") + + ret = [] + for exception_log in exception_response.exceptions: + ret.append(ExceptionLog( + hostname = exception_log.hostname, + instance_id = exception_log.instance_id, + stacktrace = exception_log.stacktrace, + lasttime = exception_log.lasttime, + firsttime = exception_log.firsttime, + count = str(exception_log.count), + logging = exception_log.logging, + )) + return ret class ExceptionSummaryItem(BaseModel): diff --git a/heron/tools/ui/resources/static/js/exceptions.js b/heron/tools/ui/resources/static/js/exceptions.js index 92081541bc9..8ced6628a28 100644 --- a/heron/tools/ui/resources/static/js/exceptions.js +++ b/heron/tools/ui/resources/static/js/exceptions.js @@ -109,7 +109,7 @@ var InstanceExceptionLogs = React.createClass({ var uniqExceptions = {}; for (i = 0; i < exceptionLogs.length; ++i) { - var trace = exceptionLogs[i].stack_trace; + var trace = exceptionLogs[i].stacktrace; var tokens = trace.split("\n", 3); if (filterId != 'All' && filterId != exceptionLogs[i].instance_id) { continue; @@ -124,7 +124,7 @@ var InstanceExceptionLogs = React.createClass({ fontSize: '80%', } uniqExceptions[key] = {'instance': exceptionLogs[i].instance_id, - 'stack_trace': (
{trace}
), + 'stacktrace': (
{trace}
), 'count': parseFloat(exceptionLogs[i].count), 'firsttime': exceptionLogs[i].firsttime, 'lasttime': exceptionLogs[i].lasttime, @@ -176,8 +176,8 @@ var InstanceExceptionLogs = React.createClass({ mainLinks = mainLinks.concat([['Logs', logfile], ['Aurora', jobUrl], ['Host', host]]); } } - row = [ exceptionLogs[i].stack_trace, - exceptionLogs[i].instance, + row = [ exceptionLogs[i].stacktrace, + exceptionLogs[i].instance_id, exceptionLogs[i].firsttime, exceptionLogs[i].lasttime, exceptionLogs[i].count, diff --git a/heron/tools/ui/src/python/BUILD b/heron/tools/ui/src/python/BUILD index 7c4f2619a7a..87474d784e0 100644 --- a/heron/tools/ui/src/python/BUILD +++ b/heron/tools/ui/src/python/BUILD @@ -9,7 +9,7 @@ pex_library( reqs = [ "requests==2.27.1", "click==7.1.2", - "fastapi==0.60.1", + "fastapi==0.75.0", "jinja2==3.0.3", "aiofiles==0.5.0", "uvicorn==0.11.7", diff --git a/heron/tools/ui/src/python/main.py b/heron/tools/ui/src/python/main.py index 3d0c45fa79d..eaa75f522f7 100644 --- a/heron/tools/ui/src/python/main.py +++ b/heron/tools/ui/src/python/main.py @@ -309,6 +309,34 @@ def file_download( headers={"Content-Disposition": f"attachment; filename={filename}"}, ) +# List envelope for Exceptions response +class ApiListEnvelope(pydantic.BaseModel): + """Envelope for heron-ui JSON API.""" + status: str + message: str + version: str = VERSION + executiontime: int + result: list + +def api_topology_list_json(method: Callable[[], dict]) -> ApiListEnvelope: + """Wrap the output of a method with a response envelope.""" + started = time.time() + result = method() + Log.debug(f"Api topology: {result}") + if type(result) is None: + return ApiEnvelope( + status="failure", + message="No topology found", + executiontime=time.time() - started, + result={}, + ) + else: + return ApiListEnvelope( + status="success", + message="", + executiontime=time.time() - started, + result=result, + ) # topology list and plan handlers class ApiEnvelope(pydantic.BaseModel): @@ -323,7 +351,6 @@ 
def api_topology_json(method: Callable[[], dict]) -> ApiEnvelope: """Wrap the output of a method with a response envelope.""" started = time.time() result = method() - print(f"NICK: API Topology result: {type(result)}: {result}") Log.debug(f"Api topology: {result}") if type(result) is None: return ApiEnvelope( @@ -411,15 +438,14 @@ def scheduler_location_json(cluster: str, environment: str, topology: str) -> Ap @topologies_router.get( "/{cluster}/{environment}/{topology}/{component}/exceptions.json", - response_model=ApiEnvelope, + response_model=ApiListEnvelope, ) -def exceptions_json(cluster: str, environment: str, topology: str, component: str) -> ApiEnvelope: +def exceptions_json(cluster: str, environment: str, topology: str, component: str) -> ApiListEnvelope: """Return a list of exceptions for a component.""" - return api_topology_json(lambda: tracker.get_component_exceptions( + return api_topology_list_json(lambda: tracker.get_component_exceptions( cluster, environment, topology, component, )) - @topologies_router.get( "/{cluster}/{environment}/{topology}/{component}/exceptionsummary.json", response_model=ApiEnvelope, From 5388489b12c7f70711056d6145a5cbed9cb4962b Mon Sep 17 00:00:00 2001 From: Nicholas Nezis Date: Fri, 25 Mar 2022 23:25:48 -0400 Subject: [PATCH 18/82] Update heron/tools/common/src/python/clients/tracker.py Co-authored-by: Saad Ur Rahman --- heron/tools/common/src/python/clients/tracker.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/heron/tools/common/src/python/clients/tracker.py b/heron/tools/common/src/python/clients/tracker.py index fdafa693f4b..c222fdb0734 100644 --- a/heron/tools/common/src/python/clients/tracker.py +++ b/heron/tools/common/src/python/clients/tracker.py @@ -54,8 +54,8 @@ SCHEDULER_LOCATION_URL_FMT = "%s/schedulerlocation" % TOPOLOGIES_URL_FMT METRICS_URL_FMT = "%s/metrics" % TOPOLOGIES_URL_FMT -METRICS_QUERY_URL_FMT = "%s/metrics/query" % TOPOLOGIES_URL_FMT -METRICS_TIMELINE_URL_FMT = 
"%s/metrics/timeline" % TOPOLOGIES_URL_FMT +METRICS_QUERY_URL_FMT = METRICS_URL_FMT % "%s/query" % TOPOLOGIES_URL_FMT +METRICS_TIMELINE_URL_FMT = METRICS_URL_FMT % "%s/timeline" % TOPOLOGIES_URL_FMT EXCEPTIONS_URL_FMT = "%s/exceptions" % TOPOLOGIES_URL_FMT EXCEPTION_SUMMARY_URL_FMT = "%s/exceptionsummary" % TOPOLOGIES_URL_FMT From d92ad262c76200370caf57f511d858266630b4ad Mon Sep 17 00:00:00 2001 From: Nicholas Nezis Date: Fri, 25 Mar 2022 23:26:05 -0400 Subject: [PATCH 19/82] Update heron/tools/common/src/python/clients/tracker.py Co-authored-by: Saad Ur Rahman --- heron/tools/common/src/python/clients/tracker.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/heron/tools/common/src/python/clients/tracker.py b/heron/tools/common/src/python/clients/tracker.py index c222fdb0734..a699e6e4b72 100644 --- a/heron/tools/common/src/python/clients/tracker.py +++ b/heron/tools/common/src/python/clients/tracker.py @@ -66,9 +66,10 @@ JMAP_URL_FMT = "%s/jmap" % TOPOLOGIES_URL_FMT HISTOGRAM_URL_FMT = "%s/histo" % TOPOLOGIES_URL_FMT -FILE_DATA_URL_FMT = "%s/container/filedata" % TOPOLOGIES_URL_FMT -FILE_DOWNLOAD_URL_FMT = "%s/container/filedownload" % TOPOLOGIES_URL_FMT -FILESTATS_URL_FMT = "%s/container/filestats" % TOPOLOGIES_URL_FMT +CONTAINER_URL_FMT = "%s/container" +FILE_DATA_URL_FMT = CONTAINER_URL_FMT % "%s/filedata" % TOPOLOGIES_URL_FMT +FILE_DOWNLOAD_URL_FMT = CONTAINER_URL_FMT % "%s/filedownload" % TOPOLOGIES_URL_FMT +FILESTATS_URL_FMT = CONTAINER_URL_FMT % "%s/filestats" % TOPOLOGIES_URL_FMT def strip_whitespace(s): From 72769a3d4537743cc76d6b1af40cb640ecf11c0d Mon Sep 17 00:00:00 2001 From: Nicholas Nezis Date: Fri, 25 Mar 2022 23:28:50 -0400 Subject: [PATCH 20/82] Update heron/tools/tracker/src/python/app.py Co-authored-by: Saad Ur Rahman --- heron/tools/tracker/src/python/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/heron/tools/tracker/src/python/app.py b/heron/tools/tracker/src/python/app.py index 
4908697ecbc..54f9bb543ba 100644 --- a/heron/tools/tracker/src/python/app.py +++ b/heron/tools/tracker/src/python/app.py @@ -81,7 +81,7 @@ async def shutdown_event(): @app.exception_handler(Exception) async def handle_exception(_, exc: Exception): - message=f"request failed: {exc}" + message = f"request failed: {exc}" status_code = 500 if isinstance(exc, StarletteHTTPException): status_code = exc.status_code From a1d46abb65606d9534e837158f8bb038cdf213d8 Mon Sep 17 00:00:00 2001 From: Nicholas Nezis Date: Fri, 25 Mar 2022 23:29:14 -0400 Subject: [PATCH 21/82] Update heron/tools/tracker/src/python/tracker.py Co-authored-by: Saad Ur Rahman --- heron/tools/tracker/src/python/tracker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/heron/tools/tracker/src/python/tracker.py b/heron/tools/tracker/src/python/tracker.py index 7f3004de1ba..5fb2fce4167 100644 --- a/heron/tools/tracker/src/python/tracker.py +++ b/heron/tools/tracker/src/python/tracker.py @@ -89,7 +89,7 @@ def get_topology( cluster: str, role: Optional[str], environ: str, - topology_name: str, + topology_name: str, ) -> Any: """ Find and return the topology given its cluster, environ, topology name, and From eddf0a36512f9cdaa2f966b12389c050bef69e77 Mon Sep 17 00:00:00 2001 From: Nicholas Nezis Date: Fri, 25 Mar 2022 23:36:41 -0400 Subject: [PATCH 22/82] Update heron/tools/tracker/src/python/app.py Co-authored-by: Saad Ur Rahman --- heron/tools/tracker/src/python/app.py | 1 - 1 file changed, 1 deletion(-) diff --git a/heron/tools/tracker/src/python/app.py b/heron/tools/tracker/src/python/app.py index 54f9bb543ba..b3f7714ba5f 100644 --- a/heron/tools/tracker/src/python/app.py +++ b/heron/tools/tracker/src/python/app.py @@ -92,7 +92,6 @@ async def handle_exception(_, exc: Exception): @app.get("/clusters") async def clusters() -> List[str]: return (s.name for s in state.tracker.state_managers) - @app.get( "/machines", response_model=Dict[str, Dict[str, Dict[str, List[str]]]], From 
234240ca0d7f8248191a8aec8e0408fd1188a6e6 Mon Sep 17 00:00:00 2001 From: Nicholas Nezis Date: Sun, 27 Mar 2022 12:17:59 -0400 Subject: [PATCH 23/82] Fixing the Tracker client URIs --- .../common/src/python/clients/tracker.py | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/heron/tools/common/src/python/clients/tracker.py b/heron/tools/common/src/python/clients/tracker.py index a699e6e4b72..4cccade71f8 100644 --- a/heron/tools/common/src/python/clients/tracker.py +++ b/heron/tools/common/src/python/clients/tracker.py @@ -45,6 +45,8 @@ # pylint: disable=bad-whitespace CLUSTER_URL_FMT = "%s/clusters" + +# Nested under /topologies TOPOLOGIES_URL_FMT = "%s/topologies" TOPOLOGIES_STATS_URL_FMT = "%s/states" % TOPOLOGIES_URL_FMT EXECUTION_STATE_URL_FMT = "%s/executionstate" % TOPOLOGIES_URL_FMT @@ -53,10 +55,6 @@ PACKINGPLAN_URL_FMT = "%s/packingplan" % TOPOLOGIES_URL_FMT SCHEDULER_LOCATION_URL_FMT = "%s/schedulerlocation" % TOPOLOGIES_URL_FMT -METRICS_URL_FMT = "%s/metrics" % TOPOLOGIES_URL_FMT -METRICS_QUERY_URL_FMT = METRICS_URL_FMT % "%s/query" % TOPOLOGIES_URL_FMT -METRICS_TIMELINE_URL_FMT = METRICS_URL_FMT % "%s/timeline" % TOPOLOGIES_URL_FMT - EXCEPTIONS_URL_FMT = "%s/exceptions" % TOPOLOGIES_URL_FMT EXCEPTION_SUMMARY_URL_FMT = "%s/exceptionsummary" % TOPOLOGIES_URL_FMT @@ -66,10 +64,16 @@ JMAP_URL_FMT = "%s/jmap" % TOPOLOGIES_URL_FMT HISTOGRAM_URL_FMT = "%s/histo" % TOPOLOGIES_URL_FMT -CONTAINER_URL_FMT = "%s/container" -FILE_DATA_URL_FMT = CONTAINER_URL_FMT % "%s/filedata" % TOPOLOGIES_URL_FMT -FILE_DOWNLOAD_URL_FMT = CONTAINER_URL_FMT % "%s/filedownload" % TOPOLOGIES_URL_FMT -FILESTATS_URL_FMT = CONTAINER_URL_FMT % "%s/filestats" % TOPOLOGIES_URL_FMT +# nested under /topologies/metrics/ +METRICS_URL_FMT = "%s/metrics" % TOPOLOGIES_URL_FMT +METRICS_QUERY_URL_FMT = "%s/query" % METRICS_URL_FMT +METRICS_TIMELINE_URL_FMT = "%s/timeline" % METRICS_URL_FMT + +# nested under /topologies/container/ +CONTAINER_URL_FMT = "%s/container" 
% TOPOLOGIES_URL_FMT +FILE_DATA_URL_FMT = "%s/filedata" % CONTAINER_URL_FMT +FILE_DOWNLOAD_URL_FMT = "%s/filedownload" % CONTAINER_URL_FMT +FILESTATS_URL_FMT = "%s/filestats" % CONTAINER_URL_FMT def strip_whitespace(s): From 771892af0b9139f2a0df6204ea87969abb8ac887 Mon Sep 17 00:00:00 2001 From: Nicholas Nezis Date: Sun, 27 Mar 2022 12:18:23 -0400 Subject: [PATCH 24/82] Whitespace style check --- heron/shell/src/python/handlers/pidhandler.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/heron/shell/src/python/handlers/pidhandler.py b/heron/shell/src/python/handlers/pidhandler.py index ba4564f8035..22438dad64d 100644 --- a/heron/shell/src/python/handlers/pidhandler.py +++ b/heron/shell/src/python/handlers/pidhandler.py @@ -36,7 +36,7 @@ async def get(self, instance_id): ''' get method ''' pid = subprocess.run(['cat', "%s.pid" % instance_id], capture_output=True, text=True) await self.finish({ - 'command': ' '.join(pid.args), - 'stdout': pid.stdout, - 'stderr': pid.stderr, - }) + 'command': ' '.join(pid.args), + 'stdout': pid.stdout, + 'stderr': pid.stderr, + }) From 64e35fe29ca3636d1f74f2e2c3592a7fd3b6c08d Mon Sep 17 00:00:00 2001 From: Saad Ur Rahman Date: Sun, 27 Mar 2022 17:15:32 -0400 Subject: [PATCH 25/82] [Tracker|Shell] cleaning up imports. 
--- heron/shell/src/python/handlers/pidhandler.py | 8 +++----- heron/shell/src/python/utils.py | 1 - heron/tools/tracker/src/python/metricstimeline.py | 2 -- heron/tools/tracker/src/python/routers/container.py | 3 --- heron/tools/tracker/src/python/routers/metrics.py | 1 - heron/tools/tracker/src/python/utils.py | 7 +------ 6 files changed, 4 insertions(+), 18 deletions(-) diff --git a/heron/shell/src/python/handlers/pidhandler.py b/heron/shell/src/python/handlers/pidhandler.py index 22438dad64d..6fdfa2acc40 100644 --- a/heron/shell/src/python/handlers/pidhandler.py +++ b/heron/shell/src/python/handlers/pidhandler.py @@ -20,11 +20,8 @@ ''' pidhandler.py ''' -import json -import tornado.web import subprocess - -from heron.shell.src.python import utils +import tornado.web class PidHandler(tornado.web.RequestHandler): """ @@ -34,7 +31,8 @@ class PidHandler(tornado.web.RequestHandler): # pylint: disable=attribute-defined-outside-init async def get(self, instance_id): ''' get method ''' - pid = subprocess.run(['cat', "%s.pid" % instance_id], capture_output=True, text=True) + pid = subprocess.run(['cat', "%s.pid" % instance_id], capture_output=True, text=True, + check=True) await self.finish({ 'command': ' '.join(pid.args), 'stdout': pid.stdout, diff --git a/heron/shell/src/python/utils.py b/heron/shell/src/python/utils.py index 6312ff7474f..2dc8fee37d2 100644 --- a/heron/shell/src/python/utils.py +++ b/heron/shell/src/python/utils.py @@ -19,7 +19,6 @@ # under the License. 
''' utils.py ''' -import functools import grp import os import pkgutil diff --git a/heron/tools/tracker/src/python/metricstimeline.py b/heron/tools/tracker/src/python/metricstimeline.py index 4e65b19e13b..8fea4f972f9 100644 --- a/heron/tools/tracker/src/python/metricstimeline.py +++ b/heron/tools/tracker/src/python/metricstimeline.py @@ -21,8 +21,6 @@ """ metricstimeline.py """ from typing import Dict, List -from heron.common.src.python.utils.log import Log -from heron.proto import common_pb2 from heron.proto import tmanager_pb2 import httpx diff --git a/heron/tools/tracker/src/python/routers/container.py b/heron/tools/tracker/src/python/routers/container.py index 061441b1677..bb5100a5a9a 100644 --- a/heron/tools/tracker/src/python/routers/container.py +++ b/heron/tools/tracker/src/python/routers/container.py @@ -22,10 +22,7 @@ topology, particularly data about heron containers. """ -from asyncio import constants -from subprocess import CompletedProcess from typing import List, Optional -from heron.common.src.python.utils.log import Log from heron.proto import common_pb2, tmanager_pb2 from heron.tools.tracker.src.python import state, utils diff --git a/heron/tools/tracker/src/python/routers/metrics.py b/heron/tools/tracker/src/python/routers/metrics.py index db12de0135f..1246a7e671b 100644 --- a/heron/tools/tracker/src/python/routers/metrics.py +++ b/heron/tools/tracker/src/python/routers/metrics.py @@ -32,7 +32,6 @@ import httpx -from fastapi import Query from fastapi import Query, APIRouter from pydantic import BaseModel, Field diff --git a/heron/tools/tracker/src/python/utils.py b/heron/tools/tracker/src/python/utils.py index 2de17317018..2f79a8468a3 100644 --- a/heron/tools/tracker/src/python/utils.py +++ b/heron/tools/tracker/src/python/utils.py @@ -28,21 +28,16 @@ import sys import subprocess -from asyncio import iscoroutinefunction -from functools import wraps from pathlib import Path from typing import Any, Optional, TypeVar from 
heron.common.src.python.utils.log import Log -from heron.tools.tracker.src.python import constants from heron.proto import topology_pb2 import javaobj.v1 as javaobj import yaml -from fastapi import APIRouter, HTTPException -from pydantic import Field -from pydantic.generics import GenericModel +from fastapi import HTTPException # directories for heron tools distribution From cf7724202fdc7cec0706dc1661b7b516cfae2dfe Mon Sep 17 00:00:00 2001 From: Saad Ur Rahman Date: Sun, 27 Mar 2022 17:18:54 -0400 Subject: [PATCH 26/82] [Tracker] whitespace fix. --- .../tools/tracker/src/python/routers/container.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/heron/tools/tracker/src/python/routers/container.py b/heron/tools/tracker/src/python/routers/container.py index bb5100a5a9a..f80d9b4b650 100644 --- a/heron/tools/tracker/src/python/routers/container.py +++ b/heron/tools/tracker/src/python/routers/container.py @@ -201,13 +201,13 @@ async def get_exceptions( # pylint: disable=too-many-arguments ret = [] for exception_log in exception_response.exceptions: ret.append(ExceptionLog( - hostname = exception_log.hostname, - instance_id = exception_log.instance_id, - stacktrace = exception_log.stacktrace, - lasttime = exception_log.lasttime, - firsttime = exception_log.firsttime, - count = str(exception_log.count), - logging = exception_log.logging, + hostname=exception_log.hostname, + instance_id=exception_log.instance_id, + stacktrace=exception_log.stacktrace, + lasttime=exception_log.lasttime, + firsttime=exception_log.firsttime, + count=str(exception_log.count), + logging=exception_log.logging, )) return ret From 9aa40914814841e5eea7e344cb3336b6233d428c Mon Sep 17 00:00:00 2001 From: Saad Ur Rahman Date: Sun, 27 Mar 2022 18:38:36 -0400 Subject: [PATCH 27/82] [Tracker] setting global Log Updating global Log in CLI. 
--- heron/tools/tracker/src/python/main.py | 1 + 1 file changed, 1 insertion(+) diff --git a/heron/tools/tracker/src/python/main.py b/heron/tools/tracker/src/python/main.py index 8218e10f749..0a139dfe2a5 100644 --- a/heron/tools/tracker/src/python/main.py +++ b/heron/tools/tracker/src/python/main.py @@ -125,6 +125,7 @@ def cli( log_level = logging.DEBUG if verbose else logging.INFO log.configure(log_level) + global Log Log = log.Log stmgr_override = { From 4b17564870f3c5954f96f0d877f3121ad9696a78 Mon Sep 17 00:00:00 2001 From: Saad Ur Rahman Date: Sun, 27 Mar 2022 18:39:54 -0400 Subject: [PATCH 28/82] [UI] setting global Log Updating global Log in CLI. --- heron/tools/ui/src/python/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/heron/tools/ui/src/python/main.py b/heron/tools/ui/src/python/main.py index eaa75f522f7..d899d25c236 100644 --- a/heron/tools/ui/src/python/main.py +++ b/heron/tools/ui/src/python/main.py @@ -652,7 +652,7 @@ def cli( host: str, port: int, base_url_option: str, tracker_url_option: str, verbose: bool ) -> None: """Start a web UI for heron which renders information from the tracker.""" - global base_url, tracker_url + global base_url, tracker_url, Log base_url = base_url_option log_level = logging.DEBUG if verbose else logging.INFO log.configure(log_level) From f417981fa33c9a1d9990e41a241878898adcb928 Mon Sep 17 00:00:00 2001 From: Saad Ur Rahman Date: Sun, 27 Mar 2022 18:46:58 -0400 Subject: [PATCH 29/82] [UI] white space fixes. 
--- heron/tools/ui/src/python/main.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/heron/tools/ui/src/python/main.py b/heron/tools/ui/src/python/main.py index d899d25c236..d452d388a1e 100644 --- a/heron/tools/ui/src/python/main.py +++ b/heron/tools/ui/src/python/main.py @@ -325,10 +325,10 @@ def api_topology_list_json(method: Callable[[], dict]) -> ApiListEnvelope: Log.debug(f"Api topology: {result}") if type(result) is None: return ApiEnvelope( - status="failure", - message="No topology found", - executiontime=time.time() - started, - result={}, + status="failure", + message="No topology found", + executiontime=time.time() - started, + result={}, ) else: return ApiListEnvelope( @@ -354,10 +354,10 @@ def api_topology_json(method: Callable[[], dict]) -> ApiEnvelope: Log.debug(f"Api topology: {result}") if type(result) is None: return ApiEnvelope( - status="failure", - message="No topology found", - executiontime=time.time() - started, - result={}, + status="failure", + message="No topology found", + executiontime=time.time() - started, + result={}, ) else: return ApiEnvelope( @@ -440,7 +440,8 @@ def scheduler_location_json(cluster: str, environment: str, topology: str) -> Ap "/{cluster}/{environment}/{topology}/{component}/exceptions.json", response_model=ApiListEnvelope, ) -def exceptions_json(cluster: str, environment: str, topology: str, component: str) -> ApiListEnvelope: +def exceptions_json(cluster: str, environment: str, topology: str, + component: str) -> ApiListEnvelope: """Return a list of exceptions for a component.""" return api_topology_list_json(lambda: tracker.get_component_exceptions( cluster, environment, topology, component, From 69528d0008d89eba97ce9ace5e35b4a9e2210819 Mon Sep 17 00:00:00 2001 From: Saad Ur Rahman Date: Sun, 27 Mar 2022 23:08:03 -0400 Subject: [PATCH 30/82] [UI] switching to `None` check for failure. 
--- heron/tools/ui/src/python/main.py | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/heron/tools/ui/src/python/main.py b/heron/tools/ui/src/python/main.py index d452d388a1e..312ab449815 100644 --- a/heron/tools/ui/src/python/main.py +++ b/heron/tools/ui/src/python/main.py @@ -323,20 +323,19 @@ def api_topology_list_json(method: Callable[[], dict]) -> ApiListEnvelope: started = time.time() result = method() Log.debug(f"Api topology: {result}") - if type(result) is None: + if result is None: return ApiEnvelope( status="failure", message="No topology found", executiontime=time.time() - started, result={}, ) - else: - return ApiListEnvelope( - status="success", - message="", - executiontime=time.time() - started, - result=result, - ) + return ApiListEnvelope( + status="success", + message="", + executiontime=time.time() - started, + result=result, + ) # topology list and plan handlers class ApiEnvelope(pydantic.BaseModel): @@ -352,20 +351,19 @@ def api_topology_json(method: Callable[[], dict]) -> ApiEnvelope: started = time.time() result = method() Log.debug(f"Api topology: {result}") - if type(result) is None: + if result is None: return ApiEnvelope( status="failure", message="No topology found", executiontime=time.time() - started, result={}, ) - else: - return ApiEnvelope( - status="success", - message="", - executiontime=time.time() - started, - result=result, - ) + return ApiEnvelope( + status="success", + message="", + executiontime=time.time() - started, + result=result, + ) @topologies_router.get("/list.json") def topologies_json() -> dict: From fc8efabef837d68a8be7175d2d326bc7a2ddc3cc Mon Sep 17 00:00:00 2001 From: Saad Ur Rahman Date: Mon, 28 Mar 2022 12:06:00 -0400 Subject: [PATCH 31/82] [UI] fixing issues with dict causing CI errors. 
--- heron/tools/tracker/src/python/query_operators.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/heron/tools/tracker/src/python/query_operators.py b/heron/tools/tracker/src/python/query_operators.py index 1c3e0a8eeec..f9e3161c668 100644 --- a/heron/tools/tracker/src/python/query_operators.py +++ b/heron/tools/tracker/src/python/query_operators.py @@ -150,11 +150,11 @@ async def execute( raise Exception(metrics["message"]) # Put a blank timeline. - if not metrics.timeline: - metrics.timeline = { + if not metrics.get("timeline"): + metrics["timeline"] = { self.metric_name: {} } - timelines = metrics.timeline[self.metric_name] + timelines = metrics["timeline"][self.metric_name] all_metrics = [ Metrics(self.component, self.metric_name, instance, start, end, { k: float(v) @@ -463,7 +463,7 @@ async def execute(self, tracker, tmanager: TManagerLocation, start: int, end: in if metrics: met = Metrics(None, None, metric.instance, start, end, metrics[""].timeline.copy()) for timestamp in list(met.timeline.keys()): - v = self._f(met.timeline[timestamp], metric.timeline[timestamp]) + v = self._f(met.timeline[timestamp], metric.timeline.get(timestamp)) if v is None: met.timeline.pop(timestamp, None) else: @@ -477,7 +477,7 @@ async def execute(self, tracker, tmanager: TManagerLocation, start: int, end: in # Initialize with first metrics timeline and its instance met = Metrics(None, None, metric.instance, start, end, metric.timeline.copy()) for timestamp in list(met.timeline.keys()): - v = self._f(met.timeline[timestamp], metrics2[""].timeline[timestamp]) + v = self._f(met.timeline[timestamp], metrics2[""].timeline.get(timestamp)) if v is None: met.timeline.pop(timestamp, None) else: From 4acae44a3c993394e057f943e88d56d4a912ff73 Mon Sep 17 00:00:00 2001 From: Nicholas Nezis Date: Sat, 2 Apr 2022 03:38:50 -0400 Subject: [PATCH 32/82] Added tracker server performance timing info in header --- .../heron/scheduler/kubernetes/V1Controller.java | 4 ++-- 
heron/tools/common/src/python/clients/tracker.py | 4 +++- heron/tools/tracker/src/python/app.py | 10 +++++++++- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/heron/schedulers/src/java/org/apache/heron/scheduler/kubernetes/V1Controller.java b/heron/schedulers/src/java/org/apache/heron/scheduler/kubernetes/V1Controller.java index 9f00816af4e..be6e48ebe4f 100644 --- a/heron/schedulers/src/java/org/apache/heron/scheduler/kubernetes/V1Controller.java +++ b/heron/schedulers/src/java/org/apache/heron/scheduler/kubernetes/V1Controller.java @@ -461,7 +461,7 @@ private V1Service createTopologyService() { * @param containerResource Passed down to configure the executor resource limits. * @param numberOfInstances Used to configure the execution command and ports for the executor. * @param isExecutor Flag used to configure components specific to executor and manager. - * @return A fully configured StatefulSet for the topology's executors. + * @return A fully configured V1StatefulSet for the topology's executors. */ private V1StatefulSet createStatefulSet(Resource containerResource, int numberOfInstances, boolean isExecutor) { @@ -1466,7 +1466,7 @@ protected static Map getPersistentVolumeClaimLabels(String topol */ private String getStatefulSetName(boolean isExecutor) { return String.format("%s-%s", getTopologyName(), - isExecutor ? KubernetesConstants.EXECUTOR_NAME + "s" : KubernetesConstants.MANAGER_NAME); + isExecutor ? 
KubernetesConstants.EXECUTOR_NAME : KubernetesConstants.MANAGER_NAME); } /** diff --git a/heron/tools/common/src/python/clients/tracker.py b/heron/tools/common/src/python/clients/tracker.py index 4cccade71f8..2a5fef78540 100644 --- a/heron/tools/common/src/python/clients/tracker.py +++ b/heron/tools/common/src/python/clients/tracker.py @@ -143,8 +143,10 @@ def api_get(url: str, params=None) -> Any: Log.error("error from tracker: %s", response.status_code) return None + execution = float(response.headers.get("x-process-time")) * 1000 duration = (end - start) * 1000 - Log.debug(f"URL fetch took {duration:.2}ms round trip time for {url}") + Log.debug(f"URL fetch took {execution:.2} ms server time for {url}") + Log.debug(f"URL fetch took {duration:.2} ms round trip time for {url}") return data diff --git a/heron/tools/tracker/src/python/app.py b/heron/tools/tracker/src/python/app.py index b3f7714ba5f..7428ac45b06 100644 --- a/heron/tools/tracker/src/python/app.py +++ b/heron/tools/tracker/src/python/app.py @@ -23,12 +23,13 @@ when prompted to. 
""" +import time from typing import Dict, List, Optional from heron.tools.tracker.src.python import constants, state, query from heron.tools.tracker.src.python.routers import topologies, container, metrics -from fastapi import FastAPI, Query +from fastapi import FastAPI, Query, Request from fastapi.exceptions import RequestValidationError from fastapi.responses import JSONResponse from starlette.exceptions import HTTPException as StarletteHTTPException @@ -68,6 +69,13 @@ app.include_router(metrics.router, prefix="/topologies", tags=["metrics"]) app.include_router(topologies.router, prefix="/topologies", tags=["topologies"]) +@app.middleware("http") +async def wrap_response(request: Request, call_next): + start_time = time.time() + response = await call_next(request) + process_time = time.time() - start_time + response.headers["x-process-time"] = str(process_time) + return response @app.on_event("startup") async def startup_event(): From 56416b25a537f1f336570fdff468f67b7909c849 Mon Sep 17 00:00:00 2001 From: Nicholas Nezis Date: Sat, 2 Apr 2022 03:44:55 -0400 Subject: [PATCH 33/82] Fix for metrics timeline --- heron/tools/tracker/src/python/query_operators.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/heron/tools/tracker/src/python/query_operators.py b/heron/tools/tracker/src/python/query_operators.py index f9e3161c668..d7ed232fb41 100644 --- a/heron/tools/tracker/src/python/query_operators.py +++ b/heron/tools/tracker/src/python/query_operators.py @@ -150,11 +150,11 @@ async def execute( raise Exception(metrics["message"]) # Put a blank timeline. 
- if not metrics.get("timeline"): - metrics["timeline"] = { + if not metrics.timeline: + metrics.timeline = { self.metric_name: {} } - timelines = metrics["timeline"][self.metric_name] + timelines = metrics.timeline[self.metric_name] all_metrics = [ Metrics(self.component, self.metric_name, instance, start, end, { k: float(v) From cbac2df1bcc98e7794ee212603d837566b68f1b3 Mon Sep 17 00:00:00 2001 From: Nicholas Nezis Date: Sat, 2 Apr 2022 13:02:03 -0400 Subject: [PATCH 34/82] Updated the unit tests --- .../tracker/tests/python/app_unittest.py | 15 ++-------- .../tests/python/query_operator_unittest.py | 28 +++++++++---------- 2 files changed, 17 insertions(+), 26 deletions(-) diff --git a/heron/tools/tracker/tests/python/app_unittest.py b/heron/tools/tracker/tests/python/app_unittest.py index 9060250504e..f66b4286c92 100644 --- a/heron/tools/tracker/tests/python/app_unittest.py +++ b/heron/tools/tracker/tests/python/app_unittest.py @@ -8,15 +8,6 @@ from fastapi.testclient import TestClient -def ok(result) -> dict: - return { - "executiontime": 0.0, - "result": result, - "message": "ok", - "status": constants.RESPONSE_STATUS_SUCCESS, - "tracker_version": constants.API_VERSION, - } - @pytest.fixture def tracker(monkeypatch): mock = MagicMock(Tracker) @@ -34,7 +25,7 @@ def test_clusters(client, tracker): tracker.state_managers = [c1, c2] response = client.get("/clusters") - assert response.json() == ok(["c1", "c2"]) + assert response.json() == ["c1", "c2"] assert response.status_code == 200 def test_machines(client): @@ -42,11 +33,11 @@ def test_machines(client): "cluster": ["c1", "c3"], "environ": ["e1", "e3"], }) - assert response.json() == ok({}) + assert response.json() == {} def test_topologies(client): response = client.get("/topologies", json={ "cluster": [], "environ": [], }) - assert response.json() == ok({}) + assert response.json() == {} diff --git a/heron/tools/tracker/tests/python/query_operator_unittest.py 
b/heron/tools/tracker/tests/python/query_operator_unittest.py index 0396a29ca69..0a5be1cb566 100644 --- a/heron/tools/tracker/tests/python/query_operator_unittest.py +++ b/heron/tools/tracker/tests/python/query_operator_unittest.py @@ -20,6 +20,7 @@ # pylint: disable=bad-continuation # pylint: disable=unused-argument, unused-variable from unittest.mock import patch, Mock +from heron.tools.tracker.src.python.metricstimeline import MetricsTimeline from heron.tools.tracker.src.python.query_operators import * @@ -37,11 +38,11 @@ async def test_TS_execute(): # Return mocked timeline def getMetricTimelineSideEffect(*args): assert (tmanager, "a", ["c"], ["b"], 40, 360) == args - return ({ - "starttime": 40, - "endtime": 360, - "component": "a", - "timeline": { + return MetricsTimeline( + starttime = 40, + endtime = 360, + component = "a", + timeline = { "c": { "b": { 40: "1.0", @@ -52,8 +53,8 @@ def getMetricTimelineSideEffect(*args): 340: "1.0" } } - } - }) + }, + ) with patch("heron.tools.tracker.src.python.query_operators.get_metrics_timeline", side_effect=getMetricTimelineSideEffect): @@ -101,11 +102,11 @@ async def test_TS_execute_with_multiple_instances(): # With multiple instances def getMetricTimelineSideEffect(*args): assert (tmanager, "a", ["c"], [], 40, 360) == args - return ({ - "starttime": 40, - "endtime": 360, - "component": "a", - "timeline": { + return MetricsTimeline( + starttime = 40, + endtime = 360, + component = "a", + timeline = { "c": { "b": { 40: "1.0", @@ -124,8 +125,7 @@ def getMetricTimelineSideEffect(*args): 340: "2.0" } } - } - }) + }) # pylint: disable=unused-variable with patch("heron.tools.tracker.src.python.query_operators.get_metrics_timeline", From 1723c159f5cf2df122d05d92c3eac729e0c4ee53 Mon Sep 17 00:00:00 2001 From: Nicholas Nezis Date: Sat, 2 Apr 2022 13:02:30 -0400 Subject: [PATCH 35/82] Updated pylint to fix Python 3.9 issues --- third_party/python/pylint/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/third_party/python/pylint/BUILD b/third_party/python/pylint/BUILD index 040baf6c066..f6720f558ee 100644 --- a/third_party/python/pylint/BUILD +++ b/third_party/python/pylint/BUILD @@ -5,5 +5,5 @@ package(default_visibility = ["//visibility:public"]) pex_binary( name = "pylint", entrypoint = "pylint", - reqs = ["pylint==2.5.0"], + reqs = ["pylint==2.13.4"], ) From df977259e3b82fda02cff1171e2c6457d92e8196 Mon Sep 17 00:00:00 2001 From: Nicholas Nezis Date: Sun, 3 Apr 2022 04:24:43 -0400 Subject: [PATCH 36/82] Whole lot of formatting changes --- bazel_configure.py | 14 +-- examples/src/python/bolt/consume_bolt.py | 2 +- examples/src/python/bolt/count_bolt.py | 6 +- examples/src/python/bolt/half_ack_bolt.py | 6 +- .../src/python/bolt/stateful_count_bolt.py | 6 +- .../src/python/bolt/stream_aggregate_bolt.py | 2 +- examples/src/python/bolt/window_size_bolt.py | 2 +- .../src/python/spout/multi_stream_spout.py | 6 +- .../src/python/spout/stateful_word_spout.py | 6 +- examples/src/python/spout/word_spout.py | 8 +- heron/common/src/python/pex_loader.py | 22 ++--- heron/executor/src/python/heron_executor.py | 83 ++++++++-------- .../tests/python/heron_executor_unittest.py | 32 +++---- .../src/python/basics/base_instance.py | 10 +- .../src/python/basics/bolt_instance.py | 4 +- .../src/python/basics/spout_instance.py | 11 +-- heron/instance/src/python/instance.py | 17 ++-- .../src/python/network/heron_client.py | 40 ++++---- .../src/python/network/metricsmgr_client.py | 13 ++- heron/instance/src/python/network/protocol.py | 5 +- .../src/python/network/socket_options.py | 4 +- .../python/utils/metrics/metrics_helper.py | 8 +- .../src/python/utils/misc/communicator.py | 4 +- .../utils/misc/custom_grouping_helper.py | 12 +-- .../src/python/utils/misc/pplan_helper.py | 10 +- .../utils/topology/topology_context_impl.py | 2 +- .../src/python/handlers/downloadhandler.py | 2 +- heron/shell/src/python/handlers/pidhandler.py | 2 +- heron/statemgrs/src/python/configloader.py | 9 +- 
heron/statemgrs/src/python/statemanager.py | 7 +- .../src/python/statemanagerfactory.py | 2 +- heron/statemgrs/src/python/zkstatemanager.py | 32 +++---- heron/tools/cli/src/python/args.py | 9 +- heron/tools/cli/src/python/cdefs.py | 2 +- heron/tools/cli/src/python/cli_helper.py | 14 +-- heron/tools/cli/src/python/cliconfig.py | 8 +- heron/tools/cli/src/python/config.py | 12 +-- heron/tools/cli/src/python/execute.py | 7 +- heron/tools/cli/src/python/main.py | 31 +++--- heron/tools/cli/src/python/opts.py | 6 +- heron/tools/cli/src/python/result.py | 18 ++-- heron/tools/cli/src/python/submit.py | 41 ++++---- heron/tools/cli/src/python/update.py | 13 +-- heron/tools/cli/src/python/version.py | 6 +- heron/tools/cli/tests/python/opts_unittest.py | 8 +- .../common/src/python/clients/tracker.py | 50 +++++----- heron/tools/common/src/python/utils/config.py | 57 ++++++----- .../tools/explorer/src/python/logicalplan.py | 8 +- heron/tools/explorer/src/python/main.py | 6 +- .../tools/explorer/src/python/physicalplan.py | 6 +- heron/tools/explorer/src/python/topologies.py | 6 +- heron/tools/tracker/src/python/main.py | 6 +- heron/tools/tracker/src/python/utils.py | 7 +- heron/tools/ui/src/python/main.py | 10 +- heronpy/api/bolt/base_bolt.py | 2 +- heronpy/api/component/component_spec.py | 26 +++-- heronpy/api/spout/base_spout.py | 2 +- heronpy/api/stream.py | 10 +- heronpy/api/topology.py | 21 ++--- heronpy/connectors/pulsar/pulsarspout.py | 24 ++--- .../textfiles/textfilesgenerator.py | 6 +- heronpy/streamlet/config.py | 2 +- heronpy/streamlet/impl/consumebolt.py | 2 +- heronpy/streamlet/impl/filterbolt.py | 2 +- heronpy/streamlet/impl/flatmapbolt.py | 2 +- heronpy/streamlet/impl/generatorspout.py | 2 +- heronpy/streamlet/impl/joinbolt.py | 10 +- heronpy/streamlet/impl/logbolt.py | 2 +- heronpy/streamlet/impl/mapbolt.py | 2 +- heronpy/streamlet/impl/repartitionbolt.py | 2 +- heronpy/streamlet/impl/supplierspout.py | 2 +- heronpy/streamlet/impl/transformbolt.py | 2 +- 
heronpy/streamlet/impl/unionbolt.py | 2 +- .../src/python/http_server/main.py | 12 +-- .../common/bolt/count_aggregator_bolt.py | 2 +- .../common/bolt/word_count_bolt.py | 4 +- .../integration_test/core/aggregator_bolt.py | 10 +- .../core/integration_test_bolt.py | 6 +- .../core/integration_test_spout.py | 14 +-- .../core/test_topology_builder.py | 2 +- .../src/python/local_test_runner/main.py | 2 +- .../python/local_test_runner/test_scale_up.py | 5 +- .../python/local_test_runner/test_template.py | 37 ++++---- .../src/python/test_runner/main.py | 77 +++++++-------- .../src/python/topology_test_runner/main.py | 94 ++++++++----------- scripts/shutils/save-logs.py | 10 +- third_party/python/semver/semver.py | 8 +- tools/rules/pex/wrapper/pex_wrapper.py | 6 +- 88 files changed, 541 insertions(+), 591 deletions(-) diff --git a/bazel_configure.py b/bazel_configure.py index 5b08afa3f1e..49a8498deaf 100755 --- a/bazel_configure.py +++ b/bazel_configure.py @@ -144,7 +144,7 @@ def real_program_path(program_name): return None def fail(message): - print("\nFAILED: %s" % message) + print(f"\nFAILED: {message}") sys.exit(1) # Assumes the version is at the end of the first line consisting of digits and dots @@ -158,7 +158,7 @@ def discover_version(path): version_flag = "-V" else: version_flag = "--version" - command = "%s %s" % (path, version_flag) + command = f"{path} {version_flag}" version_output = subprocess.check_output(command, stderr=subprocess.STDOUT, shell=True) first_line = version_output.decode('ascii', 'ignore').split("\n")[0] version = get_trailing_version(first_line) @@ -215,12 +215,12 @@ def discover_version(path): return version - fail ("Could not determine the version of %s from the following output\n%s\n%s" % (path, command, version_output)) + fail (f"Could not determine the version of {path} from the following output\n{command}\n{version_output}") def to_semver(version): # is version too short if re.search('^[\d]+\.[\d]+$', version): - return "%s.0" % version 
+ return f"{version}.0" # is version too long version_search = re.search('^([\d]+\.[\d]+\.[\d]+)\.[\d]+$', version) @@ -231,8 +231,8 @@ def to_semver(version): def assert_min_version(path, min_version): version = discover_version(path) - if not semver.match(to_semver(version), ">=%s" % to_semver(min_version)): - fail("%s is version %s which is less than the required version %s" % (path, version, min_version)) + if not semver.match(to_semver(version), f">={to_semver(min_version)}"): + fail(f"{path} is version {version} which is less than the required version {min_version}") return version ###################################################################### @@ -391,7 +391,7 @@ def write_heron_config_header(config_file): # MAIN program that sets up your workspace for bazel ###################################################################### def main(): - env_map = dict() + env_map = {} # Discover the platform platform = discover_platform() diff --git a/examples/src/python/bolt/consume_bolt.py b/examples/src/python/bolt/consume_bolt.py index 55df3441d59..c6b194e8358 100644 --- a/examples/src/python/bolt/consume_bolt.py +++ b/examples/src/python/bolt/consume_bolt.py @@ -33,4 +33,4 @@ def process(self, tup): def process_tick(self, tup): self.log("Got tick tuple!") - self.log("Total received data tuple: %d" % self.total) + self.log(f"Total received data tuple: {self.total}") diff --git a/examples/src/python/bolt/count_bolt.py b/examples/src/python/bolt/count_bolt.py index 0a9a33abf40..63e19bc9941 100644 --- a/examples/src/python/bolt/count_bolt.py +++ b/examples/src/python/bolt/count_bolt.py @@ -34,8 +34,8 @@ def initialize(self, config, context): self.counter = Counter() self.total = 0 - self.logger.info("Component-specific config: \n%s" % str(config)) - self.logger.info("Context: \n%s" % str(context)) + self.logger.info(f"Component-specific config: \n{str(config)}") + self.logger.info(f"Context: \n{str(context)}") def _increment(self, word, inc_by): 
self.counter[word] += inc_by @@ -49,4 +49,4 @@ def process(self, tup): def process_tick(self, tup): self.log("Got tick tuple!") - self.log("Current map: %s" % str(self.counter)) + self.log(f"Current map: {str(self.counter)}") diff --git a/examples/src/python/bolt/half_ack_bolt.py b/examples/src/python/bolt/half_ack_bolt.py index 5b6dab4852e..537edaa3cb8 100644 --- a/examples/src/python/bolt/half_ack_bolt.py +++ b/examples/src/python/bolt/half_ack_bolt.py @@ -30,12 +30,12 @@ def initialize(self, config, context): def process(self, tup): self.total += 1 if self.total % 2 == 0: - self.logger.debug("Failing a tuple: %s" % str(tup)) + self.logger.debug("Failing a tuple: %s", str(tup)) self.fail(tup) else: - self.logger.debug("Acking a tuple: %s" % str(tup)) + self.logger.debug("Acking a tuple: %s", str(tup)) self.ack(tup) def process_tick(self, tup): self.log("Got tick tuple!") - self.log("Total received: %d" % self.total) + self.log("Total received: %d", self.total) diff --git a/examples/src/python/bolt/stateful_count_bolt.py b/examples/src/python/bolt/stateful_count_bolt.py index 26fd0002b42..cc97d995233 100644 --- a/examples/src/python/bolt/stateful_count_bolt.py +++ b/examples/src/python/bolt/stateful_count_bolt.py @@ -34,19 +34,19 @@ class StatefulCountBolt(Bolt, StatefulComponent): # pylint: disable=attribute-defined-outside-init def init_state(self, stateful_state): self.recovered_state = stateful_state - self.logger.info("Checkpoint Snapshot recovered : %s" % str(self.recovered_state)) + self.logger.info(f"Checkpoint Snapshot recovered : {str(self.recovered_state)}") def pre_save(self, checkpoint_id): for (k, v) in list(self.counter.items()): self.recovered_state.put(k, v) - self.logger.info("Checkpoint Snapshot %s : %s" % (checkpoint_id, str(self.recovered_state))) + self.logger.info(f"Checkpoint Snapshot {checkpoint_id} : {str(self.recovered_state)}") def initialize(self, config, context): self.logger.info("In prepare() of CountBolt") self.counter = Counter() 
self.total = 0 - self.logger.info("Component-specific config: \n%s" % str(config)) + self.logger.info(f"Component-specific config: \n{str(config)}") def _increment(self, word, inc_by): self.counter[word] += inc_by diff --git a/examples/src/python/bolt/stream_aggregate_bolt.py b/examples/src/python/bolt/stream_aggregate_bolt.py index c73af5d6017..3aa62ab2ae9 100644 --- a/examples/src/python/bolt/stream_aggregate_bolt.py +++ b/examples/src/python/bolt/stream_aggregate_bolt.py @@ -34,4 +34,4 @@ def process(self, tup): def process_tick(self, tup): self.log("Got tick tuple!") - self.log("Current stream counter: %s" % str(self.stream_counter)) + self.log(f"Current stream counter: {str(self.stream_counter)}") diff --git a/examples/src/python/bolt/window_size_bolt.py b/examples/src/python/bolt/window_size_bolt.py index 56040386b4e..9a14afd8da6 100644 --- a/examples/src/python/bolt/window_size_bolt.py +++ b/examples/src/python/bolt/window_size_bolt.py @@ -34,4 +34,4 @@ def initialize(self, config, context): def processWindow(self, window_info, tuples): self.numerator += len(tuples) self.denominator += 1 - self.logger.info("The current average is %f" % (self.numerator / self.denominator)) + self.logger.info(f"The current average is {(self.numerator / self.denominator)}") diff --git a/examples/src/python/spout/multi_stream_spout.py b/examples/src/python/spout/multi_stream_spout.py index d2ded5d0808..056b5778889 100644 --- a/examples/src/python/spout/multi_stream_spout.py +++ b/examples/src/python/spout/multi_stream_spout.py @@ -36,8 +36,8 @@ def initialize(self, config, context): self.emit_count = 0 - self.logger.info("Component-specific config: \n%s" % str(config)) - self.logger.info("Context: \n%s" % str(context)) + self.logger.info(f"Component-specific config: \n{str(config)}") + self.logger.info(f"Context: \n{str(context)}") def next_tuple(self): word = next(self.words) @@ -45,6 +45,6 @@ def next_tuple(self): self.emit_count += 1 if self.emit_count % 100000 == 0: - 
self.logger.info("Emitted %s" % str(self.emit_count)) + self.logger.info(f"Emitted {str(self.emit_count)}") self.logger.info("Emitting to error stream") self.emit(["test error message"], stream='error') diff --git a/examples/src/python/spout/stateful_word_spout.py b/examples/src/python/spout/stateful_word_spout.py index c976ab84f5a..05895dad689 100644 --- a/examples/src/python/spout/stateful_word_spout.py +++ b/examples/src/python/spout/stateful_word_spout.py @@ -34,13 +34,13 @@ class StatefulWordSpout(Spout, StatefulComponent): # pylint: disable=attribute-defined-outside-init def init_state(self, stateful_state): self.recovered_state = stateful_state - self.logger.info("Checkpoint Snapshot recovered : %s" % str(self.recovered_state)) + self.logger.info(f"Checkpoint Snapshot recovered : {str(self.recovered_state)}") def pre_save(self, checkpoint_id): # Purely for debugging purposes for (k, v) in list(self.counter.items()): self.recovered_state.put(k, v) - self.logger.info("Checkpoint Snapshot %s : %s" % (checkpoint_id, str(self.recovered_state))) + self.logger.info(f"Checkpoint Snapshot {checkpoint_id} : {str(self.recovered_state)}") # pylint: disable=unused-argument def initialize(self, config, context): @@ -52,7 +52,7 @@ def initialize(self, config, context): self.ack_count = 0 self.fail_count = 0 - self.logger.info("Component-specific config: \n%s" % str(config)) + self.logger.info(f"Component-specific config: \n{str(config)}") def next_tuple(self): word = next(self.words) diff --git a/examples/src/python/spout/word_spout.py b/examples/src/python/spout/word_spout.py index 25ab969500e..537cde7278a 100644 --- a/examples/src/python/spout/word_spout.py +++ b/examples/src/python/spout/word_spout.py @@ -37,8 +37,8 @@ def initialize(self, config, context): self.ack_count = 0 self.fail_count = 0 - self.logger.info("Component-specific config: \n%s" % str(config)) - self.logger.info("Context: \n%s" % str(context)) + self.logger.info(f"Component-specific config: 
\n{str(config)}") + self.logger.info(f"Context: \n{str(context)}") def next_tuple(self): word = next(self.words) @@ -50,9 +50,9 @@ def next_tuple(self): def ack(self, tup_id): self.ack_count += 1 if self.ack_count % 100000 == 0: - self.logger.info("Acked %sth tuples, tup_id: %s" % (str(self.ack_count), str(tup_id))) + self.logger.info(f"Acked {str(self.ack_count)}th tuples, tup_id: {str(tup_id)}") def fail(self, tup_id): self.fail_count += 1 if self.fail_count % 100000 == 0: - self.logger.info("Failed %sth tuples, tup_id: %s" % (str(self.fail_count), str(tup_id))) + self.logger.info(f"Failed {str(self.fail_count)}th tuples, tup_id: {str(tup_id)}") diff --git a/heron/common/src/python/pex_loader.py b/heron/common/src/python/pex_loader.py index 565dbdf32ed..ecad8e925f7 100644 --- a/heron/common/src/python/pex_loader.py +++ b/heron/common/src/python/pex_loader.py @@ -35,15 +35,15 @@ def _get_deps_list(abs_path_to_pex): Note that dependencies are located under `.deps` directory """ - pex = zipfile.ZipFile(abs_path_to_pex, mode='r') - deps = list({re.match(egg_regex, i).group(1) for i in pex.namelist() - if re.match(egg_regex, i) is not None}) + with zipfile.ZipFile(abs_path_to_pex, mode='r') as pex: + deps = list({re.match(egg_regex, i).group(1) for i in pex.namelist() + if re.match(egg_regex, i) is not None}) return deps def load_pex(path_to_pex, include_deps=True): """Loads pex file and its dependencies to the current python path""" abs_path_to_pex = os.path.abspath(path_to_pex) - Log.debug("Add a pex to the path: %s" % abs_path_to_pex) + Log.debug(f"Add a pex to the path: {abs_path_to_pex}") if abs_path_to_pex not in sys.path: sys.path.insert(0, os.path.dirname(abs_path_to_pex)) @@ -52,10 +52,10 @@ def load_pex(path_to_pex, include_deps=True): for dep in _get_deps_list(abs_path_to_pex): to_join = os.path.join(os.path.dirname(abs_path_to_pex), dep) if to_join not in sys.path: - Log.debug("Add a new dependency to the path: %s" % dep) + Log.debug(f"Add a new dependency 
to the path: {dep}") sys.path.insert(0, to_join) - Log.debug("Python path: %s" % str(sys.path)) + Log.debug(f"Python path: {str(sys.path)}") def resolve_heron_suffix_issue(abs_pex_path, class_path): """Resolves duplicate package suffix problems @@ -108,13 +108,13 @@ def import_and_get_class(path_to_pex, python_class_name): """ abs_path_to_pex = os.path.abspath(path_to_pex) - Log.debug("Add a pex to the path: %s" % abs_path_to_pex) - Log.debug("In import_and_get_class with cls_name: %s" % python_class_name) + Log.debug(f"Add a pex to the path: {abs_path_to_pex}") + Log.debug(f"In import_and_get_class with cls_name: {python_class_name}") split = python_class_name.split('.') from_path = '.'.join(split[:-1]) import_name = python_class_name.split('.')[-1] - Log.debug("From path: %s, import name: %s" % (from_path, import_name)) + Log.debug(f"From path: {from_path}, import name: {import_name}") # Resolve duplicate package suffix problem (heron.), if the top level package name is heron if python_class_name.startswith("heron."): @@ -122,8 +122,8 @@ def import_and_get_class(path_to_pex, python_class_name): mod = resolve_heron_suffix_issue(abs_path_to_pex, python_class_name) return getattr(mod, import_name) except: - Log.error("Could not resolve class %s with special handling" % python_class_name) + Log.error(f"Could not resolve class {python_class_name} with special handling") mod = __import__(from_path, fromlist=[import_name], level=0) - Log.debug("Imported module: %s" % str(mod)) + Log.debug(f"Imported module: {str(mod)}") return getattr(mod, import_name) diff --git a/heron/executor/src/python/heron_executor.py b/heron/executor/src/python/heron_executor.py index 535ccd4777b..bbd6e724039 100755 --- a/heron/executor/src/python/heron_executor.py +++ b/heron/executor/src/python/heron_executor.py @@ -131,10 +131,10 @@ def cli( def id_map(prefix, container_plans, add_zero_id=False): ids = {} if add_zero_id: - ids[0] = "%s-0" % prefix + ids[0] = f"{prefix}-0" for container_plan 
in container_plans: - ids[container_plan.id] = "%s-%d" % (prefix, container_plan.id) + ids[container_plan.id] = f"{prefix}-{container_plan.id}" return ids def stmgr_map(container_plans): @@ -150,13 +150,13 @@ def heron_shell_map(container_plans): return id_map("heron-shell", container_plans, True) def get_heron_executor_process_name(shard_id): - return 'heron-executor-%d' % shard_id + return f"heron-executor-{shard_id}" def get_process_pid_filename(process_name): - return '%s.pid' % process_name + return f"{process_name}.pid" def get_tmp_filename(): - return '%s.heron.tmp' % (''.join(random.choice(string.ascii_uppercase) for i in range(12))) + return f"{(''.join(random.choice(string.ascii_uppercase) for i in range(12)))}.heron.tmp" def atomic_write_file(path, content): """ @@ -375,7 +375,7 @@ def __init__(self, parsed_args, shell_env): def run_command_or_exit(self, command): if self._run_blocking_process(command, True) != 0: - Log.error("Failed to run command: %s. Exiting" % command) + Log.error(f"Failed to run command: {command}. Exiting") sys.exit(1) def initialize(self): @@ -675,7 +675,7 @@ def _get_jvm_version(self): sys.exit(1) self.jvm_version = process_stdout - Log.info("Detected JVM version %s" % self.jvm_version) + Log.info(f"Detected JVM version {self.jvm_version}") return self.jvm_version # Returns the processes for each Python Heron Instance @@ -684,7 +684,7 @@ def _get_python_instance_cmd(self, instance_info): # TODO: currently ignoring ramsize, heap, etc. retval = {} for (instance_id, component_name, global_task_id, component_index) in instance_info: - Log.info("Python instance %s component: %s" %(instance_id, component_name)) + Log.info(f"Python instance {instance_id} component: {component_name}") instance_cmd = [self.python_instance_binary, '--topology_name=%s' % self.topology_name, '--topology_id=%s' % self.topology_id, @@ -710,7 +710,7 @@ def _get_cpp_instance_cmd(self, instance_info): # TODO: currently ignoring ramsize, heap, etc. 
retval = {} for (instance_id, component_name, global_task_id, component_index) in instance_info: - Log.info("CPP instance %s component: %s" %(instance_id, component_name)) + Log.info(f"CPP instance {instance_id} component: {component_name}") instance_cmd = [ self.cpp_instance_binary, '--topology_name=%s' % self.topology_name, @@ -745,7 +745,7 @@ def _get_streaming_processes(self): global_task_id = instance_plan.task_id component_index = instance_plan.component_index component_name = instance_plan.component_name - instance_id = "container_%s_%s_%d" % (str(self.shard), component_name, global_task_id) + instance_id = f"container_{str(self.shard)}_{component_name}_{global_task_id}" instance_info.append((instance_id, component_name, global_task_id, component_index)) stmgr_cmd_lst = [ @@ -798,7 +798,7 @@ def _get_streaming_processes(self): elif self.pkg_type == 'dylib': retval.update(self._get_cpp_instance_cmd(instance_info)) else: - raise ValueError("Unrecognized package type: %s" % self.pkg_type) + raise ValueError(f"Unrecognized package type: {self.pkg_type}") return retval @@ -865,9 +865,9 @@ def _get_heron_support_processes(self): def _untar_if_needed(self): if self.pkg_type == "tar": - os.system("tar -xvf %s" % self.topology_binary_file) + os.system(f"tar -xvf {self.topology_binary_file}") elif self.pkg_type == "pex": - os.system("unzip -qq -n %s" % self.topology_binary_file) + os.system(f"unzip -qq -n {self.topology_binary_file}") # pylint: disable=no-self-use def _wait_process_std_out_err(self, name, process): @@ -876,7 +876,7 @@ def _wait_process_std_out_err(self, name, process): process.wait() def _run_process(self, name, cmd): - Log.info("Running %s process as %s" % (name, cmd)) + Log.info(f"Running {name} process as {cmd}") try: # stderr is redirected to stdout so that it can more easily be logged. 
stderr has a max buffer # size and can cause the child process to deadlock if it fills up @@ -890,7 +890,7 @@ def _run_process(self, name, cmd): return process def _run_blocking_process(self, cmd, is_shell=False): - Log.info("Running blocking process as %s" % cmd) + Log.info(f"Running blocking process as {cmd}") try: # stderr is redirected to stdout so that it can more easily be logged. stderr has a max buffer # size and can cause the child process to deadlock if it fills up @@ -900,7 +900,7 @@ def _run_blocking_process(self, cmd, is_shell=False): # wait for termination self._wait_process_std_out_err(cmd.cmd, process) except Exception: - Log.info("Exception running command %s", cmd) + Log.info(f"Exception running command {cmd}") traceback.print_exc() # return the exit code @@ -913,14 +913,12 @@ def _kill_processes(self, commands): for process_info in list(self.processes_to_monitor.values()): if process_info.name == command_name: del self.processes_to_monitor[process_info.pid] - Log.info("Killing %s process with pid %d: %s" % - (process_info.name, process_info.pid, command)) + Log.info(f"Killing {process_info.name} process with pid {process_info.pid}: {command}") try: process_info.process.terminate() # sends SIGTERM to process except OSError as e: if e.errno == 3: # No such process - Log.warn("Expected process %s with pid %d was not running, ignoring." % - (process_info.name, process_info.pid)) + Log.warn(f"Expected process {process_info.name} with pid {process_info.pid} was not running, ignoring.") else: raise e @@ -954,15 +952,15 @@ def start_process_monitor(self): old_process_info = self.processes_to_monitor[pid] name = old_process_info.name command = old_process_info.command - Log.info("%s (pid=%s) exited with status %d. command=%s" % (name, pid, status, command)) + Log.info(f"{name} (pid={pid}) exited with status {status}. 
command={command}") # Log the stdout & stderr of the failed process self._wait_process_std_out_err(name, old_process_info.process) # Just make it world readable - if os.path.isfile("core.%d" % pid): - os.system("chmod a+r core.%d" % pid) + if os.path.isfile(f"core.{pid}"): + os.system(f"chmod a+r core.{pid}") if old_process_info.attempts >= self.max_runs: - Log.info("%s exited too many times" % name) + Log.info(f"{name} exited too many times") sys.exit(1) time.sleep(self.interval_between_runs) p = self._run_process(name, command) @@ -985,10 +983,10 @@ def get_commands_to_run(self): if self._get_instance_plans(self.packing_plan, self.shard) is None and self.shard != 0: retval = {} retval['heron-shell'] = Command([ - '%s' % self.heron_shell_binary, - '--port=%s' % self.shell_port, - '--log_file_prefix=%s/heron-shell-%s.log' % (self.log_dir, self.shard), - '--secret=%s' % self.topology_id], self.shell_env) + f'{self.heron_shell_binary}', + f'--port={self.shell_port}', + f'--log_file_prefix={self.log_dir}/heron-shell-{self.shard}.log', + f'--secret={self.topology_id}'], self.shell_env) return retval if self.shard == 0: @@ -1042,17 +1040,17 @@ def launch(self): commands_to_kill, commands_to_keep, commands_to_start = \ self.get_command_changes(current_commands, updated_commands) - Log.info("current commands: %s" % sorted(current_commands.keys())) - Log.info("new commands : %s" % sorted(updated_commands.keys())) - Log.info("commands_to_kill: %s" % sorted(commands_to_kill.keys())) - Log.info("commands_to_keep: %s" % sorted(commands_to_keep.keys())) - Log.info("commands_to_start: %s" % sorted(commands_to_start.keys())) + Log.info(f"current commands: {sorted(current_commands.keys())}") + Log.info(f"new commands : {sorted(updated_commands.keys())}") + Log.info(f"commands_to_kill: {sorted(commands_to_kill.keys())}") + Log.info(f"commands_to_keep: {sorted(commands_to_keep.keys())}") + Log.info(f"commands_to_start: {sorted(commands_to_start.keys())}") 
self._kill_processes(commands_to_kill) self._start_processes(commands_to_start) - Log.info("Launch complete - processes killed=%s kept=%s started=%s monitored=%s" % - (len(commands_to_kill), len(commands_to_keep), - len(commands_to_start), len(self.processes_to_monitor))) + Log.info(f"Launch complete - processes killed={len(commands_to_kill)}"\ + f" kept={len(commands_to_keep)} started={len(commands_to_start)}"\ + f" monitored={len(self.processes_to_monitor)}") # pylint: disable=global-statement def start_state_manager_watches(self): @@ -1076,14 +1074,14 @@ def start_state_manager_watches(self): for state_manager in self.state_managers: state_manager.start() except Exception as ex: - Log.error("Found exception while initializing state managers: %s. Bailing out..." % ex) + Log.error(f"Found exception while initializing state managers: {ex}. Bailing out...") traceback.print_exc() sys.exit(1) # pylint: disable=unused-argument def on_packing_plan_watch(state_manager, new_packing_plan): - Log.debug("State watch triggered for PackingPlan update on shard %s. Existing: %s, New: %s" % - (self.shard, str(self.packing_plan), str(new_packing_plan))) + Log.debug(f"State watch triggered for PackingPlan update on shard {self.shard}. "\ + f"Existing: {str(self.packing_plan)}, New: {str(new_packing_plan)}") if self.packing_plan != new_packing_plan: Log.info("PackingPlan change detected on shard %s, relaunching effected processes." @@ -1101,8 +1099,7 @@ def on_packing_plan_watch(state_manager, new_packing_plan): # state_manager as first variable. onPackingPlanWatch = functools.partial(on_packing_plan_watch, state_manager) state_manager.get_packing_plan(self.topology_name, onPackingPlanWatch) - Log.info("Registered state watch for packing plan changes with state manager %s." 
% - str(state_manager)) + Log.info(f"Registered state watch for packing plan changes with state manager {str(state_manager)}.") def stop_state_manager_watches(self): Log.info("Stopping state managers") @@ -1115,7 +1112,7 @@ def setup(executor): def signal_handler(signal_to_handle, frame): # We would do nothing here but just exit # Just catch the SIGTERM and then cleanup(), registered with atexit, would invoke - Log.info('signal_handler invoked with signal %s', signal_to_handle) + Log.info(f'signal_handler invoked with signal {signal_to_handle}') executor.stop_state_manager_watches() sys.exit(signal_to_handle) @@ -1137,7 +1134,7 @@ def cleanup(): # Redirect stdout and stderr to files in append mode # The filename format is heron-executor-.stdxxx shardid = executor.shard - log.configure(logfile='heron-executor-%s.stdout' % shardid) + log.configure(logfile=f'heron-executor-{shardid}.stdout') pid = os.getpid() sid = os.getsid(pid) diff --git a/heron/executor/tests/python/heron_executor_unittest.py b/heron/executor/tests/python/heron_executor_unittest.py index 4745183189b..872ed34a2db 100644 --- a/heron/executor/tests/python/heron_executor_unittest.py +++ b/heron/executor/tests/python/heron_executor_unittest.py @@ -113,12 +113,11 @@ def get_expected_metricsmgr_command(container_id): "-XX:MaxGCPauseMillis=100 -XX:InitiatingHeapOccupancyPercent=30 " \ "-XX:ParallelGCThreads=4 " \ "-cp metricsmgr_classpath org.apache.heron.metricsmgr.MetricsManager " \ - "--id=metricsmgr-%d --port=metricsmgr_port " \ + f"--id=metricsmgr-{container_id} --port=metricsmgr_port " \ "--topology=topname --cluster=cluster --role=role --environment=environ " \ "--topology-id=topid " \ - "--system-config-file=%s --override-config-file=%s " \ - "--sink-config-file=metrics_sinks_config_file" % \ - (container_id, INTERNAL_CONF_PATH, OVERRIDE_PATH) + f"--system-config-file={INTERNAL_CONF_PATH} --override-config-file={OVERRIDE_PATH} " \ + "--sink-config-file=metrics_sinks_config_file" def 
get_expected_metricscachemgr_command(): return "heron_java_home/bin/java -Xmx1024M -XX:+PrintCommandLineFlags " \ @@ -129,10 +128,9 @@ def get_expected_metricscachemgr_command(): "-cp metricscachemgr_classpath org.apache.heron.metricscachemgr.MetricsCacheManager " \ "--metricscache_id metricscache-0 --server_port metricscachemgr_serverport " \ "--stats_port metricscachemgr_statsport --topology_name topname --topology_id topid " \ - "--system_config_file %s --override_config_file %s " \ + f"--system_config_file {INTERNAL_CONF_PATH} --override_config_file {OVERRIDE_PATH} " \ "--sink_config_file metrics_sinks_config_file " \ - "--cluster cluster --role role --environment environ" \ - % (INTERNAL_CONF_PATH, OVERRIDE_PATH) + "--cluster cluster --role role --environment environ" def get_expected_healthmgr_command(): return "heron_java_home/bin/java -Xmx1024M -XX:+PrintCommandLineFlags " \ @@ -145,7 +143,7 @@ def get_expected_healthmgr_command(): "--environment environ --topology_name topname --metricsmgr_port metricsmgr_port" def get_expected_instance_command(component_name, instance_id, container_id): - instance_name = "container_%d_%s_%d" % (container_id, component_name, instance_id) + instance_name = f"container_{container_id}_{component_name}_{instance_id}" return "heron_java_home/bin/java -Xmx320M -Xms320M -XX:MaxMetaspaceSize=128M " \ "-XX:MetaspaceSize=128M -XX:ReservedCodeCacheSize=64M -XX:+PrintCommandLineFlags " \ "-Djava.net.preferIPv4Stack=true " \ @@ -154,11 +152,10 @@ def get_expected_instance_command(component_name, instance_id, container_id): "-XX:ParallelGCThreads=4 " \ "-cp instance_classpath:classpath -XX:+HeapDumpOnOutOfMemoryError " \ "org.apache.heron.instance.HeronInstance -topology_name topname -topology_id topid " \ - "-instance_id %s -component_name %s -task_id %d -component_index 0 -stmgr_id stmgr-%d " \ + f"-instance_id {instance_name} -component_name {component_name} " \ + f"-task_id {instance_id} -component_index 0 -stmgr_id 
stmgr-{container_id} " \ "-stmgr_port tmanager_controller_port -metricsmgr_port metricsmgr_port " \ - "-system_config_file %s -override_config_file %s" \ - % (instance_name, component_name, instance_id, - container_id, INTERNAL_CONF_PATH, OVERRIDE_PATH) + f"-system_config_file {INTERNAL_CONF_PATH} -override_config_file {OVERRIDE_PATH}" MockPOpen.set_next_pid(37) expected_processes_container_0 = [ @@ -204,13 +201,12 @@ def get_expected_instance_command(component_name, instance_id, container_id): 'stmgr_binary --topology_name=topname --topology_id=topid ' '--topologydefn_file=topdefnfile --zkhostportlist=zknode --zkroot=zkroot ' '--stmgr_id=stmgr-7 ' - '--instance_ids=container_7_word_11,container_7_exclaim1_210 --myhost=%s ' + f'--instance_ids=container_7_word_11,container_7_exclaim1_210 --myhost={HOSTNAME} ' '--data_port=server_port ' '--local_data_port=tmanager_controller_port --metricsmgr_port=metricsmgr_port ' - '--shell_port=shell-port --config_file=%s --override_config_file=%s ' + f'--shell_port=shell-port --config_file={INTERNAL_CONF_PATH} --override_config_file={OVERRIDE_PATH} ' '--ckptmgr_port=ckptmgr-port --ckptmgr_id=ckptmgr-7 ' - '--metricscachemgr_mode=cluster' - % (HOSTNAME, INTERNAL_CONF_PATH, OVERRIDE_PATH)), + '--metricscachemgr_mode=cluster'), ProcessInfo(MockPOpen(), 'metricsmgr-7', get_expected_metricsmgr_command(7)), ProcessInfo(MockPOpen(), 'container_7_word_11', get_expected_instance_command('word', 11, 7)), ProcessInfo(MockPOpen(), 'container_7_exclaim1_210', @@ -344,8 +340,8 @@ def test_change_instance_dist_container_1(self): current_json = json.dumps(current_commands, sort_keys=True, cls=CommandEncoder).split(' ') temp_json = json.dumps(temp_dict, sort_keys=True).split(' ') - print("current_json: %s" % current_json) - print("temp_json: %s" % temp_json) + print(f"current_json: {current_json}") + print(f"temp_json: {temp_json}") # better test error report for (s1, s2) in zip(current_json, temp_json): diff --git 
a/heron/instance/src/python/basics/base_instance.py b/heron/instance/src/python/basics/base_instance.py index ba114e5c08a..224494a12a6 100644 --- a/heron/instance/src/python/basics/base_instance.py +++ b/heron/instance/src/python/basics/base_instance.py @@ -92,7 +92,7 @@ def log(self, message, level=None): elif level == "error": _log_level = logging.ERROR else: - raise ValueError("%s is not supported as logging level" % str(level)) + raise ValueError(f"{str(level)} is not supported as logging level") self.logger.log(_log_level, message) @@ -114,11 +114,11 @@ def load_py_instance(self, is_spout): if is_spout: spout_proto = self.pplan_helper.get_my_spout() py_classpath = spout_proto.comp.class_name - self.logger.info("Loading Spout from: %s", py_classpath) + self.logger.info(f"Loading Spout from: {py_classpath}") else: bolt_proto = self.pplan_helper.get_my_bolt() py_classpath = bolt_proto.comp.class_name - self.logger.info("Loading Bolt from: %s", py_classpath) + self.logger.info(f"Loading Bolt from: {py_classpath}") pex_loader.load_pex(self.pplan_helper.topology_pex_abs_path) spbl_class = pex_loader.import_and_get_class(self.pplan_helper.topology_pex_abs_path, @@ -126,11 +126,11 @@ def load_py_instance(self, is_spout): except Exception as e: spbl = "spout" if is_spout else "bolt" self.logger.error(traceback.format_exc()) - raise RuntimeError("Error when loading a %s from pex: %s" % (spbl, str(e))) + raise RuntimeError(f"Error when loading a {spbl} from pex: {str(e)}") return spbl_class def handle_initiate_stateful_checkpoint(self, ckptmsg, component): - Log.info("Received initiate state checkpoint message for %s" % ckptmsg.checkpoint_id) + Log.info(f"Received initiate state checkpoint message for {ckptmsg.checkpoint_id}") if not self.is_stateful: raise RuntimeError("Received state checkpoint message but we are not stateful topology") if isinstance(component, StatefulComponent): diff --git a/heron/instance/src/python/basics/bolt_instance.py 
b/heron/instance/src/python/basics/bolt_instance.py index ad18bcd051a..5b597e87450 100644 --- a/heron/instance/src/python/basics/bolt_instance.py +++ b/heron/instance/src/python/basics/bolt_instance.py @@ -54,7 +54,7 @@ def __init__(self, pplan_helper, in_stream, out_stream, looper): api_constants.TopologyReliabilityMode.ATMOST_ONCE) self.acking_enabled = bool(mode == api_constants.TopologyReliabilityMode.ATLEAST_ONCE) self._initialized_metrics_and_tasks = False - Log.info("Enable ACK: %s" % str(self.acking_enabled)) + Log.info(f"Enable ACK: {str(self.acking_enabled)}") # load user's bolt class bolt_impl_class = super(BoltInstance, self).load_py_instance(is_spout=False) @@ -228,7 +228,7 @@ def _prepare_tick_tup_timer(self): cluster_config = self.pplan_helper.context.get_cluster_config() if api_constants.TOPOLOGY_TICK_TUPLE_FREQ_SECS in cluster_config: tick_freq_sec = cluster_config[api_constants.TOPOLOGY_TICK_TUPLE_FREQ_SECS] - Log.debug("Tick Tuple Frequency: %s sec." % str(tick_freq_sec)) + Log.debug(f"Tick Tuple Frequency: {str(tick_freq_sec)} sec.") def send_tick(): tick = TupleHelper.make_tick_tuple() diff --git a/heron/instance/src/python/basics/spout_instance.py b/heron/instance/src/python/basics/spout_instance.py index 206dac5862c..48c86ff090c 100644 --- a/heron/instance/src/python/basics/spout_instance.py +++ b/heron/instance/src/python/basics/spout_instance.py @@ -60,8 +60,8 @@ def __init__(self, pplan_helper, in_stream, out_stream, looper): self.enable_message_timeouts = \ context.get_cluster_config().get(api_constants.TOPOLOGY_ENABLE_MESSAGE_TIMEOUTS) self._initialized_metrics_and_tasks = False - Log.info("Enable ACK: %s" % str(self.acking_enabled)) - Log.info("Enable Message Timeouts: %s" % str(self.enable_message_timeouts)) + Log.info(f"Enable ACK: {str(self.acking_enabled)}") + Log.info(f"Enable Message Timeouts: {str(self.enable_message_timeouts)}") # map tuple_info>, ordered by insertion time self.in_flight_tuples = collections.OrderedDict() @@ 
-131,8 +131,7 @@ def emit(self, tup, tup_id=None, stream=Stream.DEFAULT_STREAM_ID, if direct_task is not None: if not isinstance(direct_task, int): - raise TypeError("direct_task argument needs to be an integer, given: %s" - % str(type(direct_task))) + raise TypeError(f"direct_task argument needs to be an integer, given: {str(type(direct_task))}") # performing emit-direct data_tuple.dest_task_ids.append(direct_task) elif custom_target_task_ids is not None: @@ -351,13 +350,13 @@ def _do_immediate_acks(self): self._invoke_ack(tuple_info.tuple_id, tuple_info.stream_id, 0) def _invoke_ack(self, tuple_id, stream_id, complete_latency_ns): - Log.debug("In invoke_ack(): Acking %s from stream: %s" % (str(tuple_id), stream_id)) + Log.debug(f"In invoke_ack(): Acking {str(tuple_id)} from stream: {stream_id}") self.spout_impl.ack(tuple_id) self.pplan_helper.context.invoke_hook_spout_ack(tuple_id, complete_latency_ns) self.spout_metrics.acked_tuple(stream_id, complete_latency_ns) def _invoke_fail(self, tuple_id, stream_id, fail_latency_ns): - Log.debug("In invoke_fail(): Failing %s from stream: %s" % (str(tuple_id), stream_id)) + Log.debug(f"In invoke_fail(): Failing {str(tuple_id)} from stream: {stream_id}") self.spout_impl.fail(tuple_id) self.pplan_helper.context.invoke_hook_spout_fail(tuple_id, fail_latency_ns) self.spout_metrics.failed_tuple(stream_id, fail_latency_ns) diff --git a/heron/instance/src/python/instance.py b/heron/instance/src/python/instance.py index cb9b268c835..96188a95714 100644 --- a/heron/instance/src/python/instance.py +++ b/heron/instance/src/python/instance.py @@ -71,7 +71,7 @@ def __init__(self, topology_name, topology_id, instance, self.in_stream = HeronCommunicator(producer_cb=None, consumer_cb=None) self.out_stream = HeronCommunicator(producer_cb=None, consumer_cb=None) - self.socket_map = dict() + self.socket_map = {} self.looper = GatewayLooper(self.socket_map) # Initialize metrics related @@ -153,7 +153,7 @@ def 
handle_start_stateful_processing(self, start_msg): """Called when we receive StartInstanceStatefulProcessing message :param start_msg: StartInstanceStatefulProcessing type """ - Log.info("Received start stateful processing for %s" % start_msg.checkpoint_id) + Log.info(f"Received start stateful processing for {start_msg.checkpoint_id}") self.is_stateful_started = True self.start_instance_if_possible() @@ -161,7 +161,7 @@ def handle_restore_instance_state(self, restore_msg): """Called when we receive RestoreInstanceStateRequest message :param restore_msg: RestoreInstanceStateRequest type """ - Log.info("Restoring instance state to checkpoint %s" % restore_msg.state.checkpoint_id) + Log.info(f"Restoring instance state to checkpoint {restore_msg.state.checkpoint_id}") # Stop the instance if self.is_stateful_started: self.my_instance.py_class.stop() @@ -218,7 +218,7 @@ def _handle_state_change_msg(self, new_helper): elif new_helper.is_topology_paused(): self.my_instance.py_class.invoke_deactivate() else: - raise RuntimeError("Unexpected TopologyState update: %s" % new_helper.get_topology_state()) + raise RuntimeError(f"Unexpected TopologyState update: {new_helper.get_topology_state()}") else: Log.info("Topology state remains the same.") @@ -245,8 +245,7 @@ def handle_assignment_msg(self, pplan): self._handle_assignment_msg(new_helper) else: Log.info("Received a new Physical Plan with the same assignment -- State Change") - Log.info("Old state: %s, new state: %s.", - self.my_pplan_helper.get_topology_state(), new_helper.get_topology_state()) + Log.info(f"Old state: {self.my_pplan_helper.get_topology_state()}, new state: {new_helper.get_topology_state()}.") self._handle_state_change_msg(new_helper) def _handle_assignment_msg(self, pplan_helper): @@ -256,8 +255,8 @@ def _handle_assignment_msg(self, pplan_helper): if self.my_pplan_helper.is_spout: # Starting a spout my_spout = self.my_pplan_helper.get_my_spout() - Log.info("Incarnating ourselves as spout: %s with task id 
%s", - self.my_pplan_helper.my_component_name, str(self.my_pplan_helper.my_task_id)) + Log.info(f"Incarnating ourselves as spout: {self.my_pplan_helper.my_component_name} "\ + f"with task id {str(self.my_pplan_helper.my_task_id)}") self.in_stream. \ register_capacity(self.sys_config[constants.INSTANCE_INTERNAL_SPOUT_READ_QUEUE_CAPACITY]) @@ -313,7 +312,7 @@ def start_instance_if_possible(self): Log.info("Started instance successfully.") except Exception as e: Log.error(traceback.format_exc()) - Log.error("Error when starting bolt/spout, bailing out...: %s", str(e)) + Log.error(f"Error when starting bolt/spout, bailing out...: {str(e)}") self.looper.exit_loop() def yaml_config_reader(config_path): diff --git a/heron/instance/src/python/network/heron_client.py b/heron/instance/src/python/network/heron_client.py index 5e63af0645a..33f7dac8a69 100644 --- a/heron/instance/src/python/network/heron_client.py +++ b/heron/instance/src/python/network/heron_client.py @@ -58,9 +58,9 @@ def __init__(self, looper, hostname, port, socket_map, socket_options): self.socket_options = socket_options # map message.Message object> - self.registered_message_map = dict() - self.response_message_map = dict() - self.context_map = dict() + self.registered_message_map = {} + self.response_message_map = {} + self.context_map = {} self.incomplete_pkt = None self.total_bytes_written = 0 @@ -82,13 +82,13 @@ def __init__(self, looper, hostname, port, socket_map, socket_options): # called when connect is ready def handle_connect(self): - Log.info("Connected to %s:%d" % (self.hostname, self.port)) + Log.info(f"Connected to {self.hostname}:{self.port}") self._connecting = False self.on_connect(StatusCode.OK) # called when close is ready def handle_close(self): - Log.info("%s: handle_close() called" % self._get_classname()) + Log.info(f"{self._get_classname()}: handle_close() called") self._handle_close() self.on_error() @@ -103,9 +103,9 @@ def _clean_up_state(self): self.total_bytes_received = 0 
self.total_pkt_received = 0 - self.registered_message_map = dict() - self.response_message_map = dict() - self.context_map = dict() + self.registered_message_map = {} + self.response_message_map = {} + self.context_map = {} self.incomplete_pkt = None self._connecting = False @@ -134,7 +134,7 @@ def handle_read(self): if pkt.is_complete: num_pkt_read += 1 bytes_read += pkt.get_pktsize() - Log.debug("Read a complete packet of size %d" % bytes_read) + Log.debug(f"Read a complete packet of size {bytes_read}") self.incomplete_pkt = None read_pkt_list.append(pkt) else: @@ -190,7 +190,7 @@ def start_connect(self): ``loop()`` method needs to be called after this. """ - Log.debug("In start_connect() of %s" % self._get_classname()) + Log.debug(f"In start_connect() of {self._get_classname()}") # TODO: specify buffer size, exception handling self.create_socket(socket.AF_INET, socket.SOCK_STREAM) @@ -209,14 +209,14 @@ def register_on_message(self, msg_builder): :param msg_builder: callable to create a protobuf message that this client wants to receive """ message = msg_builder() - Log.debug("In register_on_message(): %s" % message.DESCRIPTOR.full_name) + Log.debug(f"In register_on_message(): {message.DESCRIPTOR.full_name}") self.registered_message_map[message.DESCRIPTOR.full_name] = msg_builder def send_request(self, request, context, response_type, timeout_sec): """Sends a request message (REQID is non-zero)""" # generates a unique request id reqid = REQID.generate() - Log.debug("%s: In send_request() with REQID: %s" % (self._get_classname(), str(reqid))) + Log.debug(f"{self._get_classname()}: In send_request() with REQID: {str(reqid)}") # register response message type self.response_message_map[reqid] = response_type self.context_map[reqid] = context @@ -232,7 +232,7 @@ def timeout_task(): def send_message(self, message): """Sends a message (REQID is zero)""" - Log.debug("In send_message() of %s" % self._get_classname()) + Log.debug(f"In send_message() of 
{self._get_classname()}") outgoing_pkt = OutgoingPacket.create_packet(REQID.generate_zero(), message) self._send_packet(outgoing_pkt) @@ -246,9 +246,8 @@ def handle_timeout(self, reqid): def handle_error(self): _, t, v, tbinfo = asyncore.compact_traceback() - self_msg = "%s failed for object at %0x" % (self._get_classname(), id(self)) - Log.error("Uncaptured python exception, closing channel %s (%s:%s %s)" % - (self_msg, t, v, tbinfo)) + self_msg = f"{self._get_classname()} failed for object at {id(self):x}" + Log.error(f"Uncaptured python exception, closing channel {self_msg} ({t}:{v} {tbinfo})") if self._connecting: # Error when trying to connect @@ -272,7 +271,7 @@ def _handle_packet(self, packet): try: response_msg.ParseFromString(serialized_msg) except Exception as e: - Log.error("Invalid Packet Error: %s" % str(e)) + Log.error(f"Invalid Packet Error: {str(e)}") self._handle_close() self.on_error() return @@ -287,7 +286,7 @@ def _handle_packet(self, packet): # this is a Message -- no need to send back response try: if typename not in self.registered_message_map: - raise ValueError("%s is not registered in message map" % typename) + raise ValueError(f"{typename} is not registered in message map") msg_builder = self.registered_message_map[typename] message = msg_builder() message.ParseFromString(serialized_msg) @@ -296,13 +295,12 @@ def _handle_packet(self, packet): else: raise RuntimeError("Message not initialized") except Exception as e: - Log.error("Error when handling message packet: %s" % str(e)) + Log.error(f"Error when handling message packet: {str(e)}") Log.error(traceback.format_exc()) raise RuntimeError("Problem reading message") else: # might be a timeout response - Log.info("In handle_packet(): Received message whose REQID is not registered: %s" - % str(reqid)) + Log.info(f"In handle_packet(): Received message whose REQID is not registered: {str(reqid)}") def _send_packet(self, pkt): """Pushes a packet to a send buffer, the content of which will be 
send when available""" diff --git a/heron/instance/src/python/network/metricsmgr_client.py b/heron/instance/src/python/network/metricsmgr_client.py index 22c38d1e5d3..93cb119f7e3 100644 --- a/heron/instance/src/python/network/metricsmgr_client.py +++ b/heron/instance/src/python/network/metricsmgr_client.py @@ -71,7 +71,7 @@ def _send_metrics_messages(self): while not self.out_queue.is_empty(): message = self.out_queue.poll() assert isinstance(message, metrics_pb2.MetricPublisherPublishMessage) - Log.debug("Sending metric message: %s" % str(message)) + Log.debug(f"Sending metric message: {str(message)}") self.send_message(message) self.gateway_metrics.update_sent_metrics_size(message.ByteSize()) self.gateway_metrics.update_sent_metrics(len(message.metrics), len(message.exceptions)) @@ -79,26 +79,25 @@ def _send_metrics_messages(self): def on_connect(self, status): Log.debug("In on_connect of MetricsManagerClient") if status != StatusCode.OK: - Log.error("Error connecting to Metrics Manager with status: %s" % str(status)) + Log.error(f"Error connecting to Metrics Manager with status: {str(status)}") retry_interval = float(self.sys_config[constants.INSTANCE_RECONNECT_METRICSMGR_INTERVAL_SEC]) self.looper.register_timer_task_in_sec(self.start_connect, retry_interval) return self._send_register_req() def on_response(self, status, context, response): - Log.debug("In on_response with status: %s, with context: %s" % (str(status), str(context))) + Log.debug(f"In on_response with status: {str(status)}, with context: {str(context)}") if status != StatusCode.OK: raise RuntimeError("Response from Metrics Manager not OK") if isinstance(response, metrics_pb2.MetricPublisherRegisterResponse): self._handle_register_response(response) else: - Log.error("Unknown kind of response received: %s" % response.DESCRIPTOR.full_name) + Log.error(f"Unknown kind of response received: {response.DESCRIPTOR.full_name}") raise RuntimeError("Unknown kind of response received from Metrics Manager") # 
pylint: disable=no-self-use def on_incoming_message(self, message): - raise RuntimeError("Metrics Client got an unknown message from Metrics Manager: %s" - % str(message)) + raise RuntimeError(f"Metrics Client got an unknown message from Metrics Manager: {str(message)}") def on_error(self): Log.error("Disconnected from Metrics Manager") @@ -117,7 +116,7 @@ def _send_register_req(self): request = metrics_pb2.MetricPublisherRegisterRequest() request.publisher.CopyFrom(metric_publisher) - Log.debug("Sending MetricsCli register request: \n%s" % str(request)) + Log.debug(f"Sending MetricsCli register request: \n{str(request)}") timeout_sec = float(self.sys_config[constants.INSTANCE_RECONNECT_METRICSMGR_INTERVAL_SEC]) self.send_request(request, "MetricsClientContext", diff --git a/heron/instance/src/python/network/protocol.py b/heron/instance/src/python/network/protocol.py index 2e356920264..fd8105c2ba8 100644 --- a/heron/instance/src/python/network/protocol.py +++ b/heron/instance/src/python/network/protocol.py @@ -195,7 +195,7 @@ def read(self, dispatcher): if len(self.header) == HeronProtocol.HEADER_SIZE: self.is_header_read = True else: - Log.debug("Header read incomplete; read %d bytes of header" % len(self.header)) + Log.debug(f"Header read incomplete; read {len(self.header)} bytes of header") return if self.is_header_read and not self.is_complete: @@ -214,8 +214,7 @@ def read(self, dispatcher): raise RuntimeError("Fatal error occured in IncomingPacket.read()") def __str__(self): - return "Packet ID: %s, header: %s, complete: %s" % \ - (str(self.id), self.is_header_read, self.is_complete) + return f"Packet ID: {str(self.id)}, header: {self.is_header_read}, complete: {self.is_complete}" class REQID: diff --git a/heron/instance/src/python/network/socket_options.py b/heron/instance/src/python/network/socket_options.py index d23520a5cf8..8de011a3c30 100644 --- a/heron/instance/src/python/network/socket_options.py +++ 
b/heron/instance/src/python/network/socket_options.py @@ -47,7 +47,7 @@ def create_socket_options(): return sock_opt except ValueError as e: # couldn't convert to int - raise ValueError("Invalid value in sys_config: %s" % str(e)) + raise ValueError(f"Invalid value in sys_config: {str(e)}") except KeyError as e: # option key was not found - raise KeyError("Incomplete sys_config: %s" % str(e)) + raise KeyError(f"Incomplete sys_config: {str(e)}") diff --git a/heron/instance/src/python/utils/metrics/metrics_helper.py b/heron/instance/src/python/utils/metrics/metrics_helper.py index 8e6c0c806b0..258b4243050 100644 --- a/heron/instance/src/python/utils/metrics/metrics_helper.py +++ b/heron/instance/src/python/utils/metrics/metrics_helper.py @@ -330,9 +330,9 @@ class MetricsCollector: def __init__(self, looper, out_metrics): self.looper = looper # map IMetric object> - self.metrics_map = dict() + self.metrics_map = {} # map metrics name> - self.time_bucket_in_sec_to_metrics_name = dict() + self.time_bucket_in_sec_to_metrics_name = {} # out metrics queue self.out_metrics = out_metrics @@ -344,7 +344,7 @@ def register_metric(self, name, metric, time_bucket_in_sec): :param time_bucket_in_sec: time interval for update to the metrics manager """ if name in self.metrics_map: - raise RuntimeError("Another metric has already been registered with name: %s" % name) + raise RuntimeError(f"Another metric has already been registered with name: {name}") Log.debug("Register metric: %s, with interval: %s", name, str(time_bucket_in_sec)) self.metrics_map[name] = metric @@ -382,7 +382,7 @@ def _gather_one_metric(self, name, message): for key, value in list(metric_value.items()): if key is not None and value is not None: self._add_data_to_message(message, name + "/" + str(key), value) - self._add_data_to_message(message, "%s/%s" % (name, str(key)), value) + self._add_data_to_message(message, f"{name}/{str(key)}", value) else: Log.info("When gathering metric: %s, <%s:%s> is not a valid 
key-value to output " "as metric. Skipping...", name, str(key), str(value)) diff --git a/heron/instance/src/python/utils/misc/communicator.py b/heron/instance/src/python/utils/misc/communicator.py index 5f11cbbf89a..4ee5d47ef23 100644 --- a/heron/instance/src/python/utils/misc/communicator.py +++ b/heron/instance/src/python/utils/misc/communicator.py @@ -73,7 +73,7 @@ def poll(self): self._producer_callback() return ret except Empty: - Log.debug("%s: Empty in poll()" % str(self)) + Log.debug("%s: Empty in poll()", str(self)) raise Empty def offer(self, item): @@ -88,7 +88,7 @@ def offer(self, item): self._consumer_callback() return True except Full: - Log.debug("%s: Full in offer()" % str(self)) + Log.debug("%s: Full in offer()", str(self)) raise Full def clear(self): diff --git a/heron/instance/src/python/utils/misc/custom_grouping_helper.py b/heron/instance/src/python/utils/misc/custom_grouping_helper.py index b149ceb3d7d..59f832fb2e9 100644 --- a/heron/instance/src/python/utils/misc/custom_grouping_helper.py +++ b/heron/instance/src/python/utils/misc/custom_grouping_helper.py @@ -70,13 +70,13 @@ def choose_tasks(self, values): """Invoke choose_tasks() of this custom grouping""" ret = self.grouping.choose_tasks(values) if not isinstance(ret, list): - raise TypeError("Returned object after custom grouping's choose_tasks() " - "needs to be a list, given: %s" % str(type(ret))) + raise TypeError("Returned object after custom grouping's choose_tasks() "\ + f"needs to be a list, given: {str(type(ret))}") for i in ret: if not isinstance(i, int): - raise TypeError("Returned object after custom grouping's choose_tasks() " - "contained non-integer: %s" % str(i)) + raise TypeError("Returned object after custom grouping's choose_tasks() "\ + f"contained non-integer: {str(i)}") if i not in self.task_ids: - raise ValueError("Returned object after custom grouping's choose_tasks() contained " - "a task id that is not registered: %d" % i) + raise ValueError("Returned object after 
custom grouping's choose_tasks() contained "\ + f"a task id that is not registered: {i}") return ret diff --git a/heron/instance/src/python/utils/misc/pplan_helper.py b/heron/instance/src/python/utils/misc/pplan_helper.py index 62989698ad2..6c8668f9bf3 100644 --- a/heron/instance/src/python/utils/misc/pplan_helper.py +++ b/heron/instance/src/python/utils/misc/pplan_helper.py @@ -59,7 +59,7 @@ def __init__(self, pplan, instance_id, topology_pex_abs_path): break if self.my_instance is None: - raise RuntimeError("There was no instance that matched my id: %s" % self.my_instance_id) + raise RuntimeError(f"There was no instance that matched my id: {self.my_instance_id}") self.my_component_name = self.my_instance.info.component_name self.my_task_id = self.my_instance.info.task_id @@ -68,7 +68,7 @@ def __init__(self, pplan, instance_id, topology_pex_abs_path): self._my_spbl, self.is_spout = self._get_my_spout_or_bolt(pplan.topology) # Map number of fields in that stream's schema> - self._output_schema = dict() + self._output_schema = {} outputs = self._my_spbl.outputs # setup output schema @@ -122,8 +122,8 @@ def check_output_schema(self, stream_id, tup): raise RuntimeError("%s emitting to stream %s but was not declared in output fields" % (self.my_component_name, stream_id)) if size != len(tup): - raise RuntimeError("Number of fields emitted in stream %s does not match what's expected. " - "Expected: %s, Observed: %s" % (stream_id, size, len(tup))) + raise RuntimeError(f"Number of fields emitted in stream {stream_id} does not match what's expected. 
"\ + f"Expected: {size}, Observed: {len(tup)}") def get_my_spout(self): """Returns spout instance, or ``None`` if bolt is assigned""" @@ -252,7 +252,7 @@ def _setup_custom_grouping(self, topology): raise NotImplementedError("Java-serialized custom grouping is not yet supported " "for python topology") else: - raise ValueError("Unrecognized custom grouping type found: %s" % str(in_stream.type)) + raise ValueError(f"Unrecognized custom grouping type found: {str(in_stream.type)}") def _get_taskids_for_component(self, component_name): return [instance.info.task_id for instance in self.pplan.instances diff --git a/heron/instance/src/python/utils/topology/topology_context_impl.py b/heron/instance/src/python/utils/topology/topology_context_impl.py index 8d3c4c133dc..3d5ec57e6e5 100644 --- a/heron/instance/src/python/utils/topology/topology_context_impl.py +++ b/heron/instance/src/python/utils/topology/topology_context_impl.py @@ -163,7 +163,7 @@ def _get_output_to_comp_fields(outputs): stream_id = out_stream.stream.id if comp_name not in out_fields: - out_fields[comp_name] = dict() + out_fields[comp_name] = {} # get the fields of a particular output stream ret = [] diff --git a/heron/shell/src/python/handlers/downloadhandler.py b/heron/shell/src/python/handlers/downloadhandler.py index e4f3fb5ad69..a8c1d57c5a6 100644 --- a/heron/shell/src/python/handlers/downloadhandler.py +++ b/heron/shell/src/python/handlers/downloadhandler.py @@ -55,7 +55,7 @@ async def get(self, path): return if path is None or not os.path.isfile(path): - self.write("File %s not found" % path) + self.write(f"File {path} not found") self.set_status(404) self.finish() return diff --git a/heron/shell/src/python/handlers/pidhandler.py b/heron/shell/src/python/handlers/pidhandler.py index 6fdfa2acc40..76c1d076448 100644 --- a/heron/shell/src/python/handlers/pidhandler.py +++ b/heron/shell/src/python/handlers/pidhandler.py @@ -31,7 +31,7 @@ class PidHandler(tornado.web.RequestHandler): # pylint: 
disable=attribute-defined-outside-init async def get(self, instance_id): ''' get method ''' - pid = subprocess.run(['cat', "%s.pid" % instance_id], capture_output=True, text=True, + pid = subprocess.run(['cat', f"{instance_id}.pid"], capture_output=True, text=True, check=True) await self.finish({ 'command': ' '.join(pid.args), diff --git a/heron/statemgrs/src/python/configloader.py b/heron/statemgrs/src/python/configloader.py index 9c9c74a11fc..7568032bf32 100644 --- a/heron/statemgrs/src/python/configloader.py +++ b/heron/statemgrs/src/python/configloader.py @@ -32,7 +32,7 @@ def load_state_manager_locations(cluster, state_manager_config_file='heron-conf/ """ Reads configs to determine which state manager to use and converts them to state manager locations. Handles a subset of config wildcard substitution supported in the substitute method in org.apache.heron.spi.common.Misc.java""" - with open(state_manager_config_file, 'r') as stream: + with open(state_manager_config_file, 'r', encoding='utf8') as stream: config = yaml.safe_load(stream) home_dir = os.path.expanduser("~") @@ -64,6 +64,7 @@ def load_state_manager_locations(cluster, state_manager_config_file='heron-conf/ 'heron.statemgr.tunnel.host': 'tunnelhost', 'heron.statemgr.root.path': 'rootpath', } + # pylint: disable=consider-using-dict-items for config_key in key_mappings: if config_key in config: state_manager_location[key_mappings[config_key]] = config[config_key] @@ -86,8 +87,8 @@ def __replace(config, wildcards, config_file): config_value = config_value.replace(token, wildcards[token]) found = re.findall(r'\${[A-Z_]+}', config_value) if found: - raise ValueError("%s=%s in file %s contains unsupported or unset wildcard tokens: %s" % - (config_key, original_value, config_file, ", ".join(found))) + raise ValueError(f'{config_key}={original_value} in file '\ + f'{config_file} contains unsupported or unset wildcard tokens: {", ".join(found)}') config[config_key] = config_value return config @@ -98,4 +99,4 
@@ def __replace(config, wildcards, config_file): locations = load_state_manager_locations('local', sys.argv[1]) else: locations = load_state_manager_locations('local') - print("locations: %s" % locations) + print(f"locations: {locations}") diff --git a/heron/statemgrs/src/python/statemanager.py b/heron/statemgrs/src/python/statemanager.py index f285dba610b..cb097aa4ad4 100644 --- a/heron/statemgrs/src/python/statemanager.py +++ b/heron/statemgrs/src/python/statemanager.py @@ -92,8 +92,8 @@ def is_host_port_reachable(self): socket.create_connection(hostport, StateManager.TIMEOUT_SECONDS) return True except: - LOG.info("StateManager %s Unable to connect to host: %s port %i" - % (self.name, hostport[0], hostport[1])) + LOG.info("StateManager %s Unable to connect to host: %s port %i", + self.name, hostport[0], hostport[1]) continue return False @@ -114,8 +114,9 @@ def establish_ssh_tunnel(self): localportlist = [] for (host, port) in self.hostportlist: localport = self.pick_unused_port() + # pylint: disable=consider-using-with self.tunnel.append(subprocess.Popen( - ('ssh', self.tunnelhost, '-NL127.0.0.1:%d:%s:%d' % (localport, host, port)))) + ('ssh', self.tunnelhost, f'-NL127.0.0.1:{localport}:{host}:{port}'))) localportlist.append(('127.0.0.1', localport)) return localportlist diff --git a/heron/statemgrs/src/python/statemanagerfactory.py b/heron/statemgrs/src/python/statemanagerfactory.py index a0d03b5f793..b8f3b4583c6 100644 --- a/heron/statemgrs/src/python/statemanagerfactory.py +++ b/heron/statemgrs/src/python/statemanagerfactory.py @@ -67,7 +67,7 @@ def get_all_zk_state_managers(conf): host = hostandport[0] port = int(hostandport[1]) if not host or not port: - raise Exception("Hostport for %s must be of the format 'host:port'." 
% (name)) + raise Exception(f"Hostport for {name} must be of the format 'host:port'.") hostportlist.append((host, port)) tunnelhost = location['tunnelhost'] rootpath = location['rootpath'] diff --git a/heron/statemgrs/src/python/zkstatemanager.py b/heron/statemgrs/src/python/zkstatemanager.py index f8a932b8d11..f92c5966a0c 100644 --- a/heron/statemgrs/src/python/zkstatemanager.py +++ b/heron/statemgrs/src/python/zkstatemanager.py @@ -21,6 +21,12 @@ ''' zkstatemanager.py ''' import contextlib +from kazoo.client import KazooClient +from kazoo.exceptions import NodeExistsError +from kazoo.exceptions import NoNodeError +from kazoo.exceptions import NotEmptyError +from kazoo.exceptions import ZookeeperError + from heron.proto.execution_state_pb2 import ExecutionState from heron.proto.packing_plan_pb2 import PackingPlan from heron.proto.physical_plan_pb2 import PhysicalPlan @@ -32,13 +38,9 @@ from heron.statemgrs.src.python.statemanager import StateManager from heron.statemgrs.src.python.stateexceptions import StateException -from kazoo.client import KazooClient -from kazoo.exceptions import NodeExistsError -from kazoo.exceptions import NoNodeError -from kazoo.exceptions import NotEmptyError -from kazoo.exceptions import ZookeeperError def _makehostportlist(hostportlist): + # pylint: disable=consider-using-f-string return ','.join(["%s:%i" % hp for hp in hostportlist]) @contextlib.contextmanager @@ -68,7 +70,7 @@ class ZkStateManager(StateManager): """ def __init__(self, name, hostportlist, rootpath, tunnelhost): - super(ZkStateManager, self).__init__() + super().__init__() self.name = name self.hostportlist = hostportlist self.tunnelhost = tunnelhost @@ -209,8 +211,7 @@ def create_topology(self, topologyName, topology): StateException.EX_TYPE_PROTOBUF_ERROR) path = self.get_topology_path(topologyName) - LOG.info("Adding topology: {0} to path: {1}".format( - topologyName, path)) + LOG.info(f"Adding topology: {topologyName} to path: {path}") topologyString = 
topology.SerializeToString() with reraise_from_zk_exceptions("creating topology"): self.client.create(path, value=topologyString, makepath=True) @@ -219,8 +220,7 @@ def create_topology(self, topologyName, topology): def delete_topology(self, topologyName): """ delete topology """ path = self.get_topology_path(topologyName) - LOG.info("Removing topology: {0} from path: {1}".format( - topologyName, path)) + LOG.info(f"Removing topology: {topologyName} from path: {path}") with reraise_from_zk_exceptions("deleting topology"): self.client.delete(path) return True @@ -328,8 +328,7 @@ def create_pplan(self, topologyName, pplan): StateException.EX_TYPE_PROTOBUF_ERROR) path = self.get_pplan_path(topologyName) - LOG.info("Adding topology: {0} to path: {1}".format( - topologyName, path)) + LOG.info(f"Adding topology: {topologyName} to path: {path}") pplanString = pplan.SerializeToString() with reraise_from_zk_exceptions("creating pplan"): self.client.create(path, value=pplanString, makepath=True) @@ -338,8 +337,7 @@ def create_pplan(self, topologyName, pplan): def delete_pplan(self, topologyName): """ delete physical plan info """ path = self.get_pplan_path(topologyName) - LOG.info("Removing topology: {0} from path: {1}".format( - topologyName, path)) + LOG.info(f"Removing topology: {topologyName} from path: {path}") with reraise_from_zk_exceptions("deleting pplan"): self.client.delete(path) return True @@ -400,8 +398,7 @@ def create_execution_state(self, topologyName, executionState): StateException.EX_TYPE_PROTOBUF_ERROR) path = self.get_execution_state_path(topologyName) - LOG.info("Adding topology: {0} to path: {1}".format( - topologyName, path)) + LOG.info(f"Adding topology: {topologyName} to path: {path}") executionStateString = executionState.SerializeToString() with reraise_from_zk_exceptions("creating execution state"): self.client.create(path, value=executionStateString, makepath=True) @@ -410,8 +407,7 @@ def create_execution_state(self, topologyName, 
executionState): def delete_execution_state(self, topologyName): """ delete execution state """ path = self.get_execution_state_path(topologyName) - LOG.info("Removing topology: {0} from path: {1}".format( - topologyName, path)) + LOG.info(f"Removing topology: {topologyName} from path: {path}") with reraise_from_zk_exceptions("deleting execution state"): self.client.delete(path) return True diff --git a/heron/tools/cli/src/python/args.py b/heron/tools/cli/src/python/args.py index 8f833ec41f8..003db5c4a73 100644 --- a/heron/tools/cli/src/python/args.py +++ b/heron/tools/cli/src/python/args.py @@ -195,13 +195,13 @@ def add_dry_run(parser): ''' default_format = 'table' resp_formats = ['raw', 'table', 'colored_table', 'json'] + # pylint: disable=consider-using-f-string available_options = ', '.join(['%s' % opt for opt in resp_formats]) def dry_run_resp_format(value): if value not in resp_formats: raise argparse.ArgumentTypeError( - 'Invalid dry-run response format: %s. Available formats: %s' - % (value, available_options)) + f'Invalid dry-run response format: {value}. Available formats: {available_options}') return value parser.add_argument( @@ -216,7 +216,8 @@ def dry_run_resp_format(value): metavar='DRY_RUN_FORMAT', default='colored_table' if sys.stdout.isatty() else 'table', type=dry_run_resp_format, - help='The format of the dry-run output ([%s], default=%s). ' - 'Ignored when dry-run mode is not enabled' % ('|'.join(resp_formats), default_format)) + help="The format of the dry-run output "\ + f"([{'|'.join(resp_formats)}], default={default_format}). 
" + "Ignored when dry-run mode is not enabled") return parser diff --git a/heron/tools/cli/src/python/cdefs.py b/heron/tools/cli/src/python/cdefs.py index d89cb25ef3d..95c1a4adeb8 100644 --- a/heron/tools/cli/src/python/cdefs.py +++ b/heron/tools/cli/src/python/cdefs.py @@ -34,7 +34,7 @@ def read_server_mode_cluster_definition(cluster, cl_args): :return: ''' - client_confs = dict() + client_confs = {} client_confs[cluster] = cliconfig.cluster_config(cluster) # now check if the service-url from command line is set, if so override it diff --git a/heron/tools/cli/src/python/cli_helper.py b/heron/tools/cli/src/python/cli_helper.py index da228aa1f10..1b91ebb3f6a 100644 --- a/heron/tools/cli/src/python/cli_helper.py +++ b/heron/tools/cli/src/python/cli_helper.py @@ -67,7 +67,7 @@ def flatten_args(fargs): ################################################################################ # pylint: disable=dangerous-default-value -def run_server(command, cl_args, action, extra_args=dict()): +def run_server(command, cl_args, action, extra_args={}): ''' helper function to take action on topologies using REST API :param command: @@ -90,14 +90,14 @@ def run_server(command, cl_args, action, extra_args=dict()): # convert the dictionary to a list of tuples data = flatten_args(extra_args) - err_msg = "Failed to %s: %s" % (action, topology_name) - succ_msg = "Successfully %s: %s" % (action, topology_name) + err_msg = f"Failed to {action}: {topology_name}" + succ_msg = f"Successfully {action}: {topology_name}" try: r = service_method(service_apiurl, data=data) s = Status.Ok if r.status_code == requests.codes.ok else Status.HeronError if r.status_code != requests.codes.ok: - Log.error(r.json().get('message', "Unknown error from API server %d" % r.status_code)) + Log.error(r.json().get('message', f"Unknown error from API server {r.status_code}")) except (requests.exceptions.ConnectionError, requests.exceptions.HTTPError) as err: Log.error(err) return SimpleResult(Status.HeronError, 
err_msg, succ_msg) @@ -143,13 +143,13 @@ def run_direct(command, cl_args, action, extra_args=[], extra_lib_jars=[]): args=new_args ) - err_msg = "Failed to %s: %s" % (action, topology_name) - succ_msg = "Successfully %s: %s" % (action, topology_name) + err_msg = f"Failed to {action}: {topology_name}" + succ_msg = f"Successfully {action}: {topology_name}" result.add_context(err_msg, succ_msg) return result ################################################################################ def run(command, cl_args, action, extra_lib_jars=[]): if cl_args['deploy_mode'] == config.SERVER_MODE: - return run_server(command, cl_args, action, extra_args=dict()) + return run_server(command, cl_args, action, extra_args={}) return run_direct(command, cl_args, action, extra_args=[], extra_lib_jars=extra_lib_jars) diff --git a/heron/tools/cli/src/python/cliconfig.py b/heron/tools/cli/src/python/cliconfig.py index af67c4baea6..c10ab4cb6b7 100644 --- a/heron/tools/cli/src/python/cliconfig.py +++ b/heron/tools/cli/src/python/cliconfig.py @@ -39,7 +39,7 @@ def cluster_config(cluster): config = _cluster_config(cluster) if _config_has_property(config, PROP_SERVICE_URL): return {PROP_SERVICE_URL: config[PROP_SERVICE_URL]} - return dict() + return {} def is_valid_property(prop): @@ -79,7 +79,7 @@ def _save_or_remove(config, cluster): config_directory = get_config_directory(cluster) if not os.path.isdir(config_directory): os.makedirs(config_directory) - with open(cluster_config_file, 'w') as cf: + with open(cluster_config_file, 'w', encoding='utf8') as cf: yaml.dump(config, cf, default_flow_style=False) else: if os.path.isfile(cluster_config_file): @@ -90,10 +90,10 @@ def _save_or_remove(config, cluster): def _cluster_config(cluster): - config = dict() + config = {} cluster_config_file = get_cluster_config_file(cluster) if os.path.isfile(cluster_config_file): - with open(cluster_config_file, 'r') as cf: + with open(cluster_config_file, 'r', encoding='utf8') as cf: config = 
yaml.safe_load(cf) return config diff --git a/heron/tools/cli/src/python/config.py b/heron/tools/cli/src/python/config.py index 137a96969e8..74358ec7145 100644 --- a/heron/tools/cli/src/python/config.py +++ b/heron/tools/cli/src/python/config.py @@ -92,9 +92,9 @@ def _list(cl_args): config = cliconfig.cluster_config(cluster) if config: for k, v in list(config.items()): - print("%s = %s" % (str(k), str(v))) + print(f"{str(k)} = {str(v)}") else: - print("No config for cluster %s" % cluster) + print(f"No config for cluster {cluster}") return SimpleResult(Status.Ok) @@ -103,9 +103,9 @@ def _set(cl_args): cluster, prop, value = cl_args['cluster'], cl_args['property'], cl_args['value'] if cliconfig.is_valid_property(prop): cliconfig.set_property(cluster, prop, value) - print("Updated property [%s] for cluster %s" % (prop, cluster)) + print(f"Updated property [{prop}] for cluster {cluster}") else: - print("Error: Unknown property [%s] for cluster %s" % (prop, cluster)) + print(f"Error: Unknown property [{prop}] for cluster {cluster}") return SimpleResult(Status.Ok) @@ -115,9 +115,9 @@ def _unset(cl_args): cluster, prop = cl_args['cluster'], cl_args['property'] if cliconfig.is_valid_property(prop): cliconfig.unset_property(cluster, prop) - print("Cleared property [%s] for cluster %s" % (prop, cluster)) + print(f"Cleared property [{prop}] for cluster {cluster}") else: - print("Error: Unknown property [%s] for cluster %s" % (prop, cluster)) + print(f"Error: Unknown property [{prop}] for cluster {cluster}") return SimpleResult(Status.Ok) diff --git a/heron/tools/cli/src/python/execute.py b/heron/tools/cli/src/python/execute.py index bff33b5f19f..5b633b6b88a 100644 --- a/heron/tools/cli/src/python/execute.py +++ b/heron/tools/cli/src/python/execute.py @@ -153,8 +153,7 @@ def heron_pex(topology_pex, topology_class_name, args=None): return SimpleResult(Status.Ok) except Exception as ex: Log.debug(traceback.format_exc()) - err_context = "Topology %s failed to be loaded from the 
given pex: %s" %\ - (topology_class_name, ex) + err_context = f"Topology {topology_class_name} failed to be loaded from the given pex: {ex}" return SimpleResult(Status.HeronError, err_context) return None @@ -168,8 +167,8 @@ def heron_cpp(topology_binary, args=None): cmd.extend(args) Log.debug("Invoking binary using command: ``%s''", ' '.join(cmd)) Log.debug('Heron options: {%s}', str(heron_env['HERON_OPTIONS'])) - print("Invoking class using command: ``%s''" % ' '.join(cmd)) - print('Heron options: {%s}' % str(heron_env['HERON_OPTIONS'])) + print(f"""Invoking class using command: ``{' '.join(cmd)}''""") + print(f"Heron options: {str(heron_env['HERON_OPTIONS'])}") # invoke the command with subprocess and print error message, if any proc = subprocess.Popen(cmd, env=heron_env, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True, bufsize=1) diff --git a/heron/tools/cli/src/python/main.py b/heron/tools/cli/src/python/main.py index 735a91087e0..c922000057f 100644 --- a/heron/tools/cli/src/python/main.py +++ b/heron/tools/cli/src/python/main.py @@ -70,7 +70,7 @@ def __call__(self, parser, namespace, values, option_string=None): for subparsers_action in subparsers_actions: # get all subparsers and print help for choice, subparser in list(subparsers_action.choices.items()): - print("Subparser '{}'".format(choice)) + print(f"Subparser '{choice}'") print(subparser.format_help()) return @@ -127,7 +127,7 @@ def run(handlers, command, parser, command_args, unknown_args): if command in handlers: return handlers[command].run(command, parser, command_args, unknown_args) - err_context = 'Unknown subcommand: %s' % command + err_context = f'Unknown subcommand: {command}' return result.SimpleResult(result.Status.InvocationError, err_context) def cleanup(files): @@ -165,14 +165,13 @@ def server_deployment_mode(command, parser, cluster, cl_args): client_confs = cdefs.read_server_mode_cluster_definition(cluster, cl_args) if not client_confs[cluster]: - return dict() + 
return {} # tell the user which definition that we are using if not cl_args.get('service_url', None): - Log.debug("Using cluster definition from file %s" \ - % cliconfig.get_cluster_config_file(cluster)) + Log.debug("Using cluster definition from file %s", cliconfig.get_cluster_config_file(cluster)) else: - Log.debug("Using cluster service url %s" % cl_args['service_url']) + Log.debug("Using cluster service url %s", cl_args['service_url']) # if cluster definition exists, but service_url is not set, it is an error if not 'service_url' in client_confs[cluster]: @@ -192,7 +191,7 @@ def server_deployment_mode(command, parser, cluster, cl_args): Log.error("Argument cluster/[role]/[env] is not correct: %s", str(ex)) sys.exit(1) - new_cl_args = dict() + new_cl_args = {} new_cl_args['cluster'] = cluster_tuple[0] new_cl_args['role'] = cluster_tuple[1] new_cl_args['environ'] = cluster_tuple[2] @@ -223,19 +222,19 @@ def direct_deployment_mode(command, parser, cluster, cl_args): # if some of the arguments are not found, print error and exit subparser = config.get_subparser(parser, command) print(subparser.format_help()) - return dict() + return {} # check if the cluster config directory exists if not cdefs.check_direct_mode_cluster_definition(cluster, config_path): Log.error("Cluster config directory \'%s\' does not exist", config_path) - return dict() + return {} config_path = config.get_heron_cluster_conf_dir(cluster, config_path) if not os.path.isdir(config_path): Log.error("Cluster config directory \'%s\' does not exist", config_path) - return dict() + return {} - Log.info("Using cluster definition in %s" % config_path) + Log.info("Using cluster definition in %s", config_path) try: cluster_role_env = (cl_args['cluster'], cl_args['role'], cl_args['environ']) @@ -243,9 +242,9 @@ def direct_deployment_mode(command, parser, cluster, cl_args): cluster_tuple = config.defaults_cluster_role_env(cluster_role_env) except Exception as ex: Log.error("Argument cluster/[role]/[env] is 
not correct: %s", str(ex)) - return dict() + return {} - new_cl_args = dict() + new_cl_args = {} new_cl_args['cluster'] = cluster_tuple[0] new_cl_args['role'] = cluster_tuple[1] new_cl_args['environ'] = cluster_tuple[2] @@ -268,7 +267,7 @@ def deployment_mode(command, parser, cl_args): if len(new_cl_args) > 0: return new_cl_args - return dict() + return {} ################################################################################ @@ -289,9 +288,9 @@ def extract_common_args(command, parser, cl_args): # if some of the arguments are not found, print error and exit subparser = config.get_subparser(parser, command) print(subparser.format_help()) - return dict() + return {} - new_cl_args = dict() + new_cl_args = {} cluster_tuple = config.get_cluster_role_env(cluster_role_env) new_cl_args['cluster'] = cluster_tuple[0] new_cl_args['role'] = cluster_tuple[1] diff --git a/heron/tools/cli/src/python/opts.py b/heron/tools/cli/src/python/opts.py index 160be97796e..1aaff50cb18 100644 --- a/heron/tools/cli/src/python/opts.py +++ b/heron/tools/cli/src/python/opts.py @@ -24,7 +24,7 @@ # Global variable to store config map and verbosity ################################################################################ # pylint: disable=invalid-name,global-variable-not-assigned,global-statement -config_opts = dict() +config_opts = {} verbose_flag = False cleaned_up_files = [] @@ -38,7 +38,7 @@ def get_heron_config(): ''' opt_list = [] for (key, value) in list(config_opts.items()): - opt_list.append('%s=%s' % (key, value)) + opt_list.append(f"{key}={value}") all_opts = (','.join(opt_list)).replace(' ', '%%%%') return all_opts @@ -76,4 +76,4 @@ def clear_config(): :return: ''' global config_opts - config_opts = dict() + config_opts = {} diff --git a/heron/tools/cli/src/python/result.py b/heron/tools/cli/src/python/result.py index f37d5d04173..1aed7e72cfa 100644 --- a/heron/tools/cli/src/python/result.py +++ b/heron/tools/cli/src/python/result.py @@ -87,7 +87,8 @@ def 
_log_context(self): self._do_log(Log.error, self.err_context) else: raise RuntimeError( - "Unknown status type of value %d. Expected value: %s" % (self.status.value, list(Status))) + f"Unknown status type of value {self.status.value}. Expected value: {list(Status)}" + ) def add_context(self, err_context, succ_context=None): """ Prepend msg to add some context information @@ -107,7 +108,7 @@ class SimpleResult(Result): """Simple result: result that already and only contains status of the result""" def __init__(self, *args): - super(SimpleResult, self).__init__(*args) + super().__init__(*args) def render(self): self._log_context() @@ -116,7 +117,7 @@ def render(self): class ProcessResult(Result): """Process result: a wrapper of result class""" def __init__(self, process): - super(ProcessResult, self).__init__() + super().__init__() self.process = process self.stdout_builder = proc.async_stdout_builder(process) # start redirect stderr in initialization, before render() gets called @@ -162,9 +163,8 @@ def renderProcessStdOut(self, stdout): elif self.status == Status.InvocationError: self._do_print(sys.stdout, stdout) else: - raise RuntimeError( - "Unknown status type of value %d. Expected value: %s" % \ - (self.status.value, list(Status))) + raise RuntimeError("Unknown status type of value "\ + f"{self.status.value}. 
Expected value: {list(Status)}") def render(self): self.process.wait() @@ -179,12 +179,12 @@ def render(results): for r in results: r.render() else: - raise RuntimeError("Unknown result instance: %s" % (str(results.__class__),)) + raise RuntimeError(f"Unknown result instance: {(str(results.__class__),)}") # check if all results are successful def is_successful(results): if isinstance(results, list): return all([is_successful(result) for result in results]) if isinstance(results, Result): - return results.status == Status.Ok or results.status == Status.DryRun - raise RuntimeError("Unknown result instance: %s" % (str(results.__class__),)) + return results.status in (Status.Ok, Status.DryRun) + raise RuntimeError(f"Unknown result instance: {(str(results.__class__),)}") diff --git a/heron/tools/cli/src/python/submit.py b/heron/tools/cli/src/python/submit.py index e921c39b833..5f2e6cb142a 100644 --- a/heron/tools/cli/src/python/submit.py +++ b/heron/tools/cli/src/python/submit.py @@ -149,8 +149,8 @@ def launch_a_topology(cl_args, tmp_dir, topology_file, topology_defn_file, topol args=args, java_defines=[]) - err_ctxt = "Failed to launch topology '%s' %s" % (topology_name, launch_mode_msg(cl_args)) - succ_ctxt = "Successfully launched topology '%s' %s" % (topology_name, launch_mode_msg(cl_args)) + err_ctxt = f"Failed to launch topology '{topology_name}' {launch_mode_msg(cl_args)}" + succ_ctxt = f"Successfully launched topology '{topology_name}' {launch_mode_msg(cl_args)}" res.add_context(err_ctxt, succ_ctxt) return res @@ -177,7 +177,7 @@ def launch_topology_server(cl_args, topology_file, topology_defn_file, topology_ ) Log.info("" + str(cl_args)) - overrides = dict() + overrides = {} if 'config_property' in cl_args: overrides = config.parse_override_config(cl_args['config_property']) @@ -195,8 +195,8 @@ def launch_topology_server(cl_args, topology_file, topology_defn_file, topology_ topology=open(topology_file, 'rb'), ) - err_ctxt = "Failed to launch topology '%s' %s" % 
(topology_name, launch_mode_msg(cl_args)) - succ_ctxt = "Successfully launched topology '%s' %s" % (topology_name, launch_mode_msg(cl_args)) + err_ctxt = f"Failed to launch topology '{topology_name}' {launch_mode_msg(cl_args)}" + succ_ctxt = f"Successfully launched topology '{topology_name}' {launch_mode_msg(cl_args)}" try: r = service_method(service_apiurl, data=data, files=files) @@ -204,7 +204,7 @@ def launch_topology_server(cl_args, topology_file, topology_defn_file, topology_ created = r.status_code is requests.codes.created s = Status.Ok if created or ok else Status.HeronError if s is Status.HeronError: - Log.error(r.json().get('message', "Unknown error from API server %d" % r.status_code)) + Log.error(r.json().get('message', f"Unknown error from API server {r.status_code}")) elif ok: # this case happens when we request a dry_run print(r.json().get("response")) @@ -227,18 +227,17 @@ def launch_topologies(cl_args, topology_file, tmp_dir): defn_files = glob.glob(tmp_dir + '/*.defn') if len(defn_files) == 0: - return SimpleResult(Status.HeronError, "No topologies found under %s" % tmp_dir) + return SimpleResult(Status.HeronError, f"No topologies found under {tmp_dir}") results = [] for defn_file in defn_files: # load the topology definition from the file topology_defn = topology_pb2.Topology() try: - handle = open(defn_file, "rb") - topology_defn.ParseFromString(handle.read()) - handle.close() + with open(defn_file, "rb") as handle: + topology_defn.ParseFromString(handle.read()) except Exception as e: - err_context = "Cannot load topology definition '%s': %s" % (defn_file, e) + err_context = f"Cannot load topology definition '{defn_file}': {e}" return SimpleResult(Status.HeronError, err_context) # log topology and components configurations @@ -297,7 +296,7 @@ def submit_fatjar(cl_args, unknown_args, tmp_dir): if not result.is_successful(res): err_context = ("Failed to create topology definition " \ - "file when executing class '%s' of file '%s'") % (main_class, 
topology_file) + f"file when executing class '{main_class}' of file '{topology_file}'") res.add_context(err_context) return res @@ -341,7 +340,7 @@ def submit_tar(cl_args, unknown_args, tmp_dir): if not result.is_successful(res): err_context = ("Failed to create topology definition " \ - "file when executing class '%s' of file '%s'") % (main_class, topology_file) + f"file when executing class '{main_class}' of file '{topology_file}'") res.add_context(err_context) return res @@ -362,7 +361,7 @@ def submit_pex(cl_args, unknown_args, tmp_dir): result.render(res) if not result.is_successful(res): err_context = ("Failed to create topology definition " \ - "file when executing class '%s' of file '%s'") % (topology_class_name, topology_file) + f"file when executing class '{topology_class_name}' of file '{topology_file}'") res.add_context(err_context) return res @@ -382,7 +381,7 @@ def submit_cpp(cl_args, unknown_args, tmp_dir): result.render(res) if not result.is_successful(res): err_context = ("Failed to create topology definition " \ - "file when executing cpp binary '%s'") % (topology_binary_name) + f"file when executing cpp binary '{topology_binary_name}'") res.add_context(err_context) return res @@ -433,7 +432,7 @@ def run(command, parser, cl_args, unknown_args): # check to see if the topology file exists if not os.path.isfile(topology_file): - err_context = "Topology file '%s' does not exist" % topology_file + err_context = f"Topology file '{topology_file}' does not exist" return SimpleResult(Status.InvocationError, err_context) # check if it is a valid file type @@ -443,17 +442,17 @@ def run(command, parser, cl_args, unknown_args): cpp_type = topology_file.endswith(".dylib") or topology_file.endswith(".so") if not (jar_type or tar_type or pex_type or cpp_type): _, ext_name = os.path.splitext(topology_file) - err_context = "Unknown file type '%s'. 
Please use .tar "\ - "or .tar.gz or .jar or .pex or .dylib or .so file"\ - % ext_name + err_context = f"Unknown file type '{ext_name}'. Please use .tar "\ + "or .tar.gz or .jar or .pex or .dylib or .so file" return SimpleResult(Status.InvocationError, err_context) # check if extra launch classpath is provided and if it is validate if cl_args['extra_launch_classpath']: valid_classpath = classpath.valid_java_classpath(cl_args['extra_launch_classpath']) if not valid_classpath: - err_context = "One of jar or directory in extra launch classpath does not exist: %s" % \ - cl_args['extra_launch_classpath'] + err_context = "One of jar or directory in extra launch classpath"\ + f"does not exist: {cl_args['extra_launch_classpath']}" + return SimpleResult(Status.InvocationError, err_context) # create a temporary directory for topology definition file diff --git a/heron/tools/cli/src/python/update.py b/heron/tools/cli/src/python/update.py index 8dddb18f11d..a117b368934 100644 --- a/heron/tools/cli/src/python/update.py +++ b/heron/tools/cli/src/python/update.py @@ -55,7 +55,8 @@ def parallelism_type(value): pattern = re.compile(r"^[\w\.-]+:[\d]+$") if not pattern.match(value): raise argparse.ArgumentTypeError( - "Invalid syntax for component parallelism (): %s" % value) + f"Invalid syntax for component parallelism (): {value}" + ) return value parser.add_argument( @@ -70,8 +71,8 @@ def runtime_config_type(value): pattern = re.compile(r"^([\w\.-]+:){1,2}[\w\.-]+$") if not pattern.match(value): raise argparse.ArgumentTypeError( - "Invalid syntax for runtime config ([component:]): %s" - % value) + f"Invalid syntax for runtime config ([component:]): {value}" + ) return value parser.add_argument( @@ -86,8 +87,8 @@ def container_number_type(value): pattern = re.compile(r"^\d+$") if not pattern.match(value): raise argparse.ArgumentTypeError( - "Invalid syntax for container number (value): %s" - % value) + f"Invalid syntax for container number (value): {value}" + ) return value 
parser.add_argument( @@ -166,7 +167,7 @@ def run(command, parser, cl_args, unknown_args): # Build jar list extra_lib_jars = jars.packing_jars() - action = "update topology%s" % (' in dry-run mode' if cl_args["dry_run"] else '') + action = f"""update topology{(' in dry-run mode' if cl_args["dry_run"] else '')}""" # Build extra args dict_extra_args = {} diff --git a/heron/tools/cli/src/python/version.py b/heron/tools/cli/src/python/version.py index b101f94c323..86fa050c9a6 100644 --- a/heron/tools/cli/src/python/version.py +++ b/heron/tools/cli/src/python/version.py @@ -78,7 +78,7 @@ def run(command, parser, cl_args, unknown_args): # server mode if cluster: config_file = config.heron_rc_file() - client_confs = dict() + client_confs = {} # Read the cluster definition, if not found client_confs = cdefs.read_server_mode_cluster_definition(cluster, cl_args, config_file) @@ -99,10 +99,10 @@ def run(command, parser, cl_args, unknown_args): try: r = service_method(service_apiurl) if r.status_code != requests.codes.ok: - Log.error(r.json().get('message', "Unknown error from API server %d" % r.status_code)) + Log.error(r.json().get('message', f"Unknown error from API server {r.status_code}")) sorted_items = sorted(list(r.json().items()), key=lambda tup: tup[0]) for key, value in sorted_items: - print("%s : %s" % (key, value)) + print(f"{key} : {value}") except (requests.exceptions.ConnectionError, requests.exceptions.HTTPError) as err: Log.error(err) return SimpleResult(Status.HeronError) diff --git a/heron/tools/cli/tests/python/opts_unittest.py b/heron/tools/cli/tests/python/opts_unittest.py index 9ca7954d839..12f45479194 100644 --- a/heron/tools/cli/tests/python/opts_unittest.py +++ b/heron/tools/cli/tests/python/opts_unittest.py @@ -48,13 +48,13 @@ def test_non_exist_key(self): def test_many_opts(self): opts.clear_config() for k in range(1, 100): - key = "key-%d" % (k) - value = "value-%d" % (k) + key = f"key-{k}" + value = f"value-{k}" opts.set_config(key, value) for k 
in range(1, 100): - key = "key-%d" % (k) - value = "value-%d" % (k) + key = f"key-{k}" + value = f"value-{k}" self.assertEqual(value, opts.get_config(key)) def test_clear_opts(self): diff --git a/heron/tools/common/src/python/clients/tracker.py b/heron/tools/common/src/python/clients/tracker.py index 2a5fef78540..eb76bbb7dfd 100644 --- a/heron/tools/common/src/python/clients/tracker.py +++ b/heron/tools/common/src/python/clients/tracker.py @@ -36,44 +36,42 @@ from typing import Any, Iterable, List, Optional, Tuple from urllib.parse import urlencode -from heron.common.src.python.utils.log import Log - import requests +from heron.common.src.python.utils.log import Log # This requires setting tracker_url = "http://127.0.0.1:8888" -# pylint: disable=bad-whitespace CLUSTER_URL_FMT = "%s/clusters" # Nested under /topologies TOPOLOGIES_URL_FMT = "%s/topologies" -TOPOLOGIES_STATS_URL_FMT = "%s/states" % TOPOLOGIES_URL_FMT -EXECUTION_STATE_URL_FMT = "%s/executionstate" % TOPOLOGIES_URL_FMT -LOGICALPLAN_URL_FMT = "%s/logicalplan" % TOPOLOGIES_URL_FMT -PHYSICALPLAN_URL_FMT = "%s/physicalplan" % TOPOLOGIES_URL_FMT -PACKINGPLAN_URL_FMT = "%s/packingplan" % TOPOLOGIES_URL_FMT -SCHEDULER_LOCATION_URL_FMT = "%s/schedulerlocation" % TOPOLOGIES_URL_FMT - -EXCEPTIONS_URL_FMT = "%s/exceptions" % TOPOLOGIES_URL_FMT -EXCEPTION_SUMMARY_URL_FMT = "%s/exceptionsummary" % TOPOLOGIES_URL_FMT - -INFO_URL_FMT = "%s/info" % TOPOLOGIES_URL_FMT -PID_URL_FMT = "%s/pid" % TOPOLOGIES_URL_FMT -JSTACK_URL_FMT = "%s/jstack" % TOPOLOGIES_URL_FMT -JMAP_URL_FMT = "%s/jmap" % TOPOLOGIES_URL_FMT -HISTOGRAM_URL_FMT = "%s/histo" % TOPOLOGIES_URL_FMT +TOPOLOGIES_STATS_URL_FMT = f"{TOPOLOGIES_URL_FMT}/states" +EXECUTION_STATE_URL_FMT = f"{TOPOLOGIES_URL_FMT}/executionstate" +LOGICALPLAN_URL_FMT = f"{TOPOLOGIES_URL_FMT}/logicalplan" +PHYSICALPLAN_URL_FMT = f"{TOPOLOGIES_URL_FMT}/physicalplan" +PACKINGPLAN_URL_FMT = f"{TOPOLOGIES_URL_FMT}/packingplan" +SCHEDULER_LOCATION_URL_FMT = 
f"{TOPOLOGIES_URL_FMT}/schedulerlocation" + +EXCEPTIONS_URL_FMT = f"{TOPOLOGIES_URL_FMT}/exceptions" +EXCEPTION_SUMMARY_URL_FMT = f"{TOPOLOGIES_URL_FMT}/exceptionsummary" + +INFO_URL_FMT = f"{TOPOLOGIES_URL_FMT}/info" +PID_URL_FMT = f"{TOPOLOGIES_URL_FMT}/pid" +JSTACK_URL_FMT = f"{TOPOLOGIES_URL_FMT}/jstack" +JMAP_URL_FMT = f"{TOPOLOGIES_URL_FMT}/jmap" +HISTOGRAM_URL_FMT = f"{TOPOLOGIES_URL_FMT}/histo" # nested under /topologies/metrics/ -METRICS_URL_FMT = "%s/metrics" % TOPOLOGIES_URL_FMT -METRICS_QUERY_URL_FMT = "%s/query" % METRICS_URL_FMT -METRICS_TIMELINE_URL_FMT = "%s/timeline" % METRICS_URL_FMT +METRICS_URL_FMT = f"{TOPOLOGIES_URL_FMT}/metrics" +METRICS_QUERY_URL_FMT = f"{METRICS_URL_FMT}/query" +METRICS_TIMELINE_URL_FMT = f"{METRICS_URL_FMT}/timeline" # nested under /topologies/container/ -CONTAINER_URL_FMT = "%s/container" % TOPOLOGIES_URL_FMT -FILE_DATA_URL_FMT = "%s/filedata" % CONTAINER_URL_FMT -FILE_DOWNLOAD_URL_FMT = "%s/filedownload" % CONTAINER_URL_FMT -FILESTATS_URL_FMT = "%s/filestats" % CONTAINER_URL_FMT +CONTAINER_URL_FMT = f"{TOPOLOGIES_URL_FMT}/container" +FILE_DATA_URL_FMT = f"{CONTAINER_URL_FMT}/filedata" +FILE_DOWNLOAD_URL_FMT = f"{CONTAINER_URL_FMT}/filedownload" +FILESTATS_URL_FMT = f"{CONTAINER_URL_FMT}/filestats" def strip_whitespace(s): @@ -575,7 +573,7 @@ def fetch_max( comp_metrics = [] for comp in components: query = self.get_query(metric, comp, instance) - max_query = "MAX(%s)" % query + max_query = f"MAX({query})" comp_metrics.append(get_metrics(cluster, environ, topology, timerange, max_query)) data = self.compute_max(comp_metrics) diff --git a/heron/tools/common/src/python/utils/config.py b/heron/tools/common/src/python/utils/config.py index 28b388804f7..8f736068b7d 100644 --- a/heron/tools/common/src/python/utils/config.py +++ b/heron/tools/common/src/python/utils/config.py @@ -75,19 +75,19 @@ def create_tar(tar_filename, files, config_dir, config_files): if os.path.isfile(filename): tar.add(filename, 
arcname=os.path.basename(filename)) else: - raise Exception("%s is not an existing file" % filename) + raise Exception(f"{filename} is not an existing file") if os.path.isdir(config_dir): tar.add(config_dir, arcname=get_heron_sandbox_conf_dir()) else: - raise Exception("%s is not an existing directory" % config_dir) + raise Exception(f"{config_dir} is not an existing directory") for filename in config_files: if os.path.isfile(filename): arcfile = os.path.join(get_heron_sandbox_conf_dir(), os.path.basename(filename)) tar.add(filename, arcname=arcfile) else: - raise Exception("%s is not an existing file" % filename) + raise Exception(f"{filename} is not an existing file") def get_subparser(parser, command): @@ -113,8 +113,8 @@ def cygpath(x): normalized class path on cygwin ''' command = ['cygpath', '-wp', x] - p = subprocess.Popen(command, stdout=subprocess.PIPE, universal_newlines=True) - result = p.communicate() + with subprocess.Popen(command, stdout=subprocess.PIPE, universal_newlines=True) as p: + result = p.communicate() output = result[0] lines = output.split("\n") return lines[0] @@ -248,7 +248,7 @@ def parse_cluster_role_env(cluster_role_env, config_path): """Parse cluster/[role]/[environ], supply default, if not provided, not required""" parts = cluster_role_env.split('/')[:3] if not os.path.isdir(config_path): - Log.error("Config path cluster directory does not exist: %s" % config_path) + Log.error(f"Config path cluster directory does not exist: {config_path}") raise Exception("Invalid config path") # if cluster/role/env is not completely provided, check further @@ -264,26 +264,26 @@ def parse_cluster_role_env(cluster_role_env, config_path): parts.append(ENVIRON) else: cli_confs = {} - with open(cli_conf_file, 'r') as conf_file: + with open(cli_conf_file, 'r', encoding="utf8") as conf_file: tmp_confs = yaml.safe_load(conf_file) # the return value of yaml.load can be None if conf_file is an empty file if tmp_confs is not None: cli_confs = tmp_confs else: - 
print("Failed to read: %s due to it is empty" % (CLIENT_YAML)) + print(f"Failed to read: {CLIENT_YAML} due to it is empty") # if role is required but not provided, raise exception if len(parts) == 1: if (ROLE_REQUIRED in cli_confs) and (cli_confs[ROLE_REQUIRED] is True): - raise Exception("role required but not provided (cluster/role/env = %s). See %s in %s" - % (cluster_role_env, ROLE_REQUIRED, cli_conf_file)) + raise Exception(f"role required but not provided (cluster/role/env "\ + f"= {cluster_role_env}). See {ROLE_REQUIRED} in {cli_conf_file}") parts.append(getpass.getuser()) # if environ is required but not provided, raise exception if len(parts) == 2: if (ENV_REQUIRED in cli_confs) and (cli_confs[ENV_REQUIRED] is True): - raise Exception("environ required but not provided (cluster/role/env = %s). See %s in %s" - % (cluster_role_env, ENV_REQUIRED, cli_conf_file)) + raise Exception(f"environ required but not provided (cluster/role/env "\ + f"= {cluster_role_env}). See {ENV_REQUIRED} in {cli_conf_file}") parts.append(ENVIRON) # if cluster or role or environ is empty, print @@ -320,7 +320,7 @@ def direct_mode_cluster_role_env(cluster_role_env, config_path): return True client_confs = {} - with open(cli_conf_file, 'r') as conf_file: + with open(cli_conf_file, 'r', encoding="utf8") as conf_file: client_confs = yaml.safe_load(conf_file) # the return value of yaml.load can be None if conf_file is an empty file @@ -331,15 +331,15 @@ def direct_mode_cluster_role_env(cluster_role_env, config_path): role_present = bool(cluster_role_env[1]) # pylint: disable=simplifiable-if-expression if ROLE_REQUIRED in client_confs and client_confs[ROLE_REQUIRED] and not role_present: - raise Exception("role required but not provided (cluster/role/env = %s). See %s in %s" - % (cluster_role_env, ROLE_REQUIRED, cli_conf_file)) + raise Exception("role required but not provided (cluster/role/env"\ + f" = {cluster_role_env}). 
See {ROLE_REQUIRED} in {cli_conf_file}") # if environ is required but not provided, raise exception # pylint: disable=simplifiable-if-expression environ_present = True if len(cluster_role_env[2]) > 0 else False if ENV_REQUIRED in client_confs and client_confs[ENV_REQUIRED] and not environ_present: - raise Exception("environ required but not provided (cluster/role/env = %s). See %s in %s" - % (cluster_role_env, ENV_REQUIRED, cli_conf_file)) + raise Exception("environ required but not provided (cluster/role/env"\ + f" = {cluster_role_env}). See {ENV_REQUIRED} in {cli_conf_file}") return True @@ -353,16 +353,13 @@ def server_mode_cluster_role_env(cluster_role_env, config_map): role_present = bool(cluster_role_env[1]) # pylint: disable=simplifiable-if-expression if ROLE_KEY in cmap and cmap[ROLE_KEY] and not role_present: - raise Exception("role required but not provided (cluster/role/env = %s)."\ - % (cluster_role_env)) + raise Exception(f"role required but not provided (cluster/role/env = {cluster_role_env}).") # if environ is required but not provided, raise exception environ_present = True if len(cluster_role_env[2]) > 0 else False # pylint: disable=simplifiable-if-expression if ENVIRON_KEY in cmap and cmap[ENVIRON_KEY] and not environ_present: - raise Exception("environ required but not provided (cluster/role/env = %s)."\ - % (cluster_role_env)) - + raise Exception(f"environ required but not provided (cluster/role/env = {cluster_role_env}).") return True ################################################################################ @@ -388,21 +385,21 @@ def parse_override_config_and_write_file(namespace): try: tmp_dir = tempfile.mkdtemp() override_config_file = os.path.join(tmp_dir, OVERRIDE_YAML) - with open(override_config_file, 'w') as f: + with open(override_config_file, 'w', encoding="utf8") as f: f.write(yaml.dump(overrides)) return override_config_file except Exception as e: - raise Exception("Failed to parse override config: %s" % str(e)) + raise 
Exception("Failed to parse override config") from e def parse_override_config(namespace): """Parse the command line for overriding the defaults""" - overrides = dict() + overrides = {} for config in namespace: kv = config.split("=") if len(kv) != 2: - raise Exception("Invalid config property format (%s) expected key=value" % config) + raise Exception(f"Invalid config property format ({config}) expected key=value") if kv[1] in ['true', 'True', 'TRUE']: overrides[kv[0]] = True elif kv[1] in ['false', 'False', 'FALSE']: @@ -430,7 +427,7 @@ def check_release_file_exists(): # if the file does not exist and is not a file if not os.path.isfile(release_file): - Log.error("Required file not found: %s" % release_file) + Log.error(f"Required file not found: {release_file}") return False return True @@ -439,16 +436,16 @@ def print_build_info(): """Print build_info from release.yaml""" release_file = get_heron_release_file() - with open(release_file) as release_info: + with open(release_file, encoding="utf8") as release_info: release_map = yaml.safe_load(release_info) release_items = sorted(list(release_map.items()), key=lambda tup: tup[0]) for key, value in release_items: - print("%s : %s" % (key, value)) + print(f"{key} : {value}") def get_version_number(): """Print version from release.yaml""" release_file = get_heron_release_file() - with open(release_file) as release_info: + with open(release_file, encoding="utf8") as release_info: for line in release_info: trunks = line[:-1].split(' ') if trunks[0] == 'heron.build.version': diff --git a/heron/tools/explorer/src/python/logicalplan.py b/heron/tools/explorer/src/python/logicalplan.py index 2acf144dacb..6b06497bd38 100644 --- a/heron/tools/explorer/src/python/logicalplan.py +++ b/heron/tools/explorer/src/python/logicalplan.py @@ -20,15 +20,13 @@ ''' logicalplan.py ''' import sys - from collections import defaultdict - -from heron.common.src.python.utils.log import Log -from heron.tools.common.src.python.clients import tracker 
+import requests from tabulate import tabulate -import requests +from heron.common.src.python.utils.log import Log +from heron.tools.common.src.python.clients import tracker def to_table(components, topo_info, component_filter): diff --git a/heron/tools/explorer/src/python/main.py b/heron/tools/explorer/src/python/main.py index e4a48cc3a7c..9b5dc33a738 100644 --- a/heron/tools/explorer/src/python/main.py +++ b/heron/tools/explorer/src/python/main.py @@ -23,6 +23,9 @@ import os import sys +import click +import requests + from heron.common.src.python.utils import log from heron.tools.common.src.python.clients import tracker from heron.tools.common.src.python.utils import config @@ -30,9 +33,6 @@ from heron.tools.explorer.src.python import physicalplan from heron.tools.explorer.src.python import topologies -import click -import requests - Log = log.Log DEFAULT_TRACKER_URL = "http://127.0.0.1:8888" diff --git a/heron/tools/explorer/src/python/physicalplan.py b/heron/tools/explorer/src/python/physicalplan.py index 14d9634d89d..13a5d2aad8f 100644 --- a/heron/tools/explorer/src/python/physicalplan.py +++ b/heron/tools/explorer/src/python/physicalplan.py @@ -23,12 +23,12 @@ from typing import Optional -from heron.common.src.python.utils.log import Log -from heron.tools.common.src.python.clients import tracker +import requests from tabulate import tabulate -import requests +from heron.common.src.python.utils.log import Log +from heron.tools.common.src.python.clients import tracker def to_table(metrics): diff --git a/heron/tools/explorer/src/python/topologies.py b/heron/tools/explorer/src/python/topologies.py index 65bdc2147e9..12eb7c9f003 100644 --- a/heron/tools/explorer/src/python/topologies.py +++ b/heron/tools/explorer/src/python/topologies.py @@ -21,13 +21,13 @@ ''' topologies.py ''' import sys -from heron.common.src.python.utils.log import Log -from heron.tools.common.src.python.clients import tracker - from tabulate import tabulate import requests +from 
heron.common.src.python.utils.log import Log +from heron.tools.common.src.python.clients import tracker + def to_table(result): table = [] diff --git a/heron/tools/tracker/src/python/main.py b/heron/tools/tracker/src/python/main.py index 0a139dfe2a5..ac0e49a22ed 100644 --- a/heron/tools/tracker/src/python/main.py +++ b/heron/tools/tracker/src/python/main.py @@ -24,6 +24,9 @@ import os import sys +import click +import uvicorn + from heron.tools.common.src.python.utils import config as common_config from heron.common.src.python.utils import log from heron.tools.tracker.src.python import constants @@ -33,9 +36,6 @@ from heron.tools.tracker.src.python.app import app from heron.tools.tracker.src.python import state -import click -import uvicorn - Log = log.Log Log.setLevel(logging.DEBUG) diff --git a/heron/tools/tracker/src/python/utils.py b/heron/tools/tracker/src/python/utils.py index 2f79a8468a3..e4428b7da82 100644 --- a/heron/tools/tracker/src/python/utils.py +++ b/heron/tools/tracker/src/python/utils.py @@ -31,14 +31,13 @@ from pathlib import Path from typing import Any, Optional, TypeVar -from heron.common.src.python.utils.log import Log -from heron.proto import topology_pb2 - import javaobj.v1 as javaobj import yaml - from fastapi import HTTPException +from heron.common.src.python.utils.log import Log +from heron.proto import topology_pb2 + # directories for heron tools distribution BIN_DIR = "bin" diff --git a/heron/tools/ui/src/python/main.py b/heron/tools/ui/src/python/main.py index 312ab449815..269fa5e0ca4 100644 --- a/heron/tools/ui/src/python/main.py +++ b/heron/tools/ui/src/python/main.py @@ -32,10 +32,6 @@ from datetime import datetime from typing import Callable, List, Optional -from heron.tools.common.src.python.utils import config -from heron.tools.common.src.python.clients import tracker -from heron.common.src.python.utils import log - import click import pydantic import requests @@ -48,6 +44,10 @@ from starlette.responses import RedirectResponse, 
Response from starlette.exceptions import HTTPException as StarletteHTTPException +from heron.tools.common.src.python.utils import config +from heron.tools.common.src.python.clients import tracker +from heron.common.src.python.utils import log + VERSION = config.get_version_number() DEFAULT_ADDRESS = "0.0.0.0" @@ -651,7 +651,7 @@ def cli( host: str, port: int, base_url_option: str, tracker_url_option: str, verbose: bool ) -> None: """Start a web UI for heron which renders information from the tracker.""" - global base_url, tracker_url, Log + global base_url, Log base_url = base_url_option log_level = logging.DEBUG if verbose else logging.INFO log.configure(log_level) diff --git a/heronpy/api/bolt/base_bolt.py b/heronpy/api/bolt/base_bolt.py index 9174109603f..b0ea7fccba8 100644 --- a/heronpy/api/bolt/base_bolt.py +++ b/heronpy/api/bolt/base_bolt.py @@ -68,7 +68,7 @@ def spec(cls, name=None, inputs=None, par=1, config=None, optional_outputs=None) argument, and exists only for supporting dynamic output field declaration. 
""" - python_class_path = "%s.%s" % (cls.__module__, cls.__name__) + python_class_path = f"{cls.__module__}.{cls.__name__}" if hasattr(cls, 'outputs'): # avoid modification to cls.outputs diff --git a/heronpy/api/component/component_spec.py b/heronpy/api/component/component_spec.py index 29269ada78a..fc04c6b05ed 100644 --- a/heronpy/api/component/component_spec.py +++ b/heronpy/api/component/component_spec.py @@ -209,7 +209,7 @@ def _sanitize_inputs(self): elif isinstance(key, GlobalStreamId): ret[key] = grouping else: - raise ValueError("%s is not supported as a key to inputs" % str(key)) + raise ValueError(f"{str(key)} is not supported as a key to inputs") elif isinstance(self.inputs, (list, tuple)): # inputs are lists, must be either a list of HeronComponentSpec or GlobalStreamId # will use SHUFFLE grouping @@ -224,9 +224,9 @@ def _sanitize_inputs(self): elif isinstance(input_obj, GlobalStreamId): ret[input_obj] = Grouping.SHUFFLE else: - raise ValueError("%s is not supported as an input" % str(input_obj)) + raise ValueError(f"{str(input_obj)} is not supported as an input") else: - raise TypeError("Inputs must be a list, dict, or None, given: %s" % str(self.inputs)) + raise TypeError(f"Inputs must be a list, dict, or None, given: {str(self.inputs)}") return ret @@ -250,12 +250,11 @@ def _sanitize_outputs(self): return None if not isinstance(self.outputs, (list, tuple)): - raise TypeError("Argument to outputs must be either list or tuple, given: %s" - % str(type(self.outputs))) + raise TypeError(f"Argument to outputs must be either list or tuple, given: {str(type(self.outputs))}") for output in self.outputs: if not isinstance(output, (str, Stream)): - raise TypeError("Outputs must be a list of strings or Streams, given: %s" % str(output)) + raise TypeError(f"Outputs must be a list of strings or Streams, given: {str(output)}") if isinstance(output, str): # it's a default stream @@ -277,19 +276,18 @@ def get_out_streamids(self): return set() if not 
isinstance(self.outputs, (list, tuple)): - raise TypeError("Argument to outputs must be either list or tuple, given: %s" - % str(type(self.outputs))) + raise TypeError(f"Argument to outputs must be either list or tuple, given: {str(type(self.outputs))}") ret_lst = [] for output in self.outputs: if not isinstance(output, (str, Stream)): - raise TypeError("Outputs must be a list of strings or Streams, given: %s" % str(output)) + raise TypeError(f"Outputs must be a list of strings or Streams, given: {str(output)}") ret_lst.append(Stream.DEFAULT_STREAM_ID if isinstance(output, str) else output.stream_id) return set(ret_lst) def __getitem__(self, stream_id): """Get GlobalStreamId for a given stream_id""" if stream_id not in self.get_out_streamids(): - raise ValueError("A given stream id does not exist on this component: %s" % stream_id) + raise ValueError(f"A given stream id does not exist on this component: {stream_id}") component_id = self.name or self return GlobalStreamId(componentId=component_id, streamId=stream_id) @@ -355,12 +353,12 @@ def component_id(self): # TopologyType metaclass finally sets it. This statement is to support __eq__(), # __hash__() and __str__() methods with safety, as raising Exception is not # appropriate this case. 
- return "" % self._component_id.uuid + return f"" return self._component_id.name if isinstance(self._component_id, str): return self._component_id - raise ValueError("Component Id for this GlobalStreamId is not properly set: <%s:%s>" - % (str(type(self._component_id)), str(self._component_id))) + raise ValueError("Component Id for this GlobalStreamId is not "\ + f"properly set: <{str(type(self._component_id))}:{str(self._component_id)}>") def __eq__(self, other): return hasattr(other, 'component_id') and self.component_id == other.component_id \ @@ -370,4 +368,4 @@ def __hash__(self): return hash(self.__str__()) def __str__(self): - return "%s:%s" % (self.component_id, self.stream_id) + return f"{self.component_id}:{self.stream_id}" diff --git a/heronpy/api/spout/base_spout.py b/heronpy/api/spout/base_spout.py index 44d98ba8da0..dae4d27625a 100644 --- a/heronpy/api/spout/base_spout.py +++ b/heronpy/api/spout/base_spout.py @@ -53,7 +53,7 @@ def spec(cls, name=None, par=1, config=None, optional_outputs=None): This is an optional argument, and exists only for supporting dynamic output field declaration. 
""" - python_class_path = "%s.%s" % (cls.__module__, cls.__name__) + python_class_path = f"{cls.__module__}.{cls.__name__}" if hasattr(cls, 'outputs'): # avoid modification to cls.outputs diff --git a/heronpy/api/stream.py b/heronpy/api/stream.py index 828f55342a3..2f77a08374d 100644 --- a/heronpy/api/stream.py +++ b/heronpy/api/stream.py @@ -41,9 +41,9 @@ def __init__(self, fields=None, name=DEFAULT_STREAM_ID, direct=False): fields = list(fields) for field in fields: if not isinstance(field, str): - raise TypeError("All field names must be strings, given: %s" % str(field)) + raise TypeError(f"All field names must be strings, given: {str(field)}") else: - raise TypeError("Stream fields must be a list, tuple or None, given: %s" % str(fields)) + raise TypeError(f"Stream fields must be a list, tuple or None, given: {str(fields)}") # self.fields is always list self.fields = fields @@ -53,14 +53,14 @@ def __init__(self, fields=None, name=DEFAULT_STREAM_ID, direct=False): if isinstance(name, str): self.stream_id = name else: - raise TypeError("Stream name must be a string, given: %s" % str(name)) + raise TypeError(f"Stream name must be a string, given: {str(name)}") if isinstance(direct, bool): self.direct = direct if self.direct: raise NotImplementedError("Direct stream is not supported yet.") else: - raise TypeError("'direct' must be either True or False, given: %s" % str(direct)) + raise TypeError(f"'direct' must be either True or False, given: {str(direct)}") class Grouping: """Helper class for defining Grouping for Python topology""" @@ -143,7 +143,7 @@ def custom_serialized(cls, serialized, is_java=True): """ if not isinstance(serialized, bytes): raise TypeError("Argument to custom_serialized() must be " - "a serialized Python class as bytes, given: %s" % str(serialized)) + f"a serialized Python class as bytes, given: {str(serialized)}") if not is_java: return cls.CUSTOM(gtype=topology_pb2.Grouping.Value("CUSTOM"), python_serialized=serialized) diff --git 
a/heronpy/api/topology.py b/heronpy/api/topology.py index 7e8a412e223..055178b66ba 100644 --- a/heronpy/api/topology.py +++ b/heronpy/api/topology.py @@ -78,7 +78,7 @@ def class_dict_to_specs(mcs, class_dict): if spec.name is None: spec.name = name if spec.name in specs: - raise ValueError("Duplicate component name: %s" % spec.name) + raise ValueError(f"Duplicate component name: {spec.name}") specs[spec.name] = spec return specs @@ -112,16 +112,16 @@ def class_dict_to_topo_config(mcs, class_dict): def add_spout_specs(mcs, spec, spout_specs): if not spec.outputs: raise ValueError( - "%s: %s requires at least one output, because it is a spout" % - (spec.python_class_path, spec.name)) + f"{spec.python_class_path}: {spec.name} requires at least one output, because it is a spout" + ) spout_specs[spec.name] = spec.get_protobuf() @classmethod def add_bolt_specs(mcs, spec, bolt_specs): if not spec.inputs: raise ValueError( - "%s: %s requires at least one input, because it is a bolt" % - (spec.python_class_path, spec.name)) + f"{spec.python_class_path}: {spec.name} requires at least one input, because it is a bolt" + ) bolt_specs[spec.name] = spec.get_protobuf() @classmethod @@ -210,7 +210,7 @@ def get_heron_options_from_env(): if sep: options[key] = value else: - raise ValueError("Invalid HERON_OPTIONS part %r" % option_line) + raise ValueError(f"Invalid HERON_OPTIONS part {option_line!r}") return options @classmethod @@ -243,8 +243,7 @@ def _sanitize_config(custom_config): sanitized = {} for key, value in list(custom_config.items()): if not isinstance(key, str): - raise TypeError("Key for topology-wide configuration must be string, given: %s: %s" - % (str(type(key)), str(key))) + raise TypeError(f"Key for topology-wide configuration must be string, given: {str(type(key))}: {str(key)}") if isinstance(value, bool): sanitized[key] = "true" if value else "false" @@ -295,7 +294,7 @@ def write(cls): """ if cls.__name__ == 'Topology': raise ValueError("The base Topology class 
cannot be writable") - filename = "%s.defn" % cls.topology_name + filename = f"{cls.topology_name}.defn" path = os.path.join(cls.topologydefn_tmpdir, filename) with open(path, 'wb') as f: @@ -353,7 +352,7 @@ def add_spec(self, *specs): if spec.name == "config": raise ValueError("config is a reserved name") if spec.name in self._specs: - raise ValueError("Attempting to add duplicate spec name: %r %r" % (spec.name, spec)) + raise ValueError(f"Attempting to add duplicate spec name: {spec.name!r} {spec!r}") self._specs[spec.name] = spec @@ -378,7 +377,7 @@ def set_config(self, config): :param config: topology-wide config """ if not isinstance(config, dict): - raise TypeError("Argument to set_config needs to be dict, given: %s" % str(config)) + raise TypeError(f"Argument to set_config needs to be dict, given: {str(config)}") self._topology_config = config def _construct_topo_class_dict(self): diff --git a/heronpy/connectors/pulsar/pulsarspout.py b/heronpy/connectors/pulsar/pulsarspout.py index e81bfb66d1c..057e1fefb64 100644 --- a/heronpy/connectors/pulsar/pulsarspout.py +++ b/heronpy/connectors/pulsar/pulsarspout.py @@ -31,16 +31,16 @@ from heronpy.streamlet.src.python.streamletboltbase import StreamletBoltBase def GenerateLogConfContents(logFileName): - return """ + return f""" # Define the root logger with appender file log4j.rootLogger = INFO, FILE # Define the file appender log4j.appender.FILE=org.apache.log4j.DailyRollingFileAppender -log4j.appender.FILE.File=%s""" % logFileName + """ +log4j.appender.FILE.File={logFileName} log4j.appender.FILE.Threshold=INFO log4j.appender.FILE.DatePattern='.' 
yyyy-MM-dd-a log4j.appender.FILE.layout=org.apache.log4j.PatternLayout -log4j.appender.FILE.layout.ConversionPattern=%d{yy-MM-dd HH:mm:ss.SSS} %X{pname}:%X{pid} %-5p %l- %m%n +log4j.appender.FILE.layout.ConversionPattern=%d{{yy-MM-dd HH:mm:ss.SSS}} %X{{pname}}:%X{{pid}} %-5p %l- %m%n """ def GenerateLogConfig(context): @@ -72,8 +72,8 @@ def default_deserializer(self, msg): def initialize(self, config, context): """Implements Pulsar Spout's initialize method""" self.logger.info("Initializing PulsarSpout with the following") - self.logger.info("Component-specific config: \n%s" % str(config)) - self.logger.info("Context: \n%s" % str(context)) + self.logger.info("Component-specific config: \n%s", str(config)) + self.logger.info("Context: \n%s", str(context)) self.emit_count = 0 self.ack_count = 0 @@ -101,35 +101,35 @@ def initialize(self, config, context): # First generate the config self.logConfFileName = GenerateLogConfig(context) - self.logger.info("Generated LogConf at %s" % self.logConfFileName) + self.logger.info("Generated LogConf at %s", self.logConfFileName) # We currently use the high level consumer API # For supporting effectively once, we will need to switch # to using lower level Reader API, when it becomes # available in python self.client = pulsar.Client(self.pulsar_cluster, log_conf_file_path=self.logConfFileName) - self.logger.info("Setup Client with cluster %s" % self.pulsar_cluster) + self.logger.info("Setup Client with cluster %s", self.pulsar_cluster) try: self.consumer = self.client.subscribe(self.topic, context.get_topology_name(), consumer_type=pulsar.ConsumerType.Failover, unacked_messages_timeout_ms=self.acking_timeout) except Exception as e: - self.logger.fatal("Pulsar client subscription failed: %s" % str(e)) + self.logger.fatal("Pulsar client subscription failed: %s", str(e)) - self.logger.info("Subscribed to topic %s" % self.topic) + self.logger.info("Subscribed to topic %s", self.topic) def next_tuple(self): try: msg = 
self.consumer.receive(timeout_millis=self.receive_timeout_ms) except Exception as e: - self.logger.debug("Exception during recieve: %s" % str(e)) + self.logger.debug("Exception during recieve: %s", str(e)) return try: self.emit(self.deserializer(msg.data()), tup_id=msg.message_id()) self.emit_count += 1 except Exception as e: - self.logger.info("Exception during emit: %s" % str(e)) + self.logger.info("Exception during emit: %s", str(e)) def ack(self, tup_id): self.ack_count += 1 @@ -137,4 +137,4 @@ def ack(self, tup_id): def fail(self, tup_id): self.fail_count += 1 - self.logger.debug("Failed tuple %s" % str(tup_id)) + self.logger.debug("Failed tuple %s", str(tup_id)) diff --git a/heronpy/connectors/textfiles/textfilesgenerator.py b/heronpy/connectors/textfiles/textfilesgenerator.py index 129e04684a1..3d901e6bb93 100644 --- a/heronpy/connectors/textfiles/textfilesgenerator.py +++ b/heronpy/connectors/textfiles/textfilesgenerator.py @@ -36,7 +36,7 @@ def setup(self, context): """Implements TextFile Generator's setup method""" myindex = context.get_partition_index() self._files_to_consume = self._files[myindex::context.get_num_partitions()] - self.logger.info("TextFileSpout files to consume %s" % self._files_to_consume) + self.logger.info("TextFileSpout files to consume %s", self._files_to_consume) self._lines_to_consume = self._get_next_lines() self._emit_count = 0 @@ -62,12 +62,12 @@ def _consume_next_file(self): if file_to_consume is None: self.logger.info("All files consumed") return None - self.logger.info("Now reading file %s" % file_to_consume) + self.logger.info("Now reading file %s", file_to_consume) try: filep = open(file_to_consume, 'r') return filep.readlines() except IOError as e: - self.logger.info("Could not open the file %s" % file_to_consume) + self.logger.info("Could not open the file %s", file_to_consume) raise e def _get_next_file_to_consume(self): diff --git a/heronpy/streamlet/config.py b/heronpy/streamlet/config.py index 
7432735ef9c..1d2e649739e 100644 --- a/heronpy/streamlet/config.py +++ b/heronpy/streamlet/config.py @@ -51,7 +51,7 @@ def set_delivery_semantics(self, semantics): self._api_config[api_constants.TOPOLOGY_RELIABILITY_MODE] =\ api_constants.TopologyReliabilityMode.EFFECTIVELY_ONCE else: - raise RuntimeError("Unknown Topology delivery semantics %s" % str(semantics)) + raise RuntimeError(f"Unknown Topology delivery semantics {str(semantics)}") def set_num_containers(self, ncontainers): self._api_config[api_constants.TOPOLOGY_STMGRS] = int(ncontainers) diff --git a/heronpy/streamlet/impl/consumebolt.py b/heronpy/streamlet/impl/consumebolt.py index 3e84d234265..eb52254eaee 100644 --- a/heronpy/streamlet/impl/consumebolt.py +++ b/heronpy/streamlet/impl/consumebolt.py @@ -40,7 +40,7 @@ def pre_save(self, checkpoint_id): pass def initialize(self, config, context): - self.logger.debug("ConsumeBolt's Component-specific config: \n%s" % str(config)) + self.logger.debug("ConsumeBolt's Component-specific config: \n%s", str(config)) self.processed = 0 if ConsumeBolt.CONSUMEFUNCTION in config: self._consume_function = config[ConsumeBolt.CONSUMEFUNCTION] diff --git a/heronpy/streamlet/impl/filterbolt.py b/heronpy/streamlet/impl/filterbolt.py index 30ffd8a8235..98e31aea68c 100644 --- a/heronpy/streamlet/impl/filterbolt.py +++ b/heronpy/streamlet/impl/filterbolt.py @@ -41,7 +41,7 @@ def pre_save(self, checkpoint_id): pass def initialize(self, config, context): - self.logger.debug("FilterBolt's Component-specific config: \n%s" % str(config)) + self.logger.debug("FilterBolt's Component-specific config: \n%s", str(config)) self.processed = 0 self.emitted = 0 if FilterBolt.FUNCTION in config: diff --git a/heronpy/streamlet/impl/flatmapbolt.py b/heronpy/streamlet/impl/flatmapbolt.py index 3af381d5005..6fbd46f68a6 100644 --- a/heronpy/streamlet/impl/flatmapbolt.py +++ b/heronpy/streamlet/impl/flatmapbolt.py @@ -42,7 +42,7 @@ def pre_save(self, checkpoint_id): pass def initialize(self, 
config, context): - self.logger.debug("FlatMapBolt's Component-specific config: \n%s" % str(config)) + self.logger.debug("FlatMapBolt's Component-specific config: \n%s", str(config)) self.processed = 0 self.emitted = 0 if FlatMapBolt.FUNCTION in config: diff --git a/heronpy/streamlet/impl/generatorspout.py b/heronpy/streamlet/impl/generatorspout.py index 265b71f0044..b9943d6d2fa 100644 --- a/heronpy/streamlet/impl/generatorspout.py +++ b/heronpy/streamlet/impl/generatorspout.py @@ -40,7 +40,7 @@ def pre_save(self, checkpoint_id): pass def initialize(self, config, context): - self.logger.debug("GeneratorSpout's Component-specific config: \n%s" % str(config)) + self.logger.debug("GeneratorSpout's Component-specific config: \n%s", str(config)) self.emitted = 0 if GeneratorSpout.GENERATOR in config: self._generator = config[GeneratorSpout.GENERATOR] diff --git a/heronpy/streamlet/impl/joinbolt.py b/heronpy/streamlet/impl/joinbolt.py index 1dde5276987..7f8ffa7eaac 100644 --- a/heronpy/streamlet/impl/joinbolt.py +++ b/heronpy/streamlet/impl/joinbolt.py @@ -63,13 +63,13 @@ def _add(self, key, value, src_component, mymap): def initialize(self, config, context): super(JoinBolt, self).initialize(config, context) if not JoinBolt.JOINEDCOMPONENT in config: - raise RuntimeError("%s must be specified in the JoinBolt" % JoinBolt.JOINEDCOMPONENT) + raise RuntimeError(f"{JoinBolt.JOINEDCOMPONENT} must be specified in the JoinBolt") self._joined_component = config[JoinBolt.JOINEDCOMPONENT] if not JoinBolt.JOINFUNCTION in config: - raise RuntimeError("%s must be specified in the JoinBolt" % JoinBolt.JOINFUNCTION) + raise RuntimeError(f"{JoinBolt.JOINFUNCTION} must be specified in the JoinBolt") self._join_function = config[JoinBolt.JOINFUNCTION] if not JoinBolt.JOINTYPE in config: - raise RuntimeError("%s must be specified in the JoinBolt" % JoinBolt.JOINTYPE) + raise RuntimeError(f"{JoinBolt.JOINTYPE} must be specified in the JoinBolt") self._join_type = config[JoinBolt.JOINTYPE] 
def processWindow(self, window_config, tuples): @@ -173,8 +173,8 @@ def _calculate_inputs(self): # pylint: disable=superfluous-parens def _build_this(self, builder, stage_names): - print("join_build_this left: %s right: %s" % (self._left._built, self._right._built)) - print("left: %s right: %s" % (self._left.get_name(), self._right.get_name())) + print(f"join_build_this left: {self._left._built} right: {self._right._built}") + print(f"left: {self._left.get_name()} right: {self._right.get_name()}") if not self._left._built or not self._right._built: return False if not self.get_name(): diff --git a/heronpy/streamlet/impl/logbolt.py b/heronpy/streamlet/impl/logbolt.py index c18ada4ecba..063952ef812 100644 --- a/heronpy/streamlet/impl/logbolt.py +++ b/heronpy/streamlet/impl/logbolt.py @@ -39,7 +39,7 @@ def pre_save(self, checkpoint_id): pass def initialize(self, config, context): - self.logger.debug("LogBolt's Component-specific config: \n%s" % str(config)) + self.logger.debug("LogBolt's Component-specific config: \n%s", str(config)) self.processed = 0 def process(self, tup): diff --git a/heronpy/streamlet/impl/mapbolt.py b/heronpy/streamlet/impl/mapbolt.py index 444585242d6..80d5efe3127 100644 --- a/heronpy/streamlet/impl/mapbolt.py +++ b/heronpy/streamlet/impl/mapbolt.py @@ -41,7 +41,7 @@ def pre_save(self, checkpoint_id): pass def initialize(self, config, context): - self.logger.debug("MapBolt's Component-specific config: \n%s" % str(config)) + self.logger.debug("MapBolt's Component-specific config: \n%s", str(config)) self.processed = 0 self.emitted = 0 if MapBolt.FUNCTION in config: diff --git a/heronpy/streamlet/impl/repartitionbolt.py b/heronpy/streamlet/impl/repartitionbolt.py index 1e70c91044b..902e71d95a3 100644 --- a/heronpy/streamlet/impl/repartitionbolt.py +++ b/heronpy/streamlet/impl/repartitionbolt.py @@ -70,7 +70,7 @@ def pre_save(self, checkpoint_id): pass def initialize(self, config, context): - self.logger.debug("RepartitionBolt's Component-specific 
config: \n%s" % str(config)) + self.logger.debug("RepartitionBolt's Component-specific config: \n%s", str(config)) self.processed = 0 self.emitted = 0 diff --git a/heronpy/streamlet/impl/supplierspout.py b/heronpy/streamlet/impl/supplierspout.py index 46e18339eee..074372b4c0e 100644 --- a/heronpy/streamlet/impl/supplierspout.py +++ b/heronpy/streamlet/impl/supplierspout.py @@ -39,7 +39,7 @@ def pre_save(self, checkpoint_id): pass def initialize(self, config, context): - self.logger.debug("SupplierSpout's Component-specific config: \n%s" % str(config)) + self.logger.debug("SupplierSpout's Component-specific config: \n%s", str(config)) self.emitted = 0 if SupplierSpout.FUNCTION in config: self._supplier_function = config[SupplierSpout.FUNCTION] diff --git a/heronpy/streamlet/impl/transformbolt.py b/heronpy/streamlet/impl/transformbolt.py index a0906a91178..a72e544638c 100644 --- a/heronpy/streamlet/impl/transformbolt.py +++ b/heronpy/streamlet/impl/transformbolt.py @@ -43,7 +43,7 @@ def pre_save(self, checkpoint_id): pass def initialize(self, config, context): - self.logger.debug("TransformBolt's Component-specific config: \n%s" % str(config)) + self.logger.debug("TransformBolt's Component-specific config: \n%s", str(config)) self.processed = 0 self.emitted = 0 if TransformBolt.OPERATOR in config: diff --git a/heronpy/streamlet/impl/unionbolt.py b/heronpy/streamlet/impl/unionbolt.py index fb9b3c5126f..d7cdd50bb56 100644 --- a/heronpy/streamlet/impl/unionbolt.py +++ b/heronpy/streamlet/impl/unionbolt.py @@ -39,7 +39,7 @@ def pre_save(self, checkpoint_id): pass def initialize(self, config, context): - self.logger.debug("UnionBolt's Component-specific config: \n%s" % str(config)) + self.logger.debug("UnionBolt's Component-specific config: \n%s", str(config)) self.processed = 0 self.emitted = 0 diff --git a/integration_test/src/python/http_server/main.py b/integration_test/src/python/http_server/main.py index 8ef6630775f..d0a0b966cee 100644 --- 
a/integration_test/src/python/http_server/main.py +++ b/integration_test/src/python/http_server/main.py @@ -38,9 +38,9 @@ def get(self, fileName): if not os.path.exists(jsonFilePath): self.clear() self.set_status(404) - self.finish("%s does not exist" % (fileName + ".json")) + self.finish(f"{fileName}.json does not exist") else: - with open(jsonFilePath, "r") as jsonFile: + with open(jsonFilePath, "r", encoding="utf8") as jsonFile: data = jsonFile.read() self.set_header("Content-Type", 'application/json; charset="utf-8"') @@ -50,7 +50,7 @@ def post(self, fileName): jsonFilePath = RESULTS_DIRECTORY + "/" + secure_filename(fileName) + ".json" #Overwrites the existing file - with open(jsonFilePath, "w") as jsonFile: + with open(jsonFilePath, "w", encoding="utf8") as jsonFile: try: data = tornado.escape.json_decode(self.request.body) jsonFile.write(tornado.escape.json_encode(data)) @@ -79,7 +79,7 @@ def get(self, key): if key in self.state_map: self.write(self.state_map[key]) else: - raise tornado.web.HTTPError(status_code=404, log_message="Key %s not found" % key) + raise tornado.web.HTTPError(status_code=404, log_message=f"Key {key} not found") else: self.write(str(self.state_map)) @@ -99,7 +99,7 @@ def get(self, key): if key in self.result_map: self.write(tornado.escape.json_encode(self.result_map[key])) else: - raise tornado.web.HTTPError(status_code=404, log_message="Key %s not found" % key) + raise tornado.web.HTTPError(status_code=404, log_message=f"Key {key} not found") else: self.write(tornado.escape.json_encode(self.result_map)) @@ -117,7 +117,7 @@ def post(self, key): self.result_map[key] = [data] self.write("Results written successfully: topology " + key + ' instance ' + list(data.keys())[0]) else: - raise tornado.web.HTTPError(status_code=404, log_message="Invalid key %s" % key) + raise tornado.web.HTTPError(status_code=404, log_message=f"Invalid key {key}") def main(): ''' diff --git 
a/integration_test/src/python/integration_test/common/bolt/count_aggregator_bolt.py b/integration_test/src/python/integration_test/common/bolt/count_aggregator_bolt.py index 6bfe7c368a2..4fd38ae82c1 100644 --- a/integration_test/src/python/integration_test/common/bolt/count_aggregator_bolt.py +++ b/integration_test/src/python/integration_test/common/bolt/count_aggregator_bolt.py @@ -34,5 +34,5 @@ def process(self, tup): self.sum += int(tup.values[0]) def finish_batch(self): - self.logger.info("In finish batch, emitting: %d" % self.sum) + self.logger.info("In finish batch, emitting: %d", self.sum) self.emit([self.sum]) diff --git a/integration_test/src/python/integration_test/common/bolt/word_count_bolt.py b/integration_test/src/python/integration_test/common/bolt/word_count_bolt.py index 823aac31ecf..74677ba4a6c 100644 --- a/integration_test/src/python/integration_test/common/bolt/word_count_bolt.py +++ b/integration_test/src/python/integration_test/common/bolt/word_count_bolt.py @@ -38,8 +38,8 @@ def process(self, tup): else: self.cache[word] = 1 - self.logger.info("Counter: %s" % str(self.cache)) + self.logger.info("Counter: %s", str(self.cache)) def finish_batch(self): - self.logger.info("In finish batch, emitting: %d" % len(self.cache)) + self.logger.info("In finish batch, emitting: %d", len(self.cache)) self.emit([len(self.cache)]) diff --git a/integration_test/src/python/integration_test/core/aggregator_bolt.py b/integration_test/src/python/integration_test/core/aggregator_bolt.py index 3e611544e67..de22548c351 100644 --- a/integration_test/src/python/integration_test/core/aggregator_bolt.py +++ b/integration_test/src/python/integration_test/core/aggregator_bolt.py @@ -36,7 +36,7 @@ class AggregatorBolt(TerminalBolt): def initialize(self, config, context): self.http_post_url = config[integ_constants.HTTP_POST_URL_KEY] self.result = [] - Log.info("HTTP post url: %s" % self.http_post_url) + Log.info("HTTP post url: %s", self.http_post_url) self.parsed_url = 
urlparse(self.http_post_url) def process(self, tup): @@ -55,14 +55,14 @@ def _post_result_to_server(self, json_result): def write_finished_data(self): json_result = json.dumps(self.result) - Log.info("Actual result: %s" % json_result) - Log.info("Posting actual result to %s" % self.http_post_url) + Log.info("Actual result: %s", json_result) + Log.info("Posting actual result to %s", self.http_post_url) try: response_code = self._post_result_to_server(json_result) if response_code != 200: # try again response_code = self._post_result_to_server(json_result) if response_code != 200: - raise RuntimeError("Response code: %d" % response_code) + raise RuntimeError(f"Response code: {response_code}") except Exception as e: - raise RuntimeError("Posting result to server failed with: %s" % e.message) + raise RuntimeError(f"Posting result to server failed with: {e.message}") diff --git a/integration_test/src/python/integration_test/core/integration_test_bolt.py b/integration_test/src/python/integration_test/core/integration_test_bolt.py index ea51c4cb7a5..65b9721704c 100644 --- a/integration_test/src/python/integration_test/core/integration_test_bolt.py +++ b/integration_test/src/python/integration_test/core/integration_test_bolt.py @@ -41,7 +41,7 @@ class IntegrationTestBolt(Bolt): @classmethod def spec(cls, name, par, inputs, config, user_bolt_classpath, user_output_fields=None): - python_class_path = "%s.%s" % (cls.__module__, cls.__name__) + python_class_path = f"{cls.__module__}.{cls.__name__}" config[integ_const.USER_BOLT_CLASSPATH] = user_bolt_classpath # avoid modification to cls.outputs _outputs = copy.copy(cls.outputs) @@ -69,7 +69,7 @@ def initialize(self, config, context): self.tuples_processed = 0 self.current_tuple_processing = None - Log.info("Terminals to receive: %d" % self.terminal_to_receive) + Log.info("Terminals to receive: %d", self.terminal_to_receive) self.user_bolt.initialize(config, context) @staticmethod @@ -86,7 +86,7 @@ def process(self, tup): 
self.tuple_received += 1 stream_id = tup.stream - Log.info("Received a tuple: %s from %s" % (tup, stream_id)) + Log.info("Received a tuple: %s from %s", tup, stream_id) if stream_id == integ_const.INTEGRATION_TEST_CONTROL_STREAM_ID: self.terminal_to_receive -= 1 if self.is_done: diff --git a/integration_test/src/python/integration_test/core/integration_test_spout.py b/integration_test/src/python/integration_test/core/integration_test_spout.py index 4edd807703a..7a7cbd14477 100644 --- a/integration_test/src/python/integration_test/core/integration_test_spout.py +++ b/integration_test/src/python/integration_test/core/integration_test_spout.py @@ -38,7 +38,7 @@ class IntegrationTestSpout(Spout): @classmethod def spec(cls, name, par, config, user_spout_classpath, user_output_fields=None): - python_class_path = "%s.%s" % (cls.__module__, cls.__name__) + python_class_path = f"{cls.__module__}.{cls.__name__}" config[integ_const.USER_SPOUT_CLASSPATH] = user_spout_classpath # avoid modification to cls.outputs @@ -57,7 +57,7 @@ def initialize(self, config, context): self.max_executions = config.get(integ_const.USER_MAX_EXECUTIONS, integ_const.MAX_EXECUTIONS) assert isinstance(self.max_executions, int) and self.max_executions > 0 - Log.info("Max executions: %d" % self.max_executions) + Log.info("Max executions: %d", self.max_executions) self.tuples_to_complete = 0 self.user_spout.initialize(config, context) @@ -77,7 +77,7 @@ def next_tuple(self): return self.max_executions -= 1 - Log.info("max executions: %d" % self.max_executions) + Log.info("max executions: %d", self.max_executions) self.user_spout.next_tuple() @@ -86,14 +86,14 @@ def next_tuple(self): Log.info("This topology is finished.") def ack(self, tup_id): - Log.info("Received an ack with tuple id: %s" % str(tup_id)) + Log.info("Received an ack with tuple id: %s", str(tup_id)) self.tuples_to_complete -= 1 if tup_id != integ_const.INTEGRATION_TEST_MOCK_MESSAGE_ID: self.user_spout.ack(tup_id)
self._emit_terminal_if_needed() def fail(self, tup_id): - Log.info("Received a fail message with tuple id: %s" % str(tup_id)) + Log.info("Received a fail message with tuple id: %s", str(tup_id)) self.tuples_to_complete -= 1 if tup_id != integ_const.INTEGRATION_TEST_MOCK_MESSAGE_ID: self.user_spout.fail(tup_id) @@ -109,7 +109,7 @@ def emit(self, tup, tup_id=None, stream=Stream.DEFAULT_STREAM_ID, self.tuples_to_complete += 1 if tup_id is None: - Log.info("Add tup_id for tuple: %s" % str(tup)) + Log.info("Add tup_id for tuple: %s", str(tup)) _tup_id = integ_const.INTEGRATION_TEST_MOCK_MESSAGE_ID else: _tup_id = tup_id @@ -117,7 +117,7 @@ def emit(self, tup, tup_id=None, stream=Stream.DEFAULT_STREAM_ID, super(IntegrationTestSpout, self).emit(tup, _tup_id, stream, direct_task, need_task_ids) def _emit_terminal_if_needed(self): - Log.info("is_done: %s, tuples_to_complete: %s" % (self.is_done, self.tuples_to_complete)) + Log.info("is_done: %s, tuples_to_complete: %s", self.is_done, self.tuples_to_complete) if self.is_done and self.tuples_to_complete == 0: Log.info("Emitting terminals to downstream") super(IntegrationTestSpout, self).emit([integ_const.INTEGRATION_TEST_TERMINAL], diff --git a/integration_test/src/python/integration_test/core/test_topology_builder.py b/integration_test/src/python/integration_test/core/test_topology_builder.py index 6881644734d..4b5daaa1731 100644 --- a/integration_test/src/python/integration_test/core/test_topology_builder.py +++ b/integration_test/src/python/integration_test/core/test_topology_builder.py @@ -42,7 +42,7 @@ class TestTopologyBuilder(TopologyBuilder): api_constants.TOPOLOGY_PROJECT_NAME: "heron-integration-test"} def __init__(self, name, http_server_url): super(TestTopologyBuilder, self).__init__(name) - self.output_location = "%s/%s" % (http_server_url, self.topology_name) + self.output_location = f"{http_server_url}/{self.topology_name}" self.set_config(self.DEFAULT_CONFIG) # map spout's component spec> diff --git 
a/integration_test/src/python/local_test_runner/main.py b/integration_test/src/python/local_test_runner/main.py index e4afbf343b5..1f0ddd95724 100644 --- a/integration_test/src/python/local_test_runner/main.py +++ b/integration_test/src/python/local_test_runner/main.py @@ -118,7 +118,7 @@ def main(): # Convert the conf file to a json format conf = decoder.decode(conf_string) - args = dict() + args = {} home_directory = os.path.expanduser("~") args['cluster'] = conf['cluster'] args['topologyName'] = conf['topology']['topologyName'] diff --git a/integration_test/src/python/local_test_runner/test_scale_up.py b/integration_test/src/python/local_test_runner/test_scale_up.py index d79837837f5..aee9bf756d3 100644 --- a/integration_test/src/python/local_test_runner/test_scale_up.py +++ b/integration_test/src/python/local_test_runner/test_scale_up.py @@ -47,8 +47,7 @@ def pre_check_results(self, physical_plan_json): instances = physical_plan_json['instances'] instance_count = len(instances) if instance_count != self.expected_instance_count: - raise status.TestFailure("Found %s instances but expected %s: %s" % - (instance_count, self.expected_instance_count, instances)) + raise status.TestFailure(f"Found {instance_count} instances but expected {self.expected_instance_count}: {instances}") def scale_up(heron_cli_path, test_cluster, topology_name): splitcmd = [ @@ -57,5 +56,5 @@ def scale_up(heron_cli_path, test_cluster, topology_name): ] logging.info("Increasing number of component instances: %s", splitcmd) if subprocess.call(splitcmd) != 0: - raise status.TestFailure("Unable to update topology %s" % topology_name) + raise status.TestFailure(f"Unable to update topology {topology_name}") logging.info("Increased number of component instances") diff --git a/integration_test/src/python/local_test_runner/test_template.py b/integration_test/src/python/local_test_runner/test_template.py index ebb3a98cc0b..1c11c22e5bc 100644 --- 
a/integration_test/src/python/local_test_runner/test_template.py +++ b/integration_test/src/python/local_test_runner/test_template.py @@ -159,7 +159,7 @@ def _check_results(self): expected_result = "" actual_result = "" retries_left = RETRY_COUNT - _sleep("before trying to check results for test %s" % self.testname, RETRY_INTERVAL) + _sleep(f"before trying to check results for test {self.testname}", RETRY_INTERVAL) while retries_left > 0: retries_left -= 1 try: @@ -169,7 +169,7 @@ def _check_results(self): actual_result = g.read() except Exception as e: message =\ - "Failed to read expected or actual results from file for test %s: %s" % self.testname + f"Failed to read expected or actual results from file for test: {self.testname}" if retries_left == 0: raise status.TestFailure(message, e) logging.error(message, e) @@ -191,13 +191,13 @@ def _check_results(self): # Compare the actual and expected result if actual_sorted == expected_sorted: success = status.TestSuccess( - "Actual result matched expected result for test %s" % self.testname) + f"Actual result matched expected result for test {self.testname}") logging.info("Actual result ---------- \n%s", actual_sorted) logging.info("Expected result ---------- \n%s", expected_sorted) return success else: failure = status.TestFailure( - "Actual result did not match expected result for test %s" % self.testname) + f"Actual result did not match expected result for test {self.testname}") logging.info("Actual result ---------- \n%s", actual_sorted) logging.info("Expected result ---------- \n%s", expected_sorted) raise failure @@ -222,7 +222,7 @@ def kill_process(self, process_number): """ kills process by running unix command kill """ if process_number < 1: raise RuntimeError( - "Not attempting to kill process id < 1 passed to kill_process: %d" % process_number) + f"Not attempting to kill process id < 1 passed to kill_process: {process_number}") logging.info("Killing process number %s", process_number) @@ -232,15 +232,15 @@ 
def kill_process(self, process_number): if "No such process" in str(ex): # killing a non-existing process condsidered as success logging.info(str(ex)) else: - raise RuntimeError("Unable to kill process %s" % process_number) + raise RuntimeError(f"Unable to kill process {process_number}") except Exception: - raise RuntimeError("Unable to kill process %s" % process_number) + raise RuntimeError(f"Unable to kill process {process_number}") logging.info("Killed process number %s", process_number) def kill_strmgr(self): logging.info("Executing kill stream manager") - stmgr_pid = self.get_pid('%s-%d' % (STMGR, NON_TMANAGER_SHARD), self.params['workingDirectory']) + stmgr_pid = self.get_pid(f'{STMGR}-{NON_TMANAGER_SHARD}', self.params['workingDirectory']) self.kill_process(stmgr_pid) def kill_metricsmgr(self): @@ -258,13 +258,13 @@ def _get_tracker_pplan(self): if 'result' not in physical_plan_json: raise status.TestFailure( - "Could not find result json in physical plan request to tracker: %s" % url) + f"Could not find result json in physical plan request to tracker: {url}") return physical_plan_json['result'] def _block_until_topology_running(self, min_instances): retries_left = RETRY_COUNT - _sleep("before trying to fetch pplan for test %s" % self.testname, RETRY_INTERVAL) + _sleep(f"before trying to fetch pplan for test {self.testname}", RETRY_INTERVAL) while retries_left > 0: retries_left -= 1 packing_plan = self._get_tracker_pplan() @@ -276,16 +276,15 @@ def _block_until_topology_running(self, min_instances): return packing_plan elif retries_left == 0: raise status.TestFailure( - "Got pplan from tracker for test %s but the number of " % self.testname + - "instances found (%d) was less than min expected (%s)." % - (instances_found, min_instances)) + f"Got pplan from tracker for test {self.testname} but the number of " + + f"instances found ({instances_found}) was less than min expected ({min_instances})." 
+ ) if retries_left > 0: - _sleep("before trying again to fetch pplan for test %s (attempt %s/%s)" % - (self.testname, RETRY_COUNT - retries_left, RETRY_COUNT), RETRY_INTERVAL) + _sleep("before trying again to fetch pplan for test "\ + f"{self.testname} (attempt {RETRY_COUNT - retries_left}/{RETRY_COUNT})", RETRY_INTERVAL) else: - raise status.TestFailure("Failed to get pplan from tracker for test %s after %s attempts." - % (self.testname, RETRY_COUNT)) + raise status.TestFailure(f"Failed to get pplan from tracker for test {self.testname} after {RETRY_COUNT} attempts.") def _block_until_stmgr_running(expected_stmgrs): # block until ./heron-stmgr exists @@ -306,7 +305,7 @@ def _submit_topology(heron_cli_path, test_cluster, test_jar_path, topology_class p = subprocess.Popen(splitcmd) p.wait() if p.returncode != 0: - raise status.TestFailure("Failed to submit topology %s" % topology_name) + raise status.TestFailure(f"Failed to submit topology {topology_name}") logging.info("Submitted topology %s", topology_name) @@ -316,7 +315,7 @@ def _kill_topology(heron_cli_path, test_cluster, topology_name): logging.info("Killing topology: %s", ' '.join(splitcmd)) # this call can be blocking, no need for subprocess if subprocess.call(splitcmd) != 0: - raise RuntimeError("Unable to kill the topology: %s" % topology_name) + raise RuntimeError(f"Unable to kill the topology: {topology_name}") def _get_processes(): """ diff --git a/integration_test/src/python/test_runner/main.py b/integration_test/src/python/test_runner/main.py index 3fec46f7556..fbfd874fe95 100644 --- a/integration_test/src/python/test_runner/main.py +++ b/integration_test/src/python/test_runner/main.py @@ -50,12 +50,12 @@ def fetch_results(self) -> str: # Read expected result from the expected result file try: if not os.path.exists(self.file_path): - raise status.TestFailure("Expected results file %s does not exist" % self.file_path) + raise status.TestFailure(f"Expected results file {self.file_path} does not exist") 
else: with open(self.file_path, "r") as expected_result_file: return expected_result_file.read().rstrip() except Exception as e: - raise status.TestFailure("Failed to read expected result file %s" % self.file_path, e) + raise status.TestFailure(f"Failed to read expected result file {self.file_path}", e) class HttpBasedExpectedResultsHandler: def __init__(self, server_host_port, topology_name, task_count): @@ -71,21 +71,21 @@ def fetch_results(self) -> str: for i in range(0, self.task_count): task_result = fetch_from_server(self.server_host_port, self.topology_name, 'expected results', - '/state/%s_tuples_emitted_%d' % (self.topology_name, i)) + f'/state/{self.topology_name}_tuples_emitted_{i}') json_result = decoder.decode(task_result) logging.info("Found %d tuples emitted from spout task %d", len(json_result), i) result = result + json_result if len(result) == 0: raise status.TestFailure( - "Expected result set is empty for topology %s" % self.topology_name) + f"Expected result set is empty for topology {self.topology_name}") # need to convert from a list of json objects to a string of a python list, # without the unicode using double quotes, not single quotes. 
return str([str(x) for x in result]).replace("'", '"') except Exception as e: raise status.TestFailure( - "Fetching expected result failed for %s topology" % self.topology_name, e) + f"Fetching expected result failed for {self.topology_name} topology", e) class HttpBasedActualResultsHandler: def __init__(self, server_host_port, topology_name): @@ -95,9 +95,9 @@ def __init__(self, server_host_port, topology_name): def fetch_results(self) -> str: try: return fetch_from_server(self.server_host_port, self.topology_name, - 'results', '/results/%s' % self.topology_name) + 'results', f'/results/{self.topology_name}') except Exception as e: - raise status.TestFailure("Fetching result failed for %s topology" % self.topology_name, e) + raise status.TestFailure(f"Fetching result failed for {self.topology_name} topology", e) # pylint: disable=unnecessary-lambda class ExactlyOnceResultsChecker: @@ -128,8 +128,8 @@ def _compare(self, expected_results, actual_results): # Compare the actual and expected result if actual_results == expected_results: return status.TestSuccess( - "Topology %s result matches expected result: %s expected tuples found exactly once" % - (len(expected_results), self.topology_name)) + f"Topology {len(expected_results)} result matches expected result: {self.topology_name} expected tuples found exactly once" + ) else: failure = status.TestFailure("Actual result did not match expected result") # lambda required below to remove the unicode 'u' from the output @@ -161,9 +161,9 @@ def _compare(self, expected_results, actual_results): failure = status.TestFailure("Actual result did not match expected result") # lambda required below to remove the unicode 'u' from the output logging.info("Actual value frequencies ---------- \n" + ', '.join( - ["%s(%s)" % (str(k_v[0]), k_v[1]) for k_v in iter(actual_counts.items())])) + [f"{str(k_v[0])}({k_v[1]})" for k_v in iter(actual_counts.items())])) logging.info("Expected value frequencies ---------- \n" + ', '.join( - 
["%s(%s)" % (str(k_v1[0]), k_v1[1]) for k_v1 in iter(expected_counts.items())])) + [f"{str(k_v1[0])}({k_v1[1]})" for k_v1 in iter(expected_counts.items())])) raise failure def _frequency_dict(values): @@ -181,13 +181,12 @@ def run_test(topology_name, classpath, results_checker, #submit topology try: - args = "-r http://%s/results -t %s %s" %\ - (http_server_host_port, topology_name, extra_topology_args) + args = f"-r http://{http_server_host_port}/results -t {topology_name} {extra_topology_args}" submit_topology(params.heron_cli_path, params.cli_config_path, params.cluster, params.role, params.env, params.tests_bin_path, classpath, params.release_package_uri, args) except Exception as e: - raise status.TestFailure("Failed to submit %s topology" % topology_name, e) + raise status.TestFailure(f"Failed to submit {topology_name} topology", e) logging.info("Successfully submitted %s topology", topology_name) @@ -206,18 +205,18 @@ def run_test(topology_name, classpath, results_checker, return results_checker.check_results() except Exception as e: - raise status.TestFailure("Checking result failed for %s topology" % topology_name, e) + raise status.TestFailure(f"Checking result failed for {topology_name} topology", e) finally: kill_topology(params.heron_cli_path, params.cli_config_path, params.cluster, params.role, params.env, topology_name) def poll_state_server(server_host_port, topology_name, key): return fetch_from_server( - server_host_port, topology_name, key, '/state/%s_%s' % (topology_name, key)) + server_host_port, topology_name, key, f'/state/{topology_name}_{key}') def update_state_server(http_server_host_port, topology_name, key, value): connection = HTTPConnection(http_server_host_port) - connection.request('POST', '/state/%s_%s' % (topology_name, key), '"%s"' % value) + connection.request('POST', f'/state/{topology_name}_{key}', f'"{value}"') response = connection.getresponse() return response.status == 200 @@ -233,7 +232,7 @@ def 
fetch_from_server(server_host_port, topology_name, data_name, path) -> str: data_name, response.status, response.reason, response.read()) time.sleep(RETRY_INTERVAL) - raise status.TestFailure("Failed to fetch %s after %d attempts" % (data_name, RETRY_ATTEMPTS)) + raise status.TestFailure(f"Failed to fetch {data_name} after {RETRY_ATTEMPTS} attempts") def get_http_response(server_host_port, path): ''' get HTTP response ''' @@ -247,12 +246,12 @@ def get_http_response(server_host_port, path): time.sleep(RETRY_INTERVAL) continue - raise status.TestFailure("Failed to get HTTP Response after %d attempts" % RETRY_ATTEMPTS) + raise status.TestFailure(f"Failed to get HTTP Response after {RETRY_ATTEMPTS} attempts") def cluster_token(cluster, role, env): if cluster == "local": return cluster - return "%s/%s/%s" % (cluster, role, env) + return f"{cluster}/{role}/{env}" def submit_topology(heron_cli_path, cli_config_path, cluster, role, env, jar_path, classpath, pkg_uri, args=None): @@ -260,14 +259,11 @@ def submit_topology(heron_cli_path, cli_config_path, cluster, role, # Form the command to submit a topology. # Note the single quote around the arg for heron.package.core.uri. # This is needed to prevent shell expansion. 
- cmd = "%s submit %s --config-path=%s %s %s %s %s" %\ - (heron_cli_path, - "--verbose" if VERBOSE else "", - cli_config_path, cluster_token(cluster, role, env), - jar_path, classpath, args) + cmd = f"""{heron_cli_path} submit {"--verbose" if VERBOSE else ""} """\ + f"--config-path={cli_config_path} {cluster_token(cluster, role, env)} {jar_path} {classpath} {args}" if pkg_uri is not None: - cmd = "%s --config-property heron.package.core.uri='%s'" %(cmd, pkg_uri) + cmd = f"{cmd} --config-property heron.package.core.uri='{pkg_uri}'" logging.info("Submitting topology: %s", cmd) @@ -276,24 +272,21 @@ def submit_topology(heron_cli_path, cli_config_path, cluster, role, def kill_topology(heron_cli_path, cli_config_path, cluster, role, env, topology_name): ''' Kill a topology using heron-cli ''' - cmd = "%s kill --config-path=%s %s %s" %\ - (heron_cli_path, cli_config_path, cluster_token(cluster, role, env), topology_name) + cmd = f"{heron_cli_path} kill --config-path={cli_config_path} {cluster_token(cluster, role, env)} {topology_name}" logging.info("Killing topology: %s", cmd) if os.system(cmd) != 0: - raise status.TestFailure("Failed to kill topology %s" % topology_name) + raise status.TestFailure(f"Failed to kill topology {topology_name}") logging.info("Successfully killed topology %s", topology_name) def update_topology(heron_cli_path, cli_config_path, cluster, role, env, topology_name, update_args): - cmd = "%s update --config-path=%s %s %s %s --verbose" %\ - (heron_cli_path, cli_config_path, - cluster_token(cluster, role, env), update_args, topology_name) + cmd = f"{heron_cli_path} update --config-path={cli_config_path} {cluster_token(cluster, role, env)} {update_args} {topology_name} --verbose" logging.info("Update topology: %s", cmd) if os.system(cmd) != 0: - raise status.TestFailure("Failed to update topology %s" % topology_name) + raise status.TestFailure(f"Failed to update topology {topology_name}") logging.info("Successfully updated topology %s", 
topology_name) @@ -314,22 +307,22 @@ def run_tests(conf, test_args): lock = Lock() timestamp = time.strftime('%Y%m%d%H%M%S') - http_server_host_port = "%s:%d" % (test_args.http_server_hostname, test_args.http_server_port) + http_server_host_port = f"{test_args.http_server_hostname}:{test_args.http_server_port}" if test_args.tests_bin_path.endswith("scala-integration-tests.jar"): test_topologies = filter_test_topologies(conf["scalaTopologies"], test_args.test_topology_pattern) topology_classpath_prefix = conf["topologyClasspathPrefix"] - extra_topology_args = "-s http://%s/state" % http_server_host_port + extra_topology_args = f"-s http://{http_server_host_port}/state" elif test_args.tests_bin_path.endswith("integration-tests.jar"): test_topologies = filter_test_topologies(conf["javaTopologies"], test_args.test_topology_pattern) topology_classpath_prefix = conf["topologyClasspathPrefix"] - extra_topology_args = "-s http://%s/state" % http_server_host_port + extra_topology_args = f"-s http://{http_server_host_port}/state" elif test_args.tests_bin_path.endswith("heron_integ_topology.pex"): test_topologies = filter_test_topologies(conf["pythonTopologies"], test_args.test_topology_pattern) topology_classpath_prefix = "" extra_topology_args = "" else: - raise ValueError("Unrecognized binary file type: %s" % test_args.tests_bin_path) + raise ValueError(f"Unrecognized binary file type: {test_args.tests_bin_path}") def _run_single_test(topology_name, topology_conf, test_args, http_server_host_port, classpath, update_args, topology_args): @@ -378,7 +371,7 @@ def _run_single_test(topology_name, topology_conf, test_args, http_server_host_p raise ValueError("Specifying a test with emit_until spout wrapper without updateArgs " + "will cause the spout to emit indefinitely. 
Not running topology " + topology_name) - topology_args = "%s %s" % (topology_args, topology_conf["topologyArgs"]) + topology_args = f"{topology_args} {topology_conf['topologyArgs']}" test_threads.append(Thread(target=_run_single_test, args=(topology_name, topology_conf, test_args, http_server_host_port, classpath, update_args, topology_args))) @@ -418,7 +411,7 @@ def load_expected_result_handler(topology_name, topology_conf, args, http_server http_server_host_port, topology_name, topology_conf["expectedHttpResultTaskCount"]) else: raise status.TestFailure("Either expectedResultRelativePath or expectedHttpResultTaskCount " - + "must be specified for test %s " % topology_name) + + f"must be specified for test {topology_name}.") def main(): ''' main ''' @@ -462,17 +455,17 @@ def main(): tests_start_time = int(time.time()) run_tests(conf, args) total = len(failures) + len(successes) - logging.info("Total integration test time = %ss" % (int(time.time()) - tests_start_time)) + logging.info("Total integration test time = %ss", (int(time.time()) - tests_start_time)) if not failures: logging.info("SUCCESS: %s (all) tests passed:", len(successes)) for test in successes: - logging.info(" - %s: %s", ("[%ss]" % test[1]).ljust(8), test[0]) + logging.info(" - %s: %s", (f"[{test[1]}s]").ljust(8), test[0]) sys.exit(0) else: logging.error("FAILURE: %s/%s tests failed:", len(failures), total) for test in failures: - logging.error(" - %s: %s", ("[%ss]" % test[1]).ljust(8), test[0]) + logging.error(" - %s: %s", (f"[{test[1]}s]").ljust(8), test[0]) sys.exit(1) if __name__ == '__main__': diff --git a/integration_test/src/python/topology_test_runner/main.py b/integration_test/src/python/topology_test_runner/main.py index ab26c4a053b..77867c54a44 100644 --- a/integration_test/src/python/topology_test_runner/main.py +++ b/integration_test/src/python/topology_test_runner/main.py @@ -23,6 +23,7 @@ import re import sys import time +from turtle import update import uuid from http.client import 
HTTPConnection from threading import Lock, Thread @@ -111,7 +112,7 @@ def _compare(self, expected_results, actual_results): if correct_topology: return status.TestSuccess( - "Topology %s result matches expected result" % self.topology_name) + f"Topology {self.topology_name} result matches expected result") else: raise status.TestFailure("Actual result did not match expected result") @@ -119,8 +120,8 @@ def _parse_expected_results(self, expected_results): """ Parse JSON file and generate expected_nodes and expected_links """ - expected_nodes = dict() - expected_links = dict() + expected_nodes = {} + expected_links = {} for bolt in expected_results["topology"]["bolts"]: name = bolt["comp"]["name"] if name not in expected_links: @@ -140,8 +141,8 @@ def _parse_actual_results(self, actual_results): """ Parse protobuf messege and generate actual_nodes and actual_links """ - actual_nodes = dict() - actual_links = dict() + actual_nodes = {} + actual_links = {} for bolt in actual_results.topology.bolts: name = bolt.comp.name if name not in actual_links: @@ -178,7 +179,7 @@ def check_results(self): topology_structure_check_result = TopologyStructureResultChecker.check_results(self) if isinstance(topology_structure_check_result, status.TestFailure): raise status.TestFailure("The actual topology graph structure does not match the expected one" - + " for topology: %s" % self.topology_name) + + f" for topology: {self.topology_name}") # check instance states, get the instance_state_check_result # if both above are isinstance(status.TestSuccess), return success, else return fail expected_result = self.instance_state_expected_result_handler.fetch_results() @@ -237,12 +238,12 @@ def fetch_results(self) -> str: """ try: if not os.path.exists(self.file_path): - raise status.TestFailure("Expected results file %s does not exist" % self.file_path) + raise status.TestFailure(f"Expected results file {self.file_path} does not exist") else: with open(self.file_path, "r") as 
expected_result_file: return expected_result_file.read().rstrip() except Exception as e: - raise status.TestFailure("Failed to read expected result file %s" % self.file_path, e) + raise status.TestFailure(f"Failed to read expected result file {self.file_path}", e) class ZkFileBasedActualResultsHandler: @@ -268,8 +269,7 @@ def _load_state_mgr(self, cluster): state_mgr_config[0]["rootpath"], state_mgr_config[0]["tunnelhost"]) else: - raise status.TestFailure("Unrecognized state manager type: %s" - % state_mgr_config["type"]) + raise status.TestFailure(f"Unrecognized state manager type: {state_mgr_config['type']}") def fetch_cur_pplan(self): try: @@ -283,12 +283,10 @@ def fetch_cur_pplan(self): break time.sleep(RETRY_INTERVAL) else: - raise status.TestFailure("Fetching physical plan failed for %s topology" - % self.topology_name) + raise status.TestFailure(f"Fetching physical plan failed for {self.topology_name} topology") return pplan_string except Exception as e: - raise status.TestFailure("Fetching physical plan failed for %s topology" - % self.topology_name, e) + raise status.TestFailure(f"Fetching physical plan failed for {self.topology_name} topology", e) def stop_state_mgr(self): self.state_mgr.stop() @@ -306,9 +304,9 @@ def __init__(self, server_host_port, topology_name): def fetch_results(self) -> str: try: return self.fetch_from_server(self.server_host_port, self.topology_name, - 'instance_state', '/stateResults/%s' % self.topology_name) + 'instance_state', f'/stateResults/{self.topology_name}') except Exception as e: - raise status.TestFailure("Fetching instance state failed for %s topology" % self.topology_name, e) + raise status.TestFailure(f"Fetching instance state failed for {self.topology_name} topology", e) def fetch_from_server(self, server_host_port, topology_name, data_name, path) -> str: ''' Make a http get request to fetch actual results from http server ''' @@ -322,7 +320,7 @@ def fetch_from_server(self, server_host_port, topology_name, 
data_name, path) -> data_name, response.status, response.reason, response.read().decode()) time.sleep(RETRY_INTERVAL) - raise status.TestFailure("Failed to fetch %s after %d attempts" % (data_name, RETRY_ATTEMPTS)) + raise status.TestFailure(f"Failed to fetch {data_name} after {RETRY_ATTEMPTS} attempts") def get_http_response(self, server_host_port, path): ''' get HTTP response ''' @@ -336,7 +334,7 @@ def get_http_response(self, server_host_port, path): time.sleep(RETRY_INTERVAL) continue - raise status.TestFailure("Failed to get HTTP Response after %d attempts" % RETRY_ATTEMPTS) + raise status.TestFailure(f"Failed to get HTTP Response after {RETRY_ATTEMPTS} attempts") # Result handlers end @@ -359,15 +357,14 @@ def run_topology_test(topology_name, classpath, results_checker, check_type): try: if check_type == 'checkpoint_state': - args = "-r http://%s/stateResults -t %s %s" % \ - (http_server_host_port, topology_name, extra_topology_args) + args = f"-r http://{http_server_host_port}/stateResults -t {topology_name} {extra_topology_args}" else: - args = "-t %s %s" % (topology_name, extra_topology_args) + args = f"-t {topology_name} {extra_topology_args}" submit_topology(params.heron_cli_path, params.cli_config_path, params.cluster, params.role, params.env, params.tests_bin_path, classpath, params.release_package_uri, args) except Exception as e: - raise status.TestFailure("Failed to submit %s topology" % topology_name, e) + raise status.TestFailure(f"Failed to submit {topology_name} topology", e) logging.info("Successfully submitted %s topology", topology_name) @@ -396,7 +393,7 @@ def run_topology_test(topology_name, classpath, results_checker, return results_checker.check_results() except Exception as e: - raise status.TestFailure("Checking result failed for %s topology" % topology_name, e) + raise status.TestFailure(f"Checking result failed for {topology_name} topology", e) finally: kill_topology(params.heron_cli_path, params.cli_config_path, params.cluster, 
params.role, params.env, topology_name) @@ -410,12 +407,10 @@ def submit_topology(heron_cli_path, cli_config_path, cluster, role, """ Submit topology using heron-cli """ - cmd = "%s submit --config-path=%s %s %s %s %s" % \ - (heron_cli_path, cli_config_path, cluster_token(cluster, role, env), - jar_path, classpath, args) + cmd = f"{heron_cli_path} submit --config-path={cli_config_path} {cluster_token(cluster, role, env)} {jar_path} {classpath} {args}" if pkg_uri is not None: - cmd = "%s --config-property heron.package.core.uri='%s'" %(cmd, pkg_uri) + cmd = f"{cmd} --config-property heron.package.core.uri='{pkg_uri}'" logging.info("Submitting topology: %s", cmd) @@ -425,13 +420,11 @@ def submit_topology(heron_cli_path, cli_config_path, cluster, role, def update_topology(heron_cli_path, cli_config_path, cluster, role, env, topology_name, update_args): - cmd = "%s update --config-path=%s %s %s %s --verbose" % \ - (heron_cli_path, cli_config_path, - cluster_token(cluster, role, env), update_args, topology_name) + cmd = f"{heron_cli_path} update --config-path={cli_config_path} {cluster_token(cluster, role, env)} {update_args} {topology_name} --verbose" logging.info("Update topology: %s", cmd) if os.system(cmd) != 0: - raise status.TestFailure("Failed to update topology %s" % topology_name) + raise status.TestFailure(f"Failed to update topology {topology_name}") logging.info("Successfully updated topology %s", topology_name) @@ -439,32 +432,26 @@ def update_topology(heron_cli_path, cli_config_path, cluster, def deactivate_topology(heron_cli_path, cli_config_path, cluster, role, env, topology_name, deactivate): if deactivate: - cmd = "%s deactivate --config-path=%s %s %s" % \ - (heron_cli_path, cli_config_path, - cluster_token(cluster, role, env), topology_name) + cmd = f"{heron_cli_path} deactivate --config-path={cli_config_path} {cluster_token(cluster, role, env)} {topology_name}" logging.info("deactivate topology: %s", cmd) if os.system(cmd) != 0: - raise 
status.TestFailure("Failed to deactivate topology %s" % topology_name) + raise status.TestFailure(f"Failed to deactivate topology {topology_name}") logging.info("Successfully deactivate topology %s", topology_name) else: - cmd = "%s activate --config-path=%s %s %s" % \ - (heron_cli_path, cli_config_path, - cluster_token(cluster, role, env), topology_name) + cmd = f"{heron_cli_path} activate --config-path={cli_config_path} {cluster_token(cluster, role, env)} {topology_name}" logging.info("activate topology: %s", cmd) if os.system(cmd) != 0: - raise status.TestFailure("Failed to activate topology %s" % topology_name) + raise status.TestFailure(f"Failed to activate topology {topology_name}") logging.info("Successfully activate topology %s", topology_name) def restart_topology(heron_cli_path, cli_config_path, cluster, role, env, topology_name, container_id): - cmd = "%s restart --config-path=%s %s %s %s" % \ - (heron_cli_path, cli_config_path, - cluster_token(cluster, role, env), topology_name, str(container_id)) + cmd = f"{heron_cli_path} restart --config-path={cli_config_path} {cluster_token(cluster, role, env)} {topology_name} {str(container_id)}" logging.info("Kill container %s", cmd) if os.system(cmd) != 0: - raise status.TestFailure("Failed to kill container %s" % str(container_id)) + raise status.TestFailure(f"Failed to kill container {str(container_id)}") logging.info("Successfully kill container %s", str(container_id)) @@ -473,12 +460,11 @@ def kill_topology(heron_cli_path, cli_config_path, cluster, role, env, topology_ """ Kill a topology using heron-cli """ - cmd = "%s kill --config-path=%s %s %s" % \ - (heron_cli_path, cli_config_path, cluster_token(cluster, role, env), topology_name) + cmd = f"{heron_cli_path} kill --config-path={cli_config_path} {cluster_token(cluster, role, env)} {topology_name}" logging.info("Killing topology: %s", cmd) if os.system(cmd) != 0: - raise status.TestFailure("Failed to kill topology %s" % topology_name) + raise 
status.TestFailure(f"Failed to kill topology {topology_name}") logging.info("Successfully killed topology %s", topology_name) @@ -486,7 +472,7 @@ def kill_topology(heron_cli_path, cli_config_path, cluster, role, env, topology_ def cluster_token(cluster, role, env): if cluster == "local" or cluster == "localzk": return cluster - return "%s/%s/%s" % (cluster, role, env) + return f"{cluster}/{role}/{env}" # Topology manipulations end @@ -497,7 +483,7 @@ def run_topology_tests(conf, test_args): lock = Lock() timestamp = time.strftime('%Y%m%d%H%M%S') - http_server_host_port = "%s:%d" % (test_args.http_hostname, test_args.http_port) + http_server_host_port = f"{test_args.http_hostname}:{test_args.http_port}" if test_args.tests_bin_path.endswith("scala-integration-tests.jar"): test_topologies = filter_test_topologies(conf["scalaTopologies"], test_args.test_topology_pattern) @@ -509,7 +495,7 @@ def run_topology_tests(conf, test_args): test_topologies = filter_test_topologies(conf["pythonTopologies"], test_args.test_topology_pattern) topology_classpath_prefix = "" else: - raise ValueError("Unrecognized binary file type: %s" % test_args.tests_bin_path) + raise ValueError(f"Unrecognized binary file type: {test_args.tests_bin_path}") processing_type = conf["processingType"] @@ -578,7 +564,7 @@ def _run_single_test(topology_name, topology_conf, test_args, http_server_host_p restart_args = True if "topologyArgs" in topology_conf: - topology_args = "%s %s" % (topology_args, topology_conf["topologyArgs"]) + topology_args = f"{topology_args} {topology_conf['topologyArgs']}" expected_topo_result_file_path = \ test_args.topologies_path + "/" + topology_conf["expectedTopoResultRelativePath"] @@ -622,7 +608,7 @@ def load_result_checker(check_type, topology_name, expected_instance_state_result_handler, actual_instance_state_result_handler) else: - status.TestFailure("Unrecognized check type : %s", check_type) + status.TestFailure(f"Unrecognized check type : {check_type}") def main(): @@ 
-664,17 +650,17 @@ def main(): tests_start_time = int(time.time()) run_topology_tests(conf, args) total = len(failures) + len(successes) - logging.info("Total integration topology test time = %ss" % (int(time.time()) - tests_start_time)) + logging.info("Total integration topology test time = %s s", int(time.time()) - tests_start_time) if not failures: logging.info("SUCCESS: %s (all) tests passed:", len(successes)) for test in successes: - logging.info(" - %s: %s", ("[%ss]" % test[1]).ljust(8), test[0]) + logging.info(" - %s: %s", (f"[{test[1]}s]").ljust(8), test[0]) sys.exit(0) else: logging.error("FAILURE: %s/%s tests failed:", len(failures), total) for test in failures: - logging.error(" - %s: %s", ("[%ss]" % test[1]).ljust(8), test[0]) + logging.error(" - %s: %s", (f"[{test[1]}s]").ljust(8), test[0]) sys.exit(1) if __name__ == '__main__': diff --git a/scripts/shutils/save-logs.py b/scripts/shutils/save-logs.py index cc50c9910c7..78edb1c26b5 100755 --- a/scripts/shutils/save-logs.py +++ b/scripts/shutils/save-logs.py @@ -44,7 +44,7 @@ def shell_cmd(cmd): return " ".join(shlex.quote(c) for c in cmd) def main(file, cmd): - print("%s > %s" % (shell_cmd(cmd),file)) + print(f"{shell_cmd(cmd)} > {file}") with open(file, "w") as out: count = 0 process = subprocess.Popen(cmd, @@ -59,7 +59,7 @@ def main(file, cmd): count = count + 1 if datetime.now() > nextPrint: diff = datetime.now() - start - sys.stdout.write("\r%d seconds %d log lines"%(diff.seconds, count)) + sys.stdout.write(f"\r{diff.seconds} seconds {count} log lines") sys.stdout.flush() nextPrint = datetime.now() + timedelta(seconds=10) out.write(line.decode()) @@ -67,8 +67,8 @@ def main(file, cmd): out.close() errcode = process.wait() diff = datetime.now() - start - sys.stdout.write("\r%d seconds %d log lines"%(diff.seconds, count)) - print("\n `%s` finished with errcode: %d" % (shell_cmd(cmd), errcode)) + sys.stdout.write(f"\r{diff.seconds} seconds {count} log lines") + print(f"\n `{shell_cmd(cmd)}` finished 
with errcode: {errcode}") if errcode != 0: lines = tail(file, 1000) print('\n'.join(lines)) @@ -79,7 +79,7 @@ def main(file, cmd): try: _, file, *cmd = sys.argv except ValueError: - print("Usage: %s [file info]" % sys.argv[0]) + print(f"Usage: {sys.argv[0]} [file info]") sys.exit(1) main(file, cmd) diff --git a/third_party/python/semver/semver.py b/third_party/python/semver/semver.py index c14d9651ba8..de4d3bc5b12 100644 --- a/third_party/python/semver/semver.py +++ b/third_party/python/semver/semver.py @@ -74,7 +74,7 @@ def match(version, match_expr): else: raise ValueError("match_expr parameter should be in format , " "where is one of ['<', '>', '==', '<=', '>=']. " - "You provided: %r" % match_expr) + f"You provided: {match_expr!r}") possibilities_dict = { '>': (1,), @@ -107,12 +107,12 @@ def min_ver(ver1, ver2): def format_version(major, minor, patch, prerelease=None, build=None): - version = "%d.%d.%d" % (major, minor, patch) + version = f"{major}.{minor}.{patch}" if prerelease is not None: - version = version + "-%s" % prerelease + version = version + f"-{prerelease}" if build is not None: - version = version + "+%s" % build + version = version + f"+{build}" return version diff --git a/tools/rules/pex/wrapper/pex_wrapper.py b/tools/rules/pex/wrapper/pex_wrapper.py index b3e3a8bc9db..b8439b29539 100644 --- a/tools/rules/pex/wrapper/pex_wrapper.py +++ b/tools/rules/pex/wrapper/pex_wrapper.py @@ -119,7 +119,7 @@ def main(args=None): pex_builder._copy = True pex_builder.add_source(dereference_symlinks(src), dst) else: - raise RuntimeError("Failed to add %s: %s" % (src, err)) + raise RuntimeError(f"Failed to add {src}: {err}") # Add resources from the manifest for reqmap in manifest.get('resources', []): @@ -132,11 +132,11 @@ def main(args=None): try: pex_builder.add_dist_location(egg) except Exception as err: - raise RuntimeError("Failed to add %s: %s" % (egg, err)) + raise RuntimeError(f"Failed to add {egg}: {err}") # TODO(mikekap): Do something about 
manifest['nativeLibraries']. - pexbin.log('Saving PEX file to %s' % poptions.pex_name, + pexbin.log(f'Saving PEX file to {poptions.pex_name}', V=poptions.verbosity) tmp_name = poptions.pex_name + '~' safe_delete(tmp_name) From 1533deac9cf4998a118c9cb047d5a95e113e9f10 Mon Sep 17 00:00:00 2001 From: Nicholas Nezis Date: Sun, 3 Apr 2022 04:27:05 -0400 Subject: [PATCH 37/82] Fix compile error --- heron/instance/src/python/network/heron_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/heron/instance/src/python/network/heron_client.py b/heron/instance/src/python/network/heron_client.py index 33f7dac8a69..eee7d09aacb 100644 --- a/heron/instance/src/python/network/heron_client.py +++ b/heron/instance/src/python/network/heron_client.py @@ -246,7 +246,7 @@ def handle_timeout(self, reqid): def handle_error(self): _, t, v, tbinfo = asyncore.compact_traceback() - self_msg = f"{self._get_classname()} failed for object at {id(self):x}") + self_msg = f"{self._get_classname()} failed for object at {id(self):x}" Log.error(f"Uncaptured python exception, closing channel {self_msg} ({t}:{v} {tbinfo})") if self._connecting: From 028f87f2ff87bfa8c5a3e13b87c7e444bfcd197d Mon Sep 17 00:00:00 2001 From: Saad Ur Rahman Date: Sun, 3 Apr 2022 11:48:07 -0400 Subject: [PATCH 38/82] [Examples] word spout syntax fix. 
--- examples/src/python/spout/word_spout.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/src/python/spout/word_spout.py b/examples/src/python/spout/word_spout.py index 537cde7278a..8878a4c7bb2 100644 --- a/examples/src/python/spout/word_spout.py +++ b/examples/src/python/spout/word_spout.py @@ -37,8 +37,8 @@ def initialize(self, config, context): self.ack_count = 0 self.fail_count = 0 - self.logger.info(f"Component-specific config: \n{str(config)}" - self.logger.info(f"Context: \n{str(context)}" + self.logger.info(f"Component-specific config: \n{str(config)}") + self.logger.info(f"Context: \n{str(context)}") def next_tuple(self): word = next(self.words) From 4cdfa6d58b903e027fb12a369a12f295901ae493 Mon Sep 17 00:00:00 2001 From: Saad Ur Rahman Date: Sun, 3 Apr 2022 13:59:02 -0400 Subject: [PATCH 39/82] [Style] utils.topology.topology_context_impl --- .../utils/topology/topology_context_impl.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/heron/instance/src/python/utils/topology/topology_context_impl.py b/heron/instance/src/python/utils/topology/topology_context_impl.py index 3d5ec57e6e5..f838681e480 100644 --- a/heron/instance/src/python/utils/topology/topology_context_impl.py +++ b/heron/instance/src/python/utils/topology/topology_context_impl.py @@ -22,15 +22,15 @@ import os from collections import namedtuple -import heronpy.api.api_constants as api_constants +from heronpy.api import api_constants from heronpy.api.topology_context import TopologyContext from heronpy.api.task_hook import (ITaskHook, EmitInfo, SpoutAckInfo, SpoutFailInfo, BoltExecuteInfo, BoltAckInfo, BoltFailInfo) from heron.instance.src.python.utils.metrics import MetricsCollector -import heron.instance.src.python.utils.system_constants as system_constants -import heron.common.src.python.pex_loader as pex_loader +from heron.instance.src.python.utils import system_constants +from heron.common.src.python import pex_loader 
class TopologyContextImpl(TopologyContext): """Implemention of TopologyContext @@ -120,8 +120,8 @@ def add_task_hook(self, task_hook): :param task_hook: Implementation of ITaskHook """ if not isinstance(task_hook, ITaskHook): - raise TypeError("In add_task_hook(): attempt to add non ITaskHook instance, given: %s" - % str(type(task_hook))) + raise TypeError(f"In add_task_hook(): attempt to add non ITaskHook instance, given: " + f"{str(type(task_hook))}") self.task_hooks.append(task_hook) ##### Other exposed implementation specific methods ##### @@ -190,11 +190,11 @@ def _init_task_hooks(self): task_hook_instance = task_hook_cls() assert isinstance(task_hook_instance, ITaskHook) self.task_hooks.append(task_hook_instance) - except AssertionError: - raise RuntimeError("Auto-registered task hook not instance of ITaskHook") + except AssertionError as e: + raise RuntimeError("Auto-registered task hook not instance of ITaskHook") from e except Exception as e: - raise RuntimeError("Error with loading task hook class: %s, with error message: %s" - % (class_name, str(e))) + raise RuntimeError(f"Error with loading task hook class: {class_name,}, with error message:" + f"{str(e)}") from e def invoke_hook_prepare(self): """invoke task hooks for after the spout/bolt's initialize() method""" From 30d7b6de72f228966fa0239462b84f0a4e24752d Mon Sep 17 00:00:00 2001 From: Saad Ur Rahman Date: Sun, 3 Apr 2022 13:59:32 -0400 Subject: [PATCH 40/82] [Style] utils.misc.serializer_helper --- heron/instance/src/python/utils/misc/serializer_helper.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/heron/instance/src/python/utils/misc/serializer_helper.py b/heron/instance/src/python/utils/misc/serializer_helper.py index 3004c35887f..0fb9baaf0c8 100644 --- a/heron/instance/src/python/utils/misc/serializer_helper.py +++ b/heron/instance/src/python/utils/misc/serializer_helper.py @@ -20,7 +20,7 @@ '''serializer_helper.py''' -import heron.common.src.python.pex_loader as 
pex_loader +from heron.common.src.python import pex_loader from heronpy.api.serializer import PythonSerializer import heronpy.api.api_constants as constants @@ -41,5 +41,5 @@ def get_serializer(context): serializer = serializer_cls() return serializer except Exception as e: - raise RuntimeError("Error with loading custom serializer class: %s, with error message: %s" - % (serializer_clsname, str(e))) + raise RuntimeError(f"Error with loading custom serializer class: {serializer_clsname}," + f"with error message: {str(e)}") from e From aa052d3ff442fde5b2196572074383a41ce48598 Mon Sep 17 00:00:00 2001 From: Saad Ur Rahman Date: Sun, 3 Apr 2022 14:03:18 -0400 Subject: [PATCH 41/82] [Style] utils.misc.pplan_helper --- .../instance/src/python/utils/misc/pplan_helper.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/heron/instance/src/python/utils/misc/pplan_helper.py b/heron/instance/src/python/utils/misc/pplan_helper.py index 6c8668f9bf3..ac3d1992f91 100644 --- a/heron/instance/src/python/utils/misc/pplan_helper.py +++ b/heron/instance/src/python/utils/misc/pplan_helper.py @@ -23,7 +23,7 @@ from heron.proto import topology_pb2 from heron.common.src.python.utils.log import Log -import heron.common.src.python.pex_loader as pex_loader +from heron.common.src.python import pex_loader from heron.instance.src.python.utils.topology import TopologyContextImpl from heronpy.api.custom_grouping import ICustomGrouping @@ -119,11 +119,11 @@ def check_output_schema(self, stream_id, tup): # do some checking to make sure that the number of fields match what's expected size = self._output_schema.get(stream_id, None) if size is None: - raise RuntimeError("%s emitting to stream %s but was not declared in output fields" - % (self.my_component_name, stream_id)) + raise RuntimeError(f"{self.my_component_name} emitting to stream {stream_id}" + f"but was not declared in output fields") if size != len(tup): - raise RuntimeError(f"Number of fields emitted in 
stream {stream_id} does not match what's expected. "\ - f"Expected: {size}, Observed: {len(tup)}") + raise RuntimeError(f"Number of fields emitted in stream {stream_id} does not match" + f"what's expected. Expected: {size}, Observed: {len(tup)}") def get_my_spout(self): """Returns spout instance, or ``None`` if bolt is assigned""" @@ -199,8 +199,7 @@ def _get_dict_from_config(topology_config): config[kv.key] = default_serializer.deserialize(kv.serialized_value) else: assert kv.HasField("type") - Log.error("Unsupported config found: %s, with type: %s" - % (str(kv), str(kv.type))) + Log.error(f"Unsupported config found: {str(kv)}, with type: {str(kv.type)}") continue return config From fcc740831ac58b002c4ccae13ead16ac5dc2c945 Mon Sep 17 00:00:00 2001 From: Saad Ur Rahman Date: Sun, 3 Apr 2022 14:05:00 -0400 Subject: [PATCH 42/82] [Style] utils.misc.communicator --- heron/instance/src/python/utils/misc/communicator.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/heron/instance/src/python/utils/misc/communicator.py b/heron/instance/src/python/utils/misc/communicator.py index 4ee5d47ef23..eaa883aba4a 100644 --- a/heron/instance/src/python/utils/misc/communicator.py +++ b/heron/instance/src/python/utils/misc/communicator.py @@ -72,9 +72,9 @@ def poll(self): if self._producer_callback is not None: self._producer_callback() return ret - except Empty: + except Empty as e: Log.debug("%s: Empty in poll()", str(self)) - raise Empty + raise Empty from e def offer(self, item): """Offer to the buffer @@ -87,9 +87,9 @@ def offer(self, item): if self._consumer_callback is not None: self._consumer_callback() return True - except Full: + except Full as e: Log.debug("%s: Full in offer()", str(self)) - raise Full + raise Full from e def clear(self): """Clear the buffer""" From 3ef6cce07ed35125da9583536a09350a615ed3ca Mon Sep 17 00:00:00 2001 From: Saad Ur Rahman Date: Sun, 3 Apr 2022 14:08:40 -0400 Subject: [PATCH 43/82] [Style] utils.metrics.py_metrics --- 
heron/instance/src/python/utils/metrics/py_metrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/heron/instance/src/python/utils/metrics/py_metrics.py b/heron/instance/src/python/utils/metrics/py_metrics.py index 26ad2b6b0d4..1888774528f 100644 --- a/heron/instance/src/python/utils/metrics/py_metrics.py +++ b/heron/instance/src/python/utils/metrics/py_metrics.py @@ -80,7 +80,7 @@ def __init__(self, metrics_collector): PY_GC_GENERATION_1_THRESHOLD: self.g1_threshold, PY_GC_GENERATION_2_THRESHOLD: self.g2_threshold, PY_GC_GENERATION_3_THRESHOLD: self.g3_threshold} - super(PyMetrics, self).__init__(self.metrics) + _ = super() sys_config = system_config.get_sys_config() interval = float(sys_config[constants.HERON_METRICS_EXPORT_INTERVAL_SEC]) self.register_metrics(metrics_collector, interval) From 984698593e1fce2d692aed483e22d0134987e18e Mon Sep 17 00:00:00 2001 From: Saad Ur Rahman Date: Sun, 3 Apr 2022 14:10:57 -0400 Subject: [PATCH 44/82] [Style] utils.metrics.metrics_helper --- .../src/python/utils/metrics/metrics_helper.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/heron/instance/src/python/utils/metrics/metrics_helper.py b/heron/instance/src/python/utils/metrics/metrics_helper.py index 258b4243050..28d05fa6b2f 100644 --- a/heron/instance/src/python/utils/metrics/metrics_helper.py +++ b/heron/instance/src/python/utils/metrics/metrics_helper.py @@ -122,7 +122,7 @@ class GatewayMetrics(BaseMetricsHelper): def __init__(self, metrics_collector): sys_config = system_config.get_sys_config() - super(GatewayMetrics, self).__init__(self.metrics) + _ = super().__init__(self.metrics) interval = float(sys_config[constants.HERON_METRICS_EXPORT_INTERVAL_SEC]) self.register_metrics(metrics_collector, interval) @@ -168,7 +168,7 @@ class ComponentMetrics(BaseMetricsHelper): def __init__(self, additional_metrics): metrics = self.component_metrics metrics.update(additional_metrics) - super(ComponentMetrics, 
self).__init__(metrics) + _ = super().__init__(metrics) # pylint: disable=arguments-differ def register_metrics(self, context): @@ -179,7 +179,7 @@ def register_metrics(self, context): sys_config = system_config.get_sys_config() interval = float(sys_config[constants.HERON_METRICS_EXPORT_INTERVAL_SEC]) collector = context.get_metrics_collector() - super(ComponentMetrics, self).register_metrics(collector, interval) + _ = super().register_metrics(collector, interval) def update_out_queue_full_count(self): """Apply update to the out-queue full count""" @@ -213,7 +213,7 @@ class SpoutMetrics(ComponentMetrics): TIMEOUT_COUNT, ComponentMetrics.EMIT_COUNT] def __init__(self, pplan_helper): - super(SpoutMetrics, self).__init__(self.spout_metrics) + _ = super().__init__(self.spout_metrics) self._init_multi_count_metrics(pplan_helper) def _init_multi_count_metrics(self, pplan_helper): @@ -269,7 +269,7 @@ class BoltMetrics(ComponentMetrics): outputs_init = [ComponentMetrics.EMIT_COUNT] def __init__(self, pplan_helper): - super(BoltMetrics, self).__init__(self.bolt_metrics) + _ = super().__init__(self.bolt_metrics) self._init_multi_count_metrics(pplan_helper) def _init_multi_count_metrics(self, pplan_helper): From 2b825c6a9b4b45ba89a85118f233377e5bb9ff79 Mon Sep 17 00:00:00 2001 From: Saad Ur Rahman Date: Sun, 3 Apr 2022 14:11:59 -0400 Subject: [PATCH 45/82] [Style] network.socket_options --- heron/instance/src/python/network/socket_options.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/heron/instance/src/python/network/socket_options.py b/heron/instance/src/python/network/socket_options.py index 8de011a3c30..b53d86303c0 100644 --- a/heron/instance/src/python/network/socket_options.py +++ b/heron/instance/src/python/network/socket_options.py @@ -47,7 +47,7 @@ def create_socket_options(): return sock_opt except ValueError as e: # couldn't convert to int - raise ValueError(f"Invalid value in sys_config: {str(e)}") + raise ValueError(f"Invalid value in 
sys_config: {str(e)}") from e except KeyError as e: # option key was not found - raise KeyError(f"Incomplete sys_config: {str(e)}") + raise KeyError(f"Incomplete sys_config: {str(e)}") from e From 0c8d2fccfec5a6180163593d8aad833300ec689d Mon Sep 17 00:00:00 2001 From: Saad Ur Rahman Date: Sun, 3 Apr 2022 14:14:37 -0400 Subject: [PATCH 46/82] [Style] network.protocol --- heron/instance/src/python/network/protocol.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/heron/instance/src/python/network/protocol.py b/heron/instance/src/python/network/protocol.py index fd8105c2ba8..a8ba0f71955 100644 --- a/heron/instance/src/python/network/protocol.py +++ b/heron/instance/src/python/network/protocol.py @@ -205,13 +205,13 @@ def read(self, dispatcher): if len(self.data) == self.get_datasize(): self.is_complete = True except socket.error as e: - if e.errno == socket.errno.EAGAIN or e.errno == socket.errno.EWOULDBLOCK: + if e.errno in (socket.errno.EAGAIN, socket.errno.EWOULDBLOCK): # Try again later -> call continue_read later Log.debug("Try again error") else: # Fatal error Log.debug("Fatal error when reading IncomingPacket") - raise RuntimeError("Fatal error occured in IncomingPacket.read()") + raise RuntimeError("Fatal error occurred in IncomingPacket.read()") from e def __str__(self): return f"Packet ID: {str(self.id)}, header: {self.is_header_read}, complete: {self.is_complete}" From 7a8927e31977876cd4a71bda2c5c90a164e16cce Mon Sep 17 00:00:00 2001 From: Saad Ur Rahman Date: Sun, 3 Apr 2022 14:15:37 -0400 Subject: [PATCH 47/82] [Style] network.metricsmgr_client --- heron/instance/src/python/network/metricsmgr_client.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/heron/instance/src/python/network/metricsmgr_client.py b/heron/instance/src/python/network/metricsmgr_client.py index 93cb119f7e3..03d7a2f41e7 100644 --- a/heron/instance/src/python/network/metricsmgr_client.py +++ 
b/heron/instance/src/python/network/metricsmgr_client.py @@ -97,7 +97,8 @@ def on_response(self, status, context, response): # pylint: disable=no-self-use def on_incoming_message(self, message): - raise RuntimeError(f"Metrics Client got an unknown message from Metrics Manager: {str(message)}") + raise RuntimeError(f"Metrics Client got an unknown message from " + f"Metrics Manager: {str(message)}") def on_error(self): Log.error("Disconnected from Metrics Manager") From 4ba503c31421fe5f1057bc7fc77643b7db8e6303 Mon Sep 17 00:00:00 2001 From: Saad Ur Rahman Date: Sun, 3 Apr 2022 14:20:15 -0400 Subject: [PATCH 48/82] [Style] network.heron_client W0402: Uses of a deprecated module 'asyncore' unattended. --- heron/instance/src/python/network/heron_client.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/heron/instance/src/python/network/heron_client.py b/heron/instance/src/python/network/heron_client.py index eee7d09aacb..76c1c8d79ff 100644 --- a/heron/instance/src/python/network/heron_client.py +++ b/heron/instance/src/python/network/heron_client.py @@ -71,9 +71,8 @@ def __init__(self, looper, hostname, port, socket_map, socket_options): # for compatibility with 2.7.3 self._connecting = False - Log.debug("Initializing %s with endpoint: %s, \nsocket_map: %s, \nsocket_options: %s" - % (self._get_classname(), str(self.endpoint), - str(socket_map), str(self.socket_options))) + Log.debug(f"Initializing {self._get_classname()} with endpoint: {str(self.endpoint)}," + f"\nsocket_map: {str(socket_map)}, \nsocket_options: {str(self.socket_options)}") ################################## @@ -297,7 +296,7 @@ def _handle_packet(self, packet): except Exception as e: Log.error(f"Error when handling message packet: {str(e)}") Log.error(traceback.format_exc()) - raise RuntimeError("Problem reading message") + raise RuntimeError("Problem reading message") from e else: # might be a timeout response Log.info(f"In handle_packet(): Received message whose REQID is not 
registered: {str(reqid)}") From ebd9f753b03d03fc789250ef2fa9a8b004cd8a26 Mon Sep 17 00:00:00 2001 From: Saad Ur Rahman Date: Sun, 3 Apr 2022 14:22:07 -0400 Subject: [PATCH 49/82] [Style] network.gateway_looper W0402: Uses of a deprecated module 'asyncore' unattended. --- heron/instance/src/python/network/gateway_looper.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/heron/instance/src/python/network/gateway_looper.py b/heron/instance/src/python/network/gateway_looper.py index 4bd7b443e85..1cff52a60da 100644 --- a/heron/instance/src/python/network/gateway_looper.py +++ b/heron/instance/src/python/network/gateway_looper.py @@ -49,7 +49,7 @@ def __init__(self, socket_map): :param socket_map: socket map used for asyncore.dispatcher """ - super(GatewayLooper, self).__init__() + _ = super().__init__() self.sock_map = socket_map # Pipe used for wake up select @@ -70,7 +70,7 @@ def wake_up(self): Log.debug("Wake up called") def on_exit(self): - super(GatewayLooper, self).on_exit() + _ = super().on_exit() os.close(self.pipe_r) os.close(self.pipe_w) From fc3d531df87c58972112ca260e258990383415f3 Mon Sep 17 00:00:00 2001 From: Nicholas Nezis Date: Sun, 3 Apr 2022 14:34:29 -0400 Subject: [PATCH 50/82] More style fixes --- .../python/handlers/killexecutorhandler.py | 2 +- heron/shell/src/python/utils.py | 14 ++++++++----- .../tracker/src/python/routers/container.py | 20 +++++++++---------- .../tracker/src/python/routers/metrics.py | 10 +++++----- heron/tools/tracker/src/python/topology.py | 7 ++----- heron/tools/tracker/src/python/tracker.py | 6 ++---- .../bazel/checkstyle/CppCheckstyle.java | 14 +++++++------ .../org/apache/bazel/cppcheck/CppCheck.java | 14 +++++++------ 8 files changed, 45 insertions(+), 42 deletions(-) diff --git a/heron/shell/src/python/handlers/killexecutorhandler.py b/heron/shell/src/python/handlers/killexecutorhandler.py index ab0c3eea4b7..a2d448d376c 100644 --- a/heron/shell/src/python/handlers/killexecutorhandler.py +++ 
b/heron/shell/src/python/handlers/killexecutorhandler.py @@ -63,7 +63,7 @@ def is_local(): if instanceId.startswith('heron-executor-'): # kill heron-executor kill_parent() else: # kill other normal instance - fh = open(filepath) + fh = open(filepath, encoding='utf8') firstLine = int(fh.readline()) fh.close() logger.info("Killing process %s %s", instanceId, firstLine) diff --git a/heron/shell/src/python/utils.py b/heron/shell/src/python/utils.py index 2dc8fee37d2..3135445ed48 100644 --- a/heron/shell/src/python/utils.py +++ b/heron/shell/src/python/utils.py @@ -51,6 +51,7 @@ def stat_type(md): def triple(md): ''' triple ''' + # pylint: disable=consider-using-f-string return '%c%c%c' % ( 'r' if md & 0b100 else '-', 'w' if md & 0b010 else '-', @@ -64,6 +65,7 @@ def format_mtime(mtime): """ now = datetime.now() dt = datetime.fromtimestamp(mtime) + # pylint: disable=consider-using-f-string return '%s %2d %5s' % ( dt.strftime('%b'), dt.day, dt.year if dt.year != now.year else dt.strftime('%H:%M')) @@ -86,6 +88,7 @@ def format_prefix(filename, sres): except KeyError: group = sres.st_gid + # pylint: disable=consider-using-f-string return '%s %3d %10s %10s %10d %s' % ( format_mode(sres), sres.st_nlink, @@ -156,6 +159,7 @@ def pipe(prev_proc, to_cmd): Pipes output of prev_proc into to_cmd. Returns piped process """ + # pylint: disable=consider-using-with stdin = None if prev_proc is None else prev_proc.stdout process = subprocess.Popen(to_cmd, stdout=subprocess.PIPE, @@ -168,12 +172,12 @@ def str_cmd(cmd, cwd, env): """ Runs the command and returns its stdout and stderr. 
""" - process = subprocess.Popen(cmd, stdout=subprocess.PIPE, + with subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=cwd, - env=env, universal_newlines=True) - stdout_builder, stderr_builder = proc.async_stdout_stderr_builder(process) - process.wait() - stdout, stderr = stdout_builder.result(), stderr_builder.result() + env=env, universal_newlines=True) as process: + stdout_builder, stderr_builder = proc.async_stdout_stderr_builder(process) + process.wait() + stdout, stderr = stdout_builder.result(), stderr_builder.result() return {'command': ' '.join(cmd), 'stderr': stderr, 'stdout': stdout} def get_container_id(instance_id): diff --git a/heron/tools/tracker/src/python/routers/container.py b/heron/tools/tracker/src/python/routers/container.py index f80d9b4b650..c978ddd53b9 100644 --- a/heron/tools/tracker/src/python/routers/container.py +++ b/heron/tools/tracker/src/python/routers/container.py @@ -24,9 +24,6 @@ """ from typing import List, Optional -from heron.proto import common_pb2, tmanager_pb2 -from heron.tools.tracker.src.python import state, utils - import httpx # from fastapi import Query @@ -34,6 +31,9 @@ from pydantic import BaseModel, Field from starlette.responses import StreamingResponse +from heron.proto import common_pb2, tmanager_pb2 +from heron.tools.tracker.src.python import state, utils + router = APIRouter() @@ -201,13 +201,13 @@ async def get_exceptions( # pylint: disable=too-many-arguments ret = [] for exception_log in exception_response.exceptions: ret.append(ExceptionLog( - hostname=exception_log.hostname, - instance_id=exception_log.instance_id, - stacktrace=exception_log.stacktrace, - lasttime=exception_log.lasttime, - firsttime=exception_log.firsttime, - count=str(exception_log.count), - logging=exception_log.logging, + hostname = exception_log.hostname, + instance_id = exception_log.instance_id, + stacktrace = exception_log.stacktrace, + lasttime = exception_log.lasttime, + firsttime = exception_log.firsttime, + 
count = str(exception_log.count), + logging = exception_log.logging, )) return ret diff --git a/heron/tools/tracker/src/python/routers/metrics.py b/heron/tools/tracker/src/python/routers/metrics.py index 1246a7e671b..869a3a43dc5 100644 --- a/heron/tools/tracker/src/python/routers/metrics.py +++ b/heron/tools/tracker/src/python/routers/metrics.py @@ -23,6 +23,11 @@ """ from typing import Dict, List, Optional +import httpx + +from fastapi import Query, APIRouter +from pydantic import BaseModel, Field + from heron.common.src.python.utils.log import Log from heron.proto import common_pb2 from heron.proto import tmanager_pb2 @@ -30,11 +35,6 @@ from heron.tools.tracker.src.python.query import Query as TManagerQuery from heron.tools.tracker.src.python.utils import BadRequest -import httpx - -from fastapi import Query, APIRouter -from pydantic import BaseModel, Field - router = APIRouter() class ComponentMetrics(BaseModel): diff --git a/heron/tools/tracker/src/python/topology.py b/heron/tools/tracker/src/python/topology.py index 15c4de612df..39f9de61be2 100644 --- a/heron/tools/tracker/src/python/topology.py +++ b/heron/tools/tracker/src/python/topology.py @@ -22,9 +22,11 @@ import dataclasses import json import string +import networkx from copy import deepcopy from typing import Any, Dict, List, Optional +from pydantic import BaseModel, Field from heron.proto import topology_pb2 from heron.proto.execution_state_pb2 import ExecutionState as ExecutionState_pb @@ -39,11 +41,6 @@ ) from heron.tools.tracker.src.python import utils -import networkx - -from pydantic import BaseModel, Field - - class TopologyInfoMetadata(BaseModel): cluster: str environ: str diff --git a/heron/tools/tracker/src/python/tracker.py b/heron/tools/tracker/src/python/tracker.py index 5fb2fce4167..3d20ad4bae7 100644 --- a/heron/tools/tracker/src/python/tracker.py +++ b/heron/tools/tracker/src/python/tracker.py @@ -102,10 +102,8 @@ def get_topology( and t.environ == environ] if len(topologies) != 1: if 
role is not None: - raise KeyError("Topology not found for {0}, {1}, {2}, {3}".format( - cluster, role, environ, topology_name)) - raise KeyError("Topology not found for {0}, {1}, {2}".format( - cluster, environ, topology_name)) + raise KeyError(f"Topology not found for {cluster}, {role}, {environ}, {topology_name}") + raise KeyError(f"Topology not found for {cluster}, {environ}, {topology_name}") # There is only one topology which is returned. return topologies[0] diff --git a/tools/java/src/org/apache/bazel/checkstyle/CppCheckstyle.java b/tools/java/src/org/apache/bazel/checkstyle/CppCheckstyle.java index 885c898b919..cc4e2ec53e5 100644 --- a/tools/java/src/org/apache/bazel/checkstyle/CppCheckstyle.java +++ b/tools/java/src/org/apache/bazel/checkstyle/CppCheckstyle.java @@ -137,12 +137,14 @@ private static Collection getSourceFiles(String extraActionFile) { return Collections2.filter( cppInfo.getSourcesAndHeadersList(), Predicates.and( - Predicates.not(Predicates.containsPattern("external/")), - Predicates.not(Predicates.containsPattern("third_party/")), - Predicates.not(Predicates.containsPattern("config/heron-config.h")), - Predicates.not(Predicates.containsPattern(".*pb.h$")), - Predicates.not(Predicates.containsPattern(".*cc_wrapper.sh$")), - Predicates.not(Predicates.containsPattern(".*pb.cc$")) + Predicates.not(Predicates.containsPattern("external/")), + Predicates.not(Predicates.containsPattern("third_party/")), + Predicates.not(Predicates.containsPattern("config/heron-config.h")), + Predicates.not(Predicates.containsPattern(".*cppmap")), + Predicates.not(Predicates.containsPattern(".*srcjar")), + Predicates.not(Predicates.containsPattern(".*pb.h$")), + Predicates.not(Predicates.containsPattern(".*cc_wrapper.sh$")), + Predicates.not(Predicates.containsPattern(".*pb.cc$")) ) ); } diff --git a/tools/java/src/org/apache/bazel/cppcheck/CppCheck.java b/tools/java/src/org/apache/bazel/cppcheck/CppCheck.java index 6cfa9de4762..cd032be3af1 100644 --- 
a/tools/java/src/org/apache/bazel/cppcheck/CppCheck.java +++ b/tools/java/src/org/apache/bazel/cppcheck/CppCheck.java @@ -132,12 +132,14 @@ private static Collection getSourceFiles(String extraActionFile) { return Collections2.filter( cppInfo.getSourcesAndHeadersList(), Predicates.and( - Predicates.not(Predicates.containsPattern("external/")), - Predicates.not(Predicates.containsPattern("third_party/")), - Predicates.not(Predicates.containsPattern("config/heron-config.h")), - Predicates.not(Predicates.containsPattern(".*pb.h$")), - Predicates.not(Predicates.containsPattern(".*cc_wrapper.sh$")), - Predicates.not(Predicates.containsPattern(".*pb.cc$")) + Predicates.not(Predicates.containsPattern("external/")), + Predicates.not(Predicates.containsPattern("third_party/")), + Predicates.not(Predicates.containsPattern("config/heron-config.h")), + Predicates.not(Predicates.containsPattern(".*cppmap")), + Predicates.not(Predicates.containsPattern(".*srcjar")), + Predicates.not(Predicates.containsPattern(".*pb.h$")), + Predicates.not(Predicates.containsPattern(".*cc_wrapper.sh$")), + Predicates.not(Predicates.containsPattern(".*pb.cc$")) ) ); } From 0d3e833397aae7e4804eb93f59fb480b8b5ad18d Mon Sep 17 00:00:00 2001 From: Saad Ur Rahman Date: Sun, 3 Apr 2022 15:55:09 -0400 Subject: [PATCH 51/82] [Style] instance W1514: Using open without explicitly specifying an encoding (unspecified-encoding) unattended. --- heron/instance/src/python/instance.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/heron/instance/src/python/instance.py b/heron/instance/src/python/instance.py index 96188a95714..8d64be5d719 100644 --- a/heron/instance/src/python/instance.py +++ b/heron/instance/src/python/instance.py @@ -19,6 +19,7 @@ # under the License. 
'''module for single-thread Heron Instance in python''' +import click import collections import logging import os @@ -43,8 +44,6 @@ from heronpy.api import api_constants from heronpy.api.state.state import HashMapState -import click - Log = log.Log AssignedInstance = collections.namedtuple('AssignedInstance', 'is_spout, protobuf, py_class') @@ -179,7 +178,7 @@ def handle_restore_instance_state(self, restore_msg): try: self.stateful_state = self.serializer.deserialize(restore_msg.state.state) except Exception as e: - raise RuntimeError("Could not serialize state during restore " + str(e)) + raise RuntimeError("Could not serialize state during restore " + str(e)) from e else: Log.info("The restore request does not have an actual state") if self.stateful_state is None: @@ -245,7 +244,8 @@ def handle_assignment_msg(self, pplan): self._handle_assignment_msg(new_helper) else: Log.info("Received a new Physical Plan with the same assignment -- State Change") - Log.info(f"Old state: {self.my_pplan_helper.get_topology_state()}, new state: {new_helper.get_topology_state()}.") + Log.info(f"Old state: {self.my_pplan_helper.get_topology_state()}, " + f"new state: {new_helper.get_topology_state()}.") self._handle_state_change_msg(new_helper) def _handle_assignment_msg(self, pplan_helper): From 8456e584cace3de59424636297dcf51e03ac9fc1 Mon Sep 17 00:00:00 2001 From: Saad Ur Rahman Date: Sun, 3 Apr 2022 15:59:24 -0400 Subject: [PATCH 52/82] [Style] basics.spout_instance --- .../src/python/basics/spout_instance.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/heron/instance/src/python/basics/spout_instance.py b/heron/instance/src/python/basics/spout_instance.py index 48c86ff090c..b282aa4ca42 100644 --- a/heron/instance/src/python/basics/spout_instance.py +++ b/heron/instance/src/python/basics/spout_instance.py @@ -25,7 +25,7 @@ import collections from heronpy.api.stream import Stream -import heronpy.api.api_constants as api_constants +from 
heronpy.api import api_constants from heronpy.api.state.stateful_component import StatefulComponent from heron.common.src.python.utils.log import Log @@ -35,7 +35,7 @@ from heron.proto import topology_pb2, tuple_pb2, ckptmgr_pb2 -import heron.instance.src.python.utils.system_constants as system_constants +from heron.instance.src.python.utils import system_constants from .base_instance import BaseInstance @@ -44,7 +44,7 @@ class SpoutInstance(BaseInstance): """The base class for all heron spouts in Python""" def __init__(self, pplan_helper, in_stream, out_stream, looper): - super(SpoutInstance, self).__init__(pplan_helper, in_stream, out_stream, looper) + _ = super().__init__(pplan_helper, in_stream, out_stream, looper) self.topology_state = topology_pb2.TopologyState.Value("PAUSED") if not self.pplan_helper.is_spout: @@ -69,7 +69,7 @@ def __init__(self, pplan_helper, in_stream, out_stream, looper): self.total_tuples_emitted = 0 # load user's spout class - spout_impl_class = super(SpoutInstance, self).load_py_instance(is_spout=True) + spout_impl_class = super().load_py_instance(is_spout=True) self.spout_impl = spout_impl_class(delegate=self) def start_component(self, stateful_state): @@ -131,7 +131,8 @@ def emit(self, tup, tup_id=None, stream=Stream.DEFAULT_STREAM_ID, if direct_task is not None: if not isinstance(direct_task, int): - raise TypeError(f"direct_task argument needs to be an integer, given: {str(type(direct_task))}") + raise TypeError(f"direct_task argument needs to be an integer, " + f"given: {str(type(direct_task))}") # performing emit-direct data_tuple.dest_task_ids.append(direct_task) elif custom_target_task_ids is not None: @@ -163,7 +164,7 @@ def emit(self, tup, tup_id=None, stream=Stream.DEFAULT_STREAM_ID, serialize_latency_ns = (time.time() - start_time) * system_constants.SEC_TO_NS self.spout_metrics.serialize_data_tuple(stream, serialize_latency_ns) - super(SpoutInstance, self).admit_data_tuple(stream_id=stream, data_tuple=data_tuple, + _ = 
super().admit_data_tuple(stream_id=stream, data_tuple=data_tuple, tuple_size_in_bytes=tuple_size_in_bytes) self.total_tuples_emitted += 1 self.spout_metrics.update_emit_count(stream) @@ -327,8 +328,8 @@ def _look_for_timeouts(self): def _handle_ack_tuple(self, tup, is_success): for rt in tup.roots: if rt.taskid != self.pplan_helper.my_task_id: - raise RuntimeError("Receiving tuple for task: %s in task: %s" - % (str(rt.taskid), str(self.pplan_helper.my_task_id))) + raise RuntimeError(f"Receiving tuple for task: {str(rt.taskid)}" + f" in task: {str(self.pplan_helper.my_task_id)}") try: tuple_info = self.in_flight_tuples.pop(rt.key) except KeyError: From 3967c80c5dc06f4a403380986a92f4cf055fe261 Mon Sep 17 00:00:00 2001 From: Saad Ur Rahman Date: Sun, 3 Apr 2022 16:03:18 -0400 Subject: [PATCH 53/82] [Style] basics.bolt_instance --- .../src/python/basics/bolt_instance.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/heron/instance/src/python/basics/bolt_instance.py b/heron/instance/src/python/basics/bolt_instance.py index 5b597e87450..bc5fb50c7b4 100644 --- a/heron/instance/src/python/basics/bolt_instance.py +++ b/heron/instance/src/python/basics/bolt_instance.py @@ -27,9 +27,9 @@ from heron.proto import topology_pb2, tuple_pb2, ckptmgr_pb2 from heron.instance.src.python.utils.metrics import BoltMetrics from heron.instance.src.python.utils.tuple import TupleHelper, HeronTuple -import heron.instance.src.python.utils.system_constants as system_constants +from heron.instance.src.python.utils import system_constants -import heronpy.api.api_constants as api_constants +from heronpy.api import api_constants from heronpy.api.state.stateful_component import StatefulComponent from heronpy.api.stream import Stream @@ -39,7 +39,7 @@ class BoltInstance(BaseInstance): """The base class for all heron bolts in Python""" def __init__(self, pplan_helper, in_stream, out_stream, looper): - super(BoltInstance, self).__init__(pplan_helper, in_stream, 
out_stream, looper) + _ = super().__init__(pplan_helper, in_stream, out_stream, looper) self.topology_state = topology_pb2.TopologyState.Value("PAUSED") if self.pplan_helper.is_spout: @@ -57,7 +57,7 @@ def __init__(self, pplan_helper, in_stream, out_stream, looper): Log.info(f"Enable ACK: {str(self.acking_enabled)}") # load user's bolt class - bolt_impl_class = super(BoltInstance, self).load_py_instance(is_spout=False) + bolt_impl_class = super().load_py_instance(is_spout=False) self.bolt_impl = bolt_impl_class(delegate=self) def start_component(self, stateful_state): @@ -116,8 +116,8 @@ def emit(self, tup, stream=Stream.DEFAULT_STREAM_ID, if direct_task is not None: if not isinstance(direct_task, int): - raise TypeError("direct_task argument needs to be an integer, given: %s" - % str(type(direct_task))) + raise TypeError(f"direct_task argument needs to be an integer, " + f"given: {str(type(direct_task))}") # performing emit-direct data_tuple.dest_task_ids.append(direct_task) elif custom_target_task_ids is not None: @@ -145,7 +145,7 @@ def emit(self, tup, stream=Stream.DEFAULT_STREAM_ID, serialize_latency_ns = (time.time() - start_time) * system_constants.SEC_TO_NS self.bolt_metrics.serialize_data_tuple(stream, serialize_latency_ns) - super(BoltInstance, self).admit_data_tuple(stream_id=stream, data_tuple=data_tuple, + _ = super().admit_data_tuple(stream_id=stream, data_tuple=data_tuple, tuple_size_in_bytes=tuple_size_in_bytes) self.bolt_metrics.update_emit_count(stream) @@ -260,7 +260,7 @@ def ack(self, tup): to_add = ack_tuple.roots.add() to_add.CopyFrom(rt) tuple_size_in_bytes += rt.ByteSize() - super(BoltInstance, self).admit_control_tuple(ack_tuple, tuple_size_in_bytes, True) + _ = super().admit_control_tuple(ack_tuple, tuple_size_in_bytes, True) process_latency_ns = (time.time() - tup.creation_time) * system_constants.SEC_TO_NS self.pplan_helper.context.invoke_hook_bolt_ack(tup, process_latency_ns) @@ -284,7 +284,7 @@ def fail(self, tup): to_add = 
fail_tuple.roots.add() to_add.CopyFrom(rt) tuple_size_in_bytes += rt.ByteSize() - super(BoltInstance, self).admit_control_tuple(fail_tuple, tuple_size_in_bytes, False) + _ = super().admit_control_tuple(fail_tuple, tuple_size_in_bytes, False) fail_latency_ns = (time.time() - tup.creation_time) * system_constants.SEC_TO_NS self.pplan_helper.context.invoke_hook_bolt_fail(tup, fail_latency_ns) From 47682fdaddeb70d2a82480b04df7dbba6a57503b Mon Sep 17 00:00:00 2001 From: Saad Ur Rahman Date: Sun, 3 Apr 2022 16:08:32 -0400 Subject: [PATCH 54/82] [Style] basics.base_instance --- heron/instance/src/python/basics/base_instance.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/heron/instance/src/python/basics/base_instance.py b/heron/instance/src/python/basics/base_instance.py index 224494a12a6..ab2e16c3894 100644 --- a/heron/instance/src/python/basics/base_instance.py +++ b/heron/instance/src/python/basics/base_instance.py @@ -30,11 +30,11 @@ from heron.instance.src.python.utils.misc import SerializerHelper from heron.instance.src.python.utils.misc import OutgoingTupleHelper from heron.instance.src.python.utils import system_config -import heron.instance.src.python.utils.system_constants as system_constants -import heron.common.src.python.pex_loader as pex_loader +from heron.instance.src.python.utils import system_constants +from heron.common.src.python import pex_loader -import heronpy.api.global_metrics as global_metrics -import heronpy.api.api_constants as api_constants +from heronpy.api import global_metrics +from heronpy.api import api_constants from heronpy.api.state.stateful_component import StatefulComponent # pylint: disable=too-many-instance-attributes @@ -114,11 +114,11 @@ def load_py_instance(self, is_spout): if is_spout: spout_proto = self.pplan_helper.get_my_spout() py_classpath = spout_proto.comp.class_name - self.logger.info(f"Loading Spout from: {py_classpath}") + self.logger.info("Loading Spout from: %s", py_classpath) 
else: bolt_proto = self.pplan_helper.get_my_bolt() py_classpath = bolt_proto.comp.class_name - self.logger.info(f"Loading Bolt from: {py_classpath}") + self.logger.info("Loading Bolt from: %s", py_classpath) pex_loader.load_pex(self.pplan_helper.topology_pex_abs_path) spbl_class = pex_loader.import_and_get_class(self.pplan_helper.topology_pex_abs_path, @@ -126,7 +126,7 @@ def load_py_instance(self, is_spout): except Exception as e: spbl = "spout" if is_spout else "bolt" self.logger.error(traceback.format_exc()) - raise RuntimeError(f"Error when loading a {spbl} from pex: {str(e)}") + raise RuntimeError(f"Error when loading a {spbl} from pex: {str(e)}") from e return spbl_class def handle_initiate_stateful_checkpoint(self, ckptmsg, component): From e7e673e1e4b60057775bd14fa302ca05ba39d396 Mon Sep 17 00:00:00 2001 From: Saad Ur Rahman Date: Sun, 3 Apr 2022 16:17:28 -0400 Subject: [PATCH 55/82] [Style] utils.metrics.py_metrics Added back dropped __init__ call to super class. --- heron/instance/src/python/utils/metrics/py_metrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/heron/instance/src/python/utils/metrics/py_metrics.py b/heron/instance/src/python/utils/metrics/py_metrics.py index 1888774528f..c14b90c828e 100644 --- a/heron/instance/src/python/utils/metrics/py_metrics.py +++ b/heron/instance/src/python/utils/metrics/py_metrics.py @@ -80,7 +80,7 @@ def __init__(self, metrics_collector): PY_GC_GENERATION_1_THRESHOLD: self.g1_threshold, PY_GC_GENERATION_2_THRESHOLD: self.g2_threshold, PY_GC_GENERATION_3_THRESHOLD: self.g3_threshold} - _ = super() + _ = super().__init__(self.metrics) sys_config = system_config.get_sys_config() interval = float(sys_config[constants.HERON_METRICS_EXPORT_INTERVAL_SEC]) self.register_metrics(metrics_collector, interval) From 7417292b785b9c09af8eea5ecf950899952caa1c Mon Sep 17 00:00:00 2001 From: Saad Ur Rahman Date: Sun, 3 Apr 2022 17:17:43 -0400 Subject: [PATCH 56/82] [Style] fixes to super() calls. 
--- heron/instance/src/python/basics/bolt_instance.py | 8 ++++---- heron/instance/src/python/basics/spout_instance.py | 4 ++-- heron/instance/src/python/instance.py | 2 +- heron/instance/src/python/network/gateway_looper.py | 4 ++-- .../src/python/utils/metrics/metrics_helper.py | 10 +++++----- 5 files changed, 14 insertions(+), 14 deletions(-) diff --git a/heron/instance/src/python/basics/bolt_instance.py b/heron/instance/src/python/basics/bolt_instance.py index bc5fb50c7b4..7a1a34f2367 100644 --- a/heron/instance/src/python/basics/bolt_instance.py +++ b/heron/instance/src/python/basics/bolt_instance.py @@ -39,7 +39,7 @@ class BoltInstance(BaseInstance): """The base class for all heron bolts in Python""" def __init__(self, pplan_helper, in_stream, out_stream, looper): - _ = super().__init__(pplan_helper, in_stream, out_stream, looper) + super().__init__(pplan_helper, in_stream, out_stream, looper) self.topology_state = topology_pb2.TopologyState.Value("PAUSED") if self.pplan_helper.is_spout: @@ -145,7 +145,7 @@ def emit(self, tup, stream=Stream.DEFAULT_STREAM_ID, serialize_latency_ns = (time.time() - start_time) * system_constants.SEC_TO_NS self.bolt_metrics.serialize_data_tuple(stream, serialize_latency_ns) - _ = super().admit_data_tuple(stream_id=stream, data_tuple=data_tuple, + super().admit_data_tuple(stream_id=stream, data_tuple=data_tuple, tuple_size_in_bytes=tuple_size_in_bytes) self.bolt_metrics.update_emit_count(stream) @@ -260,7 +260,7 @@ def ack(self, tup): to_add = ack_tuple.roots.add() to_add.CopyFrom(rt) tuple_size_in_bytes += rt.ByteSize() - _ = super().admit_control_tuple(ack_tuple, tuple_size_in_bytes, True) + super().admit_control_tuple(ack_tuple, tuple_size_in_bytes, True) process_latency_ns = (time.time() - tup.creation_time) * system_constants.SEC_TO_NS self.pplan_helper.context.invoke_hook_bolt_ack(tup, process_latency_ns) @@ -284,7 +284,7 @@ def fail(self, tup): to_add = fail_tuple.roots.add() to_add.CopyFrom(rt) tuple_size_in_bytes += 
rt.ByteSize() - _ = super().admit_control_tuple(fail_tuple, tuple_size_in_bytes, False) + super().admit_control_tuple(fail_tuple, tuple_size_in_bytes, False) fail_latency_ns = (time.time() - tup.creation_time) * system_constants.SEC_TO_NS self.pplan_helper.context.invoke_hook_bolt_fail(tup, fail_latency_ns) diff --git a/heron/instance/src/python/basics/spout_instance.py b/heron/instance/src/python/basics/spout_instance.py index b282aa4ca42..fb01fbead7f 100644 --- a/heron/instance/src/python/basics/spout_instance.py +++ b/heron/instance/src/python/basics/spout_instance.py @@ -44,7 +44,7 @@ class SpoutInstance(BaseInstance): """The base class for all heron spouts in Python""" def __init__(self, pplan_helper, in_stream, out_stream, looper): - _ = super().__init__(pplan_helper, in_stream, out_stream, looper) + super().__init__(pplan_helper, in_stream, out_stream, looper) self.topology_state = topology_pb2.TopologyState.Value("PAUSED") if not self.pplan_helper.is_spout: @@ -164,7 +164,7 @@ def emit(self, tup, tup_id=None, stream=Stream.DEFAULT_STREAM_ID, serialize_latency_ns = (time.time() - start_time) * system_constants.SEC_TO_NS self.spout_metrics.serialize_data_tuple(stream, serialize_latency_ns) - _ = super().admit_data_tuple(stream_id=stream, data_tuple=data_tuple, + super().admit_data_tuple(stream_id=stream, data_tuple=data_tuple, tuple_size_in_bytes=tuple_size_in_bytes) self.total_tuples_emitted += 1 self.spout_metrics.update_emit_count(stream) diff --git a/heron/instance/src/python/instance.py b/heron/instance/src/python/instance.py index 8d64be5d719..8f5ece1f661 100644 --- a/heron/instance/src/python/instance.py +++ b/heron/instance/src/python/instance.py @@ -19,13 +19,13 @@ # under the License. 
'''module for single-thread Heron Instance in python''' -import click import collections import logging import os import resource import signal import traceback +import click import yaml from heron.common.src.python.utils import log diff --git a/heron/instance/src/python/network/gateway_looper.py b/heron/instance/src/python/network/gateway_looper.py index 1cff52a60da..50d796a13f7 100644 --- a/heron/instance/src/python/network/gateway_looper.py +++ b/heron/instance/src/python/network/gateway_looper.py @@ -49,7 +49,7 @@ def __init__(self, socket_map): :param socket_map: socket map used for asyncore.dispatcher """ - _ = super().__init__() + super().__init__() self.sock_map = socket_map # Pipe used for wake up select @@ -70,7 +70,7 @@ def wake_up(self): Log.debug("Wake up called") def on_exit(self): - _ = super().on_exit() + super().on_exit() os.close(self.pipe_r) os.close(self.pipe_w) diff --git a/heron/instance/src/python/utils/metrics/metrics_helper.py b/heron/instance/src/python/utils/metrics/metrics_helper.py index 28d05fa6b2f..ad31bea8eee 100644 --- a/heron/instance/src/python/utils/metrics/metrics_helper.py +++ b/heron/instance/src/python/utils/metrics/metrics_helper.py @@ -122,7 +122,7 @@ class GatewayMetrics(BaseMetricsHelper): def __init__(self, metrics_collector): sys_config = system_config.get_sys_config() - _ = super().__init__(self.metrics) + super().__init__(self.metrics) interval = float(sys_config[constants.HERON_METRICS_EXPORT_INTERVAL_SEC]) self.register_metrics(metrics_collector, interval) @@ -168,7 +168,7 @@ class ComponentMetrics(BaseMetricsHelper): def __init__(self, additional_metrics): metrics = self.component_metrics metrics.update(additional_metrics) - _ = super().__init__(metrics) + super().__init__(metrics) # pylint: disable=arguments-differ def register_metrics(self, context): @@ -179,7 +179,7 @@ def register_metrics(self, context): sys_config = system_config.get_sys_config() interval = 
float(sys_config[constants.HERON_METRICS_EXPORT_INTERVAL_SEC]) collector = context.get_metrics_collector() - _ = super().register_metrics(collector, interval) + super().register_metrics(collector, interval) def update_out_queue_full_count(self): """Apply update to the out-queue full count""" @@ -213,7 +213,7 @@ class SpoutMetrics(ComponentMetrics): TIMEOUT_COUNT, ComponentMetrics.EMIT_COUNT] def __init__(self, pplan_helper): - _ = super().__init__(self.spout_metrics) + super().__init__(self.spout_metrics) self._init_multi_count_metrics(pplan_helper) def _init_multi_count_metrics(self, pplan_helper): @@ -269,7 +269,7 @@ class BoltMetrics(ComponentMetrics): outputs_init = [ComponentMetrics.EMIT_COUNT] def __init__(self, pplan_helper): - _ = super().__init__(self.bolt_metrics) + super().__init__(self.bolt_metrics) self._init_multi_count_metrics(pplan_helper) def _init_multi_count_metrics(self, pplan_helper): From b9fd15fcdb54d6a07564fe1fe94f6c4f731a4e08 Mon Sep 17 00:00:00 2001 From: Nicholas Nezis Date: Sun, 3 Apr 2022 18:09:50 -0400 Subject: [PATCH 57/82] More style cleanup --- heron/instance/src/python/instance.py | 4 ++-- heron/tools/cli/src/python/submit.py | 15 ++++++++------- heronpy/connectors/mock/arraylooper.py | 2 +- heronpy/connectors/pulsar/pulsarstreamlet.py | 2 +- .../connectors/textfiles/textfilesgenerator.py | 2 +- heronpy/streamlet/impl/consumebolt.py | 2 +- heronpy/streamlet/impl/filterbolt.py | 2 +- heronpy/streamlet/impl/flatmapbolt.py | 2 +- heronpy/streamlet/impl/generatorspout.py | 2 +- heronpy/streamlet/impl/joinbolt.py | 4 ++-- heronpy/streamlet/impl/logbolt.py | 2 +- heronpy/streamlet/impl/mapbolt.py | 2 +- .../streamlet/impl/reducebykeyandwindowbolt.py | 4 ++-- heronpy/streamlet/impl/reducebywindowbolt.py | 4 ++-- heronpy/streamlet/impl/repartitionbolt.py | 2 +- heronpy/streamlet/impl/supplierspout.py | 2 +- heronpy/streamlet/impl/transformbolt.py | 2 +- heronpy/streamlet/impl/unionbolt.py | 2 +- heronpy/streamlet/keyedwindow.py | 2 +- 
heronpy/streamlet/resources.py | 2 +- heronpy/streamlet/window.py | 2 +- .../core/integration_test_bolt.py | 10 +++++----- .../core/integration_test_spout.py | 4 ++-- .../python/integration_test/core/test_runner.py | 2 +- .../core/test_topology_builder.py | 2 +- 25 files changed, 41 insertions(+), 40 deletions(-) diff --git a/heron/instance/src/python/instance.py b/heron/instance/src/python/instance.py index 8f5ece1f661..59d8196f17c 100644 --- a/heron/instance/src/python/instance.py +++ b/heron/instance/src/python/instance.py @@ -102,7 +102,7 @@ def __init__(self, topology_name, topology_id, instance, # Debugging purposes def go_trace(_, stack): - with open("/tmp/trace.log", "w") as f: + with open("/tmp/trace.log", "w", encoding='utf8') as f: traceback.print_stack(stack, file=f) self.looper.register_timer_task_in_sec(self.looper.exit_loop, 0.0) signal.signal(signal.SIGUSR1, go_trace) @@ -320,7 +320,7 @@ def yaml_config_reader(config_path): if not config_path.endswith(".yaml"): raise ValueError("Config file not yaml") - with open(config_path, 'r') as f: + with open(config_path, 'r', encoding='utf8') as f: config = yaml.safe_load(f) return config diff --git a/heron/tools/cli/src/python/submit.py b/heron/tools/cli/src/python/submit.py index 5f2e6cb142a..af77641f026 100644 --- a/heron/tools/cli/src/python/submit.py +++ b/heron/tools/cli/src/python/submit.py @@ -31,13 +31,13 @@ from heron.proto import topology_pb2 from heron.tools.cli.src.python.result import SimpleResult, Status import heron.tools.cli.src.python.args as cli_args -import heron.tools.cli.src.python.execute as execute -import heron.tools.cli.src.python.jars as jars -import heron.tools.cli.src.python.opts as opts -import heron.tools.cli.src.python.result as result -import heron.tools.cli.src.python.rest as rest -import heron.tools.common.src.python.utils.config as config -import heron.tools.common.src.python.utils.classpath as classpath +from heron.tools.cli.src.python import execute +from 
heron.tools.cli.src.python import jars +from heron.tools.cli.src.python import opts +from heron.tools.cli.src.python import result +from heron.tools.cli.src.python import rest +from heron.tools.common.src.python.utils import config +from heron.tools.common.src.python.utils import classpath # pylint: disable=too-many-return-statements @@ -191,6 +191,7 @@ def launch_topology_server(cl_args, topology_file, topology_defn_file, topology_ data['verbose_gc'] = True files = dict( + # pylint: disable=consider-using-with definition=open(topology_defn_file, 'rb'), topology=open(topology_file, 'rb'), ) diff --git a/heronpy/connectors/mock/arraylooper.py b/heronpy/connectors/mock/arraylooper.py index b22569674fb..13e0cc362ea 100644 --- a/heronpy/connectors/mock/arraylooper.py +++ b/heronpy/connectors/mock/arraylooper.py @@ -30,7 +30,7 @@ class ArrayLooper(Generator): """A ArrayLooper loops the contents of the a user supplied array forever """ def __init__(self, user_iterable, sleep=None): - super(ArrayLooper, self).__init__() + super().__init__() if not isinstance(user_iterable, collections.Iterable): raise RuntimeError("ArrayLooper must be passed an iterable") self._user_iterable = user_iterable diff --git a/heronpy/connectors/pulsar/pulsarstreamlet.py b/heronpy/connectors/pulsar/pulsarstreamlet.py index 2b7665d7484..be2d6de39dd 100644 --- a/heronpy/connectors/pulsar/pulsarstreamlet.py +++ b/heronpy/connectors/pulsar/pulsarstreamlet.py @@ -29,7 +29,7 @@ class PulsarStreamlet(Streamlet): """Streamlet facade on top of PulsarSpout""" def __init__(self, service_url, topic_name, stage_name=None, parallelism=None, receive_timeout_ms=None, input_schema=None): - super(PulsarStreamlet, self).__init__(parents=[], + super().__init__(parents=[], stage_name=stage_name, parallelism=parallelism) self._pulsar_service_url = service_url diff --git a/heronpy/connectors/textfiles/textfilesgenerator.py b/heronpy/connectors/textfiles/textfilesgenerator.py index 3d901e6bb93..c4b193aedd8 100644 --- 
a/heronpy/connectors/textfiles/textfilesgenerator.py +++ b/heronpy/connectors/textfiles/textfilesgenerator.py @@ -28,7 +28,7 @@ class TextFileGenerator(Generator): """TextFileGenerator: reads from a list of files""" def __init__(self, filepattern): - super(TextFileGenerator, self).__init__() + super().__init__() self._files = glob.glob(filepattern) # pylint: disable=attribute-defined-outside-init diff --git a/heronpy/streamlet/impl/consumebolt.py b/heronpy/streamlet/impl/consumebolt.py index eb52254eaee..0da961e7cdb 100644 --- a/heronpy/streamlet/impl/consumebolt.py +++ b/heronpy/streamlet/impl/consumebolt.py @@ -56,7 +56,7 @@ def process(self, tup): class ConsumeStreamlet(Streamlet): """ConsumeStreamlet""" def __init__(self, parent): - super(ConsumeStreamlet, self).__init__() + super().__init__() if not isinstance(parent, Streamlet): raise RuntimeError("Parent of Consume Streamlet has to be a Streamlet") self._parent = parent diff --git a/heronpy/streamlet/impl/filterbolt.py b/heronpy/streamlet/impl/filterbolt.py index 98e31aea68c..762062330f7 100644 --- a/heronpy/streamlet/impl/filterbolt.py +++ b/heronpy/streamlet/impl/filterbolt.py @@ -60,7 +60,7 @@ def process(self, tup): class FilterStreamlet(Streamlet): """FilterStreamlet""" def __init__(self, filter_function, parent): - super(FilterStreamlet, self).__init__() + super().__init__() if not callable(filter_function): raise RuntimeError("Filter function has to be callable") if not isinstance(parent, Streamlet): diff --git a/heronpy/streamlet/impl/flatmapbolt.py b/heronpy/streamlet/impl/flatmapbolt.py index 6fbd46f68a6..7ac5701cb5d 100644 --- a/heronpy/streamlet/impl/flatmapbolt.py +++ b/heronpy/streamlet/impl/flatmapbolt.py @@ -66,7 +66,7 @@ def process(self, tup): class FlatMapStreamlet(Streamlet): """FlatMapStreamlet""" def __init__(self, flatmap_function, parent): - super(FlatMapStreamlet, self).__init__() + super().__init__() if not callable(flatmap_function): raise RuntimeError("FlatMap function has to be 
callable") if not isinstance(parent, Streamlet): diff --git a/heronpy/streamlet/impl/generatorspout.py b/heronpy/streamlet/impl/generatorspout.py index b9943d6d2fa..981670a2729 100644 --- a/heronpy/streamlet/impl/generatorspout.py +++ b/heronpy/streamlet/impl/generatorspout.py @@ -62,7 +62,7 @@ def next_tuple(self): class GeneratorStreamlet(Streamlet): """GeneratorStreamlet""" def __init__(self, generator): - super(GeneratorStreamlet, self).__init__() + super().__init__() if not isinstance(generator, Generator): raise RuntimeError("Generator has to be of type Generator") self._generator = generator diff --git a/heronpy/streamlet/impl/joinbolt.py b/heronpy/streamlet/impl/joinbolt.py index 7f8ffa7eaac..7bd4b589f42 100644 --- a/heronpy/streamlet/impl/joinbolt.py +++ b/heronpy/streamlet/impl/joinbolt.py @@ -61,7 +61,7 @@ def _add(self, key, value, src_component, mymap): mymap[key][0].append(value) def initialize(self, config, context): - super(JoinBolt, self).initialize(config, context) + super().initialize(config, context) if not JoinBolt.JOINEDCOMPONENT in config: raise RuntimeError(f"{JoinBolt.JOINEDCOMPONENT} must be specified in the JoinBolt") self._joined_component = config[JoinBolt.JOINEDCOMPONENT] @@ -147,7 +147,7 @@ def choose_tasks(self, values): class JoinStreamlet(Streamlet): """JoinStreamlet""" def __init__(self, join_type, window_config, join_function, left, right): - super(JoinStreamlet, self).__init__() + super().__init__() if not join_type in [JoinBolt.INNER, JoinBolt.OUTER_RIGHT, JoinBolt.OUTER_LEFT]: raise RuntimeError("join type has to be of one of inner, outer, left") if not isinstance(window_config, WindowConfig): diff --git a/heronpy/streamlet/impl/logbolt.py b/heronpy/streamlet/impl/logbolt.py index 063952ef812..6abde1ca6af 100644 --- a/heronpy/streamlet/impl/logbolt.py +++ b/heronpy/streamlet/impl/logbolt.py @@ -51,7 +51,7 @@ def process(self, tup): class LogStreamlet(Streamlet): """LogStreamlet""" def __init__(self, parent): - 
super(LogStreamlet, self).__init__() + super().__init__() if not isinstance(parent, Streamlet): raise RuntimeError("Parent of Log Streamlet has to be a Streamlet") self._parent = parent diff --git a/heronpy/streamlet/impl/mapbolt.py b/heronpy/streamlet/impl/mapbolt.py index 80d5efe3127..b35f761e7ca 100644 --- a/heronpy/streamlet/impl/mapbolt.py +++ b/heronpy/streamlet/impl/mapbolt.py @@ -60,7 +60,7 @@ def process(self, tup): class MapStreamlet(Streamlet): """MapStreamlet""" def __init__(self, map_function, parent): - super(MapStreamlet, self).__init__() + super().__init__() if not callable(map_function): raise RuntimeError("Map function has to be callable") if not isinstance(parent, Streamlet): diff --git a/heronpy/streamlet/impl/reducebykeyandwindowbolt.py b/heronpy/streamlet/impl/reducebykeyandwindowbolt.py index 977ece1789b..d5c055d0337 100644 --- a/heronpy/streamlet/impl/reducebykeyandwindowbolt.py +++ b/heronpy/streamlet/impl/reducebykeyandwindowbolt.py @@ -40,7 +40,7 @@ class ReduceByKeyAndWindowBolt(SlidingWindowBolt, StreamletBoltBase): SLIDEINTERVAL = SlidingWindowBolt.WINDOW_SLIDEINTERVAL_SECS def initialize(self, config, context): - super(ReduceByKeyAndWindowBolt, self).initialize(config, context) + super().initialize(config, context) if ReduceByKeyAndWindowBolt.FUNCTION not in config: raise RuntimeError("FUNCTION not specified in reducebywindow operator") self.reduce_function = config[ReduceByKeyAndWindowBolt.FUNCTION] @@ -88,7 +88,7 @@ def choose_tasks(self, values): class ReduceByKeyAndWindowStreamlet(Streamlet): """ReduceByKeyAndWindowStreamlet""" def __init__(self, window_config, reduce_function, parent): - super(ReduceByKeyAndWindowStreamlet, self).__init__() + super().__init__() if not isinstance(window_config, WindowConfig): raise RuntimeError("window config has to be a WindowConfig") if not callable(reduce_function): diff --git a/heronpy/streamlet/impl/reducebywindowbolt.py b/heronpy/streamlet/impl/reducebywindowbolt.py index 
4a76cd2a725..7f920cf4c99 100644 --- a/heronpy/streamlet/impl/reducebywindowbolt.py +++ b/heronpy/streamlet/impl/reducebywindowbolt.py @@ -39,7 +39,7 @@ class ReduceByWindowBolt(SlidingWindowBolt, StreamletBoltBase): SLIDEINTERVAL = SlidingWindowBolt.WINDOW_SLIDEINTERVAL_SECS def initialize(self, config, context): - super(ReduceByWindowBolt, self).initialize(config, context) + super().initialize(config, context) if ReduceByWindowBolt.FUNCTION not in config: raise RuntimeError("FUNCTION not specified in reducebywindow operator") self.reduce_function = config[ReduceByWindowBolt.FUNCTION] @@ -70,7 +70,7 @@ def choose_tasks(self, values): class ReduceByWindowStreamlet(Streamlet): """ReduceByWindowStreamlet""" def __init__(self, window_config, reduce_function, parent): - super(ReduceByWindowStreamlet, self).__init__() + super().__init__() if not isinstance(window_config, WindowConfig): raise RuntimeError("window config has to be of type WindowConfig") if not callable(reduce_function): diff --git a/heronpy/streamlet/impl/repartitionbolt.py b/heronpy/streamlet/impl/repartitionbolt.py index 902e71d95a3..8a49edf6de9 100644 --- a/heronpy/streamlet/impl/repartitionbolt.py +++ b/heronpy/streamlet/impl/repartitionbolt.py @@ -84,7 +84,7 @@ def process(self, tup): class RepartitionStreamlet(Streamlet): """RepartitionStreamlet""" def __init__(self, num_partitions, repartition_function, parent): - super(RepartitionStreamlet, self).__init__() + super().__init__() if not callable(repartition_function): raise RuntimeError("Repartition function has to be callable") if len(inspect.getargspec(repartition_function)) != 2: diff --git a/heronpy/streamlet/impl/supplierspout.py b/heronpy/streamlet/impl/supplierspout.py index 074372b4c0e..6ea2b0917bc 100644 --- a/heronpy/streamlet/impl/supplierspout.py +++ b/heronpy/streamlet/impl/supplierspout.py @@ -55,7 +55,7 @@ def next_tuple(self): class SupplierStreamlet(Streamlet): """SupplierStreamlet""" def __init__(self, supplier_function): - 
super(SupplierStreamlet, self).__init__() + super().__init__() if not callable(supplier_function): raise RuntimeError("Supplier function has to be callable") self._supplier_function = supplier_function diff --git a/heronpy/streamlet/impl/transformbolt.py b/heronpy/streamlet/impl/transformbolt.py index a72e544638c..40c189cec92 100644 --- a/heronpy/streamlet/impl/transformbolt.py +++ b/heronpy/streamlet/impl/transformbolt.py @@ -65,7 +65,7 @@ def process(self, tup): class TransformStreamlet(Streamlet): """TransformStreamlet""" def __init__(self, transform_operator, parent): - super(TransformStreamlet, self).__init__() + super().__init__() if not isinstance(transform_operator, TransformOperator): raise RuntimeError("Transform Operator has to be a TransformOperator") if not isinstance(parent, Streamlet): diff --git a/heronpy/streamlet/impl/unionbolt.py b/heronpy/streamlet/impl/unionbolt.py index d7cdd50bb56..bb0538161f4 100644 --- a/heronpy/streamlet/impl/unionbolt.py +++ b/heronpy/streamlet/impl/unionbolt.py @@ -53,7 +53,7 @@ def process(self, tup): class UnionStreamlet(Streamlet): """UnionStreamlet""" def __init__(self, left, right): - super(UnionStreamlet, self).__init__() + super().__init__() if not isinstance(left, Streamlet): raise RuntimeError("Left of Union Streamlet has to be a Streamlet") if not isinstance(right, Streamlet): diff --git a/heronpy/streamlet/keyedwindow.py b/heronpy/streamlet/keyedwindow.py index 495dfd24477..47f73f3466d 100644 --- a/heronpy/streamlet/keyedwindow.py +++ b/heronpy/streamlet/keyedwindow.py @@ -33,4 +33,4 @@ def __init__(self, key, window): self._window = window def __repr__(self): - return 'KeyedWindow {key: %s, window: %s}' % (self._key, self._window) + return f'KeyedWindow {{key: {self._key}, window: {self._window}}}' diff --git a/heronpy/streamlet/resources.py b/heronpy/streamlet/resources.py index 7052995cab7..7f2c3e40944 100644 --- a/heronpy/streamlet/resources.py +++ b/heronpy/streamlet/resources.py @@ -49,4 +49,4 @@ def 
set_ram_in_gb(self, ram): return self.set_ram_in_mb(ram * 1024) def __repr__(self): - return 'Resource {cpu: %f, ram: %d}' % (self._cpu, self._ram) + return f'Resource {{cpu: {self._cpu}, ram: {self._ram}}}' diff --git a/heronpy/streamlet/window.py b/heronpy/streamlet/window.py index 8938ba2dbb1..c371c41a71d 100644 --- a/heronpy/streamlet/window.py +++ b/heronpy/streamlet/window.py @@ -30,4 +30,4 @@ def __init__(self, start_time, end_time): self._end_time = end_time def __repr__(self): - return 'Window {start_time: %s, end_time: %s}' % (self._start_time, self._end_time) + return f'Window {{start_time: {self._start_time}, end_time: {self._end_time}}}' diff --git a/integration_test/src/python/integration_test/core/integration_test_bolt.py b/integration_test/src/python/integration_test/core/integration_test_bolt.py index 65b9721704c..db55a89da6b 100644 --- a/integration_test/src/python/integration_test/core/integration_test_bolt.py +++ b/integration_test/src/python/integration_test/core/integration_test_bolt.py @@ -95,7 +95,7 @@ def process(self, tup): self.user_bolt.finish_batch() Log.info("Populating the terminals to downstream") - super(IntegrationTestBolt, self).emit( + super().emit( [integ_const.INTEGRATION_TEST_TERMINAL], stream=integ_const.INTEGRATION_TEST_CONTROL_STREAM_ID) else: @@ -107,24 +107,24 @@ def emit(self, tup, stream=Stream.DEFAULT_STREAM_ID, anchors=None, direct_task=None, need_task_ids=False): Log.info("emitting tuple: %s", tup) if tup is None: - super(IntegrationTestBolt, self).emit(list(self.current_tuple_processing), + super().emit(list(self.current_tuple_processing), stream=stream, anchors=anchors, direct_task=direct_task, need_task_ids=need_task_ids) else: - super(IntegrationTestBolt, self).emit(tup, stream, anchors, direct_task, need_task_ids) + super().emit(tup, stream, anchors, direct_task, need_task_ids) def ack(self, tup): Log.info("Trying to do an ack. 
tuples processed: %d, received: %d" % (self.tuples_processed, self.tuple_received)) if self.tuples_processed < self.tuple_received: - super(IntegrationTestBolt, self).ack(tup) + super().ack(tup) self.tuples_processed += 1 def fail(self, tup): Log.info("Trying to do a fail. tuples processed: %d, received: %d" % (self.tuples_processed, self.tuple_received)) if self.tuples_processed < self.tuple_received: - super(IntegrationTestBolt, self).fail(tup) + super().fail(tup) self.tuples_processed += 1 def process_tick(self, tup): diff --git a/integration_test/src/python/integration_test/core/integration_test_spout.py b/integration_test/src/python/integration_test/core/integration_test_spout.py index 7a7cbd14477..9d23644f416 100644 --- a/integration_test/src/python/integration_test/core/integration_test_spout.py +++ b/integration_test/src/python/integration_test/core/integration_test_spout.py @@ -114,11 +114,11 @@ def emit(self, tup, tup_id=None, stream=Stream.DEFAULT_STREAM_ID, else: _tup_id = tup_id - super(IntegrationTestSpout, self).emit(tup, _tup_id, stream, direct_task, need_task_ids) + super().emit(tup, _tup_id, stream, direct_task, need_task_ids) def _emit_terminal_if_needed(self): Log.info("is_done: %s, tuples_to_complete: %s", self.is_done, self.tuples_to_complete) if self.is_done and self.tuples_to_complete == 0: Log.info("Emitting terminals to downstream") - super(IntegrationTestSpout, self).emit([integ_const.INTEGRATION_TEST_TERMINAL], + super().emit([integ_const.INTEGRATION_TEST_TERMINAL], stream=integ_const.INTEGRATION_TEST_CONTROL_STREAM_ID) diff --git a/integration_test/src/python/integration_test/core/test_runner.py b/integration_test/src/python/integration_test/core/test_runner.py index eb5285fc46c..26b5c29161a 100644 --- a/integration_test/src/python/integration_test/core/test_runner.py +++ b/integration_test/src/python/integration_test/core/test_runner.py @@ -25,7 +25,7 @@ class TestRunner(Runner): """Module for running the streamlet API in integration 
tests""" def __init__(self): - super(TestRunner, self).__init__() + super().__init__() pass def run(self, name, config, builder, http_server_url): diff --git a/integration_test/src/python/integration_test/core/test_topology_builder.py b/integration_test/src/python/integration_test/core/test_topology_builder.py index 4b5daaa1731..5509d0fb157 100644 --- a/integration_test/src/python/integration_test/core/test_topology_builder.py +++ b/integration_test/src/python/integration_test/core/test_topology_builder.py @@ -41,7 +41,7 @@ class TestTopologyBuilder(TopologyBuilder): api_constants.TopologyReliabilityMode.ATLEAST_ONCE, api_constants.TOPOLOGY_PROJECT_NAME: "heron-integration-test"} def __init__(self, name, http_server_url): - super(TestTopologyBuilder, self).__init__(name) + super().__init__(name) self.output_location = f"{http_server_url}/{self.topology_name}" self.set_config(self.DEFAULT_CONFIG) From 6b87e26ff9d5e8d3af9d739c37416be6e0eb7dcb Mon Sep 17 00:00:00 2001 From: Nicholas Nezis Date: Sun, 3 Apr 2022 18:44:31 -0400 Subject: [PATCH 58/82] More lint related style fixes --- .../src/python/utils/metrics/py_metrics.py | 2 +- heron/tools/cli/src/python/kill.py | 2 +- heron/tools/cli/src/python/main.py | 29 ++++++++++--------- heron/tools/cli/src/python/restart.py | 6 ++-- heron/tools/cli/src/python/update.py | 8 ++--- heron/tools/cli/src/python/version.py | 6 ++-- heronpy/api/bolt/window_bolt.py | 2 +- heronpy/api/component/component_spec.py | 19 +++++++----- heronpy/api/metrics.py | 1 + heronpy/api/topology.py | 14 +++++---- heronpy/connectors/pulsar/pulsarspout.py | 11 ++++--- .../textfiles/textfilesgenerator.py | 4 +-- heronpy/streamlet/config.py | 2 +- 13 files changed, 56 insertions(+), 50 deletions(-) diff --git a/heron/instance/src/python/utils/metrics/py_metrics.py b/heron/instance/src/python/utils/metrics/py_metrics.py index c14b90c828e..9f5590ec7dd 100644 --- a/heron/instance/src/python/utils/metrics/py_metrics.py +++ 
b/heron/instance/src/python/utils/metrics/py_metrics.py @@ -80,7 +80,7 @@ def __init__(self, metrics_collector): PY_GC_GENERATION_1_THRESHOLD: self.g1_threshold, PY_GC_GENERATION_2_THRESHOLD: self.g2_threshold, PY_GC_GENERATION_3_THRESHOLD: self.g3_threshold} - _ = super().__init__(self.metrics) + super().__init__(self.metrics) sys_config = system_config.get_sys_config() interval = float(sys_config[constants.HERON_METRICS_EXPORT_INTERVAL_SEC]) self.register_metrics(metrics_collector, interval) diff --git a/heron/tools/cli/src/python/kill.py b/heron/tools/cli/src/python/kill.py index 2b85fcde50b..5ac2456befa 100644 --- a/heron/tools/cli/src/python/kill.py +++ b/heron/tools/cli/src/python/kill.py @@ -20,7 +20,7 @@ ''' kill.py ''' from heron.common.src.python.utils.log import Log -import heron.tools.cli.src.python.cli_helper as cli_helper +from heron.tools.cli.src.python import cli_helper def create_parser(subparsers): ''' diff --git a/heron/tools/cli/src/python/main.py b/heron/tools/cli/src/python/main.py index c922000057f..dd68b6e2bb3 100644 --- a/heron/tools/cli/src/python/main.py +++ b/heron/tools/cli/src/python/main.py @@ -29,20 +29,21 @@ import time import traceback -import heron.common.src.python.utils.log as log -import heron.tools.common.src.python.utils.config as config -import heron.tools.cli.src.python.cdefs as cdefs -import heron.tools.cli.src.python.cliconfig as cliconfig -import heron.tools.cli.src.python.help as cli_help -import heron.tools.cli.src.python.activate as activate -import heron.tools.cli.src.python.deactivate as deactivate -import heron.tools.cli.src.python.kill as kill -import heron.tools.cli.src.python.result as result -import heron.tools.cli.src.python.restart as restart -import heron.tools.cli.src.python.submit as submit -import heron.tools.cli.src.python.update as update -import heron.tools.cli.src.python.version as version -import heron.tools.cli.src.python.config as hconfig +from heron.common.src.python.utils import log + +from 
heron.tools.common.src.python.utils import config +from heron.tools.cli.src.python import cdefs +from heron.tools.cli.src.python import cliconfig +from heron.tools.cli.src.python import help as cli_help +from heron.tools.cli.src.python import activate +from heron.tools.cli.src.python import deactivate +from heron.tools.cli.src.python import kill +from heron.tools.cli.src.python import result +from heron.tools.cli.src.python import restart +from heron.tools.cli.src.python import submit +from heron.tools.cli.src.python import update +from heron.tools.cli.src.python import version +from heron.tools.cli.src.python import config as hconfig from heron.tools.cli.src.python.opts import cleaned_up_files diff --git a/heron/tools/cli/src/python/restart.py b/heron/tools/cli/src/python/restart.py index 06f42d37a14..42048641be5 100644 --- a/heron/tools/cli/src/python/restart.py +++ b/heron/tools/cli/src/python/restart.py @@ -20,9 +20,9 @@ ''' restart.py ''' from heron.common.src.python.utils.log import Log -import heron.tools.cli.src.python.args as args -import heron.tools.cli.src.python.cli_helper as cli_helper -import heron.tools.common.src.python.utils.config as config +from heron.tools.cli.src.python import args +from heron.tools.cli.src.python import cli_helper +from heron.tools.common.src.python.utils import config def create_parser(subparsers): ''' diff --git a/heron/tools/cli/src/python/update.py b/heron/tools/cli/src/python/update.py index a117b368934..77f6923f146 100644 --- a/heron/tools/cli/src/python/update.py +++ b/heron/tools/cli/src/python/update.py @@ -25,10 +25,10 @@ from heron.common.src.python.utils.log import Log from heron.tools.cli.src.python.result import SimpleResult, Status -import heron.tools.cli.src.python.args as args -import heron.tools.cli.src.python.cli_helper as cli_helper -import heron.tools.cli.src.python.jars as jars -import heron.tools.common.src.python.utils.config as config +from heron.tools.cli.src.python import args +from 
heron.tools.cli.src.python import cli_helper +from heron.tools.cli.src.python import jars +from heron.tools.common.src.python.utils import config def create_parser(subparsers): """ Create the parse for the update command """ diff --git a/heron/tools/cli/src/python/version.py b/heron/tools/cli/src/python/version.py index 86fa050c9a6..fc7ea824f82 100644 --- a/heron/tools/cli/src/python/version.py +++ b/heron/tools/cli/src/python/version.py @@ -25,9 +25,9 @@ from heron.common.src.python.utils.log import Log from heron.tools.cli.src.python.result import SimpleResult, Status import heron.tools.cli.src.python.args as cli_args -import heron.tools.common.src.python.utils.config as config -import heron.tools.cli.src.python.cdefs as cdefs -import heron.tools.cli.src.python.rest as rest +from heron.tools.common.src.python.utils import config +from heron.tools.cli.src.python import cdefs +from heron.tools.cli.src.python import rest def add_version_titles(parser): ''' diff --git a/heronpy/api/bolt/window_bolt.py b/heronpy/api/bolt/window_bolt.py index 6e29eaba6a5..4a79aedfd54 100644 --- a/heronpy/api/bolt/window_bolt.py +++ b/heronpy/api/bolt/window_bolt.py @@ -23,7 +23,7 @@ from collections import namedtuple, deque import time from heronpy.api.bolt.bolt import Bolt -import heronpy.api.api_constants as api_constants +from heronpy.api import api_constants from heronpy.api.state.stateful_component import StatefulComponent WindowContext = namedtuple('WindowContext', ('start', 'end')) diff --git a/heronpy/api/component/component_spec.py b/heronpy/api/component/component_spec.py index fc04c6b05ed..bba998a2df9 100644 --- a/heronpy/api/component/component_spec.py +++ b/heronpy/api/component/component_spec.py @@ -145,13 +145,13 @@ def _sanitize_config(custom_config): These values will need to be serialized before adding to a protobuf message. 
""" if not isinstance(custom_config, dict): - raise TypeError("Component-specific configuration must be given as a dict type, given: %s" - % str(type(custom_config))) + raise TypeError("Component-specific configuration must be "\ + f"given as a dict type, given: {str(type(custom_config))}") sanitized = {} for key, value in list(custom_config.items()): if not isinstance(key, str): - raise TypeError("Key for component-specific configuration must be string, given: %s:%s" - % (str(type(key)), str(key))) + raise TypeError("Key for component-specific configuration "\ + f"must be string, given: {str(type(key))}:{str(key)}") if isinstance(value, bool): sanitized[key] = "true" if value else "false" @@ -250,16 +250,18 @@ def _sanitize_outputs(self): return None if not isinstance(self.outputs, (list, tuple)): - raise TypeError(f"Argument to outputs must be either list or tuple, given: {str(type(self.outputs))}") + raise TypeError("Argument to outputs must be either "\ + f"list or tuple, given: {str(type(self.outputs))}") for output in self.outputs: if not isinstance(output, (str, Stream)): - raise TypeError(f"Outputs must be a list of strings or Streams, given: {str(output)}") + raise TypeError("Outputs must be a list of strings "\ + f"or Streams, given: {str(output)}") if isinstance(output, str): # it's a default stream if Stream.DEFAULT_STREAM_ID not in ret: - ret[Stream.DEFAULT_STREAM_ID] = list() + ret[Stream.DEFAULT_STREAM_ID] = [] ret[Stream.DEFAULT_STREAM_ID].append(output) else: # output is a Stream object @@ -276,7 +278,8 @@ def get_out_streamids(self): return set() if not isinstance(self.outputs, (list, tuple)): - raise TypeError(f"Argument to outputs must be either list or tuple, given: {str(type(self.outputs))}") + raise TypeError("Argument to outputs must be either "\ + f"list or tuple, given: {str(type(self.outputs))}") ret_lst = [] for output in self.outputs: if not isinstance(output, (str, Stream)): diff --git a/heronpy/api/metrics.py b/heronpy/api/metrics.py 
index 6eabf477f3c..6508d709117 100644 --- a/heronpy/api/metrics.py +++ b/heronpy/api/metrics.py @@ -168,6 +168,7 @@ def update(self, key, value): def get_value_and_reset(self): ret = {} + # pylint: disable=consider-using-dict-items for k in self.map: ret[k] = self.map[k].get_value_and_reset() return ret diff --git a/heronpy/api/topology.py b/heronpy/api/topology.py index 055178b66ba..eef8d611665 100644 --- a/heronpy/api/topology.py +++ b/heronpy/api/topology.py @@ -25,7 +25,7 @@ import os import uuid -import heronpy.api.api_constants as api_constants +from heronpy.api import api_constants from heronpy.api.component.component_spec import HeronComponentSpec from heronpy.api.serializer import default_serializer from heronpy.proto import topology_pb2 @@ -112,7 +112,8 @@ def class_dict_to_topo_config(mcs, class_dict): def add_spout_specs(mcs, spec, spout_specs): if not spec.outputs: raise ValueError( - f"{spec.python_class_path}: {spec.name} requires at least one output, because it is a spout" + f"{spec.python_class_path}: {spec.name} requires "\ + "at least one output, because it is a spout" ) spout_specs[spec.name] = spec.get_protobuf() @@ -120,7 +121,8 @@ def add_spout_specs(mcs, spec, spout_specs): def add_bolt_specs(mcs, spec, bolt_specs): if not spec.inputs: raise ValueError( - f"{spec.python_class_path}: {spec.name} requires at least one input, because it is a bolt" + f"{spec.python_class_path}: {spec.name} requires "\ + "at least one input, because it is a bolt" ) bolt_specs[spec.name] = spec.get_protobuf() @@ -243,7 +245,8 @@ def _sanitize_config(custom_config): sanitized = {} for key, value in list(custom_config.items()): if not isinstance(key, str): - raise TypeError(f"Key for topology-wide configuration must be string, given: {str(type(key))}: {str(key)}") + raise TypeError("Key for topology-wide configuration must "\ + f"be string, given: {str(type(key))}: {str(key)}") if isinstance(value, bool): sanitized[key] = "true" if value else "false" @@ -345,8 
+348,7 @@ def add_spec(self, *specs): """ for spec in specs: if not isinstance(spec, HeronComponentSpec): - raise TypeError("Argument to add_spec needs to be HeronComponentSpec, given: %s" - % str(spec)) + raise TypeError(f"Argument to add_spec needs to be HeronComponentSpec, given: {str(spec)}") if spec.name is None: raise ValueError("TopologyBuilder cannot take a spec without name") if spec.name == "config": diff --git a/heronpy/connectors/pulsar/pulsarspout.py b/heronpy/connectors/pulsar/pulsarspout.py index 057e1fefb64..bcde92f32d9 100644 --- a/heronpy/connectors/pulsar/pulsarspout.py +++ b/heronpy/connectors/pulsar/pulsarspout.py @@ -25,7 +25,7 @@ import pulsar -import heronpy.api.src.python.api_constants as api_constants +from heronpy.api.src.python import api_constants from heronpy.api.src.python.spout.spout import Spout from heronpy.streamlet.src.python.streamletboltbase import StreamletBoltBase @@ -46,11 +46,10 @@ def GenerateLogConfContents(logFileName): def GenerateLogConfig(context): namePrefix = str(context.get_component_id()) + "-" + str(context.get_task_id()) logFileName = os.getcwd() + "/" + namePrefix - flHandler = tempfile.NamedTemporaryFile(prefix=namePrefix, suffix='.conf', - dir=os.getcwd(), delete=False) - flHandler.write(GenerateLogConfContents(logFileName)) - flHandler.flush() - flHandler.close() + with tempfile.NamedTemporaryFile(prefix=namePrefix, suffix='.conf', + dir=os.getcwd(), delete=False) as flHandler: + flHandler.write(GenerateLogConfContents(logFileName)) + flHandler.flush() return flHandler.name class PulsarSpout(Spout, StreamletBoltBase): diff --git a/heronpy/connectors/textfiles/textfilesgenerator.py b/heronpy/connectors/textfiles/textfilesgenerator.py index c4b193aedd8..eeff70a6908 100644 --- a/heronpy/connectors/textfiles/textfilesgenerator.py +++ b/heronpy/connectors/textfiles/textfilesgenerator.py @@ -64,8 +64,8 @@ def _consume_next_file(self): return None self.logger.info("Now reading file %s", file_to_consume) try: - 
filep = open(file_to_consume, 'r') - return filep.readlines() + with open(file_to_consume, 'r') as filep: + return filep.readlines() except IOError as e: self.logger.info("Could not open the file %s", file_to_consume) raise e diff --git a/heronpy/streamlet/config.py b/heronpy/streamlet/config.py index 1d2e649739e..eb41614a031 100644 --- a/heronpy/streamlet/config.py +++ b/heronpy/streamlet/config.py @@ -20,7 +20,7 @@ '''config.py: module for defining config''' -import heronpy.api.api_constants as api_constants +from heronpy.api import api_constants from heronpy.streamlet.resources import Resources class Config: From 731fb95a598c717f59a6bbbb31dee0d5efef35a8 Mon Sep 17 00:00:00 2001 From: Nicholas Nezis Date: Sun, 3 Apr 2022 18:55:57 -0400 Subject: [PATCH 59/82] More fixes --- heronpy/connectors/textfiles/textfilesgenerator.py | 2 +- heronpy/streamlet/impl/flatmapbolt.py | 4 ++-- heronpy/streamlet/impl/joinbolt.py | 6 +++--- heronpy/streamlet/impl/reducebykeyandwindowbolt.py | 6 +++--- heronpy/streamlet/impl/reducebywindowbolt.py | 4 ++-- heronpy/streamlet/impl/repartitionbolt.py | 4 ++-- 6 files changed, 13 insertions(+), 13 deletions(-) diff --git a/heronpy/connectors/textfiles/textfilesgenerator.py b/heronpy/connectors/textfiles/textfilesgenerator.py index eeff70a6908..2282ffe7240 100644 --- a/heronpy/connectors/textfiles/textfilesgenerator.py +++ b/heronpy/connectors/textfiles/textfilesgenerator.py @@ -64,7 +64,7 @@ def _consume_next_file(self): return None self.logger.info("Now reading file %s", file_to_consume) try: - with open(file_to_consume, 'r') as filep: + with open(file_to_consume, 'r', encoding='utf8') as filep: return filep.readlines() except IOError as e: self.logger.info("Could not open the file %s", file_to_consume) diff --git a/heronpy/streamlet/impl/flatmapbolt.py b/heronpy/streamlet/impl/flatmapbolt.py index 7ac5701cb5d..482fe7ab7de 100644 --- a/heronpy/streamlet/impl/flatmapbolt.py +++ b/heronpy/streamlet/impl/flatmapbolt.py @@ -19,7 +19,7 @@ # 
under the License. """module for flat_map bolt: FlatMapBolt""" -import collections +from collections.abc import Iterable from heronpy.api.bolt.bolt import Bolt from heronpy.api.state.stateful_component import StatefulComponent from heronpy.api.component.component_spec import GlobalStreamId @@ -52,7 +52,7 @@ def initialize(self, config, context): def process(self, tup): retval = self.flatmap_function(tup.values[0]) - if isinstance(retval, collections.Iterable): + if isinstance(retval, Iterable): for value in retval: self.emit([value], stream='output') self.emitted += 1 diff --git a/heronpy/streamlet/impl/joinbolt.py b/heronpy/streamlet/impl/joinbolt.py index 7bd4b589f42..8de493107dc 100644 --- a/heronpy/streamlet/impl/joinbolt.py +++ b/heronpy/streamlet/impl/joinbolt.py @@ -19,7 +19,7 @@ # under the License. """module for join bolt: JoinBolt""" -import collections +from collections.abc import Iterable from heronpy.api.bolt.window_bolt import SlidingWindowBolt from heronpy.api.component.component_spec import GlobalStreamId @@ -78,7 +78,7 @@ def processWindow(self, window_config, tuples): mymap = {} for tup in tuples: userdata = tup.values[0] - if not isinstance(userdata, collections.Iterable) or len(userdata) != 2: + if not isinstance(userdata, Iterable) or len(userdata) != 2: raise RuntimeError("Join tuples must be iterable of length 2") self._add(userdata[0], userdata[1], tup.component, mymap) for (key, values) in list(mymap.items()): @@ -136,7 +136,7 @@ def prepare(self, context, component, stream, target_tasks): def choose_tasks(self, values): assert isinstance(values, list) and len(values) == 1 userdata = values[0] - if not isinstance(userdata, collections.Iterable) or len(userdata) != 2: + if not isinstance(userdata, Iterable) or len(userdata) != 2: raise RuntimeError("Tuples going to join must be iterable of length 2") # only emits to the first task id hashvalue = hash(userdata[0]) diff --git a/heronpy/streamlet/impl/reducebykeyandwindowbolt.py 
b/heronpy/streamlet/impl/reducebykeyandwindowbolt.py index d5c055d0337..65652f1d087 100644 --- a/heronpy/streamlet/impl/reducebykeyandwindowbolt.py +++ b/heronpy/streamlet/impl/reducebykeyandwindowbolt.py @@ -19,7 +19,7 @@ # under the License. """module for join bolt: ReduceByKeyAndWindowBolt""" -import collections +from collections.abc import Iterable from heronpy.api.bolt.window_bolt import SlidingWindowBolt from heronpy.api.custom_grouping import ICustomGrouping @@ -59,7 +59,7 @@ def processWindow(self, window_config, tuples): mymap = {} for tup in tuples: userdata = tup.values[0] - if not isinstance(userdata, collections.Iterable) or len(userdata) != 2: + if not isinstance(userdata, Iterable) or len(userdata) != 2: raise RuntimeError("ReduceByWindow tuples must be iterable of length 2") self._add(userdata[0], userdata[1], mymap) for (key, values) in list(mymap.items()): @@ -77,7 +77,7 @@ def prepare(self, context, component, stream, target_tasks): def choose_tasks(self, values): assert isinstance(values, list) and len(values) == 1 userdata = values[0] - if not isinstance(userdata, collections.Iterable) or len(userdata) != 2: + if not isinstance(userdata, Iterable) or len(userdata) != 2: raise RuntimeError("Tuples going to reduce must be iterable of length 2") # only emits to the first task id hashvalue = hash(userdata[0]) diff --git a/heronpy/streamlet/impl/reducebywindowbolt.py b/heronpy/streamlet/impl/reducebywindowbolt.py index 7f920cf4c99..157a199e1f3 100644 --- a/heronpy/streamlet/impl/reducebywindowbolt.py +++ b/heronpy/streamlet/impl/reducebywindowbolt.py @@ -19,7 +19,7 @@ # under the License. 
"""module for bolt: ReduceByWindowBolt""" -import collections +from collections.abc import Iterable from heronpy.api.bolt.window_bolt import SlidingWindowBolt from heronpy.api.custom_grouping import ICustomGrouping @@ -59,7 +59,7 @@ def prepare(self, context, component, stream, target_tasks): def choose_tasks(self, values): assert isinstance(values, list) and len(values) == 1 userdata = values[0] - if not isinstance(userdata, collections.Iterable) or len(userdata) != 2: + if not isinstance(userdata, Iterable) or len(userdata) != 2: raise RuntimeError("Tuples going to reduce must be iterable of length 2") # only emits to the first task id hashvalue = hash(userdata[0]) diff --git a/heronpy/streamlet/impl/repartitionbolt.py b/heronpy/streamlet/impl/repartitionbolt.py index 8a49edf6de9..25e600220d9 100644 --- a/heronpy/streamlet/impl/repartitionbolt.py +++ b/heronpy/streamlet/impl/repartitionbolt.py @@ -19,7 +19,7 @@ # under the License. """module for map bolt: RepartitionBolt""" -import collections +from collections.abc import Iterable import inspect from heronpy.api.custom_grouping import ICustomGrouping @@ -50,7 +50,7 @@ def choose_tasks(self, values): # only emits to the first task id targets = self._repartition_function(values, len(self.target_tasks)) retval = [] - if isinstance(targets, collections.Iterable): + if isinstance(targets, Iterable): for target in targets: retval.append(self.target_tasks[target % len(self.target_tasks)]) else: From d9b1a92f57986ac9f7d9a00065baa712b2df325e Mon Sep 17 00:00:00 2001 From: Nicholas Nezis Date: Sun, 3 Apr 2022 19:12:51 -0400 Subject: [PATCH 60/82] More fixes --- heron/instance/src/python/network/gateway_looper.py | 1 + heron/instance/src/python/network/heron_client.py | 1 + heron/statemgrs/src/python/filestatemanager.py | 1 + heron/tools/cli/src/python/activate.py | 2 +- heron/tools/cli/src/python/args.py | 2 +- heron/tools/cli/src/python/cdefs.py | 4 ++-- heron/tools/cli/src/python/cli_helper.py | 8 ++++---- 
heron/tools/cli/src/python/config.py | 2 +- heron/tools/cli/src/python/deactivate.py | 2 +- heron/tools/cli/src/python/execute.py | 8 ++++---- heron/tools/cli/src/python/help.py | 2 +- heron/tools/cli/src/python/jars.py | 2 +- heron/tools/tracker/src/python/app.py | 5 ++--- heron/tools/tracker/src/python/config.py | 1 + heron/tools/tracker/src/python/metricstimeline.py | 4 ++-- heron/tools/tracker/src/python/routers/topologies.py | 4 ++-- heron/tools/tracker/src/python/topology.py | 5 +++-- heron/tools/tracker/src/python/utils.py | 6 +++--- heronpy/connectors/mock/arraylooper.py | 4 ++-- 19 files changed, 34 insertions(+), 30 deletions(-) diff --git a/heron/instance/src/python/network/gateway_looper.py b/heron/instance/src/python/network/gateway_looper.py index 50d796a13f7..fee9f70ca6a 100644 --- a/heron/instance/src/python/network/gateway_looper.py +++ b/heron/instance/src/python/network/gateway_looper.py @@ -20,6 +20,7 @@ '''gateway_looper.py''' +# pylint: disable=deprected-module import asyncore import errno import os diff --git a/heron/instance/src/python/network/heron_client.py b/heron/instance/src/python/network/heron_client.py index 76c1c8d79ff..4418dbe2bc7 100644 --- a/heron/instance/src/python/network/heron_client.py +++ b/heron/instance/src/python/network/heron_client.py @@ -20,6 +20,7 @@ '''heron_client.py''' +# pylint: disable=deprected-module import asyncore import socket import time diff --git a/heron/statemgrs/src/python/filestatemanager.py b/heron/statemgrs/src/python/filestatemanager.py index 01cf2fe7fb7..5d623ba92f1 100644 --- a/heron/statemgrs/src/python/filestatemanager.py +++ b/heron/statemgrs/src/python/filestatemanager.py @@ -117,6 +117,7 @@ def trigger_watches_based_on_files(watchers, path, directory, ProtoClass): topologies = [] if os.path.isdir(topologies_path): + # pylint: disable=consider-using-generator topologies = list([f for f in os.listdir(topologies_path) if os.path.isfile(os.path.join(topologies_path, f))]) if set(topologies) != 
set(self.topologies_directory): diff --git a/heron/tools/cli/src/python/activate.py b/heron/tools/cli/src/python/activate.py index 35674847028..5754e1110d2 100644 --- a/heron/tools/cli/src/python/activate.py +++ b/heron/tools/cli/src/python/activate.py @@ -20,7 +20,7 @@ ''' activate ''' from heron.common.src.python.utils.log import Log -import heron.tools.cli.src.python.cli_helper as cli_helper +from heron.tools.cli.src.python import cli_helper def create_parser(subparsers): ''' diff --git a/heron/tools/cli/src/python/args.py b/heron/tools/cli/src/python/args.py index 003db5c4a73..3fe388e59f1 100644 --- a/heron/tools/cli/src/python/args.py +++ b/heron/tools/cli/src/python/args.py @@ -23,7 +23,7 @@ import os import sys -import heron.tools.common.src.python.utils.config as config +from heron.tools.common.src.python.utils import config def add_titles(parser): diff --git a/heron/tools/cli/src/python/cdefs.py b/heron/tools/cli/src/python/cdefs.py index 95c1a4adeb8..79690301951 100644 --- a/heron/tools/cli/src/python/cdefs.py +++ b/heron/tools/cli/src/python/cdefs.py @@ -21,8 +21,8 @@ ''' cdefs.py ''' import os -import heron.tools.cli.src.python.cliconfig as cliconfig -import heron.tools.common.src.python.utils.config as config +from heron.tools.cli.src.python import cliconfig +from heron.tools.common.src.python.utils import config ################################################################################ def read_server_mode_cluster_definition(cluster, cl_args): diff --git a/heron/tools/cli/src/python/cli_helper.py b/heron/tools/cli/src/python/cli_helper.py index 1b91ebb3f6a..f9253718f91 100644 --- a/heron/tools/cli/src/python/cli_helper.py +++ b/heron/tools/cli/src/python/cli_helper.py @@ -23,10 +23,10 @@ import requests import heron.tools.common.src.python.utils.config as config from heron.tools.cli.src.python.result import SimpleResult, Status -import heron.tools.cli.src.python.args as args -import heron.tools.cli.src.python.execute as execute -import 
heron.tools.cli.src.python.jars as jars -import heron.tools.cli.src.python.rest as rest +from heron.tools.cli.src.python import args +from heron.tools.cli.src.python import execute +from heron.tools.cli.src.python import jars +from heron.tools.cli.src.python import rest from heron.common.src.python.utils.log import Log diff --git a/heron/tools/cli/src/python/config.py b/heron/tools/cli/src/python/config.py index 74358ec7145..f859184cb7f 100644 --- a/heron/tools/cli/src/python/config.py +++ b/heron/tools/cli/src/python/config.py @@ -19,7 +19,7 @@ # under the License. ''' config.py ''' -import heron.tools.cli.src.python.cliconfig as cliconfig +from heron.tools.cli.src.python import cliconfig from heron.tools.cli.src.python.result import SimpleResult, Status diff --git a/heron/tools/cli/src/python/deactivate.py b/heron/tools/cli/src/python/deactivate.py index 7cef5aba4e9..6ae50357dd5 100644 --- a/heron/tools/cli/src/python/deactivate.py +++ b/heron/tools/cli/src/python/deactivate.py @@ -20,7 +20,7 @@ ''' deactivate.py ''' from heron.common.src.python.utils.log import Log -import heron.tools.cli.src.python.cli_helper as cli_helper +from heron.tools.cli.src.python import cli_helper def create_parser(subparsers): ''' diff --git a/heron/tools/cli/src/python/execute.py b/heron/tools/cli/src/python/execute.py index 5b633b6b88a..6d0b2db6776 100644 --- a/heron/tools/cli/src/python/execute.py +++ b/heron/tools/cli/src/python/execute.py @@ -31,11 +31,11 @@ from heron.tools.cli.src.python.result import SimpleResult, ProcessResult, Status -import heron.common.src.python.pex_loader as pex_loader +from heron.common.src.python import pex_loader -import heron.tools.cli.src.python.opts as opts -import heron.tools.cli.src.python.jars as jars -import heron.tools.common.src.python.utils.config as config +from heron.tools.cli.src.python import opts +from heron.tools.cli.src.python import jars +from heron.tools.common.src.python.utils import config 
################################################################################ def heron_class(class_name, lib_jars, extra_jars=None, args=None, java_defines=None): diff --git a/heron/tools/cli/src/python/help.py b/heron/tools/cli/src/python/help.py index 6c9d35ad4dd..f73b9b4cf76 100644 --- a/heron/tools/cli/src/python/help.py +++ b/heron/tools/cli/src/python/help.py @@ -21,7 +21,7 @@ ''' help.py ''' from heron.common.src.python.utils.log import Log from heron.tools.cli.src.python.result import SimpleResult, Status -import heron.tools.common.src.python.utils.config as config +from heron.tools.common.src.python.utils import config def create_parser(subparsers): ''' diff --git a/heron/tools/cli/src/python/jars.py b/heron/tools/cli/src/python/jars.py index 32152944c2e..edde27ab415 100644 --- a/heron/tools/cli/src/python/jars.py +++ b/heron/tools/cli/src/python/jars.py @@ -22,7 +22,7 @@ import os import fnmatch -import heron.tools.common.src.python.utils.config as config +from heron.tools.common.src.python.utils import config def pick(dirname, pattern): diff --git a/heron/tools/tracker/src/python/app.py b/heron/tools/tracker/src/python/app.py index 7428ac45b06..3c6e6f408f8 100644 --- a/heron/tools/tracker/src/python/app.py +++ b/heron/tools/tracker/src/python/app.py @@ -26,14 +26,13 @@ import time from typing import Dict, List, Optional -from heron.tools.tracker.src.python import constants, state, query -from heron.tools.tracker.src.python.routers import topologies, container, metrics - from fastapi import FastAPI, Query, Request from fastapi.exceptions import RequestValidationError from fastapi.responses import JSONResponse from starlette.exceptions import HTTPException as StarletteHTTPException +from heron.tools.tracker.src.python import constants, state, query +from heron.tools.tracker.src.python.routers import topologies, container, metrics openapi_tags = [ {"name": "metrics", "description": query.__doc__}, diff --git a/heron/tools/tracker/src/python/config.py 
b/heron/tools/tracker/src/python/config.py index 6652a7ff36b..ec9511f1528 100644 --- a/heron/tools/tracker/src/python/config.py +++ b/heron/tools/tracker/src/python/config.py @@ -75,4 +75,5 @@ def __str__(self): @staticmethod def config_str(config): keys = ("type", "name", "hostport", "rootpath", "tunnelhost") + # pylint: disable=consider-using-f-string return "".join("\t{}: {}\n".format(k, config[k]) for k in keys if k in config).rstrip() diff --git a/heron/tools/tracker/src/python/metricstimeline.py b/heron/tools/tracker/src/python/metricstimeline.py index 8fea4f972f9..93c02801e32 100644 --- a/heron/tools/tracker/src/python/metricstimeline.py +++ b/heron/tools/tracker/src/python/metricstimeline.py @@ -21,12 +21,12 @@ """ metricstimeline.py """ from typing import Dict, List -from heron.proto import tmanager_pb2 - import httpx from pydantic import BaseModel, Field +from heron.proto import tmanager_pb2 + class MetricsTimeline(BaseModel): component: str diff --git a/heron/tools/tracker/src/python/routers/topologies.py b/heron/tools/tracker/src/python/routers/topologies.py index 37868876aeb..b8cbd0868cc 100644 --- a/heron/tools/tracker/src/python/routers/topologies.py +++ b/heron/tools/tracker/src/python/routers/topologies.py @@ -27,6 +27,8 @@ """ from typing import List, Optional, Dict, Union +from fastapi import Query, APIRouter + from heron.tools.tracker.src.python import state from heron.tools.tracker.src.python.topology import ( TopologyInfo, @@ -38,8 +40,6 @@ TopologyInfoSchedulerLocation, ) -from fastapi import Query, APIRouter - router = APIRouter() diff --git a/heron/tools/tracker/src/python/topology.py b/heron/tools/tracker/src/python/topology.py index 39f9de61be2..7ac297a2c3f 100644 --- a/heron/tools/tracker/src/python/topology.py +++ b/heron/tools/tracker/src/python/topology.py @@ -22,10 +22,11 @@ import dataclasses import json import string -import networkx -from copy import deepcopy from typing import Any, Dict, List, Optional +from copy import deepcopy 
+import networkx + from pydantic import BaseModel, Field from heron.proto import topology_pb2 diff --git a/heron/tools/tracker/src/python/utils.py b/heron/tools/tracker/src/python/utils.py index e4428b7da82..896ba1c268d 100644 --- a/heron/tools/tracker/src/python/utils.py +++ b/heron/tools/tracker/src/python/utils.py @@ -130,9 +130,9 @@ def cygpath(x: str) -> str: :return: the path in windows """ command = ['cygpath', '-wp', x] - p = subprocess.Popen(command, stdout=subprocess.PIPE, universal_newlines=True) - output, _ = p.communicate() - lines = output.split("\n") + with subprocess.Popen(command, stdout=subprocess.PIPE, universal_newlines=True) as p: + output, _ = p.communicate() + lines = output.split("\n") return lines[0] def normalized_class_path(x: str) -> str: diff --git a/heronpy/connectors/mock/arraylooper.py b/heronpy/connectors/mock/arraylooper.py index 13e0cc362ea..123d143895f 100644 --- a/heronpy/connectors/mock/arraylooper.py +++ b/heronpy/connectors/mock/arraylooper.py @@ -20,7 +20,7 @@ '''arraylooper.py: module for defining a simple Generator''' -import collections +from collections.abc import Iterable import itertools import time @@ -31,7 +31,7 @@ class ArrayLooper(Generator): """ def __init__(self, user_iterable, sleep=None): super().__init__() - if not isinstance(user_iterable, collections.Iterable): + if not isinstance(user_iterable, Iterable): raise RuntimeError("ArrayLooper must be passed an iterable") self._user_iterable = user_iterable self._sleep = sleep From 34e375993025112d80906375386bd1d5f1a847b6 Mon Sep 17 00:00:00 2001 From: Nicholas Nezis Date: Sun, 3 Apr 2022 19:15:32 -0400 Subject: [PATCH 61/82] Typo fix --- heron/instance/src/python/network/gateway_looper.py | 2 +- heron/instance/src/python/network/heron_client.py | 2 +- heron/tools/tracker/src/python/utils.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/heron/instance/src/python/network/gateway_looper.py b/heron/instance/src/python/network/gateway_looper.py 
index fee9f70ca6a..aba5b70acf4 100644 --- a/heron/instance/src/python/network/gateway_looper.py +++ b/heron/instance/src/python/network/gateway_looper.py @@ -20,7 +20,7 @@ '''gateway_looper.py''' -# pylint: disable=deprected-module +# pylint: disable=deprecated-module import asyncore import errno import os diff --git a/heron/instance/src/python/network/heron_client.py b/heron/instance/src/python/network/heron_client.py index 4418dbe2bc7..2fa5179c4a7 100644 --- a/heron/instance/src/python/network/heron_client.py +++ b/heron/instance/src/python/network/heron_client.py @@ -20,7 +20,7 @@ '''heron_client.py''' -# pylint: disable=deprected-module +# pylint: disable=deprecated-module import asyncore import socket import time diff --git a/heron/tools/tracker/src/python/utils.py b/heron/tools/tracker/src/python/utils.py index 896ba1c268d..50553baa4fc 100644 --- a/heron/tools/tracker/src/python/utils.py +++ b/heron/tools/tracker/src/python/utils.py @@ -185,7 +185,7 @@ def parse_config_file(config_file: str) -> Optional[str]: return None # Read the configuration file - with open(expanded_config_file_path, 'r') as f: + with open(expanded_config_file_path, 'r', encoding='utf8') as f: return yaml.safe_load(f) ################################################################################ From 1a3e1b7ae1facc067725ac7ea1330da8a8d89c8e Mon Sep 17 00:00:00 2001 From: Nicholas Nezis Date: Sun, 3 Apr 2022 19:48:11 -0400 Subject: [PATCH 62/82] More fixes --- heron/executor/src/python/heron_executor.py | 204 ++++++++++---------- heron/tools/cli/src/python/cli_helper.py | 2 +- heron/tools/cli/src/python/execute.py | 3 + 3 files changed, 106 insertions(+), 103 deletions(-) diff --git a/heron/executor/src/python/heron_executor.py b/heron/executor/src/python/heron_executor.py index bbd6e724039..b53fc35995c 100755 --- a/heron/executor/src/python/heron_executor.py +++ b/heron/executor/src/python/heron_executor.py @@ -40,6 +40,9 @@ import socket import traceback import itertools +import yaml 
+ +import click from heron.common.src.python.utils import log from heron.common.src.python.utils import proc @@ -49,9 +52,6 @@ from heron.statemgrs.src.python import configloader from heron.statemgrs.src.python.config import Config as StateMgrConfig -import click -import yaml - Log = log.Log # pylint: disable=too-many-lines @@ -166,7 +166,7 @@ def atomic_write_file(path, content): """ # Write to a randomly tmp file tmp_file = get_tmp_filename() - with open(tmp_file, 'w') as f: + with open(tmp_file, 'w', encoding='utf8') as f: f.write(content) # make sure that all data is on disk f.flush() @@ -177,7 +177,7 @@ def atomic_write_file(path, content): def log_pid_for_process(process_name, pid): filename = get_process_pid_filename(process_name) - Log.info('Logging pid %d to file %s' %(pid, filename)) + Log.info('Logging pid %d to file %s', pid, filename) atomic_write_file(filename, str(pid)) def is_docker_environment(): @@ -247,6 +247,7 @@ def increment_attempts(self): def __repr__(self): return ( + # pylint: disable=consider-using-f-string "ProcessInfo(pid=%(pid)r, name=%(name)r, command=%(command)r, attempts=%(attempts)r)" % vars(self) ) @@ -341,8 +342,7 @@ def init_from_parsed_args(self, parsed_args): self.metricscache_manager_mode = parsed_args.metricscache_manager_mode \ if parsed_args.metricscache_manager_mode else "disabled" self.health_manager_mode = parsed_args.health_manager_mode - self.health_manager_classpath = '%s:%s'\ - % (self.scheduler_classpath, parsed_args.health_manager_classpath) + self.health_manager_classpath = f'{self.scheduler_classpath}:{parsed_args.health_manager_classpath}' self.verbose_gc = parsed_args.verbose_gc self.jvm_remote_debugger_ports = \ parsed_args.jvm_remote_debugger_ports.split(",") \ @@ -385,10 +385,10 @@ def initialize(self): 2. 
We don't initialize the logger (also something unit tests don't want) until after the constructor """ - create_folders = Command('mkdir -p %s' % self.log_dir, self.shell_env) + create_folders = Command(f'mkdir -p {self.log_dir}', self.shell_env) self.run_command_or_exit(create_folders) - chmod_logs_dir = Command('chmod a+rx . && chmod a+x %s' % self.log_dir, self.shell_env) + chmod_logs_dir = Command(f'chmod a+rx . && chmod a+x {self.log_dir}', self.shell_env) self.run_command_or_exit(chmod_logs_dir) chmod_x_binaries = [self.tmanager_binary, self.stmgr_binary, self.heron_shell_binary] @@ -396,7 +396,7 @@ def initialize(self): for binary in chmod_x_binaries: stat_result = os.stat(binary)[stat.ST_MODE] if not stat_result & stat.S_IXOTH: - chmod_binary = Command('chmod +x %s' % binary, self.shell_env) + chmod_binary = Command(f'chmod +x {binary}', self.shell_env) self.run_command_or_exit(chmod_binary) # Log itself pid @@ -411,7 +411,7 @@ def update_packing_plan(self, new_packing_plan): # pylint: disable=no-self-use def _load_logging_dir(self, heron_internals_config_file): - with open(heron_internals_config_file, 'r') as stream: + with open(heron_internals_config_file, 'r', encoding='utf8') as stream: heron_internals_config = yaml.safe_load(stream) return heron_internals_config['heron.logging.directory'] @@ -499,19 +499,19 @@ def _get_tmanager_processes(self): retval = {} tmanager_cmd_lst = [ self.tmanager_binary, - '--topology_name=%s' % self.topology_name, - '--topology_id=%s' % self.topology_id, - '--zkhostportlist=%s' % self.state_manager_connection, - '--zkroot=%s' % self.state_manager_root, - '--myhost=%s' % self.primary_host, - '--server_port=%s' % str(self.server_port), - '--controller_port=%s' % str(self.tmanager_controller_port), - '--stats_port=%s' % str(self.tmanager_stats_port), - '--config_file=%s' % self.heron_internals_config_file, - '--override_config_file=%s' % self.override_config_file, - '--metrics_sinks_yaml=%s' % self.metrics_sinks_config_file, - 
'--metricsmgr_port=%s' % str(self.metrics_manager_port), - '--ckptmgr_port=%s' % str(self.checkpoint_manager_port)] + f'--topology_name={self.topology_name}', + f'--topology_id={self.topology_id}', + f'--zkhostportlist={self.state_manager_connection}', + f'--zkroot={self.state_manager_root}', + f'--myhost={self.primary_host}', + f'--server_port={str(self.server_port)}', + f'--controller_port={str(self.tmanager_controller_port)}', + f'--stats_port={str(self.tmanager_stats_port)}', + f'--config_file={self.heron_internals_config_file}', + f'--override_config_file={self.override_config_file}', + f'--metrics_sinks_yaml={self.metrics_sinks_config_file}', + f'--metricsmgr_port={str(self.metrics_manager_port)}', + f'--ckptmgr_port={str(self.checkpoint_manager_port)}'] tmanager_env = self.shell_env.copy() if self.shell_env is not None else {} tmanager_cmd = Command(tmanager_cmd_lst, tmanager_env) @@ -573,7 +573,7 @@ def _get_jvm_instance_cmd(self): return Command(os.path.join(self.heron_java_home, 'bin/java'), self.shell_env) def _get_java_major_version(self): - return int(self._get_jvm_version().split(".")[0]) + return int(self._get_jvm_version().split('.', maxsplit=1)[0]) def _get_java_gc_instance_cmd(self, cmd, gc_name): gc_cmd = [ @@ -598,10 +598,10 @@ def _get_jvm_instance_options(self, instance_id, component_name, remote_debugger total_jvm_size = int(self.component_ram_map[component_name] / (1024 * 1024)) heap_size_mb = total_jvm_size - code_cache_size_mb - java_metasize_mb - Log.info("component name: %s, RAM request: %d, total JVM size: %dM, " - "cache size: %dM, metaspace size: %dM" - % (component_name, self.component_ram_map[component_name], - total_jvm_size, code_cache_size_mb, java_metasize_mb)) + Log.info("component name: %s, RAM request: %d, total JVM size: %dM, "\ + "cache size: %dM, metaspace size: %dM", + component_name, self.component_ram_map[component_name], + total_jvm_size, code_cache_size_mb, java_metasize_mb) xmn_size = int(heap_size_mb / 2) 
java_version = self._get_jvm_version() @@ -610,29 +610,28 @@ def _get_jvm_instance_options(self, instance_id, component_name, remote_debugger java_version.startswith("1.6") or \ java_version.startswith("1.5"): java_metasize_param = 'PermSize' - xmn_param = '-Xmn%dM' % xmn_size + xmn_param = f'-Xmn{xmn_size}M' if self._get_java_major_version() >= 11: # Remove '-Xmn' xmn_param = None instance_options = [ - '-Xmx%dM' % heap_size_mb, - '-Xms%dM' % heap_size_mb, + f'-Xmx{heap_size_mb}M', + f'-Xms{heap_size_mb}M', xmn_param, - '-XX:Max%s=%dM' % (java_metasize_param, java_metasize_mb), - '-XX:%s=%dM' % (java_metasize_param, java_metasize_mb), - '-XX:ReservedCodeCacheSize=%dM' % code_cache_size_mb, + f'-XX:Max{java_metasize_param}={java_metasize_mb}M', + f'-XX:{java_metasize_param}={java_metasize_mb}M', + f'-XX:ReservedCodeCacheSize={code_cache_size_mb}M', '-XX:+PrintCommandLineFlags', '-Djava.net.preferIPv4Stack=true', '-cp', - '%s:%s'% (self.instance_classpath, self.classpath)] + f'{self.instance_classpath}:{self.classpath}'] # Insert GC Options instance_options = self._get_java_gc_instance_cmd(instance_options, instance_id) # Append debugger ports when it is available if remote_debugger_port: - instance_options.append('-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=%s' - % remote_debugger_port) + instance_options.append(f'-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address={remote_debugger_port}') # Append user specified jvm options instance_options.extend(self.instance_jvm_opts.split()) @@ -666,16 +665,16 @@ def _get_jvm_version(self): if not self.jvm_version: cmd = [os.path.join(self.heron_java_home, 'bin/java'), '-cp', self.instance_classpath, 'org.apache.heron.instance.util.JvmVersion'] - process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, - universal_newlines=True) - (process_stdout, process_stderr) = process.communicate() - if process.returncode != 0: - Log.error("Failed to determine JVM version. Exiting. 
Output of %s: %s", - ' '.join(cmd), process_stderr) - sys.exit(1) - - self.jvm_version = process_stdout - Log.info(f"Detected JVM version {self.jvm_version}") + with subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, + universal_newlines=True) as process: + (process_stdout, process_stderr) = process.communicate() + if process.returncode != 0: + Log.error("Failed to determine JVM version. Exiting. Output of %s: %s", + ' '.join(cmd), process_stderr) + sys.exit(1) + + self.jvm_version = process_stdout + Log.info(f"Detected JVM version {self.jvm_version}") return self.jvm_version # Returns the processes for each Python Heron Instance @@ -686,19 +685,19 @@ def _get_python_instance_cmd(self, instance_info): for (instance_id, component_name, global_task_id, component_index) in instance_info: Log.info(f"Python instance {instance_id} component: {component_name}") instance_cmd = [self.python_instance_binary, - '--topology_name=%s' % self.topology_name, - '--topology_id=%s' % self.topology_id, - '--instance_id=%s' % instance_id, - '--component_name=%s' % component_name, - '--task_id=%s' % str(global_task_id), - '--component_index=%s' % str(component_index), - '--stmgr_id=%s' % self.stmgr_ids[self.shard], - '--stmgr_port=%s' % self.tmanager_controller_port, - '--metricsmgr_port=%s' % self.metrics_manager_port, - '--config_file=%s' % self.heron_internals_config_file, - '--override_config_file=%s' % self.override_config_file, - '--topology_pex=%s' % self.topology_binary_file, - '--max_ram=%s' % str(self.component_ram_map[component_name])] + f'--topology_name={self.topology_name}', + f'--topology_id={self.topology_id}', + f'--instance_id={instance_id}', + f'--component_name={component_name}', + f'--task_id={str(global_task_id)}', + f'--component_index={str(component_index)}', + f'--stmgr_id={self.stmgr_ids[self.shard]}', + f'--stmgr_port={self.tmanager_controller_port}', + f'--metricsmgr_port={self.metrics_manager_port}', + 
f'--config_file={self.heron_internals_config_file}', + f'--override_config_file={self.override_config_file}', + f'--topology_pex={self.topology_binary_file}', + f'--max_ram={str(self.component_ram_map[component_name])}'] retval[instance_id] = Command(instance_cmd, self.shell_env) @@ -713,18 +712,18 @@ def _get_cpp_instance_cmd(self, instance_info): Log.info(f"CPP instance {instance_id} component: {component_name}") instance_cmd = [ self.cpp_instance_binary, - '--topology_name=%s' % self.topology_name, - '--topology_id=%s' % self.topology_id, - '--instance_id=%s' % instance_id, - '--component_name=%s' % component_name, - '--task_id=%s' % str(global_task_id), - '--component_index=%s' % str(component_index), - '--stmgr_id=%s' % self.stmgr_ids[self.shard], - '--stmgr_port=%s' % str(self.tmanager_controller_port), - '--metricsmgr_port=%s' % str(self.metrics_manager_port), - '--config_file=%s' % self.heron_internals_config_file, - '--override_config_file=%s' % self.override_config_file, - '--topology_binary=%s' % os.path.abspath(self.topology_binary_file) + f'--topology_name={self.topology_name}', + f'--topology_id={self.topology_id}', + f'--instance_id={instance_id}', + f'--component_name={component_name}', + f'--task_id={str(global_task_id)}', + f'--component_index={str(component_index)}', + f'--stmgr_id={self.stmgr_ids[self.shard]}', + f'--stmgr_port={str(self.tmanager_controller_port)}', + f'--metricsmgr_port={str(self.metrics_manager_port)}', + f'--config_file={self.heron_internals_config_file}', + f'--override_config_file={self.override_config_file}', + f'--topology_binary={os.path.abspath(self.topology_binary_file)}' ] retval[instance_id] = Command(instance_cmd, self.shell_env) @@ -750,23 +749,23 @@ def _get_streaming_processes(self): stmgr_cmd_lst = [ self.stmgr_binary, - '--topology_name=%s' % self.topology_name, - '--topology_id=%s' % self.topology_id, - '--topologydefn_file=%s' % self.topology_defn_file, - '--zkhostportlist=%s' % self.state_manager_connection, 
- '--zkroot=%s' % self.state_manager_root, - '--stmgr_id=%s' % self.stmgr_ids[self.shard], - '--instance_ids=%s' % ','.join([x[0] for x in instance_info]), - '--myhost=%s' % self.primary_host, - '--data_port=%s' % str(self.server_port), - '--local_data_port=%s' % str(self.tmanager_controller_port), - '--metricsmgr_port=%s' % str(self.metrics_manager_port), - '--shell_port=%s' % str(self.shell_port), - '--config_file=%s' % self.heron_internals_config_file, - '--override_config_file=%s' % self.override_config_file, - '--ckptmgr_port=%s' % str(self.checkpoint_manager_port), - '--ckptmgr_id=%s' % self.ckptmgr_ids[self.shard], - '--metricscachemgr_mode=%s' % self.metricscache_manager_mode.lower()] + f'--topology_name={self.topology_name}', + f'--topology_id={self.topology_id}', + f'--topologydefn_file={self.topology_defn_file}', + f'--zkhostportlist={self.state_manager_connection}', + f'--zkroot={self.state_manager_root}', + f'--stmgr_id={self.stmgr_ids[self.shard]}', + f"--instance_ids={','.join([x[0] for x in instance_info])}", + f'--myhost={self.primary_host}', + f'--data_port={str(self.server_port)}', + f'--local_data_port={str(self.tmanager_controller_port)}', + f'--metricsmgr_port={str(self.metrics_manager_port)}', + f'--shell_port={str(self.shell_port)}', + f'--config_file={self.heron_internals_config_file}', + f'--override_config_file={self.override_config_file}', + f'--ckptmgr_port={str(self.checkpoint_manager_port)}', + f'--ckptmgr_id={self.ckptmgr_ids[self.shard]}', + f'--metricscachemgr_mode={self.metricscache_manager_mode.lower()}'] stmgr_env = self.shell_env.copy() if self.shell_env is not None else {} stmgr_cmd = Command(stmgr_cmd_lst, stmgr_env) @@ -789,7 +788,7 @@ def _get_streaming_processes(self): if self.is_stateful_topology: retval.update(self._get_ckptmgr_process()) - if self.pkg_type == 'jar' or self.pkg_type == 'tar': + if self.pkg_type in ('jar', 'tar'): retval.update(self._get_java_instance_cmd(instance_info)) elif self.pkg_type == 'pex': 
retval.update(self._get_python_instance_cmd(instance_info)) @@ -810,8 +809,8 @@ def _get_ckptmgr_process(self): ckptmgr_ram_mb = self.checkpoint_manager_ram / (1024 * 1024) ckptmgr_id = self.ckptmgr_ids[self.shard] ckptmgr_cmd = [os.path.join(self.heron_java_home, "bin/java"), - '-Xms%dM' % ckptmgr_ram_mb, - '-Xmx%dM' % ckptmgr_ram_mb, + f'-Xms{ckptmgr_ram_mb}M', + f'-Xmx{ckptmgr_ram_mb}M', '-XX:+PrintCommandLineFlags', '-Djava.net.preferIPv4Stack=true', '-cp', @@ -856,10 +855,10 @@ def _get_heron_support_processes(self): retval = {} retval[self.heron_shell_ids[self.shard]] = Command([ - '%s' % self.heron_shell_binary, - '--port=%s' % self.shell_port, - '--log_file_prefix=%s/heron-shell-%s.log' % (self.log_dir, self.shard), - '--secret=%s' % self.topology_id], self.shell_env) + f'{self.heron_shell_binary}', + f'--port={self.shell_port}', + f'--log_file_prefix={self.log_dir}/heron-shell-{self.shard}.log', + f'--secret={self.topology_id}'], self.shell_env) return retval @@ -918,7 +917,8 @@ def _kill_processes(self, commands): process_info.process.terminate() # sends SIGTERM to process except OSError as e: if e.errno == 3: # No such process - Log.warn(f"Expected process {process_info.name} with pid {process_info.pid} was not running, ignoring.") + Log.warn(f"Expected process {process_info.name} with "\ + f"pid {process_info.pid} was not running, ignoring.") else: raise e @@ -1059,7 +1059,7 @@ def start_state_manager_watches(self): """ Log.info("Start state manager watches") - with open(self.override_config_file, 'r') as stream: + with open(self.override_config_file, 'r', encoding='utf8') as stream: overrides = yaml.safe_load(stream) if overrides is None: overrides = {} @@ -1084,8 +1084,7 @@ def on_packing_plan_watch(state_manager, new_packing_plan): f"Existing: {str(self.packing_plan)}, New: {str(new_packing_plan)}") if self.packing_plan != new_packing_plan: - Log.info("PackingPlan change detected on shard %s, relaunching effected processes." 
- % self.shard) + Log.info("PackingPlan change detected on shard %s, relaunching effected processes.", self.shard) self.update_packing_plan(new_packing_plan) Log.info("Updating executor processes") @@ -1099,7 +1098,8 @@ def on_packing_plan_watch(state_manager, new_packing_plan): # state_manager as first variable. onPackingPlanWatch = functools.partial(on_packing_plan_watch, state_manager) state_manager.get_packing_plan(self.topology_name, onPackingPlanWatch) - Log.info(f"Registered state watch for packing plan changes with state manager {str(state_manager)}.") + Log.info(f"Registered state watch for packing "\ + f"plan changes with state manager {str(state_manager)}.") def stop_state_manager_watches(self): Log.info("Stopping state managers") diff --git a/heron/tools/cli/src/python/cli_helper.py b/heron/tools/cli/src/python/cli_helper.py index f9253718f91..9f235222d80 100644 --- a/heron/tools/cli/src/python/cli_helper.py +++ b/heron/tools/cli/src/python/cli_helper.py @@ -21,7 +21,7 @@ ''' cli_helper.py ''' import logging import requests -import heron.tools.common.src.python.utils.config as config +from heron.tools.common.src.python.utils import config from heron.tools.cli.src.python.result import SimpleResult, Status from heron.tools.cli.src.python import args from heron.tools.cli.src.python import execute diff --git a/heron/tools/cli/src/python/execute.py b/heron/tools/cli/src/python/execute.py index 6d0b2db6776..f71b0e90747 100644 --- a/heron/tools/cli/src/python/execute.py +++ b/heron/tools/cli/src/python/execute.py @@ -83,6 +83,7 @@ def heron_class(class_name, lib_jars, extra_jars=None, args=None, java_defines=N Log.debug("Heron options: {%s}", str(heron_env["HERON_OPTIONS"])) # invoke the command with subprocess and print error message, if any + # pylint: disable=consider-using-with process = subprocess.Popen(all_args, env=heron_env, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True, bufsize=1) # stdout message has the information Java 
program sends back @@ -134,6 +135,7 @@ def heron_pex(topology_pex, topology_class_name, args=None): Log.debug("Invoking class using command: ``%s''", ' '.join(cmd)) Log.debug('Heron options: {%s}', str(heron_env['HERON_OPTIONS'])) # invoke the command with subprocess and print error message, if any + # pylint: disable=consider-using-with process = subprocess.Popen(cmd, env=heron_env, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True, bufsize=1) # pylint: disable=fixme @@ -170,6 +172,7 @@ def heron_cpp(topology_binary, args=None): print(f"""Invoking class using command: ``{' '.join(cmd)}''""") print(f"Heron options: {str(heron_env['HERON_OPTIONS'])}") # invoke the command with subprocess and print error message, if any + # pylint: disable=consider-using-with proc = subprocess.Popen(cmd, env=heron_env, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True, bufsize=1) return ProcessResult(proc) From ffc1af5d061a95e5ac8987144ed594c504fa2e69 Mon Sep 17 00:00:00 2001 From: Nicholas Nezis Date: Sun, 3 Apr 2022 19:51:14 -0400 Subject: [PATCH 63/82] More fixes --- heron/executor/src/python/heron_executor.py | 8 ++++++-- heron/tools/cli/src/python/result.py | 1 + 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/heron/executor/src/python/heron_executor.py b/heron/executor/src/python/heron_executor.py index b53fc35995c..2fbbfc32771 100755 --- a/heron/executor/src/python/heron_executor.py +++ b/heron/executor/src/python/heron_executor.py @@ -342,7 +342,8 @@ def init_from_parsed_args(self, parsed_args): self.metricscache_manager_mode = parsed_args.metricscache_manager_mode \ if parsed_args.metricscache_manager_mode else "disabled" self.health_manager_mode = parsed_args.health_manager_mode - self.health_manager_classpath = f'{self.scheduler_classpath}:{parsed_args.health_manager_classpath}' + self.health_manager_classpath = f'{self.scheduler_classpath}:'\ + f'{parsed_args.health_manager_classpath}' self.verbose_gc = 
parsed_args.verbose_gc self.jvm_remote_debugger_ports = \ parsed_args.jvm_remote_debugger_ports.split(",") \ @@ -631,7 +632,8 @@ def _get_jvm_instance_options(self, instance_id, component_name, remote_debugger instance_options = self._get_java_gc_instance_cmd(instance_options, instance_id) # Append debugger ports when it is available if remote_debugger_port: - instance_options.append(f'-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address={remote_debugger_port}') + instance_options.append(f'-agentlib:jdwp=transport=dt_socket,'\ + f'server=y,suspend=n,address={remote_debugger_port}') # Append user specified jvm options instance_options.extend(self.instance_jvm_opts.split()) @@ -879,6 +881,7 @@ def _run_process(self, name, cmd): try: # stderr is redirected to stdout so that it can more easily be logged. stderr has a max buffer # size and can cause the child process to deadlock if it fills up + # pylint: disable=consider-using-with process = subprocess.Popen(cmd.cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, env=cmd.env, universal_newlines=True, bufsize=1) proc.async_stream_process_stdout(process, stdout_log_fn(name)) @@ -893,6 +896,7 @@ def _run_blocking_process(self, cmd, is_shell=False): try: # stderr is redirected to stdout so that it can more easily be logged. 
stderr has a max buffer # size and can cause the child process to deadlock if it fills up + # pylint: disable=consider-using-with process = subprocess.Popen(cmd.cmd, shell=is_shell, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True, env=cmd.env) diff --git a/heron/tools/cli/src/python/result.py b/heron/tools/cli/src/python/result.py index 1aed7e72cfa..d2b83736e91 100644 --- a/heron/tools/cli/src/python/result.py +++ b/heron/tools/cli/src/python/result.py @@ -184,6 +184,7 @@ def render(results): # check if all results are successful def is_successful(results): if isinstance(results, list): + # pylint: disable=use-a-generator return all([is_successful(result) for result in results]) if isinstance(results, Result): return results.status in (Status.Ok, Status.DryRun) From df11d8cb2bcc3bc0fd38c83fe29cac9c8be672b9 Mon Sep 17 00:00:00 2001 From: Nicholas Nezis Date: Sun, 3 Apr 2022 19:55:00 -0400 Subject: [PATCH 64/82] One more fix --- heron/executor/src/python/heron_executor.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/heron/executor/src/python/heron_executor.py b/heron/executor/src/python/heron_executor.py index 2fbbfc32771..204dd36e77f 100755 --- a/heron/executor/src/python/heron_executor.py +++ b/heron/executor/src/python/heron_executor.py @@ -1088,7 +1088,8 @@ def on_packing_plan_watch(state_manager, new_packing_plan): f"Existing: {str(self.packing_plan)}, New: {str(new_packing_plan)}") if self.packing_plan != new_packing_plan: - Log.info("PackingPlan change detected on shard %s, relaunching effected processes.", self.shard) + Log.info("PackingPlan change detected on shard %s, "\ + "relaunching effected processes.", self.shard) self.update_packing_plan(new_packing_plan) Log.info("Updating executor processes") From 8af12c077c7dc52db28fdb533846fb48583d26cf Mon Sep 17 00:00:00 2001 From: Nicholas Nezis Date: Sun, 3 Apr 2022 20:03:37 -0400 Subject: [PATCH 65/82] More fixes --- examples/src/python/bolt/count_bolt.py | 2 +- 
examples/src/python/bolt/stateful_count_bolt.py | 2 +- examples/src/python/bolt/window_size_bolt.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/src/python/bolt/count_bolt.py b/examples/src/python/bolt/count_bolt.py index 63e19bc9941..77c4d6f1aaa 100644 --- a/examples/src/python/bolt/count_bolt.py +++ b/examples/src/python/bolt/count_bolt.py @@ -20,7 +20,7 @@ """module for example bolt: CountBolt""" from collections import Counter -import heronpy.api.global_metrics as global_metrics +from heronpy.api import global_metrics from heronpy.api.bolt.bolt import Bolt # pylint: disable=unused-argument diff --git a/examples/src/python/bolt/stateful_count_bolt.py b/examples/src/python/bolt/stateful_count_bolt.py index cc97d995233..0eb905a2396 100644 --- a/examples/src/python/bolt/stateful_count_bolt.py +++ b/examples/src/python/bolt/stateful_count_bolt.py @@ -21,7 +21,7 @@ """module for example bolt: CountBolt""" from collections import Counter -import heronpy.api.global_metrics as global_metrics +from heronpy.api import global_metrics from heronpy.api.bolt.bolt import Bolt from heronpy.api.state.stateful_component import StatefulComponent diff --git a/examples/src/python/bolt/window_size_bolt.py b/examples/src/python/bolt/window_size_bolt.py index 9a14afd8da6..b0c9dac8f6a 100644 --- a/examples/src/python/bolt/window_size_bolt.py +++ b/examples/src/python/bolt/window_size_bolt.py @@ -27,7 +27,7 @@ class WindowSizeBolt(SlidingWindowBolt): A bolt that calculates the average batch size of window""" def initialize(self, config, context): - super(WindowSizeBolt, self).initialize(config, context) + super().initialize(config, context) self.numerator = 0.0 self.denominator = 0.0 From 708734ddb9f368a6bc8189adef3caec00a723bc2 Mon Sep 17 00:00:00 2001 From: Nicholas Nezis Date: Mon, 4 Apr 2022 01:59:38 -0400 Subject: [PATCH 66/82] Fixing local integration test --- .../src/python/local_test_runner/test_template.py | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/integration_test/src/python/local_test_runner/test_template.py b/integration_test/src/python/local_test_runner/test_template.py index 1c11c22e5bc..603b4a8ff04 100644 --- a/integration_test/src/python/local_test_runner/test_template.py +++ b/integration_test/src/python/local_test_runner/test_template.py @@ -256,11 +256,11 @@ def _get_tracker_pplan(self): response = urlopen(url) physical_plan_json = json.loads(response.read()) - if 'result' not in physical_plan_json: + if 'instances' not in physical_plan_json: raise status.TestFailure( f"Could not find result json in physical plan request to tracker: {url}") - return physical_plan_json['result'] + return physical_plan_json def _block_until_topology_running(self, min_instances): retries_left = RETRY_COUNT From c8d35a65eac4aca24f3bfd3bf9caf341d2bddad0 Mon Sep 17 00:00:00 2001 From: Nicholas Nezis Date: Wed, 6 Apr 2022 20:21:32 -0400 Subject: [PATCH 67/82] Convert memory value to int to fix test issue --- heron/executor/src/python/heron_executor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/heron/executor/src/python/heron_executor.py b/heron/executor/src/python/heron_executor.py index 204dd36e77f..3745b039494 100755 --- a/heron/executor/src/python/heron_executor.py +++ b/heron/executor/src/python/heron_executor.py @@ -808,7 +808,7 @@ def _get_ckptmgr_process(self): ckptmgr_main_class = 'org.apache.heron.ckptmgr.CheckpointManager' - ckptmgr_ram_mb = self.checkpoint_manager_ram / (1024 * 1024) + ckptmgr_ram_mb = int(self.checkpoint_manager_ram / (1024 * 1024)) ckptmgr_id = self.ckptmgr_ids[self.shard] ckptmgr_cmd = [os.path.join(self.heron_java_home, "bin/java"), f'-Xms{ckptmgr_ram_mb}M', From 32a67ddafed3fb836fb17b7c16dd36321a3419b9 Mon Sep 17 00:00:00 2001 From: Nicholas Nezis Date: Wed, 6 Apr 2022 21:09:17 -0400 Subject: [PATCH 68/82] More style fixes and type conversion to correct string interpolation --- bazel_configure.py | 28 +++++++++---------- 
examples/src/python/bolt/half_ack_bolt.py | 2 +- .../tests/python/heron_executor_unittest.py | 20 ++++++------- heron/instance/src/python/network/protocol.py | 2 +- heron/statemgrs/src/python/statemanager.py | 2 +- .../integration_test/core/aggregator_bolt.py | 4 +-- .../core/integration_test_bolt.py | 8 +++--- .../local_test_runner/test_kill_bolt.py | 2 +- .../python/local_test_runner/test_template.py | 4 +-- .../src/python/test_runner/main.py | 2 +- .../src/python/topology_test_runner/main.py | 2 +- 11 files changed, 37 insertions(+), 39 deletions(-) diff --git a/bazel_configure.py b/bazel_configure.py index 49a8498deaf..92aca5506e6 100755 --- a/bazel_configure.py +++ b/bazel_configure.py @@ -103,10 +103,10 @@ def discover_git_branch(): # Utility functions for system defines ###################################################################### def define_string(name, value): - return '#define %s "%s"\n' % (name, value) + return f'#define {name} "{value}"\n' def define_value(name, value): - return '#define %s %s\n' % (name, value) + return f'#define {name} {value}\n' ###################################################################### # Discover where a program is located using the PATH variable @@ -267,17 +267,17 @@ def make_executable(path): def discover_tool(program, msg, envvar, min_version = ''): VALUE = discover_program(program, envvar) if not VALUE: - fail("""You need to have %s installed to build Heron. -Note: Some vendors install %s with a versioned name -(like /usr/bin/%s-4.8). You can set the %s environment -variable to specify the full path to yours.'""" % (program, program, program, envvar)) + fail(f"""You need to have {program} installed to build Heron. +Note: Some vendors install {program} with a versioned name +(like /usr/bin/{program}-4.8). 
You can set the {envvar} environment +variable to specify the full path to yours.'""") print_value = VALUE if min_version: version = assert_min_version(VALUE, min_version) - print_value = "%s (%s)" % (VALUE, version) + print_value = f"{VALUE} ({version})" - print('Using %s:\t%s' % (msg.ljust(20), print_value)) + print(f'Using {msg.ljust(20)}:\t{print_value}') return VALUE def discover_jdk(): @@ -290,7 +290,7 @@ def discover_jdk(): "You can set the JAVA_HOME environment variavle to specify the full path to yours.") jdk_bin_path = os.path.dirname(javac_path) jdk_path = os.path.dirname(jdk_bin_path) - print('Using %s:\t%s' % ('JDK'.ljust(20), jdk_path)) + print(f"Using {'JDK'.ljust(20)}:\t{jdk_path}") return jdk_path def test_venv(): @@ -312,14 +312,14 @@ def discover_tool_default(program, msg, envvar, defvalue): VALUE = discover_program(program, envvar) if not VALUE: VALUE = defvalue - print('%s:\tnot found, but ok' % (program.ljust(26))) + print(f'{program.ljust(26)}:\tnot found, but ok') else: - print('Using %s:\t%s' % (msg.ljust(20), VALUE)) + print(f'Using {msg.ljust(20)}:\t{VALUE}') return VALUE def export_env_to_file(out_file, env): if env in os.environ: - out_file.write('export %s="%s"\n' % (env, os.environ[env])) + out_file.write(f'export {env}="{os.environ[env]}"\n') ###################################################################### # Generate the shell script that recreates the environment @@ -348,7 +348,7 @@ def write_env_exec_file(platform, environ): out_file.write('$*') make_executable(env_exec_file) - print('Wrote the environment exec file %s' % (env_exec_file)) + print(f'Wrote the environment exec file {env_exec_file}') ###################################################################### @@ -385,7 +385,7 @@ def write_heron_config_header(config_file): out_file.write(define_string('GIT_BRANCH', discover_git_branch())) out_file.write(generate_system_defines()) out_file.close() - print('Wrote the heron config header file: \t"%s"' % (config_file)) + 
print(f'Wrote the heron config header file: \t"{config_file}"') ###################################################################### # MAIN program that sets up your workspace for bazel diff --git a/examples/src/python/bolt/half_ack_bolt.py b/examples/src/python/bolt/half_ack_bolt.py index 537edaa3cb8..578e2f238e6 100644 --- a/examples/src/python/bolt/half_ack_bolt.py +++ b/examples/src/python/bolt/half_ack_bolt.py @@ -38,4 +38,4 @@ def process(self, tup): def process_tick(self, tup): self.log("Got tick tuple!") - self.log("Total received: %d", self.total) + self.log(f"Total received: {self.total}") diff --git a/heron/executor/tests/python/heron_executor_unittest.py b/heron/executor/tests/python/heron_executor_unittest.py index 872ed34a2db..d8aadd6427d 100644 --- a/heron/executor/tests/python/heron_executor_unittest.py +++ b/heron/executor/tests/python/heron_executor_unittest.py @@ -113,7 +113,7 @@ def get_expected_metricsmgr_command(container_id): "-XX:MaxGCPauseMillis=100 -XX:InitiatingHeapOccupancyPercent=30 " \ "-XX:ParallelGCThreads=4 " \ "-cp metricsmgr_classpath org.apache.heron.metricsmgr.MetricsManager " \ - f"--id=metricsmgr-{container_id} --port=metricsmgr_port " \ + f"--id=metricsmgr-{int(container_id)} --port=metricsmgr_port " \ "--topology=topname --cluster=cluster --role=role --environment=environ " \ "--topology-id=topid " \ f"--system-config-file={INTERNAL_CONF_PATH} --override-config-file={OVERRIDE_PATH} " \ @@ -143,7 +143,7 @@ def get_expected_healthmgr_command(): "--environment environ --topology_name topname --metricsmgr_port metricsmgr_port" def get_expected_instance_command(component_name, instance_id, container_id): - instance_name = f"container_{container_id}_{component_name}_{instance_id}" + instance_name = f"container_{int(container_id)}_{component_name}_{int(instance_id)}" return "heron_java_home/bin/java -Xmx320M -Xms320M -XX:MaxMetaspaceSize=128M " \ "-XX:MetaspaceSize=128M -XX:ReservedCodeCacheSize=64M -XX:+PrintCommandLineFlags " \ 
"-Djava.net.preferIPv4Stack=true " \ @@ -153,7 +153,7 @@ def get_expected_instance_command(component_name, instance_id, container_id): "-cp instance_classpath:classpath -XX:+HeapDumpOnOutOfMemoryError " \ "org.apache.heron.instance.HeronInstance -topology_name topname -topology_id topid " \ f"-instance_id {instance_name} -component_name {component_name} " \ - f"-task_id {instance_id} -component_index 0 -stmgr_id stmgr-{container_id} " \ + f"-task_id {int(instance_id)} -component_index 0 -stmgr_id stmgr-{int(container_id)} " \ "-stmgr_port tmanager_controller_port -metricsmgr_port metricsmgr_port " \ f"-system_config_file {INTERNAL_CONF_PATH} -override_config_file {OVERRIDE_PATH}" @@ -161,12 +161,11 @@ def get_expected_instance_command(component_name, instance_id, container_id): expected_processes_container_0 = [ ProcessInfo(MockPOpen(), 'heron-tmanager', 'tmanager_binary --topology_name=topname --topology_id=topid ' - '--zkhostportlist=zknode --zkroot=zkroot --myhost=%s --server_port=server_port ' + f'--zkhostportlist=zknode --zkroot=zkroot --myhost={HOSTNAME} --server_port=server_port ' '--controller_port=tmanager_controller_port --stats_port=tmanager_stats_port ' - '--config_file=%s --override_config_file=%s ' + f'--config_file={INTERNAL_CONF_PATH} --override_config_file={OVERRIDE_PATH} ' '--metrics_sinks_yaml=metrics_sinks_config_file ' - '--metricsmgr_port=metricsmgr_port ' - '--ckptmgr_port=ckptmgr-port' % (HOSTNAME, INTERNAL_CONF_PATH, OVERRIDE_PATH)), + '--metricsmgr_port=metricsmgr_port --ckptmgr_port=ckptmgr-port'), ProcessInfo(MockPOpen(), 'heron-metricscache', get_expected_metricscachemgr_command()), ProcessInfo(MockPOpen(), 'heron-healthmgr', get_expected_healthmgr_command()), ProcessInfo(MockPOpen(), 'metricsmgr-0', get_expected_metricsmgr_command(0)), @@ -180,12 +179,11 @@ def get_expected_instance_command(component_name, instance_id, container_id): '--topologydefn_file=topdefnfile --zkhostportlist=zknode --zkroot=zkroot ' '--stmgr_id=stmgr-1 ' 
'--instance_ids=container_1_word_3,container_1_exclaim1_2,container_1_exclaim1_1 ' - '--myhost=%s --data_port=server_port ' + f'--myhost={HOSTNAME} --data_port=server_port ' '--local_data_port=tmanager_controller_port --metricsmgr_port=metricsmgr_port ' - '--shell_port=shell-port --config_file=%s --override_config_file=%s ' + f'--shell_port=shell-port --config_file={INTERNAL_CONF_PATH} --override_config_file={OVERRIDE_PATH} ' '--ckptmgr_port=ckptmgr-port --ckptmgr_id=ckptmgr-1 ' - '--metricscachemgr_mode=cluster' - % (HOSTNAME, INTERNAL_CONF_PATH, OVERRIDE_PATH)), + '--metricscachemgr_mode=cluster'), ProcessInfo(MockPOpen(), 'metricsmgr-1', get_expected_metricsmgr_command(1)), ProcessInfo(MockPOpen(), 'container_1_word_3', get_expected_instance_command('word', 3, 1)), ProcessInfo(MockPOpen(), 'container_1_exclaim1_2', diff --git a/heron/instance/src/python/network/protocol.py b/heron/instance/src/python/network/protocol.py index a8ba0f71955..21f25b262dc 100644 --- a/heron/instance/src/python/network/protocol.py +++ b/heron/instance/src/python/network/protocol.py @@ -195,7 +195,7 @@ def read(self, dispatcher): if len(self.header) == HeronProtocol.HEADER_SIZE: self.is_header_read = True else: - Log.debug(f"Header read incomplete; read {len(self.header)} bytes of header") + Log.debug("Header read incomplete; read %d bytes of header", len(self.header)) return if self.is_header_read and not self.is_complete: diff --git a/heron/statemgrs/src/python/statemanager.py b/heron/statemgrs/src/python/statemanager.py index cb097aa4ad4..32fae6a297e 100644 --- a/heron/statemgrs/src/python/statemanager.py +++ b/heron/statemgrs/src/python/statemanager.py @@ -116,7 +116,7 @@ def establish_ssh_tunnel(self): localport = self.pick_unused_port() # pylint: disable=consider-using-with self.tunnel.append(subprocess.Popen( - ('ssh', self.tunnelhost, f'-NL127.0.0.1:{localport}:{host}:{port}'))) + ('ssh', self.tunnelhost, f'-NL127.0.0.1:{int(localport)}:{host}:{int(port)}'))) 
localportlist.append(('127.0.0.1', localport)) return localportlist diff --git a/integration_test/src/python/integration_test/core/aggregator_bolt.py b/integration_test/src/python/integration_test/core/aggregator_bolt.py index de22548c351..3c71857cfef 100644 --- a/integration_test/src/python/integration_test/core/aggregator_bolt.py +++ b/integration_test/src/python/integration_test/core/aggregator_bolt.py @@ -49,8 +49,8 @@ def _post_result_to_server(self, json_result): if response.status == 200: Log.info("HTTP POST successful") else: - Log.severe("HTTP POST failed, response code: %d, response: %s" - % (response.status, response.read())) + Log.severe("HTTP POST failed, response code: %d, response: %s", + response.status, response.read()) return response.status def write_finished_data(self): diff --git a/integration_test/src/python/integration_test/core/integration_test_bolt.py b/integration_test/src/python/integration_test/core/integration_test_bolt.py index db55a89da6b..89a06b58fc1 100644 --- a/integration_test/src/python/integration_test/core/integration_test_bolt.py +++ b/integration_test/src/python/integration_test/core/integration_test_bolt.py @@ -114,15 +114,15 @@ def emit(self, tup, stream=Stream.DEFAULT_STREAM_ID, anchors=None, super().emit(tup, stream, anchors, direct_task, need_task_ids) def ack(self, tup): - Log.info("Trying to do an ack. tuples processed: %d, received: %d" - % (self.tuples_processed, self.tuple_received)) + Log.info("Trying to do an ack. tuples processed: %d, received: %d", + self.tuples_processed, self.tuple_received) if self.tuples_processed < self.tuple_received: super().ack(tup) self.tuples_processed += 1 def fail(self, tup): - Log.info("Trying to do a fail. tuples processed: %d, received: %d" - % (self.tuples_processed, self.tuple_received)) + Log.info("Trying to do a fail. 
tuples processed: %d, received: %d", + self.tuples_processed, self.tuple_received) if self.tuples_processed < self.tuple_received: super().fail(tup) self.tuples_processed += 1 diff --git a/integration_test/src/python/local_test_runner/test_kill_bolt.py b/integration_test/src/python/local_test_runner/test_kill_bolt.py index 5efe2ad3310..413eeeb5daf 100644 --- a/integration_test/src/python/local_test_runner/test_kill_bolt.py +++ b/integration_test/src/python/local_test_runner/test_kill_bolt.py @@ -31,5 +31,5 @@ class TestKillBolt(test_template.TestTemplate): def execute_test_case(self): logging.info("Executing kill bolt") bolt_pid = self.get_pid( - 'container_%d_%s' % (NON_TMANAGER_SHARD, HERON_BOLT), self.params['workingDirectory']) + f'container_{int(NON_TMANAGER_SHARD)}_{HERON_BOLT}', self.params['workingDirectory']) self.kill_process(bolt_pid) diff --git a/integration_test/src/python/local_test_runner/test_template.py b/integration_test/src/python/local_test_runner/test_template.py index 603b4a8ff04..e6bcc3f892f 100644 --- a/integration_test/src/python/local_test_runner/test_template.py +++ b/integration_test/src/python/local_test_runner/test_template.py @@ -246,11 +246,11 @@ def kill_strmgr(self): def kill_metricsmgr(self): logging.info("Executing kill metrics manager") metricsmgr_pid = self.get_pid( - '%s-%d' % (HERON_METRICSMGR, NON_TMANAGER_SHARD), self.params['workingDirectory']) + f'{HERON_METRICSMGR}-{int(NON_TMANAGER_SHARD)}', self.params['workingDirectory']) self.kill_process(metricsmgr_pid) def _get_tracker_pplan(self): - url = 'http://localhost:%s/topologies/physicalplan?' 
% self.params['trackerPort']\ + url = f'http://localhost:{self.params['trackerPort']}/topologies/physicalplan?'\ + 'cluster=local&environ=default&topology=IntegrationTest_LocalReadWriteTopology' logging.debug("Fetching physical plan from %s", url) response = urlopen(url) diff --git a/integration_test/src/python/test_runner/main.py b/integration_test/src/python/test_runner/main.py index fbfd874fe95..732a154c419 100644 --- a/integration_test/src/python/test_runner/main.py +++ b/integration_test/src/python/test_runner/main.py @@ -355,7 +355,7 @@ def _run_single_test(topology_name, topology_conf, test_args, http_server_host_p test_threads = [] for topology_conf in test_topologies: - topology_name = ("%s_%s_%s") % (timestamp, topology_conf["topologyName"], str(uuid.uuid4())) + topology_name = f"{timestamp}_{topology_conf['topologyName']}_{str(uuid.uuid4())}" classpath = topology_classpath_prefix + topology_conf["classPath"] # if the test includes an update we need to pass that info to the topology so it can send diff --git a/integration_test/src/python/topology_test_runner/main.py b/integration_test/src/python/topology_test_runner/main.py index 77867c54a44..e15b926ecc4 100644 --- a/integration_test/src/python/topology_test_runner/main.py +++ b/integration_test/src/python/topology_test_runner/main.py @@ -549,7 +549,7 @@ def _run_single_test(topology_name, topology_conf, test_args, http_server_host_p test_threads = [] for topology_conf in test_topologies: - topology_name = ("%s_%s_%s") % (timestamp, topology_conf["topologyName"], str(uuid.uuid4())) + topology_name = f"{timestamp}_{topology_conf['topologyName']}_{str(uuid.uuid4())}" classpath = topology_classpath_prefix + topology_conf["classPath"] update_args = "" From c395071604e9a69f21796903debb943ed1c46bbe Mon Sep 17 00:00:00 2001 From: Nicholas Nezis Date: Wed, 6 Apr 2022 21:14:28 -0400 Subject: [PATCH 69/82] Another fix to enforce `int` type --- third_party/python/semver/semver.py | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/third_party/python/semver/semver.py b/third_party/python/semver/semver.py index de4d3bc5b12..cada8d2c7c0 100644 --- a/third_party/python/semver/semver.py +++ b/third_party/python/semver/semver.py @@ -107,7 +107,7 @@ def min_ver(ver1, ver2): def format_version(major, minor, patch, prerelease=None, build=None): - version = f"{major}.{minor}.{patch}" + version = f"{int(major)}.{int(minor)}.{int(patch)}" if prerelease is not None: version = version + f"-{prerelease}" From 1a4fe0489920efcb620eb256f4c6ceb9bd879075 Mon Sep 17 00:00:00 2001 From: Nicholas Nezis Date: Thu, 7 Apr 2022 00:25:24 -0400 Subject: [PATCH 70/82] Typo fix --- .../src/python/local_test_runner/test_template.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/integration_test/src/python/local_test_runner/test_template.py b/integration_test/src/python/local_test_runner/test_template.py index e6bcc3f892f..8ea182cd642 100644 --- a/integration_test/src/python/local_test_runner/test_template.py +++ b/integration_test/src/python/local_test_runner/test_template.py @@ -250,8 +250,8 @@ def kill_metricsmgr(self): self.kill_process(metricsmgr_pid) def _get_tracker_pplan(self): - url = f'http://localhost:{self.params['trackerPort']}/topologies/physicalplan?'\ - + 'cluster=local&environ=default&topology=IntegrationTest_LocalReadWriteTopology' + url = f"http://localhost:{self.params['trackerPort']}/topologies/physicalplan?"\ + + "cluster=local&environ=default&topology=IntegrationTest_LocalReadWriteTopology" logging.debug("Fetching physical plan from %s", url) response = urlopen(url) physical_plan_json = json.loads(response.read()) From 1cf27d9f345b7d46827aaa197243a2e61d954067 Mon Sep 17 00:00:00 2001 From: Nicholas Nezis Date: Thu, 7 Apr 2022 07:45:01 -0400 Subject: [PATCH 71/82] Remove import that IDE auto added --- integration_test/src/python/topology_test_runner/main.py | 1 - 1 file changed, 1 deletion(-) diff --git 
a/integration_test/src/python/topology_test_runner/main.py b/integration_test/src/python/topology_test_runner/main.py index e15b926ecc4..30e9f450e8b 100644 --- a/integration_test/src/python/topology_test_runner/main.py +++ b/integration_test/src/python/topology_test_runner/main.py @@ -23,7 +23,6 @@ import re import sys import time -from turtle import update import uuid from http.client import HTTPConnection from threading import Lock, Thread From e30e6bace1ab9764f0461a15bde317bf93acf1d4 Mon Sep 17 00:00:00 2001 From: Nicholas Nezis Date: Fri, 8 Apr 2022 00:33:18 -0400 Subject: [PATCH 72/82] Update heron/statemgrs/src/python/statemanager.py Co-authored-by: Saad Ur Rahman --- heron/statemgrs/src/python/statemanager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/heron/statemgrs/src/python/statemanager.py b/heron/statemgrs/src/python/statemanager.py index 32fae6a297e..343dfa66e72 100644 --- a/heron/statemgrs/src/python/statemanager.py +++ b/heron/statemgrs/src/python/statemanager.py @@ -92,7 +92,7 @@ def is_host_port_reachable(self): socket.create_connection(hostport, StateManager.TIMEOUT_SECONDS) return True except: - LOG.info("StateManager %s Unable to connect to host: %s port %i", + LOG.info("StateManager %s Unable to connect to host: %d port %d", self.name, hostport[0], hostport[1]) continue return False From 52c9401b81b1ba83304229e8454fe221386adb70 Mon Sep 17 00:00:00 2001 From: Nicholas Nezis Date: Fri, 8 Apr 2022 00:41:29 -0400 Subject: [PATCH 73/82] Update heronpy/connectors/pulsar/pulsarstreamlet.py Co-authored-by: Saad Ur Rahman --- heronpy/connectors/pulsar/pulsarstreamlet.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/heronpy/connectors/pulsar/pulsarstreamlet.py b/heronpy/connectors/pulsar/pulsarstreamlet.py index be2d6de39dd..e4c9f657cd4 100644 --- a/heronpy/connectors/pulsar/pulsarstreamlet.py +++ b/heronpy/connectors/pulsar/pulsarstreamlet.py @@ -29,9 +29,7 @@ class PulsarStreamlet(Streamlet): """Streamlet 
facade on top of PulsarSpout""" def __init__(self, service_url, topic_name, stage_name=None, parallelism=None, receive_timeout_ms=None, input_schema=None): - super().__init__(parents=[], - stage_name=stage_name, - parallelism=parallelism) + super().__init__(parents=[], stage_name=stage_name, parallelism=parallelism) self._pulsar_service_url = service_url self._pulsar_topic_name = topic_name self._pulsar_receive_timeout_ms = receive_timeout_ms From 1525256ad0adc63b98d38741b8820b12a8cddb29 Mon Sep 17 00:00:00 2001 From: Nicholas Nezis Date: Fri, 8 Apr 2022 01:31:09 -0400 Subject: [PATCH 74/82] Fixes based on feedback --- heron/tools/tracker/src/python/main.py | 2 -- heron/tools/tracker/src/python/metricstimeline.py | 12 +++++++----- heron/tools/ui/src/python/main.py | 8 ++++---- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/heron/tools/tracker/src/python/main.py b/heron/tools/tracker/src/python/main.py index ac0e49a22ed..4abf29cbaf7 100644 --- a/heron/tools/tracker/src/python/main.py +++ b/heron/tools/tracker/src/python/main.py @@ -125,8 +125,6 @@ def cli( log_level = logging.DEBUG if verbose else logging.INFO log.configure(log_level) - global Log - Log = log.Log stmgr_override = { "type": stmgr_type, diff --git a/heron/tools/tracker/src/python/metricstimeline.py b/heron/tools/tracker/src/python/metricstimeline.py index 93c02801e32..5a49e2ef0e0 100644 --- a/heron/tools/tracker/src/python/metricstimeline.py +++ b/heron/tools/tracker/src/python/metricstimeline.py @@ -25,13 +25,15 @@ from pydantic import BaseModel, Field +from heron.common.src.python.utils.log import Log +from heron.proto import common_pb2 from heron.proto import tmanager_pb2 class MetricsTimeline(BaseModel): component: str - starttime: int = Field(..., alias="starttime") - endtime: int = Field(..., alias="endtime") + start_time: int = Field(..., alias="starttime") + end_time: int = Field(..., alias="endtime") timeline: Dict[str, Dict[str, Dict[int, str]]] = Field( ..., 
description="map of (metric name, instance, start) to metric value", @@ -83,9 +85,9 @@ async def get_metrics_timeline( response_data = tmanager_pb2.MetricResponse() response_data.ParseFromString(result.content) - # if response_data.status.status == common_pb2.NOTOK: - # if response_data.status.HasField("message"): - # Log.warn("Received response from Tmanager: %s", response_data.status.message) + if response_data.status.status == common_pb2.NOTOK: + if response_data.status.HasField("message"): + Log.warn("Received response from Tmanager: %s", response_data.status.message) timeline = {} # Loop through all the metrics diff --git a/heron/tools/ui/src/python/main.py b/heron/tools/ui/src/python/main.py index 269fa5e0ca4..a4118846731 100644 --- a/heron/tools/ui/src/python/main.py +++ b/heron/tools/ui/src/python/main.py @@ -200,8 +200,8 @@ def timeline( topology: str, metric: str, instance: str, - starttime: str, - endtime: str, + starttime: int, + endtime: int, component: Optional[str] = None, max: bool = False, # pylint: disable=redefined-builtin ) -> dict: @@ -655,10 +655,10 @@ def cli( base_url = base_url_option log_level = logging.DEBUG if verbose else logging.INFO log.configure(log_level) - Log = log.Log + tracker.tracker_url = tracker_url_option - uvicorn.run(app, host=host, port=port, log_config=None) + uvicorn.run(app, host=host, port=port, log_level=log_level) if __name__ == "__main__": From ba1aaecb256fe875953317e1bf0bdd082466a6e0 Mon Sep 17 00:00:00 2001 From: Nicholas Nezis Date: Fri, 8 Apr 2022 02:01:35 -0400 Subject: [PATCH 75/82] Putting back to original logic --- heron/tools/tracker/src/python/metricstimeline.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/heron/tools/tracker/src/python/metricstimeline.py b/heron/tools/tracker/src/python/metricstimeline.py index 5a49e2ef0e0..ab2b399fdcc 100644 --- a/heron/tools/tracker/src/python/metricstimeline.py +++ b/heron/tools/tracker/src/python/metricstimeline.py @@ -100,9 +100,9 @@ async 
def get_metrics_timeline( for im in metric.metric: metricname = im.name if metricname not in timeline: - timeline.setdefault(metricname, {}) + timeline[metricname] = {} if instance not in timeline[metricname]: - timeline.setdefault(metricname, {})[instance] = {} + timeline[metricname][instance] = {} # We get minutely metrics. # Interval-values correspond to the minutely mark for which From 403f32cc17bfd7cd7e675dbeed7a6948f54404c0 Mon Sep 17 00:00:00 2001 From: Saad Ur Rahman Date: Fri, 8 Apr 2022 11:24:16 -0400 Subject: [PATCH 76/82] [UI] removing global reference to Log This was added for debugging to update the Log object and is no longer required. --- heron/tools/ui/src/python/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/heron/tools/ui/src/python/main.py b/heron/tools/ui/src/python/main.py index a4118846731..c4b3dc6b704 100644 --- a/heron/tools/ui/src/python/main.py +++ b/heron/tools/ui/src/python/main.py @@ -651,7 +651,7 @@ def cli( host: str, port: int, base_url_option: str, tracker_url_option: str, verbose: bool ) -> None: """Start a web UI for heron which renders information from the tracker.""" - global base_url, Log + global base_url base_url = base_url_option log_level = logging.DEBUG if verbose else logging.INFO log.configure(log_level) From 4492c48844f0ef14abdcf8f9a0cda158c70cfeb0 Mon Sep 17 00:00:00 2001 From: Nicholas Nezis Date: Sat, 9 Apr 2022 01:30:15 -0400 Subject: [PATCH 77/82] timeline values seem to be of float type --- heron/tools/tracker/src/python/metricstimeline.py | 2 +- heron/tools/tracker/src/python/routers/metrics.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/heron/tools/tracker/src/python/metricstimeline.py b/heron/tools/tracker/src/python/metricstimeline.py index ab2b399fdcc..6772f085a8a 100644 --- a/heron/tools/tracker/src/python/metricstimeline.py +++ b/heron/tools/tracker/src/python/metricstimeline.py @@ -34,7 +34,7 @@ class MetricsTimeline(BaseModel): component: str 
start_time: int = Field(..., alias="starttime") end_time: int = Field(..., alias="endtime") - timeline: Dict[str, Dict[str, Dict[int, str]]] = Field( + timeline: Dict[str, Dict[str, Dict[int, float]]] = Field( ..., description="map of (metric name, instance, start) to metric value", ) diff --git a/heron/tools/tracker/src/python/routers/metrics.py b/heron/tools/tracker/src/python/routers/metrics.py index 869a3a43dc5..644cc64e3ee 100644 --- a/heron/tools/tracker/src/python/routers/metrics.py +++ b/heron/tools/tracker/src/python/routers/metrics.py @@ -146,7 +146,7 @@ class TimelinePoint(BaseModel): # pylint: disable=too-few-public-methods None, description="name of the instance the metrics applies to if not an aggregate", ) - data: Dict[int, int] = Field(..., description="map of start times to metric values") + data: Dict[int, float] = Field(..., description="map of start times to metric values") class MetricsQueryResponse(BaseModel): # pylint: disable=too-few-public-methods From 54af1dfda10a202293cb25c004e3113bc72ab640 Mon Sep 17 00:00:00 2001 From: Nicholas Nezis Date: Sun, 10 Apr 2022 00:52:07 -0400 Subject: [PATCH 78/82] More fixes --- heron/tools/common/src/python/clients/tracker.py | 3 ++- heron/tools/tracker/src/python/metricstimeline.py | 4 ++-- heron/tools/tracker/src/python/query_operators.py | 2 +- heron/tools/tracker/src/python/routers/container.py | 1 - 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/heron/tools/common/src/python/clients/tracker.py b/heron/tools/common/src/python/clients/tracker.py index eb76bbb7dfd..0bf2ab03999 100644 --- a/heron/tools/common/src/python/clients/tracker.py +++ b/heron/tools/common/src/python/clients/tracker.py @@ -627,7 +627,8 @@ def compute_max(self, multi_ts: Iterable[dict]) -> dict: keys = list(filtered_ts[0]["timeline"][0]["data"].keys()) timelines = ([res["timeline"][0]["data"][key] for key in keys] for res in filtered_ts) values = (max(v) for v in zip(*timelines)) - return dict(list(zip(keys, values))) 
+ data = dict(list(zip(keys, values))) + return data return {} # pylint: disable=no-self-use diff --git a/heron/tools/tracker/src/python/metricstimeline.py b/heron/tools/tracker/src/python/metricstimeline.py index 6772f085a8a..04aa01f23ce 100644 --- a/heron/tools/tracker/src/python/metricstimeline.py +++ b/heron/tools/tracker/src/python/metricstimeline.py @@ -32,8 +32,8 @@ class MetricsTimeline(BaseModel): component: str - start_time: int = Field(..., alias="starttime") - end_time: int = Field(..., alias="endtime") + starttime: int + endtime: int timeline: Dict[str, Dict[str, Dict[int, float]]] = Field( ..., description="map of (metric name, instance, start) to metric value", diff --git a/heron/tools/tracker/src/python/query_operators.py b/heron/tools/tracker/src/python/query_operators.py index d7ed232fb41..68a005a3e37 100644 --- a/heron/tools/tracker/src/python/query_operators.py +++ b/heron/tools/tracker/src/python/query_operators.py @@ -43,7 +43,7 @@ def __init__( instance, start: int, end: int, - timeline: Dict[int, int], + timeline: Dict[int, float], ): """Insantiate class with a floored copy of the timeline within [start, end].""" self.component_name = component_name diff --git a/heron/tools/tracker/src/python/routers/container.py b/heron/tools/tracker/src/python/routers/container.py index c978ddd53b9..84ff17c19ed 100644 --- a/heron/tools/tracker/src/python/routers/container.py +++ b/heron/tools/tracker/src/python/routers/container.py @@ -196,7 +196,6 @@ async def get_exceptions( # pylint: disable=too-many-arguments exception_response = await _get_exception_log_response( cluster, role, environ, component, instances, topology_name, summary=False ) - print(f"NICK: exception response: {exception_response}") ret = [] for exception_log in exception_response.exceptions: From aa896a9e760f351f6c9af47d8eb7738911adde35 Mon Sep 17 00:00:00 2001 From: Nicholas Nezis Date: Sun, 10 Apr 2022 02:08:05 -0400 Subject: [PATCH 79/82] Whitespace fix --- 
heron/tools/common/src/python/clients/tracker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/heron/tools/common/src/python/clients/tracker.py b/heron/tools/common/src/python/clients/tracker.py index 0bf2ab03999..ce6c84f2229 100644 --- a/heron/tools/common/src/python/clients/tracker.py +++ b/heron/tools/common/src/python/clients/tracker.py @@ -627,7 +627,7 @@ def compute_max(self, multi_ts: Iterable[dict]) -> dict: keys = list(filtered_ts[0]["timeline"][0]["data"].keys()) timelines = ([res["timeline"][0]["data"][key] for key in keys] for res in filtered_ts) values = (max(v) for v in zip(*timelines)) - data = dict(list(zip(keys, values))) + data = dict(list(zip(keys, values))) return data return {} From 0abd309843a97f8899e568876cabf4a43380860b Mon Sep 17 00:00:00 2001 From: Nicholas Nezis Date: Sun, 10 Apr 2022 12:50:21 -0400 Subject: [PATCH 80/82] Updating dictionary creation to bypass intermediate list creation --- heron/instance/tests/python/utils/mock_generator.py | 2 +- heron/tools/common/src/python/clients/tracker.py | 2 +- heronpy/api/tests/python/component_unittest.py | 2 +- heronpy/api/tests/python/metrics_unittest.py | 8 ++++---- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/heron/instance/tests/python/utils/mock_generator.py b/heron/instance/tests/python/utils/mock_generator.py index a6439adedca..2dfda734b7e 100644 --- a/heron/instance/tests/python/utils/mock_generator.py +++ b/heron/instance/tests/python/utils/mock_generator.py @@ -95,7 +95,7 @@ def get_a_sample_pplan(): keys = ["instance_id", "task_id", "comp_index", "comp_name"] zipped = list(zip(instance_ids, task_ids, comp_indexes, comp_names)) - return pplan, [dict(list(zip(keys, z))) for z in zipped] + return pplan, [dict(zip(keys, z)) for z in zipped] def make_data_tuple_from_list(lst, serializer=PythonSerializer()): """Make HeronDataTuple from a list of objects""" diff --git a/heron/tools/common/src/python/clients/tracker.py 
b/heron/tools/common/src/python/clients/tracker.py index ce6c84f2229..1a5d03daee6 100644 --- a/heron/tools/common/src/python/clients/tracker.py +++ b/heron/tools/common/src/python/clients/tracker.py @@ -627,7 +627,7 @@ def compute_max(self, multi_ts: Iterable[dict]) -> dict: keys = list(filtered_ts[0]["timeline"][0]["data"].keys()) timelines = ([res["timeline"][0]["data"][key] for key in keys] for res in filtered_ts) values = (max(v) for v in zip(*timelines)) - data = dict(list(zip(keys, values))) + data = dict(zip(keys, values)) return data return {} diff --git a/heronpy/api/tests/python/component_unittest.py b/heronpy/api/tests/python/component_unittest.py index a93baac5e07..cc7e9a9e492 100644 --- a/heronpy/api/tests/python/component_unittest.py +++ b/heronpy/api/tests/python/component_unittest.py @@ -167,7 +167,7 @@ def test_sanitize_inputs(self): inputs_list = [GlobalStreamId("spout1", "default"), GlobalStreamId("spout2", "some_stream")] spec = HeronComponentSpec("bolt", "bl_cls", False, 1, inputs=inputs_list) ret = spec._sanitize_inputs() - self.assertEqual(ret, dict(list(zip(inputs_list, [Grouping.SHUFFLE] * 2)))) + self.assertEqual(ret, dict(zip(inputs_list, [Grouping.SHUFFLE] * 2))) # list of neither GlobalStreamId nor HeronComponentSpec inputs_list = [None, 123, "string", [GlobalStreamId("sp", "default")]] diff --git a/heronpy/api/tests/python/metrics_unittest.py b/heronpy/api/tests/python/metrics_unittest.py index 23f35a9af52..ba45dcb4794 100644 --- a/heronpy/api/tests/python/metrics_unittest.py +++ b/heronpy/api/tests/python/metrics_unittest.py @@ -44,8 +44,8 @@ def test_multi_count_metric(self): for _ in range(10): for key in key_list: metric.incr(key=key) - self.assertEqual(metric.get_value_and_reset(), dict(list(zip(key_list, [10] * 3)))) - self.assertEqual(metric.get_value_and_reset(), dict(list(zip(key_list, [0] * 3)))) + self.assertEqual(metric.get_value_and_reset(), dict(zip(key_list, [10] * 3))) + self.assertEqual(metric.get_value_and_reset(), 
dict(zip(key_list, [0] * 3))) metric.add_key("key4") ret = metric.get_value_and_reset() @@ -71,8 +71,8 @@ def test_multi_mean_reduced_metric(self): metric.update(key=key_list[0], value=i) metric.update(key=key_list[1], value=i * 2) metric.update(key=key_list[2], value=i * 3) - self.assertEqual(metric.get_value_and_reset(), dict(list(zip(key_list, [5.5, 11, 16.5])))) - self.assertEqual(metric.get_value_and_reset(), dict(list(zip(key_list, [None] * 3)))) + self.assertEqual(metric.get_value_and_reset(), dict(zip(key_list, [5.5, 11, 16.5]))) + self.assertEqual(metric.get_value_and_reset(), dict(zip(key_list, [None] * 3))) metric.add_key("key4") ret = metric.get_value_and_reset() From dda93490a19263db8bdbb068d4b4b9f280fefd22 Mon Sep 17 00:00:00 2001 From: Nicholas Nezis Date: Sun, 10 Apr 2022 20:47:03 -0400 Subject: [PATCH 81/82] Returning the logic to original form with direct return of dict --- heron/tools/common/src/python/clients/tracker.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/heron/tools/common/src/python/clients/tracker.py b/heron/tools/common/src/python/clients/tracker.py index 1a5d03daee6..e6fc365b6ab 100644 --- a/heron/tools/common/src/python/clients/tracker.py +++ b/heron/tools/common/src/python/clients/tracker.py @@ -627,8 +627,7 @@ def compute_max(self, multi_ts: Iterable[dict]) -> dict: keys = list(filtered_ts[0]["timeline"][0]["data"].keys()) timelines = ([res["timeline"][0]["data"][key] for key in keys] for res in filtered_ts) values = (max(v) for v in zip(*timelines)) - data = dict(zip(keys, values)) - return data + return dict(zip(keys, values)) return {} # pylint: disable=no-self-use From 5df20a84dd42646cef512baa9de29b5632cea470 Mon Sep 17 00:00:00 2001 From: choi se Date: Mon, 11 Apr 2022 23:16:43 +0900 Subject: [PATCH 82/82] typo (#3814) --- heron/tools/tracker/src/python/topology.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/heron/tools/tracker/src/python/topology.py 
b/heron/tools/tracker/src/python/topology.py index 7ac297a2c3f..140242974ec 100644 --- a/heron/tools/tracker/src/python/topology.py +++ b/heron/tools/tracker/src/python/topology.py @@ -246,11 +246,11 @@ def __init__(self, name: str, state_manager_name: str, tracker_config: Config) - def _render_extra_links(extra_links, topology, execution_state: ExecutionState_pb) -> None: """Render links in place.""" subs = { - "cluster": execution_state.cluster, - "environ": execution_state.environ, - "role": execution_state.role, - "jobname": topology.name, - "submission_user": execution_state.submission_user, + "CLUSTER": execution_state.cluster, + "ENVIRON": execution_state.environ, + "ROLE": execution_state.role, + "TOPOLOGY": topology.name, + "USER": execution_state.submission_user, } for link in extra_links: link[EXTRA_LINK_URL_KEY] = string.Template(link[EXTRA_LINK_FORMATTER_KEY]).substitute(subs)