-
Notifications
You must be signed in to change notification settings - Fork 84
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Tooling to support rockset migration (#5366)
Adding tooling to analyze Rockset query lambdas and collections. This is meant to be one-off throwaway code, just for use during the early days of the migration. It includes: - Code to delete queries that are unused (currently about 80 of 180), in batches of 10 - Backups for all the queries in case we need to revert a delete. Note: The main file of interest in this PR is rockset_queries.py, which is the Python script version generated from the notebook rockset_queries.ipynb.
- Loading branch information
Showing
363 changed files
with
13,209 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
This folder contains tools/scripts used to help with migrating away from Rockset | ||
|
62 changes: 62 additions & 0 deletions
62
tools/rockset_migration/lambdas_backup/benchmarks.oss_ci_benchmark_branches.raw.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
{ | ||
"workspace": "benchmarks", | ||
"last_updated_by": "[email protected]", | ||
"last_updated": "2024-06-15T23:03:05Z", | ||
"name": "oss_ci_benchmark_branches", | ||
"version_count": 3, | ||
"collections": [ | ||
"commons.workflow_run", | ||
"benchmarks.oss_ci_benchmark" | ||
], | ||
"latest_version": { | ||
"workspace": "benchmarks", | ||
"created_by": "[email protected]", | ||
"created_by_apikey_name": null, | ||
"created_at": "2024-06-15T23:03:05Z", | ||
"name": "oss_ci_benchmark_branches", | ||
"version": "76446d877defb748", | ||
"description": "Query branches and commits from OSS CI benchmarks", | ||
"sql": { | ||
"query": "--- This query is used to get the list of branches and commits used by different\n--- OSS CI benchmark experiments. This powers HUD benchmarks dashboards\nSELECT\n DISTINCT w.head_branch,\n w.head_sha,\n w.id,\n FORMAT_ISO8601(\n DATE_TRUNC(: granularity, o._event_time)\n ) AS event_time,\n o.filename\nFROM\n benchmarks.oss_ci_benchmark o\n LEFT JOIN commons.workflow_run w ON o.workflow_id = w.id\nWHERE\n o._event_time >= PARSE_DATETIME_ISO8601(: startTime)\n AND o._event_time < PARSE_DATETIME_ISO8601(: stopTime)\n AND (\n ARRAY_CONTAINS(\n SPLIT(: filenames, ','),\n o.filename\n )\n OR : filenames = ''\n )\n AND o.metric IS NOT NULL\n AND w.html_url LIKE CONCAT('%', : repo, '%')\n AND o.dtype IS NOT NULL\n AND o.device IS NOT NULL\nORDER BY\n w.head_branch,\n event_time DESC", | ||
"default_parameters": [ | ||
{ | ||
"name": "filenames", | ||
"type": "string", | ||
"value": "" | ||
}, | ||
{ | ||
"name": "granularity", | ||
"type": "string", | ||
"value": "day" | ||
}, | ||
{ | ||
"name": "repo", | ||
"type": "string", | ||
"value": "pytorch/pytorch" | ||
}, | ||
{ | ||
"name": "startTime", | ||
"type": "string", | ||
"value": "2024-05-01T00:00:00.00Z" | ||
}, | ||
{ | ||
"name": "stopTime", | ||
"type": "string", | ||
"value": "2024-08-01T00:00:00.00Z" | ||
} | ||
] | ||
}, | ||
"collections": [ | ||
"commons.workflow_run", | ||
"benchmarks.oss_ci_benchmark" | ||
], | ||
"state": "ACTIVE", | ||
"stats": { | ||
"last_executed": "2024-06-25T07:35:30Z", | ||
"last_executed_by": "[email protected]", | ||
"last_execution_error": null, | ||
"last_execution_error_message": null | ||
}, | ||
"public_access_id": null | ||
} | ||
} |
30 changes: 30 additions & 0 deletions
30
tools/rockset_migration/lambdas_backup/benchmarks.oss_ci_benchmark_branches.sql.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
{ | ||
"query": "--- This query is used to get the list of branches and commits used by different\n--- OSS CI benchmark experiments. This powers HUD benchmarks dashboards\nSELECT\n DISTINCT w.head_branch,\n w.head_sha,\n w.id,\n FORMAT_ISO8601(\n DATE_TRUNC(: granularity, o._event_time)\n ) AS event_time,\n o.filename\nFROM\n benchmarks.oss_ci_benchmark o\n LEFT JOIN commons.workflow_run w ON o.workflow_id = w.id\nWHERE\n o._event_time >= PARSE_DATETIME_ISO8601(: startTime)\n AND o._event_time < PARSE_DATETIME_ISO8601(: stopTime)\n AND (\n ARRAY_CONTAINS(\n SPLIT(: filenames, ','),\n o.filename\n )\n OR : filenames = ''\n )\n AND o.metric IS NOT NULL\n AND w.html_url LIKE CONCAT('%', : repo, '%')\n AND o.dtype IS NOT NULL\n AND o.device IS NOT NULL\nORDER BY\n w.head_branch,\n event_time DESC", | ||
"default_parameters": [ | ||
{ | ||
"name": "filenames", | ||
"type": "string", | ||
"value": "" | ||
}, | ||
{ | ||
"name": "granularity", | ||
"type": "string", | ||
"value": "day" | ||
}, | ||
{ | ||
"name": "repo", | ||
"type": "string", | ||
"value": "pytorch/pytorch" | ||
}, | ||
{ | ||
"name": "startTime", | ||
"type": "string", | ||
"value": "2024-05-01T00:00:00.00Z" | ||
}, | ||
{ | ||
"name": "stopTime", | ||
"type": "string", | ||
"value": "2024-08-01T00:00:00.00Z" | ||
} | ||
] | ||
} |
92 changes: 92 additions & 0 deletions
92
tools/rockset_migration/lambdas_backup/benchmarks.oss_ci_benchmark_llms.raw.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
{ | ||
"workspace": "benchmarks", | ||
"last_updated_by": "[email protected]", | ||
"last_updated": "2024-06-19T19:40:01Z", | ||
"name": "oss_ci_benchmark_llms", | ||
"version_count": 6, | ||
"collections": [ | ||
"commons.workflow_run", | ||
"benchmarks.oss_ci_benchmark" | ||
], | ||
"latest_version": { | ||
"workspace": "benchmarks", | ||
"created_by": "[email protected]", | ||
"created_by_apikey_name": null, | ||
"created_at": "2024-06-19T19:40:01Z", | ||
"name": "oss_ci_benchmark_llms", | ||
"version": "656fe095f7e9a3ab", | ||
"description": "The query to power LLMs benchmark dashboard", | ||
"sql": { | ||
"query": "--- This query is used to get the LLMs benchmark results from different experiments. It\n--- queries the TPS and memory bandwidth for each model / quantization combos. This powers\n--- the LLMs benchmark dashboard\nSELECT\n DISTINCT o.workflow_id,\n -- As the JSON response is pretty big, only return the field if it's needed\n IF(:getJobId, o.job_id, NULL) AS job_id,\n o.name,\n o.metric,\n IF(\n o.actual IS NOT NULL,\n CAST(o.actual AS FLOAT), 0.0\n ) AS actual,\n IF(\n o.target IS NOT NULL,\n CAST(o.target AS FLOAT), 0.0\n ) AS target,\n FORMAT_ISO8601(\n DATE_TRUNC(: granularity, w._event_time)\n ) AS granularity_bucket,\n o.dtype,\n o.device,\nFROM\n benchmarks.oss_ci_benchmark o\n LEFT JOIN commons.workflow_run w ON o.workflow_id = w.id\nWHERE\n (\n ARRAY_CONTAINS(\n SPLIT(: branches, ','),\n w.head_branch\n )\n OR : branches = ''\n )\n AND (\n ARRAY_CONTAINS(\n SPLIT(: commits, ','),\n w.head_sha\n )\n OR : commits = ''\n )\n AND (\n ARRAY_CONTAINS(\n SPLIT(: filenames, ','),\n o.filename\n )\n OR : filenames = ''\n )\n AND (\n ARRAY_CONTAINS(\n SPLIT(: names, ','),\n o.name\n )\n OR : names = ''\n )\n AND (\n ARRAY_CONTAINS(\n SPLIT(: devices, ','),\n o.device\n )\n OR : devices = ''\n )\n AND (\n ARRAY_CONTAINS(\n SPLIT(: dtypes, ','),\n o.dtype\n )\n OR : dtypes = ''\n )\n AND o.metric IS NOT NULL\n AND o.dtype IS NOT NULL\n AND o.device IS NOT NULL\n AND w.html_url LIKE CONCAT('%', : repo, '%')\nORDER BY\n granularity_bucket DESC,\n workflow_id DESC,\n name,\n dtype,\n device", | ||
"default_parameters": [ | ||
{ | ||
"name": "branches", | ||
"type": "string", | ||
"value": "main" | ||
}, | ||
{ | ||
"name": "commits", | ||
"type": "string", | ||
"value": "" | ||
}, | ||
{ | ||
"name": "devices", | ||
"type": "string", | ||
"value": "" | ||
}, | ||
{ | ||
"name": "dtypes", | ||
"type": "string", | ||
"value": "" | ||
}, | ||
{ | ||
"name": "filenames", | ||
"type": "string", | ||
"value": "" | ||
}, | ||
{ | ||
"name": "getJobId", | ||
"type": "bool", | ||
"value": "false" | ||
}, | ||
{ | ||
"name": "granularity", | ||
"type": "string", | ||
"value": "day" | ||
}, | ||
{ | ||
"name": "names", | ||
"type": "string", | ||
"value": "" | ||
}, | ||
{ | ||
"name": "repo", | ||
"type": "string", | ||
"value": "pytorch/pytorch" | ||
}, | ||
{ | ||
"name": "startTime", | ||
"type": "string", | ||
"value": "2024-05-01T00:00:00.00Z" | ||
}, | ||
{ | ||
"name": "stopTime", | ||
"type": "string", | ||
"value": "2024-08-01T00:00:00.00Z" | ||
} | ||
] | ||
}, | ||
"collections": [ | ||
"commons.workflow_run", | ||
"benchmarks.oss_ci_benchmark" | ||
], | ||
"state": "ACTIVE", | ||
"stats": { | ||
"last_executed": "2024-06-25T07:35:29Z", | ||
"last_executed_by": "[email protected]", | ||
"last_execution_error": null, | ||
"last_execution_error_message": null | ||
}, | ||
"public_access_id": null | ||
} | ||
} |
60 changes: 60 additions & 0 deletions
60
tools/rockset_migration/lambdas_backup/benchmarks.oss_ci_benchmark_llms.sql.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
{ | ||
"query": "--- This query is used to get the LLMs benchmark results from different experiments. It\n--- queries the TPS and memory bandwidth for each model / quantization combos. This powers\n--- the LLMs benchmark dashboard\nSELECT\n DISTINCT o.workflow_id,\n -- As the JSON response is pretty big, only return the field if it's needed\n IF(:getJobId, o.job_id, NULL) AS job_id,\n o.name,\n o.metric,\n IF(\n o.actual IS NOT NULL,\n CAST(o.actual AS FLOAT), 0.0\n ) AS actual,\n IF(\n o.target IS NOT NULL,\n CAST(o.target AS FLOAT), 0.0\n ) AS target,\n FORMAT_ISO8601(\n DATE_TRUNC(: granularity, w._event_time)\n ) AS granularity_bucket,\n o.dtype,\n o.device,\nFROM\n benchmarks.oss_ci_benchmark o\n LEFT JOIN commons.workflow_run w ON o.workflow_id = w.id\nWHERE\n (\n ARRAY_CONTAINS(\n SPLIT(: branches, ','),\n w.head_branch\n )\n OR : branches = ''\n )\n AND (\n ARRAY_CONTAINS(\n SPLIT(: commits, ','),\n w.head_sha\n )\n OR : commits = ''\n )\n AND (\n ARRAY_CONTAINS(\n SPLIT(: filenames, ','),\n o.filename\n )\n OR : filenames = ''\n )\n AND (\n ARRAY_CONTAINS(\n SPLIT(: names, ','),\n o.name\n )\n OR : names = ''\n )\n AND (\n ARRAY_CONTAINS(\n SPLIT(: devices, ','),\n o.device\n )\n OR : devices = ''\n )\n AND (\n ARRAY_CONTAINS(\n SPLIT(: dtypes, ','),\n o.dtype\n )\n OR : dtypes = ''\n )\n AND o.metric IS NOT NULL\n AND o.dtype IS NOT NULL\n AND o.device IS NOT NULL\n AND w.html_url LIKE CONCAT('%', : repo, '%')\nORDER BY\n granularity_bucket DESC,\n workflow_id DESC,\n name,\n dtype,\n device", | ||
"default_parameters": [ | ||
{ | ||
"name": "branches", | ||
"type": "string", | ||
"value": "main" | ||
}, | ||
{ | ||
"name": "commits", | ||
"type": "string", | ||
"value": "" | ||
}, | ||
{ | ||
"name": "devices", | ||
"type": "string", | ||
"value": "" | ||
}, | ||
{ | ||
"name": "dtypes", | ||
"type": "string", | ||
"value": "" | ||
}, | ||
{ | ||
"name": "filenames", | ||
"type": "string", | ||
"value": "" | ||
}, | ||
{ | ||
"name": "getJobId", | ||
"type": "bool", | ||
"value": "false" | ||
}, | ||
{ | ||
"name": "granularity", | ||
"type": "string", | ||
"value": "day" | ||
}, | ||
{ | ||
"name": "names", | ||
"type": "string", | ||
"value": "" | ||
}, | ||
{ | ||
"name": "repo", | ||
"type": "string", | ||
"value": "pytorch/pytorch" | ||
}, | ||
{ | ||
"name": "startTime", | ||
"type": "string", | ||
"value": "2024-05-01T00:00:00.00Z" | ||
}, | ||
{ | ||
"name": "stopTime", | ||
"type": "string", | ||
"value": "2024-08-01T00:00:00.00Z" | ||
} | ||
] | ||
} |
62 changes: 62 additions & 0 deletions
62
tools/rockset_migration/lambdas_backup/benchmarks.oss_ci_benchmark_names.raw.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
{ | ||
"workspace": "benchmarks", | ||
"last_updated_by": "[email protected]", | ||
"last_updated": "2024-06-16T06:09:30Z", | ||
"name": "oss_ci_benchmark_names", | ||
"version_count": 5, | ||
"collections": [ | ||
"commons.workflow_run", | ||
"benchmarks.oss_ci_benchmark" | ||
], | ||
"latest_version": { | ||
"workspace": "benchmarks", | ||
"created_by": "[email protected]", | ||
"created_by_apikey_name": null, | ||
"created_at": "2024-06-16T06:09:30Z", | ||
"name": "oss_ci_benchmark_names", | ||
"version": "98a212e928df968b", | ||
"description": "Query experiment names from OSS CI benchmarks", | ||
"sql": { | ||
"query": "--- This query is used by HUD benchmarks dashboards to get the list of experiment names\nSELECT DISTINCT\n o.filename, \n o.name, \n o.metric,\n o.dtype,\n o.device,\nFROM\n benchmarks.oss_ci_benchmark o\n LEFT JOIN commons.workflow_run w ON o.workflow_id = w.id\nWHERE\n o._event_time >= PARSE_DATETIME_ISO8601(: startTime)\n AND o._event_time < PARSE_DATETIME_ISO8601(: stopTime)\n AND (\n ARRAY_CONTAINS(\n SPLIT(: filenames, ','),\n o.filename\n )\n OR : filenames = ''\n )\n AND o.metric IS NOT NULL\n AND w.html_url LIKE CONCAT('%', : repo, '%')\n AND o.dtype IS NOT NULL\n AND o.device IS NOT NULL\nORDER BY\n o.filename, \n o.name,\n o.metric,\n o.dtype,\n o.device", | ||
"default_parameters": [ | ||
{ | ||
"name": "filenames", | ||
"type": "string", | ||
"value": "" | ||
}, | ||
{ | ||
"name": "granularity", | ||
"type": "string", | ||
"value": "day" | ||
}, | ||
{ | ||
"name": "repo", | ||
"type": "string", | ||
"value": "pytorch/pytorch" | ||
}, | ||
{ | ||
"name": "startTime", | ||
"type": "string", | ||
"value": "2024-05-01T00:00:00.00Z" | ||
}, | ||
{ | ||
"name": "stopTime", | ||
"type": "string", | ||
"value": "2024-08-01T00:00:00.00Z" | ||
} | ||
] | ||
}, | ||
"collections": [ | ||
"commons.workflow_run", | ||
"benchmarks.oss_ci_benchmark" | ||
], | ||
"state": "ACTIVE", | ||
"stats": { | ||
"last_executed": "2024-06-25T07:35:28Z", | ||
"last_executed_by": "[email protected]", | ||
"last_execution_error": null, | ||
"last_execution_error_message": null | ||
}, | ||
"public_access_id": null | ||
} | ||
} |
30 changes: 30 additions & 0 deletions
30
tools/rockset_migration/lambdas_backup/benchmarks.oss_ci_benchmark_names.sql.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
{ | ||
"query": "--- This query is used by HUD benchmarks dashboards to get the list of experiment names\nSELECT DISTINCT\n o.filename, \n o.name, \n o.metric,\n o.dtype,\n o.device,\nFROM\n benchmarks.oss_ci_benchmark o\n LEFT JOIN commons.workflow_run w ON o.workflow_id = w.id\nWHERE\n o._event_time >= PARSE_DATETIME_ISO8601(: startTime)\n AND o._event_time < PARSE_DATETIME_ISO8601(: stopTime)\n AND (\n ARRAY_CONTAINS(\n SPLIT(: filenames, ','),\n o.filename\n )\n OR : filenames = ''\n )\n AND o.metric IS NOT NULL\n AND w.html_url LIKE CONCAT('%', : repo, '%')\n AND o.dtype IS NOT NULL\n AND o.device IS NOT NULL\nORDER BY\n o.filename, \n o.name,\n o.metric,\n o.dtype,\n o.device", | ||
"default_parameters": [ | ||
{ | ||
"name": "filenames", | ||
"type": "string", | ||
"value": "" | ||
}, | ||
{ | ||
"name": "granularity", | ||
"type": "string", | ||
"value": "day" | ||
}, | ||
{ | ||
"name": "repo", | ||
"type": "string", | ||
"value": "pytorch/pytorch" | ||
}, | ||
{ | ||
"name": "startTime", | ||
"type": "string", | ||
"value": "2024-05-01T00:00:00.00Z" | ||
}, | ||
{ | ||
"name": "stopTime", | ||
"type": "string", | ||
"value": "2024-08-01T00:00:00.00Z" | ||
} | ||
] | ||
} |
41 changes: 41 additions & 0 deletions
41
tools/rockset_migration/lambdas_backup/commons.GHA-CI-for-shas.raw.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
{ | ||
"workspace": "commons", | ||
"last_updated_by": "[email protected]", | ||
"last_updated": "2022-01-16T08:24:39Z", | ||
"name": "GHA-CI-for-shas", | ||
"version_count": 6, | ||
"collections": [ | ||
"commons.workflow_run", | ||
"GitHub-Actions.workflow_run" | ||
], | ||
"latest_version": { | ||
"workspace": "commons", | ||
"created_by": "[email protected]", | ||
"created_by_apikey_name": null, | ||
"created_at": "2022-01-16T08:24:39Z", | ||
"name": "GHA-CI-for-shas", | ||
"version": "ae1b83292611eff2", | ||
"description": "Get GHA results for a specific set of SHAs", | ||
"sql": { | ||
"query": "SELECT head_sha, head_branch, html_url, name, status, conclusion\nFROM workflow_run\nWHERE ARRAY_CONTAINS(SPLIT(:shas, ','), head_sha)", | ||
"default_parameters": [ | ||
{ | ||
"name": "shas", | ||
"type": "string", | ||
"value": "" | ||
} | ||
] | ||
}, | ||
"collections": [ | ||
"commons.workflow_run" | ||
], | ||
"state": "ACTIVE", | ||
"stats": { | ||
"last_executed": null, | ||
"last_executed_by": null, | ||
"last_execution_error": null, | ||
"last_execution_error_message": null | ||
}, | ||
"public_access_id": null | ||
} | ||
} |
Oops, something went wrong.