Commit 00814da

init

1 parent e486b80 commit 00814da

File tree

9,436 files changed: +3,361,662 -0 lines changed (large commit; most file contents are hidden by default)
@@ -0,0 +1,40 @@
{
    "folders": [
        {
            "path": "."
        },
        {
            "path": "../data_notebooks-ALL"
        },
        {
            "path": "../data_GCP Pro Data Eng Certification/GCP Learning"
        },
        {
            "path": "../../GitHub-Repo/Wrangling_PySpark"
        }
    ],
    "settings": {
        "jupyter.kernels.filter": [
            {
                "path": "/Users/JCachat/Library/Jupyter/kernels/data/kernel.json",
                "type": "jupyterKernelspec"
            },
            {
                "path": "/Users/JCachat/Library/Jupyter/kernels/mito_env/kernel.json",
                "type": "jupyterKernelspec"
            },
            {
                "path": "/usr/bin/python3",
                "type": "pythonEnvironment"
            },
            {
                "path": "/usr/local/bin/python3",
                "type": "pythonEnvironment"
            },
            {
                "path": "/Users/JCachat/Library/Jupyter/kernels/timeseries/kernel.json",
                "type": "jupyterKernelspec"
            }
        ]
    }
}

.DS_Store

0 Bytes
Binary file not shown.

.gitignore

+3
@@ -0,0 +1,3 @@

*/.ipynb_checkpoints
data

0-Ingest/.DS_Store

6 KB
Binary file not shown.
@@ -0,0 +1,265 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Accessing the Public Data API with Python (Jupyter Notebook & Pandas)\n",
    "\n",
    "Adapted from the bls.gov 'API Version 2.0 Python Sample Code' by Mark McEnearney to use Pandas in a Jupyter Notebook.\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "import json\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Build and submit request, then format response as JSON\n",
    "Requesting data for six state unemployment-rate series, including LASST060000000000003 (California) and LASST080000000000003 (Colorado), between the years 2017 and 2022"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "headers = {'Content-type': 'application/json'}\n",
    "data = json.dumps({\"seriesid\": ['LASST010000000000003','LASST020000000000003','LASST040000000000003','LASST050000000000003','LASST060000000000003','LASST080000000000003'],\"startyear\":\"2017\", \"endyear\":\"2022\",\"registrationkey\":\"8988511dabfc4508a0f08c0c051c1476\"})\n",
    "\n",
    "p = requests.post('https://api.bls.gov/publicAPI/v2/timeseries/data/', data=data, headers=headers)\n",
    "\n",
    "json_data = json.loads(p.text)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Iterate through series data to build a list"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "parsed_data = []\n",
    "for series in json_data['Results']['series']:\n",
    "    seriesId = series['seriesID']\n",
    "    for item in series['data']:\n",
    "        year = item['year']\n",
    "        period = item['period']\n",
    "        value = item['value']\n",
    "        parsed_data.append([seriesId,year,period,value])\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Convert parsed list data into a Pandas DataFrame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.DataFrame(parsed_data, columns=['seriesID', 'year', 'period', 'value'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Describe and display dataframe"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>seriesID</th>\n",
       "      <th>year</th>\n",
       "      <th>period</th>\n",
       "      <th>value</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>LASST010000000000003</td>\n",
       "      <td>2022</td>\n",
       "      <td>M06</td>\n",
       "      <td>2.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>LASST010000000000003</td>\n",
       "      <td>2022</td>\n",
       "      <td>M05</td>\n",
       "      <td>2.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>LASST010000000000003</td>\n",
       "      <td>2022</td>\n",
       "      <td>M04</td>\n",
       "      <td>2.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>LASST010000000000003</td>\n",
       "      <td>2022</td>\n",
       "      <td>M03</td>\n",
       "      <td>2.9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>LASST010000000000003</td>\n",
       "      <td>2022</td>\n",
       "      <td>M02</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>391</th>\n",
       "      <td>LASST080000000000003</td>\n",
       "      <td>2017</td>\n",
       "      <td>M05</td>\n",
       "      <td>2.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>392</th>\n",
       "      <td>LASST080000000000003</td>\n",
       "      <td>2017</td>\n",
       "      <td>M04</td>\n",
       "      <td>2.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>393</th>\n",
       "      <td>LASST080000000000003</td>\n",
       "      <td>2017</td>\n",
       "      <td>M03</td>\n",
       "      <td>2.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>394</th>\n",
       "      <td>LASST080000000000003</td>\n",
       "      <td>2017</td>\n",
       "      <td>M02</td>\n",
       "      <td>2.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>395</th>\n",
       "      <td>LASST080000000000003</td>\n",
       "      <td>2017</td>\n",
       "      <td>M01</td>\n",
       "      <td>2.6</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>396 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                 seriesID  year period value\n",
       "0    LASST010000000000003  2022    M06   2.6\n",
       "1    LASST010000000000003  2022    M05   2.7\n",
       "2    LASST010000000000003  2022    M04   2.8\n",
       "3    LASST010000000000003  2022    M03   2.9\n",
       "4    LASST010000000000003  2022    M02   3.0\n",
       "..                    ...   ...    ...   ...\n",
       "391  LASST080000000000003  2017    M05   2.4\n",
       "392  LASST080000000000003  2017    M04   2.4\n",
       "393  LASST080000000000003  2017    M03   2.4\n",
       "394  LASST080000000000003  2017    M02   2.5\n",
       "395  LASST080000000000003  2017    M01   2.6\n",
       "\n",
       "[396 rows x 4 columns]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "display(df)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.9.13 ('data')",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.13"
  },
  "vscode": {
   "interpreter": {
    "hash": "61f816485507dc889ff66c817d3dfe3a5ba58d9e76a81eab42366b9b45bfe58a"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
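The notebook's three steps (build the request, flatten the JSON, load a DataFrame) can also be rolled into one reusable helper. The sketch below is only an illustrative consolidation of the same BLS v2 API calls, not part of the commit; the function name fetch_bls_series and the YOUR_BLS_API_KEY placeholder are assumptions.

```python
import json

import pandas as pd
import requests

BLS_API_URL = "https://api.bls.gov/publicAPI/v2/timeseries/data/"


def fetch_bls_series(series_ids, start_year, end_year, registration_key):
    """Request one or more BLS time series and return them as a tidy DataFrame."""
    payload = json.dumps({
        "seriesid": series_ids,
        "startyear": str(start_year),
        "endyear": str(end_year),
        "registrationkey": registration_key,
    })
    resp = requests.post(
        BLS_API_URL,
        data=payload,
        headers={"Content-type": "application/json"},
    )
    resp.raise_for_status()  # surface HTTP errors before trying to parse the body
    series_list = resp.json()["Results"]["series"]
    rows = [
        [series["seriesID"], item["year"], item["period"], item["value"]]
        for series in series_list
        for item in series["data"]
    ]
    return pd.DataFrame(rows, columns=["seriesID", "year", "period", "value"])


# Example call mirroring the notebook (placeholder key, not a real credential):
# df = fetch_bls_series(
#     ["LASST060000000000003", "LASST080000000000003"], 2017, 2022, "YOUR_BLS_API_KEY"
# )
```

Calling raise_for_status() up front fails fast on HTTP errors instead of letting the later JSON parsing raise a less obvious KeyError.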
@@ -0,0 +1,25 @@
# Using INFORMATION_SCHEMA views & ASSERT to test BigQuery SQL statements

https://cloud.google.com/bigquery/docs/information-schema-views

https://medium.com/google-cloud/validating-successful-execution-of-bigquery-scripts-using-assert-c82f7ff9cfa8

You can make a smaller, randomly sampled version of your table:

    CREATE TABLE `project.testdataset.tablename`
    AS SELECT * FROM `project.proddataset.tablename` WHERE RAND() > 0.9

to keep roughly 10% of the rows (use RAND() > 0.99 to keep about 1%). Run it more than once and you'll get different rows each time, since RAND() is random; hash a timestamp (or another stable column) instead to get repeatable results, as sketched below.

Then all you're changing between test and prod is the dataset name.
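As a rough illustration of the "hash a timestamp" idea, here is a hedged sketch using the google-cloud-bigquery client: it swaps RAND() for FARM_FINGERPRINT so the same rows are selected on every run. The project, dataset, table, and event_timestamp column names are placeholders, not objects from this repo.

```python
from google.cloud import bigquery  # pip install google-cloud-bigquery

client = bigquery.Client()  # uses application-default credentials

# All identifiers below are placeholders: swap in your own project, datasets,
# table, and a stable column to hash (a timestamp or key column).
sample_sql = """
CREATE OR REPLACE TABLE `my-project.testdataset.tablename` AS
SELECT *
FROM `my-project.proddataset.tablename`
-- FARM_FINGERPRINT is deterministic, so the same rows come back on every run,
-- unlike RAND(); MOD(..., 100) < 10 keeps roughly 10 rows in 100.
WHERE MOD(ABS(FARM_FINGERPRINT(CAST(event_timestamp AS STRING))), 100) < 10
"""

client.query(sample_sql).result()  # .result() blocks until the DDL job finishes
```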
https://ianwhitestone.work/testing-sql/

https://pypi.org/project/bq-test-kit/
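To pair with the ASSERT article linked above, a minimal sketch of a post-load check, again assuming the hypothetical test table from the sampling sketch; BigQuery's ASSERT statement fails the query job when its condition is false, which is what makes it usable as a lightweight test.

```python
from google.cloud import bigquery

client = bigquery.Client()

# Fail loudly if the sampled test table (placeholder name from the sketch above)
# came out empty; ASSERT raises a query error when its condition is false.
assert_sql = """
ASSERT (
  (SELECT COUNT(*) FROM `my-project.testdataset.tablename`) > 0
) AS 'testdataset.tablename should not be empty after sampling'
"""

try:
    client.query(assert_sql).result()
    print("assertion passed")
except Exception as exc:  # a failed ASSERT surfaces here as a query error
    print(f"assertion failed: {exc}")
```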

0 commit comments
