Skip to content

Commit

Permalink
reduce specificity of git add so that state file is included
Browse files Browse the repository at this point in the history
  • Loading branch information
TimidRobot committed Oct 2, 2024
1 parent dc5a403 commit b84fe81
Show file tree
Hide file tree
Showing 31 changed files with 69 additions and 64 deletions.
73 changes: 39 additions & 34 deletions pre-automation/visualization/visualization_engineering.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
"\n",
"\n",
"import warnings\n",
"\n",
"warnings.filterwarnings(\"ignore\")"
]
},
Expand Down Expand Up @@ -109,7 +110,9 @@
"raw_ia_license_data = pd.read_csv(dataset_paths[\"ia_license\"])\n",
"raw_metmuseum_license_data = pd.read_csv(dataset_paths[\"metmuseum_license\"])\n",
"raw_vimeo_license_data = pd.read_csv(dataset_paths[\"vimeo_license\"])\n",
"raw_wikicommons_license_data = pd.read_csv(dataset_paths[\"wikicommons_license\"], encoding=\"latin-1\")\n",
"raw_wikicommons_license_data = pd.read_csv(\n",
" dataset_paths[\"wikicommons_license\"], encoding=\"latin-1\"\n",
")\n",
"raw_wikipedia_license_data = pd.read_csv(dataset_paths[\"wikipedia_license\"])\n",
"raw_youtube_time_data = pd.read_csv(dataset_paths[\"youtube_time\"])"
]
Expand Down Expand Up @@ -2923,36 +2926,36 @@
"outputs": [],
"source": [
"rename_dict = {\n",
" \"Bolivia\": \"Bolivia (Plurinational State of)\",\n",
" \"Cape Verde\": \"Cabo Verde\",\n",
" \"Congo the Democratic Republic of the\": \"Congo, Democratic Republic of the\",\n",
" \"Cote D'ivoire\": \"Côte d'Ivoire\",\n",
" \"Croatia (Hrvatska)\": \"Croatia\",\n",
" \"Czech Republic\": \"Czechia\",\n",
" \"East Timor\": \"Timor-Leste\",\n",
" \"Heard Island and Mcdonald Islands\": \"Heard Island and McDonald Islands\",\n",
" \"Holy See (Vatican City State)\": \"Holy See\",\n",
" \"Iran Islamic Republic of\": \"Iran (Islamic Republic of)\",\n",
" \"Korea Democratic People's Republic of\": \"Korea (Democratic People's Republic of)\",\n",
" \"Korea Republic of\": \"Korea, Republic of\",\n",
" \"Libyan Arab Jamahiriya\": \"Libya\",\n",
" \"Macedonia the Former Yugosalv Republic of\": \"North Macedonia\",\n",
" \"Micronesia Federated States of\": \"Micronesia (Federated States of)\",\n",
" \"Moldova Republic of\": \"Moldova, Republic of\",\n",
" \"Netherlands Antilles\": \"Netherlands\",\n",
" \"Palestinian Territory\": \"Palestine, State of\",\n",
" \"Reunion\": \"Réunion\",\n",
" \"Saint Helena\": \"Saint Helena, Ascension and Tristan da Cunha\",\n",
" \"Swaziland\": \"Eswatini\",\n",
" \"Taiwan Province of China\": \"Taiwan, Province of China\",\n",
" \"Tanzania United Republic of\": \"Tanzania, United Republic of\",\n",
" \"United Kingdom\": \"United Kingdom of Great Britain and Northern Ireland\",\n",
" \"United States\": \"United States of America\",\n",
" \"Venezuela\": \"Venezuela (Bolivarian Republic of)\",\n",
" \"Vietnam\": \"Viet Nam\",\n",
" \"Virgin Islands British\": \"Virgin Islands (British)\",\n",
" \"Virgin Islands U.S.\": \"Virgin Islands (U.S.)\",\n",
" }"
" \"Bolivia\": \"Bolivia (Plurinational State of)\",\n",
" \"Cape Verde\": \"Cabo Verde\",\n",
" \"Congo the Democratic Republic of the\": \"Congo, Democratic Republic of the\",\n",
" \"Cote D'ivoire\": \"Côte d'Ivoire\",\n",
" \"Croatia (Hrvatska)\": \"Croatia\",\n",
" \"Czech Republic\": \"Czechia\",\n",
" \"East Timor\": \"Timor-Leste\",\n",
" \"Heard Island and Mcdonald Islands\": \"Heard Island and McDonald Islands\",\n",
" \"Holy See (Vatican City State)\": \"Holy See\",\n",
" \"Iran Islamic Republic of\": \"Iran (Islamic Republic of)\",\n",
" \"Korea Democratic People's Republic of\": \"Korea (Democratic People's Republic of)\",\n",
" \"Korea Republic of\": \"Korea, Republic of\",\n",
" \"Libyan Arab Jamahiriya\": \"Libya\",\n",
" \"Macedonia the Former Yugosalv Republic of\": \"North Macedonia\",\n",
" \"Micronesia Federated States of\": \"Micronesia (Federated States of)\",\n",
" \"Moldova Republic of\": \"Moldova, Republic of\",\n",
" \"Netherlands Antilles\": \"Netherlands\",\n",
" \"Palestinian Territory\": \"Palestine, State of\",\n",
" \"Reunion\": \"Réunion\",\n",
" \"Saint Helena\": \"Saint Helena, Ascension and Tristan da Cunha\",\n",
" \"Swaziland\": \"Eswatini\",\n",
" \"Taiwan Province of China\": \"Taiwan, Province of China\",\n",
" \"Tanzania United Republic of\": \"Tanzania, United Republic of\",\n",
" \"United Kingdom\": \"United Kingdom of Great Britain and Northern Ireland\",\n",
" \"United States\": \"United States of America\",\n",
" \"Venezuela\": \"Venezuela (Bolivarian Republic of)\",\n",
" \"Vietnam\": \"Viet Nam\",\n",
" \"Virgin Islands British\": \"Virgin Islands (British)\",\n",
" \"Virgin Islands U.S.\": \"Virgin Islands (U.S.)\",\n",
"}"
]
},
{
Expand Down Expand Up @@ -3091,9 +3094,11 @@
"google_country_data = raw_google_country_data_transpose.copy()\n",
"google_country_data.rename(index=rename_dict, inplace=True)\n",
"\n",
"country_codes_data.set_index('name', inplace=True)\n",
"google_country_data['iso_a3'] = google_country_data.index.map(country_codes_data['alpha-3'])\n",
"google_geo_data = google_country_data.dropna(subset=['iso_a3'])\n",
"country_codes_data.set_index(\"name\", inplace=True)\n",
"google_country_data[\"iso_a3\"] = google_country_data.index.map(\n",
" country_codes_data[\"alpha-3\"]\n",
")\n",
"google_geo_data = google_country_data.dropna(subset=[\"iso_a3\"])\n",
"\n",
"google_geo_data"
]
Expand Down
2 changes: 1 addition & 1 deletion scripts/1-fetch/deviantart_fetched.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ def main():

# Add and commit changes
shared.add_and_commit(
PATHS["repo"], PATHS["data_phase"], "Add and commit DeviantArt data"
PATHS["repo"], PATHS["data_quarter"], "Add and commit DeviantArt data"
)

# Push changes
Expand Down
2 changes: 1 addition & 1 deletion scripts/1-fetch/flickr_fetched.py
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,7 @@ def main():

# Add and commit changes
shared.add_and_commit(
PATHS["repo"], PATHS["data_phase"], "Add and commit new reports"
PATHS["repo"], PATHS["data_quarter"], "Add and commit new reports"
)

# Push changes
Expand Down
2 changes: 1 addition & 1 deletion scripts/1-fetch/gcs_fetched.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,7 @@ def main():

# Add and commit changes
shared.add_and_commit(
PATHS["repo"], PATHS["data_phase"], "Add and commit new reports"
PATHS["repo"], PATHS["data_quarter"], "Add and commit new reports"
)

# Push changes
Expand Down
2 changes: 1 addition & 1 deletion scripts/1-fetch/github_fetched.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ def main():

# Add and commit changes
shared.add_and_commit(
PATHS["repo"], PATHS["data_phase"], "Add and commit GitHub data"
PATHS["repo"], PATHS["data_quarter"], "Add and commit GitHub data"
)

# Push changes
Expand Down
2 changes: 1 addition & 1 deletion scripts/1-fetch/internetarchive_fetched.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ def main():
# Add and commit changes
shared.add_and_commit(
PATHS["repo"],
PATHS["data_phase"],
PATHS["data_quarter"],
"Add and commit Internet Archive data",
)

Expand Down
2 changes: 1 addition & 1 deletion scripts/1-fetch/metmuseum_fetched.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ def main():

# Add and commit changes
shared.add_and_commit(
PATHS["repo"], PATHS["data_phase"], "Add and commit MetMuseum data"
PATHS["repo"], PATHS["data_quarter"], "Add and commit MetMuseum data"
)

# Push changes
Expand Down
2 changes: 1 addition & 1 deletion scripts/1-fetch/vimeo_fetched.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ def main():

# Add and commit changes
shared.add_and_commit(
PATHS["repo"], PATHS["data_phase"], "Add and commit Vimeo data"
PATHS["repo"], PATHS["data_quarter"], "Add and commit Vimeo data"
)

# Push changes
Expand Down
2 changes: 1 addition & 1 deletion scripts/1-fetch/wikicommons_fetched.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ def main():

# Add and commit changes
shared.add_and_commit(
PATHS["repo"], PATHS["data_phase"], "Add and commit WikiCommons data"
PATHS["repo"], PATHS["data_quarter"], "Add and commit WikiCommons data"
)

# Push changes
Expand Down
2 changes: 1 addition & 1 deletion scripts/1-fetch/wikipedia_fetched.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ def main():

# Add and commit changes
shared.add_and_commit(
PATHS["repo"], PATHS["data_phase"], "Add and commit Wikipedia data"
PATHS["repo"], PATHS["data_quarter"], "Add and commit Wikipedia data"
)

# Push changes
Expand Down
2 changes: 1 addition & 1 deletion scripts/1-fetch/youtube_fetched.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ def main():

# Add and commit changes
shared.add_and_commit(
PATHS["repo"], PATHS["data_phase"], "Add and commit YouTube data"
PATHS["repo"], PATHS["data_quarter"], "Add and commit YouTube data"
)

# Push changes
Expand Down
2 changes: 1 addition & 1 deletion scripts/2-process/deviantart_processed.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def main():

# # Add and commit changes
# shared.add_and_commit(
# PATHS["repo"], PATHS["data_phase"], "Fetched and updated new data"
# PATHS["repo"], PATHS["data_quarter"], "Fetched and updated new data"
# )

# # Push changes
Expand Down
2 changes: 1 addition & 1 deletion scripts/2-process/flickr_processed.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def main():

# # Add and commit changes
# shared.add_and_commit(
# PATHS["repo"], PATHS["data_phase"], "Fetched and updated new data"
# PATHS["repo"], PATHS["data_quarter"], "Fetched and updated new data"
# )

# # Push changes
Expand Down
2 changes: 1 addition & 1 deletion scripts/2-process/gcs_processed.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ def main():

# # Add and commit changes
# shared.add_and_commit(
# PATHS["repo"], PATHS["data_phase"], "Fetched and updated new data"
# PATHS["repo"], PATHS["data_quarter"], "Fetched and updated new data"
# )

# # Push changes
Expand Down
2 changes: 1 addition & 1 deletion scripts/2-process/github_processed.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def main():

# # Add and commit changes
# shared.add_and_commit(
# PATHS["repo"], PATHS["data_phase"], "Fetched and updated new data"
# PATHS["repo"], PATHS["data_quarter"], "Fetched and updated new data"
# )

# # Push changes
Expand Down
2 changes: 1 addition & 1 deletion scripts/2-process/internetarchive_processed.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def main():

# # Add and commit changes
# shared.add_and_commit(
# PATHS["repo"], PATHS["data_phase"], "Fetched and updated new data"
# PATHS["repo"], PATHS["data_quarter"], "Fetched and updated new data"
# )

# # Push changes
Expand Down
2 changes: 1 addition & 1 deletion scripts/2-process/metmuseum_processed.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def main():

# # Add and commit changes
# shared.add_and_commit(
# PATHS["repo"], PATHS["data_phase"], "Fetched and updated new data"
# PATHS["repo"], PATHS["data_quarter"], "Fetched and updated new data"
# )

# # Push changes
Expand Down
2 changes: 1 addition & 1 deletion scripts/2-process/vimeo_processed.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def main():

# # Add and commit changes
# shared.add_and_commit(
# PATHS["repo"], PATHS["data_phase"], "Fetched and updated new data"
# PATHS["repo"], PATHS["data_quarter"], "Fetched and updated new data"
# )

# # Push changes
Expand Down
2 changes: 1 addition & 1 deletion scripts/2-process/wikicommons_processed.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def main():

# # Add and commit changes
# shared.add_and_commit(
# PATHS["repo"], PATHS["data_phase"], "Fetched and updated new data"
# PATHS["repo"], PATHS["data_quarter"], "Fetched and updated new data"
# )

# # Push changes
Expand Down
2 changes: 1 addition & 1 deletion scripts/2-process/wikipedia_processed.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def main():

# # Add and commit changes
# shared.add_and_commit(
# PATHS["repo"], PATHS["data_phase"], "Fetched and updated new data"
# PATHS["repo"], PATHS["data_quarter"], "Fetched and updated new data"
# )

# # Push changes
Expand Down
2 changes: 1 addition & 1 deletion scripts/2-process/youtube_processed.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def main():

# # Add and commit changes
# shared.add_and_commit(
# PATHS["repo"], PATHS["data_phase"], "Fetched and updated new data"
# PATHS["repo"], PATHS["data_quarter"], "Fetched and updated new data"
# )

# # Push changes
Expand Down
2 changes: 1 addition & 1 deletion scripts/3-report/deviantart_reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ def main():
if not args.skip_commit:
shared.add_and_commit(
PATHS["repo"],
PATHS["data_phase"],
PATHS["data_quarter"],
"Add and commit new Deviantart reports",
)

Expand Down
2 changes: 1 addition & 1 deletion scripts/3-report/flickr_reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ def main():
if not args.skip_commit:
shared.add_and_commit(
PATHS["repo"],
PATHS["data_phase"],
PATHS["data_quarter"],
"Add and commit new GitHub reports",
)

Expand Down
2 changes: 1 addition & 1 deletion scripts/3-report/gcs_reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,7 @@ def main():
# Add and commit changes
if not args.skip_commit:
shared.add_and_commit(
PATHS["repo"], PATHS["data_phase"], "Add and commit new reports"
PATHS["repo"], PATHS["data_quarter"], "Add and commit new reports"
)

# Push changes
Expand Down
2 changes: 1 addition & 1 deletion scripts/3-report/github_reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ def main():
if not args.skip_commit:
shared.add_and_commit(
PATHS["repo"],
PATHS["data_phase"],
PATHS["data_quarter"],
"Add and commit new GitHub reports",
)

Expand Down
2 changes: 1 addition & 1 deletion scripts/3-report/internetarchive_reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ def main():
if not args.skip_commit:
shared.add_and_commit(
PATHS["repo"],
PATHS["data_phase"],
PATHS["data_quarter"],
"Add and commit new Internet Archive reports",
)

Expand Down
2 changes: 1 addition & 1 deletion scripts/3-report/metmuseum_reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ def main():
if not args.skip_commit:
shared.add_and_commit(
PATHS["repo"],
PATHS["data_phase"],
PATHS["data_quarter"],
"Add and commit new Metmuseum reports",
)

Expand Down
2 changes: 1 addition & 1 deletion scripts/3-report/vimeo_reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ def main():
if not args.skip_commit:
shared.add_and_commit(
PATHS["repo"],
PATHS["data_phase"],
PATHS["data_quarter"],
"Add and commit new Vimeo reports",
)

Expand Down
2 changes: 1 addition & 1 deletion scripts/3-report/wikicommons_reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ def main():
if not args.skip_commit:
shared.add_and_commit(
PATHS["repo"],
PATHS["data_phase"],
PATHS["data_quarter"],
"Add and commit new WikiCommons reports",
)

Expand Down
2 changes: 1 addition & 1 deletion scripts/3-report/wikipedia_reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ def main():
if not args.skip_commit:
shared.add_and_commit(
PATHS["repo"],
PATHS["data_phase"],
PATHS["data_quarter"],
"Add and commit new Wikpedia reports",
)

Expand Down
2 changes: 1 addition & 1 deletion scripts/3-report/youtube_reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ def main():
if not args.skip_commit:
shared.add_and_commit(
PATHS["repo"],
PATHS["data_phase"],
PATHS["data_quarter"],
"Add and commit new YouTube reports",
)

Expand Down

0 comments on commit b84fe81

Please sign in to comment.