From 731970762c22af1ae218cc1801ac4d5f677ccbf9 Mon Sep 17 00:00:00 2001 From: Timid Robot Zehta Date: Wed, 20 Nov 2024 07:51:59 -0800 Subject: [PATCH] add YAML Google Collection values and helper script --- .gitignore | 3 + data/google_country_collection.yaml | 488 ++++++++++++++++++++++ data/google_language_collection.yaml | 74 ++++ dev/reformat_collection_values_to_yaml.sh | 113 +++++ 4 files changed, 678 insertions(+) create mode 100644 data/google_country_collection.yaml create mode 100644 data/google_language_collection.yaml create mode 100755 dev/reformat_collection_values_to_yaml.sh diff --git a/.gitignore b/.gitignore index 888460d7..42a9e73d 100644 --- a/.gitignore +++ b/.gitignore @@ -144,3 +144,6 @@ Thumbs.db # secrets .env query_secrets.py + +# backup files +*.bak diff --git a/data/google_country_collection.yaml b/data/google_country_collection.yaml new file mode 100644 index 00000000..4eb5ed2c --- /dev/null +++ b/data/google_country_collection.yaml @@ -0,0 +1,488 @@ +# Based on: +# https://developers.google.com/custom-search/docs/xml_results_appendices#country-collection-values +# +# Reformatted with ./dev/reformat_collection_values_to_yaml.sh +- country: Afghanistan + cr: countryAF +- country: Albania + cr: countryAL +- country: Algeria + cr: countryDZ +- country: American Samoa + cr: countryAS +- country: Andorra + cr: countryAD +- country: Angola + cr: countryAO +- country: Anguilla + cr: countryAI +- country: Antarctica + cr: countryAQ +- country: Antigua and Barbuda + cr: countryAG +- country: Argentina + cr: countryAR +- country: Armenia + cr: countryAM +- country: Aruba + cr: countryAW +- country: Australia + cr: countryAU +- country: Austria + cr: countryAT +- country: Azerbaijan + cr: countryAZ +- country: Bahamas + cr: countryBS +- country: Bahrain + cr: countryBH +- country: Bangladesh + cr: countryBD +- country: Barbados + cr: countryBB +- country: Belarus + cr: countryBY +- country: Belgium + cr: countryBE +- country: Belize + cr: 
countryBZ +- country: Benin + cr: countryBJ +- country: Bermuda + cr: countryBM +- country: Bhutan + cr: countryBT +- country: Bolivia + cr: countryBO +- country: Bosnia and Herzegovina + cr: countryBA +- country: Botswana + cr: countryBW +- country: Bouvet Island + cr: countryBV +- country: Brazil + cr: countryBR +- country: British Indian Ocean Territory + cr: countryIO +- country: Brunei Darussalam + cr: countryBN +- country: Bulgaria + cr: countryBG +- country: Burkina Faso + cr: countryBF +- country: Burundi + cr: countryBI +- country: Cambodia + cr: countryKH +- country: Cameroon + cr: countryCM +- country: Canada + cr: countryCA +- country: Cape Verde + cr: countryCV +- country: Cayman Islands + cr: countryKY +- country: Central African Republic + cr: countryCF +- country: Chad + cr: countryTD +- country: Chile + cr: countryCL +- country: China + cr: countryCN +- country: Christmas Island + cr: countryCX +- country: Cocos (Keeling) Islands + cr: countryCC +- country: Colombia + cr: countryCO +- country: Comoros + cr: countryKM +- country: Congo + cr: countryCG +- country: Congo, the Democratic Republic of the + cr: countryCD +- country: Cook Islands + cr: countryCK +- country: Costa Rica + cr: countryCR +- country: Cote D'ivoire + cr: countryCI +- country: Croatia (Hrvatska) + cr: countryHR +- country: Cuba + cr: countryCU +- country: Cyprus + cr: countryCY +- country: Czech Republic + cr: countryCZ +- country: Denmark + cr: countryDK +- country: Djibouti + cr: countryDJ +- country: Dominica + cr: countryDM +- country: Dominican Republic + cr: countryDO +- country: East Timor + cr: countryTP +- country: Ecuador + cr: countryEC +- country: Egypt + cr: countryEG +- country: El Salvador + cr: countrySV +- country: Equatorial Guinea + cr: countryGQ +- country: Eritrea + cr: countryER +- country: Estonia + cr: countryEE +- country: Ethiopia + cr: countryET +- country: European Union + cr: countryEU +- country: Falkland Islands (Malvinas) + cr: countryFK +- 
country: Faroe Islands + cr: countryFO +- country: Fiji + cr: countryFJ +- country: Finland + cr: countryFI +- country: France + cr: countryFR +- country: France, Metropolitan + cr: countryFX +- country: French Guiana + cr: countryGF +- country: French Polynesia + cr: countryPF +- country: French Southern Territories + cr: countryTF +- country: Gabon + cr: countryGA +- country: Gambia + cr: countryGM +- country: Georgia + cr: countryGE +- country: Germany + cr: countryDE +- country: Ghana + cr: countryGH +- country: Gibraltar + cr: countryGI +- country: Greece + cr: countryGR +- country: Greenland + cr: countryGL +- country: Grenada + cr: countryGD +- country: Guadeloupe + cr: countryGP +- country: Guam + cr: countryGU +- country: Guatemala + cr: countryGT +- country: Guinea + cr: countryGN +- country: Guinea-Bissau + cr: countryGW +- country: Guyana + cr: countryGY +- country: Haiti + cr: countryHT +- country: Heard Island and Mcdonald Islands + cr: countryHM +- country: Holy See (Vatican City State) + cr: countryVA +- country: Honduras + cr: countryHN +- country: Hong Kong + cr: countryHK +- country: Hungary + cr: countryHU +- country: Iceland + cr: countryIS +- country: India + cr: countryIN +- country: Indonesia + cr: countryID +- country: Iran, Islamic Republic of + cr: countryIR +- country: Iraq + cr: countryIQ +- country: Ireland + cr: countryIE +- country: Israel + cr: countryIL +- country: Italy + cr: countryIT +- country: Jamaica + cr: countryJM +- country: Japan + cr: countryJP +- country: Jordan + cr: countryJO +- country: Kazakhstan + cr: countryKZ +- country: Kenya + cr: countryKE +- country: Kiribati + cr: countryKI +- country: Korea, Democratic People's Republic of + cr: countryKP +- country: Korea, Republic of + cr: countryKR +- country: Kuwait + cr: countryKW +- country: Kyrgyzstan + cr: countryKG +- country: Lao People's Democratic Republic + cr: countryLA +- country: Latvia + cr: countryLV +- country: Lebanon + cr: countryLB +- country: Lesotho 
+ cr: countryLS +- country: Liberia + cr: countryLR +- country: Libyan Arab Jamahiriya + cr: countryLY +- country: Liechtenstein + cr: countryLI +- country: Lithuania + cr: countryLT +- country: Luxembourg + cr: countryLU +- country: Macao + cr: countryMO +- country: Macedonia, the Former Yugosalv Republic of + cr: countryMK +- country: Madagascar + cr: countryMG +- country: Malawi + cr: countryMW +- country: Malaysia + cr: countryMY +- country: Maldives + cr: countryMV +- country: Mali + cr: countryML +- country: Malta + cr: countryMT +- country: Marshall Islands + cr: countryMH +- country: Martinique + cr: countryMQ +- country: Mauritania + cr: countryMR +- country: Mauritius + cr: countryMU +- country: Mayotte + cr: countryYT +- country: Mexico + cr: countryMX +- country: Micronesia, Federated States of + cr: countryFM +- country: Moldova, Republic of + cr: countryMD +- country: Monaco + cr: countryMC +- country: Mongolia + cr: countryMN +- country: Montserrat + cr: countryMS +- country: Morocco + cr: countryMA +- country: Mozambique + cr: countryMZ +- country: Myanmar + cr: countryMM +- country: Namibia + cr: countryNA +- country: Nauru + cr: countryNR +- country: Nepal + cr: countryNP +- country: Netherlands + cr: countryNL +- country: Netherlands Antilles + cr: countryAN +- country: New Caledonia + cr: countryNC +- country: New Zealand + cr: countryNZ +- country: Nicaragua + cr: countryNI +- country: Niger + cr: countryNE +- country: Nigeria + cr: countryNG +- country: Niue + cr: countryNU +- country: Norfolk Island + cr: countryNF +- country: Northern Mariana Islands + cr: countryMP +- country: Norway + cr: countryNO +- country: Oman + cr: countryOM +- country: Pakistan + cr: countryPK +- country: Palau + cr: countryPW +- country: Palestinian Territory + cr: countryPS +- country: Panama + cr: countryPA +- country: Papua New Guinea + cr: countryPG +- country: Paraguay + cr: countryPY +- country: Peru + cr: countryPE +- country: Philippines + cr: countryPH +- 
country: Pitcairn + cr: countryPN +- country: Poland + cr: countryPL +- country: Portugal + cr: countryPT +- country: Puerto Rico + cr: countryPR +- country: Qatar + cr: countryQA +- country: Reunion + cr: countryRE +- country: Romania + cr: countryRO +- country: Russian Federation + cr: countryRU +- country: Rwanda + cr: countryRW +- country: Saint Helena + cr: countrySH +- country: Saint Kitts and Nevis + cr: countryKN +- country: Saint Lucia + cr: countryLC +- country: Saint Pierre and Miquelon + cr: countryPM +- country: Saint Vincent and the Grenadines + cr: countryVC +- country: Samoa + cr: countryWS +- country: San Marino + cr: countrySM +- country: Sao Tome and Principe + cr: countryST +- country: Saudi Arabia + cr: countrySA +- country: Senegal + cr: countrySN +- country: Serbia and Montenegro + cr: countryCS +- country: Seychelles + cr: countrySC +- country: Sierra Leone + cr: countrySL +- country: Singapore + cr: countrySG +- country: Slovakia + cr: countrySK +- country: Slovenia + cr: countrySI +- country: Solomon Islands + cr: countrySB +- country: Somalia + cr: countrySO +- country: South Africa + cr: countryZA +- country: South Georgia and the South Sandwich Islands + cr: countryGS +- country: Spain + cr: countryES +- country: Sri Lanka + cr: countryLK +- country: Sudan + cr: countrySD +- country: Suriname + cr: countrySR +- country: Svalbard and Jan Mayen + cr: countrySJ +- country: Swaziland + cr: countrySZ +- country: Sweden + cr: countrySE +- country: Switzerland + cr: countryCH +- country: Syrian Arab Republic + cr: countrySY +- country: Taiwan, Province of China + cr: countryTW +- country: Tajikistan + cr: countryTJ +- country: Tanzania, United Republic of + cr: countryTZ +- country: Thailand + cr: countryTH +- country: Togo + cr: countryTG +- country: Tokelau + cr: countryTK +- country: Tonga + cr: countryTO +- country: Trinidad and Tobago + cr: countryTT +- country: Tunisia + cr: countryTN +- country: Turkey + cr: countryTR +- country: 
Turkmenistan + cr: countryTM +- country: Turks and Caicos Islands + cr: countryTC +- country: Tuvalu + cr: countryTV +- country: Uganda + cr: countryUG +- country: Ukraine + cr: countryUA +- country: United Arab Emirates + cr: countryAE +- country: United Kingdom + cr: countryUK +- country: United States + cr: countryUS +- country: United States Minor Outlying Islands + cr: countryUM +- country: Uruguay + cr: countryUY +- country: Uzbekistan + cr: countryUZ +- country: Vanuatu + cr: countryVU +- country: Venezuela + cr: countryVE +- country: Vietnam + cr: countryVN +- country: Virgin Islands, British + cr: countryVG +- country: Virgin Islands, U.S. + cr: countryVI +- country: Wallis and Futuna + cr: countryWF +- country: Western Sahara + cr: countryEH +- country: Yemen + cr: countryYE +- country: Yugoslavia + cr: countryYU +- country: Zambia + cr: countryZM +- country: Zimbabwe + cr: countryZW diff --git a/data/google_language_collection.yaml b/data/google_language_collection.yaml new file mode 100644 index 00000000..09301ed8 --- /dev/null +++ b/data/google_language_collection.yaml @@ -0,0 +1,74 @@ +# Based on: +# https://developers.google.com/custom-search/docs/xml_results_appendices#language-collection-values +# +# Reformatted with ./dev/reformat_collection_values_to_yaml.sh +- country: Arabic + cr: lang_ar +- country: Bulgarian + cr: lang_bg +- country: Catalan + cr: lang_ca +- country: Chinese (Simplified) + cr: lang_zh-CN +- country: Chinese (Traditional) + cr: lang_zh-TW +- country: Croatian + cr: lang_hr +- country: Czech + cr: lang_cs +- country: Danish + cr: lang_da +- country: Dutch + cr: lang_nl +- country: English + cr: lang_en +- country: Estonian + cr: lang_et +- country: Finnish + cr: lang_fi +- country: French + cr: lang_fr +- country: German + cr: lang_de +- country: Greek + cr: lang_el +- country: Hebrew + cr: lang_iw +- country: Hungarian + cr: lang_hu +- country: Icelandic + cr: lang_is +- country: Indonesian + cr: lang_id +- country: Italian + 
cr: lang_it
+- country: Japanese
+  cr: lang_ja
+- country: Korean
+  cr: lang_ko
+- country: Latvian
+  cr: lang_lv
+- country: Lithuanian
+  cr: lang_lt
+- country: Norwegian
+  cr: lang_no
+- country: Polish
+  cr: lang_pl
+- country: Portuguese
+  cr: lang_pt
+- country: Romanian
+  cr: lang_ro
+- country: Russian
+  cr: lang_ru
+- country: Serbian
+  cr: lang_sr
+- country: Slovak
+  cr: lang_sk
+- country: Slovenian
+  cr: lang_sl
+- country: Spanish
+  cr: lang_es
+- country: Swedish
+  cr: lang_sv
+- country: Turkish
+  cr: lang_tr
diff --git a/dev/reformat_collection_values_to_yaml.sh b/dev/reformat_collection_values_to_yaml.sh
new file mode 100755
index 00000000..bf222fd7
--- /dev/null
+++ b/dev/reformat_collection_values_to_yaml.sh
@@ -0,0 +1,113 @@
+#!/usr/bin/env bash
+#
+# Reformat cut & pasted Google Collection values to YAML
+#
+#### SETUP ####################################################################
+
+set -o errexit
+set -o errtrace
+set -o nounset
+
+# Report the failing command on stderr and exit with its status. Values are
+# passed as printf arguments (not embedded in the format string) so that a
+# "%" in the failing command cannot be misread as a format directive.
+# shellcheck disable=SC2154
+trap '_es=${?};
+    printf "%s: line %s: \"%s\"" "${0}" "${LINENO}" "${BASH_COMMAND}" 1>&2;
+    printf " exited with a status of %s\n" "${_es}" 1>&2;
+    exit ${_es}' ERR
+
+DIR_REPO="$(cd -P -- "${0%/*}/.." && pwd -P)"
+SCRIPT_NAME="${0##*/}"
+# https://en.wikipedia.org/wiki/ANSI_escape_code
+E0="$(printf "\e[0m")"        # reset
+E30="$(printf "\e[30m")"      # black foreground
+E31="$(printf "\e[31m")"      # red foreground
+E97="$(printf "\e[97m")"      # bright white foreground
+E107="$(printf "\e[107m")"    # bright white background
+
+# Destination files and the pages their values are copied from
+URL_BASE=https://developers.google.com/custom-search/docs/
+COUNTRY_FILE=data/google_country_collection.yaml
+COUNTRY_URL="${URL_BASE}xml_results_appendices#country-collection-values"
+LANGUAGE_FILE=data/google_language_collection.yaml
+LANGUAGE_URL="${URL_BASE}xml_results_appendices#language-collection-values"
+
+#### FUNCTIONS ################################################################
+
+check_gsed() {
+    # Exit with an error unless GNU sed is available as gsed
+    if ! gsed --version &>/dev/null
+    then
+        error_exit 'This script requires GNU sed to be available as gsed'
+    fi
+}
+
+error_exit() {
+    # Echo error message and exit with error
+    echo -e "${E31}ERROR:${E0} ${*}" 1>&2
+    exit 1
+}
+
+print_header() {
+    # Print 80 character wide black on white heading with time
+    printf "${E30}${E107}# %-70s$(date '+%T') ${E0}\n" "${@}"
+}
+
+reformat_collection() {
+    # Reformat one cut & pasted collection file to YAML, in place
+    #
+    # Arguments:
+    #   1: path of the file to reformat, relative to the repository root
+    #   2: URL of the page the values were copied from (written to header)
+    #
+    # NOTE(review): every file is written with "country:"/"cr:" keys,
+    # including the language collection -- confirm that consumers expect it
+    local _file="${1}" _url="${2}"
+
+    echo "${_file}"
+    [[ -f "${_file}" ]] || error_exit "File not found: ${_file}"
+    echo ' Remove any existing line comments and create backup'
+    gsed --in-place=.bak \
+        -e'/^#/d' \
+        "${_file}"
+    echo ' Add line comments at top of file'
+    # Each expression prepends to line 1, so the header is built bottom-up
+    gsed --in-place \
+        -e"1s|^|# Reformatted with ./dev/${SCRIPT_NAME}\\n|" \
+        -e'1s|^|#\n|' \
+        -e"1s|^|# ${_url}\\n|" \
+        -e'1s|^|# Based on:\n|' \
+        "${_file}"
+    echo ' Reformat to YAML'
+    gsed --in-place --regexp-extended \
+        -e's|^([A-Z])|- country: \1|' \
+        -e's| \t|\n  cr: |' \
+        "${_file}"
+    echo ' Done.'
+    echo
+}
+
+#### MAIN #####################################################################
+
+cd "${DIR_REPO}"
+
+check_gsed
+
+print_header 'Convert cut & paste to YAML'
+echo 'This script assumes that the files contain data copied from the XML API'
+echo 'reference appendices | Programmable Search Engine | Google for'
+echo 'Developers page:'
+echo
+echo "${E97}${COUNTRY_FILE}${E0}"
+echo "${COUNTRY_URL}"
+echo
+echo "${E97}${LANGUAGE_FILE}${E0}"
+echo "${LANGUAGE_URL}"
+echo
+
+print_header 'Update and reformat: Google Country Collection values'
+reformat_collection "${COUNTRY_FILE}" "${COUNTRY_URL}"
+
+print_header 'Update and reformat: Google Language Collection values'
+reformat_collection "${LANGUAGE_FILE}" "${LANGUAGE_URL}"