Skip to content

Commit

Permalink
Bonsai (#159)
Browse files Browse the repository at this point in the history
* add baci + unido

* add GEOnumeric

* force string

* use 4 number string

* do isort

* string issue

* treat GEOnumeric as int + fix float issue
  • Loading branch information
mabudz authored Nov 15, 2024
1 parent d0454b7 commit 2651d65
Show file tree
Hide file tree
Showing 4 changed files with 359 additions and 261 deletions.
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -425,7 +425,11 @@ Data sources below for further information):
34. [GWcode](https://www.tandfonline.com/doi/abs/10.1080/03050629908434958) - Gleditsch & Ward numerical codes as published in https://www.andybeger.com/states/articles/statelists.html
35. CC41 - common classification for MRIOs (list of countries found in all public MRIOs)
36. [IOC](https://en.wikipedia.org/wiki/List_of_IOC_country_codes) - International Olympic Committee (IOC) country codes

37. [BACI](https://www.cepii.fr/CEPII/en/bdd_modele/bdd_modele_item.asp?id=37) - BACI: International Trade Database at the Product-Level
38. [UNIDO](https://stat.unido.org/portal/dataset/getDataset/COUNTRY_PROFILE) - UNIDO INDSTAT database
39. [EXIOBASE hybrid 3](https://zenodo.org/records/10148587) classification
40. [EXIOBASE hybrid 3 consequential](https://zenodo.org/records/13926939) classification
41. [GEOnumeric](https://ec.europa.eu/eurostat/comext/newxtweb/openNom.do) - GEO numerical codes (also used in Prodcom)

Coco contains officially recognised codes as well as non-standard codes
for disputed or dissolved countries. To restrict the set to only the
Expand Down
35 changes: 32 additions & 3 deletions country_converter/country_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@
import pprint
import re
import sys
from collections import OrderedDict
from collections import OrderedDict, defaultdict

import numpy as np
import pandas as pd
from pandas._libs.parsers import STR_NA_VALUES

from country_converter.version import __version__

Expand Down Expand Up @@ -409,8 +411,29 @@ def __init__(

must_be_unique = ["name_short", "name_official", "regex"]
must_be_string = must_be_unique + (
["ISO2", "ISO3", "continent", "UNregion", "EXIO1", "EXIO2", "EXIO3", "WIOD"]
[
"ISO2",
"ISO3",
"continent",
"UNregion",
"EXIO1",
"EXIO2",
"EXIO3",
"WIOD",
]
)
must_be_int = [
"ISOnumeric",
"UNcode",
"FAOcode",
"GBDcode",
"EURO",
"UN",
"UNmember",
"obsolete",
"GEOnumeric",
]
accepted_na_values = STR_NA_VALUES - {"NA"}

def test_for_unique_names(
df, data_name="passed dataframe", report_fun=log.error
Expand All @@ -433,11 +456,16 @@ def data_loader(data):
sep="\t",
encoding="utf-8",
converters={str_col: str for str_col in must_be_string},
na_values=accepted_na_values,
)
ret = ret.astype(
{col: "Int64" for col in ret.columns if col in must_be_int}
)
test_for_unique_names(ret, data)
return ret

basic_df = data_loader(country_data)

if only_UNmember:
basic_df.dropna(subset=["UNmember"], inplace=True)

Expand Down Expand Up @@ -808,8 +836,9 @@ def _validate_input_para(self, para, column_names):
"name_short": ["short", "short_name", "name", "names"],
"name_official": ["official", "long_name", "long"],
"UNcode": ["un", "unnumeric", "M49"],
"ISOnumeric": ["isocode"],
"ISOnumeric": ["isocode", "baci", "unido"],
"FAOcode": ["fao", "faonumeric"],
"EXIO3": ["exio_hybrid_3", "exio_hybrid_3_cons"],
}

for item in alt_valid_names.items():
Expand Down
Loading

0 comments on commit 2651d65

Please sign in to comment.