
Commit cbc08cb

Committed Nov 20, 2015
Adding a snapshot of python scripts from phdthesis.git to public repo
1 parent 916e63e commit cbc08cb

20 files changed (+9121, -0 lines changed)
 

README.md → README.rst

File renamed without changes.

chapter_2+3_theoryCA_networkstructure.py

+846
Large diffs are not rendered by default.

chapter_4_probable_improbable.py

+532
Large diffs are not rendered by default.

chapter_5_productfragmentation-baci92.py

+2,943
Large diffs are not rendered by default.

chapter_5_productfragmentation-wits-sitcr3.py

+553
Large diffs are not rendered by default.

chapter_x_appendix_A_dataset.py

+89
"""
Chapter: Appendix A Dataset
===========================

This contains the tables, plots and analysis used in the construction of the Dataset Appendix chapter.

NES and Non-Country Areas => percentage of trade data recorded under non-country ISO3C codes
    (NES denotes "not elsewhere specified" regions)
NBER AX Codes as a % of World Exports => percentage of trade data recorded under AX product codes

"""
import re
import pandas as pd
import matplotlib.pyplot as plt

from dataset_info import TARGET_RAW_DIR, CHAPTER_RESULTS

RESULTS_DIR = CHAPTER_RESULTS["A"]

#------#
#-NBER-#
#------#

#-Setup-#
#-------#
source_dir = TARGET_RAW_DIR["nber"]
for year in xrange(1962, 2000+1, 1):
    if year == 1962:
        source_data = pd.read_hdf(source_dir+"nber_year.h5", 'Y%s'%year)
    else:
        source_data = source_data.append(pd.read_hdf(source_dir+"nber_year.h5", 'Y%s'%year))

#-World Values-#
world_values = source_data.loc[(source_data.importer == "World") & (source_data.exporter == "World")]
world_values = world_values.groupby("year").sum()["value"]


#-NES and Non-Country Areas-#
#---------------------------#
plt.clf()
from pyeconlab.trade.dataset.NBERWTF.meta import countryname_to_iso3c
data = source_data.copy()
data = data.loc[(data.importer != "World") & (data.exporter != "World")].reset_index()
data["EC"] = data["exporter"].apply(lambda x: countryname_to_iso3c[x])
data["IC"] = data["importer"].apply(lambda x: countryname_to_iso3c[x])
data["NC"] = data[["EC", "IC"]].apply(lambda row: 1 if (row["EC"]==".")|(row["IC"]==".") else 0, axis=1)
nc_data = data.loc[data.NC == 1].groupby("year").sum()["value"]
#-Percentage-#
#~Trade~#
result = nc_data.div(world_values)*100
describe = result.describe()
describe.to_csv(RESULTS_DIR + "nber_notcountry_percent_world_trade_table.csv")
pd.DataFrame(describe).to_latex(RESULTS_DIR + "nber_notcountry_percent_world_trade_table.tex")
ax = result.plot(title="NES Trade Flows [% of World Trade]", yticks=[0,1,2,3,4,5])
ax.set_ylabel("Percent of World Trade")
ax.set_xlabel("Year")
plt.savefig(RESULTS_DIR + "nber_notcountry_percent_world_trade_plot.pdf")
plt.clf()
#~Export~#
data_export = data.groupby(["year", "exporter", "EC"]).sum().reset_index()
nc_data_export = data_export.loc[data_export.EC == "."].groupby("year").sum()["value"]
result = nc_data_export.div(world_values)*100
describe = result.describe()
describe.to_csv(RESULTS_DIR + "nber_notcountry_percent_world_export_table.csv")
pd.DataFrame(describe).to_latex(RESULTS_DIR + "nber_notcountry_percent_world_export_table.tex")
ax = result.plot(title="NES Export Flows [% of World Trade]")
ax.set_ylabel("Percent of World Trade")
ax.set_xlabel("Year")
plt.savefig(RESULTS_DIR + "nber_notcountry_percent_world_export_plot.pdf")
plt.clf()
#~Import ... Not Required~#
del data, nc_data, data_export, nc_data_export


#-NBER AX Codes as a % of World Exports-#
#---------------------------------------#
data = source_data.copy()
data["AX"] = data["sitc4"].apply(lambda x: 1 if re.search("[aAxX]", x) else 0)
AX = data.loc[data.AX == 1]
AX = AX.groupby("year").sum()["value"]
#-Percentage-#
result = AX.div(world_values)*100
describe = result.describe()
describe.to_csv(RESULTS_DIR + "nber_ax_percent_world_trade_table.csv")
pd.DataFrame(describe).to_latex(RESULTS_DIR + "nber_ax_percent_world_trade_table.tex")
ax = result.plot(title="AX [% of World Trade]")
ax.set_ylabel("Percent of World Trade")
ax.set_xlabel("Year")
plt.savefig(RESULTS_DIR + "nber_ax_percent_world_trade_plot.pdf")
del data, AX
chapter_x_appendix_B_RCAmeasures.py

+21
"""
Appendix B: Revealed Comparative Advantage
==========================================

Analysis of Revealed Comparative Advantage Indicators

"""

#------------------------------------------------------------------------------#
#-Compare RCA (Endogenously Computed Values with World Values) using NBER Data-#
#------------------------------------------------------------------------------#

# IPYTHON NOTEBOOK:
# ./AppendixC/RCAStudy-NBER2000-TotalWorldValues-Vs-CompleteNetwork.ipynb


#-Compare RCA Metrics-#
#-Balassa (1965) and Yu (2009)-#

# IPYTHON NOTEBOOK
# ./AppendixC/RCAStudy-NBER-Compare-RCA-Metrics.ipynb
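# For reference, the Balassa (1965) index compared in the notebooks above is the
# standard export-share ratio RCA_cp = (x_cp / x_c) / (x_p / x_w). A minimal
# hedged sketch, assuming a long-form frame with country, productcode and
# export columns (not the notebooks' own implementation):
#
#   def balassa_rca(df):
#       x_cp = df.groupby(["country", "productcode"])["export"].sum()
#       x_c = df.groupby("country")["export"].sum()
#       x_p = df.groupby("productcode")["export"].sum()
#       x_w = df["export"].sum()
#       return x_cp.div(x_c, level="country").div(x_p / x_w, level="productcode")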
+110
"""
Appendix G: Symmetric vs. Asymmetric Proximity Matrices

Year 2000 - NBER DATA

"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from pyeconlab import DynamicProductLevelExportSystem

#-Local Imports-#
from dataset_info import TARGET_DATASET_DIR, CHAPTER_RESULTS
DATASET_DIR = TARGET_DATASET_DIR['nber']
RESULTS_DIR = CHAPTER_RESULTS["G"]

data = pd.read_hdf(DATASET_DIR+"nber-export-sitcr2l4-1962to2000.h5", "D")
data = data.rename(columns={'eiso3c' : 'country', 'sitc4' : 'productcode', 'value' : 'export'})
data = data.set_index(["year"])
system = DynamicProductLevelExportSystem()
system.from_df(data)

#-Year 2000-#
ys = system[2000]
ys.rca_matrix(complete_data=True)
ys.mcp_matrix()
ys.compute_pci()
ys.auto_adjust_pci_sign()
pci = ys.pci.copy()

#-Example Proximity Values-#
from pyeconlab.trade.classification import SITCR2
sitc_to_name = SITCR2().code_description_dict()
prox1 = ys.proximity_matrix()
products1 = ["8423", "0711"]
products2 = ["0611", "2927", "8451", "7810", "8441", "6584", "7924"]
exval = prox1.filter(items=products1, axis=0).filter(items=products2, axis=1).T.unstack().to_frame(name="Proximity")
exval.index.names = ["P1", "P2"]
exval = exval.groupby(level="P1").apply(lambda x: x.sort(columns="Proximity", ascending=False))
exval.index = exval.index.droplevel()
exval = exval.reset_index()
exval["P1 Description"] = exval["P1"].apply(lambda x: sitc_to_name[x])
exval["P2 Description"] = exval["P2"].apply(lambda x: sitc_to_name[x])
exval = exval.set_index(["P1", "P1 Description", "P2", "P2 Description"])
exval.to_excel(RESULTS_DIR+"proximity-examples-yr2000-nber-datasetD.xlsx")


#-Symmetric Proximity Analysis-#
prox1 = ys.compute_proximity(matrix_type='symmetric')
fig1 = ys.plot_proximity(prox_cutoff=0.6, sortby=pci, sortby_text="PCI", step=15)
ax = fig1.gca()
ax.set_title("Symmetric Proximity Matrix [Yr: 2000]")
plt.savefig(RESULTS_DIR + "proximity-symmetric-yr2000-nber-datasetD.png", dpi=600)

products1 = ["8423", "8441", "8451", "6584"]
products2 = ["8423", "8441", "8451", "6584"]
exval = prox1.filter(items=products1, axis=0).filter(items=products2, axis=1).T.unstack().to_frame(name="Proximity")
exval.index.names = ["P1", "P2"]
exval = exval.groupby(level="P1").apply(lambda x: x.sort(columns="Proximity", ascending=False))
exval.index = exval.index.droplevel()
exval = exval.reset_index()
exval = exval.set_index(["P1", "P2"]).unstack()
exval = exval.reset_index()
exval["P1 Description"] = exval["P1"].apply(lambda x: sitc_to_name[x])
exval = exval.set_index(["P1", "P1 Description"])
exval.to_excel(RESULTS_DIR+"proximity-symmetric-examples-yr2000-nber-datasetD.xlsx")


#-------------------------------#
#-Asymmetric Proximity Analysis-#
#-------------------------------#
prox2 = ys.compute_proximity(matrix_type="asymmetric")
fig2 = ys.plot_proximity(prox_cutoff=0.6, sortby=pci, sortby_text="PCI", step=15)
ax = fig2.gca()
ax.set_title("Asymmetric Proximity Matrix [Yr: 2000]")
plt.savefig(RESULTS_DIR + "proximity-asymmetric-yr2000-nber-datasetD-value-examples.png", dpi=600)

products1 = ["8423", "8441", "8451", "6584"]
products2 = ["8423", "8441", "8451", "6584"]
exval = prox2.filter(items=products1, axis=0).filter(items=products2, axis=1).T.unstack().to_frame(name="Proximity")
exval.index.names = ["P1", "P2"]
exval = exval.groupby(level="P1").apply(lambda x: x.sort(columns="Proximity", ascending=False))
exval.index = exval.index.droplevel()
exval = exval.reset_index()
exval = exval.set_index(["P1", "P2"]).unstack()
exval = exval.reset_index()
exval["P1 Description"] = exval["P1"].apply(lambda x: sitc_to_name[x])
exval = exval.set_index(["P1", "P1 Description"])
exval.to_excel(RESULTS_DIR+"proximity-asymmetric-yr2000-nber-datasetD-value-examples.xlsx")

#-Histogram Comparing Symmetric and Asymmetric Proximity-#
s1 = prox1.unstack()
s1 = s1.apply(lambda x: np.nan if x == 1 else x)
s1 = s1.apply(lambda x: np.nan if x == 0 else x)
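# (Values of exactly 1, chiefly the self-proximity diagonal, and values of
# exactly 0 are masked to NaN so they do not dominate the histogram bins;
# the same masking is applied to the asymmetric matrix below.)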
s1v = s1.values
bins = np.linspace(0, 1, 50)
plt.hist(s1v, bins, alpha=0.5, label="Symmetric")
s2 = prox2.unstack()
s2 = s2.apply(lambda x: np.nan if x == 1 else x)
s2 = s2.apply(lambda x: np.nan if x == 0 else x)
s2v = s2.values
bins = np.linspace(0, 1, 50)
plt.hist(s2v, bins, alpha=0.5, label="Asymmetric")
plt.legend(loc="upper right")
ax = plt.gca()
ax.set_xlabel("Proximity Values")
ax.set_ylabel("Frequency")
plt.savefig(RESULTS_DIR+"proximity-symmetric-and-asymmetric-overlayedhistogram-yr2000-nber-datasetD.png", dpi=600)

dataset_analyse_baci.py

+195
"""
Analyse Tables, Plots and Construct Meta Data for BACI Data
"""

import os
import gc
import glob
import matplotlib.pyplot as plt
import pandas as pd

#-HS Levels-#
HS96L6 = True

#-SITC Levels-#
SITCR2L5 = True
SITCR2L4 = True
SITCR2L3 = True
SITCR2L2 = True
SITCR2L1 = True

#---------------#
#-Control Logic-#
#---------------#

RAW_SIMPLESTATS_TABLE = True

DATASET_PRODUCTCODE_INTERTEMPORAL_TABLES = True
DATASET_COUNTRYCODE_INTERTEMPORAL_TABLES = True
DATASET_SIMPLESTATS_TABLE = True
DATASET_PERCENTWORLDTRADE_PLOTS = True

#-----#
#-RAW-#
#-----#

from dataset_info import RESULTS_DIR, TARGET_DATASET_DIR
SOURCE_DIR = TARGET_DATASET_DIR["baci96"]
STORE = "raw_baci_hs96-1998-2012.h5"
RESULTS_DIR = RESULTS_DIR["baci96"]

if RAW_SIMPLESTATS_TABLE:

    from pyeconlab.trade.util import describe

    print "Running RAW_SIMPLESTATS_TABLE ..."

    DIR = RESULTS_DIR + "tables/"
    STORE = SOURCE_DIR + STORE

    print "Running STATS on File %s" % STORE
    store = pd.HDFStore(STORE)
    for dataset in sorted(store.keys()):
        dataset = dataset.strip("/")  #Remove Directory Structure
        print "Computing SIMPLE STATS for dataset: %s" % dataset
        data = pd.read_hdf(STORE, key=dataset)
        productcode = "hs6"
        dataset_table = describe(data, table_name=dataset, productcode=productcode, exporter="eiso3n", importer="iiso3n")
        del data
        gc.collect()
    store.close()
    #-Excel Table-#
    fl = "baciraw-trade-hs6-1998to2012_stats.xlsx"
    dataset_table.to_excel(DIR + fl)
    #-Latex Snippet-#
    fl = "baciraw-trade-hs6-1998to2012_stats.tex"
    with open(DIR + fl, "w") as latex_file:
        latex_file.write(dataset_table.to_latex())

#----------#
#-DATASETS-#
#----------#

from dataset_info import RESULTS_DIR, TARGET_DATASET_DIR
SOURCE_DIR = TARGET_DATASET_DIR["baci96"]
STORES = glob.glob(SOURCE_DIR + "*.h5")
RESULTS_DIR = RESULTS_DIR["baci96"]

## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
## ---> Product Composition Tables <--- ##
## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##

STORES = [x for x in STORES if x.split("/")[-1][0:3] != "raw"]  #Filter Out RAW Files

def split_filenames(fl):
    dataset, data_type, classification, years = fl.split("-")
    classification, product_level = classification[:-2], classification[-1:]
    return dataset, data_type, classification, product_level
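# Worked example of the rule above (filenames as built by dataset_construct_baci.py):
#   split_filenames("baci-export-sitcr2l4-1998to2012.h5")
#   -> ("baci", "export", "sitcr2", "4")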
if DATASET_PRODUCTCODE_INTERTEMPORAL_TABLES:

    print "Running DATASET_PRODUCTCODE_INTERTEMPORAL_TABLES ..."

    DIR = RESULTS_DIR + "intertemporal-productcodes/"

    for store in STORES:
        print "Computing Composition Tables for: %s" % store
        dataset, data_type, classification, product_level = split_filenames(store.split("/")[-1])
        store = pd.HDFStore(store)
        for dataset in store.keys():
            print "Computing table for dataset: %s ..." % dataset
            dataset = dataset.strip("/")
            intertemp_product = store[dataset].groupby(["year", "sitc%s"%product_level]).sum().unstack("year")
            intertemp_product.columns = intertemp_product.columns.droplevel()
            intertemp_product.to_excel(DIR + "intertemporal_product_%s_%sl%s_%s.xlsx"%(data_type, classification, product_level, dataset))
        store.close()
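# The composition tables above (and the country tables below) follow one pattern:
# collapse to (year, code) totals, then pivot years into columns. A hedged toy
# illustration (frame and values are hypothetical):
#   df = pd.DataFrame({"year": [1998, 1998, 1999],
#                      "sitc4": ["0011", "0012", "0011"],
#                      "value": [10.0, 5.0, 12.0]})
#   df.groupby(["year", "sitc4"]).sum().unstack("year")
#   # -> one row per product code, one column of total value per year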
## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
## ---> Country Composition Tables <--- ##
## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##

if DATASET_COUNTRYCODE_INTERTEMPORAL_TABLES:

    print "Running DATASET_COUNTRYCODE_INTERTEMPORAL_TABLES ..."

    DIR = RESULTS_DIR + "intertemporal-countrycodes/"

    for store in STORES:
        print "Computing Composition Tables for: %s" % store
        dataset, data_type, classification, product_level = split_filenames(store.split("/")[-1])
        store = pd.HDFStore(store)
        for dataset in store.keys():
            print "Computing table for dataset: %s ..." % dataset
            dataset = dataset.strip("/")
            if data_type == "export":
                intertemp_country = store[dataset].groupby(["year", "eiso3c"]).sum().unstack("year")
            elif data_type == "import":
                intertemp_country = store[dataset].groupby(["year", "iiso3c"]).sum().unstack("year")
            else:
                continue
            intertemp_country.columns = intertemp_country.columns.droplevel()
            intertemp_country.to_excel(DIR + "intertemporal_country_%s_%s_%s.xlsx"%(data_type, classification, dataset))
        store.close()

## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
## ----> SIMPLE STATS TABLES <---- ##
## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##


if DATASET_SIMPLESTATS_TABLE:

    from pyeconlab.trade.util import describe

    print "Running DATASET_SIMPLESTATS_TABLE: ..."

    DIR = RESULTS_DIR + "tables/"

    for dataset_file in STORES:
        print "Running STATS on File %s" % dataset_file
        store = pd.HDFStore(dataset_file)
        for dataset in sorted(store.keys()):
            dataset = dataset.strip("/")  #Remove Directory Structure
            print "Computing SIMPLE STATS for dataset: %s" % dataset
            data = pd.read_hdf(dataset_file, key=dataset)
            productcode = "".join(dataset_file.split("/")[-1].split("-")[2].split("r2l"))
            dataset_table = describe(data, table_name=dataset, productcode=productcode)
            if dataset == "A":
                table = dataset_table
            else:
                table = table.merge(dataset_table, left_index=True, right_index=True)
        store.close()
        #-Excel Table-#
        fl = dataset_file.split("/")[-1].split(".")[0] + "_stats" + ".xlsx"
        table.to_excel(DIR + fl)
        #-Latex Snippet-#
        fl = dataset_file.split("/")[-1].split(".")[0] + "_stats" + ".tex"
        with open(DIR + fl, "w") as latex_file:
            latex_file.write(table.to_latex())


if DATASET_PERCENTWORLDTRADE_PLOTS:

    print "DATASET_PERCENTWORLDTRADE_PLOTS ... "

    DIR = RESULTS_DIR + "plots/percent_world_values/"

    #-World Values-#
    fl = "./output/dataset/baci96/raw_baci_world_yearly-1998to2012.h5"
    world_values = pd.read_hdf(fl, key="World")["value"]

    for dataset_file in STORES:
        print "Producing GRAPH on File %s" % dataset_file
        store = pd.HDFStore(dataset_file)
        datasets = store.keys()
        for dataset in sorted(datasets):
            print "Computing GRAPH for dataset: %s" % dataset
            data = pd.read_hdf(dataset_file, key=dataset)
            yearly_values = data.groupby(["year"]).sum()["value"]
            percent_values = yearly_values.div(world_values)*100
            fig = percent_values.plot(title="Dataset: %s (%s)"%(dataset, dataset_file))
            plt.savefig(DIR + "%s_%s_percent_wld.pdf"%(dataset, dataset_file.split('/')[-1].split('.')[0]))
            plt.close()
        store.close()

dataset_analyse_nber.py

+1,037
Large diffs are not rendered by default.

dataset_analyse_nberbaci.py

+392
Large diffs are not rendered by default.

dataset_analyse_other.py

+210
"""
Analyse Other Datasets
======================

"atlas" -> Atlas of Complexity

"""

import gc
import re
import glob
import pandas as pd
import matplotlib.pyplot as plt

from dataset_info import RESULTS_DIR, TARGET_DATASET_DIR

#-Control-#

ATLAS = True

#-Atlas of Complexity-#
if ATLAS:
    #-Setup Source-#
    SOURCE_DIR = TARGET_DATASET_DIR["atlas"]
    HS_STORES = glob.glob(SOURCE_DIR + "*_hs92_*.h5")
    SITC_STORES = glob.glob(SOURCE_DIR + "*_sitcr2_*.h5")
    RESULTS_DIR = RESULTS_DIR["atlas"]

    #----------------------------------#
    #-ProductCode Intertemporal Tables-#
    #----------------------------------#

    print
    print "[INFO] Computing ProductCode Intertemporal Tables ..."

    DIR = RESULTS_DIR + "intertemporal-productcodes/"

    #-SITC DATA-#
    for store in SITC_STORES:
        print "Analysing SITC File: %s ..." % store
        fln = store.split("/")[-1].split(".")[0]
        store = pd.HDFStore(store)
        for dataset in store.keys():
            print "Computing table for dataset: %s ..." % dataset
            dataset = dataset.strip("/")
            product_level = int(dataset[-1])
            intertemp_product = store[dataset].groupby(["year", "sitc%s"%product_level]).sum().unstack("year")
            intertemp_product.columns = intertemp_product.columns.droplevel()
            intertemp_product.to_excel(DIR + "%s_L%s.xlsx"%(fln, product_level))
        store.close()

    #-HS DATA-#
    for store in HS_STORES:
        print "Analysing HS File: %s ..." % store
        fln = store.split("/")[-1].split(".")[0]
        store = pd.HDFStore(store)
        for dataset in store.keys():
            print "Computing table for dataset: %s ..." % dataset
            dataset = dataset.strip("/")
            product_level = int(dataset[-1])
            intertemp_product = store[dataset].groupby(["year", "hs%s"%product_level]).sum().unstack("year")
            intertemp_product.columns = intertemp_product.columns.droplevel()
            intertemp_product.to_excel(DIR + "%s_L%s.xlsx"%(fln, product_level))
        store.close()

    #----------------------------------#
    #-CountryCode Intertemporal Tables-#
    #----------------------------------#

    print
    print "[INFO] Computing CountryCode Intertemporal Tables ..."

    DIR = RESULTS_DIR + "intertemporal-countrycodes/"

    #-SITC-#
    for store in SITC_STORES:
        print "Analysing SITC File: %s ..." % store
        fln = store.split("/")[-1].split(".")[0]
        store = pd.HDFStore(store)
        for dataset in store.keys():
            print "Computing table for dataset: %s ..." % dataset
            product_level = int(dataset[-1])
            if product_level != 4:
                continue
            dataset = dataset.strip("/")
            if re.search("export", fln):
                print "[INFO] Export Data"
                intertemp_country = store[dataset].groupby(["year", "eiso3c"]).sum().unstack("year")
            elif re.search("import", fln):
                print "[INFO] Import Data"
                intertemp_country = store[dataset].groupby(["year", "iiso3c"]).sum().unstack("year")
            else:
                continue
            intertemp_country.columns = intertemp_country.columns.droplevel()
            intertemp_country.to_excel(DIR + "%s.xlsx"%(fln))
        store.close()

    #-HS DATA-#
    for store in HS_STORES:
        print "Analysing HS File: %s ..." % store
        fln = store.split("/")[-1].split(".")[0]
        store = pd.HDFStore(store)
        for dataset in store.keys():
            print "Computing table for dataset: %s ..." % dataset
            dataset = dataset.strip("/")
            if re.search("export", fln):
                print "[INFO] Export Data"
                intertemp_country = store[dataset].groupby(["year", "eiso3c"]).sum().unstack("year")
            elif re.search("import", fln):
                print "[INFO] Import Data"
                intertemp_country = store[dataset].groupby(["year", "iiso3c"]).sum().unstack("year")
            else:
                continue
            intertemp_country.columns = intertemp_country.columns.droplevel()
            intertemp_country.to_excel(DIR + "%s.xlsx"%(fln))
        store.close()

    ## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
    ## ----> SIMPLE STATS TABLES <---- ##
    ## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##


    from pyeconlab.trade.util import describe

    print "Running DATASET_SIMPLESTATS_TABLE: ..."

    DIR = RESULTS_DIR + "tables/"

    #-SITC DATA-#

    for dataset_file in SITC_STORES:
        print "Running (SITC) STATS on File %s" % dataset_file
        store = pd.HDFStore(dataset_file)
        for dataset in sorted(store.keys()):
            product_level = dataset.strip("/")  #Remove Directory Structure
            print "Computing SIMPLE STATS for dataset: %s" % product_level
            data = pd.read_hdf(dataset_file, key=dataset)
            productcode = "sitc%s"%(product_level[-1])
            dataset_table = describe(data, table_name=product_level, productcode=productcode)
            #-Memory Reduction-#
            del data
            gc.collect()
            if product_level == "L1":
                table = dataset_table
            else:
                table = table.merge(dataset_table, left_index=True, right_index=True)
        store.close()
        #-Excel Table-#
        fl = dataset_file.split("/")[-1].split(".")[0] + "_stats" + ".xlsx"
        table.to_excel(DIR + fl)
        #-Latex Snippet-#
        fl = dataset_file.split("/")[-1].split(".")[0] + "_stats" + ".tex"
        with open(DIR + fl, "w") as latex_file:
            latex_file.write(table.to_latex())

    #-HS DATA-#

    for dataset_file in HS_STORES:
        print "Running (HS) STATS on File %s" % dataset_file
        store = pd.HDFStore(dataset_file)
        for dataset in sorted(store.keys()):
            product_level = dataset.strip("/")  #Remove Directory Structure
            print "Computing SIMPLE STATS for dataset: %s" % product_level
            data = pd.read_hdf(dataset_file, key=dataset)
            productcode = "hs%s"%(product_level[-1])
            dataset_table = describe(data, table_name=product_level, productcode=productcode)
            #-Memory Reduction-#
            del data
            gc.collect()
            if product_level == "L1":
                table = dataset_table
            else:
                table = table.merge(dataset_table, left_index=True, right_index=True)
        store.close()
        #-Excel Table-#
        fl = dataset_file.split("/")[-1].split(".")[0] + "_stats" + ".xlsx"
        table.to_excel(DIR + fl)
        #-Latex Snippet-#
        fl = dataset_file.split("/")[-1].split(".")[0] + "_stats" + ".tex"
        with open(DIR + fl, "w") as latex_file:
            latex_file.write(table.to_latex())

    #-------#
    #-PLOTS-#
    #-------#

    #-Intertemporal Number of Positive Productcodes-#

    DIR = RESULTS_DIR + "plots/intertemporal-productcodes-num/"

    for dataset_file in SITC_STORES:
        print "Running (SITC) PLOTS on File %s" % dataset_file
        store = pd.HDFStore(dataset_file)
        for dataset in sorted(store.keys()):
            product_level = dataset.strip("/")  #Remove Directory Structure
            print "Computing PLOT for dataset: %s" % product_level
            data = pd.read_hdf(dataset_file, key=dataset)
            productcode = "sitc%s"%(product_level[-1])
            if re.search("rca", dataset_file):
                value = "rca"
            else:
                value = "value"
            data_year = data.groupby(["year", productcode], as_index=False).sum().groupby("year").apply(lambda row: row[value].count())
            fig = data_year.plot(title="Dataset: %s (%s)"%(dataset, dataset_file))
            plt.savefig(DIR + "%s_%s_numproducts.pdf"%(dataset_file.split('/')[-1].split('.')[0], product_level))
            plt.close()
            #-Memory Reduction-#
            del data, data_year
            gc.collect()
        store.close()

dataset_compile_raw.py

+142
"""
Compile RAW Data into a Single Data File
========================================

Author: Matthew McKay (mamckay@gmail.com)

This script compiles and converts (where necessary) raw data into a single data file.

Sources
-------
[1] NBER
[2] BACI

Notes
-----
1. Care must be taken when working with the CSV files:
    a. After 1984 there are some float values in the 'value' column
    b. icode, ecode and sitc4 need to be imported explicitly as strings
2. Stata and HDF files are closer in type than CSV; both contain "" values, which should be replaced with np.nan
3. Unit and quantity information is only available after 1984 in the NBER dataset
4. HDF files are both compact and fast, and are therefore used as the standard file source for dataset objects

"""
import sys
import pandas as pd
import csv
import numpy as np

#-Dataset Information-#
from dataset_info import SOURCE_DIR, TARGET_RAW_DIR

#------#
#-NBER-#
#------#

#-Convert Each Year to a CSV File-#
def nber_convert_dta_to_csv(source_dir, target_dir):
    for year in range(1962, 2000+1, 1):
        fn = source_dir + "wtf%s.dta" % str(year)[2:]
        print "Loading Year: %s from file: %s" % (year, fn)
        data = pd.read_stata(fn)
        fn = target_dir + "wtf%s.csv" % str(year)[2:]
        print "Converting Year: %s from file: %s" % (year, fn)
        data.to_csv(fn, index=False, quoting=csv.QUOTE_NONNUMERIC)
    print "Convert DTA to CSV Finished!"

#-Convert All Years to an HDF File-#
def nber_convert_dta_to_hdf(source_dir, target_dir, index='year'):
    if index == 'year':
        fn = target_dir + "nber_year.h5"
        store = pd.HDFStore(fn, complevel=9, complib='zlib')
        for year in range(1962, 2000+1, 1):
            fn = source_dir + "wtf%s.dta" % str(year)[2:]
            print "Loading Year: %s from file: %s" % (year, fn)
            data = pd.read_stata(fn)
            store.put('Y'+str(year), data, format='table')
        print "HDF File Saved ..."
        print store
        store.close()
    else:
        data = pd.DataFrame()
        for year in range(1962, 2000+1, 1):
            fn = source_dir + "wtf%s.dta" % str(year)[2:]
            print "Loading Year: %s from file: %s" % (year, fn)
            data = data.append(pd.read_stata(fn))
        fn = target_dir + "nber.h5"
        store = pd.HDFStore(fn, complevel=9, complib='zlib')
        store.put('nber', data, format='table')
        print "HDF File Saved ..."
        print store
        store.close()
    print "Convert DTA to HDF Finished!"

#-Convert NBER Supplementary Data-#
def nber_supp_convert_dta_to_hdf(source_dir, target_dir):
    """
    Save NBER supplementary data into an HDF file "nber_supp_year.h5"
    """
    fn = target_dir + "nber_supp_year.h5"
    store = pd.HDFStore(fn, complevel=9, complib='zlib')
    for year in xrange(1988, 2000+1, 1):
        fn = source_dir + "china_hk%s.dta" % str(year)[2:]
        print "[NBER-SUPP] Loading Year: %s from file: %s" % (year, fn)
        data = pd.read_stata(fn)
        store.put('Y'+str(year), data, format='table')
    print "HDF file Saved ..."
    print store
    store.close()


#------#
#-BACI-#
#------#

#-Convert All CSV Year Files to an HDF File-#
def baci_convert_dta_to_hdf(source_dir, target_dir):
    fn = target_dir + "baci_year.h5"
    store = pd.HDFStore(fn, complevel=9, complib='zlib')
    for year in range(1998, 2012+1, 1):
        fn = source_dir + "baci96_%s.csv" % str(year)
        print "Loading Year: %s from file: %s" % (year, fn)
        data = pd.read_csv(fn, dtype={'hs6' : str})
        store.put('Y'+str(year), data, format='table')
    print "HDF File Saved"
    print store
    store.close()
    print "Convert CSV to HDF Finished!"

#-Raw Data Conversions and Comparisons-#

if __name__ == "__main__":

    #-Execution Settings-#
    NBER = True
    dta_to_csv = False  # Using HDF as the key data structure due to its size and speed advantage
    dta_to_hdf = True   # Data structure of choice

    BACI = True
    csv_to_hdf = True

    #-Convert NBER-#
    if NBER:
        source_dir = SOURCE_DIR['nber']
        target_dir = TARGET_RAW_DIR['nber']
        #-Conversions-#
        if dta_to_csv:
            print "Convert dta to csv files"
            nber_convert_dta_to_csv(source_dir, target_dir)
        if dta_to_hdf:
            print "Convert dta to hdf file"
            nber_convert_dta_to_hdf(source_dir, target_dir, index='year')
            nber_supp_convert_dta_to_hdf(source_dir, target_dir)

    #-Convert BACI-#
    if BACI:
        source_dir = SOURCE_DIR['baci96']
        target_dir = TARGET_RAW_DIR['baci96']
        #-Conversions-#
        if csv_to_hdf:
            print "Convert csv to hdf file"
            baci_convert_dta_to_hdf(source_dir, target_dir)

dataset_construct_baci.py

+202
"""
Compute BACI Datasets
=====================

Author: Matthew McKay (mamckay@gmail.com)

Filename rules: {{ source }}-{{ flow }}-{{ classification }}-{{ years }}-{{ raw/cleaned }}-{{ type }}-{{ id }}
    source-flow-classification-years-raw/cleaned-type-id

Supporting Scripts
------------------
1. dataset_info.py          Contains information about the relevant datasets
2. dataset_compile_raw.py   Compiles RAW data files to a single dataset file

Sources
-------
2. baci
    md5: e988b6544563675492b59f397a8cb6bb
    notes: BACI Trade RAW Dataset [HS96]

Supporting Files
----------------
TBD

"""
import numpy as np
import pandas as pd
from pyeconlab.util import concord_data
import gc

#----------#
#- BACI96 -#
#----------#

#-Dataset Information-#
from dataset_info import TARGET_RAW_DIR, TARGET_DATASET_DIR, YEARS

#-Setup Local Environment-#
#~~~~~~~~~~~~~~~~~~~~~~~~~#
SOURCE_DIR = TARGET_RAW_DIR['baci96']
TARGET_DIR = TARGET_DATASET_DIR['baci96']
start_year, end_year = YEARS['baci96']

#-Helper Functions-#
#~~~~~~~~~~~~~~~~~~#

def load_raw_dataset(fn, start_year, end_year, verbose=True):
    """
    Load Raw BACI Dataset
    """
    data = pd.DataFrame()
    for year in range(start_year, end_year+1, 1):
        print "Loading Year: %s" % year
        data = data.append(pd.read_hdf(fn, "Y%s"%year))
    if verbose: print data.t.unique()
    return data

#-Source Information-#
#~~~~~~~~~~~~~~~~~~~~#
print
print "---> Loading RAW Data <---"
fn = SOURCE_DIR + "baci_year.h5"
rawdata = load_raw_dataset(fn, start_year, end_year)

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
#-Construct SITC Revision 2 Datasets-#
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#

CONSTRUCT_SITC_DATASETS = True

if CONSTRUCT_SITC_DATASETS:

    from pyeconlab.trade.dataset.CEPIIBACI import SITC_DATASET_DESCRIPTION, SITC_DATASET_OPTIONS
    from pyeconlab.trade.dataset.CEPIIBACI import construct_sitc

    LEVELS = [1, 2, 3, 4, 5]
    DATA_TYPES = ["trade", "export", "import"]

    for level in LEVELS:
        #-Import this as a Function from pyeconlab-#
        print
        print "---> COMPUTING SITC REVISION 2 LEVEL %s DATASETS <---" % level
        print
        for data_type in DATA_TYPES:
            #-Setup Store-#
            fn = "baci-%s-sitcr2l%s-%sto%s.h5" % (data_type, level, start_year, end_year)  #-Write File: {{ source }}-{{ flow }}-{{ classification }}-{{ years }}.h5-#
            store = pd.HDFStore(TARGET_DIR+fn, complevel=9, complib='zlib')
            #-Compute Datasets-#
            for dataset in sorted(SITC_DATASET_OPTIONS.keys()):
                print "[SITCR2L%s] Computing Dataset %s for %s" % (level, dataset, data_type)
                #-Compute Data-#
                #INTERFACE: def construct_sitc(data, data_classification, data_type, level, revision, check_concordance=True, adjust_units=False, concordance_institution="un", multiindex=True, verbose=True):#
                data = construct_sitc(rawdata.copy(deep=True), data_classification="HS96", data_type=data_type, level=level, revision=2, **SITC_DATASET_OPTIONS[dataset])
                store.put(dataset, data, format='table')
                store.get_storer(dataset).attrs.options = SITC_DATASET_OPTIONS[dataset]
                store.get_storer(dataset).attrs.data_type = data_type
                store.get_storer(dataset).attrs.description = SITC_DATASET_DESCRIPTION[dataset]
                print
            #-Close-#
            store.close()
            del data
            gc.collect()

#----------#
#-RAW DATA-#
#----------#

RAW_DATA = True
RAW_WORLD_YEARLY = True
RAW_COUNTRY_YEARLY = True
RAW_PRODUCT_YEARLY = True

#-Adjust RAW Data to have common interface names-#
stdnames = {'t' : 'year', 'i' : 'eiso3n', 'j' : 'iiso3n', 'v' : 'value', 'q' : 'quantity'}
rawdata = rawdata.rename(columns=stdnames)

if RAW_DATA:
    print
    print "---> SAVING RAW DATA (WITH STANDARD COLUMN NAMES) <---"
    print
    fn = "raw_baci_hs96-1998-2012.h5"
    store = pd.HDFStore(TARGET_DIR+fn, complevel=9, complib='zlib')
    store.put('RAW', rawdata, format='table')
    store.close()


if RAW_WORLD_YEARLY:

    ## Should this be filtered through a countries-only filter? ##

    print
    print "---> COMPUTING WORLD YEARLY VALUES FROM RAW BACI DATASET <---"
    print
    fn = "raw_baci_world_yearly-1998to2012.h5"
    store = pd.HDFStore(TARGET_DIR+fn, complevel=9, complib='zlib')
    world_values = rawdata[["year", "value"]].groupby(["year"]).sum()
    store.put('World', world_values, format='table')
    store.close()
    del world_values
    gc.collect()

if RAW_COUNTRY_YEARLY:
    print
    print "---> COMPUTING COUNTRY YEARLY VALUES FROM RAW BACI DATASET <---"
    print
    #-Setup Store-#
    fn = "raw_baci_country_year-1998to2012.h5"
    store = pd.HDFStore(TARGET_DIR+fn, complevel=9, complib='zlib')
    #-Import ISO3C-#
    from pyeconlab.trade.dataset.CEPIIBACI.meta import hs96_iso3n_to_iso3c
    rawdata['eiso3c'] = rawdata['eiso3n'].apply(lambda x: concord_data(hs96_iso3n_to_iso3c, x, issue_error=np.nan))  #Is this Complete?
    rawdata['iiso3c'] = rawdata['iiso3n'].apply(lambda x: concord_data(hs96_iso3n_to_iso3c, x, issue_error=np.nan))  #Is this Complete?
    #-Country Exports-#
    exports = rawdata[["year", "eiso3c", "value"]].groupby(["year", "eiso3c"]).sum().reset_index()
    store.put("CountryExports", exports, format='table')
    #-Country Imports-#
    imports = rawdata[["year", "iiso3c", "value"]].groupby(["year", "iiso3c"]).sum().reset_index()
    store.put("CountryImports", imports, format='table')
    store.close()
    del exports
    del imports
    gc.collect()

if RAW_PRODUCT_YEARLY:

    ## Should this be filtered through a countries-only filter? ##

    print
    print "---> COMPUTING PRODUCT YEAR VALUES FROM RAW BACI DATASET (HS and SITC) <---"
    print
    #-Setup Store-#
    fn = "raw_baci_product_year-1998to2012.h5"
    store = pd.HDFStore(TARGET_DIR+fn, complevel=9, complib='zlib')
    #-HS-#
    for level in [6, 5, 4, 3, 2, 1]:
        print "Computing HS%s Product Year Values ..." % level
        data = rawdata.copy(deep=True)
        if level != 6:
            data["hs%s"%level] = data["hs6"].apply(lambda x: x[0:level])
        product_trade = data[["year", "hs%s"%level, "value"]].groupby(["year", "hs%s"%level]).sum().reset_index()
        store.put("HS96L%s"%level, product_trade, format='table')
        del data
        del product_trade
        gc.collect()

    #-SITC-#
    from pyeconlab.trade.concordance import HS_To_SITC
    concordance = HS_To_SITC(hs="HS96", sitc="SITCR2", hs_level=6, sitc_level=5, source_institution='un', verbose=True).concordance
    for level in [5, 4, 3, 2, 1]:
        print "Computing SITC%s Product Year Values ..." % level
        data = rawdata.copy(deep=True)
        data['sitc5'] = data['hs6'].apply(lambda x: concord_data(concordance, x, issue_error=np.nan))
        if level != 5:
            data["sitc%s"%level] = data["sitc5"].apply(lambda x: x[0:level])
        product_trade = data[["year", "sitc%s"%level, "value"]].groupby(["year", "sitc%s"%level]).sum().reset_index()
        store.put("SITCR2L%s"%level, product_trade, format='table')
        del data
        del product_trade
        gc.collect()
    store.close()

dataset_construct_nber.py

+700
Large diffs are not rendered by default.

dataset_construct_nber_options.py

+169
"""
NBER DATASET CONSTRUCT OPTIONS
"""

#-Dataset Configuration-#
#~~~~~~~~~~~~~~~~~~~~~~~#

#-Future Work: Check this is consistent with pyeconlab definitions-#

DATA_DESCRIPTION = {
    #-Country Datasets-#
    'A' : u"A basic dataset that includes AX and SITCR2 indicators and collapses data to a specified level, maintaining initial countrycodes and productcodes as in the raw dataset; removes NES",
    'B' : u"[A] except corrects HK-CHINA data from NBER correction files",
    'C' : u"A dataset that does not contain AX, adjusts HK-CHINA data, but does not adjust products or countries for intertemporal consistency",
    'D' : u"A dataset that does not contain AX or any non-standard SITCR2 codes, adjusts HK-CHINA data, but does not adjust products or countries for intertemporal consistency",
    'E' : u"A dataset that does not contain AX, updates productcodes to be more intertemporally consistent, adjusts HK-CHINA data, but does not adjust countries for intertemporal consistency",
    'F' : u"A dataset that does not contain AX, updates productcodes to be more intertemporally consistent, adjusts HK-CHINA data, and adjusts countries for intertemporal consistency",
    'G' : u"A dataset that does not contain AX or any non-standard SITCR2 codes, adjusts HK-CHINA data, and adjusts country codes for intertemporal consistency",
    # 'H' : u"A dataset that does not contain AX, updates productcodes to be more intertemporally consistent, adjusts HK-CHINA data, adjusts country codes for intertemporal consistency and drops non-complete countries (EXPERIMENTAL)",
    # 'I' : u"A dataset that does not contain AX or any non-standard SITCR2 codes, adjusts HK-CHINA data, and drops countries that are not intertemporally complete (EXPERIMENTAL)",
}

RAW_DATA_DESCRIPTION = {
    #-Raw Dataset Descriptions-#
    'RAW1' : u"Basic RAW dataset with iso3c countrycodes included and collapsed quantity disaggregation",
    'RAW2' : u"Basic RAW dataset with iso3c countrycodes included, collapsed quantity disaggregation, and adjusted HK-CHINA data",
}

#-Data Option Definitions-#

DATA_OPTIONS = {
    'A' : {
        #-ProductCode Adjustments-#
        'AX' : True,                        #Add a Marker for 'A' and 'X' Codes
        'dropAX' : False,                   #Drop Products whose Codes contain 'A' or 'X'
        'sitcr2' : True,                    #Add an Official SITC Revision 2 Indicator
        'drop_nonsitcr2' : False,           #Remove Non-Official SITC Revision 2 Codes from the Dataset
        'adjust_hk' : False,                #Adjust Data to Incorporate the Hong Kong Adjustments provided by NBER
        'intertemp_productcode' : False,    #Compute an Intertemporal ProductCode
        #-CountryCode Adjustments-#
        'intertemp_cntrycode' : False,      #Recode Country Codes to be Intertemporally Consistent
        'drop_incp_cntrycode' : False,      #Drop Incomplete Intertemporal Countries
        #-Other Adjustments-#
        'adjust_units' : False,
        'source_institution' : 'un',
        'verbose' : True,
    },
    'B' : {
        'AX' : True,
        'dropAX' : False,
        'sitcr2' : True,
        'drop_nonsitcr2' : False,
        'adjust_hk' : True,
        'intertemp_productcode' : False,
        'intertemp_cntrycode' : False,
        'drop_incp_cntrycode' : False,
        'adjust_units' : False,
        'source_institution' : 'un',
        'verbose' : True,
    },
    'C' : {
        'AX' : True,
        'dropAX' : True,
        'sitcr2' : True,
        'drop_nonsitcr2' : False,
        'adjust_hk' : True,
        'intertemp_productcode' : False,
        'intertemp_cntrycode' : False,
        'drop_incp_cntrycode' : False,
        'adjust_units' : False,
        'source_institution' : 'un',
        'verbose' : True,
    },
    'D' : {  #-!!-MAJOR-!!-#
        'AX' : True,
        'dropAX' : True,
        'sitcr2' : True,
        'drop_nonsitcr2' : True,
        'adjust_hk' : True,
        'intertemp_productcode' : False,
        'intertemp_cntrycode' : False,
        'drop_incp_cntrycode' : False,
        'adjust_units' : False,
        'source_institution' : 'un',
        'verbose' : True,
    },
    'E' : {  #-!!-MAJOR-!!-#
        'AX' : True,
        'dropAX' : True,
        'sitcr2' : True,
        'drop_nonsitcr2' : False,
        'adjust_hk' : True,
        'intertemp_productcode' : True,
        'intertemp_cntrycode' : False,
        'drop_incp_cntrycode' : False,
        'adjust_units' : False,
        'source_institution' : 'un',
        'verbose' : True,
    },
    'F' : {  #-!!-MAJOR-!!-#
        'AX' : True,
        'dropAX' : True,
        'sitcr2' : True,
        'drop_nonsitcr2' : False,
        'adjust_hk' : True,
        'intertemp_productcode' : True,
        'intertemp_cntrycode' : True,
        'drop_incp_cntrycode' : False,
        'adjust_units' : False,
        'source_institution' : 'un',
        'verbose' : True,
    },
    'G' : {
        'AX' : True,
        'dropAX' : True,
        'sitcr2' : True,
        'drop_nonsitcr2' : True,
        'adjust_hk' : True,
        'intertemp_productcode' : False,
        'intertemp_cntrycode' : True,
        'drop_incp_cntrycode' : False,
        'adjust_units' : False,
        'source_institution' : 'un',
        'verbose' : True,
    },
    # 'H' : {  #-!!-EXPERIMENTAL-!!-#
    #     'AX' : True,
    #     'dropAX' : True,
    #     'sitcr2' : True,
    #     'drop_nonsitcr2' : False,
    #     'adjust_hk' : True,
    #     'intertemp_productcode' : True,
    #     'intertemp_cntrycode' : True,
    #     'drop_incp_cntrycode' : True,
    #     'adjust_units' : False,
    #     'source_institution' : 'un',
    #     'verbose' : True,
    # },
    # 'I' : {  #-!!-EXPERIMENTAL-!!-#
    #     'AX' : True,
    #     'dropAX' : True,
    #     'sitcr2' : True,
    #     'drop_nonsitcr2' : True,
    #     'adjust_hk' : True,
    #     'intertemp_productcode' : False,
    #     'intertemp_cntrycode' : False,
    #     'drop_incp_cntrycode' : True,
    #     'adjust_units' : False,
    #     'source_institution' : 'un',
    #     'verbose' : True,
    # },
}


RAW_DATA_OPTIONS = {
    #-RAW includes NES, World etc. and Undertakes a Minimum of Changes to the Data to make it Comparable-#
    'RAW1' : {
        'adjust_hk' : False,            #Adjust Hong Kong Data
        'harmonised_raw' : True,        #Construct Harmonised RAW Data File (No Quantity Disaggregation, Common Names)
        #-Required Due to Script Logic Below-#
        'intertemp_productcode' : False,
    },
    'RAW2' : {
        'adjust_hk' : True,             #Adjust Hong Kong Data
        'harmonised_raw' : True,        #Construct Harmonised RAW Data File (No Quantity Disaggregation, Common Names)
        #-Required Due to Script Logic Below-#
        'intertemp_productcode' : False,
    },
}
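# Usage sketch (hedged; mirrors how dataset_construct_baci.py consumes its own
# SITC_DATASET_OPTIONS): each key selects one dataset variant and the value dict
# is passed as keyword arguments to the construction function, e.g.
#
#   from dataset_construct_nber_options import DATA_OPTIONS
#   for dataset in sorted(DATA_OPTIONS.keys()):
#       options = DATA_OPTIONS[dataset]   # expanded as **options downstream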

dataset_construct_nberbaci.py

+648
Large diffs are not rendered by default.

dataset_construct_other.py

+134
"""
Compilation of Other Useful Datasets
====================================

1. World Development Indicators Dataset
2. Atlas of Complexity Dataset
3. Penn World Table Dataset

Constructs h5 datasets and pyeconlab objects.

"""

import os
import gc
import shutil
import warnings
import pandas as pd

from pyeconlab import WDI, CIDAtlasDataConstructor, PENN
from dataset_info import SOURCE_DIR, TARGET_DATASET_DIR

#---------#
#-Control-#
#---------#

COMPILE_WDI = False
COMPILE_ATLAS = False  #-!!-Requires AWS for 'Trade'-!!-# 'Trade Disabled'
COMPILE_PENN = True

#------------------------------#
#-World Development Indicators-#
#------------------------------#

if COMPILE_WDI:
    wdi = WDI(source_dir=SOURCE_DIR['wdi'])
    stata_wide_fln = wdi.to_stata(table_type="wide", target_dir=TARGET_DATASET_DIR['wdi'])  # wdi_data_wide.dta
    stata_long_fln = wdi.to_stata(table_type="long", target_dir=TARGET_DATASET_DIR['wdi'])  # wdi_data_long.dta
    hdf_fln = wdi.to_hdf(target_dir=TARGET_DATASET_DIR['wdi'])  # wdi_data.h5

#-----------------------------#
#-Atlas of Complexity Dataset-#
#-----------------------------#

#-Countries Only Dataset-#

if COMPILE_ATLAS:
    #-Values-#
    print "[INFO] Processing VALUES Data ..."
    for classification in ["SITCR2", "HS92"]:
        warnings.warn("This will not compile 'trade' data - just export and import data")
        for dtype in ["export", "import"]:  # -- !! -- Excluding "trade" -- !! -- due to memory constraints -- use stata -- #
            print "Processing %s for %s data ..." % (classification, dtype)
            atlas = CIDAtlasDataConstructor(source_dir=SOURCE_DIR['atlas'], trade_classification=classification, dtype=dtype, reduce_memory=True)
            atlas.construct_standardized_dataset()
            #-Store-#
            startyear = atlas.dataset.year.min()
            endyear = atlas.dataset.year.max()
            fln = TARGET_DATASET_DIR["atlas"] + "cidatlas_%s_%s_%sto%s.h5"%(classification.lower(), dtype, startyear, endyear)
            store = pd.HDFStore(fln, complevel=9, complib='zlib')
            #-Country Value Data-#
            atlas.countries_only()
            #-Value-#
            for level in [4, 3, 2, 1]:
                gc.collect()
                print "[INFO] Saving Level %s ... " % level
                if classification == "SITCR2":
                    productid = "sitc%s"%level
                    if dtype == "export":
                        idx = ["year", "eiso3c", productid]
                    elif dtype == "import":
                        idx = ["year", "iiso3c", productid]
                    else:
                        idx = ["year", "eiso3c", "iiso3c", productid]
                    if level != 4:
                        atlas.dataset[productid] = atlas.dataset["sitc4"].apply(lambda x: x[0:level])
                if classification == "HS92":
                    productid = "hs%s"%level
                    if dtype == "export":
                        idx = ["year", "eiso3c", productid]
                    elif dtype == "import":
                        idx = ["year", "iiso3c", productid]
                    else:
                        idx = ["year", "eiso3c", "iiso3c", productid]
                    if level != 4:
                        atlas.dataset[productid] = atlas.dataset["hs4"].apply(lambda x: x[0:level])
                #-Collapse Levels-#
                countrydata = atlas.dataset[idx+["value"]].groupby(idx, as_index=False).sum()
                store.put("L%s"%level, countrydata, format="table")
                del countrydata
            store.close()
            del atlas
            gc.collect()

    #-RCA-#
    print "[INFO] Processing RCA Data ..."
    for classification in ["SITCR2", "HS92"]:
        for dtype in ["export", "import"]:
            print "Processing %s for %s data ..." % (classification, dtype)
            atlas = CIDAtlasDataConstructor(source_dir=SOURCE_DIR['atlas'], trade_classification=classification, dtype=dtype)
            atlas.construct_standardized_dataset()
            #-Store-#
            startyear = atlas.dataset.year.min()
            endyear = atlas.dataset.year.max()
            fln = TARGET_DATASET_DIR["atlas"] + "cidatlas_%s_%s_rca_%sto%s.h5"%(classification.lower(), dtype, startyear, endyear)
            store = pd.HDFStore(fln, complevel=9, complib='zlib')
            #-Country RCA Data-#
            atlas.countries_only()
            countrydata = atlas.dataset.copy(deep=True)
            if classification == "SITCR2":
                if dtype == "export":
                    idx = ["year", "eiso3c", "sitc4"]
                elif dtype == "import":
                    idx = ["year", "iiso3c", "sitc4"]
            if classification == "HS92":
                if dtype == "export":
                    idx = ["year", "eiso3c", "hs4"]
                elif dtype == "import":
                    idx = ["year", "iiso3c", "hs4"]
            countrydata = countrydata.groupby(idx).sum()["rca"].reset_index()
            store.put("L4", countrydata, format="table")
            store.close()
            del countrydata
            gc.collect()


#--------------------------#
#-Penn World Table Dataset-#
#--------------------------#

if COMPILE_PENN:
    print "[INFO] Processing PENN World Tables ... "
    penn = PENN(source_dir=SOURCE_DIR['penn'])
    penn.to_hdf(fl="penn_%s_%sto%s.h5"%(penn.version, penn.start_year, penn.end_year), target_dir=TARGET_DATASET_DIR["penn"])
    penn.to_stata(fl="penn_%s_%sto%s.dta"%(penn.version, penn.start_year, penn.end_year), target_dir=TARGET_DATASET_DIR["penn"])

dataset_info.py

+81
"""
Dataset Info
============

Author: Matthew McKay (mamckay@gmail.com)

Central source of dataset information, directory structure etc.

"""

import sys
import os

if sys.platform.startswith('win'):
    DATA_DIR = r"D:/work-data/datasets/"
elif sys.platform.startswith('darwin') or sys.platform.startswith('linux'):
    abs_path = os.path.expanduser("~")
    DATA_DIR = abs_path + "/work-data/datasets/"

#-Source Information-#

SOURCE_DIR = {
    "nber"    : DATA_DIR + "36a376e5a01385782112519bddfac85e" + "/",
    "baci96"  : DATA_DIR + "e988b6544563675492b59f397a8cb6bb" + "/",
    "wdi"     : DATA_DIR + "70146f20cf40f818e6733d552c6cabb5" + "/",
    "atlas"   : DATA_DIR + "2d48c79173719bd41eb5e192fb4470b6" + "/",
    "penn"    : DATA_DIR + "2c2e8d593f39ee74aeb2c7c17047ea3f" + "/",
    "waziarg" : DATA_DIR + "e93e2009b02d39655f1beb5bcaaf04a8" + "/",
}
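# (Note: the source directories appear to be keyed by a hash of each raw
# archive; for example the "baci96" entry matches the md5 recorded in the
# dataset_construct_baci.py docstring.)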
#-Check Environment Settings-#
for source in SOURCE_DIR.keys():
    if not os.path.isdir(SOURCE_DIR[source]):
        raise ValueError("Directory: %s is not found!" % SOURCE_DIR[source])

#-Target Information-#

TARGET_RAW_DIR = {
    "nber"   : "./output/raw/",
    "baci96" : "./output/raw/",
    "wdi"    : "./output/raw/",
}

TARGET_DATASET_DIR = {
    "nber"       : "./output/dataset/nber/",
    "baci96"     : "./output/dataset/baci96/",
    "nberbaci96" : "./output/dataset/nberbaci96/",
    "regression" : "./output/dataset/regression/",
    "atlas"      : "./output/dataset/atlas/",
    "wdi"        : "./output/dataset/wdi/",
    "penn"       : "./output/dataset/penn/",
}

RESULTS_DIR = {
    #-General Analysis-#
    "nber"       : "./output/results/nber/",
    "baci96"     : "./output/results/baci96/",
    "nberbaci96" : "./output/results/nberbaci96/",
    "atlas"      : "./output/results/atlas/",
}

#-Dataset Attributes-#
YEARS = {
    "nber"   : (1962, 2000),
    "baci96" : (1998, 2012),
}

#-Thesis Chapter Level Results-#
CHAPTER_RESULTS = {
    1   : "./output/chapter1/",
    2   : "./output/chapter2/",
    3   : "./output/chapter3/",
    4   : "./output/chapter4/",
    5   : "./output/chapter5/",
    6   : "./output/chapter6/",
    "A" : "./output/appendixA/",
    "B" : "./output/appendixB/",
    "C" : "./output/appendixC/",
    "D" : "./output/appendixD/",
    "G" : "./output/appendixG/",
}

setup.py

+117
"""
PhD Thesis Setup
================

This script sets up the thesis project and any required folders etc.

"""

import os

FOLDERS = [
    #-Output Directories-#
    "output",
    "output/raw",  #Files should be clearly marked as raw_sources.h5
    "output/dataset/",
    "output/dataset/nber",
    "output/dataset/nber/Y7400/",
    "output/dataset/nber/Y8400/",
    "output/dataset/baci96",
    "output/dataset/baci96/harmonised",
    "output/dataset/baci96/harmonised/Y7400/",
    "output/dataset/baci96/harmonised/Y8400/",
    "output/dataset/nberbaci96",
    "output/dataset/nberbaci96/Y7400/",
    "output/dataset/nberbaci96/Y8400",
    "output/dataset/regression/",
    "output/dataset/atlas/",
    "output/dataset/wdi/",
    "output/dataset/penn/",

    #-NBER Results-#
    "output/results/nber",
    "output/results/nber/intertemporal-productcodes/",
    "output/results/nber/intertemporal-productcodes/Y7400/",
    "output/results/nber/intertemporal-productcodes/Y8400/",
    "output/results/nber/intertemporal-productcodes-sitcl4/",
    "output/results/nber/intertemporal-productcodes-sitcl4/raw/",
    "output/results/nber/intertemporal-productcodes-sitcl4/plots/",
    "output/results/nber/intertemporal-productcodes-sitcl3/",
    "output/results/nber/intertemporal-productcodes-sitcl3/raw/",
    "output/results/nber/intertemporal-productcodes-sitcl2/",
    "output/results/nber/intertemporal-productcodes-sitcl2/raw/",
    "output/results/nber/intertemporal-exporters/",
    "output/results/nber/intertemporal-exporters/raw/",
    "output/results/nber/intertemporal-countrycodes/",
    "output/results/nber/intertemporal-countrycodes/raw/",
    #-NBER Tables-#
    "output/results/nber/tables/",
    "output/results/nber/tables/Y7400/",
    "output/results/nber/tables/Y8400/",
    #-NBER Plots-#
    "output/results/nber/plots/",
    "output/results/nber/plots/percent_unofficial_codes/",
    "output/results/nber/plots/percent_world_values/",
    "output/results/nber/plots/percent_world_values/Y7400/",
    "output/results/nber/plots/percent_world_values/Y8400/",

    #-BACI96 Results-#
    "output/results/baci96",
    "output/results/baci96/intertemporal-countrycodes/",
    "output/results/baci96/intertemporal-productcodes/",
    #-BACI96 Tables-#
    "output/results/baci96/tables/",
    #-BACI96 Plots-#
    "output/results/baci96/plots/",
    "output/results/baci96/plots/percent_world_values/",

    #-Combined Dataset Results-#
    "output/results/nberbaci96",
    "output/results/nberbaci96/intertemporal-countrycodes/",
    "output/results/nberbaci96/intertemporal-countrycodes/Y7400/",
    "output/results/nberbaci96/intertemporal-countrycodes/Y8400/",
    "output/results/nberbaci96/intertemporal-productcodes/",
    "output/results/nberbaci96/intertemporal-productcodes/Y7400/",
    "output/results/nberbaci96/intertemporal-productcodes/Y8400/",
    #-NBERBACI Tables-#
    "output/results/nberbaci96/tables/",
    "output/results/nberbaci96/tables/Y7400/",
    "output/results/nberbaci96/tables/Y8400/",
    #-NBERBACI Plots-#
    "output/results/nberbaci96/plots/",
    "output/results/nberbaci96/plots/percent_world_values/",

    #-Atlas of Complexity-#
    "output/results/atlas/",
    "output/results/atlas/intertemporal-productcodes/",
    "output/results/atlas/intertemporal-countrycodes/",
    "output/results/atlas/tables/",
    "output/results/atlas/plots/",
    "output/results/atlas/plots/intertemporal-productcodes-num/",

    #-Chapter and Appendix Working Areas-#
    "output/chapter1/",
    "output/chapter2/",
    "output/chapter3/",
    "output/chapter3/sensativity-analysis/",
    "output/chapter3/plots/",
    "output/chapter4/",
    "output/chapter5/",
    "output/chapter6/",
    "output/appendixA/",
    "output/appendixB/",
    "output/appendixC/",
    "output/appendixD/",
    "output/appendixG/",

    #-Log Directory-#
    "log/",

]

#-Setup Folders-#

for folder in FOLDERS:
    if not os.path.exists(folder):
        print "[Setup] Creating directory: %s" % folder
        os.makedirs(folder)
