Merge pull request #134 from ml4ai/dev

This PR implements - Misc. developments from the WM hackathon (better integration with Uncharted HMI) - Refactoring of some of the program analysis code to a more OOP-like form, to get rid of global module-level variables.
ml4ai · Dec 11, 2018 · ccdcd3a · ccdcd3a
2 parents 04a49ed + 1210d64
commit ccdcd3a
Show file tree

Hide file tree

Showing 22 changed files with 890 additions and 3,800 deletions.
diff --git a/delphi/AnalysisGraph.py b/delphi/AnalysisGraph.py
@@ -118,54 +118,55 @@ def from_uncharted_json_serialized_dict(
         for s in sts:
             if len(s["evidence"]) >= minimum_evidence_pieces_required:
                 subj, obj = s["subj"], s["obj"]
-                subj_name, obj_name = [
-                    s[x]["db_refs"]["concept"].split("/")[-1]
-                    for x in ["subj", "obj"]
-                ]
-                G.add_edge(subj_name, obj_name)
-                subj_delta = s["subj_delta"]
-                obj_delta = s["obj_delta"]
-                for delta in (subj_delta, obj_delta):
-                    # TODO : Ensure that all the statements provided by
-                    # Uncharted have unambiguous polarities.
-                    if delta["polarity"] is None:
-                        delta["polarity"] = 1
-                influence_stmt = Influence(
-                    Concept(subj_name, db_refs=subj["db_refs"]),
-                    Concept(obj_name, db_refs=obj["db_refs"]),
-                    subj_delta=s["subj_delta"],
-                    obj_delta=s["obj_delta"],
-                    evidence=[
-                        Evidence(
-                            source_api=ev["source_api"],
-                            annotations=ev["annotations"],
-                            text=ev["text"],
-                            epistemics=ev.get("epistemics"),
-                        )
-                        for ev in s["evidence"]
-                    ],
-                )
-                influence_sts = G.edges[subj_name, obj_name].get(
-                    "InfluenceStatements", []
-                )
-                influence_sts.append(influence_stmt)
-                G.edges[subj_name, obj_name][
-                    "InfluenceStatements"
-                ] = influence_sts
+                if (subj["db_refs"]["concept"] is not None and
+                        obj["db_refs"]["concept"] is not None):
+                    subj_name, obj_name = [
+                            "/".join(s[x]["db_refs"]["concept"].split("/")[:])
+                        for x in ["subj", "obj"]
+                    ]
+                    G.add_edge(subj_name, obj_name)
+                    subj_delta = s["subj_delta"]
+                    obj_delta = s["obj_delta"]
+
+                    for delta in (subj_delta, obj_delta):
+                        # TODO : Ensure that all the statements provided by
+                        # Uncharted have unambiguous polarities.
+                        if delta["polarity"] is None:
+                            delta["polarity"] = 1
+                    influence_stmt = Influence(
+                        Concept(subj_name, db_refs=subj["db_refs"]),
+                        Concept(obj_name, db_refs=obj["db_refs"]),
+                        subj_delta=s["subj_delta"],
+                        obj_delta=s["obj_delta"],
+                        evidence=[
+                            Evidence(
+                                source_api=ev["source_api"],
+                                annotations=ev["annotations"],
+                                text=ev["text"],
+                                epistemics=ev.get("epistemics"),
+                            )
+                            for ev in s["evidence"]
+                        ],
+                    )
+                    influence_sts = G.edges[subj_name, obj_name].get(
+                        "InfluenceStatements", []
+                    )
+                    influence_sts.append(influence_stmt)
+                    G.edges[subj_name, obj_name][
+                        "InfluenceStatements"
+                    ] = influence_sts
 
         for concept, indicator in _dict[
             "concept_to_indicator_mapping"
         ].items():
-            concept_name = concept.split("/")[-1]
-            if concept_name != "Unknown":
-                if indicator != "???":
-                    indicator_source, *indicator_name = indicator.split("/")
-                    if concept_name in G:
-                        if G.nodes[concept_name].get("indicators") is None:
-                            G.nodes[concept_name]["indicators"] = {}
-                        G.nodes[concept_name]["indicators"][
-                            indicator_name[-1]
-                        ] = Indicator(indicator_name[-1], indicator_source)
+            if indicator is not None:
+                indicator_source, indicator_name = indicator.split("/")[0], indicator
+                if concept in G:
+                    if G.nodes[concept].get("indicators") is None:
+                        G.nodes[concept]["indicators"] = {}
+                    G.nodes[concept]["indicators"][
+                        indicator_name
+                    ] = Indicator(indicator_name, indicator_source)
 
         self = cls(G)
         self.assign_uuids_to_nodes_and_edges()
@@ -229,7 +230,7 @@ def map_concepts_to_indicators(
 
         for n in self.nodes(data=True):
             n[1]["indicators"] = get_indicators(
-                n[0].lower().replace(" ", "_"), mapping
+                n[0], mapping
             )
 
     def default_update_function(self, n: Tuple[str, dict]) -> List[float]:
@@ -258,7 +259,7 @@ def construct_default_initial_state(self) -> pd.Series:
     # Basic Modeling Interface (BMI)
     # ==========================================================================
 
-    def initialize(self, config_file: str):
+    def initialize(self, config_file: str = "bmi_config.txt"):
         """ Initialize the executable AnalysisGraph with a config file.
 
         Args:
@@ -276,9 +277,9 @@ def initialize(self, config_file: str):
             node = n[1]["rv"]
             node.dataset = [self.s0[n[0]] for _ in range(self.res)]
             node.partial_t = self.s0[f"∂({n[0]})/∂t"]
-            if n[1].get("indicators") is not None:
-                for ind in n[1]["indicators"].values():
-                    ind.dataset = np.ones(self.res) * ind.mean
+            # if n[1].get("indicators") is not None:
+                # for ind in n[1]["indicators"].values():
+                    # ind.dataset = np.ones(self.res) * ind.mean
 
     def update(self):
         """ Advance the model by one time step. """
@@ -403,7 +404,7 @@ def parameterize(self, time: datetime, data=south_sudan_data):
             data = get_data(data)
 
         nodes_with_indicators = [
-            n for n in self.nodes(data=True) if n[1]["indicators"] is not None
+            n for n in self.nodes(data=True) if n[1].get("indicators") is not None
         ]
 
         for n in nodes_with_indicators:

diff --git a/delphi/assembly.py b/delphi/assembly.py
@@ -27,11 +27,6 @@ def get_respdevs(gb):
     return gb["respdev"]
 
 
-def process_concept_name(name: str) -> str:
-    """ Remove underscores from concept name. """
-    return name.replace("_", " ")
-
-
 def filter_statements(sts: List[Influence]) -> List[Influence]:
     return [s for s in sts if is_well_grounded(s) and is_simulable(s)]
 
@@ -46,34 +41,36 @@ def constructConditionalPDF(
     all_thetas = []
     for stmt in e[2]["InfluenceStatements"]:
         for ev in stmt.evidence:
-            for subj_adjective in ev.annotations["subj_adjectives"]:
-                if (
-                    subj_adjective in gb.groups
-                    and subj_adjective not in adjective_response_dict
-                ):
-                    adjective_response_dict[subj_adjective] = get_respdevs(
-                        gb.get_group(subj_adjective)
-                    )
-                rs_subj = stmt.subj_delta[
-                    "polarity"
-                ] * adjective_response_dict.get(subj_adjective, rs)
-
-                for obj_adjective in ev.annotations["obj_adjectives"]:
+            # To account for discrepancy between Hume and Eidos extractions
+            if ev.annotations.get("subj_adjectives") is not None:
+                for subj_adjective in ev.annotations["subj_adjectives"]:
                     if (
-                        obj_adjective in gb.groups
-                        and obj_adjective not in adjective_response_dict
+                        subj_adjective in gb.groups
+                        and subj_adjective not in adjective_response_dict
                     ):
-                        adjective_response_dict[obj_adjective] = get_respdevs(
-                            gb.get_group(obj_adjective)
+                        adjective_response_dict[subj_adjective] = get_respdevs(
+                            gb.get_group(subj_adjective)
                         )
-
-                    rs_obj = stmt.obj_delta[
+                    rs_subj = stmt.subj_delta[
                         "polarity"
-                    ] * adjective_response_dict.get(obj_adjective, rs)
+                    ] * adjective_response_dict.get(subj_adjective, rs)
 
-                    xs1, ys1 = np.meshgrid(rs_subj, rs_obj, indexing="xy")
-                    thetas = np.arctan2(ys1.flatten(), xs1.flatten())
-                    all_thetas.append(thetas)
+                    for obj_adjective in ev.annotations["obj_adjectives"]:
+                        if (
+                            obj_adjective in gb.groups
+                            and obj_adjective not in adjective_response_dict
+                        ):
+                            adjective_response_dict[obj_adjective] = get_respdevs(
+                                gb.get_group(obj_adjective)
+                            )
+
+                        rs_obj = stmt.obj_delta[
+                            "polarity"
+                        ] * adjective_response_dict.get(obj_adjective, rs)
+
+                        xs1, ys1 = np.meshgrid(rs_subj, rs_obj, indexing="xy")
+                        thetas = np.arctan2(ys1.flatten(), xs1.flatten())
+                        all_thetas.append(thetas)
 
             # Prior
             xs1, ys1 = np.meshgrid(
@@ -102,7 +99,7 @@ def get_best_match(indicator: Indicator, items: Iterable[str]) -> str:
 
 def get_data(filename: str) -> pd.DataFrame:
     """ Create a dataframe out of south_sudan_data.csv """
-    df = pd.read_csv(filename, sep="|", index_col="Indicator Name")
+    df = pd.read_csv(filename, index_col="Variable")
     return df
 
 
@@ -131,61 +128,61 @@ def get_indicator_value(
 ) -> Optional[float]:
     """ Get the value of a particular indicator at a particular date and time. """
 
-    if indicator.source == "FAO/WDI":
-        best_match = get_best_match(indicator, df.index)
-
-        year = str(date.year)
-        if not year in df.columns:
-            return None
-        else:
-            indicator_value = df[year][best_match]
-            indicator_units = df.loc[best_match]["Unit"]
+    # if indicator.source == "FAO/WDI":
+    best_match = get_best_match(indicator, df.index)
 
-        return (
-            (indicator_value, indicator_units)
-            if not pd.isna(indicator_value)
-            else (None, indicator_units)
-        )
+    # TODO Fix the above
+    df = df.loc[best_match].loc[lambda df: df["Year"] == date.year].loc[lambda df: df["Month"] == date.month]
+    if not df["Value"].isna().all():
+        indicator_value = float(df["Value"].iloc[0])
+        indicator_units = df["Unit"].iloc[0]
+    else:
+        indicator_value = None
+        indicator_units = None
 
-    elif indicator.source == "CYCLES":
-        return get_mean_precipitation(date.year), "mm"
+    return (
+        (indicator_value, indicator_units)
+        if not pd.isna(indicator_value)
+        else (None, None)
+    )
 
 
-def process_variable_name(x: str):
+def get_variable_and_source(x: str):
     """ Process the variable name to make it more human-readable. """
     xs = x.replace("\/", "|").split("/")
     xs = [x.replace("|", "/") for x in xs]
-    xs.reverse()
-    return " ".join(xs[0:2])
+    if xs[0] == "FAO":
+        return " ".join(xs[2:]), xs[0]
+    else:
+        return xs[-1], xs[0]
 
 
 def construct_concept_to_indicator_mapping(
-    n: int = 2, mapping=concept_to_indicator_mapping
+    n: int = 1, mapping = concept_to_indicator_mapping
 ) -> Dict[str, List[str]]:
     """ Create a dictionary mapping high-level concepts to low-level indicators """
 
     df = pd.read_table(
         mapping,
-        usecols=[1, 3, 4],
-        names=["Concept Grounding", "Indicator Grounding", "Score"],
+        usecols=[1, 2, 3, 4],
+        names=["Concept", "Source", "Indicator", "Score"],
+        dtype={"Concept":str, "Source":str, "Indicator":str, "Score":np.float64},
     )
-    gb = df.groupby("Concept Grounding")
+    gb = df.groupby("Concept")
 
-    construct_variable_name = (
-        lambda x: x.split("/")[-1] + " " + x.split("/")[-2]
-    )
-    return {
-        k.split("/")[-1]: [
-            process_variable_name(x)
-            for x in v["Indicator Grounding"].values[0:n]
+    _dict = {
+        k: [
+            get_variable_and_source(x)
+            for x in v["Indicator"].values[0:n]
         ]
         for k, v in gb
     }
+    return _dict
 
 
 def get_indicators(concept: str, mapping: Dict = None) -> Optional[List[str]]:
     return (
-        {x: Indicator(x, "FAO/WDI") for x in mapping[concept]}
+        {x[0]: Indicator(x[0], x[1]) for x in mapping[concept]}
         if concept in mapping
         else None
     )

diff --git a/delphi/export.py b/delphi/export.py
@@ -74,8 +74,12 @@ def to_agraph(G, *args, **kwargs) -> AGraph:
     )
 
     color_str = "#650021"
-    for n in G.nodes():
-        A.add_node(n, label=n.capitalize().replace("_", " "))
+    for n in G.nodes(data=True):
+        if kwargs.get("values"):
+            node_label = n[0].capitalize().replace("_", " ") + " ("+str(np.mean(n[1]["rv"].dataset))+")"
+        else:
+            node_label = n[0].capitalize().replace("_", " ")
+        A.add_node(n[0], label=node_label)
 
     for e in G.edges(data=True):
         reinforcement = np.mean(
@@ -89,11 +93,12 @@ def to_agraph(G, *args, **kwargs) -> AGraph:
         )
         h = (opacity * 255).hex()
         cmap = cm.Greens if reinforcement > 0 else cm.Reds
-        c_str = matplotlib.colors.rgb2hex(cmap(abs(reinforcement))) + h[4:6]
+        c_str = matplotlib.colors.rgb2hex(cmap(abs(reinforcement)))# + h[4:6]
         A.add_edge(e[0], e[1], color=c_str, arrowsize=0.5)
 
     # Drawing indicator variables
 
+
     if kwargs.get("indicators"):
         for n in nodes_with_indicators:
             for indicator_name, ind in n[1]["indicators"].items():

diff --git a/delphi/icm_api/api.py b/delphi/icm_api/api.py
@@ -208,7 +208,6 @@ def createExperiment(uuid: str):
         elif data["projection"]["stepSize"] == "YEAR":
             d = d + relativedelta(years=1)
 
-        G.update()
 
         for n in G.nodes(data=True):
             CausalVariable.query.filter_by(
@@ -232,6 +231,8 @@ def createExperiment(uuid: str):
                     },
                 }
             )
+
+        G.update()
     db.session.add(result)
     db.session.commit()