Skip to content

Commit

Permalink
Merge pull request #134 from ml4ai/dev
Browse files Browse the repository at this point in the history
This PR implements

- Misc. developments from the WM hackathon (better integration with Uncharted HMI)
- Refactoring of some of the program analysis code to a more OOP-like form, to get rid of global module-level variables.
  • Loading branch information
adarshp authored Dec 11, 2018
2 parents 04a49ed + 1210d64 commit ccdcd3a
Show file tree
Hide file tree
Showing 22 changed files with 890 additions and 3,800 deletions.
101 changes: 51 additions & 50 deletions delphi/AnalysisGraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,54 +118,55 @@ def from_uncharted_json_serialized_dict(
for s in sts:
if len(s["evidence"]) >= minimum_evidence_pieces_required:
subj, obj = s["subj"], s["obj"]
subj_name, obj_name = [
s[x]["db_refs"]["concept"].split("/")[-1]
for x in ["subj", "obj"]
]
G.add_edge(subj_name, obj_name)
subj_delta = s["subj_delta"]
obj_delta = s["obj_delta"]
for delta in (subj_delta, obj_delta):
# TODO : Ensure that all the statements provided by
# Uncharted have unambiguous polarities.
if delta["polarity"] is None:
delta["polarity"] = 1
influence_stmt = Influence(
Concept(subj_name, db_refs=subj["db_refs"]),
Concept(obj_name, db_refs=obj["db_refs"]),
subj_delta=s["subj_delta"],
obj_delta=s["obj_delta"],
evidence=[
Evidence(
source_api=ev["source_api"],
annotations=ev["annotations"],
text=ev["text"],
epistemics=ev.get("epistemics"),
)
for ev in s["evidence"]
],
)
influence_sts = G.edges[subj_name, obj_name].get(
"InfluenceStatements", []
)
influence_sts.append(influence_stmt)
G.edges[subj_name, obj_name][
"InfluenceStatements"
] = influence_sts
if (subj["db_refs"]["concept"] is not None and
obj["db_refs"]["concept"] is not None):
subj_name, obj_name = [
"/".join(s[x]["db_refs"]["concept"].split("/")[:])
for x in ["subj", "obj"]
]
G.add_edge(subj_name, obj_name)
subj_delta = s["subj_delta"]
obj_delta = s["obj_delta"]

for delta in (subj_delta, obj_delta):
# TODO : Ensure that all the statements provided by
# Uncharted have unambiguous polarities.
if delta["polarity"] is None:
delta["polarity"] = 1
influence_stmt = Influence(
Concept(subj_name, db_refs=subj["db_refs"]),
Concept(obj_name, db_refs=obj["db_refs"]),
subj_delta=s["subj_delta"],
obj_delta=s["obj_delta"],
evidence=[
Evidence(
source_api=ev["source_api"],
annotations=ev["annotations"],
text=ev["text"],
epistemics=ev.get("epistemics"),
)
for ev in s["evidence"]
],
)
influence_sts = G.edges[subj_name, obj_name].get(
"InfluenceStatements", []
)
influence_sts.append(influence_stmt)
G.edges[subj_name, obj_name][
"InfluenceStatements"
] = influence_sts

for concept, indicator in _dict[
"concept_to_indicator_mapping"
].items():
concept_name = concept.split("/")[-1]
if concept_name != "Unknown":
if indicator != "???":
indicator_source, *indicator_name = indicator.split("/")
if concept_name in G:
if G.nodes[concept_name].get("indicators") is None:
G.nodes[concept_name]["indicators"] = {}
G.nodes[concept_name]["indicators"][
indicator_name[-1]
] = Indicator(indicator_name[-1], indicator_source)
if indicator is not None:
indicator_source, indicator_name = indicator.split("/")[0], indicator
if concept in G:
if G.nodes[concept].get("indicators") is None:
G.nodes[concept]["indicators"] = {}
G.nodes[concept]["indicators"][
indicator_name
] = Indicator(indicator_name, indicator_source)

self = cls(G)
self.assign_uuids_to_nodes_and_edges()
Expand Down Expand Up @@ -229,7 +230,7 @@ def map_concepts_to_indicators(

for n in self.nodes(data=True):
n[1]["indicators"] = get_indicators(
n[0].lower().replace(" ", "_"), mapping
n[0], mapping
)

def default_update_function(self, n: Tuple[str, dict]) -> List[float]:
Expand Down Expand Up @@ -258,7 +259,7 @@ def construct_default_initial_state(self) -> pd.Series:
# Basic Modeling Interface (BMI)
# ==========================================================================

def initialize(self, config_file: str):
def initialize(self, config_file: str = "bmi_config.txt"):
""" Initialize the executable AnalysisGraph with a config file.
Args:
Expand All @@ -276,9 +277,9 @@ def initialize(self, config_file: str):
node = n[1]["rv"]
node.dataset = [self.s0[n[0]] for _ in range(self.res)]
node.partial_t = self.s0[f"∂({n[0]})/∂t"]
if n[1].get("indicators") is not None:
for ind in n[1]["indicators"].values():
ind.dataset = np.ones(self.res) * ind.mean
# if n[1].get("indicators") is not None:
# for ind in n[1]["indicators"].values():
# ind.dataset = np.ones(self.res) * ind.mean

def update(self):
""" Advance the model by one time step. """
Expand Down Expand Up @@ -403,7 +404,7 @@ def parameterize(self, time: datetime, data=south_sudan_data):
data = get_data(data)

nodes_with_indicators = [
n for n in self.nodes(data=True) if n[1]["indicators"] is not None
n for n in self.nodes(data=True) if n[1].get("indicators") is not None
]

for n in nodes_with_indicators:
Expand Down
117 changes: 57 additions & 60 deletions delphi/assembly.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,6 @@ def get_respdevs(gb):
return gb["respdev"]


def process_concept_name(name: str) -> str:
""" Remove underscores from concept name. """
return name.replace("_", " ")


def filter_statements(sts: List[Influence]) -> List[Influence]:
return [s for s in sts if is_well_grounded(s) and is_simulable(s)]

Expand All @@ -46,34 +41,36 @@ def constructConditionalPDF(
all_thetas = []
for stmt in e[2]["InfluenceStatements"]:
for ev in stmt.evidence:
for subj_adjective in ev.annotations["subj_adjectives"]:
if (
subj_adjective in gb.groups
and subj_adjective not in adjective_response_dict
):
adjective_response_dict[subj_adjective] = get_respdevs(
gb.get_group(subj_adjective)
)
rs_subj = stmt.subj_delta[
"polarity"
] * adjective_response_dict.get(subj_adjective, rs)

for obj_adjective in ev.annotations["obj_adjectives"]:
# To account for discrepancy between Hume and Eidos extractions
if ev.annotations.get("subj_adjectives") is not None:
for subj_adjective in ev.annotations["subj_adjectives"]:
if (
obj_adjective in gb.groups
and obj_adjective not in adjective_response_dict
subj_adjective in gb.groups
and subj_adjective not in adjective_response_dict
):
adjective_response_dict[obj_adjective] = get_respdevs(
gb.get_group(obj_adjective)
adjective_response_dict[subj_adjective] = get_respdevs(
gb.get_group(subj_adjective)
)

rs_obj = stmt.obj_delta[
rs_subj = stmt.subj_delta[
"polarity"
] * adjective_response_dict.get(obj_adjective, rs)
] * adjective_response_dict.get(subj_adjective, rs)

xs1, ys1 = np.meshgrid(rs_subj, rs_obj, indexing="xy")
thetas = np.arctan2(ys1.flatten(), xs1.flatten())
all_thetas.append(thetas)
for obj_adjective in ev.annotations["obj_adjectives"]:
if (
obj_adjective in gb.groups
and obj_adjective not in adjective_response_dict
):
adjective_response_dict[obj_adjective] = get_respdevs(
gb.get_group(obj_adjective)
)

rs_obj = stmt.obj_delta[
"polarity"
] * adjective_response_dict.get(obj_adjective, rs)

xs1, ys1 = np.meshgrid(rs_subj, rs_obj, indexing="xy")
thetas = np.arctan2(ys1.flatten(), xs1.flatten())
all_thetas.append(thetas)

# Prior
xs1, ys1 = np.meshgrid(
Expand Down Expand Up @@ -102,7 +99,7 @@ def get_best_match(indicator: Indicator, items: Iterable[str]) -> str:

def get_data(filename: str) -> pd.DataFrame:
""" Create a dataframe out of south_sudan_data.csv """
df = pd.read_csv(filename, sep="|", index_col="Indicator Name")
df = pd.read_csv(filename, index_col="Variable")
return df


Expand Down Expand Up @@ -131,61 +128,61 @@ def get_indicator_value(
) -> Optional[float]:
""" Get the value of a particular indicator at a particular date and time. """

if indicator.source == "FAO/WDI":
best_match = get_best_match(indicator, df.index)

year = str(date.year)
if not year in df.columns:
return None
else:
indicator_value = df[year][best_match]
indicator_units = df.loc[best_match]["Unit"]
# if indicator.source == "FAO/WDI":
best_match = get_best_match(indicator, df.index)

return (
(indicator_value, indicator_units)
if not pd.isna(indicator_value)
else (None, indicator_units)
)
# TODO Fix the above
df = df.loc[best_match].loc[lambda df: df["Year"] == date.year].loc[lambda df: df["Month"] == date.month]
if not df["Value"].isna().all():
indicator_value = float(df["Value"].iloc[0])
indicator_units = df["Unit"].iloc[0]
else:
indicator_value = None
indicator_units = None

elif indicator.source == "CYCLES":
return get_mean_precipitation(date.year), "mm"
return (
(indicator_value, indicator_units)
if not pd.isna(indicator_value)
else (None, None)
)


def process_variable_name(x: str):
def get_variable_and_source(x: str):
""" Process the variable name to make it more human-readable. """
xs = x.replace("\/", "|").split("/")
xs = [x.replace("|", "/") for x in xs]
xs.reverse()
return " ".join(xs[0:2])
if xs[0] == "FAO":
return " ".join(xs[2:]), xs[0]
else:
return xs[-1], xs[0]


def construct_concept_to_indicator_mapping(
n: int = 2, mapping=concept_to_indicator_mapping
n: int = 1, mapping = concept_to_indicator_mapping
) -> Dict[str, List[str]]:
""" Create a dictionary mapping high-level concepts to low-level indicators """

df = pd.read_table(
mapping,
usecols=[1, 3, 4],
names=["Concept Grounding", "Indicator Grounding", "Score"],
usecols=[1, 2, 3, 4],
names=["Concept", "Source", "Indicator", "Score"],
dtype={"Concept":str, "Source":str, "Indicator":str, "Score":np.float64},
)
gb = df.groupby("Concept Grounding")
gb = df.groupby("Concept")

construct_variable_name = (
lambda x: x.split("/")[-1] + " " + x.split("/")[-2]
)
return {
k.split("/")[-1]: [
process_variable_name(x)
for x in v["Indicator Grounding"].values[0:n]
_dict = {
k: [
get_variable_and_source(x)
for x in v["Indicator"].values[0:n]
]
for k, v in gb
}
return _dict


def get_indicators(concept: str, mapping: Dict = None) -> Optional[List[str]]:
return (
{x: Indicator(x, "FAO/WDI") for x in mapping[concept]}
{x[0]: Indicator(x[0], x[1]) for x in mapping[concept]}
if concept in mapping
else None
)
Expand Down
11 changes: 8 additions & 3 deletions delphi/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,12 @@ def to_agraph(G, *args, **kwargs) -> AGraph:
)

color_str = "#650021"
for n in G.nodes():
A.add_node(n, label=n.capitalize().replace("_", " "))
for n in G.nodes(data=True):
if kwargs.get("values"):
node_label = n[0].capitalize().replace("_", " ") + " ("+str(np.mean(n[1]["rv"].dataset))+")"
else:
node_label = n[0].capitalize().replace("_", " ")
A.add_node(n[0], label=node_label)

for e in G.edges(data=True):
reinforcement = np.mean(
Expand All @@ -89,11 +93,12 @@ def to_agraph(G, *args, **kwargs) -> AGraph:
)
h = (opacity * 255).hex()
cmap = cm.Greens if reinforcement > 0 else cm.Reds
c_str = matplotlib.colors.rgb2hex(cmap(abs(reinforcement))) + h[4:6]
c_str = matplotlib.colors.rgb2hex(cmap(abs(reinforcement)))# + h[4:6]
A.add_edge(e[0], e[1], color=c_str, arrowsize=0.5)

# Drawing indicator variables


if kwargs.get("indicators"):
for n in nodes_with_indicators:
for indicator_name, ind in n[1]["indicators"].items():
Expand Down
3 changes: 2 additions & 1 deletion delphi/icm_api/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,6 @@ def createExperiment(uuid: str):
elif data["projection"]["stepSize"] == "YEAR":
d = d + relativedelta(years=1)

G.update()

for n in G.nodes(data=True):
CausalVariable.query.filter_by(
Expand All @@ -232,6 +231,8 @@ def createExperiment(uuid: str):
},
}
)

G.update()
db.session.add(result)
db.session.commit()

Expand Down
Loading

0 comments on commit ccdcd3a

Please sign in to comment.