From f098a1fe0ad44f4947b478399de682d706b2d892 Mon Sep 17 00:00:00 2001 From: AnGWar26 Date: Wed, 5 Aug 2020 14:23:16 -0700 Subject: [PATCH 1/8] analytics: store information about models in ModelResults class - prep for silhouette plotting - enables us to pass info about the models much more easily Ex: comm.models['ward'].X will give the X of the 'ward' model --- geosnap/analyze/analytics.py | 45 ++++++++++++++++++++++++++++++++---- 1 file changed, 40 insertions(+), 5 deletions(-) diff --git a/geosnap/analyze/analytics.py b/geosnap/analyze/analytics.py index c7163eab..194315da 100644 --- a/geosnap/analyze/analytics.py +++ b/geosnap/analyze/analytics.py @@ -1,7 +1,5 @@ """Tools for the spatial analysis of neighborhood change.""" -from collections import namedtuple - import numpy as np import pandas as pd from sklearn.preprocessing import StandardScaler @@ -25,9 +23,46 @@ ward_spatial, ) -ModelResults = namedtuple( - "model", ["X", "columns", "labels", "instance", "W"], rename=False -) + +class ModelResults: + """Stores data about cluster and cluster_spatial models. + + Attributes + ---------- + X: array-like + data used to compute model + columns: list-like + columns used in model + W: 'queen', 'rook', or spatial weights matrix + spatial weights matrix used in model + labels: array-like + labels of each column + instance: AgglomerativeCluserting object, or other model specific object type + how many clusters model was computed with + + """ + def __init__(self, X, columns, labels,instance,W,): + """Initialize a new ModelResults instance. 
+ + Parameters + ---------- + X: array-like + data of the cluster + columns: list-like + columns used to compute model + W: 'queen', 'rook', or pyasl spatial weights matrix + spatial weights matrix used in model + labels: array-like + labels of each column + instance: AgglomerativeCluserting object, or other model specific object type + how many clusters model was computed with + + """ + self.columns = columns + self.X = X + self.W = W + self.instance = instance + self.labels = labels def cluster( From 6c52a82d805632c1c3b1de52425e568f82bcfea5 Mon Sep 17 00:00:00 2001 From: AnGWar26 Date: Wed, 5 Aug 2020 15:25:45 -0700 Subject: [PATCH 2/8] Add silhouette plotting to Community class --- ci/36.yaml | 1 + ci/37.yaml | 1 + ci/38.yaml | 1 + environment.yml | 2 +- geosnap/_community.py | 20 ++++++++++++++++++++ 5 files changed, 24 insertions(+), 1 deletion(-) diff --git a/ci/36.yaml b/ci/36.yaml index 91424574..80a7553f 100644 --- a/ci/36.yaml +++ b/ci/36.yaml @@ -35,3 +35,4 @@ dependencies: - pywget - proplot - contextily + - scikit-plot diff --git a/ci/37.yaml b/ci/37.yaml index 5381ba10..affa540b 100644 --- a/ci/37.yaml +++ b/ci/37.yaml @@ -35,3 +35,4 @@ dependencies: - pywget - proplot - contextily + - scikit-plot diff --git a/ci/38.yaml b/ci/38.yaml index db98c31f..84396022 100644 --- a/ci/38.yaml +++ b/ci/38.yaml @@ -35,3 +35,4 @@ dependencies: - pywget - proplot - contextily + - scikit-plot diff --git a/environment.yml b/environment.yml index 12c0b5be..6d261a81 100644 --- a/environment.yml +++ b/environment.yml @@ -25,4 +25,4 @@ dependencies: - tobler >=0.2.1 - proplot - contextily - + - scikit-plot diff --git a/geosnap/_community.py b/geosnap/_community.py index 105efece..9cd644a2 100644 --- a/geosnap/_community.py +++ b/geosnap/_community.py @@ -3,6 +3,7 @@ import geopandas as gpd import pandas as pd +import scikitplot as skplt from ._data import _Map, datasets from .analyze import cluster as _cluster @@ -321,6 +322,25 @@ def cluster_spatial( 
comm.models[model_name] = model return comm + def silplot(self, model_name, year, **kwargs): + """ Returns a silhouette plot of the model that is passed to it. + + Parameters + ---------- + model_name : str , required + model to be silhouette plotted + year : dict key, optional + year of model if model has model for each year + kwargs : **kwargs, optional + pass through to plot_silhouette() + Returns + ------- + silhouette plot of given model. + + """ + plot = skplt.metrics.plot_silhouette(self.models[model_name][year].X, self.models[model_name][year].labels, **kwargs) + return plot + def transition( self, cluster_col, time_var="year", id_var="geoid", w_type=None, permutations=0 ): From eaaa641cc6198bc1b15f987c70b5abd9da0420f8 Mon Sep 17 00:00:00 2001 From: AnGWar26 Date: Wed, 5 Aug 2020 16:34:43 -0700 Subject: [PATCH 3/8] silplot: add logic for when model does not have several years --- geosnap/_community.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/geosnap/_community.py b/geosnap/_community.py index 9cd644a2..3631ec91 100644 --- a/geosnap/_community.py +++ b/geosnap/_community.py @@ -322,7 +322,7 @@ def cluster_spatial( comm.models[model_name] = model return comm - def silplot(self, model_name, year, **kwargs): + def silplot(self, model_name, year=None, **kwargs): """ Returns a silhouette plot of the model that is passed to it. Parameters @@ -338,7 +338,12 @@ def silplot(self, model_name, year, **kwargs): silhouette plot of given model. 
""" - plot = skplt.metrics.plot_silhouette(self.models[model_name][year].X, self.models[model_name][year].labels, **kwargs) + if not year: + plot = skplt.metrics.plot_silhouette(self.models[model_name].X, self.models[model_name].labels, + **kwargs) + else: + plot = skplt.metrics.plot_silhouette(self.models[model_name][year].X, self.models[model_name][year].labels, + **kwargs) return plot def transition( From 6cfe9f4d3d62748b33b71c7939ab6d8d3572d783 Mon Sep 17 00:00:00 2001 From: eli knaap Date: Wed, 5 Aug 2020 16:44:14 -0700 Subject: [PATCH 4/8] small edit to docstrings --- geosnap/_community.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/geosnap/_community.py b/geosnap/_community.py index 3631ec91..2c21564a 100644 --- a/geosnap/_community.py +++ b/geosnap/_community.py @@ -329,8 +329,8 @@ def silplot(self, model_name, year=None, **kwargs): ---------- model_name : str , required model to be silhouette plotted - year : dict key, optional - year of model if model has model for each year + year : int, optional + year to be plotted if model created with pooling=='unique' kwargs : **kwargs, optional pass through to plot_silhouette() Returns From 770f166b2d32376fd6bc161375fea0b50aef2cf3 Mon Sep 17 00:00:00 2001 From: eli knaap Date: Wed, 5 Aug 2020 16:44:30 -0700 Subject: [PATCH 5/8] small edit to docstrings From 2b64f1870d8e2a9e02f17da2eca0d85e0edf775c Mon Sep 17 00:00:00 2001 From: eli knaap Date: Wed, 5 Aug 2020 16:45:05 -0700 Subject: [PATCH 6/8] Update _community.py --- geosnap/_community.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/geosnap/_community.py b/geosnap/_community.py index 2c21564a..0857536e 100644 --- a/geosnap/_community.py +++ b/geosnap/_community.py @@ -322,7 +322,7 @@ def cluster_spatial( comm.models[model_name] = model return comm - def silplot(self, model_name, year=None, **kwargs): + def silplot(self, model_name=None, year=None, **kwargs): """ Returns a silhouette plot of the model that is passed 
to it. Parameters From e799500aafda15656ba63ae9f15eebeddb96b562 Mon Sep 17 00:00:00 2001 From: eli knaap Date: Wed, 5 Aug 2020 16:49:20 -0700 Subject: [PATCH 7/8] Update analytics.py docstring --- geosnap/analyze/analytics.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/geosnap/analyze/analytics.py b/geosnap/analyze/analytics.py index 194315da..07d1c2e8 100644 --- a/geosnap/analyze/analytics.py +++ b/geosnap/analyze/analytics.py @@ -33,12 +33,13 @@ class ModelResults: data used to compute model columns: list-like columns used in model - W: 'queen', 'rook', or spatial weights matrix - spatial weights matrix used in model + W: libpysal.weights.W + libpysal spatial weights matrix used in model labels: array-like labels of each column - instance: AgglomerativeCluserting object, or other model specific object type - how many clusters model was computed with + instance: instance of model class used to generate neighborhood labels. + fitted model instance, e.g sklearn.cluster.AgglomerativeClustering object + or other model class used to estimate class labels """ def __init__(self, X, columns, labels,instance,W,): From d1ab1dab03c16391d700e577bcfac24a9eefe4ed Mon Sep 17 00:00:00 2001 From: eli knaap Date: Wed, 5 Aug 2020 16:50:19 -0700 Subject: [PATCH 8/8] Update analytics.py --- geosnap/analyze/analytics.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/geosnap/analyze/analytics.py b/geosnap/analyze/analytics.py index 07d1c2e8..48810ca8 100644 --- a/geosnap/analyze/analytics.py +++ b/geosnap/analyze/analytics.py @@ -51,8 +51,8 @@ def __init__(self, X, columns, labels,instance,W,): data of the cluster columns: list-like columns used to compute model - W: 'queen', 'rook', or pyasl spatial weights matrix - spatial weights matrix used in model + W: libpysal.weights.W + libpysal spatial weights matrix used in model labels: array-like labels of each column instance: AgglomerativeCluserting object, or other model specific 
object type