Skip to content

Commit

Permalink
Merge branch 'main' of https://github.com/mysociety/data_common into main
Browse files (browse the repository at this point in the history)
  • Loading branch information
ajparsons committed Aug 21, 2024
2 parents 26861c9 + 7b160b1 commit 410eb12
Show file tree
Hide file tree
Showing 147 changed files with 71 additions and 12,570 deletions.
90 changes: 58 additions & 32 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ authors = []

[tool.poetry.dependencies]
python = ">=3.10,<3.11"
numpy = "1.21.0"
openpyxl = "3.0.7"
pandas = "1.4.2"
scikit-learn = "^1.0.2"
Expand Down Expand Up @@ -49,6 +48,7 @@ altair-viewer = "^0.4.0"
vl-convert-python = "^1.6.0"
ruff = "^0.6.1"
types-pillow = "^10.2.0.20240520"
pandas-stubs = "^2.2.2.240807"

[build-system]
requires = ["poetry-core>=1.0.0"]
Expand Down
10 changes: 5 additions & 5 deletions src/data_common/dataset/resource_management.py
Original file line number Diff line number Diff line change
Expand Up @@ -1042,9 +1042,9 @@ def build_coversheet(
desc = self.get_datapackage()
settings = get_settings()

bold = writer.book.add_format({"bold": True})
bold = writer.book.add_format({"bold": True}) # type: ignore

ws = writer.book.add_worksheet("package_description")
ws = writer.book.add_worksheet("package_description") # type: ignore
ws.set_column(2, 2, 40)
ws.set_column(3, 3, 30)
ws.write(2, 2, "Dataset", bold)
Expand Down Expand Up @@ -1200,7 +1200,7 @@ def build_excel(self, is_geodata: bool = False):

writer = pd.ExcelWriter(excel_path)
writer = self.build_coversheet(writer, allowed_sheets=allowed_resource_slugs)
text_wrap = writer.book.add_format({"text_wrap": True})
text_wrap = writer.book.add_format({"text_wrap": True}) # type: ignore

for sheet_name, df in sheets.items():
short_sheet_name = sheet_name[-31:] # only allow 31 characters
Expand All @@ -1210,7 +1210,7 @@ def build_excel(self, is_geodata: bool = False):
df.to_excel(writer, sheet_name=short_sheet_name, index=False)

for column in df:
column_length = max(df[column].astype(str).map(len).max(), len(column))
column_length = max(df[column].astype(str).map(len).max(), len(column)) # type: ignore
column_length += 4

col_idx = df.columns.get_loc(column)
Expand All @@ -1223,7 +1223,7 @@ def build_excel(self, is_geodata: bool = False):
col_idx, col_idx, 50, text_wrap
)

writer.save()
writer.save() # type: ignore

def build_sqlite(self, is_geodata: bool = False):
"""
Expand Down
2 changes: 1 addition & 1 deletion src/data_common/dataset/table_management.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def get_table_schema(
Produce a table data schema for the dataframe
https://specs.frictionlessdata.io/table-schema/
"""
data: SchemaValidator = build_table_schema(df, index=False, version=False)
data: SchemaValidator = build_table_schema(df, index=False, version=False) # type: ignore
data["fields"] = [
cls.enhance_field(df, field, descriptions, enums)
for field in data["fields"]
Expand Down
2 changes: 1 addition & 1 deletion src/data_common/pandas/df_extensions/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def __init__(self, pandas_obj: pd.DataFrame):
self._obj = pandas_obj

def update_from_map(self, map: dict) -> pd.Series:
return self._obj.apply(lambda x: map.get(x, x))
return self._obj.apply(lambda x: map.get(x, x)) # type:ignore


@pd_api.extensions.register_dataframe_accessor("common")
Expand Down
2 changes: 1 addition & 1 deletion src/data_common/pandas/df_extensions/la.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ def get_council_info(
adf = get_la_df(include_historical=include_historical, as_of_date=as_of_date)
if items:
adf = adf[["local-authority-code"] + items]
return df.merge(adf, how=merge_type)
return df.merge(adf, how=merge_type) # type: ignore

def just_lower_tier(self) -> pd.DataFrame:
"""
Expand Down
9 changes: 4 additions & 5 deletions src/data_common/pandas/df_extensions/space.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
from IPython.display import display
from ipywidgets import interactive
from matplotlib.colors import Colormap
from numpy.typing import ArrayLike
from scipy.spatial.distance import pdist, squareform
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
Expand Down Expand Up @@ -198,7 +197,7 @@ def map_from_anchor(self, anchor: pd.DataFrame | Path) -> dict[int, int]:

# create mapping of the new (and possibly random numbers) to the ones we've previously assigned labels.

return mapping.to_dict()
return mapping.to_dict() # type: ignore

def get_label_options(self) -> list:
return [self.get_label_name(x) for x in range(1, self.k + 1)]
Expand All @@ -209,7 +208,7 @@ def get_cluster_label_ids(self) -> pd.Series:
labels = labels.map(self.cluster_no_mapping)
return labels

def get_cluster_labels(self, include_short=True) -> ArrayLike:
def get_cluster_labels(self, include_short=True) -> np.ndarray:
labels = self.get_cluster_label_ids()

def f(x):
Expand All @@ -220,7 +219,7 @@ def f(x):

label_array = get_cluster_labels

def get_cluster_descs(self) -> ArrayLike:
def get_cluster_descs(self) -> np.ndarray:
labels = self.get_cluster_label_ids()
labels = labels.apply(lambda x: self.get_label_desc(n=x))
return np.array(labels)
Expand Down Expand Up @@ -350,7 +349,7 @@ def s_score(kmeans):
df = pd.DataFrame({"n": range(start, stop, step)})
df["k_means"] = df["n"].apply(self.get_clusters)
df["sum_squares"] = df["k_means"].apply(lambda x: x.inertia_)
df["silhouette"] = df["k_means"].apply(s_score)
df["silhouette"] = df["k_means"].apply(s_score) # type: ignore

plt.rcParams["figure.figsize"] = (10, 5) # type: ignore
plt.subplot(1, 2, 1)
Expand Down
116 changes: 0 additions & 116 deletions typings/pandas/__init__.pyi

This file was deleted.

10 changes: 0 additions & 10 deletions typings/pandas/_libs/__init__.pyi

This file was deleted.

Loading

0 comments on commit 410eb12

Please sign in to comment.