Skip to content

Commit

Permalink
Fix CGA Events metric?
Browse files Browse the repository at this point in the history
  • Loading branch information
doprdele committed Jun 26, 2024
1 parent e314fe7 commit 3bcbb2f
Showing 1 changed file with 158 additions and 45 deletions.
203 changes: 158 additions & 45 deletions cga.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,51 @@
import os

sheets = [
["cgaContact", 279615175, os.getenv("SHEET_URL_CGA_CONTACT"), "A:H", [0, 5, 6, 7]], # OK
["cgaWorkshopEvaluation", 1803423154, os.getenv("SHEET_URL_CGA_WORKSHOP_EVALUATIONS"), "A:P",
[0, 1, 2, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]],
[
"cgaContact",
279615175,
os.getenv("SHEET_URL_CGA_CONTACT"),
"A:H",
[0, 5, 6, 7],
], # OK
[
"cgaWorkshopEvaluation",
1803423154,
os.getenv("SHEET_URL_CGA_WORKSHOP_EVALUATIONS"),
"A:P",
[0, 1, 2, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
],
# OK
["cgaGISApplication", 1021617292, os.getenv("SHEET_URL_CGA_GIS_APPLICATION"), "A:N", [0, 1, 5, 6, 7]],
[
"cgaGISApplication",
1021617292,
os.getenv("SHEET_URL_CGA_GIS_APPLICATION"),
"A:N",
[0, 1, 5, 6, 7],
],
# OK
["cgaEventRegistration", 340045856, os.getenv("SHEET_URL_CGA_EVENT_REGISTRATION"), "A:L", [0, 1, 5, 6, 7]],
[
"cgaEventRegistration",
340045856,
os.getenv("SHEET_URL_CGA_EVENT_REGISTRATION"),
"A:L",
[0, 1, 5, 6, 7],
],
# OK
["cgaTrainingRegistration", 2068274999, os.getenv("SHEET_URL_CGA_TRAINING_REGISTRATION"), "A:M",
[0, 1, 2, 6, 7, 8, 11, 12]],
["cgaLicenseRequest", 842362239, os.getenv("SHEET_URL_CGA_LICENSE_REQUEST"), "A:L", [0, 5, 6, 7, 10, 11]]
[
"cgaTrainingRegistration",
2068274999,
os.getenv("SHEET_URL_CGA_TRAINING_REGISTRATION"),
"A:M",
[0, 1, 2, 6, 7, 8, 11, 12],
],
[
"cgaLicenseRequest",
842362239,
os.getenv("SHEET_URL_CGA_LICENSE_REQUEST"),
"A:L",
[0, 5, 6, 7, 10, 11],
],
]


Expand All @@ -33,7 +67,9 @@ def harvest_cga(path):
sheet_url = s[2]
range_name = s[3]
columns = s[4]
harvest_sheet_tsv_http(path, collection, sheet_url, range_name, columns, gid=gid)
harvest_sheet_tsv_http(
path, collection, sheet_url, range_name, columns, gid=gid
)

return

Expand All @@ -52,6 +88,7 @@ def aggregate_cga(path):
gis_institute(path)
cga_lic_req_top10(path)
cga_lic_req_status(path)
cga_event_registration_aggr(path)


def cga_contact_school(path):
Expand All @@ -61,14 +98,20 @@ def cga_contact_school(path):
@return: nothing
"""
# ----------------------------------------------
df = pd.read_csv(path + 'cgaContact.tsv', delimiter="\t")
df_contact_12mo = filter_last_12_months(df, 'Timestamp')
df = pd.read_csv(path + "cgaContact.tsv", delimiter="\t")
df_contact_12mo = filter_last_12_months(df, "Timestamp")
c = "Your primary affiliated school at Harvard"
df_contact_12mo_aggr = get_counts(df_contact_12mo, column=c)
df_contact_12mo_aggr = df_contact_12mo_aggr[df_contact_12mo_aggr[c] != "Non-Harvard"]
df_contact_12mo_aggr = create_percentage(df_contact_12mo_aggr, 'count')
df_contact_12mo_aggr.to_csv(path + "cga_contact_last_12_months_by_school.tsv", sep='\t', index=True,
index_label="id")
df_contact_12mo_aggr = df_contact_12mo_aggr[
df_contact_12mo_aggr[c] != "Non-Harvard"
]
df_contact_12mo_aggr = create_percentage(df_contact_12mo_aggr, "count")
df_contact_12mo_aggr.to_csv(
path + "cga_contact_last_12_months_by_school.tsv",
sep="\t",
index=True,
index_label="id",
)


def cga_lic_req_status(path):
Expand All @@ -77,14 +120,18 @@ def cga_lic_req_status(path):
@param path: path where to write the TSV
@return: nothing
"""
df = pd.read_csv(path + 'cgaLicenseRequest.tsv', delimiter="\t")
df_lic_12mo = filter_last_12_months(df, 'Timestamp')
df = pd.read_csv(path + "cgaLicenseRequest.tsv", delimiter="\t")
df_lic_12mo = filter_last_12_months(df, "Timestamp")
c = "Your primary affiliated school at Harvard"
df_aggr_status = get_counts(df_lic_12mo, c)
df_aggr_status = df_aggr_status[df_aggr_status[c] != "Non-Harvard"]
df_aggr_status = create_percentage(df_aggr_status, 'count')
df_aggr_status.to_csv(path + "cga_license_request_last_12_months_by_status.tsv", sep='\t', index=True,
index_label="id")
df_aggr_status = create_percentage(df_aggr_status, "count")
df_aggr_status.to_csv(
path + "cga_license_request_last_12_months_by_status.tsv",
sep="\t",
index=True,
index_label="id",
)


def cga_lic_req_top10(path):
Expand All @@ -93,7 +140,7 @@ def cga_lic_req_top10(path):
@param path: path where to write the TSV
@return: nothing
"""
df = pd.read_csv(path + 'cgaLicenseRequest.tsv', delimiter="\t")
df = pd.read_csv(path + "cgaLicenseRequest.tsv", delimiter="\t")
# smaller df for last 12 months
df2 = df[["Software product which you need a license for", "Timestamp"]]
df3 = filter_last_12_months(df2, "Timestamp", drop_datetime=True)
Expand All @@ -103,8 +150,17 @@ def cga_lic_req_top10(path):
# clean up of output
df3 = df3.reset_index()
df3 = df3.rename(
columns={'Software product which you need a license for': 'Software product', 'Timestamp': 'count'})
df3.to_csv(path + "cga_license_req_last_12_months.tsv", sep='\t', index=True, index_label="id")
columns={
"Software product which you need a license for": "Software product",
"Timestamp": "count",
}
)
df3.to_csv(
path + "cga_license_req_last_12_months.tsv",
sep="\t",
index=True,
index_label="id",
)


def gis_institute(path):
Expand All @@ -114,13 +170,19 @@ def gis_institute(path):
@return: nothing
"""
# ---------------------------
df = pd.read_csv(path + 'cgaGISApplication.tsv', delimiter="\t")
df = pd.read_csv(path + "cgaGISApplication.tsv", delimiter="\t")
applications_ytd = len(get_records_YTD(df, drop_datetime=True))
write_metric(path=path, group="CGA", metric="GIS Institute Applications",
title="GIS Institute",
value=applications_ytd, unit="Number of applications " + get_current_year_str() + " YTD",
icon="fa fa-university", color="blue",
url="")
write_metric(
path=path,
group="CGA",
metric="GIS Institute Applications",
title="GIS Institute",
value=applications_ytd,
unit="Number of applications " + get_current_year_str() + " YTD",
icon="fa fa-university",
color="blue",
url="",
)


def cga_training_evaluations(path):
Expand All @@ -129,10 +191,55 @@ def cga_training_evaluations(path):
@param path: path where to write the TSV
@return: nothing
"""
df = pd.read_csv(path + 'cgaWorkshopEvaluation.tsv', delimiter="\t")
df = pd.read_csv(path + "cgaWorkshopEvaluation.tsv", delimiter="\t")
df_aggr = df.describe()[1:2].transpose()
df_aggr = df_aggr.transform(lambda x: round(x, 2))
df_aggr.to_csv(path + 'cga_workshop_evaluations.tsv', sep="\t", index=True, index_label="metric")
df_aggr.to_csv(
path + "cga_workshop_evaluations.tsv",
sep="\t",
index=True,
index_label="metric",
)


def cga_event_registration_aggr(path):
    """
    Publish the "CGA Events" dashboard metric.

    Reads the harvested event registrations, keeps the last 12 months, counts
    registrations per event and keeps only events with more than 5
    registrations. The value reported is the registration count of the most
    recent such event (events are ordered by their earliest registration).
    When no event qualifies, 0 is reported instead of raising.

    No TSV is written by this function; the result goes through write_metric.
    @param path: directory prefix holding cgaEventRegistration.tsv and where
                 the metric is written
    @return: nothing
    """
    df = pd.read_csv(path + "cgaEventRegistration.tsv", delimiter="\t")
    df = filter_last_12_months(df, "Timestamp")
    # NOTE(review): the "datetime" column is not in the TSV; it is presumably
    # added by filter_last_12_months -- confirm against that helper.
    df = df.sort_values("datetime")

    # sort=False keeps groups in order of first appearance, i.e. in time order
    # of each event's earliest registration after the sort above.
    counts = df.groupby("The event name", sort=False)["datetime"].count()
    counts = counts[counts > 5]

    # Positional access: the previous label-based `[0]` lookup only worked
    # when exactly one event qualified and raised KeyError otherwise.
    if len(counts) > 0:
        latest_event_registrations = int(counts.iloc[-1])
    else:
        latest_event_registrations = 0

    write_metric(
        path=path,
        group="CGA",
        metric="Number of Registrations for CGA Conference",
        title="CGA Events",
        value=latest_event_registrations,
        unit="Registrations for CGA Conference " + get_current_year_str() + " YTD",
        icon="fa fa-university",
        color="blue",
        url="",
    )


def cga_training_aggr(path):
Expand All @@ -143,25 +250,27 @@ def cga_training_aggr(path):
@return: nothing
"""
# Training (C) --------------------------------
df = pd.read_csv(path + 'cgaTrainingRegistration.tsv', delimiter="\t")
df = filter_last_12_months(df, 'Date of the training workshop')
df = pd.read_csv(path + "cgaTrainingRegistration.tsv", delimiter="\t")
df = filter_last_12_months(df, "Date of the training workshop")
df["month"] = df.datetime.transform(lambda x: x.strftime("%b") + " " + str(x.year))
df["name"] = df["Name of the training workshop"] + "#(" + df["month"] + ")" # we name the column 'count'
df["name"] = (
df["Name of the training workshop"] + "#(" + df["month"] + ")"
) # we name the column 'count'
df = df.sort_values("datetime")

# create a list with unique courses in time order
df2 = df[["Name of the training workshop", "month", "name"]].drop_duplicates()
df2.reset_index(drop=True) # save the order

# count the number or registrations and save ones with more than 5
df3 = df[["name", "datetime"]].groupby(['name']).count()
df3 = df[["name", "datetime"]].groupby(["name"]).count()
df3 = df3[df3["datetime"] > 5]

# join with the table with the correct order and rename columns
df3 = df2.merge(df3, how="inner", on="name").drop_duplicates()[["name", "datetime"]]
df_aggr = df3.rename(columns={'name': 'course', 'datetime': 'registration_count'})
df_aggr = df3.rename(columns={"name": "course", "datetime": "registration_count"})
# save
df_aggr.to_csv(path + 'cga_training.tsv', sep="\t", index=True, index_label="id")
df_aggr.to_csv(path + "cga_training.tsv", sep="\t", index=True, index_label="id")


def cga_contact_time(path):
Expand All @@ -170,10 +279,12 @@ def cga_contact_time(path):
@param path: path where to write the TSV
@return: nothing
"""
df = pd.read_csv(path + 'cgaContact.tsv', delimiter="\t")
df = pd.read_csv(path + "cgaContact.tsv", delimiter="\t")
df["date"] = df.Timestamp.transform(lambda x: convert_timestamp_str(x)[:7])
df_aggr = pd.DataFrame({'count': df["date"].value_counts()}).sort_index()
df_previous_12_months(df_aggr).to_csv(path + 'cga_contact.tsv', sep="\t", index=True, index_label="date")
df_aggr = pd.DataFrame({"count": df["date"].value_counts()}).sort_index()
df_previous_12_months(df_aggr).to_csv(
path + "cga_contact.tsv", sep="\t", index=True, index_label="date"
)


def cga_contact_status(path):
Expand All @@ -182,10 +293,12 @@ def cga_contact_status(path):
@param path: path where to write the TSV
@return: nothing
"""
df = pd.read_csv(path + 'cgaContact.tsv', delimiter="\t")
c = 'Your Harvard status/appointment'
df_aggr = filter_last_12_months(df, 'Timestamp')
df = pd.read_csv(path + "cgaContact.tsv", delimiter="\t")
c = "Your Harvard status/appointment"
df_aggr = filter_last_12_months(df, "Timestamp")
df_aggr2 = get_counts(df_aggr, c)
df_aggr2 = df_aggr2[df_aggr2["Your Harvard status/appointment"] != "Non-Harvard"]
df_aggr2 = create_percentage(df_aggr2, 'count')
df_aggr2.to_csv(path + 'cga_contact_status.tsv', sep="\t", index=True, index_label="id")
df_aggr2 = create_percentage(df_aggr2, "count")
df_aggr2.to_csv(
path + "cga_contact_status.tsv", sep="\t", index=True, index_label="id"
)

0 comments on commit 3bcbb2f

Please sign in to comment.