Skip to content

Commit

Permalink
feat: support latest changes to Grant data model (#31)
Browse files Browse the repository at this point in the history
* add new script: create_grant_projects

* remove unneeded code from sync script

* use latest data model/schema

* remove whitespace from manifest colnames

* add missing column

* split select STRING cols to STRINGLIST

* black format

* update default manifest value

* replace nan with empty strings

* move script to utils folder; remove return statement
  • Loading branch information
vpchung committed Jan 24, 2024
1 parent bf389ff commit 2aa6d3d
Show file tree
Hide file tree
Showing 2 changed files with 251 additions and 177 deletions.
234 changes: 57 additions & 177 deletions portal_tables/sync_grants.py
Original file line number Diff line number Diff line change
@@ -1,171 +1,45 @@
"""Add Grants to the Cancer Complexity Knowledge Portal (CCKP).
This script will sync over new grants and their annotations to the
Grants portal table. A Synapse Project with pre-filled Wikis and
Folders will also be created for each new grant found, as well as
a Synapse team.
Grants portal table.
"""

import argparse

import synapseclient
from synapseclient import Table, Project, Wiki, Folder, Team

# Access types granted at each named permission level. Every level is
# built from the previous one, so each level is a strict superset of the
# level before it.
PERMISSIONS = {"view": ["READ"]}
PERMISSIONS["download"] = PERMISSIONS["view"] + ["DOWNLOAD"]
PERMISSIONS["edit"] = PERMISSIONS["download"] + ["CREATE", "UPDATE"]
PERMISSIONS["edit_delete"] = PERMISSIONS["edit"] + ["DELETE"]
PERMISSIONS["admin"] = PERMISSIONS["edit_delete"] + [
    "MODERATE",
    "CHANGE_SETTINGS",
    "CHANGE_PERMISSIONS",
]


def _syn_prettify(name):
"""Prettify a name that will conform to Synapse naming rules.
Names can only contain letters, numbers, spaces, underscores, hyphens,
periods, plus signs, apostrophes, and parentheses.
"""
valid = {38: "and", 58: "-", 59: "-", 47: "_"}
return name.translate(valid)
from synapseclient import Table


def _join_listlike_col(col, join_by="_", delim=","):
"""Join list-like column values by specified value.
# def _join_listlike_col(col, join_by="_", delim=","):
# """Join list-like column values by specified value.

Expects a list, but if string is given, then split (and strip
whitespace) by delimiter first.
"""
if isinstance(col, str):
col = [el.strip() for el in col.split(delim)]
return join_by.join(col).replace("'", "")
# Expects a list, but if string is given, then split (and strip
# whitespace) by delimiter first.
# """
# if isinstance(col, str):
# col = [el.strip() for el in col.split(delim)]
# return join_by.join(col).replace("'", "")


def get_args():
    """Set up command-line interface and get arguments.

    Returns:
        argparse.Namespace with the attributes ``manifest``,
        ``portal_table`` and ``dryrun``, parsed from ``sys.argv``.
    """
    parser = argparse.ArgumentParser(description="Add new grants to the CCKP")
    # `%(default)s` is expanded by argparse, so the help text always shows
    # the real default instead of a hand-copied ID that can go stale.
    parser.add_argument(
        "-m",
        "--manifest",
        type=str,
        default="syn35242677",
        help="Synapse ID to the manifest table/fileview. (Default: %(default)s)",
    )
    parser.add_argument(
        "-t",
        "--portal_table",
        type=str,
        default="syn21918972",
        help="Add grants to this specified table. (Default: %(default)s)",
    )
    parser.add_argument(
        "--dryrun",
        action="store_true",
        help="Show the new grants found without doing any updates.",
    )
    return parser.parse_args()


def create_wiki_pages(syn, project_id, grant):
"""Create main Wiki page for the Project."""

# Main Wiki page
consortium = grant["grantConsortiumName"]
grant_type = grant["grantType"]
title = grant["grantInstitutionAlias"]
institutions = grant["grantInstitutionName"]
desc = grant["grantAbstract"] or ""

content = f"""### The {consortium} {grant_type} Research Project \@ {title}
#### List of Collaborating Institutions
{institutions}
#### Project Description
{desc}
"""
content += (
"->"
"${buttonlink?text="
"Back to Multi-Consortia Coordinating (MC2) Center"
"&url=https%3A%2F%2Fwww%2Esynapse%2Eorg%2F%23%21Synapse%3Asyn7080714%2F}"
"<-"
parser.add_argument(
    "-m",
    "--manifest",
    type=str,
    default="syn53259587",
    # `%(default)s` is expanded by argparse, so the help text cannot drift
    # from the actual default (it previously advertised syn35242677).
    help="Synapse ID to the manifest table/fileview. (Default: %(default)s)",
)
main_wiki = Wiki(title=grant["grantName"], owner=project_id, markdown=content)
main_wiki = syn.store(main_wiki)

# Sub-wiki page: Project Investigators
pis = [pi.strip(" ") for pi in grant["grantInvestigator"].split(",")]
pi_markdown = "* " + "\n* ".join(pis)
pi_wiki = Wiki(
title="Project Investigators",
owner=project_id,
markdown=pi_markdown,
parentWikiId=main_wiki.id,
# Destination table for the new grants.
parser.add_argument(
    "-t",
    "--portal_table",
    default="syn21918972",
    type=str,
    help="Add grants to this specified table. (Default: syn21918972)",
)
pi_wiki = syn.store(pi_wiki)


def create_folders(syn, project_id):
    """Store the top-level folders expected by the DCA under a project.

    One folder is stored for each of: projects, publications, datasets,
    tools.

    Args:
        syn: client object whose ``store`` method persists each folder.
        project_id: ID of the project used as the folders' parent.
    """
    top_level_names = ("projects", "publications", "datasets", "tools")
    for folder_name in top_level_names:
        folder = Folder(folder_name, parent=project_id)
        syn.store(folder)


def create_team(syn, project_id, grant, access_type="edit"):
    """Create a team for a new grant project and give it project access.

    The team is named "<consortia> <center> <grantType> <grantNumber>",
    where consortia and center are the grant's consortium names and
    institution aliases joined by `_join_listlike_col`. The team is stored
    with public joining disabled and is then granted the access types
    listed under `access_type` in `PERMISSIONS`.

    Failures raised as ValueError are reported on stdout rather than
    propagated, so one bad team does not stop the caller.

    Args:
        syn: client object used to store the team and set permissions.
        project_id: ID of the project the team is granted access to.
        grant: row/mapping providing grantConsortiumName,
            grantInstitutionAlias, grantType and grantNumber.
        access_type: key into `PERMISSIONS` (default: "edit").
    """
    consortia = _join_listlike_col(grant["grantConsortiumName"])
    center = _join_listlike_col(grant["grantInstitutionAlias"])
    team_name = f"{consortia} {center} {grant['grantType']} {grant['grantNumber']}"
    try:
        new_team = Team(name=team_name, canPublicJoin=False)
        new_team = syn.store(new_team)
        syn.setPermissions(
            project_id,
            principalId=new_team.id,
            accessType=PERMISSIONS.get(access_type),
        )
    except ValueError as err:
        # The HTTP status is only reachable when the ValueError was raised
        # while handling an error that carries a `response`. Look it up
        # defensively so an unrelated ValueError is reported below instead
        # of raising AttributeError from inside this handler.
        response = getattr(err.__context__, "response", None)
        if getattr(response, "status_code", None) == 409:
            print(f"Team already exists: {team_name}")
        else:
            print(f"Something went wrong! Team: {team_name}")


def create_grant_projects(syn, grants):
    """Create a new Synapse project for each grant and populate its Wiki.

    For every row, a project named after the prettified grantName is
    stored, principal 3450948 is given the "admin" access types, and the
    project's Wiki pages, folders and team are created. The row's grantId
    is set to the new project ID, or to "" when a SynapseHTTPError is
    raised at any of these steps.

    Args:
        syn: client object used to store projects and set permissions.
        grants: DataFrame of grants; its grantId column is updated in place.

    Returns:
        df: grants information (including their new Project IDs)
    """
    for idx, grant in grants.iterrows():
        project_name = _syn_prettify(grant["grantName"])
        try:
            project = syn.store(Project(project_name))
            syn.setPermissions(
                project.id,
                principalId=3450948,
                accessType=PERMISSIONS.get("admin"),
            )

            # Record the new project's ID on the grant before filling the
            # project in.
            grants.at[idx, "grantId"] = project.id

            create_wiki_pages(syn, project.id, grant)
            create_folders(syn, project.id)
            create_team(syn, project.id, grant)
        except synapseclient.core.exceptions.SynapseHTTPError:
            print(f"Skipping: {project_name}")
            grants.at[idx, "grantId"] = ""
    return grants
parser.add_argument("--dryrun", action="store_true")
return parser.parse_args()


def sync_table(syn, grants, table):
Expand All @@ -178,29 +52,34 @@ def sync_table(syn, grants, table):

# Reorder columns to match the table order.
col_order = [
"grantId",
"grantName",
"grantNumber",
"grantAbstract",
"grantType",
"grantThemeName",
"grantInstitutionAlias",
"grantInstitutionName",
"grantInvestigator",
"grantConsortiumName",
"project_id",
"GrantView_id",
"GrantName",
"GrantNumber",
"GrantAbstract",
"GrantType",
"GrantThemeName",
"GrantInstitutionAlias",
"GrantInstitutionName",
"GrantInvestigator",
"GrantConsortiumName",
"GrantStartDate",
"NIHRePORTERLink",
"DurationofFunding",
"EmbargoEndDate",
"GrantSynapseTeam",
"GrantSynapseProject",
]
grants = grants[col_order]

# Convert columns into STRINGLIST.
grants.loc[:, "grantThemeName"] = grants.grantThemeName.str.split(", ")
grants.loc[:, "grantInstitutionName"] = (
grants["grantInstitutionName"]
.str
.split(", "))
grants.loc[:, "grantInstitutionAlias"] = (
grants["grantInstitutionAlias"]
.str
.split(", "))
for col in [
"GrantThemeName",
"GrantInstitutionAlias",
"GrantInstitutionName",
"GrantConsortiumName",
]:
grants.loc[:, col] = grants[col].str.replace(", ", ",").str.split(",")

new_rows = grants.values.tolist()
syn.store(Table(schema, new_rows))
Expand All @@ -212,28 +91,29 @@ def main():
syn.login(silent=True)
args = get_args()

manifest = syn.tableQuery(f"SELECT * FROM {args.manifest}").asDataFrame()
manifest = syn.tableQuery(f"SELECT * FROM {args.manifest}").asDataFrame().fillna("")
manifest.columns = manifest.columns.str.replace(" ", "")
curr_grants = (
syn.tableQuery(f"SELECT grantNumber FROM {args.portal_table}")
.asDataFrame()
.grantNumber
.to_list()
.grantNumber.to_list()
)

# Only add grants not currently in the Grants table.
new_grants = manifest[~manifest.grantNumber.isin(curr_grants)]
new_grants = manifest[~manifest.GrantNumber.isin(curr_grants)]
if new_grants.empty:
print("No new grants found!")
else:
print(f"{len(new_grants)} new grants found!\n")
if args.dryrun:
print("\u26A0", "WARNING:",
"dryrun is enabled (no updates will be done)\n")
print("\u26A0", "WARNING: dryrun is enabled (no updates will be done)\n")
print(new_grants)
else:
print("Adding new grants...")
added_grants = create_grant_projects(syn, new_grants)
sync_table(syn, added_grants, args.portal_table)
new_grants.loc[
:, "project_id"
] = new_grants.GrantSynapseProject.str.extract(r":(syn\d*?)/wiki")
sync_table(syn, new_grants, args.portal_table)
print("DONE ✓")


Expand Down
Loading

0 comments on commit 2aa6d3d

Please sign in to comment.