Skip to content

Commit

Permalink
Merge branch 'main' of github.com:COMSOC-Community/easychair-extra
Browse files Browse the repository at this point in the history
  • Loading branch information
Simon-Rey committed Nov 19, 2024
2 parents 5ac55f1 + 083ebd0 commit a883f39
Show file tree
Hide file tree
Showing 3 changed files with 203 additions and 1 deletion.
2 changes: 1 addition & 1 deletion easychair_extra/read.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ def read_submission(
if row["corresponding?"] == "yes":
corresponding_authors[sub_id].append(person_id)
df["authors_id"] = df.apply(
lambda df_row: sub_to_authors.get(df_row["#"], []), axis=1
lambda df_row: tuple(sub_to_authors.get(df_row["#"], [])), axis=1
)
df["corresponding_id"] = df.apply(
lambda df_row: corresponding_authors.get(df_row["#"], []), axis=1
Expand Down
41 changes: 41 additions & 0 deletions examples/papers_with_same_authors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import os

import pandas as pd

from easychair_extra.read import read_submission, read_committee


def main():
    """Print every author set that is shared by more than one submission.

    Reads the sample EasyChair submission and author files stored in the
    ``easychair_sample_files`` directory next to this script's parent, then
    prints, for each repeated author set, the matching submissions.
    """
    current_dir = os.path.dirname(os.path.abspath(__file__))
    sample_dir = os.path.join(current_dir, "..", "easychair_sample_files")

    # Read the submission file
    submissions = read_submission(
        os.path.join(sample_dir, "submission.csv"),
        author_file_path=os.path.join(sample_dir, "author.csv"),
    )

    # Add a column with the number of authors. Mapping over the column
    # (instead of a row-wise apply) still yields a proper column when the
    # frame has no rows.
    submissions["num_authors"] = submissions["authors_id"].map(len)
    submissions.sort_values("num_authors", inplace=True, ascending=False)

    # "authors_id" must hold hashable values for duplicated()/unique();
    # presumably tuples of author ids as built by read_submission.
    is_repeat = submissions.duplicated(subset=["authors_id"])
    duplicated_authors = submissions.loc[is_repeat, "authors_id"].unique()

    print(f"These author sets have more than one submission: {duplicated_authors}")
    for authors in duplicated_authors:
        print("=" * 20 + f"\nAuthor set: {authors}")
        # Compare cell by cell so that the tuple is always handled as a
        # single value and never as a sequence to align with the column.
        same_authors = submissions["authors_id"].map(
            lambda ids, target=authors: ids == target
        )
        with pd.option_context(
            "display.max_rows", None, "display.max_columns", None, "display.width", 500
        ):
            print(
                submissions.loc[
                    same_authors, ["#", "title", "authors_id", "authors"]
                ].to_string(index=False)
            )


if __name__ == "__main__":
main()
161 changes: 161 additions & 0 deletions examples/plot_num_pc_per_area.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
import pathlib
from collections import Counter, defaultdict

import csv
import os.path

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from easychair_extra.read import read_committee, read_topics

# Full area name => short label.
AREA_NICKNAMES = {
    "Fairness, Ethics, and Trust": "FAIR",
    "Computer Vision": "CV",
    "Constraints and Satisfiability": "CONSAT",
    "Data Mining": "DATA",
    "Knowledge Representation and Reasoning": "KRR",
    "Humans and AI": "HUM",
    "Machine Learning": "ML",
    "Multiagent Systems": "MAS",
    "Natural Language Processing": "NLP",
    "Planning and Search": "PLAN",
    "Robotics": "ROBO",
    "Uncertainty in AI": "UAI",
    "Multidisciplinary Topics": "MULT",
}

# Short label => full area name. Derived from AREA_NICKNAMES so that the two
# mappings cannot drift apart when an area is added or renamed.
AREA_NICKNAMES_SWAPPED = {
    nickname: area for area, nickname in AREA_NICKNAMES.items()
}

# Committee role name => short label.
ROLE_NICKNAMES = {
    "associate chair": "AC",
    "senior PC member": "SPC",
    "chair": "PCC",
    "PC member": "PC",
}


def compute_area_topic_to_weight(committee_df, topics_to_areas):
    """Spread each committee member evenly over their topics and sum per role.

    A member who declared ``k`` topics contributes ``1 / k`` to each of them,
    so that every member with at least one topic weighs exactly 1 in total.

    Args:
        committee_df: data frame with the columns ``"#"`` (member id),
            ``"role"`` and ``"topics"`` (an iterable of topics per member).
        topics_to_areas: mapping from each topic to its area.

    Returns:
        A pair ``(areas_to_weight, topics_to_weight)`` of nested dicts, both
        shaped ``{area or topic: {role: weight}}``.

    Raises:
        KeyError: if a topic of a member is missing from ``topics_to_areas``.
    """
    # topic => role => list of member ids
    topics_to_pcs = defaultdict(lambda: defaultdict(list))
    num_topics_per_pc = Counter()

    rows = zip(committee_df["#"], committee_df["role"], committee_df["topics"])
    for pc_id, role, topics in rows:
        for topic in topics:
            # Count one per topic (not one per row): this count is the
            # denominator of the member's weight below.
            num_topics_per_pc[pc_id] += 1
            topics_to_pcs[topic][role].append(pc_id)

    # We replace the PC members ids by their weight, for the topic dict
    topics_to_weight = {
        topic: {
            role: sum(1 / num_topics_per_pc[pc] for pc in pc_list)
            for role, pc_list in role_dict.items()
        }
        for topic, role_dict in topics_to_pcs.items()
    }

    # We sum up for the areas
    areas_to_weight = {}
    for topic, role_dict in topics_to_weight.items():
        area_roles = areas_to_weight.setdefault(topics_to_areas[topic], {})
        for role, weight in role_dict.items():
            area_roles[role] = area_roles.get(role, 0) + weight

    return areas_to_weight, topics_to_weight


def _format_bar_label(height):
    """Return the text shown above a bar: integer style or one decimal."""
    if int(height) == height or height > 10:
        return str(round(height))
    return f"{height:.1f}"


def plot_requirement_vs_practice(
    area_weights,
):
    """Show one bar chart per area with the weight of each committee role.

    Args:
        area_weights: nested mapping ``{area: {role: weight}}``. Areas are
            taken from ``AREA_NICKNAMES``; an area or role that is absent
            from ``area_weights`` is plotted with a weight of 0.
    """
    all_areas = sorted(AREA_NICKNAMES)
    all_roles = ["associate chair", "senior PC member", "PC member"]

    # Prepare the pandas frame. An area without any committee member has no
    # entry in area_weights, hence the two-level fallback to 0.
    data = [
        {
            "area": area,
            "role": role,
            "type": "current",
            "value": area_weights.get(area, {}).get(role, 0),
        }
        for area in all_areas
        for role in all_roles
    ]
    df = pd.DataFrame(data)

    plt.close("all")
    g = sns.catplot(
        data=df,
        x="role",
        y="value",
        col="area",
        col_wrap=3,
        kind="bar",
        sharey=False,
        sharex=False,
    )

    # Add the values on top
    for ax in g.axes.ravel():
        for container in ax.containers:
            labels = [_format_bar_label(bar.get_height()) for bar in container]
            ax.bar_label(container, labels=labels, label_type="edge")
        # Leave head-room so that the labels are not clipped
        ax.margins(y=0.2)

    g.set_titles("{col_name}")
    g.set_axis_labels("", "Count")

    plt.show()


def main():
    """Plot, for the sample EasyChair files, the committee weight per area and role."""
    script_dir = os.path.dirname(os.path.abspath(__file__))
    sample_dir = os.path.join(script_dir, "..", "easychair_sample_files")

    committee_path = os.path.join(sample_dir, "committee.csv")
    committee_topics_path = os.path.join(sample_dir, "committee_topic.csv")
    topics_path = os.path.join(sample_dir, "topics.csv")

    # Load the committee together with the topics of its members
    committee = read_committee(
        committee_path, committee_topic_file_path=committee_topics_path
    )

    # Only the topic => area direction of the mapping is needed here
    _, topic_to_area = read_topics(topics_path)

    # Aggregate the weights per area and per role, then draw them
    weights_by_area, _ = compute_area_topic_to_weight(committee, topic_to_area)
    plot_requirement_vs_practice(weights_by_area)


if __name__ == "__main__":
main()

0 comments on commit a883f39

Please sign in to comment.