Packaging the code
Simon-Rey committed Nov 6, 2024
1 parent bbbe4a9 commit 4f6c3b3
Showing 20 changed files with 48,889 additions and 3 deletions.
2 changes: 1 addition & 1 deletion .gitignore
@@ -159,4 +159,4 @@ cython_debug/
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
.idea/
35 changes: 33 additions & 2 deletions README.md
@@ -1,2 +1,33 @@
# easychair-extra
Python package to work with files exported from EasyChair. Useful to develop tools when running a conference.
# EasyChair-Extra

Python package to work with files exported from EasyChair.
Useful to develop tools when running a conference.

This package came to life when Ulle Endriss was PC chair for ECAI-2024. In that role, many tools
enhancing the functionality provided by EasyChair were needed. The tools that could benefit the
community have been gathered here.

## The Package

The package is documented in the code and there is currently no plan to provide full-fledged
standalone documentation. Roughly speaking:

- `easychair_extra.read` provides functions to read EasyChair files;
- `easychair_extra.programcommittee` provides functions relating to the committee;
- `easychair_extra.reviewassignment` provides functions relating to the assignment of
submissions to PC members;
- `easychair_extra.submission` provides functions relating to the submissions.
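
For instance, reading the topics file shipped with the sample files could look as follows.
This is a minimal sketch, run from the repository root; it assumes that `read_topics`
returns two mappings, from areas to topics and from topics to areas.

```python
# Minimal sketch, assuming the sample files in easychair_sample_files and that
# read_topics returns (areas_to_topics, topics_to_areas) mappings.
from easychair_extra.read import read_topics

areas_to_topics, topics_to_areas = read_topics("easychair_sample_files/topics.csv")
for area, topics in areas_to_topics.items():
    print(f"{area}: {len(topics)} topics")
```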

## Learn by Examples

In the `examples` folder we provide example scripts showing how to use the package. Check them out
to understand how the package works.

Example files for typical EasyChair outputs are located in the `easychair_sample_files` folder.
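
These sample files can be regenerated with the `easychair_extra.generate` module added in this
commit. Below is a minimal sketch; the paths are assumptions (relative to the repository root),
the numbers mirror the script at the bottom of `generate.py`, and files whose paths are not given
are written to the working directory under their default names.

```python
# Minimal sketch: regenerate sample files for 500 submissions and 1500 committee members.
# Paths are assumptions; unspecified files (topics, bidding, ...) use their default names.
from easychair_extra.generate import generate_full_conference

generate_full_conference(
    500,
    1500,
    submission_file_path="easychair_sample_files/submission.csv",
    author_file_path="easychair_sample_files/author.csv",
    committee_file_path="easychair_sample_files/committee.csv",
)
```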

## Usage Policy

This package is provided as an open-source repository under a GNU 3.0 license.

If you are using this package and adding functionalities, please consider submitting pull requests.
Others may be looking for the same functionalities!
Empty file added easychair_extra/__init__.py
Empty file.
290 changes: 290 additions & 0 deletions easychair_extra/generate.py
@@ -0,0 +1,290 @@
import csv
import os.path
import random
from collections import defaultdict
from datetime import datetime

from faker import Faker

from easychair_extra.read import author_list_to_str, read_topics


def generate_submission_files(
        num_submission: int,
        *,
        submission_file_path: str = "submission.csv",
        submission_topic_file_path: str = "submission_topic.csv",
        author_file_path: str = "author.csv",
        topic_list: list = None):
    """Generates sample "submission.csv", "submission_topic.csv" and "author.csv" files
    following the typical EasyChair format (no guarantees here)."""

    fake = Faker()

    if topic_list is None:
        topic_list = [f"topic_{x}" for x in range(30)]

    submissions = []
    sub_to_authors = defaultdict(list)
    all_authors = dict()
    sub_to_topics = {}
    for sub_id in range(1, num_submission + 2):
        # Each submission gets 1 to 5 authors and 2 to 5 topics
        num_authors = random.randint(1, 5)
        authors = [fake.name() for _ in range(num_authors)]
        sub_to_authors[sub_id] = authors
        for author in authors:
            all_authors[author] = None
        sub_to_topics[sub_id] = random.sample(topic_list, random.randint(2, 5))
        submission_dict = {
            "#": sub_id,
            "title": fake.sentence(nb_words=6)[:-1],
            "authors": authors,
            "keywords": fake.words(nb=random.randint(2, 5)),
            "abstract": fake.text(max_nb_chars=500),
        }
        submissions.append(submission_dict)

    submission_headers = [
        "#",
        "title",
        "authors",
        "submitted",
        "last updated",
        "form fields",
        "keywords",
        "decision",
        "notified",
        "reviews sent",
        "abstract",
        "deleted?"
    ]
    with open(submission_file_path, "w", encoding="utf-8") as f:
        writer = csv.writer(f, delimiter=",")
        writer.writerow(submission_headers)
        for s in submissions:
            writer.writerow([
                s["#"],
                s["title"],
                author_list_to_str(s["authors"]),
                datetime.now().strftime("%Y-%m-%d %H:%M"),
                datetime.now().strftime("%Y-%m-%d %H:%M"),
                "",
                '\n'.join(s["keywords"]),
                '',
                'no',
                'no',
                s["abstract"],
                'yes' if random.random() < 0.05 else 'no'
            ])

    author_headers = ["submission #", "first name", "last name", "email", "country", "affiliation",
                      "Web page", "person #", "corresponding?"]

    for author_id, author in enumerate(all_authors):
        all_authors[author] = {
            "first name": author.split(" ")[0],
            "last name": author.split(" ")[1],
            "email": fake.email(),
            "country": fake.country(),
            "affiliation": fake.sentence(nb_words=4)[:-1],
            "Web page": fake.url(),
            "person #": author_id + 1
        }

    with open(author_file_path, "w", encoding="utf-8") as f:
        writer = csv.writer(f, delimiter=",")
        writer.writerow(author_headers)
        for sub_id, authors in sub_to_authors.items():
            corresponding_author = random.choice(authors)
            for author in authors:
                author_details = all_authors[author]
                writer.writerow([
                    sub_id,
                    author_details["first name"],
                    author_details["last name"],
                    author_details["email"],
                    author_details["country"],
                    author_details["affiliation"],
                    author_details["Web page"],
                    author_details["person #"],
                    "yes" if author == corresponding_author else "no"
                ])

    with open(submission_topic_file_path, "w", encoding="utf-8") as f:
        writer = csv.writer(f, delimiter=",")
        writer.writerow(["submission #", "topic"])
        for sub_id, topics in sub_to_topics.items():
            for topic in topics:
                writer.writerow([sub_id, topic])


def generate_committee_files(
        committee_size: int,
        authors_file_path: str,
        *,
        committee_file_path: str = "committee.csv",
        committee_topic_file_path: str = "committee_topic.csv",
        topic_list: list = None
):
    """Generates sample "committee.csv" and "committee_topic.csv" files, re-using some of the
    authors from the given author file as committee members (no guarantees here)."""
    fake = Faker()

    if topic_list is None:
        topic_list = [f"topic_{x}" for x in range(30)]

    with open(authors_file_path, encoding="utf-8") as f:
        reader = csv.DictReader(f)
        all_authors = list(reader)[1:]
        max_person_id = max((int(a["person #"]) for a in all_authors), default=0)

    all_persons = []
    person_to_topics = {}
    for person_idx in range(1, committee_size + 2):
        if random.random() < 0.35:
            # Roughly a third of the committee members are also authors; copy the row so that
            # an author picked twice is not mutated in place
            person_details = dict(random.choice(all_authors))
        else:
            name = fake.name()
            person_details = {
                "#": person_idx,
                "first name": name.split(" ")[0],
                "last name": name.split(" ")[1],
                "email": fake.email(),
                "country": fake.country(),
                "affiliation": fake.sentence(nb_words=4)[:-1],
                "Web page": fake.url(),
                "person #": max_person_id + 1,
            }
            max_person_id += 1

        person_details["#"] = person_idx
        person_details["role"] = random.choice(
            ["PC member"] * 5 * 5 + ["senior PC member"] * 5 + ["associate chair"])
        all_persons.append(person_details)

        key = (
            person_details["#"], person_details["first name"] + " " + person_details["last name"])
        person_to_topics[key] = random.sample(topic_list, random.randint(5, 10))

    committee_headers = ["#", "person #", "first name", "last name", "email", "country",
                         "affiliation", "Web page", "role"]

    with open(committee_file_path, "w", encoding="utf-8") as f:
        writer = csv.writer(f, delimiter=",")
        writer.writerow(committee_headers)
        for person in all_persons:
            writer.writerow([
                person["#"],
                person["person #"],
                person["first name"],
                person["last name"],
                person["email"],
                person["country"],
                person["affiliation"],
                person["Web page"],
                person["role"]
            ])

    with open(committee_topic_file_path, "w", encoding="utf-8") as f:
        writer = csv.writer(f, delimiter=",")
        writer.writerow(["member #", "member name", "topic"])
        for person_key, topics in person_to_topics.items():
            for topic in topics:
                writer.writerow([person_key[0], person_key[1], topic])


def generate_review_files(
        submission_file_path,
        committee_file_path,
        bidding_file_path="bidding.csv",
        review_file_path="review.csv",
):
    """Generates a sample "bidding.csv" file for the given submission and committee files
    (no review file is written yet, the review_file_path argument is currently unused)."""
    with open(submission_file_path, encoding="utf-8") as f:
        reader = csv.DictReader(f)
        all_submissions = list(reader)[1:]

    with open(committee_file_path, encoding="utf-8") as f:
        reader = csv.DictReader(f)
        all_persons = list(reader)[1:]

    person_to_bids = {}
    for person in all_persons:
        bid_dict = {}
        # Each member bids on roughly 20 submissions, with more bids for the senior roles
        threshold = 20 / len(all_submissions)
        if person["role"] == "senior PC member":
            threshold *= 2.5
        if person["role"] == "associate chair":
            threshold *= 4
        for submission in all_submissions:
            if random.random() < threshold:
                if random.random() < 0.5 or person["role"] == "associate chair":
                    bid_dict[submission["#"]] = "yes"
                else:
                    bid_dict[submission["#"]] = "maybe"
        key = (person["#"], person["first name"] + " " + person["last name"])
        person_to_bids[key] = bid_dict

    with open(bidding_file_path, "w", encoding="utf-8") as f:
        writer = csv.writer(f, delimiter=",")
        writer.writerow(["member #", "member name", "submission #", "bid"])
        for person_key, bid_dict in person_to_bids.items():
            for sub, bid in bid_dict.items():
                writer.writerow([
                    person_key[0],
                    person_key[1],
                    sub,
                    bid
                ])


def generate_full_conference(
        num_submissions,
        committee_size,
        *,
        submission_file_path: str = "submission.csv",
        submission_topic_file_path: str = "submission_topic.csv",
        author_file_path: str = "author.csv",
        committee_file_path="committee.csv",
        committee_topic_file_path="committee_topic.csv",
        bidding_file_path="bidding.csv",
        review_file_path="review.csv",
        topic_list=None,
):
    """Generates the full set of sample files (submissions, topics, authors, committee and
    bids) for a conference with the given numbers of submissions and committee members."""
    generate_submission_files(
        num_submissions,
        submission_file_path=submission_file_path,
        author_file_path=author_file_path,
        submission_topic_file_path=submission_topic_file_path,
        topic_list=topic_list,
    )

    generate_committee_files(
        committee_size,
        author_file_path,
        committee_file_path=committee_file_path,
        committee_topic_file_path=committee_topic_file_path,
        topic_list=topic_list,
    )

    generate_review_files(
        submission_file_path,
        committee_file_path,
        bidding_file_path=bidding_file_path,
        review_file_path=review_file_path
    )


if __name__ == "__main__":
    areas_to_topics, topics_to_areas = read_topics(
        os.path.join("..", "easychair_sample_files", "topics.csv"))
    generate_full_conference(
        500,
        1500,
        submission_file_path=os.path.join("..", "easychair_sample_files", "submission.csv"),
        submission_topic_file_path=os.path.join("..", "easychair_sample_files",
                                                "submission_topic.csv"),
        author_file_path=os.path.join("..", "easychair_sample_files", "author.csv"),
        committee_file_path=os.path.join("..", "easychair_sample_files", "committee.csv"),
        committee_topic_file_path=os.path.join("..", "easychair_sample_files",
                                               "committee_topic.csv"),
        bidding_file_path=os.path.join("..", "easychair_sample_files", "bidding.csv"),
        review_file_path=os.path.join("..", "easychair_sample_files", "review.csv"),
        topic_list=list(topics_to_areas)
    )
21 changes: 21 additions & 0 deletions easychair_extra/programcommittee.py
@@ -0,0 +1,21 @@
def papers_without_pc(committee_df, submission_df):
    """Inserts a column called "no_author_pc" in the submission dataframe, indicating
    whether none of the authors of a submission is part of the program committee. The
    value is "None" if all authors are students.

    Useful in case of policies such as: for every submission, one author can be asked to serve
    in the program committee.

    Parameters
    ----------
    submission_df : pandas.DataFrame
        The submission dataframe
    committee_df : pandas.DataFrame
        The committee dataframe
    """

    def aux(row):
        if row["all_authors_students"]:
            return None
        return not any(a in committee_df["person #"].values for a in row["authors_id"])

    submission_df["no_author_pc"] = submission_df.apply(aux, axis=1)