tests and all

COMSOC-Community · Nov 7, 2024 · f53be1d · f53be1d
1 parent 4f6c3b3
commit f53be1d
Show file tree

Hide file tree

Showing 20 changed files with 142,853 additions and 47,582 deletions.
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -0,0 +1,53 @@
+# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
+# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
+
+name: build
+
+on:
+  push:
+    branches: [ "main" ]
+  pull_request:
+    branches: [ "main" ]
+  workflow_dispatch:
+    branches: [ "main" ]
+
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"]
+
+    steps:
+    - name: Checkout
+      uses: actions/checkout@v4
+
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v5
+      with:
+        python-version: ${{ matrix.python-version }}
+
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install flake8
+        pip install .
+        pip install .[dev]
+
+    - name: Lint with flake8
+      run: |
+        # stop the build if there are Python syntax errors or undefined names
+        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
+        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+
+    - name: Run tests with unittest and collect coverage
+      run: python -m coverage run --omit="*/test*,/usr/lib*,tests/*" -m unittest
+
+    - name: Upload coverage reports to Codecov
+      uses: codecov/codecov-action@v4
+      env:
+        CODECOV_TOKEN: ${{ secrets.CODECOV }}
diff --git a/README.md b/README.md
@@ -1,5 +1,9 @@
 # EasyChair-Extra
 
+[![Build badge](https://github.com/COMSOC-Community/easychair-extra/workflows/build/badge.svg)](https://github.com/COMSOC-Community/easychair-extra/actions/workflows/build.yml)
+[![codecov](https://codecov.io/gh/COMSOC-Community/easychair-extra/branch/main/graphs/badge.svg)](https://codecov.io/gh/COMSOC-Community/easychair-extra/tree/main)
+
+
 Python package to work with files exported from EasyChair. 
 Useful to develop tools when running a conference.
 
@@ -13,6 +17,7 @@ The package is documented in the code and there is no current plan on provided a
 documentation. Roughly speaking:
 
 - `easychair_extra.read` provides functions to read EasyChair files;
+- `easychair_extra.generate` provides functions to generate random EasyChair files;
 - `easychair_extra.programcommittee` provides functions relating to the committee;
 - `easychair_extra.reviewassignment` provides functions relating to the assignment of 
 submissions to PC members;

diff --git a/easychair_extra/generate.py b/easychair_extra/generate.py
@@ -1,23 +1,38 @@
+from __future__ import annotations
+
 import csv
-import os.path
 import random
+
 from collections import defaultdict
 from datetime import datetime
-
 from faker import Faker
 
-from easychair_extra.read import author_list_to_str, read_topics
-
 
 def generate_submission_files(
-        num_submission: int,
+        num_submissions: int,
         *,
         submission_file_path: str = "submission.csv",
         submission_topic_file_path: str = "submission_topic.csv",
         author_file_path: str = "author.csv",
         topic_list: list = None):
-    """Generates a sample "author.csv" file following the typical EasyChair format (no guarantees
-     here). """
+    """Generates sample files related to the submissions. Specifically, the submission, the
+    submission topic and the author files are generated. The format of the files follows that of
+    EasyChair. The EasyChair format has been inferred from actual files; there is thus no guarantee
+    that the format is exactly correct.
+
+    Parameters
+    ----------
+        num_submissions: int
+            The number of submissions to generate.
+        submission_file_path: str
+            The path to the submission file that will be generated.
+        submission_topic_file_path: str
+            The path to the submission_topic file that will be generated.
+        author_file_path: str
+            The path to the author file that will be generated.
+        topic_list: list
+            A list of topics to choose from for the topics of the submissions.
+    """
 
     fake = Faker()
 
@@ -28,19 +43,30 @@ def generate_submission_files(
     sub_to_authors = defaultdict(list)
     all_authors = dict()
     sub_to_topics = {}
-    for sub_id in range(1, num_submission + 2):
+    for sub_id in range(1, num_submissions + 2):
         num_authors = random.randint(1, 5)
         authors = [fake.name() for _ in range(num_authors)]
         sub_to_authors[sub_id] = authors
         for author in authors:
             all_authors[author] = None
         sub_to_topics[sub_id] = random.sample(topic_list, random.randint(2, 5))
+        decision = random.choice(
+            ["no decision"] * 10 + ["desk reject"] * 3 + ["reject"] * 25 + ["accept"] * 10 +
+            ["withdrawn"] * 1
+        )
         submission_dict = {
             "#": sub_id,
             "title": fake.sentence(nb_words=6)[:-1],
             "authors": authors,
+            "submitted": datetime.now().strftime("%Y-%m-%d %H:%M"),
+            "last updated": datetime.now().strftime("%Y-%m-%d %H:%M"),
+            "form fields": "",
             "keywords": fake.words(nb=random.randint(2, 5)),
+            "decision": decision,
+            "notified": "no",
+            "reviews sent": "no",
             "abstract": fake.text(max_nb_chars=500),
+            "deleted?": 'yes' if random.random() < 0.05 else 'no'
         }
         submissions.append(submission_dict)
 
@@ -62,20 +88,7 @@ def generate_submission_files(
         writer = csv.writer(f, delimiter=",")
         writer.writerow(submission_headers)
         for s in submissions:
-            writer.writerow([
-                s["#"],
-                s["title"],
-                author_list_to_str(s["authors"]),
-                datetime.now().strftime("%Y-%m-%d %H:%M"),
-                datetime.now().strftime("%Y-%m-%d %H:%M"),
-                "",
-                '\n'.join(s["keywords"]),
-                '',
-                'no',
-                'no',
-                s["abstract"],
-                'yes' if random.random() < 0.05 else 'no'
-            ])
+            writer.writerow([s[h] for h in submission_headers])
 
     author_headers = ["submission #", "first name", "last name", "email", "country", "affiliation",
                       "Web page", "person #", "corresponding?"]
@@ -126,6 +139,25 @@ def generate_committee_files(
         committee_topic_file_path: str = "committee_topic.csv",
         topic_list: list = None
 ):
+    """Generates sample files related to the committee. Specifically, the committee and the
+    committee topic files are generated. The format of the files follows that of
+    EasyChair. The EasyChair format has been inferred from actual files; there is thus no guarantee
+    that the format is exactly correct.
+
+    Parameters
+    ----------
+        committee_size: int
+            The number of persons to generate.
+        authors_file_path: str
+            The path to the author file as exported from EasyChair, or as generated by the function
+            generate_submission_files. This is used to include some authors as reviewers.
+        committee_file_path: str
+            The path to the committee file that will be generated.
+        committee_topic_file_path: str
+            The path to the committee_topic file that will be generated.
+        topic_list: list
+            A list of topics to choose from for the topics of the committee members.
+    """
     fake = Faker()
 
     if topic_list is None:
@@ -161,7 +193,7 @@ def generate_committee_files(
         all_persons.append(person_details)
 
         key = (
-        person_details["#"], person_details["first name"] + " " + person_details["last name"])
+            person_details["#"], person_details["first name"] + " " + person_details["last name"])
         person_to_topics[key] = random.sample(topic_list, random.randint(5, 10))
 
     committee_headers = ["#", "person #", "first name", "last name", "email", "country",
@@ -192,11 +224,31 @@ def generate_committee_files(
 
 
 def generate_review_files(
-        submission_file_path,
-        committee_file_path,
-        bidding_file_path="bidding.csv",
-        review_file_path="review.csv",
+        submission_file_path: str,
+        committee_file_path: str,
+        bidding_file_path: str = "bidding.csv",
+        review_file_path: str = "review.csv",
 ):
+    """Generates sample files related to the reviews. Specifically, the bidding and the review
+    files are generated. The format of the files follows that of EasyChair. The EasyChair format
+    has been inferred from actual files; there is thus no guarantee that the format is
+    exactly correct.
+
+    Parameters
+    ----------
+        submission_file_path: str
+            The path to the submission file as exported from EasyChair, or as generated by the
+            function generate_submission_files. Reviews for the submissions are generated.
+        committee_file_path: str
+            The path to the committee file as exported from EasyChair, or as generated by the
+            function generate_committee_files. The reviewers are taken from this file.
+        bidding_file_path: str
+            The path to the bidding file that will be generated.
+        review_file_path: str
+            The path to the review file that will be generated.
+    """
+    fake = Faker()
+
     with open(submission_file_path, encoding="utf-8") as f:
         reader = csv.DictReader(f)
         all_submissions = list(reader)[1:]
@@ -234,6 +286,53 @@ def generate_review_files(
                     bid
                 ])
 
+    all_reviews = []
+    potential_reviewers = [p for p in all_persons if p["role"] == "PC member"]
+    review_counter = 1
+    for submission in all_submissions:
+        num_reviewers = random.choice([0] + [1] * 2 + [2] * 3 + [4] * 4)
+        for reviewer_idx, reviewer in enumerate(random.sample(potential_reviewers, num_reviewers)):
+            score = random.randint(1, 10)
+            review = {
+                "#": review_counter,
+                "submission #": submission["#"],
+                "member #": reviewer["#"],
+                "member name": reviewer["first name"] + " " + reviewer["last name"],
+                "number": reviewer_idx,
+                "version": random.randint(1, 5),
+                "text": fake.text(2000),
+                "scores": f"Score: {score}\nConfidence: {random.randint(1, 5)}",
+                "total score": score,
+                "reviewer first name": "",
+                "reviewer last name": "",
+                "reviewer email": "",
+                "reviewer person #": "",
+                "date": datetime.now().strftime("%Y-%m-%d"),
+                "time": datetime.now().strftime("%H:%M"),
+                "attachment?": "no",
+            }
+            if random.random() < 0.05:
+                sub_reviewer = random.choice(all_persons)
+                while sub_reviewer != reviewer:
+                    sub_reviewer = random.choice(all_persons)
+                review["reviewer first name"] = sub_reviewer["first name"]
+                review["reviewer last name"] = sub_reviewer["last name"]
+                review["reviewer email"] = sub_reviewer["email"]
+                review["reviewer person #"] = sub_reviewer["person #"]
+
+            all_reviews.append(review)
+            review_counter += 1
+
+    review_headers = ["#", "submission #", "member #", "member name", "number", "version", "text",
+                      "scores", "total score", "reviewer first name", "reviewer last name",
+                      "reviewer email", "reviewer person #", "date", "time", "attachment?"]
+
+    with open(review_file_path, "w", encoding="utf-8") as f:
+        writer = csv.writer(f, delimiter=",")
+        writer.writerow(review_headers)
+        for review in all_reviews:
+            writer.writerow([review[h] for h in review_headers])
+
 
 def generate_full_conference(
         num_submissions,
@@ -248,6 +347,34 @@ def generate_full_conference(
         review_file_path="review.csv",
         topic_list=None,
 ):
+    """Generates sample files to simulate a full conference. The format of the files follows that of
+    EasyChair. The EasyChair format has been inferred from actual files; there is thus no guarantee
+    that the format is exactly correct.
+
+    Parameters
+    ----------
+        num_submissions: int
+            The number of submissions to generate.
+        committee_size: int
+            The number of persons to generate.
+        submission_file_path: str
+            The path to the submission file that will be generated.
+        submission_topic_file_path: str
+            The path to the submission_topic file that will be generated.
+        author_file_path: str
+            The path to the author file that will be generated.
+        committee_file_path: str
+            The path to the committee file that will be generated.
+        committee_topic_file_path: str
+            The path to the committee_topic file that will be generated.
+        topic_list: list
+            A list of topics to choose from for the topics of the submissions and of the committee
+            members.
+        bidding_file_path: str
+            The path to the bidding file that will be generated.
+        review_file_path: str
+            The path to the review file that will be generated.
+    """
     generate_submission_files(
         num_submissions,
         submission_file_path=submission_file_path,
@@ -272,19 +399,25 @@ def generate_full_conference(
     )
 
 
-if __name__ == "__main__":
-    areas_to_topics, topics_to_areas = read_topics(os.path.join("..", "easychair_sample_files", "topics.csv"))
-    generate_full_conference(
-        500,
-        1500,
-        submission_file_path=os.path.join("..", "easychair_sample_files", "submission.csv"),
-        submission_topic_file_path=os.path.join("..", "easychair_sample_files",
-                                                "submission_topic.csv"),
-        author_file_path=os.path.join("..", "easychair_sample_files", "author.csv"),
-        committee_file_path=os.path.join("..", "easychair_sample_files", "committee.csv"),
-        committee_topic_file_path=os.path.join("..", "easychair_sample_files",
-                                               "committee_topic.csv"),
-        bidding_file_path=os.path.join("..", "easychair_sample_files", "bidding.csv"),
-        review_file_path=os.path.join("..", "easychair_sample_files", "review.csv"),
-        topic_list=list(topics_to_areas)
-    )
+# if __name__ == "__main__":
+#     import os
+#
+#     from easychair_extra.read import read_topics
+#
+#     areas_to_topics, topics_to_areas = read_topics(
+#         os.path.join("..", "easychair_sample_files", "topics.csv")
+#     )
+#     generate_full_conference(
+#         1000,
+#         2800,
+#         submission_file_path=os.path.join("..", "easychair_sample_files", "submission.csv"),
+#         submission_topic_file_path=os.path.join("..", "easychair_sample_files",
+#                                                 "submission_topic.csv"),
+#         author_file_path=os.path.join("..", "easychair_sample_files", "author.csv"),
+#         committee_file_path=os.path.join("..", "easychair_sample_files", "committee.csv"),
+#         committee_topic_file_path=os.path.join("..", "easychair_sample_files",
+#                                                "committee_topic.csv"),
+#         bidding_file_path=os.path.join("..", "easychair_sample_files", "bidding.csv"),
+#         review_file_path=os.path.join("..", "easychair_sample_files", "review.csv"),
+#         topic_list=list(topics_to_areas)
+#     )
diff --git a/easychair_extra/programcommittee.py b/easychair_extra/programcommittee.py
@@ -1,4 +1,9 @@
-def papers_without_pc(committee_df, submission_df):
+from __future__ import annotations
+
+from pandas import DataFrame
+
+
+def papers_without_pc(committee_df: DataFrame, submission_df: DataFrame):
     """Inserts a column in the submission dataframe called "no_author_pc" indicating
     whether no author of a submission is part of the program committee. This
     is "None" if all authors are students.
@@ -13,8 +18,13 @@ def papers_without_pc(committee_df, submission_df):
         committee_df : pandas.DataFrame
             The committee dataframe
     """
+    if "authors_id" not in submission_df.columns:
+        raise ValueError("There is no 'authors_id' column in the submission dataframe. Did you "
+                         "forget to pass a 'author_file_path' argument to the read_submission "
+                         "function?")
+
     def aux(row):
-        if row["all_authors_students"]:
+        if row.get("all_authors_students"):
             return None
         return not any(a in committee_df["person #"].values for a in row["authors_id"])