Skip to content

Commit

Permalink
tests and all
Browse files Browse the repository at this point in the history
  • Loading branch information
Simon-Rey committed Nov 7, 2024
1 parent 4f6c3b3 commit f53be1d
Show file tree
Hide file tree
Showing 20 changed files with 142,853 additions and 47,582 deletions.
53 changes: 53 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions

name: build

on:
push:
branches: [ "main" ]
pull_request:
branches: [ "main" ]
workflow_dispatch:
branches: [ "main" ]


jobs:
build:

runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"]

steps:
- name: Checkout
uses: actions/checkout@v4

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}

- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install flake8
pip install .
pip install .[dev]
- name: Lint with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
- name: Run tests with unittest and collect coverage
run: python -m coverage run --omit="*/test*,/usr/lib*,tests/*" -m unittest

- name: Upload coverage reports to Codecov
uses: codecov/codecov-action@v4
env:
CODECOV_TOKEN: ${{ secrets.CODECOV }}
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# EasyChair-Extra

[![Build badge](https://github.com/COMSOC-Community/easychair-extra/workflows/build/badge.svg)](https://github.com/COMSOC-Community/easychair-extra/actions/workflows/build.yml)
[![codecov](https://codecov.io/gh/COMSOC-Community/easychair-extra/branch/main/graphs/badge.svg)](https://codecov.io/gh/COMSOC-Community/easychair-extra/tree/main)


Python package to work with files exported from EasyChair.
Useful to develop tools when running a conference.

Expand All @@ -13,6 +17,7 @@ The package is documented in the code and there is no current plan to provide
dedicated documentation. Roughly speaking:

- `easychair_extra.read` provides functions to read EasyChair files;
- `easychair_extra.generate` provides functions to generate random EasyChair files;
- `easychair_extra.programcommittee` provides functions relating to the committee;
- `easychair_extra.reviewassignment` provides functions relating to the assignment of
submissions to PC members;
Expand Down
219 changes: 176 additions & 43 deletions easychair_extra/generate.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,38 @@
from __future__ import annotations

import csv
import os.path
import random

from collections import defaultdict
from datetime import datetime

from faker import Faker

from easychair_extra.read import author_list_to_str, read_topics


def generate_submission_files(
num_submission: int,
num_submissions: int,
*,
submission_file_path: str = "submission.csv",
submission_topic_file_path: str = "submission_topic.csv",
author_file_path: str = "author.csv",
topic_list: list = None):
"""Generates a sample "author.csv" file following the typical EasyChair format (no guarantees
here). """
"""Generates sample files related to the submissions. Specifically, the submission, the
submission topic and the author files are generated. The format of the files follows that of
EasyChair. The EasyChair format has been inferred from actual files, there is thus no guarantee
here that the format is exactly correct.
Parameters
----------
num_submissions: int
The number of submissions to generate.
submission_file_path: str
The path to the submission file that will be generated.
submission_topic_file_path: str
The path to the submission_topic file that will be generated.
author_file_path: str
The path to the author file that will be generated.
topic_list: list
A list of topics to choose from for the topics of the submissions.
"""

fake = Faker()

Expand All @@ -28,19 +43,30 @@ def generate_submission_files(
sub_to_authors = defaultdict(list)
all_authors = dict()
sub_to_topics = {}
for sub_id in range(1, num_submission + 2):
for sub_id in range(1, num_submissions + 2):
num_authors = random.randint(1, 5)
authors = [fake.name() for _ in range(num_authors)]
sub_to_authors[sub_id] = authors
for author in authors:
all_authors[author] = None
sub_to_topics[sub_id] = random.sample(topic_list, random.randint(2, 5))
decision = random.choice(
["no decision"] * 10 + ["desk reject"] * 3 + ["reject"] * 25 + ["accept"] * 10 +
["withdrawn"] * 1
)
submission_dict = {
"#": sub_id,
"title": fake.sentence(nb_words=6)[:-1],
"authors": authors,
"submitted": datetime.now().strftime("%Y-%m-%d %H:%M"),
"last updated": datetime.now().strftime("%Y-%m-%d %H:%M"),
"form fields": "",
"keywords": fake.words(nb=random.randint(2, 5)),
"decision": decision,
"notified": "no",
"reviews sent": "no",
"abstract": fake.text(max_nb_chars=500),
"deleted?": 'yes' if random.random() < 0.05 else 'no'
}
submissions.append(submission_dict)

Expand All @@ -62,20 +88,7 @@ def generate_submission_files(
writer = csv.writer(f, delimiter=",")
writer.writerow(submission_headers)
for s in submissions:
writer.writerow([
s["#"],
s["title"],
author_list_to_str(s["authors"]),
datetime.now().strftime("%Y-%m-%d %H:%M"),
datetime.now().strftime("%Y-%m-%d %H:%M"),
"",
'\n'.join(s["keywords"]),
'',
'no',
'no',
s["abstract"],
'yes' if random.random() < 0.05 else 'no'
])
writer.writerow([s[h] for h in submission_headers])

author_headers = ["submission #", "first name", "last name", "email", "country", "affiliation",
"Web page", "person #", "corresponding?"]
Expand Down Expand Up @@ -126,6 +139,25 @@ def generate_committee_files(
committee_topic_file_path: str = "committee_topic.csv",
topic_list: list = None
):
"""Generates sample files related to the committee. Specifically, the committee and the
committee topic files are generated. The format of the files follows that of
EasyChair. The EasyChair format has been inferred from actual files, there is thus no guarantee
here that the format is exactly correct.
Parameters
----------
committee_size: int
The number of persons to generate.
authors_file_path: str
The path to the author file as exported from EasyChair, or as generated by the function
generate_submission_files. This is used to include some authors as reviewers.
committee_file_path: str
The path to the committee file that will be generated.
committee_topic_file_path: str
The path to the committee_topic file that will be generated.
topic_list: list
A list of topics to choose from for the topics of the committee members.
"""
fake = Faker()

if topic_list is None:
Expand Down Expand Up @@ -161,7 +193,7 @@ def generate_committee_files(
all_persons.append(person_details)

key = (
person_details["#"], person_details["first name"] + " " + person_details["last name"])
person_details["#"], person_details["first name"] + " " + person_details["last name"])
person_to_topics[key] = random.sample(topic_list, random.randint(5, 10))

committee_headers = ["#", "person #", "first name", "last name", "email", "country",
Expand Down Expand Up @@ -192,11 +224,31 @@ def generate_committee_files(


def generate_review_files(
submission_file_path,
committee_file_path,
bidding_file_path="bidding.csv",
review_file_path="review.csv",
submission_file_path: str,
committee_file_path: str,
bidding_file_path: str = "bidding.csv",
review_file_path: str = "review.csv",
):
"""Generates sample files related to the reviews. Specifically, the bidding and the review
files are generated. The format of the files follows that of EasyChair. The EasyChair format
has been inferred from actual files, there is thus no guarantee here that the format is
exactly correct.
Parameters
----------
submission_file_path: str
The path to the submission file as exported from EasyChair, or as generated by the
function generate_submission_files. Reviews for the submissions are generated.
committee_file_path: str
The path to the committee file as exported from EasyChair, or as generated by the
function generate_committee_files. The reviewers are taken from this file.
bidding_file_path: str
The path to the bidding file that will be generated.
review_file_path: str
The path to the review file that will be generated.
"""
fake = Faker()

with open(submission_file_path, encoding="utf-8") as f:
reader = csv.DictReader(f)
all_submissions = list(reader)[1:]
Expand Down Expand Up @@ -234,6 +286,53 @@ def generate_review_files(
bid
])

# Build the review records: every submission receives a random number of reviews, each
# written by a distinct committee member whose role is "PC member".
all_reviews = []
potential_reviewers = [p for p in all_persons if p["role"] == "PC member"]
review_counter = 1
for submission in all_submissions:
    # Weighted draw of how many reviews this submission gets. The draw is capped by the
    # number of available PC members, since random.sample raises ValueError when asked for
    # more elements than the population holds.
    num_reviewers = random.choice([0] + [1] * 2 + [2] * 3 + [4] * 4)
    num_reviewers = min(num_reviewers, len(potential_reviewers))
    for reviewer_idx, reviewer in enumerate(random.sample(potential_reviewers, num_reviewers)):
        score = random.randint(1, 10)
        review = {
            "#": review_counter,
            "submission #": submission["#"],
            "member #": reviewer["#"],
            "member name": reviewer["first name"] + " " + reviewer["last name"],
            "number": reviewer_idx,
            "version": random.randint(1, 5),
            "text": fake.text(2000),
            "scores": f"Score: {score}\nConfidence: {random.randint(1, 5)}",
            "total score": score,
            "reviewer first name": "",
            "reviewer last name": "",
            "reviewer email": "",
            "reviewer person #": "",
            "date": datetime.now().strftime("%Y-%m-%d"),
            "time": datetime.now().strftime("%H:%M"),
            "attachment?": "no",
        }
        if random.random() < 0.05:
            # Occasionally the review is delegated to a sub-reviewer. The sub-reviewer has to
            # be a different person than the assigned reviewer; the previous loop condition
            # was inverted and kept drawing until it picked the reviewer themself.
            sub_reviewer_candidates = [p for p in all_persons if p != reviewer]
            if sub_reviewer_candidates:
                sub_reviewer = random.choice(sub_reviewer_candidates)
                review["reviewer first name"] = sub_reviewer["first name"]
                review["reviewer last name"] = sub_reviewer["last name"]
                review["reviewer email"] = sub_reviewer["email"]
                review["reviewer person #"] = sub_reviewer["person #"]

        all_reviews.append(review)
        review_counter += 1

review_headers = ["#", "submission #", "member #", "member name", "number", "version", "text",
"scores", "total score", "reviewer first name", "reviewer last name",
"reviewer email", "reviewer person #", "date", "time", "attachment?"]

with open(review_file_path, "w", encoding="utf-8") as f:
writer = csv.writer(f, delimiter=",")
writer.writerow(review_headers)
for review in all_reviews:
writer.writerow([review[h] for h in review_headers])


def generate_full_conference(
num_submissions,
Expand All @@ -248,6 +347,34 @@ def generate_full_conference(
review_file_path="review.csv",
topic_list=None,
):
"""Generates sample files to simulate a full conference. The format of the files follows that of
EasyChair. The EasyChair format has been inferred from actual files, there is thus no guarantee
here that the format is exactly correct.
Parameters
----------
num_submissions: int
The number of submissions to generate.
committee_size: int
The number of persons to generate.
submission_file_path: str
The path to the submission file that will be generated.
submission_topic_file_path: str
The path to the submission_topic file that will be generated.
author_file_path: str
The path to the author file that will be generated.
committee_file_path: str
The path to the committee file that will be generated.
committee_topic_file_path: str
The path to the committee_topic file that will be generated.
topic_list: list
A list of topics to choose from for the topics of the submissions and of the committee
members.
bidding_file_path: str
The path to the bidding file that will be generated.
review_file_path: str
The path to the review file that will be generated.
"""
generate_submission_files(
num_submissions,
submission_file_path=submission_file_path,
Expand All @@ -272,19 +399,25 @@ def generate_full_conference(
)


if __name__ == "__main__":
areas_to_topics, topics_to_areas = read_topics(os.path.join("..", "easychair_sample_files", "topics.csv"))
generate_full_conference(
500,
1500,
submission_file_path=os.path.join("..", "easychair_sample_files", "submission.csv"),
submission_topic_file_path=os.path.join("..", "easychair_sample_files",
"submission_topic.csv"),
author_file_path=os.path.join("..", "easychair_sample_files", "author.csv"),
committee_file_path=os.path.join("..", "easychair_sample_files", "committee.csv"),
committee_topic_file_path=os.path.join("..", "easychair_sample_files",
"committee_topic.csv"),
bidding_file_path=os.path.join("..", "easychair_sample_files", "bidding.csv"),
review_file_path=os.path.join("..", "easychair_sample_files", "review.csv"),
topic_list=list(topics_to_areas)
)
# if __name__ == "__main__":
# import os
#
# from easychair_extra.read import read_topics
#
# areas_to_topics, topics_to_areas = read_topics(
# os.path.join("..", "easychair_sample_files", "topics.csv")
# )
# generate_full_conference(
# 1000,
# 2800,
# submission_file_path=os.path.join("..", "easychair_sample_files", "submission.csv"),
# submission_topic_file_path=os.path.join("..", "easychair_sample_files",
# "submission_topic.csv"),
# author_file_path=os.path.join("..", "easychair_sample_files", "author.csv"),
# committee_file_path=os.path.join("..", "easychair_sample_files", "committee.csv"),
# committee_topic_file_path=os.path.join("..", "easychair_sample_files",
# "committee_topic.csv"),
# bidding_file_path=os.path.join("..", "easychair_sample_files", "bidding.csv"),
# review_file_path=os.path.join("..", "easychair_sample_files", "review.csv"),
# topic_list=list(topics_to_areas)
# )
14 changes: 12 additions & 2 deletions easychair_extra/programcommittee.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
def papers_without_pc(committee_df, submission_df):
from __future__ import annotations

from pandas import DataFrame


def papers_without_pc(committee_df: DataFrame, submission_df: DataFrame):
"""Inserts a column in the submission dataframe called "no_author_pc" indicating
whether none of the authors of a submission is part of the program committee. This
is "None" if all authors are students.
Expand All @@ -13,8 +18,13 @@ def papers_without_pc(committee_df, submission_df):
committee_df : pandas.DataFrame
The committee dataframe
"""
if "authors_id" not in submission_df.columns:
raise ValueError("There is no 'authors_id' column in the submission dataframe. Did you "
"forget to pass a 'author_file_path' argument to the read_submission "
"function?")

def aux(row):
if row["all_authors_students"]:
if row.get("all_authors_students"):
return None
return not any(a in committee_df["person #"].values for a in row["authors_id"])

Expand Down
Loading

0 comments on commit f53be1d

Please sign in to comment.