Skip to content

Commit

Permalink
read_author
Browse files Browse the repository at this point in the history
  • Loading branch information
Simon-Rey committed Nov 19, 2024
1 parent 9e75414 commit 5ac55f1
Show file tree
Hide file tree
Showing 10 changed files with 141,943 additions and 142,406 deletions.
109 changes: 62 additions & 47 deletions easychair_extra/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,22 @@
from datetime import datetime
from faker import Faker

from easychair_extra.read import author_list_to_str


def generate_random_author(max_author_id):
    """Generate one fake author record.

    The first and last names are drawn separately from Faker rather than by
    splitting a full name on spaces: full names produced by Faker may carry a
    prefix or suffix (e.g. "Dr." or "MD"), in which case the first two
    space-separated tokens are not the first and last name.

    Parameters
    ----------
    max_author_id : int
        The largest author identifier already in use. The generated author is
        given the identifier ``max_author_id + 1``.

    Returns
    -------
    dict
        A dictionary with the keys "first name", "last name", "email",
        "country", "affiliation", "Web page" and "person #".
    """
    fake = Faker()
    return {
        "first name": fake.first_name(),
        "last name": fake.last_name(),
        "email": fake.email(),
        "country": fake.country(),
        # Faker sentences end with a period; strip it from the affiliation.
        "affiliation": fake.sentence(nb_words=4)[:-1],
        "Web page": fake.url(),
        "person #": max_author_id + 1,
    }


def generate_submission_files(
num_submissions: int,
Expand Down Expand Up @@ -43,13 +59,23 @@ def generate_submission_files(
submissions = []
sub_to_authors = defaultdict(list)
all_authors = dict()
max_author_id = 1
sub_to_topics = {}
for sub_id in range(1, num_submissions + 2):
num_authors = random.randint(1, 5)
authors = [fake.name() for _ in range(num_authors)]
sub_to_authors[sub_id] = authors
for author in authors:
all_authors[author] = None
authors_names = []
for i in range(num_authors):
if len(all_authors) > 0 and random.random() < 0.1:
random_author = random.choice(list(all_authors.values()))
while random_author["first name"] + " " + random_author["last name"] in authors_names:
random_author = random.choice(list(all_authors.values()))
author = random_author
else:
author = generate_random_author(max_author_id)
all_authors[author["first name"] + " " + author["last name"]] = author
max_author_id += 1
authors_names.append(author["first name"] + " " + author["last name"])
sub_to_authors[sub_id] = authors_names
sub_to_topics[sub_id] = random.sample(topic_list, random.randint(2, 5))
decision = random.choice(
["no decision"] * 10
Expand All @@ -61,7 +87,7 @@ def generate_submission_files(
submission_dict = {
"#": sub_id,
"title": fake.sentence(nb_words=6)[:-1],
"authors": authors,
"authors": author_list_to_str(authors_names),
"submitted": datetime.now().strftime("%Y-%m-%d %H:%M"),
"last updated": datetime.now().strftime("%Y-%m-%d %H:%M"),
"form fields": "",
Expand Down Expand Up @@ -106,17 +132,6 @@ def generate_submission_files(
"corresponding?",
]

for author_id, author in enumerate(all_authors):
all_authors[author] = {
"first name": author.split(" ")[0],
"last name": author.split(" ")[1],
"email": fake.email(),
"country": fake.country(),
"affiliation": fake.sentence(nb_words=4)[:-1],
"Web page": fake.url(),
"person #": author_id + 1,
}

with open(author_file_path, "w", encoding="utf-8") as f:
writer = csv.writer(f, delimiter=",")
writer.writerow(author_headers)
Expand Down Expand Up @@ -440,34 +455,34 @@ def generate_full_conference(
)


# if __name__ == "__main__":
# import os
#
# from easychair_extra.read import read_topics
#
# current_dir = os.path.dirname(os.path.abspath(__file__))
#
# areas_to_topics, topics_to_areas = read_topics(
# os.path.join(current_dir, "..", "easychair_sample_files", "topics.csv")
# )
# generate_full_conference(
# 1000,
# 2800,
# submission_file_path=os.path.join(
# current_dir, "..", "easychair_sample_files", "submission.csv"
# ),
# submission_topic_file_path=os.path.join(
# current_dir, "..", "easychair_sample_files", "submission_topic.csv"
# ),
# author_file_path=os.path.join(current_dir, "..", "easychair_sample_files", "author.csv"),
# committee_file_path=os.path.join(
# current_dir, "..", "easychair_sample_files", "committee.csv"
# ),
# committee_topic_file_path=os.path.join(current_dir, "..", "easychair_sample_files",
# "committee_topic.csv"),
# bidding_file_path=os.path.join(
# current_dir, "..", "easychair_sample_files", "bidding.csv"
# ),
# review_file_path=os.path.join(current_dir, "..", "easychair_sample_files", "review.csv"),
# topic_list=list(topics_to_areas)
# )
if __name__ == "__main__":
    # Script entry point: generate a full synthetic conference and write every
    # output file into the "easychair_sample_files" directory located one
    # level above this module.
    import os

    from easychair_extra.read import read_topics

    current_dir = os.path.dirname(os.path.abspath(__file__))
    sample_dir = os.path.join(current_dir, "..", "easychair_sample_files")

    # Topics are read from the sample topic file; only the topic names (the
    # keys of the topic -> area mapping) are handed to the generator.
    areas_to_topics, topics_to_areas = read_topics(
        os.path.join(sample_dir, "topics.csv")
    )

    # Keyword argument name -> output file inside the sample directory.
    output_paths = {
        "submission_file_path": os.path.join(sample_dir, "submission.csv"),
        "submission_topic_file_path": os.path.join(sample_dir, "submission_topic.csv"),
        "author_file_path": os.path.join(sample_dir, "author.csv"),
        "committee_file_path": os.path.join(sample_dir, "committee.csv"),
        "committee_topic_file_path": os.path.join(sample_dir, "committee_topic.csv"),
        "bidding_file_path": os.path.join(sample_dir, "bidding.csv"),
        "review_file_path": os.path.join(sample_dir, "review.csv"),
    }

    # NOTE(review): the two positional arguments are presumably the numbers of
    # submissions and of committee members -- confirm against the signature of
    # generate_full_conference.
    generate_full_conference(
        1000,
        2800,
        topic_list=list(topics_to_areas),
        **output_paths,
    )
22 changes: 17 additions & 5 deletions easychair_extra/read.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,19 +230,22 @@ def read_submission(
)

if author_file_path:
sub_to_authors = {}
sub_to_authors = defaultdict(list)
corresponding_authors = defaultdict(list)
with open(author_file_path, encoding="utf-8") as f:
reader = csv.DictReader(f)
for row in reader:
sub_id = int(row["submission #"].strip()) # The id of the submission
person_id = int(row["person #"].strip()) # The id of the person in EC
if sub_id in sub_to_authors:
sub_to_authors[sub_id].append(person_id)
else:
sub_to_authors[sub_id] = [person_id]
sub_to_authors[sub_id].append(person_id)
if row["corresponding?"] == "yes":
corresponding_authors[sub_id].append(person_id)
df["authors_id"] = df.apply(
lambda df_row: sub_to_authors.get(df_row["#"], []), axis=1
)
df["corresponding_id"] = df.apply(
lambda df_row: corresponding_authors.get(df_row["#"], []), axis=1
)

if submission_field_value_path:
sub_to_is_students = {}
Expand Down Expand Up @@ -272,3 +275,12 @@ def read_submission(
lambda df_row: sub_to_total_scores.get(df_row["#"], []), axis=1
)
return df


def read_author(author_file_path):
    """Read an author file and return one row per author.

    The author file contains one row per (author, submission) pair. The rows
    are grouped by the columns describing the author, and the submissions of
    each author are collected into a list.

    Parameters
    ----------
    author_file_path : str
        Path to the comma-separated author file. It must contain the columns
        "first name", "last name", "email", "country", "affiliation",
        "Web page", "person #" and "submission #".

    Returns
    -------
    pandas.DataFrame
        One row per distinct author with the grouping columns, a
        "submission_ids" column holding the list of the author's
        "submission #" values, and a "full name" column
        ("first name" + " " + "last name").
    """
    df = pd.read_csv(author_file_path, delimiter=",", encoding="utf-8")
    author_columns = [
        "first name",
        "last name",
        "email",
        "country",
        "affiliation",
        "Web page",
        "person #",
    ]
    # dropna=False: by default groupby discards every row that has a missing
    # value in any key column, which would silently remove authors who left an
    # optional field (typically "Web page") empty.
    grouped_df = df.groupby(author_columns, dropna=False)
    res_df = grouped_df["submission #"].apply(list).reset_index(name="submission_ids")
    res_df["full name"] = res_df["first name"] + " " + res_df["last name"]
    return res_df

Loading

0 comments on commit 5ac55f1

Please sign in to comment.