Skip to content

Commit

Permalink
add processed data
Browse files Browse the repository at this point in the history
  • Loading branch information
terryyz committed Apr 14, 2024
1 parent 3cbeb25 commit 9dc8383
Show file tree
Hide file tree
Showing 347 changed files with 37,067 additions and 0 deletions.
87 changes: 87 additions & 0 deletions data/processed/f_327_jenny_w_doc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
import random
import matplotlib.pyplot as plt


def f_327(points: int):
    """
    Draw a line plot of freshly generated random numbers against their indices.

    Each value comes from ``random.random()``; the x-coordinates are the
    positions ``0 .. points - 1`` of the values in the returned list.

    Parameters:
    - points (int): Number of random points to generate.

    Returns:
    - Returns a tuple containing:
        - A list of generated random numbers.
        - A matplotlib Axes object representing the plot.

    Requirements:
    - random
    - matplotlib.pyplot

    Example:
    >>> import random
    >>> random.seed(0)
    >>> f_327(5)
    ([0.8444218515250481, 0.7579544029403025, 0.420571580830845, 0.25891675029296335, 0.5112747213686085], <Axes: >)
    >>> f_327(3)
    ([0.4049341374504143, 0.7837985890347726, 0.30331272607892745], <Axes: >)
    """
    # ``range`` rejects non-integer input with a TypeError before any figure
    # is created, so invalid arguments never leave a stray plot behind.
    values = [random.random() for _ in range(points)]
    positions = list(range(len(values)))

    figure_and_axes = plt.subplots()
    axes = figure_and_axes[1]
    axes.plot(positions, values)

    return values, axes

import unittest
import random
class TestCases(unittest.TestCase):
    """Tests for ``f_327``: value generation, input validation, and plot data."""

    def tearDown(self):
        # Close every figure opened by the test so they do not accumulate.
        plt.close("all")

    def test_case_1(self):
        random.seed(0)
        y, _ = f_327(5)
        # Test correct number of points are generated
        self.assertEqual(len(y), 5)

    def test_case_2(self):
        random.seed(0)
        y, _ = f_327(5)
        # Test expected values
        self.assertTrue(all(0 <= num <= 1 for num in y))
        expected = [
            0.8444218515250481,
            0.7579544029403025,
            0.420571580830845,
            0.25891675029296335,
            0.5112747213686085,
        ]
        # assertAlmostEqual cannot subtract lists, so compare element-wise;
        # passing the lists directly only works when they are exactly equal.
        self.assertEqual(len(y), len(expected))
        for actual, wanted in zip(y, expected):
            self.assertAlmostEqual(actual, wanted)

    def test_case_3(self):
        random.seed(0)
        # Test incorrect data types
        with self.assertRaises(TypeError):
            f_327("5")
        with self.assertRaises(TypeError):
            f_327([])
        with self.assertRaises(TypeError):
            f_327(None)

    def test_case_4(self):
        random.seed(0)
        # Test handling 1 number
        y, ax = f_327(1)
        # Assert that 1 random number is generated
        self.assertEqual(len(y), 1)
        # Assert that the plot has the correct x and y data
        self.assertEqual(list(ax.lines[0].get_xdata()), [0])
        self.assertEqual(list(ax.lines[0].get_ydata()), y)

    def test_case_5(self):
        random.seed(0)
        # Test handling no random numbers
        y, ax = f_327(0)
        self.assertEqual(len(y), 0)
        # Assert that the plot has no data
        self.assertEqual(list(ax.lines[0].get_xdata()), [])
        self.assertEqual(list(ax.lines[0].get_ydata()), [])
121 changes: 121 additions & 0 deletions data/processed/f_328_jenny_wo_doc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
import sqlite3
import pandas as pd


def f_328(db_file: str, query: str) -> pd.DataFrame:
    """Query an SQLite database and return the results.

    This function connects to a given SQLite database, executes a given SQL query,
    and returns the results as a pandas DataFrame. The connection is always
    closed before the function returns, even if the query raises.

    Parameters:
    - db_file (str): Path to the SQLite database file.
    - query (str): SQL query to execute. It is run verbatim, so it must come
      from a trusted source.

    Returns:
    - pd.DataFrame: A DataFrame containing the results of the executed query.

    Requirements:
    - sqlite3
    - pandas

    Example:
    >>> db_file = 'sample_database.db'
    >>> df = f_328(db_file, "SELECT * FROM users WHERE name = 'John Doe'")
    pd.DataFrame:
    id        name  age
    --  ----------  ---
    ..    John Doe   ..
    >>> df = f_328(db_file, "SELECT age, COUNT(*) AS count FROM users GROUP BY age")
    pd.DataFrame:
    age  count
    ---  -----
    25       3
    """
    conn = sqlite3.connect(db_file)
    try:
        # ``with conn`` only commits or rolls back the transaction; it does
        # not close the connection, hence the explicit close() below.
        with conn:
            return pd.read_sql_query(query, conn)
    finally:
        conn.close()

import unittest
import sqlite3
from faker import Faker
import os
class TestCases(unittest.TestCase):
    """Tests for ``f_328`` against a throwaway SQLite database of users.

    The database holds 100 rows of Faker-generated users plus the five fixed
    name/age pairs listed in ``specific_names`` and ``specific_ages``.
    """

    fake = Faker()
    specific_names = [
        "John Doe",
        "Jane Smith",
        "Alice Brown",
        "Bob White",
        "Charlie Green",
    ]
    specific_ages = [25, 30, 35, 40, 45]

    @classmethod
    def setUpClass(cls):
        """Build the shared database file once for the whole class."""
        cls.db_file = cls.generate_test_data_with_file()

    @classmethod
    def tearDownClass(cls):
        """Delete the database file created in ``setUpClass``."""
        os.remove(cls.db_file)

    @staticmethod
    def generate_test_data_with_file() -> str:
        """Create and populate the SQLite test database, returning its path."""
        path = "./temp_test_db.sqlite3"
        # Start from a clean file so a leftover from an earlier run cannot
        # make CREATE TABLE fail or inflate the row count.
        if os.path.exists(path):
            os.remove(path)

        insert_sql = "INSERT INTO users (name, age) VALUES (?, ?)"
        connection = sqlite3.connect(path)
        connection.execute(
            """
            CREATE TABLE users (
                id INTEGER PRIMARY KEY,
                name TEXT NOT NULL,
                age INTEGER NOT NULL
            )
            """
        )
        random_rows = [
            (TestCases.fake.name(), TestCases.fake.random_int(min=20, max=70))
            for _ in range(100)
        ]
        connection.executemany(insert_sql, random_rows)
        connection.executemany(
            insert_sql, zip(TestCases.specific_names, TestCases.specific_ages)
        )
        connection.commit()
        connection.close()
        return path

    def test_case_1(self):
        """Selecting everything returns all rows, including the fixed users."""
        result = f_328(self.db_file, "SELECT * FROM users")
        self.assertEqual(len(result), 100 + len(self.specific_names))
        found_names = result["name"].values
        for expected_name in self.specific_names:
            self.assertIn(expected_name, found_names)

    def test_case_2(self):
        """An IN filter on the fixed names returns their names and ages."""
        quoted = ", ".join(f"'{person}'" for person in self.specific_names)
        result = f_328(
            self.db_file,
            f"SELECT name, age FROM users WHERE name IN ({quoted})",
        )
        for expected_name in self.specific_names:
            self.assertIn(expected_name, result["name"].values)
        for expected_age in self.specific_ages:
            self.assertIn(expected_age, result["age"].values)

    def test_case_3(self):
        """A lower age bound is respected by every returned row."""
        threshold = self.fake.random_int(min=20, max=60)
        result = f_328(self.db_file, f"SELECT * FROM users WHERE age > {threshold}")
        self.assertTrue(all(result["age"] > threshold))

    def test_case_4(self):
        """ORDER BY name yields names in ascending order."""
        result = f_328(self.db_file, "SELECT * FROM users ORDER BY name")
        returned = result["name"].tolist()
        self.assertListEqual(returned, sorted(returned))

    def test_case_5(self):
        """An upper age bound plus ORDER BY age DESC is honoured."""
        threshold = self.fake.random_int(min=20, max=30)
        result = f_328(
            self.db_file,
            f"SELECT * FROM users WHERE age < {threshold} ORDER BY age DESC",
        )
        self.assertTrue(all(result["age"] < threshold))
        ages = result["age"].tolist()
        # Each age must be at least as large as the one that follows it.
        self.assertTrue(all(left >= right for left, right in zip(ages, ages[1:])))
116 changes: 116 additions & 0 deletions data/processed/f_329_jenny_wo_doc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
import pandas as pd
import json


def f_329(data: dict, output_path: str = "./default_data_output.json") -> str:
    """Build a DataFrame from the given dictionary, drop the column named 'c'
    if it exists, and save the result as a JSON file.

    The JSON document has the shape ``{column: {row_index: value}}``. Missing
    parent directories of ``output_path`` are created.

    Parameters:
    - data (dict): The input data dictionary.
    - output_path (str, optional): The path where the JSON file should be saved. Default is './default_data_output.json'.

    Returns:
    - str: Path where the JSON file was saved.

    Requirements:
    - pandas
    - json

    Example:
    >>> f_329({'a': [1,2], 'b': [3,4], 'c': [5,6]})
    './default_data_output.json'
    >>> f_329({'a': [1,2], 'b': [3,4], 'c': [5,6]}, 'custom/path/results.json')
    'custom/path/results.json'
    """
    import os  # local import: only needed to prepare the output directory

    frame = pd.DataFrame(data)
    # Drop column named 'c' if it exists
    frame = frame.drop(columns="c", errors="ignore")
    # Convert the DataFrame to dictionary
    data_dict = frame.to_dict(orient="dict")

    # A bare filename has an empty dirname, which makedirs would reject.
    parent_dir = os.path.dirname(output_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # Save the dictionary as a JSON file
    with open(output_path, "w", encoding="utf-8") as file:
        json.dump(data_dict, file)

    return output_path

import unittest
import pandas as pd
import json
import os
class TestCases(unittest.TestCase):
    """Tests for ``f_329``: the JSON written must match the input minus column 'c'."""

    def read_json_file(self, path):
        """Load and return the parsed JSON content stored at ``path``."""
        with open(path, "r") as handle:
            return json.load(handle)

    def tearDown(self):
        """Remove any output file a test may have produced."""
        candidates = [
            "./default_data_output.json",
            "./custom_data_output_2.json",
            "./custom_data_output_3.json",
            "./custom_data_output_4.json",
            "./custom_data_output_5.json",
        ]
        for leftover in filter(os.path.exists, candidates):
            os.remove(leftover)

    def convert_keys_to_str(self, dictionary):
        """Return a copy of a nested dict with every key converted to ``str``.

        JSON object keys are always strings, so the expected data needs the
        same conversion before it can be compared with what was read back.
        Non-dict values are returned unchanged.
        """
        if isinstance(dictionary, dict):
            return {
                str(key): self.convert_keys_to_str(value)
                for key, value in dictionary.items()
            }
        return dictionary

    def _check_output(self, output_path, expected_frame):
        """Assert the file exists and holds ``expected_frame`` as a dict."""
        self.assertTrue(os.path.exists(output_path))
        expected_data = self.convert_keys_to_str(expected_frame.to_dict(orient="dict"))
        self.assertEqual(self.read_json_file(output_path), expected_data)

    def test_case_1(self):
        # Basic numeric data that includes column "c", default output path
        data = {"a": [1, 2], "b": [3, 4], "c": [5, 6]}
        frame = pd.DataFrame(data)
        written_to = f_329(data)
        self._check_output(written_to, frame.drop(columns="c"))

    def test_case_2(self):
        # Non-numeric data that includes column "c", custom output path
        data = {"name": ["Alice", "Bob"], "country": ["USA", "Canada"], "c": ["x", "y"]}
        frame = pd.DataFrame(data)
        written_to = f_329(data, "./custom_data_output_2.json")
        self._check_output(written_to, frame.drop(columns="c"))

    def test_case_3(self):
        # Several columns, none of them named "c"
        data = {"age": [25, 30], "height": [170, 175]}
        frame = pd.DataFrame(data)
        written_to = f_329(data, "./custom_data_output_3.json")
        self._check_output(written_to, frame)

    def test_case_4(self):
        # Mixed value types including column "c", default output path
        data = {
            "id": [1, 2],
            "is_student": [True, False],
            "grades": ["A", "B"],
            "c": [0.5, 0.8],
        }
        frame = pd.DataFrame(data)
        written_to = f_329(data)
        self._check_output(written_to, frame.drop(columns="c"))

    def test_case_5(self):
        # Empty input dictionary
        data = {}
        frame = pd.DataFrame(data)
        written_to = f_329(data, "./custom_data_output_5.json")
        self._check_output(written_to, frame)
Loading

0 comments on commit 9dc8383

Please sign in to comment.