diff --git a/data/instruct_advanced_mysql.csv b/data/instruct_advanced_mysql.csv index 670f864..ad6999b 100644 --- a/data/instruct_advanced_mysql.csv +++ b/data/instruct_advanced_mysql.csv @@ -40,7 +40,7 @@ broker,mysql,keywords_ratio,"SELECT sbTickerSymbol, CASE WHEN SUM(sbTxAmount) = SPM (Selling Profit Margin) = (Total Amount from Sells - (Tax + Commission)) / Total Amount from Sells * 100 TAC = Total Active Customers who joined within a specified timeframe CR = Rank customers by their total transaction volume, identifying the customer with the highest transaction volume as rank 1. This involves joining price data with ticker identifiers and filtering for a specified date range." -car_dealership,mysql,instructions_cte_join,"WITH sale_payments AS (SELECT s.id AS sale_id, s.sale_date, MAX(p.payment_date) AS latest_payment_date FROM sales AS s JOIN payments_received AS p ON s.id = p.sale_id GROUP BY 1, 2) SELECT ROUND(AVG(latest_payment_date - sale_date), 2) AS avg_days_to_payment FROM sale_payments;","What is the average number of days between the sale date and payment received date, rounded to 2 decimal places?","When getting duration between sale and payment date for each sale, get the latest payment for sale by aggregating over the payments_received table first.","When getting duration between sale and payment date for each sale, get the latest payment for sale by aggregating over the payments_received table first. ASP = Calculate the average price of sales within a specific timeframe Last 30 days = Use a range from the current date minus a certain interval to the current date, always ensure to make the necessary joins before utilizing the sales data. 
TSC = Count of sales within a specified period" +car_dealership,mysql,instructions_cte_join,"WITH sale_payments AS (SELECT s.id AS sale_id,s.sale_date,MAX(p.payment_date) AS latest_payment_date FROM sales AS s JOIN payments_received AS p ON s.id = p.sale_id GROUP BY s.id,s.sale_date) SELECT ROUND(AVG(DATEDIFF(latest_payment_date,sale_date)),2) AS avg_days_to_payment FROM sale_payments;","What is the average number of days between the sale date and payment received date, rounded to 2 decimal places?","When getting duration between sale and payment date for each sale, get the latest payment for sale by aggregating over the payments_received table first.","When getting duration between sale and payment date for each sale, get the latest payment for sale by aggregating over the payments_received table first. ASP = Calculate the average price of sales within a specific timeframe Last 30 days = Use a range from the current date minus a certain interval to the current date, always ensure to make the necessary joins before utilizing the sales data. 
TSC = Count of sales within a specified period" car_dealership,mysql,instructions_cte_join,"WITH latest_inventory_status AS (SELECT car_id, is_in_inventory, ROW_NUMBER() OVER (PARTITION BY car_id ORDER BY CASE WHEN snapshot_date IS NULL THEN 1 ELSE 0 END DESC, snapshot_date DESC, CASE WHEN crtd_ts IS NULL THEN 1 ELSE 0 END DESC, crtd_ts DESC) AS rn FROM inventory_snapshots) SELECT c.make, c.model, MAX(s.sale_price) AS highest_sale_price FROM cars AS c JOIN sales AS s ON c.id = s.car_id JOIN latest_inventory_status AS lis ON c.id = lis.car_id WHERE lis.is_in_inventory = FALSE AND lis.rn = 1 GROUP BY c.make, c.model ORDER BY CASE WHEN highest_sale_price IS NULL THEN 1 ELSE 0 END DESC, highest_sale_price DESC;","Return the highest sale price for each make and model of cars that have been sold and are no longer in inventory, ordered by the sale price from highest to lowest.","When getting a car's inventory status, always take the latest status from the inventory_snapshots table","TSC = Count of sales within a specified period MoM = Change in total receivable amounts from one month to the next, comparing with the immediately preceding month. ASP = Mean sale price for a designated start period diff --git a/data/instruct_advanced_sqlite.csv b/data/instruct_advanced_sqlite.csv index 604c7ab..b6bd00b 100644 --- a/data/instruct_advanced_sqlite.csv +++ b/data/instruct_advanced_sqlite.csv @@ -40,7 +40,7 @@ broker,sqlite,keywords_ratio,"SELECT sbTickerSymbol, CASE WHEN SUM(sbTxAmount) = SPM (Selling Profit Margin) = (Total Amount from Sells - (Tax + Commission)) / Total Amount from Sells * 100 TAC = Total Active Customers who joined within a specified timeframe CR = Rank customers by their total transaction volume, identifying the customer with the highest transaction volume as rank 1. This involves joining price data with ticker identifiers and filtering for a specified date range." 
-car_dealership,sqlite,instructions_cte_join,"WITH sale_payments AS (SELECT s.id AS sale_id, s.sale_date, MAX(p.payment_date) AS latest_payment_date FROM sales AS s JOIN payments_received AS p ON s.id = p.sale_id GROUP BY 1, 2) SELECT ROUND(AVG(latest_payment_date - sale_date), 2) AS avg_days_to_payment FROM sale_payments;","What is the average number of days between the sale date and payment received date, rounded to 2 decimal places?","When getting duration between sale and payment date for each sale, get the latest payment for sale by aggregating over the payments_received table first.","When getting duration between sale and payment date for each sale, get the latest payment for sale by aggregating over the payments_received table first. ASP = Calculate the average price of sales within a specific timeframe Last 30 days = Use a range from the current date minus a certain interval to the current date, always ensure to make the necessary joins before utilizing the sales data. TSC = Count of sales within a specified period" +car_dealership,sqlite,instructions_cte_join,"WITH sale_payments AS (SELECT s.id AS sale_id, s.sale_date, MAX(p.payment_date) AS latest_payment_date FROM sales AS s JOIN payments_received AS p ON s.id = p.sale_id GROUP BY s.id, s.sale_date) SELECT ROUND(AVG(julianday(latest_payment_date) - julianday(sale_date)), 2) AS avg_days_to_payment FROM sale_payments;","What is the average number of days between the sale date and payment received date, rounded to 2 decimal places?","When getting duration between sale and payment date for each sale, get the latest payment for sale by aggregating over the payments_received table first.","When getting duration between sale and payment date for each sale, get the latest payment for sale by aggregating over the payments_received table first. 
ASP = Calculate the average price of sales within a specific timeframe Last 30 days = Use a range from the current date minus a certain interval to the current date, always ensure to make the necessary joins before utilizing the sales data. TSC = Count of sales within a specified period" car_dealership,sqlite,instructions_cte_join,"WITH latest_inventory_status AS (SELECT car_id, is_in_inventory, ROW_NUMBER() OVER (PARTITION BY car_id ORDER BY CASE WHEN snapshot_date IS NULL THEN 1 ELSE 0 END DESC, snapshot_date DESC, CASE WHEN crtd_ts IS NULL THEN 1 ELSE 0 END DESC, crtd_ts DESC) AS rn FROM inventory_snapshots) SELECT c.make, c.model, MAX(s.sale_price) AS highest_sale_price FROM cars AS c JOIN sales AS s ON c.id = s.car_id JOIN latest_inventory_status AS lis ON c.id = lis.car_id WHERE lis.is_in_inventory = FALSE AND lis.rn = 1 GROUP BY c.make, c.model ORDER BY CASE WHEN highest_sale_price IS NULL THEN 1 ELSE 0 END DESC, highest_sale_price DESC;","Return the highest sale price for each make and model of cars that have been sold and are no longer in inventory, ordered by the sale price from highest to lowest.","When getting a car's inventory status, always take the latest status from the inventory_snapshots table","TSC = Count of sales within a specified period MoM = Change in total receivable amounts from one month to the next, comparing with the immediately preceding month. 
ASP = Mean sale price for a designated start period diff --git a/data/questions_gen_mysql.csv b/data/questions_gen_mysql.csv index 074bc3d..62ba56c 100644 --- a/data/questions_gen_mysql.csv +++ b/data/questions_gen_mysql.csv @@ -30,9 +30,9 @@ academic,mysql,instruct,"SELECT DISTINCT name FROM author WHERE oid IS NULL;SELE academic,mysql,instruct,"SELECT DISTINCT publication.title FROM DOMAIN JOIN domain_author ON domain.did = domain_author.did JOIN writes ON domain_author.aid = writes.aid JOIN publication ON writes.pid = publication.pid JOIN domain_conference ON domain.did = domain_conference.did WHERE LOWER(domain.name) LIKE '%Sociology%' AND publication.year = 2020 AND publication.cid = domain_conference.cid;SELECT DISTINCT publication.pid FROM DOMAIN JOIN domain_author ON domain.did = domain_author.did JOIN writes ON domain_author.aid = writes.aid JOIN publication ON writes.pid = publication.pid JOIN domain_conference ON domain.did = domain_conference.did WHERE LOWER(domain.name) LIKE '%Sociology%' AND publication.year = 2020 AND publication.cid = domain_conference.cid;SELECT DISTINCT publication.title, publication.pid FROM DOMAIN JOIN domain_author ON domain.did = domain_author.did JOIN writes ON domain_author.aid = writes.aid JOIN publication ON writes.pid = publication.pid JOIN domain_conference ON domain.did = domain_conference.did WHERE LOWER(domain.name) LIKE '%Sociology%' AND publication.year = 2020 AND publication.cid = domain_conference.cid",What are the publications written by authors from the 'Sociology' domain and presented at conferences in the year 2020?,"To get publications written by authors from a given domain, you would need to join domain, domain_author, author to link the domain to the author first, and then join with write to link with the publication id. Finally, to see which ones were presented at conferences, you must join the domain table with the domain_conference table. You must also filter names using ILIKE." 
academic,mysql,instruct,SELECT DISTINCT author.name FROM author JOIN writes ON author.aid = writes.aid JOIN publication ON writes.pid = publication.pid JOIN domain_publication ON publication.pid = domain_publication.pid JOIN domain ON domain_publication.did = domain.did WHERE LOWER(domain.name) LIKE '%computer%science%',"What are the names of the authors who have written publications in the domain ""Computer Science""?","To get publications written by authors from a given domain, you would need to join domain, domain_author, author to link the domain to the author first, and then join with write to link with the publication id. You must also filter names using ILIKE." advising,mysql,date_functions,"SELECT DATE_FORMAT(s.admit_term, '%Y-%m') AS month, COUNT(*) AS total_students FROM advising.student AS s GROUP BY month ORDER BY total_students DESC LIMIT 1",What month were most students admitted?, -advising,mysql,date_functions,SELECT AVG(predicted_graduation_semester - admit_term) AS average_predicted_time_to_graduation FROM student,What's the average predicted time to graduation since admission in no. of days?, +advising,mysql,date_functions,"SELECT AVG(DATEDIFF(predicted_graduation_semester, admit_term)) AS average_predicted_time_to_graduation FROM student;",What's the average predicted time to graduation since admission in no. 
of days?, advising,mysql,date_functions,"SELECT COUNT(*) AS num_students_graduated FROM advising.student WHERE predicted_graduation_semester >= DATE_SUB(CURDATE(), INTERVAL 10 YEAR);",How many students were predicted to graduate in the last 10 years?, -advising,mysql,date_functions,SELECT CURRENT_DATE - MAX(admit_term) AS duration_since_last_admitted_student FROM student,How long has it been in days since the last admitted student?, +advising,mysql,date_functions,"SELECT DATEDIFF(CURRENT_DATE, MAX(admit_term)) AS duration_since_last_admitted_student FROM student;",How long has it been in days since the last admitted student?, advising,mysql,date_functions,"SELECT MONTH(DATE_SUB(s.predicted_graduation_semester, INTERVAL 2 WEEK)) AS month FROM advising.student AS s ORDER BY s.predicted_graduation_semester DESC LIMIT 1;SELECT EXTRACT(MONTH FROM DATE_SUB(predicted_graduation_semester, INTERVAL 2 WEEK)) AS month FROM advising.student ORDER BY CASE WHEN predicted_graduation_semester IS NULL THEN 1 ELSE 0 END DESC, predicted_graduation_semester DESC LIMIT 1",Subtract 2 weeks from the most recent predicted graduation date and give the month., advising,mysql,group_by,"SELECT course_tags_count.course_id, SUM(course_tags_count.hilarious) AS total_hilarious FROM course_tags_count GROUP BY course_tags_count.course_id",What is the total number of students who found the instructor to be hilarious per course id?, advising,mysql,group_by,"SELECT i.name, AVG(c.clarity_score) FROM course AS c JOIN course_offering AS co ON c.course_id = co.course_id JOIN offering_instructor AS oi ON co.offering_id = oi.offering_id JOIN instructor AS i ON oi.instructor_id = i.instructor_id GROUP BY i.name;SELECT i.instructor_id, AVG(c.clarity_score) FROM course AS c JOIN course_offering AS co ON c.course_id = co.course_id JOIN offering_instructor AS oi ON co.offering_id = oi.offering_id JOIN instructor AS i ON oi.instructor_id = i.instructor_id GROUP BY i.instructor_id;SELECT i.name, i.instructor_id, 
AVG(c.clarity_score) FROM course AS c JOIN course_offering AS co ON c.course_id = co.course_id JOIN offering_instructor AS oi ON co.offering_id = oi.offering_id JOIN instructor AS i ON oi.instructor_id = i.instructor_id GROUP BY i.name, i.instructor_id",What is the average clarity score for each instructor who taught a course?, @@ -54,19 +54,19 @@ advising,mysql,table_join,"SELECT DISTINCT course.name FROM course_offering JOIN advising,mysql,table_join,SELECT COUNT(DISTINCT student_record.student_id) AS total_students FROM student_record JOIN course_offering ON student_record.course_id = course_offering.course_id WHERE course_offering.has_final_project = TRUE OR course_offering.has_final_exam = TRUE,What is the total number of students who have taken a course with a final project or exam?, advising,mysql,table_join,"SELECT program.name, SUM(student.total_credit) AS total_credits FROM student JOIN program ON student.program_id = program.program_id GROUP BY program.name;SELECT program.program_id, SUM(student.total_credit) AS total_credits FROM student JOIN program ON student.program_id = program.program_id GROUP BY program.program_id;SELECT program.name, program.program_id, SUM(student.total_credit) AS total_credits FROM student JOIN program ON student.program_id = program.program_id GROUP BY program.name, program.program_id",What is the total number of credits earned by students in each program?, advising,mysql,table_join,"SELECT program.name, COUNT(student.student_id) AS number_of_students FROM student JOIN program ON student.program_id = program.program_id WHERE NOT student.declare_major IS NULL GROUP BY program.name ORDER BY CASE WHEN number_of_students IS NULL THEN 1 ELSE 0 END DESC, number_of_students DESC;SELECT program.program_id, COUNT(student.student_id) AS number_of_students FROM student JOIN program ON student.program_id = program.program_id WHERE NOT student.declare_major IS NULL GROUP BY program.program_id ORDER BY CASE WHEN number_of_students IS NULL 
THEN 1 ELSE 0 END DESC, number_of_students DESC;SELECT program.name, program.program_id, COUNT(student.student_id) AS number_of_students FROM student JOIN program ON student.program_id = program.program_id WHERE NOT student.declare_major IS NULL GROUP BY program.name, program.program_id ORDER BY CASE WHEN number_of_students IS NULL THEN 1 ELSE 0 END DESC, number_of_students DESC",How many students have declared a major in each program?, -advising,mysql,instruct,"SELECT student.firstname, student.lastname FROM student WHERE NOT student.minor IS NULL ORDER BY CASE WHEN student.lastname IS NULL THEN 1 ELSE 0 END, student.lastname",Which students have declared a minor program? List their firstname and lastname. Order the results by the students' last names.,"student.declare_major is null for students who have not declared their major. +advising,mysql,instruct,"SELECT student.firstname, student.lastname FROM student WHERE NOT student.minor IS NULL ORDER BY CASE WHEN student.lastname IS NULL THEN 1 ELSE 0 END, student.lastname",Which students have declared a minor program? List their firstname and lastname. Order the results by the students' last names.,"student.declare_major is null for students who have not declared their major. student.minor is null for students who have not declared a minor program." advising,mysql,instruct,SELECT AVG(student.total_gpa) FROM student JOIN program ON student.program_id = program.program_id WHERE LOWER(program.name) = 'mathematics',What is the average GPA of students in the program mathematics?,Match strings case-insensitively -advising,mysql,instruct,"SELECT course.name FROM course WHERE LOWER(course.department) LIKE '%Computer Science%' ORDER BY CASE WHEN course.name IS NULL THEN 1 ELSE 0 END, course.name ASC",What are the names of all the courses offered by the department of Computer Science?,"Filter strings using ILIKE. 
+advising,mysql,instruct,"SELECT course.name FROM course WHERE LOWER(course.department) LIKE '%Computer Science%' ORDER BY CASE WHEN course.name IS NULL THEN 1 ELSE 0 END, course.name ASC",What are the names of all the courses offered by the department of Computer Science?,"Filter strings using ILIKE. Use the student_record table for all information relating to students' choices and their course." advising,mysql,instruct,"SELECT course.name, course.easiness_score FROM course WHERE course.department = 'Computer Science';SELECT course.course_id, course.easiness_score FROM course WHERE course.department = 'Computer Science';SELECT course.number, course.easiness_score FROM course WHERE course.department = 'Computer Science';SELECT course.name, course.course_id, course.easiness_score FROM course WHERE course.department = 'Computer Science';SELECT course.name, course.number, course.easiness_score FROM course WHERE course.department = 'Computer Science';SELECT course.course_id, course.number, course.easiness_score FROM course WHERE course.department = 'Computer Science';SELECT course.name, course.course_id, course.number, course.easiness_score FROM course WHERE course.department = 'Computer Science'","What are the easiness scores for courses in the ""Computer Science"" department? Show both courses and scores.",Always filter names using exact string matching -advising,mysql,instruct,"SELECT DISTINCT student_id FROM student_record WHERE student_record.how = 'in-person' AND student_record.grade IN ('A', 'C')",Return the student IDs who have taken an in-person course and have gotten a grade of A or C.,"Always filter strings with an exact match. +advising,mysql,instruct,"SELECT DISTINCT student_id FROM student_record WHERE student_record.how = 'in-person' AND student_record.grade IN ('A', 'C')",Return the student IDs who have taken an in-person course and have gotten a grade of A or C.,"Always filter strings with an exact match. 
When asked for specific students or courses, do not return duplicates." atis,mysql,date_functions,"SELECT flight.flight_number, (arrival_time - departure_time) / 60 AS duration_minutes FROM flight ORDER BY CASE WHEN duration_minutes IS NULL THEN 1 ELSE 0 END, duration_minutes LIMIT 1;SELECT flight.flight_id, (arrival_time - departure_time) / 60 AS duration_minutes FROM flight ORDER BY CASE WHEN duration_minutes IS NULL THEN 1 ELSE 0 END, duration_minutes LIMIT 1;SELECT flight.flight_number, flight.flight_id, (arrival_time - departure_time) / 60 AS duration_minutes FROM flight ORDER BY CASE WHEN duration_minutes IS NULL THEN 1 ELSE 0 END, duration_minutes LIMIT 1",Which flight has the shortest duration between departure and arrival times? Convert to minutes., atis,mysql,date_functions,SELECT AVG((arrival_time - departure_time - 34 * 60) / 60) AS average_duration FROM flight;SELECT AVG(arrival_time - departure_time) / 60 - 34 AS average_duration FROM flight,"What's the average duration between departure and arrival times minus 34 minutes? 
Convert from UNIX to regular datetime, and return the answer in minutes", atis,mysql,date_functions,"SELECT month.month_name, COUNT(*) AS departure_count FROM flight JOIN month ON EXTRACT(MONTH FROM FROM_UNIXTIME(flight.departure_time)) = month.month_number GROUP BY month.month_name, month.month_number ORDER BY CASE WHEN month.month_number IS NULL THEN 1 ELSE 0 END, month.month_number;SELECT DATE_FORMAT(FROM_UNIXTIME(departure_time), '%Y-%m') AS month, COUNT(*) AS num_departures FROM atis.flight GROUP BY month ORDER BY month",Count the number of flight departures for each month?, atis,mysql,date_functions,"SELECT DATE_FORMAT(CAST(FROM_UNIXTIME(departure_time) AS TIME), '%H:%i') AS earliest_departure_time FROM flight ORDER BY CASE WHEN earliest_departure_time IS NULL THEN 1 ELSE 0 END, earliest_departure_time LIMIT 1",What's the earliest flight departure time in the day in HH:MM?, -atis,mysql,date_functions,"SELECT EXTRACT(day FROM CURRENT_DATE - FROM_UNIXTIME(departure_time)) AS difference_in_days FROM flight ORDER BY CASE WHEN departure_time IS NULL THEN 1 ELSE 0 END, departure_time LIMIT 1;SELECT (CURRENT_DATE - FROM_UNIXTIME(MIN(f.departure_time))) AS days_difference FROM flight AS f",What's the difference in time in days between today and the earliest flight departure?, +atis,mysql,date_functions,"SELECT DATEDIFF(CURRENT_DATE, FROM_UNIXTIME(departure_time)) AS difference_in_days FROM flight WHERE departure_time IS NOT NULL ORDER BY departure_time ASC LIMIT 1;SELECT DATEDIFF(CURRENT_DATE, FROM_UNIXTIME(MIN(f.departure_time))) AS days_difference FROM flight AS f;",What's the difference in time in days between today and the earliest flight departure?, atis,mysql,group_by,"SELECT fare.fare_airline, SUM(fare.round_trip_cost) AS total_round_trip_cost FROM fare GROUP BY fare.fare_airline ORDER BY CASE WHEN total_round_trip_cost IS NULL THEN 1 ELSE 0 END DESC, total_round_trip_cost DESC",What is the total cost of round-trip fares for each airline code?, 
atis,mysql,group_by,"SELECT fare.fare_airline, AVG(fare.round_trip_cost) AS average_cost FROM fare WHERE fare.from_airport = 'LAX' AND fare.to_airport = 'ORD' GROUP BY fare.fare_airline ORDER BY average_cost DESC;SELECT airline.airline_name, AVG(fare.round_trip_cost) AS avg_round_trip_cost FROM fare JOIN airline ON fare.fare_airline = airline.airline_code WHERE fare.from_airport = 'LAX' AND fare.to_airport = 'ORD' GROUP BY airline.airline_name ORDER BY CASE WHEN avg_round_trip_cost IS NULL THEN 1 ELSE 0 END DESC, avg_round_trip_cost DESC","What is the average cost of round-trip fares from Los Angeles (LAX) to Chicago (ORD) for each airline, sorted in descending order by average cost?", atis,mysql,group_by,"SELECT f.from_airport, f.to_airport, AVG(f.one_direction_cost) AS average_cost FROM fare AS f GROUP BY f.from_airport, f.to_airport ORDER BY CASE WHEN f.from_airport IS NULL THEN 1 ELSE 0 END, f.from_airport, CASE WHEN f.to_airport IS NULL THEN 1 ELSE 0 END, f.to_airport",What is the average cost of a one-way trip for each airport pair in the fare table?, @@ -87,18 +87,18 @@ atis,mysql,table_join,"SELECT DISTINCT airline.airline_name FROM flight_stop JOI atis,mysql,table_join,"SELECT DISTINCT airline.airline_name FROM flight JOIN airline ON flight.airline_code = airline.airline_code WHERE flight.from_airport = 'LAX' AND flight.to_airport = 'ORD';SELECT DISTINCT airline.airline_code FROM flight JOIN airline ON flight.airline_code = airline.airline_code WHERE flight.from_airport = 'LAX' AND flight.to_airport = 'ORD';SELECT DISTINCT airline.airline_name, airline.airline_code FROM flight JOIN airline ON flight.airline_code = airline.airline_code WHERE flight.from_airport = 'LAX' AND flight.to_airport = 'ORD'",Which airlines offer flights from LAX to ORD?, atis,mysql,table_join,"SELECT airline.airline_name, flight.stops FROM flight JOIN airline ON flight.airline_code = airline.airline_code WHERE flight.from_airport = 'ORD' AND flight.to_airport = 'JFK' ORDER BY CASE 
WHEN flight.stops IS NULL THEN 1 ELSE 0 END, flight.stops;SELECT airline.airline_code, flight.stops FROM flight JOIN airline ON flight.airline_code = airline.airline_code WHERE flight.from_airport = 'ORD' AND flight.to_airport = 'JFK' ORDER BY CASE WHEN flight.stops IS NULL THEN 1 ELSE 0 END, flight.stops;SELECT airline.airline_name, airline.airline_code, flight.stops FROM flight JOIN airline ON flight.airline_code = airline.airline_code WHERE flight.from_airport = 'ORD' AND flight.to_airport = 'JFK' ORDER BY CASE WHEN flight.stops IS NULL THEN 1 ELSE 0 END, flight.stops","Which airlines offer flights from Chicago (ORD) to New York (JFK), and how many stops do they have, sorted by number of stops in ascending order?", atis,mysql,table_join,"SELECT DISTINCT airline.airline_name FROM airline WHERE NOT airline.airline_code IN (SELECT flight.airline_code FROM flight WHERE flight.from_airport = 'JFK' OR flight.to_airport = 'JFK' OR flight.stops > 0);SELECT DISTINCT airline.airline_code FROM airline WHERE NOT airline.airline_code IN (SELECT flight.airline_code FROM flight WHERE flight.from_airport = 'JFK' OR flight.to_airport = 'JFK' OR flight.stops > 0);SELECT DISTINCT airline.airline_name, airline.airline_code FROM airline WHERE NOT airline.airline_code IN (SELECT flight.airline_code FROM flight WHERE flight.from_airport = 'JFK' OR flight.to_airport = 'JFK' OR flight.stops > 0)","Which airlines do not have any flights that either depart from/arrive at JFK, or have one or more stops?", -atis,mysql,instruct,SELECT state_code FROM airport WHERE LOWER(airport_name) LIKE '%Orlando International Airport%',Which state code is Orlando International Airport in?,"Filter airport, city, country names using ILIKE. -Filter state code (eg NY), airport codes (eg JFK) using case-insensitive matches. 
+atis,mysql,instruct,SELECT state_code FROM airport WHERE LOWER(airport_name) LIKE '%Orlando International Airport%',Which state code is Orlando International Airport in?,"Filter airport, city, country names using ILIKE. +Filter state code (eg NY), airport codes (eg JFK) using case-insensitive matches. If multiple flight days are requested, use ILIKE and wildcards for each of the days separately, since they are not necessarily ordered." -atis,mysql,instruct,"SELECT flight.flight_number FROM flight WHERE LOWER(flight.flight_days) LIKE '%mon%' AND LOWER(flight.flight_days) LIKE '%wed%';SELECT flight.flight_id FROM flight WHERE LOWER(flight.flight_days) LIKE '%mon%' AND LOWER(flight.flight_days) LIKE '%wed%';SELECT flight.flight_number, flight.flight_id FROM flight WHERE LOWER(flight.flight_days) LIKE '%mon%' AND LOWER(flight.flight_days) LIKE '%wed%'",Which flights operate on Mondays and Wednesdays? Give me the relevant flight numbers,"Filter airport, city, country names using ILIKE. -Filter state code (eg NY), airport codes (eg JFK) using case-insensitive matches. +atis,mysql,instruct,"SELECT flight.flight_number FROM flight WHERE LOWER(flight.flight_days) LIKE '%mon%' AND LOWER(flight.flight_days) LIKE '%wed%';SELECT flight.flight_id FROM flight WHERE LOWER(flight.flight_days) LIKE '%mon%' AND LOWER(flight.flight_days) LIKE '%wed%';SELECT flight.flight_number, flight.flight_id FROM flight WHERE LOWER(flight.flight_days) LIKE '%mon%' AND LOWER(flight.flight_days) LIKE '%wed%'",Which flights operate on Mondays and Wednesdays? Give me the relevant flight numbers,"Filter airport, city, country names using ILIKE. +Filter state code (eg NY), airport codes (eg JFK) using case-insensitive matches. If multiple flight days are requested, use ILIKE for each of the days separately, since they are not necessarily ordered." 
-atis,mysql,instruct,SELECT SUM(fare.round_trip_cost) AS total_round_trip_cost FROM fare WHERE fare.from_airport = 'JFK' AND fare.to_airport = 'LAX',What is the total cost of all round-trip fares from New York (JFK) to Los Angeles?,"Filter airport, city, country names using ILIKE. -Filter state code (eg NY), airport codes (eg JFK) using case-insensitive matches. +atis,mysql,instruct,SELECT SUM(fare.round_trip_cost) AS total_round_trip_cost FROM fare WHERE fare.from_airport = 'JFK' AND fare.to_airport = 'LAX',What is the total cost of all round-trip fares from New York (JFK) to Los Angeles?,"Filter airport, city, country names using ILIKE. +Filter state code (eg NY), airport codes (eg JFK) using case-insensitive matches. fare.round_trip_required is not needed when getting the round trip cost." -atis,mysql,instruct,SELECT minimum_connect_time FROM airport WHERE airport_code = 'JFK',What is the minimum amount of time required for a connecting flight at JFK Airport?,"Filter airport, city, country names using ILIKE. +atis,mysql,instruct,SELECT minimum_connect_time FROM airport WHERE airport_code = 'JFK',What is the minimum amount of time required for a connecting flight at JFK Airport?,"Filter airport, city, country names using ILIKE. Filter state code (eg NY) and airport codes (eg JFK) using upper-case matches." -atis,mysql,instruct,SELECT COUNT(DISTINCT flight_fare.flight_id) FROM flight_fare JOIN fare ON flight_fare.fare_id = fare.fare_id WHERE fare.round_trip_required = 'Yes',How many flights require a round-trip to purchase the fare?,"Filter airport, city, country names using ILIKE. +atis,mysql,instruct,SELECT COUNT(DISTINCT flight_fare.flight_id) FROM flight_fare JOIN fare ON flight_fare.fare_id = fare.fare_id WHERE fare.round_trip_required = 'Yes',How many flights require a round-trip to purchase the fare?,"Filter airport, city, country names using ILIKE. Filter state code (eg NY) and airport codes (eg JFK) using upper-case matches." 
geography,mysql,group_by,"SELECT city.country_name, SUM(city.population) AS total_population FROM city GROUP BY city.country_name ORDER BY total_population DESC",What is the total population in cities by country?, geography,mysql,group_by,"SELECT river.country_name, AVG(river.length) AS average_length FROM river GROUP BY river.country_name ORDER BY average_length DESC",What is the average length of rivers in each country?, @@ -205,28 +205,28 @@ yelp,mysql,table_join,"SELECT category.category_name, SUM(business.review_count) yelp,mysql,table_join,"SELECT category.category_name, SUM(business.review_count) AS total_reviews FROM business JOIN category ON business.business_id = category.business_id GROUP BY category.category_name ORDER BY total_reviews DESC;SELECT category.id, SUM(business.review_count) AS total_reviews FROM business JOIN category ON business.business_id = category.business_id GROUP BY category.id ORDER BY total_reviews DESC;SELECT category.category_name, category.id, SUM(business.review_count) AS total_reviews FROM business JOIN category ON business.business_id = category.business_id GROUP BY category.category_name, category.id ORDER BY total_reviews DESC",What is the total number of reviews for each business category?, yelp,mysql,table_join,"SELECT business.business_id, SUM(checkin.count) AS total_checkins FROM business JOIN checkin ON business.business_id = checkin.business_id WHERE business.state = 'CA' GROUP BY business.business_id ORDER BY total_checkins DESC;SELECT business.name, SUM(checkin.count) AS total_checkins FROM business JOIN checkin ON business.business_id = checkin.business_id WHERE business.state = 'CA' GROUP BY business.name ORDER BY total_checkins DESC;SELECT business.bid, SUM(checkin.count) AS total_checkins FROM business JOIN checkin ON business.business_id = checkin.business_id WHERE business.state = 'CA' GROUP BY business.bid ORDER BY total_checkins DESC;SELECT business.business_id, business.name, SUM(checkin.count) AS 
total_checkins FROM business JOIN checkin ON business.business_id = checkin.business_id WHERE business.state = 'CA' GROUP BY business.business_id, business.name ORDER BY total_checkins DESC;SELECT business.business_id, business.bid, SUM(checkin.count) AS total_checkins FROM business JOIN checkin ON business.business_id = checkin.business_id WHERE business.state = 'CA' GROUP BY business.business_id, business.bid ORDER BY total_checkins DESC;SELECT business.name, business.bid, SUM(checkin.count) AS total_checkins FROM business JOIN checkin ON business.business_id = checkin.business_id WHERE business.state = 'CA' GROUP BY business.name, business.bid ORDER BY total_checkins DESC;SELECT business.business_id, business.name, business.bid, SUM(checkin.count) AS total_checkins FROM business JOIN checkin ON business.business_id = checkin.business_id WHERE business.state = 'CA' GROUP BY business.business_id, business.name, business.bid ORDER BY total_checkins DESC",What is the total number of check-ins for each business in the state of California?, yelp,mysql,table_join,"SELECT category.category_name FROM (SELECT business.business_id, AVG(review.rating) AS average_rating FROM business JOIN review ON business.business_id = review.business_id GROUP BY business.business_id) AS business_rating JOIN category ON business_rating.business_id = category.business_id GROUP BY category.category_name ORDER BY AVG(business_rating.average_rating) DESC LIMIT 2;SELECT category.id FROM (SELECT business.business_id, AVG(review.rating) AS average_rating FROM business JOIN review ON business.business_id = review.business_id GROUP BY business.business_id) AS business_rating JOIN category ON business_rating.business_id = category.business_id GROUP BY category.id ORDER BY AVG(business_rating.average_rating) DESC LIMIT 2;SELECT category.category_name, category.id FROM (SELECT business.business_id, AVG(review.rating) AS average_rating FROM business JOIN review ON business.business_id = 
review.business_id GROUP BY business.business_id) AS business_rating JOIN category ON business_rating.business_id = category.business_id GROUP BY category.category_name, category.id ORDER BY AVG(business_rating.average_rating) DESC LIMIT 2",What are the top 2 categories of businesses with the highest average rating?, -yelp,mysql,instruct,SELECT COUNT(review.rid) AS total_reviews FROM review JOIN category ON review.business_id = category.business_id WHERE review.year = 2021 AND LOWER(category.category_name) LIKE '%Cafe%',"What is the total number of reviews posted in the year 2021 for businesses in the category ""Cafe""?","Filter strings of users, city, address, business.name using ILIKE with wildcards. -Filter strings of state using exact upper case matches. -Assume the rating of a business to be its average rating, and compute it before computing other aggregates on it. -Always truncate dates in the question to its current month and year (if applicable) before filtering on `review.year` and `review.month`. +yelp,mysql,instruct,SELECT COUNT(review.rid) AS total_reviews FROM review JOIN category ON review.business_id = category.business_id WHERE review.year = 2021 AND LOWER(category.category_name) LIKE '%Cafe%',"What is the total number of reviews posted in the year 2021 for businesses in the category ""Cafe""?","Filter strings of users, city, address, business.name using ILIKE with wildcards. +Filter strings of state using exact upper case matches. +Assume the rating of a business to be its average rating, and compute it before computing other aggregates on it. +Always truncate dates in the question to its current month and year (if applicable) before filtering on `review.year` and `review.month`. 
" -yelp,mysql,instruct,"SELECT AVG(sf.average_rating) AS sf_average_rating FROM (SELECT business.business_id, AVG(review.rating) AS average_rating FROM business JOIN review ON business.business_id = review.business_id WHERE LOWER(LOWER(business.city)) LIKE '%san francisco%' GROUP BY business.business_id) AS sf",What is the average rating of businesses in the city of San Francisco?,"Filter strings of users, city, address, business.name using ILIKE with wildcards. -Filter strings of state using exact upper case matches. -The rating of businesses in a city refers to the average rating of the businesses in that city. I.e., you must compute the average rating of each business before computing the average rating of businesses in the city. -Always truncate dates in the question to its current month and year (if applicable) before filtering on `review.year` and `review.month`. +yelp,mysql,instruct,"SELECT AVG(sf.average_rating) AS sf_average_rating FROM (SELECT business.business_id, AVG(review.rating) AS average_rating FROM business JOIN review ON business.business_id = review.business_id WHERE LOWER(LOWER(business.city)) LIKE '%san francisco%' GROUP BY business.business_id) AS sf",What is the average rating of businesses in the city of San Francisco?,"Filter strings of users, city, address, business.name using ILIKE with wildcards. +Filter strings of state using exact upper case matches. +The rating of businesses in a city refers to the average rating of the businesses in that city. I.e., you must compute the average rating of each business before computing the average rating of businesses in the city. +Always truncate dates in the question to its current month and year (if applicable) before filtering on `review.year` and `review.month`. 
" -yelp,mysql,instruct,"SELECT review.business_id, COUNT(*) AS review_count FROM review WHERE review.year = 2021 GROUP BY review.business_id ORDER BY review_count DESC",How many reviews were posted for each business id in the year 2021?,"Filter strings of users, city, address, business.name using ILIKE with wildcards. -Filter strings of state using exact upper case matches. -Assume the rating of a business to be its average rating, and compute it before computing other aggregates on it. -Always truncate dates in the question to its current month and year (if applicable) before filtering on `review.year` and `review.month`. +yelp,mysql,instruct,"SELECT review.business_id, COUNT(*) AS review_count FROM review WHERE review.year = 2021 GROUP BY review.business_id ORDER BY review_count DESC",How many reviews were posted for each business id in the year 2021?,"Filter strings of users, city, address, business.name using ILIKE with wildcards. +Filter strings of state using exact upper case matches. +Assume the rating of a business to be its average rating, and compute it before computing other aggregates on it. +Always truncate dates in the question to its current month and year (if applicable) before filtering on `review.year` and `review.month`. " -yelp,mysql,instruct,SELECT COUNT(*) FROM review JOIN users ON review.user_id = users.user_id WHERE LOWER(users.name) LIKE '%Sarah Williams%' AND review.month = 'April' AND review.year = 2021,"How many reviews were posted by users with the name ""Sarah Williams"" in the month of April 2021?","Filter strings of users, city, address, business.name using ILIKE with wildcards. -Filter strings of state using exact upper case matches. -Assume the rating of a business to be its average rating, and compute it before computing other aggregates on it. -Always truncate dates in the question to its current month and year (if applicable) before filtering on `review.year` and `review.month`. 
+yelp,mysql,instruct,SELECT COUNT(*) FROM review JOIN users ON review.user_id = users.user_id WHERE LOWER(users.name) LIKE '%Sarah Williams%' AND review.month = 'April' AND review.year = 2021,"How many reviews were posted by users with the name ""Sarah Williams"" in the month of April 2021?","Filter strings of users, city, address, business.name using ILIKE with wildcards. +Filter strings of state using exact upper case matches. +Assume the rating of a business to be its average rating, and compute it before computing other aggregates on it. +Always truncate dates in the question to its current month and year (if applicable) before filtering on `review.year` and `review.month`. " -yelp,mysql,instruct,SELECT SUM(checkin.count) AS total_checkins FROM business JOIN checkin ON business.business_id = checkin.business_id WHERE business.state = 'CA' AND LOWER(checkin.day) LIKE '%Monday%',How many check-ins occurred on Mondays at businesses in the state of California?,"Filter strings of users, city, address, business.name using ILIKE with wildcards. -Filter strings of state using exact upper case matches. -Assume the rating of a business to be its average rating, and compute it before computing other aggregates on it. -Always truncate dates in the question to its current month and year (if applicable) before filtering on `review.year` and `review.month`. +yelp,mysql,instruct,SELECT SUM(checkin.count) AS total_checkins FROM business JOIN checkin ON business.business_id = checkin.business_id WHERE business.state = 'CA' AND LOWER(checkin.day) LIKE '%Monday%',How many check-ins occurred on Mondays at businesses in the state of California?,"Filter strings of users, city, address, business.name using ILIKE with wildcards. +Filter strings of state using exact upper case matches. +Assume the rating of a business to be its average rating, and compute it before computing other aggregates on it. 
+Always truncate dates in the question to its current month and year (if applicable) before filtering on `review.year` and `review.month`. " diff --git a/data/questions_gen_sqlite.csv b/data/questions_gen_sqlite.csv index 8d4c063..ff55a7e 100644 --- a/data/questions_gen_sqlite.csv +++ b/data/questions_gen_sqlite.csv @@ -25,14 +25,14 @@ academic,sqlite,table_join,"SELECT journal.name, COUNT(publication.pid) AS total academic,sqlite,table_join,"SELECT conference.name, COUNT(publication.pid) AS num_publications FROM publication JOIN conference ON publication.cid = conference.cid GROUP BY conference.name, conference.cid ORDER BY num_publications DESC;","How many publications were presented at each conference, ordered by the number of publications in descending order? Give the names of the conferences and their corresponding number of publications.", academic,sqlite,table_join,SELECT COUNT(DISTINCT publication.pid) FROM publication JOIN journal ON publication.jid = journal.jid WHERE LOWER(journal.name) LIKE 'J%';,"How many publications were published in journals whose names start with the letter ""J""?", academic,sqlite,instruct,"SELECT DISTINCT organization.name FROM organization JOIN author ON organization.oid = author.oid JOIN writes ON author.aid = writes.aid JOIN domain_publication ON writes.pid = domain_publication.pid JOIN domain ON domain_publication.did = domain.did WHERE domain.name = 'Machine Learning';SELECT DISTINCT organization.oid FROM organization JOIN author ON organization.oid = author.oid JOIN writes ON author.aid = writes.aid JOIN domain_publication ON writes.pid = domain_publication.pid JOIN domain ON domain_publication.did = domain.did WHERE domain.name = 'Machine Learning';SELECT DISTINCT organization.name, organization.oid FROM organization JOIN author ON organization.oid = author.oid JOIN writes ON author.aid = writes.aid JOIN domain_publication ON writes.pid = domain_publication.pid JOIN domain ON domain_publication.did = domain.did WHERE 
domain.name = 'Machine Learning';","Which organizations have authors who have written publications in the domain ""Machine Learning""?",Always filter names using an exact match -academic,sqlite,instruct,"SELECT DISTINCT a2.name FROM author AS a1 JOIN domain_author AS da1 ON a1.aid = da1.aid JOIN domain_author AS da2 ON da1.did = da2.did JOIN author AS a2 ON da2.aid = a2.aid WHERE LOWER(LOWER(a1.name)) LIKE '%martin%';SELECT DISTINCT a2.aid FROM author AS a1 JOIN domain_author AS da1 ON a1.aid = da1.aid JOIN domain_author AS da2 ON da1.did = da2.did JOIN author AS a2 ON da2.aid = a2.aid WHERE LOWER(LOWER(a1.name)) LIKE '%martin%';SELECT DISTINCT a2.name, a2.aid FROM author AS a1 JOIN domain_author AS da1 ON a1.aid = da1.aid JOIN domain_author AS da2 ON da1.did = da2.did JOIN author AS a2 ON da2.aid = a2.aid WHERE LOWER(LOWER(a1.name)) LIKE '%martin%';",Which authors belong to the same domain as Martin?,Always filter names using ILIKE with percent sign wildcards -academic,sqlite,instruct,"SELECT DISTINCT name FROM author WHERE oid IS NULL;SELECT DISTINCT aid FROM author WHERE oid IS NULL;SELECT DISTINCT name, aid FROM author WHERE oid IS NULL;",Which authors are not part of any organization?,Always filter names using ILIKE -academic,sqlite,instruct,"SELECT DISTINCT publication.title FROM DOMAIN JOIN domain_author ON domain.did = domain_author.did JOIN writes ON domain_author.aid = writes.aid JOIN publication ON writes.pid = publication.pid JOIN domain_conference ON domain.did = domain_conference.did WHERE LOWER(domain.name) LIKE '%Sociology%' AND publication.year = 2020 AND publication.cid = domain_conference.cid;SELECT DISTINCT publication.pid FROM DOMAIN JOIN domain_author ON domain.did = domain_author.did JOIN writes ON domain_author.aid = writes.aid JOIN publication ON writes.pid = publication.pid JOIN domain_conference ON domain.did = domain_conference.did WHERE LOWER(domain.name) LIKE '%Sociology%' AND publication.year = 2020 AND publication.cid = 
domain_conference.cid;SELECT DISTINCT publication.title, publication.pid FROM DOMAIN JOIN domain_author ON domain.did = domain_author.did JOIN writes ON domain_author.aid = writes.aid JOIN publication ON writes.pid = publication.pid JOIN domain_conference ON domain.did = domain_conference.did WHERE LOWER(domain.name) LIKE '%Sociology%' AND publication.year = 2020 AND publication.cid = domain_conference.cid;",What are the publications written by authors from the 'Sociology' domain and presented at conferences in the year 2020?,"To get publications written by authors from a given domain, you would need to join domain, domain_author, author to link the domain to the author first, and then join with write to link with the publication id. Finally, to see which ones were presented at conferences, you must join the domain table with the domain_conference table. You must also filter names using ILIKE." -academic,sqlite,instruct,SELECT DISTINCT author.name FROM author JOIN writes ON author.aid = writes.aid JOIN publication ON writes.pid = publication.pid JOIN domain_publication ON publication.pid = domain_publication.pid JOIN domain ON domain_publication.did = domain.did WHERE LOWER(domain.name) LIKE '%computer%science%';,"What are the names of the authors who have written publications in the domain ""Computer Science""?","To get publications written by authors from a given domain, you would need to join domain, domain_author, author to link the domain to the author first, and then join with write to link with the publication id. You must also filter names using ILIKE." 
+academic,sqlite,instruct,"SELECT DISTINCT a2.name FROM author AS a1 JOIN domain_author AS da1 ON a1.aid = da1.aid JOIN domain_author AS da2 ON da1.did = da2.did JOIN author AS a2 ON da2.aid = a2.aid WHERE LOWER(LOWER(a1.name)) LIKE '%martin%';SELECT DISTINCT a2.aid FROM author AS a1 JOIN domain_author AS da1 ON a1.aid = da1.aid JOIN domain_author AS da2 ON da1.did = da2.did JOIN author AS a2 ON da2.aid = a2.aid WHERE LOWER(LOWER(a1.name)) LIKE '%martin%';SELECT DISTINCT a2.name, a2.aid FROM author AS a1 JOIN domain_author AS da1 ON a1.aid = da1.aid JOIN domain_author AS da2 ON da1.did = da2.did JOIN author AS a2 ON da2.aid = a2.aid WHERE LOWER(LOWER(a1.name)) LIKE '%martin%';",Which authors belong to the same domain as Martin?,Always filter names using LIKE with percent sign wildcards +academic,sqlite,instruct,"SELECT DISTINCT name FROM author WHERE oid IS NULL;SELECT DISTINCT aid FROM author WHERE oid IS NULL;SELECT DISTINCT name, aid FROM author WHERE oid IS NULL;",Which authors are not part of any organization?,Always filter names using LIKE +academic,sqlite,instruct,"SELECT DISTINCT publication.title FROM DOMAIN JOIN domain_author ON domain.did = domain_author.did JOIN writes ON domain_author.aid = writes.aid JOIN publication ON writes.pid = publication.pid JOIN domain_conference ON domain.did = domain_conference.did WHERE LOWER(domain.name) LIKE '%Sociology%' AND publication.year = 2020 AND publication.cid = domain_conference.cid;SELECT DISTINCT publication.pid FROM DOMAIN JOIN domain_author ON domain.did = domain_author.did JOIN writes ON domain_author.aid = writes.aid JOIN publication ON writes.pid = publication.pid JOIN domain_conference ON domain.did = domain_conference.did WHERE LOWER(domain.name) LIKE '%Sociology%' AND publication.year = 2020 AND publication.cid = domain_conference.cid;SELECT DISTINCT publication.title, publication.pid FROM DOMAIN JOIN domain_author ON domain.did = domain_author.did JOIN writes ON domain_author.aid = writes.aid JOIN 
publication ON writes.pid = publication.pid JOIN domain_conference ON domain.did = domain_conference.did WHERE LOWER(domain.name) LIKE '%Sociology%' AND publication.year = 2020 AND publication.cid = domain_conference.cid;",What are the publications written by authors from the 'Sociology' domain and presented at conferences in the year 2020?,"To get publications written by authors from a given domain, you would need to join domain, domain_author, author to link the domain to the author first, and then join with write to link with the publication id. Finally, to see which ones were presented at conferences, you must join the domain table with the domain_conference table. You must also filter names using LIKE." +academic,sqlite,instruct,SELECT DISTINCT author.name FROM author JOIN writes ON author.aid = writes.aid JOIN publication ON writes.pid = publication.pid JOIN domain_publication ON publication.pid = domain_publication.pid JOIN domain ON domain_publication.did = domain.did WHERE LOWER(domain.name) LIKE '%computer%science%';,"What are the names of the authors who have written publications in the domain ""Computer Science""?","To get publications written by authors from a given domain, you would need to join domain, domain_author, author to link the domain to the author first, and then join with write to link with the publication id. You must also filter names using LIKE." advising,sqlite,date_functions,"SELECT strftime('%m', s.admit_term) AS month, COUNT(*) AS total_students FROM student AS s GROUP BY month ORDER BY total_students DESC LIMIT 1;",What month were most students admitted?, -advising,sqlite,date_functions,SELECT AVG(predicted_graduation_semester - admit_term) AS average_predicted_time_to_graduation FROM student;,What's the average predicted time to graduation since admission in no. 
of days?, +advising,sqlite,date_functions,SELECT AVG(julianday(predicted_graduation_semester) - julianday(admit_term)) AS average_predicted_time_to_graduation FROM student;,What's the average predicted time to graduation since admission in no. of days?, advising,sqlite,date_functions,"SELECT COUNT(*) AS num_students_graduated FROM student WHERE predicted_graduation_semester >= DATE('now', '-10 years');",How many students were predicted to graduate in the last 10 years?, -advising,sqlite,date_functions,SELECT CURRENT_DATE - MAX(admit_term) AS duration_since_last_admitted_student FROM student;,How long has it been in days since the last admitted student?, +advising,sqlite,date_functions,SELECT ROUND(julianday(CURRENT_DATE) - julianday(MAX(admit_term))) AS duration_since_last_admitted_student FROM student s;,How long has it been in days since the last admitted student?, advising,sqlite,date_functions,"SELECT STRFTIME('%m', DATE(predicted_graduation_semester, '-14 days')) AS month FROM student ORDER BY CASE WHEN predicted_graduation_semester IS NULL THEN 1 ELSE 0 END DESC, predicted_graduation_semester DESC LIMIT 1;SELECT strftime('%m', date(predicted_graduation_semester, '-14 days')) AS month FROM student ORDER BY CASE WHEN predicted_graduation_semester IS NULL THEN 1 ELSE 0 END DESC, predicted_graduation_semester DESC LIMIT 1;",Subtract 2 weeks from the most recent predicted graduation date and give the month., advising,sqlite,group_by,"SELECT course_tags_count.course_id, SUM(course_tags_count.hilarious) AS total_hilarious FROM course_tags_count GROUP BY course_tags_count.course_id;",What is the total number of students who found the instructor to be hilarious per course id?, advising,sqlite,group_by,"SELECT i.name, AVG(c.clarity_score) FROM course AS c JOIN course_offering AS co ON c.course_id = co.course_id JOIN offering_instructor AS oi ON co.offering_id = oi.offering_id JOIN instructor AS i ON oi.instructor_id = i.instructor_id GROUP BY i.name;SELECT 
i.instructor_id, AVG(c.clarity_score) FROM course AS c JOIN course_offering AS co ON c.course_id = co.course_id JOIN offering_instructor AS oi ON co.offering_id = oi.offering_id JOIN instructor AS i ON oi.instructor_id = i.instructor_id GROUP BY i.instructor_id;SELECT i.name, i.instructor_id, AVG(c.clarity_score) FROM course AS c JOIN course_offering AS co ON c.course_id = co.course_id JOIN offering_instructor AS oi ON co.offering_id = oi.offering_id JOIN instructor AS i ON oi.instructor_id = i.instructor_id GROUP BY i.name, i.instructor_id;",What is the average clarity score for each instructor who taught a course?, @@ -54,19 +54,19 @@ advising,sqlite,table_join,"SELECT DISTINCT course.name FROM course_offering JOI advising,sqlite,table_join,SELECT COUNT(DISTINCT student_record.student_id) AS total_students FROM student_record JOIN course_offering ON student_record.course_id = course_offering.course_id WHERE course_offering.has_final_project = TRUE OR course_offering.has_final_exam = TRUE;,What is the total number of students who have taken a course with a final project or exam?, advising,sqlite,table_join,"SELECT program.name, SUM(student.total_credit) AS total_credits FROM student JOIN program ON student.program_id = program.program_id GROUP BY program.name;SELECT program.program_id, SUM(student.total_credit) AS total_credits FROM student JOIN program ON student.program_id = program.program_id GROUP BY program.program_id;SELECT program.name, program.program_id, SUM(student.total_credit) AS total_credits FROM student JOIN program ON student.program_id = program.program_id GROUP BY program.name, program.program_id;",What is the total number of credits earned by students in each program?, advising,sqlite,table_join,"SELECT program.name, COUNT(student.student_id) AS number_of_students FROM student JOIN program ON student.program_id = program.program_id WHERE NOT student.declare_major IS NULL GROUP BY program.name ORDER BY CASE WHEN number_of_students IS NULL THEN 1 
ELSE 0 END DESC, number_of_students DESC;SELECT program.program_id, COUNT(student.student_id) AS number_of_students FROM student JOIN program ON student.program_id = program.program_id WHERE NOT student.declare_major IS NULL GROUP BY program.program_id ORDER BY CASE WHEN number_of_students IS NULL THEN 1 ELSE 0 END DESC, number_of_students DESC;SELECT program.name, program.program_id, COUNT(student.student_id) AS number_of_students FROM student JOIN program ON student.program_id = program.program_id WHERE NOT student.declare_major IS NULL GROUP BY program.name, program.program_id ORDER BY CASE WHEN number_of_students IS NULL THEN 1 ELSE 0 END DESC, number_of_students DESC;",How many students have declared a major in each program?, -advising,sqlite,instruct,"SELECT student.firstname, student.lastname FROM student WHERE NOT student.minor IS NULL ORDER BY CASE WHEN student.lastname IS NULL THEN 1 ELSE 0 END, student.lastname;",Which students have declared a minor program? List their firstname and lastname. Order the results by the students' last names.,"student.declare_major is null for students who have not declared their major. +advising,sqlite,instruct,"SELECT student.firstname, student.lastname FROM student WHERE NOT student.minor IS NULL ORDER BY CASE WHEN student.lastname IS NULL THEN 1 ELSE 0 END, student.lastname;",Which students have declared a minor program? List their firstname and lastname. Order the results by the students' last names.,"student.declare_major is null for students who have not declared their major. student.minor is null for students who have not declared a minor program." 
advising,sqlite,instruct,SELECT AVG(student.total_gpa) FROM student JOIN program ON student.program_id = program.program_id WHERE LOWER(program.name) = 'mathematics';,What is the average GPA of students in the program mathematics?,Match strings case-insensitively -advising,sqlite,instruct,"SELECT course.name FROM course WHERE LOWER(course.department) LIKE '%Computer Science%' ORDER BY CASE WHEN course.name IS NULL THEN 1 ELSE 0 END, course.name ASC;",What are the names of all the courses offered by the department of Computer Science?,"Filter strings using ILIKE. +advising,sqlite,instruct,"SELECT course.name FROM course WHERE LOWER(course.department) LIKE '%Computer Science%' ORDER BY CASE WHEN course.name IS NULL THEN 1 ELSE 0 END, course.name ASC;",What are the names of all the courses offered by the department of Computer Science?,"Filter strings using LIKE. Use the student_record table for all information relating to students' choices and their course." advising,sqlite,instruct,"SELECT course.name, course.easiness_score FROM course WHERE course.department = 'Computer Science';SELECT course.course_id, course.easiness_score FROM course WHERE course.department = 'Computer Science';SELECT course.number, course.easiness_score FROM course WHERE course.department = 'Computer Science';SELECT course.name, course.course_id, course.easiness_score FROM course WHERE course.department = 'Computer Science';SELECT course.name, course.number, course.easiness_score FROM course WHERE course.department = 'Computer Science';SELECT course.course_id, course.number, course.easiness_score FROM course WHERE course.department = 'Computer Science';SELECT course.name, course.course_id, course.number, course.easiness_score FROM course WHERE course.department = 'Computer Science';","What are the easiness scores for courses in the ""Computer Science"" department? 
Show both courses and scores.",Always filter names using exact string matching -advising,sqlite,instruct,"SELECT DISTINCT student_id FROM student_record WHERE student_record.how = 'in-person' AND student_record.grade IN ('A', 'C');",Return the student IDs who have taken an in-person course and have gotten a grade of A or C.,"Always filter strings with an exact match. +advising,sqlite,instruct,"SELECT DISTINCT student_id FROM student_record WHERE student_record.how = 'in-person' AND student_record.grade IN ('A', 'C');",Return the student IDs who have taken an in-person course and have gotten a grade of A or C.,"Always filter strings with an exact match. When asked for specific students or courses, do not return duplicates." atis,sqlite,date_functions,"SELECT flight.flight_number, (arrival_time - departure_time) / 60 AS duration_minutes FROM flight ORDER BY CASE WHEN duration_minutes IS NULL THEN 1 ELSE 0 END, duration_minutes LIMIT 1;SELECT flight.flight_id, (arrival_time - departure_time) / 60 AS duration_minutes FROM flight ORDER BY CASE WHEN duration_minutes IS NULL THEN 1 ELSE 0 END, duration_minutes LIMIT 1;SELECT flight.flight_number, flight.flight_id, (arrival_time - departure_time) / 60 AS duration_minutes FROM flight ORDER BY CASE WHEN duration_minutes IS NULL THEN 1 ELSE 0 END, duration_minutes LIMIT 1;",Which flight has the shortest duration between departure and arrival times? Convert to minutes., atis,sqlite,date_functions,SELECT AVG((arrival_time - departure_time) / 60.0 - 34) AS average_duration FROM flight;SELECT AVG(arrival_time - departure_time) / 60 - 34 AS average_duration FROM flight;,"What's the average duration between departure and arrival times minus 34 minutes? 
Convert from UNIX to regular datetime, and return the answer in minutes", atis,sqlite,date_functions,"SELECT month.month_name, COUNT(*) AS departure_count FROM flight JOIN month ON strftime('%m', flight.departure_time, 'unixepoch') = printf('%02d', month.month_number) GROUP BY month.month_name, month.month_number ORDER BY month.month_number;SELECT strftime('%Y-%m', datetime(flight.departure_time, 'unixepoch')) AS month, COUNT(*) AS num_departures FROM flight GROUP BY month ORDER BY CASE WHEN month IS NULL THEN 1 ELSE 0 END, month;",Count the number of flight departures for each month?, atis,sqlite,date_functions,"SELECT strftime('%H:%M', datetime(departure_time, 'unixepoch')) AS earliest_departure_time FROM flight ORDER BY departure_time LIMIT 1;",What's the earliest flight departure time in the day in HH:MM?, -atis,sqlite,date_functions,"SELECT julianday('now') - julianday(datetime(departure_time, 'unixepoch')) AS difference_in_days FROM flight ORDER BY departure_time LIMIT 1;SELECT julianday('now') - julianday(datetime(MIN(f.departure_time), 'unixepoch')) AS days_difference FROM flight AS f;",What's the difference in time in days between today and the earliest flight departure?, +atis,sqlite,date_functions,"SELECT ROUND(julianday('now') - julianday(datetime(departure_time, 'unixepoch')), 0) AS difference_in_days FROM flight ORDER BY departure_time LIMIT 1;SELECT ROUND(julianday('now') - julianday(datetime(MIN(f.departure_time), 'unixepoch')), 0) AS days_difference FROM flight AS f;",What's the difference in time in days between today and the earliest flight departure?, atis,sqlite,group_by,"SELECT fare.fare_airline, SUM(fare.round_trip_cost) AS total_round_trip_cost FROM fare GROUP BY fare.fare_airline ORDER BY CASE WHEN total_round_trip_cost IS NULL THEN 1 ELSE 0 END DESC, total_round_trip_cost DESC;",What is the total cost of round-trip fares for each airline code?, atis,sqlite,group_by,"SELECT fare.fare_airline, AVG(fare.round_trip_cost) AS average_cost FROM 
fare WHERE fare.from_airport = 'LAX' AND fare.to_airport = 'ORD' GROUP BY fare.fare_airline ORDER BY average_cost DESC;SELECT airline.airline_name, AVG(fare.round_trip_cost) AS avg_round_trip_cost FROM fare JOIN airline ON fare.fare_airline = airline.airline_code WHERE fare.from_airport = 'LAX' AND fare.to_airport = 'ORD' GROUP BY airline.airline_name ORDER BY CASE WHEN avg_round_trip_cost IS NULL THEN 1 ELSE 0 END DESC, avg_round_trip_cost DESC;","What is the average cost of round-trip fares from Los Angeles (LAX) to Chicago (ORD) for each airline, sorted in descending order by average cost?", atis,sqlite,group_by,"SELECT f.from_airport, f.to_airport, AVG(f.one_direction_cost) AS average_cost FROM fare AS f GROUP BY f.from_airport, f.to_airport ORDER BY CASE WHEN f.from_airport IS NULL THEN 1 ELSE 0 END, f.from_airport, CASE WHEN f.to_airport IS NULL THEN 1 ELSE 0 END, f.to_airport;",What is the average cost of a one-way trip for each airport pair in the fare table?, @@ -87,18 +87,18 @@ atis,sqlite,table_join,"SELECT DISTINCT airline.airline_name FROM flight_stop JO atis,sqlite,table_join,"SELECT DISTINCT airline.airline_name FROM flight JOIN airline ON flight.airline_code = airline.airline_code WHERE flight.from_airport = 'LAX' AND flight.to_airport = 'ORD';SELECT DISTINCT airline.airline_code FROM flight JOIN airline ON flight.airline_code = airline.airline_code WHERE flight.from_airport = 'LAX' AND flight.to_airport = 'ORD';SELECT DISTINCT airline.airline_name, airline.airline_code FROM flight JOIN airline ON flight.airline_code = airline.airline_code WHERE flight.from_airport = 'LAX' AND flight.to_airport = 'ORD';",Which airlines offer flights from LAX to ORD?, atis,sqlite,table_join,"SELECT airline.airline_name, flight.stops FROM flight JOIN airline ON flight.airline_code = airline.airline_code WHERE flight.from_airport = 'ORD' AND flight.to_airport = 'JFK' ORDER BY CASE WHEN flight.stops IS NULL THEN 1 ELSE 0 END, flight.stops;SELECT airline.airline_code, 
flight.stops FROM flight JOIN airline ON flight.airline_code = airline.airline_code WHERE flight.from_airport = 'ORD' AND flight.to_airport = 'JFK' ORDER BY CASE WHEN flight.stops IS NULL THEN 1 ELSE 0 END, flight.stops;SELECT airline.airline_name, airline.airline_code, flight.stops FROM flight JOIN airline ON flight.airline_code = airline.airline_code WHERE flight.from_airport = 'ORD' AND flight.to_airport = 'JFK' ORDER BY CASE WHEN flight.stops IS NULL THEN 1 ELSE 0 END, flight.stops;","Which airlines offer flights from Chicago (ORD) to New York (JFK), and how many stops do they have, sorted by number of stops in ascending order?", atis,sqlite,table_join,"SELECT DISTINCT airline.airline_name FROM airline WHERE NOT airline.airline_code IN (SELECT flight.airline_code FROM flight WHERE flight.from_airport = 'JFK' OR flight.to_airport = 'JFK' OR flight.stops > 0);SELECT DISTINCT airline.airline_code FROM airline WHERE NOT airline.airline_code IN (SELECT flight.airline_code FROM flight WHERE flight.from_airport = 'JFK' OR flight.to_airport = 'JFK' OR flight.stops > 0);SELECT DISTINCT airline.airline_name, airline.airline_code FROM airline WHERE NOT airline.airline_code IN (SELECT flight.airline_code FROM flight WHERE flight.from_airport = 'JFK' OR flight.to_airport = 'JFK' OR flight.stops > 0);","Which airlines do not have any flights that either depart from/arrive at JFK, or have one or more stops?", -atis,sqlite,instruct,SELECT state_code FROM airport WHERE LOWER(airport_name) LIKE '%Orlando International Airport%';,Which state code is Orlando International Airport in?,"Filter airport, city, country names using ILIKE. -Filter state code (eg NY), airport codes (eg JFK) using case-insensitive matches. +atis,sqlite,instruct,SELECT state_code FROM airport WHERE LOWER(airport_name) LIKE '%Orlando International Airport%';,Which state code is Orlando International Airport in?,"Filter airport, city, country names using LIKE. 
+Filter state code (eg NY), airport codes (eg JFK) using case-insensitive matches. If multiple flight days are requested, use ILIKE and wildcards for each of the days separately, since they are not necessarily ordered." -atis,sqlite,instruct,"SELECT flight.flight_number FROM flight WHERE LOWER(flight.flight_days) LIKE '%mon%' AND LOWER(flight.flight_days) LIKE '%wed%';SELECT flight.flight_id FROM flight WHERE LOWER(flight.flight_days) LIKE '%mon%' AND LOWER(flight.flight_days) LIKE '%wed%';SELECT flight.flight_number, flight.flight_id FROM flight WHERE LOWER(flight.flight_days) LIKE '%mon%' AND LOWER(flight.flight_days) LIKE '%wed%';",Which flights operate on Mondays and Wednesdays? Give me the relevant flight numbers,"Filter airport, city, country names using ILIKE. -Filter state code (eg NY), airport codes (eg JFK) using case-insensitive matches. +atis,sqlite,instruct,"SELECT flight.flight_number FROM flight WHERE LOWER(flight.flight_days) LIKE '%mon%' AND LOWER(flight.flight_days) LIKE '%wed%';SELECT flight.flight_id FROM flight WHERE LOWER(flight.flight_days) LIKE '%mon%' AND LOWER(flight.flight_days) LIKE '%wed%';SELECT flight.flight_number, flight.flight_id FROM flight WHERE LOWER(flight.flight_days) LIKE '%mon%' AND LOWER(flight.flight_days) LIKE '%wed%';",Which flights operate on Mondays and Wednesdays? Give me the relevant flight numbers,"Filter airport, city, country names using LIKE. +Filter state code (eg NY), airport codes (eg JFK) using case-insensitive matches. If multiple flight days are requested, use ILIKE for each of the days separately, since they are not necessarily ordered." -atis,sqlite,instruct,SELECT SUM(fare.round_trip_cost) AS total_round_trip_cost FROM fare WHERE fare.from_airport = 'JFK' AND fare.to_airport = 'LAX';,What is the total cost of all round-trip fares from New York (JFK) to Los Angeles?,"Filter airport, city, country names using ILIKE. -Filter state code (eg NY), airport codes (eg JFK) using case-insensitive matches. 
+atis,sqlite,instruct,SELECT SUM(fare.round_trip_cost) AS total_round_trip_cost FROM fare WHERE fare.from_airport = 'JFK' AND fare.to_airport = 'LAX';,What is the total cost of all round-trip fares from New York (JFK) to Los Angeles?,"Filter airport, city, country names using LIKE. +Filter state code (eg NY), airport codes (eg JFK) using case-insensitive matches. fare.round_trip_required is not needed when getting the round trip cost." -atis,sqlite,instruct,SELECT minimum_connect_time FROM airport WHERE airport_code = 'JFK';,What is the minimum amount of time required for a connecting flight at JFK Airport?,"Filter airport, city, country names using ILIKE. +atis,sqlite,instruct,SELECT minimum_connect_time FROM airport WHERE airport_code = 'JFK';,What is the minimum amount of time required for a connecting flight at JFK Airport?,"Filter airport, city, country names using LIKE. Filter state code (eg NY) and airport codes (eg JFK) using upper-case matches." -atis,sqlite,instruct,SELECT COUNT(DISTINCT flight_fare.flight_id) FROM flight_fare JOIN fare ON flight_fare.fare_id = fare.fare_id WHERE fare.round_trip_required = 'Yes';,How many flights require a round-trip to purchase the fare?,"Filter airport, city, country names using ILIKE. +atis,sqlite,instruct,SELECT COUNT(DISTINCT flight_fare.flight_id) FROM flight_fare JOIN fare ON flight_fare.fare_id = fare.fare_id WHERE fare.round_trip_required = 'Yes';,How many flights require a round-trip to purchase the fare?,"Filter airport, city, country names using LIKE. Filter state code (eg NY) and airport codes (eg JFK) using upper-case matches." 
geography,sqlite,group_by,"SELECT city.country_name, SUM(city.population) AS total_population FROM city GROUP BY city.country_name ORDER BY total_population DESC;",What is the total population in cities by country?, geography,sqlite,group_by,"SELECT river.country_name, AVG(river.length) AS average_length FROM river GROUP BY river.country_name ORDER BY average_length DESC;",What is the average length of rivers in each country?, @@ -120,11 +120,11 @@ geography,sqlite,table_join,SELECT border_info.border FROM border_info JOIN lake geography,sqlite,table_join,"SELECT lake.lake_name FROM lake JOIN state ON lake.state_name = state.state_name WHERE state.area > 1000 AND LOWER(lake.lake_name) LIKE 'Lake%' ORDER BY CASE WHEN lake.lake_name IS NULL THEN 1 ELSE 0 END, lake.lake_name;","Which lakes have a name that starts with ""Lake""? They should be located in states with an area greater than 1000 square kilometers.", geography,sqlite,table_join,"SELECT highlow.state_name, highlow.highest_point, state.density FROM highlow JOIN state ON highlow.state_name = state.state_name;",What is the highest point in each state and what is the population density of that state?, geography,sqlite,table_join,"SELECT l.country_name, AVG(r.length) AS average_length FROM river AS r JOIN lake AS l ON r.country_name = l.country_name GROUP BY 1;",What is the average length of rivers per country in countries with a lake?, -geography,sqlite,instruct,SELECT state_name FROM state WHERE population < 100000;,Which states have fewer than a hundred thousand people?,Always filter names using ILIKE -geography,sqlite,instruct,"SELECT river_name FROM river WHERE traverse LIKE '%,%,%';",Which rivers traverse at least 3 cities/landmarks?,Always filter names using ILIKE -geography,sqlite,instruct,"SELECT lake_name, area FROM lake WHERE LOWER(state_name) LIKE '%Michigan%';",What are the names and areas of the lakes in Michigan?,Always filter names using ILIKE -geography,sqlite,instruct,"SELECT mountain_name, 
mountain_altitude FROM mountain WHERE LOWER(country_name) LIKE '%Nepal%';",What are the names and altitudes of the mountains in Nepal?,Always filter names using ILIKE -geography,sqlite,instruct,"SELECT city_name, population FROM city WHERE LOWER(country_name) LIKE '%United States%';",Get the cities in the United States and their population,Always filter names using ILIKE +geography,sqlite,instruct,SELECT state_name FROM state WHERE population < 100000;,Which states have fewer than a hundred thousand people?,Always filter names using LIKE +geography,sqlite,instruct,"SELECT river_name FROM river WHERE traverse LIKE '%,%,%';",Which rivers traverse at least 3 cities/landmarks?,Always filter names using LIKE +geography,sqlite,instruct,"SELECT lake_name, area FROM lake WHERE LOWER(state_name) LIKE '%Michigan%';",What are the names and areas of the lakes in Michigan?,Always filter names using LIKE +geography,sqlite,instruct,"SELECT mountain_name, mountain_altitude FROM mountain WHERE LOWER(country_name) LIKE '%Nepal%';",What are the names and altitudes of the mountains in Nepal?,Always filter names using LIKE +geography,sqlite,instruct,"SELECT city_name, population FROM city WHERE LOWER(country_name) LIKE '%United States%';",Get the cities in the United States and their population,Always filter names using LIKE restaurants,sqlite,group_by,"SELECT restaurant.food_type, COUNT(DISTINCT restaurant.id) AS total_number_of_restaurants FROM restaurant GROUP BY restaurant.food_type;",What is the total number of restaurants serving each type of food?, restaurants,sqlite,group_by,"SELECT location.city_name, COUNT(DISTINCT location.restaurant_id) AS total_count FROM LOCATION GROUP BY location.city_name;",What is the total count of restaurants in each city?, restaurants,sqlite,group_by,"SELECT restaurant.food_type, AVG(restaurant.rating) AS average_rating FROM restaurant GROUP BY restaurant.food_type ORDER BY average_rating DESC;",What is the average rating of restaurants serving each 
type of food?, @@ -175,7 +175,7 @@ scholar,sqlite,table_join,"SELECT author.authorname, COUNT(DISTINCT writes.paper scholar,sqlite,table_join,SELECT COUNT(DISTINCT paperkeyphrase.keyphraseid) AS total_keyphrases FROM paper JOIN journal ON paper.journalid = journal.journalid JOIN paperkeyphrase ON paper.paperid = paperkeyphrase.paperid WHERE LOWER(journal.journalname) LIKE '%IEEE Transactions%';,"What is the total number of unique keyphrases associated with papers published in the journal with ""IEEE Transactions"" in its name?", scholar,sqlite,table_join,"SELECT journal.journalname, COUNT(DISTINCT paper.paperid) AS total_papers FROM paper JOIN journal ON paper.journalid = journal.journalid GROUP BY journal.journalname ORDER BY CASE WHEN journal.journalname IS NULL THEN 1 ELSE 0 END, journal.journalname;","What is the total number of papers published in each journal, ordered by the journal name?", scholar,sqlite,table_join,"SELECT paperdataset.paperid, COUNT(cite.citedpaperid) AS citation_count FROM paperdataset JOIN cite ON paperdataset.paperid = cite.citedpaperid WHERE paperdataset.datasetid = (SELECT datasetid FROM dataset WHERE LOWER(datasetname) LIKE '%COVID-19 Research%') GROUP BY paperdataset.paperid ORDER BY CASE WHEN citation_count IS NULL THEN 1 ELSE 0 END DESC, citation_count DESC;SELECT p.title, COUNT(c.citingpaperid) AS num_citing_papers FROM paper AS p JOIN paperdataset AS pd ON p.paperid = pd.paperid JOIN cite AS c ON p.paperid = c.citedpaperid JOIN dataset AS d ON pd.datasetid = d.datasetid WHERE d.datasetname = 'COVID-19 Research' GROUP BY p.title ORDER BY num_citing_papers DESC;","How many papers cite each paper in the dataset named ""COVID-19 Research""?", -scholar,sqlite,instruct,"SELECT venue.venuename, COUNT(DISTINCT paper.paperid) FROM paper JOIN venue ON paper.venueid = venue.venueid WHERE paper.venueid = (SELECT venueid FROM paper WHERE paperid = 2) GROUP BY venue.venuename;","What is the name of the venue where the paper with paper ID 2 was 
published, and how many papers were published in total in that venue?",Always filter strings using ILIKE +scholar,sqlite,instruct,"SELECT venue.venuename, COUNT(DISTINCT paper.paperid) FROM paper JOIN venue ON paper.venueid = venue.venueid WHERE paper.venueid = (SELECT venueid FROM paper WHERE paperid = 2) GROUP BY venue.venuename;","What is the name of the venue where the paper with paper ID 2 was published, and how many papers were published in total in that venue?",Always filter strings using LIKE scholar,sqlite,instruct,SELECT author.authorname FROM author JOIN writes ON author.authorid = writes.authorid JOIN paper ON writes.paperid = paper.paperid WHERE paper.title = 'The Effects of Climate Change on Agriculture';,"What are the names of the authors who wrote the paper with the title ""The Effects of Climate Change on Agriculture""?",Always filter strings with an exact match scholar,sqlite,instruct,SELECT COUNT(paper.paperid) FROM paper JOIN journal ON paper.journalid = journal.journalid WHERE paper.year = 2020 AND LOWER(journal.journalname) LIKE '%nature%';,"How many papers were published in the journal ""nature"" in the year 2020?",Filter strings with case-insensitive matching scholar,sqlite,instruct,SELECT COUNT(DISTINCT paper.paperid) FROM paper JOIN journal ON paper.journalid = journal.journalid JOIN paperkeyphrase ON paper.paperid = paperkeyphrase.paperid JOIN keyphrase ON paperkeyphrase.keyphraseid = keyphrase.keyphraseid WHERE LOWER(keyphrase.keyphrasename) LIKE '%machine learning%' AND journal.journalname = 'IEEE Transactions on Pattern Analysis and Machine Intelligence';,"How many papers are associated with the keyphrase ""machine learning"" and were published in the journal named ""IEEE Transactions on Pattern Analysis and Machine Intelligence""?","Filter paper names, journal names, using exact matches. Filter keyphrases with case-insensitive matching." 
@@ -205,28 +205,28 @@ yelp,sqlite,table_join,"SELECT category.category_name, SUM(business.review_count yelp,sqlite,table_join,"SELECT category.category_name, SUM(business.review_count) AS total_reviews FROM business JOIN category ON business.business_id = category.business_id GROUP BY category.category_name ORDER BY total_reviews DESC;SELECT category.id, SUM(business.review_count) AS total_reviews FROM business JOIN category ON business.business_id = category.business_id GROUP BY category.id ORDER BY total_reviews DESC;SELECT category.category_name, category.id, SUM(business.review_count) AS total_reviews FROM business JOIN category ON business.business_id = category.business_id GROUP BY category.category_name, category.id ORDER BY total_reviews DESC;",What is the total number of reviews for each business category?, yelp,sqlite,table_join,"SELECT business.business_id, SUM(checkin.count) AS total_checkins FROM business JOIN checkin ON business.business_id = checkin.business_id WHERE business.state = 'CA' GROUP BY business.business_id ORDER BY total_checkins DESC;SELECT business.name, SUM(checkin.count) AS total_checkins FROM business JOIN checkin ON business.business_id = checkin.business_id WHERE business.state = 'CA' GROUP BY business.name ORDER BY total_checkins DESC;SELECT business.bid, SUM(checkin.count) AS total_checkins FROM business JOIN checkin ON business.business_id = checkin.business_id WHERE business.state = 'CA' GROUP BY business.bid ORDER BY total_checkins DESC;SELECT business.business_id, business.name, SUM(checkin.count) AS total_checkins FROM business JOIN checkin ON business.business_id = checkin.business_id WHERE business.state = 'CA' GROUP BY business.business_id, business.name ORDER BY total_checkins DESC;SELECT business.business_id, business.bid, SUM(checkin.count) AS total_checkins FROM business JOIN checkin ON business.business_id = checkin.business_id WHERE business.state = 'CA' GROUP BY business.business_id, business.bid ORDER BY 
total_checkins DESC;SELECT business.name, business.bid, SUM(checkin.count) AS total_checkins FROM business JOIN checkin ON business.business_id = checkin.business_id WHERE business.state = 'CA' GROUP BY business.name, business.bid ORDER BY total_checkins DESC;SELECT business.business_id, business.name, business.bid, SUM(checkin.count) AS total_checkins FROM business JOIN checkin ON business.business_id = checkin.business_id WHERE business.state = 'CA' GROUP BY business.business_id, business.name, business.bid ORDER BY total_checkins DESC;",What is the total number of check-ins for each business in the state of California?, yelp,sqlite,table_join,"SELECT category.category_name FROM (SELECT business.business_id, AVG(review.rating) AS average_rating FROM business JOIN review ON business.business_id = review.business_id GROUP BY business.business_id) AS business_rating JOIN category ON business_rating.business_id = category.business_id GROUP BY category.category_name ORDER BY AVG(business_rating.average_rating) DESC LIMIT 2;SELECT category.id FROM (SELECT business.business_id, AVG(review.rating) AS average_rating FROM business JOIN review ON business.business_id = review.business_id GROUP BY business.business_id) AS business_rating JOIN category ON business_rating.business_id = category.business_id GROUP BY category.id ORDER BY AVG(business_rating.average_rating) DESC LIMIT 2;SELECT category.category_name, category.id FROM (SELECT business.business_id, AVG(review.rating) AS average_rating FROM business JOIN review ON business.business_id = review.business_id GROUP BY business.business_id) AS business_rating JOIN category ON business_rating.business_id = category.business_id GROUP BY category.category_name, category.id ORDER BY AVG(business_rating.average_rating) DESC LIMIT 2;",What are the top 2 categories of businesses with the highest average rating?, -yelp,sqlite,instruct,SELECT COUNT(review.rid) AS total_reviews FROM review JOIN category ON review.business_id = 
category.business_id WHERE review.year = 2021 AND LOWER(category.category_name) LIKE '%Cafe%';,"What is the total number of reviews posted in the year 2021 for businesses in the category ""Cafe""?","Filter strings of users, city, address, business.name using ILIKE with wildcards. -Filter strings of state using exact upper case matches. -Assume the rating of a business to be its average rating, and compute it before computing other aggregates on it. -Always truncate dates in the question to its current month and year (if applicable) before filtering on `review.year` and `review.month`. +yelp,sqlite,instruct,SELECT COUNT(review.rid) AS total_reviews FROM review JOIN category ON review.business_id = category.business_id WHERE review.year = 2021 AND LOWER(category.category_name) LIKE '%Cafe%';,"What is the total number of reviews posted in the year 2021 for businesses in the category ""Cafe""?","Filter strings of users, city, address, business.name using LIKE with wildcards. +Filter strings of state using exact upper case matches. +Assume the rating of a business to be its average rating, and compute it before computing other aggregates on it. +Always truncate dates in the question to its current month and year (if applicable) before filtering on `review.year` and `review.month`. " -yelp,sqlite,instruct,"SELECT AVG(sf.average_rating) AS sf_average_rating FROM (SELECT business.business_id, AVG(review.rating) AS average_rating FROM business JOIN review ON business.business_id = review.business_id WHERE LOWER(LOWER(business.city)) LIKE '%san francisco%' GROUP BY business.business_id) AS sf;",What is the average rating of businesses in the city of San Francisco?,"Filter strings of users, city, address, business.name using ILIKE with wildcards. -Filter strings of state using exact upper case matches. -The rating of businesses in a city refers to the average rating of the businesses in that city. 
I.e., you must compute the average rating of each business before computing the average rating of businesses in the city. -Always truncate dates in the question to its current month and year (if applicable) before filtering on `review.year` and `review.month`. +yelp,sqlite,instruct,"SELECT AVG(sf.average_rating) AS sf_average_rating FROM (SELECT business.business_id, AVG(review.rating) AS average_rating FROM business JOIN review ON business.business_id = review.business_id WHERE LOWER(LOWER(business.city)) LIKE '%san francisco%' GROUP BY business.business_id) AS sf;",What is the average rating of businesses in the city of San Francisco?,"Filter strings of users, city, address, business.name using LIKE with wildcards. +Filter strings of state using exact upper case matches. +The rating of businesses in a city refers to the average rating of the businesses in that city. I.e., you must compute the average rating of each business before computing the average rating of businesses in the city. +Always truncate dates in the question to its current month and year (if applicable) before filtering on `review.year` and `review.month`. " -yelp,sqlite,instruct,"SELECT review.business_id, COUNT(*) AS review_count FROM review WHERE review.year = 2021 GROUP BY review.business_id ORDER BY review_count DESC;",How many reviews were posted for each business id in the year 2021?,"Filter strings of users, city, address, business.name using ILIKE with wildcards. -Filter strings of state using exact upper case matches. -Assume the rating of a business to be its average rating, and compute it before computing other aggregates on it. -Always truncate dates in the question to its current month and year (if applicable) before filtering on `review.year` and `review.month`. 
+yelp,sqlite,instruct,"SELECT review.business_id, COUNT(*) AS review_count FROM review WHERE review.year = 2021 GROUP BY review.business_id ORDER BY review_count DESC;",How many reviews were posted for each business id in the year 2021?,"Filter strings of users, city, address, business.name using LIKE with wildcards. +Filter strings of state using exact upper case matches. +Assume the rating of a business to be its average rating, and compute it before computing other aggregates on it. +Always truncate dates in the question to its current month and year (if applicable) before filtering on `review.year` and `review.month`. " -yelp,sqlite,instruct,SELECT COUNT(*) FROM review JOIN users ON review.user_id = users.user_id WHERE LOWER(users.name) LIKE '%Sarah Williams%' AND review.month = 'April' AND review.year = 2021;,"How many reviews were posted by users with the name ""Sarah Williams"" in the month of April 2021?","Filter strings of users, city, address, business.name using ILIKE with wildcards. -Filter strings of state using exact upper case matches. -Assume the rating of a business to be its average rating, and compute it before computing other aggregates on it. -Always truncate dates in the question to its current month and year (if applicable) before filtering on `review.year` and `review.month`. +yelp,sqlite,instruct,SELECT COUNT(*) FROM review JOIN users ON review.user_id = users.user_id WHERE LOWER(users.name) LIKE '%Sarah Williams%' AND review.month = 'April' AND review.year = 2021;,"How many reviews were posted by users with the name ""Sarah Williams"" in the month of April 2021?","Filter strings of users, city, address, business.name using LIKE with wildcards. +Filter strings of state using exact upper case matches. +Assume the rating of a business to be its average rating, and compute it before computing other aggregates on it. 
+Always truncate dates in the question to its current month and year (if applicable) before filtering on `review.year` and `review.month`. " -yelp,sqlite,instruct,SELECT SUM(checkin.count) AS total_checkins FROM business JOIN checkin ON business.business_id = checkin.business_id WHERE business.state = 'CA' AND LOWER(checkin.day) LIKE '%Monday%';,How many check-ins occurred on Mondays at businesses in the state of California?,"Filter strings of users, city, address, business.name using ILIKE with wildcards. -Filter strings of state using exact upper case matches. -Assume the rating of a business to be its average rating, and compute it before computing other aggregates on it. -Always truncate dates in the question to its current month and year (if applicable) before filtering on `review.year` and `review.month`. +yelp,sqlite,instruct,SELECT SUM(checkin.count) AS total_checkins FROM business JOIN checkin ON business.business_id = checkin.business_id WHERE business.state = 'CA' AND LOWER(checkin.day) LIKE '%Monday%';,How many check-ins occurred on Mondays at businesses in the state of California?,"Filter strings of users, city, address, business.name using LIKE with wildcards. +Filter strings of state using exact upper case matches. +Assume the rating of a business to be its average rating, and compute it before computing other aggregates on it. +Always truncate dates in the question to its current month and year (if applicable) before filtering on `review.year` and `review.month`. "