From abacf4d15e51df3a12820fc19c006b28e1a0fb66 Mon Sep 17 00:00:00 2001 From: wendy Date: Tue, 7 May 2024 10:16:07 +0800 Subject: [PATCH 1/4] sql edits --- data/instruct_basic_postgres.csv | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/data/instruct_basic_postgres.csv b/data/instruct_basic_postgres.csv index 7c738b5..d1af5ac 100644 --- a/data/instruct_basic_postgres.csv +++ b/data/instruct_basic_postgres.csv @@ -1,6 +1,6 @@ db_name,query_category,question,query broker,basic_join_date_group_order_limit,"What are the top 5 countries by total transaction amount in the past 30 days, inclusive of 30 days ago? Return the country name, number of transactions and total transaction amount.","SELECT c.sbCustCountry, COUNT(t.sbTxId) AS num_transactions, SUM(t.sbTxAmount) AS total_amount FROM sbCustomer c JOIN sbTransaction t ON c.sbCustId = t.sbTxCustId WHERE t.sbTxDateTime >= CURRENT_DATE - INTERVAL '30 days' GROUP BY c.sbCustCountry ORDER BY total_amount DESC LIMIT 5" -broker,basic_join_date_group_order_limit,"How many distinct customers made each type of transaction between Jan 1, 2023 and Mar 31, 2023 (inclusive of start and end dates)? Return the transaction type, number of distinct customers and average number of shares, for the top 3 transaction types by number of customers.","SELECT t.sbTxType, COUNT(DISTINCT c.sbCustId) AS num_customers, AVG(t.sbTxShares) AS avg_shares FROM sbTransaction t JOIN sbCustomer c ON t.sbTxCustId = c.sbCustId WHERE t.sbTxDateTime BETWEEN '2023-01-01' AND '2023-03-31' GROUP BY t.sbTxType ORDER BY num_customers DESC LIMIT 3" +broker,basic_join_date_group_order_limit,"How many distinct customers made each type of transaction between Jan 1, 2023 and Mar 31, 2023 (inclusive of start and end dates)? 
Return the transaction type, number of distinct customers and average number of shares, for the top 3 transaction types by number of customers.","SELECT t.sbTxType, COUNT(DISTINCT t.sbTxCustId) AS num_customers, AVG(t.sbTxShares) AS avg_shares FROM sbTransaction t WHERE t.sbTxDateTime BETWEEN '2023-01-01' AND '2023-03-31 23:59:59' GROUP BY t.sbTxType ORDER BY num_customers DESC LIMIT 3" broker,basic_join_group_order_limit,"What are the top 10 ticker symbols by total transaction amount? Return the ticker symbol, number of transactions and total transaction amount.","SELECT tk.sbTickerSymbol, COUNT(tx.sbTxId) AS num_transactions, SUM(tx.sbTxAmount) AS total_amount FROM sbTicker tk JOIN sbTransaction tx ON tk.sbTickerId = tx.sbTxTickerId GROUP BY tk.sbTickerSymbol ORDER BY total_amount DESC LIMIT 10" broker,basic_join_group_order_limit,"What are the top 5 combinations of customer state and ticker type by number of transactions? Return the customer state, ticker type and number of transactions.","SELECT c.sbCustState, t.sbTickerType, COUNT(*) AS num_transactions FROM sbTransaction tx JOIN sbCustomer c ON tx.sbTxCustId = c.sbCustId JOIN sbTicker t ON tx.sbTxTickerId = t.sbTickerId GROUP BY c.sbCustState, t.sbTickerType ORDER BY num_transactions DESC LIMIT 5" broker,basic_join_distinct,Return the distinct list of customer IDs who have made a 'buy' transaction.,SELECT DISTINCT c.sbCustId FROM sbCustomer c JOIN sbTransaction t ON c.sbCustId = t.sbTxCustId WHERE t.sbTxType = 'buy' @@ -10,7 +10,7 @@ broker,basic_group_order_limit,What are the top 5 countries by number of custome broker,basic_left_join,Return the customer ID and name of customers who have not made any transactions.,"SELECT c.sbCustId, c.sbCustName FROM sbCustomer c LEFT JOIN sbTransaction t ON c.sbCustId = t.sbTxCustId WHERE t.sbTxCustId IS NULL" broker,basic_left_join,Return the ticker ID and symbol of tickers that do not have any daily price records.,"SELECT tk.sbTickerId, tk.sbTickerSymbol FROM sbTicker tk 
LEFT JOIN sbDailyPrice dp ON tk.sbTickerId = dp.sbDpTickerId WHERE dp.sbDpTickerId IS NULL" car_dealership,basic_join_date_group_order_limit,"Who were the top 3 sales representatives by total revenue in the past 3 months, inclusive of today's date? Return their first name, last name, total number of sales and total revenue.","SELECT c.first_name, c.last_name, COUNT(s.id) AS total_sales, SUM(s.sale_price) AS total_revenue FROM sales s JOIN salespersons c ON s.salesperson_id = c.id WHERE s.sale_date >= CURRENT_DATE - INTERVAL '3 months' GROUP BY c.first_name, c.last_name ORDER BY total_revenue DESC LIMIT 3" -car_dealership,basic_join_date_group_order_limit,"Return the top 5 salespersons by number of sales in the past 30 days? Return their first and last name, total sales count and total revenue amount.","SELECT sp.first_name, sp.last_name, COUNT(s.id) AS total_sales, SUM(s.sale_price) AS total_revenue FROM sales s JOIN salespersons sp ON s.salesperson_id = sp.id WHERE s.sale_date >= CURRENT_DATE - INTERVAL '30 days' GROUP BY sp.first_name, sp.last_name ORDER BY total_sales DESC LIMIT 5" +car_dealership,basic_join_date_group_order_limit,"Return the top 5 salespersons by number of sales in the past 30 days? 
Return their first and last name, total sales count and total revenue amount.","SELECT sp.first_name, sp.last_name, COUNT(s.id) AS total_sales, SUM(s.sale_price) AS total_revenue FROM sales s JOIN salespersons sp ON s.salesperson_id = sp.id WHERE s.sale_date >= CURRENT_DATE - INTERVAL '30 days' GROUP BY sp.first_name, sp.last_name, sp.id ORDER BY total_sales DESC LIMIT 5" car_dealership,basic_join_group_order_limit,"Return the top 5 states by total revenue, showing the number of unique customers and total revenue for each state.","SELECT c.state, COUNT(DISTINCT s.customer_id) AS unique_customers, SUM(s.sale_price) AS total_revenue FROM sales s JOIN customers c ON s.customer_id = c.id GROUP BY c.state ORDER BY total_revenue DESC LIMIT 5" car_dealership,basic_join_group_order_limit,"What are the top 5 best selling car models by total revenue? Return the make, model, total number of sales and total revenue.","SELECT c.make, c.model, COUNT(s.id) AS total_sales, SUM(s.sale_price) AS total_revenue FROM sales s JOIN cars c ON s.car_id = c.id GROUP BY c.make, c.model ORDER BY total_revenue DESC LIMIT 5" car_dealership,basic_join_distinct,"Return the distinct list of customer IDs that have made a purchase, based on joining the customers and sales tables.",SELECT DISTINCT c.id AS customer_id FROM customers c JOIN sales s ON c.id = s.customer_id @@ -34,7 +34,7 @@ ewallet,basic_join_date_group_order_limit,"How many distinct active users sent m ewallet,basic_join_group_order_limit,"What are the top 3 most frequently used coupon codes? 
Return the coupon code, total number of redemptions, and total amount redeemed.","SELECT c.code AS coupon_code, COUNT(t.txid) AS redemption_count, SUM(t.amount) AS total_discount FROM consumer_div.coupons c JOIN consumer_div.wallet_transactions_daily t ON c.cid = t.coupon_id GROUP BY c.code ORDER BY redemption_count DESC LIMIT 3" ewallet,basic_join_group_order_limit,"Which are the top 5 countries by total transaction amount sent by users, sender_type = 0? Return the country, number of distinct users who sent, and total transaction amount.","SELECT u.country, COUNT(DISTINCT t.sender_id) AS user_count, SUM(t.amount) AS total_amount FROM consumer_div.users u JOIN consumer_div.wallet_transactions_daily t ON u.uid = t.sender_id WHERE t.sender_type = 0 GROUP BY u.country ORDER BY total_amount DESC LIMIT 5" ewallet,basic_join_distinct,Return the distinct list of merchant IDs that have received money from a transaction. Include all transaction types in the results you return.,SELECT DISTINCT m.mid AS merchant_id FROM consumer_div.merchants m JOIN consumer_div.wallet_transactions_daily t ON m.mid = t.receiver_id WHERE t.receiver_type = 1 -ewallet,basic_join_distinct,Return the distinct list of user IDs who have received transaction notifications.,SELECT DISTINCT u.uid AS user_id FROM consumer_div.users u JOIN consumer_div.notifications n ON u.uid = n.user_id WHERE n.type = 'transaction' +ewallet,basic_join_distinct,Return the distinct list of user IDs who have received transaction notifications.,SELECT DISTINCT user_id FROM consumer_div.notifications WHERE type = 'transaction' ewallet,basic_group_order_limit,What are the top 3 most common transaction statuses and their respective counts?,"SELECT status, COUNT(*) AS COUNT FROM consumer_div.wallet_transactions_daily GROUP BY status ORDER BY COUNT DESC LIMIT 3" ewallet,basic_group_order_limit,What are the top 2 most frequently used device types for user sessions and their respective counts?,"SELECT device_type, COUNT(*) AS 
COUNT FROM consumer_div.user_sessions GROUP BY device_type ORDER BY COUNT DESC LIMIT 2" ewallet,basic_left_join,Return users (user ID and username) who have not received any notifications,"SELECT u.uid, u.username FROM consumer_div.users u LEFT JOIN consumer_div.notifications n ON u.uid = n.user_id WHERE n.id IS NULL" From 54fb9f38bea48478768e5bc7cb010d9b2bf42b73 Mon Sep 17 00:00:00 2001 From: wendy Date: Tue, 7 May 2024 22:31:41 +0800 Subject: [PATCH 2/4] sql edits for classic sql-eval --- data/questions_gen_postgres.csv | 64 ++++++++++++++++----------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/data/questions_gen_postgres.csv b/data/questions_gen_postgres.csv index de4d893..96c5066 100644 --- a/data/questions_gen_postgres.csv +++ b/data/questions_gen_postgres.csv @@ -7,33 +7,33 @@ What's the difference in time between the first and last paper published?,SELECT "Which authors have written publications in both the domain ""Machine Learning"" and the domain ""Data Science""?","SELECT {author.name,author.aid} FROM author WHERE author.aid IN (SELECT domain_author.aid FROM domain_author WHERE domain_author.did IN (SELECT domain.did FROM DOMAIN WHERE domain.name IN ('Machine Learning', 'Data Science') ) GROUP BY 1 HAVING COUNT(DISTINCT domain_author.did) = 2);",academic,group_by, What is the total number of citations received by each author?,"SELECT {author.name, author.aid}, sum(publication.citation_num) AS total_citations FROM author JOIN writes ON author.aid = writes.aid JOIN publication ON writes.pid = publication.pid GROUP BY {} ORDER BY total_citations DESC NULLS LAST;",academic,group_by, What is the total number of publications published in each year?,"SELECT publication.year, COUNT(DISTINCT publication.pid) AS total_publications FROM publication GROUP BY publication.year ORDER BY publication.year;",academic,group_by, -What is the average number of references cited by publications in each domain name?,"SELECT {domain.name,domain.did}, 
AVG(publication.reference_num) AS average_references FROM domain_publication JOIN publication ON domain_publication.pid = publication.pid JOIN DOMAIN ON domain.did = domain_publication.did GROUP BY {};",academic,group_by, +What is the average number of references cited by publications in each domain name?,"SELECT {domain.name,domain.did}, AVG(publication.reference_num) AS average_references FROM domain_publication JOIN publication ON domain_publication.pid = publication.pid JOIN domain ON domain.did = domain_publication.did GROUP BY {};",academic,group_by, What is the average number of citations received by publications in each year?,"SELECT publication.year, AVG(publication.citation_num) AS average_citations FROM publication GROUP BY publication.year ORDER BY publication.year NULLS LAST;",academic,group_by, What is the title of the publication that has received the highest number of citations?,SELECT publication.title FROM publication ORDER BY publication.citation_num DESC NULLS LAST LIMIT 1;,academic,order_by, -What are the top 5 domains with the highest number of authors associated with them?,"SELECT {d.name, d.did}, COUNT(DISTINCT a.aid) AS author_count FROM author a JOIN domain_author da ON a.aid = da.aid JOIN DOMAIN d ON da.did = d.did GROUP BY {} ORDER BY author_count DESC LIMIT 5;",academic,order_by, +What are the top 5 domains with the highest number of authors associated with them?,"SELECT {d.name, d.did}, COUNT(DISTINCT a.aid) AS author_count FROM author a JOIN domain_author da ON a.aid = da.aid JOIN domain d ON da.did = d.did GROUP BY {} ORDER BY author_count DESC LIMIT 5;",academic,order_by, "What are the top 3 titles of the publications that have the highest number of references cited, ordered by the number of references cited in descending order?",SELECT publication.title FROM publication ORDER BY publication.reference_num DESC LIMIT 3;,academic,order_by, What are the top 3 publications with the highest number of citations?,"SELECT 
{publication.title, publication.pid}, publication.citation_num FROM publication ORDER BY publication.citation_num DESC LIMIT 3;",academic,order_by, What are the titles of all publications ordered alphabetically?,SELECT DISTINCT publication.title FROM publication ORDER BY publication.title ASC NULLS LAST;,academic,order_by, What is the ratio of publications to authors in the database?,"SELECT CAST(COUNT(DISTINCT publication.pid) AS FLOAT) / NULLIF(COUNT(DISTINCT author.aid), 0) AS publication_to_author_ratio FROM publication, author;",academic,ratio, -What is the ratio of publications presented in conferences to publications published in journals?,"SELECT CAST(COUNT(DISTINCT publication.cid) AS FLOAT) / NULLIF(COUNT(DISTINCT publication.jid), 0) AS ratio FROM publication;",academic,ratio, -What is the ratio of the total number of publications to the total number of keywords within each domain ID? Show all domain IDs.,"SELECT domain_publication.did, CAST(COUNT(DISTINCT domain_publication.pid) AS FLOAT) / NULLIF(COUNT(DISTINCT domain_keyword.kid), 0) AS publication_to_keyword_ratio FROM domain_publication LEFT JOIN domain_keyword ON domain_publication.did = domain_keyword.did GROUP BY domain_publication.did ORDER BY publication_to_keyword_ratio DESC NULLS LAST;SELECT domain_publication.did, CAST(COUNT(DISTINCT domain_publication.pid) AS FLOAT) / NULLIF(COUNT(DISTINCT domain_keyword.kid), 0) AS publication_to_keyword_ratio FROM domain_keyword LEFT JOIN domain_publication ON domain_publication.did = domain_keyword.did GROUP BY domain_publication.did ORDER BY publication_to_keyword_ratio DESC NULLS LAST;",academic,ratio, -How does the ratio of publications to journals change over the years? 
Return the annual numbers of publications and journals as well.,"SELECT publication.year, COUNT(DISTINCT publication.pid) AS num_publications, COUNT(DISTINCT publication.jid) AS num_journals, CAST(COUNT(DISTINCT publication.pid) AS FLOAT) / NULLIF(COUNT(DISTINCT publication.jid), 0) AS ratio FROM publication GROUP BY publication.year ORDER BY publication.year;",academic,ratio, +What is the ratio of publications presented in conferences to publications published in journals?,"SELECT CAST(COUNT(DISTINCT CASE WHEN cid IS NOT NULL THEN pid END) AS FLOAT) / NULLIF(COUNT(DISTINCT CASE WHEN jid IS NOT NULL THEN pid END), 0) AS ratio FROM publication;",academic,ratio, +What is the ratio of the total number of publications to the total number of keywords within each domain ID? Show all domain IDs.,"SELECT domain_publication.did, CAST(COUNT(DISTINCT domain_publication.pid) AS FLOAT) / NULLIF(COUNT(DISTINCT domain_keyword.kid), 0) AS publication_to_keyword_ratio FROM domain_publication LEFT JOIN domain_keyword ON domain_publication.did = domain_keyword.did GROUP BY domain_publication.did ORDER BY publication_to_keyword_ratio DESC NULLS LAST;SELECT domain_publication.did, CAST(COUNT(DISTINCT domain_publication.pid) AS FLOAT) / NULLIF(COUNT(DISTINCT domain_keyword.kid), 0) AS publication_to_keyword_ratio FROM domain_keyword LEFT JOIN domain_publication ON domain_publication.did = domain_keyword.did GROUP BY domain_publication.did ORDER BY publication_to_keyword_ratio DESC NULLS LAST;SELECT d.did, COALESCE(CAST(COUNT(DISTINCT dp.pid) AS FLOAT) / NULLIF(COUNT(DISTINCT dk.kid), 0), 0) AS publication_to_keyword_ratio FROM domain d LEFT JOIN domain_publication dp ON d.did = dp.did LEFT JOIN domain_keyword dk ON d.did = dk.did GROUP BY d.did ORDER BY publication_to_keyword_ratio DESC NULLS LAST;",academic,ratio, +How does the ratio of publications to journals change over the years? 
Return the annual numbers of publications and journals as well.,"SELECT p.year, COUNT(DISTINCT p.pid) AS num_publications, COUNT(DISTINCT j.jid) AS num_journals, CAST(COUNT(DISTINCT p.pid) AS FLOAT) / NULLIF(COUNT(DISTINCT j.jid), 0) AS ratio FROM publication p LEFT JOIN journal j ON p.jid = j.jid GROUP BY p.year ORDER BY p.year;",academic,ratio, How does the ratio of authors to organizations differ by continent?,"SELECT o.continent, CAST(COUNT(DISTINCT a.aid) AS FLOAT) / NULLIF(COUNT(DISTINCT o.oid), 0) AS author_to_organization_ratio FROM author a JOIN organization o ON a.oid = o.oid GROUP BY o.continent ORDER BY author_to_organization_ratio DESC NULLS LAST;SELECT organization.continent, COUNT(DISTINCT author.aid)::float / NULLIF(COUNT(DISTINCT organization.oid), 0) AS ratio FROM organization LEFT JOIN author ON author.oid = organization.oid GROUP BY organization.continent ORDER BY ratio DESC NULLS LAST;SELECT organization.continent, COUNT(DISTINCT author.aid)::float / NULLIF(COUNT(DISTINCT organization.oid), 0) AS ratio FROM author LEFT JOIN organization ON author.oid = organization.oid GROUP BY organization.continent ORDER BY ratio DESC NULLS LAST;",academic,ratio, Which author had the most publications in the year 2021 and how many publications did he/she have that year?,"SELECT {author.name, author.aid}, COUNT(publication.pid) AS publication_count FROM writes JOIN author ON writes.aid = author.aid JOIN publication ON writes.pid = publication.pid WHERE publication.year = 2021 GROUP BY {} ORDER BY publication_count DESC NULLS LAST LIMIT 1;",academic,table_join, What is the total number of publications presented in each conference?,"SELECT {conference.name, conference.cid}, COUNT(publication.pid) AS total_publications FROM publication JOIN conference ON publication.cid = conference.cid GROUP BY {} ORDER BY total_publications DESC;",academic,table_join, "What is the total number of publications in each journal, ordered by the number of publications in descending 
order?","SELECT {journal.name, journal.jid}, COUNT(publication.pid) AS total_publications FROM publication JOIN journal ON publication.jid=journal.jid GROUP BY {} ORDER BY total_publications DESC NULLS LAST;SELECT {journal.name, journal.jid}, COUNT(publication.pid) AS total_publications FROM journal LEFT JOIN publication ON journal.jid=publication.jid GROUP BY {} ORDER BY total_publications DESC NULLS LAST;",academic,table_join, "How many publications were presented at each conference, ordered by the number of publications in descending order? Give the names of the conferences and their corresponding number of publications.","SELECT conference.name, COUNT(publication.pid) AS num_publications FROM publication JOIN conference ON publication.cid=conference.cid GROUP BY conference.name, conference.cid ORDER BY num_publications DESC NULLS LAST;",academic,table_join, "How many publications were published in journals whose names start with the letter ""J""?",SELECT count(DISTINCT publication.pid) FROM publication JOIN journal ON publication.jid = journal.jid WHERE journal.name ilike 'J%';,academic,table_join, -"Which organizations have authors who have written publications in the domain ""Machine Learning""?","SELECT DISTINCT {organization.name, organization.oid} FROM organization JOIN author ON organization.oid = author.oid JOIN writes ON author.aid = writes.aid JOIN domain_publication ON writes.pid = domain_publication.pid JOIN DOMAIN ON domain_publication.did = domain.did WHERE domain.name = 'Machine Learning';",academic,instruct,Always filter names using an exact match -Which authors belong to the same domain as Martin?,"SELECT DISTINCT {a2.name, a2.aid} FROM author a1 JOIN domain_author da1 ON a1.aid = da1.aid JOIN domain_author da2 ON da1.did = da2.did JOIN author a2 ON da2.aid = a2.aid WHERE LOWER(a1.name) LIKE '%martin%';",academic,instruct,Always filter names using ILIKE with percent sign wildcards +"Which organizations have authors who have written publications 
in the domain ""Machine Learning""?","SELECT DISTINCT {organization.name, organization.oid} FROM organization JOIN author ON organization.oid = author.oid JOIN writes ON author.aid = writes.aid JOIN domain_publication ON writes.pid = domain_publication.pid JOIN domain ON domain_publication.did = domain.did WHERE domain.name = 'Machine Learning';",academic,instruct,Always filter names using an exact match +Which authors belong to the same domain as Martin?,"SELECT DISTINCT {a2.name, a2.aid} FROM author a1 JOIN domain_author da1 ON a1.aid = da1.aid JOIN domain_author da2 ON da1.did = da2.did JOIN author a2 ON da2.aid = a2.aid WHERE LOWER(a1.name) ILIKE '%martin%';",academic,instruct,Always filter names using ILIKE with percent sign wildcards Which authors are not part of any organization?,"SELECT DISTINCT {name, aid} FROM author WHERE oid IS NULL",academic,instruct,Always filter names using ILIKE What are the publications written by authors from the 'Sociology' domain and presented at conferences in the year 2020?,"SELECT DISTINCT {publication.title, publication.pid} FROM DOMAIN JOIN domain_author ON domain.did = domain_author.did JOIN writes ON domain_author.aid = writes.aid JOIN publication ON writes.pid = publication.pid JOIN domain_conference ON domain.did = domain_conference.did WHERE domain.name ILIKE '%Sociology%' AND publication.year = 2020 AND publication.cid = domain_conference.cid;",academic,instruct,"To get publications written by authors from a given domain, you would need to join domain, domain_author, author to link the domain to the author first, and then join with write to link with the publication id. Finally, to see which ones were presented at conferences, you must join the domain table with the domain_conference table. You must also filter names using ILIKE." 
-"What are the names of the authors who have written publications in the domain ""Computer Science""?",SELECT DISTINCT author.name FROM author JOIN writes ON author.aid = writes.aid JOIN publication ON writes.pid = publication.pid JOIN domain_publication ON publication.pid = domain_publication.pid JOIN DOMAIN ON domain_publication.did = domain.did WHERE domain.name ilike '%computer%science%';,academic,instruct,"To get publications written by authors from a given domain, you would need to join domain, domain_author, author to link the domain to the author first, and then join with write to link with the publication id. You must also filter names using ILIKE." -What month were most students admitted?,"SELECT date_trunc('month', s.admit_term) AS MONTH, COUNT(*) AS total_students FROM student s GROUP BY MONTH ORDER BY total_students DESC LIMIT 1;",advising,date_functions, +"What are the names of the authors who have written publications in the domain ""Computer Science""?",SELECT DISTINCT author.name FROM author JOIN writes ON author.aid = writes.aid JOIN publication ON writes.pid = publication.pid JOIN domain_publication ON publication.pid = domain_publication.pid JOIN domain ON domain_publication.did = domain.did WHERE domain.name ilike '%computer%science%';,academic,instruct,"To get publications written by authors from a given domain, you would need to join domain, domain_author, author to link the domain to the author first, and then join with write to link with the publication id. You must also filter names using ILIKE." +What month were most students admitted?,"SELECT date_trunc('month', s.admit_term) AS month, COUNT(*) AS total_students FROM student s GROUP BY MONTH ORDER BY total_students DESC LIMIT 1;",advising,date_functions, What's the average predicted time to graduation since admission in no. 
of days?,SELECT avg(predicted_graduation_semester - admit_term) AS average_predicted_time_to_graduation FROM student;,advising,date_functions, How many students were predicted to graduate in the last 10 years?,"SELECT count(*) AS num_students_graduated FROM student WHERE predicted_graduation_semester >= DATE_TRUNC('year', CURRENT_DATE) - interval '10 year';",advising,date_functions, How long has it been in days since the last admitted student?,SELECT CURRENT_DATE - max(admit_term) AS duration_since_last_admitted_student FROM student;,advising,date_functions, -Subtract 2 weeks from the most recent predicted graduation date and give the month.,"SELECT DATE_TRUNC('month', s.predicted_graduation_semester - INTERVAL '2 weeks') AS MONTH FROM student s ORDER BY s.predicted_graduation_semester DESC LIMIT 1;SELECT extract(MONTH FROM date_trunc('month', predicted_graduation_semester) - interval '2 weeks') AS MONTH FROM student ORDER BY predicted_graduation_semester DESC LIMIT 1;",advising,date_functions, +Subtract 2 weeks from the most recent predicted graduation date and give the month.,"SELECT DATE_TRUNC('month', s.predicted_graduation_semester - INTERVAL '2 weeks') AS month FROM student s ORDER BY s.predicted_graduation_semester DESC LIMIT 1;SELECT extract(MONTH FROM predicted_graduation_semester - interval '2 weeks') AS month FROM student ORDER BY predicted_graduation_semester DESC LIMIT 1; ",advising,date_functions, What is the total number of students who found the instructor to be hilarious per course id?,"SELECT course_tags_count.course_id, SUM(course_tags_count.hilarious) AS total_hilarious FROM course_tags_count GROUP BY course_tags_count.course_id;",advising,group_by, What is the average clarity score for each instructor who taught a course?,"SELECT {i.name, i.instructor_id}, AVG(c.clarity_score) FROM course c JOIN course_offering co ON c.course_id = co.course_id JOIN offering_instructor oi ON co.offering_id = oi.offering_id JOIN instructor i ON oi.instructor_id = 
i.instructor_id GROUP BY {};",advising,group_by, How many course offerings have a final exam and how many do not?,"SELECT course_offering.has_final_exam, COUNT(offering_id) AS num_courses FROM course_offering GROUP BY course_offering.has_final_exam;SELECT COUNT(CASE WHEN co.has_final_exam THEN 1 END) AS num_with_final_exam, COUNT(CASE WHEN NOT co.has_final_exam THEN 1 END) AS num_without_final_exam FROM course_offering co;",advising,group_by, @@ -44,14 +44,14 @@ How many courses are offered for each semester id?,"SELECT course_offering.semes "What is the total number of students enrolled in each course, ordered from highest to lowest?","SELECT {course.course_id, course.name, course.number}, SUM(course.num_enrolled) AS total_students FROM course GROUP BY {} ORDER BY total_students DESC NULLS LAST;",advising,order_by, "What is the total number of credits earned by each student, ordered from highest to lowest? Give the student id and the total number of credits.","SELECT student.student_id, student.total_credit FROM student ORDER BY student.total_credit DESC NULLS LAST;",advising,order_by, "What is the name of the instructor who has taught the most courses, and how many courses have they taught?","SELECT instructor.name, count(offering_instructor.offering_id) AS num_courses FROM offering_instructor JOIN instructor ON offering_instructor.instructor_id = instructor.instructor_id GROUP BY instructor.name ORDER BY num_courses DESC LIMIT 1;",advising,order_by, -What is the ratio of the total number of students enrolled in courses with exams to the total number of students enrolled in courses without exams?,"WITH exams AS (SELECT DISTINCT sr.student_id FROM public.student_record sr JOIN public.course_offering co ON sr.offering_id = co.offering_id WHERE co.has_final_exam = TRUE ), no_exams AS (SELECT DISTINCT sr.student_id FROM public.student_record sr JOIN public.course_offering co ON sr.offering_id = co.offering_id WHERE co.has_final_exam = FALSE ) SELECT (SELECT 
COUNT(student_id) FROM exams)::FLOAT / (SELECT COUNT(student_id) FROM no_exams) AS ratio;",advising,ratio, +What is the ratio of the total number of students enrolled in courses with exams to the total number of students enrolled in courses without exams?,"SELECT SUM(CASE WHEN c.has_exams THEN c.num_enrolled ELSE 0 END)::FLOAT / SUM(CASE WHEN NOT c.has_exams THEN c.num_enrolled ELSE 0 END) AS ratio FROM course c;",advising,ratio, What is the ratio of the number of students who found the grading criteria clear and easy to understand to the number of students who received good feedback from the instructor for each course id?,"SELECT course_tags_count.course_id, CAST(course_tags_count.clear_grading AS FLOAT) / NULLIF(course_tags_count.good_feedback, 0) AS ratio FROM course_tags_count ORDER BY course_tags_count.course_id NULLS LAST;",advising,ratio, What is the ratio of the number of courses with projects to the number of courses with exams in each semester id?,"SELECT course_offering.semester, CAST(SUM(CASE WHEN course.has_projects THEN 1 ELSE 0 END) AS FLOAT) / NULLIF(SUM(CASE WHEN course.has_exams THEN 1 ELSE 0 END), 0) AS ratio FROM course JOIN course_offering ON course.course_id = course_offering.course_id GROUP BY course_offering.semester ORDER BY course_offering.semester NULLS LAST;",advising,ratio, What is the ratio of helpfulness scores to clarity scores for each course ID?,"SELECT course.course_id, CAST(course.helpfulness_score AS FLOAT) / NULLIF(course.clarity_score, 0) AS ratio FROM course;",advising,ratio, How does the ratio of enrolled students to the number of reviews vary across different courses?,"SELECT {course.course_id, course.name, course.number}, CAST(course.num_enrolled AS FLOAT) / NULLIF(course.num_reviews, 0) AS student_review_ratio FROM course ORDER BY student_review_ratio NULLS LAST;",advising,ratio, -Which courses have been taken by students in the Computer Science program?,"SELECT DISTINCT {course.name, course.course_id, course.number} AS 
course_name FROM student JOIN student_record ON student.student_id = student_record.student_id JOIN program ON student.program_id = program.program_id JOIN course ON student_record.course_id = course.course_id WHERE program.name ILIKE '%Computer Science%';SELECT c.name AS course_name, s.firstname, s.lastname FROM student s JOIN student_record sr ON s.student_id = sr.student_id JOIN course c ON sr.course_id = c.course_id JOIN program p ON s.program_id = p.program_id WHERE p.name = 'Computer Science';",advising,table_join, -Which courses have a final project and a final exam?,"SELECT DISTINCT {course.name, course.course_id, course.number} FROM course_offering JOIN course ON course_offering.course_id = course.course_id WHERE course_offering.has_final_project AND course_offering.has_final_exam;",advising,table_join, -What is the total number of students who have taken a course with a final project or exam?,SELECT COUNT(DISTINCT student_record.student_id) AS total_students FROM student_record JOIN course_offering ON student_record.course_id = course_offering.course_id WHERE course_offering.has_final_project OR course_offering.has_final_exam;,advising,table_join, +Which courses have been taken by students in the Computer Science program?,"SELECT DISTINCT {course.name, course.course_id, course.number} AS course_name FROM student JOIN student_record ON student.student_id = student_record.student_id JOIN program ON student.program_id = program.program_id JOIN course ON student_record.course_id = course.course_id WHERE program.name ILIKE '%Computer Science%';",advising,table_join, +Which courses have a final project and a final exam?,"SELECT DISTINCT {course.name, course.course_id, course.number} FROM course_offering JOIN course ON course_offering.course_id = course.course_id WHERE course_offering.has_final_project = true AND course_offering.has_final_exam = true;",advising,table_join, +What is the total number of students who have taken a course with a final project or 
exam?,SELECT COUNT(DISTINCT student_record.student_id) AS total_students FROM student_record JOIN course_offering ON student_record.course_id = course_offering.course_id WHERE course_offering.has_final_project = true OR course_offering.has_final_exam = true;,advising,table_join, What is the total number of credits earned by students in each program?,"SELECT {program.name, program.program_id}, SUM(student.total_credit) AS total_credits FROM student JOIN program ON student.program_id = program.program_id GROUP BY {};",advising,table_join, How many students have declared a major in each program?,"SELECT {program.name, program.program_id}, COUNT(student.student_id) AS number_of_students FROM student JOIN program ON student.program_id = program.program_id WHERE student.declare_major IS NOT NULL GROUP BY {} ORDER BY number_of_students DESC;",advising,table_join, Which students have declared a minor program? List their firstname and lastname. Order the results by the students' last names.,"SELECT student.firstname, student.lastname FROM student WHERE student.minor IS NOT NULL ORDER BY student.lastname NULLS LAST;",advising,instruct,"student.declare_major is null for students who have not declared their major. @@ -64,12 +64,12 @@ Return the student IDs who have taken an in-person course and have gotten a grad When asked for specific students or courses, do not return duplicates." Which flight has the shortest duration between departure and arrival times? Convert to minutes.,"SELECT {flight.flight_number, flight.flight_id}, (arrival_time - departure_time) / 60 AS duration_minutes FROM flight ORDER BY duration_minutes LIMIT 1;",atis,date_functions, "What's the average duration between departure and arrival times minus 34 minutes? 
Convert from UNIX to regular datetime, and return the answer in minutes",SELECT avg(to_timestamp(arrival_time) - to_timestamp(departure_time) - interval '34 minutes') AS average_duration FROM flight;SELECT AVG(arrival_time - departure_time)/60 - 34 AS average_duration FROM flight;,atis,date_functions, -Count the number of flight departures for each month?,"SELECT month.month_name, count(*) AS departure_count FROM flight JOIN MONTH ON extract(MONTH FROM to_timestamp(flight.departure_time)) = month.month_number GROUP BY month.month_name, month.month_number ORDER BY month.month_number;SELECT date_trunc('month', to_timestamp(flight.departure_time)) AS MONTH, COUNT(*) AS num_departures FROM flight GROUP BY MONTH ORDER BY MONTH;",atis,date_functions, +Count the number of flight departures for each month?,"SELECT month.month_name, count(*) AS departure_count FROM flight JOIN month ON extract(MONTH FROM to_timestamp(flight.departure_time)) = month.month_number GROUP BY month.month_name, month.month_number ORDER BY month.month_number;SELECT date_trunc('month', to_timestamp(flight.departure_time)) AS month, COUNT(*) AS num_departures FROM flight GROUP BY MONTH ORDER BY MONTH;",atis,date_functions, What's the earliest flight departure time in the day in HH:MM?,"SELECT to_char(to_timestamp(departure_time)::TIME, 'HH24:MI') AS earliest_departure_time FROM flight ORDER BY earliest_departure_time LIMIT 1;",atis,date_functions, What's the difference in time in days between today and the earliest flight departure?,"SELECT date_part('day', CURRENT_DATE - to_timestamp(departure_time)) AS difference_in_days FROM flight ORDER BY departure_time LIMIT 1;SELECT (CURRENT_DATE - TO_TIMESTAMP(MIN(f.departure_time))) AS days_difference FROM flight f;",atis,date_functions, What is the total cost of round-trip fares for each airline code?,"SELECT fare.fare_airline, SUM(fare.round_trip_cost) AS total_round_trip_cost FROM fare GROUP BY fare.fare_airline ORDER BY total_round_trip_cost 
DESC;",atis,group_by, "What is the average cost of round-trip fares from Los Angeles (LAX) to Chicago (ORD) for each airline, sorted in descending order by average cost?","SELECT fare.fare_airline, AVG(fare.round_trip_cost) AS average_cost FROM fare WHERE fare.from_airport = 'LAX' AND fare.to_airport = 'ORD' GROUP BY fare.fare_airline ORDER BY average_cost DESC NULLS LAST;SELECT airline.airline_name, AVG(fare.round_trip_cost) AS avg_round_trip_cost FROM fare JOIN airline ON fare.fare_airline = airline.airline_code WHERE fare.from_airport = 'LAX' AND fare.to_airport = 'ORD' GROUP BY airline.airline_name ORDER BY avg_round_trip_cost DESC;",atis,group_by, -"What is the average cost of a one-way trip for each fare id, sorted in ascending order of the cost?","SELECT fare.fare_id, AVG(fare.one_direction_cost) AS average_cost FROM fare GROUP BY fare.fare_id ORDER BY average_cost ASC NULLS LAST;",atis,group_by, +"What is the average cost of a one-way trip for each airport pair in the fare table?","SELECT f.from_airport, f.to_airport, AVG(f.one_direction_cost) AS average_cost FROM fare f GROUP BY f.from_airport, f.to_airport ORDER BY f.from_airport, f.to_airport NULLS LAST;",atis,group_by, "How many meals are served in each compartment, sorted by the number of meals in descending order?","SELECT food_service.compartment, COUNT(food_service.meal_number) AS number_of_meals FROM food_service GROUP BY food_service.compartment ORDER BY number_of_meals DESC NULLS LAST;",atis,group_by, "How many flights depart from each airport code, excluding departures with connections?","SELECT airport.airport_code, COUNT(flight.from_airport) AS num_departures FROM airport LEFT JOIN flight ON airport.airport_code = flight.from_airport AND flight.connections=0 GROUP BY airport.airport_code;SELECT airport.airport_code, COUNT(flight.from_airport) AS num_departures FROM airport JOIN flight ON airport.airport_code = flight.from_airport WHERE flight.connections=0 GROUP BY 
airport.airport_code;",atis,group_by, "Which flight ids to Chicago (ORD) have the longest duration from departure to arrival, sorted in ascending order?","SELECT flight.flight_id, (flight.arrival_time - flight.departure_time) AS duration FROM flight WHERE to_airport = 'ORD' ORDER BY duration ASC NULLS LAST;",atis,order_by, @@ -98,7 +98,7 @@ Filter state code (eg NY), airport codes (eg JFK) using case-insensitive matches fare.round_trip_required is not needed when getting the round trip cost." What is the minimum amount of time required for a connecting flight at JFK Airport?,SELECT minimum_connect_time FROM airport WHERE airport_code = 'JFK';,atis,instruct,"Filter airport, city, country names using ILIKE. Filter state code (eg NY) and airport codes (eg JFK) using upper-case matches." -How many flights require a round-trip to purchase the fare?,SELECT COUNT(*) FROM fare WHERE round_trip_required = 'Yes';,atis,instruct,"Filter airport, city, country names using ILIKE. +How many flights require a round-trip to purchase the fare?,SELECT COUNT(DISTINCT flight_fare.flight_id) FROM flight_fare JOIN fare ON flight_fare.fare_id = fare.fare_id WHERE fare.round_trip_required = 'Yes';,atis,instruct,"Filter airport, city, country names using ILIKE. Filter state code (eg NY) and airport codes (eg JFK) using upper-case matches." What is the total population in cities by country?,"SELECT city.country_name, SUM(city.population) AS total_population FROM city GROUP BY city.country_name ORDER BY total_population DESC NULLS LAST;",geography,group_by, What is the average length of rivers in each country?,"SELECT river.country_name, AVG(river.length) AS average_length FROM river GROUP BY river.country_name ORDER BY average_length DESC NULLS LAST;",geography,group_by, @@ -111,14 +111,14 @@ What are the top 5 cities with the highest population? 
Give both city names and "What are the longest rivers in meters, ordered from longest to shortest?","SELECT river.river_name, river.length FROM river ORDER BY river.length DESC NULLS LAST;",geography,order_by, "What are the highest mountains in meters, ordered from highest to lowest altitude?","SELECT mountain.mountain_name, mountain.mountain_altitude FROM mountain ORDER BY mountain.mountain_altitude DESC NULLS LAST;",geography,order_by, What is the ratio of the population of the United States to the population of California?,"SELECT CAST(SUM(NULLIF(state.population, 0)) FILTER (WHERE LOWER(state.country_name) LIKE '%united states%') AS FLOAT) / CAST(SUM(NULLIF(state.population, 0)) FILTER (WHERE LOWER(state.state_name) LIKE '%california%') AS FLOAT) AS population_ratio FROM state;",geography,ratio, -What is the ratio of the length of the Mississippi River to the length of the Rhine River?,"SELECT CAST((SELECT LENGTH FROM river WHERE LOWER(river_name) LIKE '%mississippi%') AS FLOAT) / NULLIF((SELECT LENGTH FROM river WHERE LOWER(river_name) LIKE '%rhine%'), 0) AS ratio ;",geography,ratio, +What is the ratio of the length of the Mississippi River to the length of the Rhine River?,"SELECT CAST((SELECT length FROM river WHERE LOWER(river_name) LIKE '%mississippi%') AS FLOAT) / NULLIF((SELECT length FROM river WHERE LOWER(river_name) LIKE '%rhine%'), 0) AS ratio ;",geography,ratio, "What is the ratio of the altitude of 'Mount Everest' to the altitude of 'Dhaulagiri'? Match strings exactly","SELECT (CAST(everest.mountain_altitude AS FLOAT) / NULLIF(dhaulagiri.mountain_altitude, 0)) AS altitude_ratio FROM (SELECT mountain_altitude FROM mountain WHERE mountain_name = 'Mount Everest') AS everest, (SELECT mountain_altitude FROM mountain WHERE mountain_name = 'Dhaulagiri') AS dhaulagiri;",geography,ratio, "How does the population of each city vary in relation to the population of its corresponding state? 
Return the city name, and the proportion of each city's population relative to the state.","SELECT city.city_name, CAST(city.population AS float) / NULLIF(state.population, 0) AS population_ratio FROM city JOIN state ON city.state_name = state.state_name ORDER BY population_ratio DESC NULLS LAST;",geography,ratio, Get the ratio of population per area for each state,"SELECT state_name, population / NULLIF(area, 0) AS population_density FROM state;",geography,ratio, Which countries have both lakes and rivers?,SELECT DISTINCT lake.country_name FROM public.lake JOIN public.river ON lake.country_name = river.country_name;,geography,table_join, Which states border the state where lake ontario is?,SELECT border_info.border FROM border_info JOIN lake ON border_info.state_name = lake.state_name WHERE lake.lake_name ilike '%Ontario%';,geography,table_join, "Which lakes have a name that starts with ""Lake""? They should be located in states with an area greater than 1000 square kilometers.",SELECT lake.lake_name FROM lake JOIN state ON lake.state_name = state.state_name WHERE state.area > 1000 AND lake.lake_name ilike 'Lake%' ORDER BY lake.lake_name NULLS LAST;,geography,table_join, -What is the highest point in each state and what is the population density of that state?,"SELECT highlow.state_name, highlow.highest_point, state.density FROM highlow JOIN state ON highlow.state_name = state.state_name GROUP BY highlow.state_name, highlow.highest_point, state.density;",geography,table_join, +What is the highest point in each state and what is the population density of that state?,"SELECT highlow.state_name, highlow.highest_point, state.density FROM highlow JOIN state ON highlow.state_name = state.state_name;",geography,table_join, What is the average length of rivers per country in countries with a lake?,"SELECT l.country_name, AVG(r.length) AS average_length FROM river r JOIN lake l ON r.country_name = l.country_name GROUP BY 1;",geography,table_join, Which states have fewer 
than a hundred thousand people?,SELECT state_name FROM state WHERE population < 100000;,geography,instruct,Always filter names using ILIKE Which rivers traverse at least 3 cities/landmarks?,"SELECT river_name FROM river WHERE traverse LIKE '%,%,%';",geography,instruct,Always filter names using ILIKE @@ -128,10 +128,10 @@ Get the cities in the United States and their population,"SELECT city_name, popu What is the total number of restaurants serving each type of food?,"SELECT restaurant.food_type, COUNT(DISTINCT restaurant.id) AS total_number_of_restaurants FROM restaurant GROUP BY restaurant.food_type;",restaurants,group_by, What is the total count of restaurants in each city?,"SELECT location.city_name, COUNT(DISTINCT location.restaurant_id) AS total_count FROM LOCATION GROUP BY location.city_name;",restaurants,group_by, What is the average rating of restaurants serving each type of food?,"SELECT restaurant.food_type, AVG(restaurant.rating) AS average_rating FROM restaurant GROUP BY restaurant.food_type ORDER BY average_rating DESC NULLS LAST;",restaurants,group_by, -How many restaurants serve Italian food in each city?,"SELECT location.city_name, COUNT(*) AS number_of_restaurants FROM LOCATION JOIN restaurant ON location.restaurant_id = restaurant.id WHERE restaurant.food_type ILIKE '%Italian%' GROUP BY location.city_name ORDER BY number_of_restaurants DESC NULLS LAST;",restaurants,group_by, +How many restaurants serve Italian food in each city?,"SELECT restaurant.city_name, COUNT(*) AS number_of_restaurants FROM restaurant WHERE restaurant.food_type ILIKE '%Italian%' GROUP BY restaurant.city_name ORDER BY number_of_restaurants DESC NULLS LAST;",restaurants,group_by, How many restaurants are there in each city? 
Order the results by the number of restaurants in descending order.,"SELECT location.city_name, COUNT(DISTINCT location.restaurant_id) AS number_of_restaurants FROM LOCATION GROUP BY location.city_name ORDER BY number_of_restaurants DESC NULLS LAST;",restaurants,group_by, -Which street has the most number of restaurants?,SELECT DISTINCT location.street_name FROM LOCATION WHERE street_name = (SELECT street_name FROM LOCATION GROUP BY 1 ORDER BY COUNT(restaurant_id) DESC LIMIT 1);,restaurants,order_by, -Which restaurants serve Italian cuisine or are located in New York? Order the results by the restaurant name.,SELECT restaurant.name FROM restaurant JOIN LOCATION ON restaurant.id = location.restaurant_id WHERE restaurant.food_type ILIKE '%Italian%' OR location.city_name ILIKE '%New York%' ORDER BY restaurant.name NULLS LAST;,restaurants,order_by, +Which street has the most number of restaurants?,SELECT street_name FROM location GROUP BY street_name ORDER BY COUNT(restaurant_id) DESC LIMIT 1;,restaurants,order_by, +Which restaurants serve Italian cuisine or are located in New York? Order the results by the restaurant name.,SELECT name FROM restaurant WHERE food_type ILIKE '%Italian%' OR city_name ILIKE '%New York%' ORDER BY name NULLS LAST;,restaurants,order_by, What is the average rating of restaurants in each region? 
Order the results by the region name.,"SELECT geographic.region, AVG(restaurant.rating) AS average_rating FROM restaurant JOIN geographic ON restaurant.city_name = geographic.city_name GROUP BY geographic.region ORDER BY geographic.region NULLS LAST;",restaurants,order_by, What are the names of the top 3 restaurants with the highest ratings?,SELECT restaurant.name FROM restaurant ORDER BY restaurant.rating DESC NULLS LAST LIMIT 3;,restaurants,order_by, List the restaurants starting from the best ratings to the lowest,"SELECT {name, id}, rating FROM restaurant ORDER BY rating DESC;",restaurants,order_by, @@ -144,11 +144,11 @@ What is the ratio of Italian restaurants out of all restaurants in Los Angeles?, What is the average rating of restaurants that serve Mexican food in each city?,"SELECT location.city_name, AVG(restaurant.rating) AS average_rating FROM restaurant JOIN LOCATION ON restaurant.id = location.restaurant_id WHERE LOWER(restaurant.food_type) LIKE '%mexican%' GROUP BY location.city_name;",restaurants,table_join, What is the average rating of restaurants in each region?,"SELECT geographic.region, AVG(restaurant.rating) AS average_rating FROM geographic JOIN restaurant ON geographic.city_name=restaurant.city_name GROUP BY 1;",restaurants,table_join, How many restaurants serve Italian food in each region?,"SELECT geographic.region, COUNT(restaurant.id) AS number_of_restaurants FROM restaurant JOIN geographic ON restaurant.city_name = geographic.city_name WHERE LOWER(restaurant.food_type) LIKE '%italian%' GROUP BY geographic.region ORDER BY number_of_restaurants DESC NULLS LAST;",restaurants,table_join, -How many restaurants are there in each region?,"SELECT geographic.region, COUNT(DISTINCT location.restaurant_id) AS number_of_restaurants FROM geographic JOIN LOCATION ON geographic.city_name = location.city_name GROUP BY geographic.region ORDER BY number_of_restaurants DESC;SELECT geographic.region, COUNT(DISTINCT location.restaurant_id) AS 
number_of_restaurants FROM geographic LEFT JOIN LOCATION ON geographic.city_name = location.city_name GROUP BY geographic.region ORDER BY number_of_restaurants DESC;",restaurants,table_join, +How many restaurants are there in each region?,"SELECT geographic.region, COUNT(DISTINCT restaurant.id) AS number_of_restaurants FROM geographic JOIN restaurant ON geographic.city_name = restaurant.city_name GROUP BY geographic.region ORDER BY number_of_restaurants DESC;SELECT geographic.region, COUNT(DISTINCT restaurant.id) AS number_of_restaurants FROM geographic LEFT JOIN restaurant ON geographic.city_name = restaurant.city_name GROUP BY geographic.region ORDER BY number_of_restaurants DESC;",restaurants,table_join, Which city has the highest-rated restaurant?,SELECT DISTINCT restaurant.city_name FROM restaurant WHERE rating=(SELECT MAX(rating) FROM restaurant);,restaurants,instruct,Match city_name and food_type case-insensitively. Match with ILIKE and percent sign for substring matching for all other string matches. What's the name and rating of all the restaurants that have a rating greater than 4 and are located in the city of New York?,"SELECT restaurant.name, restaurant.rating FROM restaurant WHERE restaurant.rating > 4 AND restaurant.city_name ILIKE '%New York%';",restaurants,instruct,Match city_name and food_type case-insensitively. Match with ILIKE and percent sign for substring matching for all other string matches. What's the name and food type of all the restaurants located on Market St in San Francisco?,"SELECT restaurant.name, restaurant.food_type FROM restaurant JOIN LOCATION ON restaurant.id = location.restaurant_id WHERE location.street_name ILIKE '%Market St%' AND location.city_name ILIKE '%San Francisco%';",restaurants,instruct,Match city_name and food_type case-insensitively. Match with ILIKE and percent sign for substring matching for all other string matches. 
-What are the names of the restaurants that serve Italian food?,SELECT restaurant.name FROM restaurant WHERE LOWER(restaurant.food_type) ILIKE '%italian%' ORDER BY restaurant.rating DESC NULLS LAST;,restaurants,instruct,Match city_name and food_type case-insensitively. Match with ILIKE and percent sign for substring matching for all other string matches. +What are the names of the restaurants that serve Italian food?,SELECT restaurant.name FROM restaurant WHERE LOWER(restaurant.food_type) ILIKE '%italian%';,restaurants,instruct,Match city_name and food_type case-insensitively. Match with ILIKE and percent sign for substring matching for all other string matches. What are the names of the restaurants in Los Angeles that have a rating higher than 4?,SELECT DISTINCT restaurant.name FROM restaurant WHERE restaurant.city_name ILIKE '%Los Angeles%' AND restaurant.rating > 4 ORDER BY restaurant.name NULLS LAST;,restaurants,instruct,Match city_name and food_type case-insensitively. Match with ILIKE and percent sign for substring matching for all other string matches. 
How many authors have written a paper that was published 1 year or longer before today's date?,SELECT count(DISTINCT w.authorid) AS num_authors FROM paper p JOIN writes w ON p.paperid = w.paperid WHERE p.year < extract(YEAR FROM CURRENT_DATE - interval '1 year');,scholar,date_functions, How many keyphrases are associated with papers published between 2020 and 2035?,SELECT count(DISTINCT pk.keyphraseid) AS num_keyphrases FROM paper p JOIN paperkeyphrase pk ON p.paperid = pk.paperid WHERE p.year >= 2020 AND p.year <= 2035 ;,scholar,date_functions, @@ -157,7 +157,7 @@ Give me the total number of papers published in the first 12 months of 2019.,SEL "On average, how many papers per month were published in the whole of 2020?",SELECT cast(count(*) AS float)/ 12 AS average_papers_per_month FROM paper WHERE YEAR = 2020;,scholar,date_functions, What is the total number of papers published per year?,"SELECT paper.year, COUNT(paper.paperid) AS total_papers FROM paper GROUP BY paper.year ORDER BY paper.year NULLS LAST;",scholar,group_by, What is the total number of papers published in each year?,"SELECT paper.year, COUNT(paper.paperid) AS total_papers FROM paper GROUP BY paper.year ORDER BY paper.year;",scholar,group_by, -What is the total number of papers associated with each dataset?,"SELECT paperdataset.datasetid, COUNT(DISTINCT paperdataset.paperid) AS total_papers FROM paperdataset GROUP BY paperdataset.datasetid;SELECT dataset.datasetname, COUNT(paperdataset.paperid) AS total_papers FROM paperdataset JOIN dataset ON paperdataset.datasetid = dataset.datasetid GROUP BY dataset.datasetname ORDER BY total_papers DESC NULLS LAST;",scholar,group_by, +What is the total number of papers associated with each dataset?,"SELECT paperdataset.datasetid, COUNT(DISTINCT paperdataset.paperid) AS total_papers FROM paperdataset GROUP BY paperdataset.datasetid;SELECT dataset.datasetname, COUNT(paperdataset.paperid) AS total_papers FROM paperdataset JOIN dataset ON paperdataset.datasetid = 
dataset.datasetid GROUP BY dataset.datasetname;",scholar,group_by, How many keyphrases are associated with each paper?,"SELECT paperkeyphrase.paperid, COUNT(paperkeyphrase.keyphraseid) AS keyphrase_count FROM paperkeyphrase GROUP BY paperkeyphrase.paperid ORDER BY keyphrase_count DESC NULLS LAST;SELECT p.title, COUNT(pk.keyphraseid) AS num_keyphrases FROM paper p JOIN paperkeyphrase pk ON p.paperid = pk.paperid GROUP BY p.title ORDER BY num_keyphrases DESC NULLS LAST;",scholar,group_by, How many authors have published more than 2 papers?,SELECT COUNT(*) AS number_of_authors FROM (SELECT writes.authorid FROM writes GROUP BY writes.authorid HAVING COUNT(writes.paperid) > 2) AS subquery;,scholar,group_by, "Which papers have the highest number of authors, ordered by the number of authors in descending order?","SELECT writes.paperid, COUNT(writes.authorid) AS num_authors FROM writes GROUP BY writes.paperid ORDER BY num_authors DESC NULLS LAST;SELECT paper.title, COUNT(DISTINCT writes.authorid) AS num_authors FROM paper JOIN writes ON paper.paperid = writes.paperid GROUP BY paper.title ORDER BY num_authors DESC;",scholar,order_by, @@ -176,9 +176,9 @@ What is the proportion of papers that belong to more than 1 dataset to papers th "What is the total number of papers published in each journal, ordered by the journal name?","SELECT journal.journalname, COUNT(DISTINCT paper.paperid) AS total_papers FROM paper JOIN journal ON paper.journalid = journal.journalid GROUP BY journal.journalname ORDER BY journal.journalname NULLS LAST;",scholar,table_join, "How many papers cite each paper in the dataset named ""COVID-19 Research""?","SELECT paperdataset.paperid, COUNT(cite.citedpaperid) AS citation_count FROM paperdataset JOIN cite ON paperdataset.paperid = cite.citedpaperid WHERE paperdataset.datasetid = (SELECT datasetid FROM dataset WHERE datasetname ILIKE '%COVID-19 Research%') GROUP BY paperdataset.paperid ORDER BY citation_count DESC;SELECT p.title, COUNT(c.citingpaperid) AS 
num_citing_papers FROM paper p JOIN paperdataset pd ON p.paperid = pd.paperid JOIN cite c ON p.paperid = c.citedpaperid JOIN dataset d ON pd.datasetid = d.datasetid WHERE d.datasetname = 'COVID-19 Research' GROUP BY p.title ORDER BY num_citing_papers DESC NULLS LAST;",scholar,table_join, "What is the name of the venue where the paper with paper ID 2 was published, and how many papers were published in total in that venue?","SELECT venue.venuename, COUNT(DISTINCT paper.paperid) FROM paper JOIN venue ON paper.venueid = venue.venueid WHERE paper.venueid = (SELECT venueid FROM paper WHERE paperid = 2 ) GROUP BY venue.venuename;",scholar,instruct,Always filter strings using ILIKE -"What are the names of the authors who wrote the paper with the title ""The Effects of Climate Change on Agriculture""?",SELECT author.authorname FROM author JOIN writes ON author.authorid = writes.authorid JOIN paper ON writes.paperid = paper.paperid WHERE paper.title ILIKE '%The Effects of Climate Change on Agriculture%';,scholar,instruct,Always filter strings with an exact match +"What are the names of the authors who wrote the paper with the title ""The Effects of Climate Change on Agriculture""?",SELECT author.authorname FROM author JOIN writes ON author.authorid = writes.authorid JOIN paper ON writes.paperid = paper.paperid WHERE paper.title = 'The Effects of Climate Change on Agriculture';,scholar,instruct,Always filter strings with an exact match "How many papers were published in the journal ""nature"" in the year 2020?",SELECT COUNT(paper.paperid) FROM paper JOIN journal ON paper.journalid = journal.journalid WHERE paper.year = 2020 AND journal.journalname ILIKE '%nature%';,scholar,instruct,Filter strings with case-insensitive matching -"How many papers are associated with the keyphrase ""machine learning"" and were published in the journal named ""IEEE Transactions on Pattern Analysis and Machine Intelligence""?",SELECT COUNT(DISTINCT paper.paperid) FROM paper JOIN journal ON 
paper.journalid = journal.journalid JOIN paperkeyphrase ON paper.paperid = paperkeyphrase.paperid JOIN keyphrase ON paperkeyphrase.keyphraseid = keyphrase.keyphraseid WHERE keyphrase.keyphrasename ILIKE '%machine learning%' AND journal.journalname ILIKE '%IEEE Transactions on Pattern Analysis and Machine Intelligence%';,scholar,instruct,"Filter paper names, journal names, using exact matches. Filter keyphrases with case-insensitive matching." +"How many papers are associated with the keyphrase ""machine learning"" and were published in the journal named ""IEEE Transactions on Pattern Analysis and Machine Intelligence""?",SELECT COUNT(DISTINCT paper.paperid) FROM paper JOIN journal ON paper.journalid = journal.journalid JOIN paperkeyphrase ON paper.paperid = paperkeyphrase.paperid JOIN keyphrase ON paperkeyphrase.keyphraseid = keyphrase.keyphraseid WHERE keyphrase.keyphrasename ILIKE 'machine learning' AND journal.journalname = 'IEEE Transactions on Pattern Analysis and Machine Intelligence';,scholar,instruct,"Filter paper names, journal names, using exact matches. Filter keyphrases with case-insensitive matching." "How many authors wrote papers that were published in the journal ""Science"" in the year 2020?",SELECT COUNT(DISTINCT writes.authorid) AS number_of_authors FROM writes JOIN paper ON writes.paperid = paper.paperid JOIN journal ON paper.journalid = journal.journalid WHERE journal.journalname ILIKE '%Science%' AND paper.year = 2020;,scholar,instruct,Filter paper names using exact matches. Filter keyphrases and journal names with case-insensitive matching. 
How many reviews were written for businesses located in California in the last 1000 months?,"SELECT count(*) AS review_count FROM review r JOIN business b ON r.business_id = b.business_id WHERE b.state = 'CA' AND (r.year * 12 + extract(MONTH FROM to_date(r.month, 'Month'))) >= (extract(YEAR FROM CURRENT_DATE) * 12 + extract(MONTH FROM CURRENT_DATE) - 1000) ;",yelp,date_functions, What is the total number of check-ins on the 2 days before Saturday?,"SELECT sum(COUNT) AS total_checkins FROM checkin WHERE DAY IN ('Thursday', 'Friday') ;",yelp,date_functions, @@ -190,11 +190,11 @@ How many months between June 2021 and December 2021 had reviews?,SELECT COUNT(DI What is the total count of check-ins for each business id?,"SELECT checkin.business_id, SUM(checkin.count) AS total_checkins FROM checkin GROUP BY checkin.business_id ORDER BY total_checkins DESC NULLS LAST;",yelp,group_by, Return the name and average rating for each business in new york,"SELECT business.name, AVG(review.rating) AS average_rating FROM business JOIN review ON business.business_id = review.business_id WHERE business.city ILIKE '%NEW YORK%' GROUP BY business.name;",yelp,group_by, How many check-ins occurred on each day of the week?,"SELECT checkin.day, SUM(checkin.count) AS total_checkins FROM checkin GROUP BY checkin.day ORDER BY total_checkins DESC NULLS LAST;",yelp,group_by, -Please provide a list of business names in New York and their average ratings ordered by the highest average rating first.,"SELECT business.name, AVG(review.rating) AS average_rating FROM business JOIN review ON business.business_id = review.business_id WHERE business.city ILIKE '%New York%' GROUP BY business.name ORDER BY average_rating DESC NULLS LAST;",yelp,order_by, -What is the latitude and longitude of the business with the highest rating?,"SELECT business.latitude, business.longitude FROM business JOIN review ON business.business_id = review.business_id GROUP BY review.rating, business.latitude, business.longitude 
ORDER BY AVG(review.rating) DESC NULLS LAST LIMIT 1;",yelp,order_by, +Please provide a list of business names in New York and their average ratings ordered by the highest average rating first.,"SELECT business.name, AVG(review.rating) AS average_rating FROM business JOIN review ON business.business_id = review.business_id WHERE business.city ILIKE '%New York%' GROUP BY business.name, business.business_id ORDER BY average_rating DESC NULLS LAST;",yelp,order_by, +What is the latitude and longitude of the business with the highest rating?,"SELECT business.latitude, business.longitude FROM business JOIN review ON business.business_id = review.business_id GROUP BY business.business_id, business.latitude, business.longitude ORDER BY AVG(review.rating) DESC LIMIT 1;",yelp,order_by, What are the top 3 businesses in terms of review count?,"SELECT {business.name, business.business_id, business.bid}, business.review_count FROM business ORDER BY business.review_count DESC NULLS LAST LIMIT 3;",yelp,order_by, "What are the names of the businesses in the database, ordered alphabetically?",SELECT business.name FROM business ORDER BY business.name ASC NULLS LAST;,yelp,order_by, -"How many reviews were posted in each month of the year 2021, ordered by the month?","SELECT review.month, COUNT(review.rid) AS review_count FROM review WHERE review.year = 2021 GROUP BY review.month ORDER BY TO_DATE(review.month, 'MONTH') NULLS LAST;",yelp,order_by, +"How many reviews were posted in each month of the year 2021, ordered by the month?","SELECT review.month, COUNT(review.rid) AS review_count FROM review WHERE review.year = 2021 GROUP BY review.month ORDER BY TO_DATE(review.month, 'Month') NULLS LAST;",yelp,order_by, What is the ratio of the number of businesses in each state to the total number of businesses in the database?,"SELECT business.state, COUNT(business.business_id) / NULLIF(CAST((SELECT COUNT(*) FROM business) AS FLOAT), 0) AS ratio FROM business GROUP BY 
business.state;",yelp,ratio, What is the ratio of open businesses to closed businesses in the city of San Francisco?,"SELECT CAST(SUM(CASE WHEN business.is_open = 1 THEN 1 ELSE 0 END) AS FLOAT) / NULLIF(SUM(CASE WHEN business.is_open = 0 THEN 1 ELSE 0 END), 0) AS ratio FROM business WHERE LOWER(business.city) ILIKE '%san francisco%';",yelp,ratio, "What is the ratio of check-ins on weekends to check-ins on weekdays for the business named ""Mark's Bistro""?","SELECT CAST(SUM(CASE WHEN checkin.day IN ('Saturday', 'Sunday') THEN checkin.count ELSE 0 END) AS FLOAT) / NULLIF(SUM(CASE WHEN checkin.day NOT IN ('Saturday', 'Sunday') THEN checkin.count ELSE 0 END), 0) AS ratio FROM checkin JOIN business ON checkin.business_id = business.business_id WHERE business.name ILIKE '%Mark''s Bistro%';",yelp,ratio, @@ -204,7 +204,7 @@ What is the ratio of businesses in the state of California to businesses in the "What is the total number of reviews for each category in the state of ""California""?","SELECT {category.category_name, category.id}, SUM(business.review_count) AS total_reviews FROM business JOIN category ON business.business_id = category.business_id WHERE business.state = 'CA' GROUP BY {} ORDER BY total_reviews DESC NULLS LAST;",yelp,table_join, What is the total number of reviews for each business category?,"SELECT {category.category_name, category.id}, SUM(business.review_count) AS total_reviews FROM business JOIN category ON business.business_id = category.business_id GROUP BY {} ORDER BY total_reviews DESC NULLS LAST;",yelp,table_join, What is the total number of check-ins for each business in the state of California?,"SELECT {business.business_id, business.name, business.bid}, SUM(checkin.count) AS total_checkins FROM business JOIN checkin ON business.business_id = checkin.business_id WHERE business.state = 'CA' GROUP BY {} ORDER BY total_checkins DESC NULLS LAST;",yelp,table_join, -What are the top 2 categories of businesses with the highest average rating?,"SELECT 
{category.category_name, category.id} FROM (SELECT business.name, business.business_id, AVG(review.rating) AS average_rating FROM business JOIN review ON business.business_id = review.business_id GROUP BY business.name, business.business_id) AS business_rating JOIN category ON business_rating.business_id = category.business_id GROUP BY {} ORDER BY AVG(business_rating.average_rating) DESC NULLS LAST LIMIT 2;",yelp,table_join, +What are the top 2 categories of businesses with the highest average rating?,"SELECT {category.category_name, category.id} FROM (SELECT business.business_id, AVG(review.rating) AS average_rating FROM business JOIN review ON business.business_id = review.business_id GROUP BY business.business_id) AS business_rating JOIN category ON business_rating.business_id = category.business_id GROUP BY {} ORDER BY AVG(business_rating.average_rating) DESC NULLS LAST LIMIT 2;",yelp,table_join, "What is the total number of reviews posted in the year 2021 for businesses in the category ""Cafe""?",SELECT COUNT(review.rid) AS total_reviews FROM review JOIN category ON review.business_id = category.business_id WHERE review.year = 2021 AND category.category_name ILIKE '%Cafe%';,yelp,instruct,"Filter strings of users, city, address, business.name using ILIKE with wildcards. Filter strings of state using exact upper case matches. Assume the rating of a business to be its average rating, and compute it before computing other aggregates on it. @@ -220,7 +220,7 @@ Filter strings of state using exact upper case matches. Assume the rating of a business to be its average rating, and compute it before computing other aggregates on it. Always truncate dates in the question to its current month and year (if applicable) before filtering on `review.year` and `review.month`. 
" -"How many reviews were posted by users with the name ""Sarah Williams"" in the month of April 2021?",SELECT COUNT(*) FROM review JOIN users ON review.user_id = users.user_id WHERE users.name ILIKE '%Sarah Williams%' AND review.month ILIKE '%April%';,yelp,instruct,"Filter strings of users, city, address, business.name using ILIKE with wildcards. +"How many reviews were posted by users with the name ""Sarah Williams"" in the month of April 2021?",SELECT COUNT(*) FROM review JOIN users ON review.user_id = users.user_id WHERE users.name ILIKE '%Sarah Williams%' AND review.month = 'April' AND review.year = 2021;,yelp,instruct,"Filter strings of users, city, address, business.name using ILIKE with wildcards. Filter strings of state using exact upper case matches. Assume the rating of a business to be its average rating, and compute it before computing other aggregates on it. Always truncate dates in the question to its current month and year (if applicable) before filtering on `review.year` and `review.month`. From 4d2aa187c56853d776deb5ec819b40c1ba898172 Mon Sep 17 00:00:00 2001 From: wendy Date: Tue, 7 May 2024 22:51:22 +0800 Subject: [PATCH 3/4] add missing wildcards --- data/questions_gen_postgres.csv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/data/questions_gen_postgres.csv b/data/questions_gen_postgres.csv index 96c5066..192ea1d 100644 --- a/data/questions_gen_postgres.csv +++ b/data/questions_gen_postgres.csv @@ -33,7 +33,7 @@ What month were most students admitted?,"SELECT date_trunc('month', s.admit_term What's the average predicted time to graduation since admission in no. 
of days?,SELECT avg(predicted_graduation_semester - admit_term) AS average_predicted_time_to_graduation FROM student;,advising,date_functions, How many students were predicted to graduate in the last 10 years?,"SELECT count(*) AS num_students_graduated FROM student WHERE predicted_graduation_semester >= DATE_TRUNC('year', CURRENT_DATE) - interval '10 year';",advising,date_functions, How long has it been in days since the last admitted student?,SELECT CURRENT_DATE - max(admit_term) AS duration_since_last_admitted_student FROM student;,advising,date_functions, -Subtract 2 weeks from the most recent predicted graduation date and give the month.,"SELECT DATE_TRUNC('month', s.predicted_graduation_semester - INTERVAL '2 weeks') AS month FROM student s ORDER BY s.predicted_graduation_semester DESC LIMIT 1;SELECT extract(MONTH FROM predicted_graduation_semester - interval '2 weeks') AS month FROM student ORDER BY predicted_graduation_semester DESC LIMIT 1; ",advising,date_functions, +Subtract 2 weeks from the most recent predicted graduation date and give the month.,"SELECT DATE_TRUNC('month', s.predicted_graduation_semester - INTERVAL '2 weeks') AS month FROM student s ORDER BY s.predicted_graduation_semester DESC LIMIT 1;SELECT extract(MONTH FROM predicted_graduation_semester - interval '2 weeks') AS month FROM student ORDER BY predicted_graduation_semester DESC LIMIT 1;",advising,date_functions, What is the total number of students who found the instructor to be hilarious per course id?,"SELECT course_tags_count.course_id, SUM(course_tags_count.hilarious) AS total_hilarious FROM course_tags_count GROUP BY course_tags_count.course_id;",advising,group_by, What is the average clarity score for each instructor who taught a course?,"SELECT {i.name, i.instructor_id}, AVG(c.clarity_score) FROM course c JOIN course_offering co ON c.course_id = co.course_id JOIN offering_instructor oi ON co.offering_id = oi.offering_id JOIN instructor i ON oi.instructor_id = i.instructor_id 
GROUP BY {};",advising,group_by, How many course offerings have a final exam and how many do not?,"SELECT course_offering.has_final_exam, COUNT(offering_id) AS num_courses FROM course_offering GROUP BY course_offering.has_final_exam;SELECT COUNT(CASE WHEN co.has_final_exam THEN 1 END) AS num_with_final_exam, COUNT(CASE WHEN NOT co.has_final_exam THEN 1 END) AS num_without_final_exam FROM course_offering co;",advising,group_by, @@ -178,7 +178,7 @@ What is the proportion of papers that belong to more than 1 dataset to papers th "What is the name of the venue where the paper with paper ID 2 was published, and how many papers were published in total in that venue?","SELECT venue.venuename, COUNT(DISTINCT paper.paperid) FROM paper JOIN venue ON paper.venueid = venue.venueid WHERE paper.venueid = (SELECT venueid FROM paper WHERE paperid = 2 ) GROUP BY venue.venuename;",scholar,instruct,Always filter strings using ILIKE "What are the names of the authors who wrote the paper with the title ""The Effects of Climate Change on Agriculture""?",SELECT author.authorname FROM author JOIN writes ON author.authorid = writes.authorid JOIN paper ON writes.paperid = paper.paperid WHERE paper.title = 'The Effects of Climate Change on Agriculture';,scholar,instruct,Always filter strings with an exact match "How many papers were published in the journal ""nature"" in the year 2020?",SELECT COUNT(paper.paperid) FROM paper JOIN journal ON paper.journalid = journal.journalid WHERE paper.year = 2020 AND journal.journalname ILIKE '%nature%';,scholar,instruct,Filter strings with case-insensitive matching -"How many papers are associated with the keyphrase ""machine learning"" and were published in the journal named ""IEEE Transactions on Pattern Analysis and Machine Intelligence""?",SELECT COUNT(DISTINCT paper.paperid) FROM paper JOIN journal ON paper.journalid = journal.journalid JOIN paperkeyphrase ON paper.paperid = paperkeyphrase.paperid JOIN keyphrase ON paperkeyphrase.keyphraseid = 
keyphrase.keyphraseid WHERE keyphrase.keyphrasename ILIKE 'machine learning' AND journal.journalname = 'IEEE Transactions on Pattern Analysis and Machine Intelligence';,scholar,instruct,"Filter paper names, journal names, using exact matches. Filter keyphrases with case-insensitive matching." +"How many papers are associated with the keyphrase ""machine learning"" and were published in the journal named ""IEEE Transactions on Pattern Analysis and Machine Intelligence""?",SELECT COUNT(DISTINCT paper.paperid) FROM paper JOIN journal ON paper.journalid = journal.journalid JOIN paperkeyphrase ON paper.paperid = paperkeyphrase.paperid JOIN keyphrase ON paperkeyphrase.keyphraseid = keyphrase.keyphraseid WHERE keyphrase.keyphrasename ILIKE '%machine learning%' AND journal.journalname = 'IEEE Transactions on Pattern Analysis and Machine Intelligence';,scholar,instruct,"Filter paper names, journal names, using exact matches. Filter keyphrases with case-insensitive matching." "How many authors wrote papers that were published in the journal ""Science"" in the year 2020?",SELECT COUNT(DISTINCT writes.authorid) AS number_of_authors FROM writes JOIN paper ON writes.paperid = paper.paperid JOIN journal ON paper.journalid = journal.journalid WHERE journal.journalname ILIKE '%Science%' AND paper.year = 2020;,scholar,instruct,Filter paper names using exact matches. Filter keyphrases and journal names with case-insensitive matching. 
How many reviews were written for businesses located in California in the last 1000 months?,"SELECT count(*) AS review_count FROM review r JOIN business b ON r.business_id = b.business_id WHERE b.state = 'CA' AND (r.year * 12 + extract(MONTH FROM to_date(r.month, 'Month'))) >= (extract(YEAR FROM CURRENT_DATE) * 12 + extract(MONTH FROM CURRENT_DATE) - 1000) ;",yelp,date_functions, What is the total number of check-ins on the 2 days before Saturday?,"SELECT sum(COUNT) AS total_checkins FROM checkin WHERE DAY IN ('Thursday', 'Friday') ;",yelp,date_functions, From fd21f2c3d499d41e00639efecfd52aba9f421b9d Mon Sep 17 00:00:00 2001 From: wendy Date: Wed, 8 May 2024 10:52:46 +0800 Subject: [PATCH 4/4] - revert query for ratio of publications to journals - changed single quotes --- data/questions_gen_postgres.csv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/data/questions_gen_postgres.csv b/data/questions_gen_postgres.csv index 192ea1d..7760794 100644 --- a/data/questions_gen_postgres.csv +++ b/data/questions_gen_postgres.csv @@ -17,7 +17,7 @@ What are the titles of all publications ordered alphabetically?,SELECT DISTINCT What is the ratio of publications to authors in the database?,"SELECT CAST(COUNT(DISTINCT publication.pid) AS FLOAT) / NULLIF(COUNT(DISTINCT author.aid), 0) AS publication_to_author_ratio FROM publication, author;",academic,ratio, What is the ratio of publications presented in conferences to publications published in journals?,"SELECT CAST(COUNT(DISTINCT CASE WHEN cid IS NOT NULL THEN pid END) AS FLOAT) / NULLIF(COUNT(DISTINCT CASE WHEN jid IS NOT NULL THEN pid END), 0) AS ratio FROM publication;",academic,ratio, What is the ratio of the total number of publications to the total number of keywords within each domain ID? 
Show all domain IDs.,"SELECT domain_publication.did, CAST(COUNT(DISTINCT domain_publication.pid) AS FLOAT) / NULLIF(COUNT(DISTINCT domain_keyword.kid), 0) AS publication_to_keyword_ratio FROM domain_publication LEFT JOIN domain_keyword ON domain_publication.did = domain_keyword.did GROUP BY domain_publication.did ORDER BY publication_to_keyword_ratio DESC NULLS LAST;SELECT domain_publication.did, CAST(COUNT(DISTINCT domain_publication.pid) AS FLOAT) / NULLIF(COUNT(DISTINCT domain_keyword.kid), 0) AS publication_to_keyword_ratio FROM domain_keyword LEFT JOIN domain_publication ON domain_publication.did = domain_keyword.did GROUP BY domain_publication.did ORDER BY publication_to_keyword_ratio DESC NULLS LAST;SELECT d.did, COALESCE(CAST(COUNT(DISTINCT dp.pid) AS FLOAT) / NULLIF(COUNT(DISTINCT dk.kid), 0), 0) AS publication_to_keyword_ratio FROM domain d LEFT JOIN domain_publication dp ON d.did = dp.did LEFT JOIN domain_keyword dk ON d.did = dk.did GROUP BY d.did ORDER BY publication_to_keyword_ratio DESC NULLS LAST;",academic,ratio, -How does the ratio of publications to journals change over the years? Return the annual numbers of publications and journals as well.,"SELECT p.year, COUNT(DISTINCT p.pid) AS num_publications, COUNT(DISTINCT j.jid) AS num_journals, CAST(COUNT(DISTINCT p.pid) AS FLOAT) / NULLIF(COUNT(DISTINCT j.jid), 0) AS ratio FROM publication p LEFT JOIN journal j ON p.jid = j.jid GROUP BY p.year ORDER BY p.year;",academic,ratio, +How does the ratio of publications to journals change over the years? 
Return the annual numbers of publications and journals as well.,"SELECT publication.year, COUNT(DISTINCT publication.pid) AS num_publications, COUNT(DISTINCT publication.jid) AS num_journals, CAST(COUNT(DISTINCT publication.pid) AS FLOAT) / NULLIF(COUNT(DISTINCT publication.jid), 0) AS ratio FROM publication GROUP BY publication.year ORDER BY publication.year;",academic,ratio, How does the ratio of authors to organizations differ by continent?,"SELECT o.continent, CAST(COUNT(DISTINCT a.aid) AS FLOAT) / NULLIF(COUNT(DISTINCT o.oid), 0) AS author_to_organization_ratio FROM author a JOIN organization o ON a.oid = o.oid GROUP BY o.continent ORDER BY author_to_organization_ratio DESC NULLS LAST;SELECT organization.continent, COUNT(DISTINCT author.aid)::float / NULLIF(COUNT(DISTINCT organization.oid), 0) AS ratio FROM organization LEFT JOIN author ON author.oid = organization.oid GROUP BY organization.continent ORDER BY ratio DESC NULLS LAST;SELECT organization.continent, COUNT(DISTINCT author.aid)::float / NULLIF(COUNT(DISTINCT organization.oid), 0) AS ratio FROM author LEFT JOIN organization ON author.oid = organization.oid GROUP BY organization.continent ORDER BY ratio DESC NULLS LAST;",academic,ratio, Which author had the most publications in the year 2021 and how many publications did he/she have that year?,"SELECT {author.name, author.aid}, COUNT(publication.pid) AS publication_count FROM writes JOIN author ON writes.aid = author.aid JOIN publication ON writes.pid = publication.pid WHERE publication.year = 2021 GROUP BY {} ORDER BY publication_count DESC NULLS LAST LIMIT 1;",academic,table_join, What is the total number of publications presented in each conference?,"SELECT {conference.name, conference.cid}, COUNT(publication.pid) AS total_publications FROM publication JOIN conference ON publication.cid = conference.cid GROUP BY {} ORDER BY total_publications DESC;",academic,table_join, @@ -131,7 +131,7 @@ What is the average rating of restaurants serving each type 
of food?,"SELECT res How many restaurants serve Italian food in each city?,"SELECT restaurant.city_name, COUNT(*) AS number_of_restaurants FROM restaurant WHERE restaurant.food_type ILIKE '%Italian%' GROUP BY restaurant.city_name ORDER BY number_of_restaurants DESC NULLS LAST;",restaurants,group_by, How many restaurants are there in each city? Order the results by the number of restaurants in descending order.,"SELECT location.city_name, COUNT(DISTINCT location.restaurant_id) AS number_of_restaurants FROM LOCATION GROUP BY location.city_name ORDER BY number_of_restaurants DESC NULLS LAST;",restaurants,group_by, Which street has the most number of restaurants?,SELECT street_name FROM location GROUP BY street_name ORDER BY COUNT(restaurant_id) DESC LIMIT 1;,restaurants,order_by, -Which restaurants serve Italian cuisine or are located in New York? Order the results by the restaurant name.,SELECT name FROM restaurant WHERE food_type ILIKE '%Italian%' OR city_name ILIKE ‘%New York%’ ORDER BY name NULLS LAST;,restaurants,order_by, +Which restaurants serve Italian cuisine or are located in New York? Order the results by the restaurant name.,SELECT name FROM restaurant WHERE food_type ILIKE '%Italian%' OR city_name ILIKE '%New York%' ORDER BY name NULLS LAST;,restaurants,order_by, What is the average rating of restaurants in each region? Order the results by the region name.,"SELECT geographic.region, AVG(restaurant.rating) AS average_rating FROM restaurant JOIN geographic ON restaurant.city_name = geographic.city_name GROUP BY geographic.region ORDER BY geographic.region NULLS LAST;",restaurants,order_by, What are the names of the top 3 restaurants with the highest ratings?,SELECT restaurant.name FROM restaurant ORDER BY restaurant.rating DESC NULLS LAST LIMIT 3;,restaurants,order_by, List the restaurants starting from the best ratings to the lowest,"SELECT {name, id}, rating FROM restaurant ORDER BY rating DESC;",restaurants,order_by,