From 5da60547cbc49b007cb663f9139565174aff3170 Mon Sep 17 00:00:00 2001 From: wendy Date: Wed, 19 Jun 2024 13:53:03 +0800 Subject: [PATCH] replace ILIKE with LIKE in instructions --- data/questions_gen_bigquery.csv | 42 ++++++++++++++++----------------- data/questions_gen_tsql.csv | 42 ++++++++++++++++----------------- translate_sql_dialect.py | 10 +++++++- 3 files changed, 51 insertions(+), 43 deletions(-) diff --git a/data/questions_gen_bigquery.csv b/data/questions_gen_bigquery.csv index f3f9f35..85e3854 100644 --- a/data/questions_gen_bigquery.csv +++ b/data/questions_gen_bigquery.csv @@ -25,10 +25,10 @@ academic,bigquery,table_join,"SELECT journal.name, COUNT(publication.pid) AS tot academic,bigquery,table_join,"SELECT conference.name, COUNT(publication.pid) AS num_publications FROM academic.publication JOIN academic.conference ON publication.cid = conference.cid GROUP BY conference.name, conference.cid ORDER BY num_publications DESC;","How many publications were presented at each conference, ordered by the number of publications in descending order? Give the names of the conferences and their corresponding number of publications.", academic,bigquery,table_join,SELECT COUNT(DISTINCT publication.pid) FROM academic.publication JOIN academic.journal ON publication.jid = journal.jid WHERE LOWER(journal.name) LIKE 'J%';,"How many publications were published in journals whose names start with the letter ""J""?", academic,bigquery,instruct,"SELECT DISTINCT organization.name FROM academic.organization JOIN academic.author ON organization.oid = author.oid JOIN academic.writes ON author.aid = writes.aid JOIN academic.domain_publication ON writes.pid = domain_publication.pid JOIN academic.domain ON domain_publication.did = domain.did WHERE domain.name = 'Machine Learning';SELECT DISTINCT organization.oid FROM academic.organization JOIN academic.author ON organization.oid = author.oid JOIN academic.writes ON author.aid = writes.aid JOIN academic.domain_publication ON writes.pid = domain_publication.pid JOIN academic.domain ON domain_publication.did = domain.did WHERE domain.name = 'Machine Learning';SELECT DISTINCT organization.name, organization.oid FROM academic.organization JOIN academic.author ON organization.oid = author.oid JOIN academic.writes ON author.aid = writes.aid JOIN academic.domain_publication ON writes.pid = domain_publication.pid JOIN academic.domain ON domain_publication.did = domain.did WHERE domain.name = 'Machine Learning';","Which organizations have authors who have written publications in the domain ""Machine Learning""?",Always filter names using an exact match -academic,bigquery,instruct,"SELECT DISTINCT a2.name FROM academic.author AS a1 JOIN academic.domain_author AS da1 ON a1.aid = da1.aid JOIN academic.domain_author AS da2 ON da1.did = da2.did JOIN academic.author AS a2 ON da2.aid = a2.aid WHERE LOWER(LOWER(a1.name)) LIKE '%martin%';SELECT DISTINCT a2.aid FROM academic.author AS a1 JOIN academic.domain_author AS da1 ON a1.aid = da1.aid JOIN academic.domain_author AS da2 ON da1.did = da2.did JOIN academic.author AS a2 ON da2.aid = a2.aid WHERE LOWER(LOWER(a1.name)) LIKE '%martin%';SELECT DISTINCT a2.name, a2.aid FROM academic.author AS a1 JOIN academic.domain_author AS da1 ON a1.aid = da1.aid JOIN academic.domain_author AS da2 ON da1.did = da2.did JOIN academic.author AS a2 ON da2.aid = a2.aid WHERE LOWER(LOWER(a1.name)) LIKE '%martin%';",Which authors belong to the same domain as Martin?,Always filter names using ILIKE with percent sign wildcards -academic,bigquery,instruct,"SELECT DISTINCT name FROM academic.author WHERE oid IS NULL;SELECT DISTINCT aid FROM academic.author WHERE oid IS NULL;SELECT DISTINCT name, aid FROM academic.author WHERE oid IS NULL;",Which authors are not part of any organization?,Always filter names using ILIKE -academic,bigquery,instruct,"SELECT DISTINCT publication.title FROM academic.domain JOIN academic.domain_author ON domain.did = domain_author.did JOIN academic.writes ON domain_author.aid = writes.aid JOIN academic.publication ON writes.pid = publication.pid JOIN academic.domain_conference ON domain.did = domain_conference.did WHERE LOWER(domain.name) LIKE '%sociology%' AND publication.year = 2020 AND publication.cid = domain_conference.cid;SELECT DISTINCT publication.pid FROM academic.domain JOIN academic.domain_author ON domain.did = domain_author.did JOIN academic.writes ON domain_author.aid = writes.aid JOIN academic.publication ON writes.pid = publication.pid JOIN academic.domain_conference ON domain.did = domain_conference.did WHERE LOWER(domain.name) LIKE '%sociology%' AND publication.year = 2020 AND publication.cid = domain_conference.cid;SELECT DISTINCT publication.pid, publication.title FROM academic.domain JOIN academic.domain_author ON domain.did = domain_author.did JOIN academic.writes ON domain_author.aid = writes.aid JOIN academic.publication ON writes.pid = publication.pid JOIN academic.domain_conference ON domain.did = domain_conference.did WHERE LOWER(domain.name) LIKE '%sociology%' AND publication.year = 2020 AND publication.cid = domain_conference.cid;",What are the publications written by authors from the 'Sociology' domain and presented at conferences in the year 2020?,"To get publications written by authors from a given domain, you would need to join domain, domain_author, author to link the domain to the author first, and then join with write to link with the publication id. Finally, to see which ones were presented at conferences, you must join the domain table with the domain_conference table. You must also filter names using ILIKE." -academic,bigquery,instruct,SELECT DISTINCT author.name FROM academic.author JOIN academic.writes ON author.aid = writes.aid JOIN academic.publication ON writes.pid = publication.pid JOIN academic.domain_publication ON publication.pid = domain_publication.pid JOIN academic.domain ON domain_publication.did = domain.did WHERE LOWER(domain.name) LIKE '%computer%science%';,"What are the names of the authors who have written publications in the domain ""Computer Science""?","To get publications written by authors from a given domain, you would need to join domain, domain_author, author to link the domain to the author first, and then join with write to link with the publication id. You must also filter names using ILIKE." +academic,bigquery,instruct,"SELECT DISTINCT a2.name FROM academic.author AS a1 JOIN academic.domain_author AS da1 ON a1.aid = da1.aid JOIN academic.domain_author AS da2 ON da1.did = da2.did JOIN academic.author AS a2 ON da2.aid = a2.aid WHERE LOWER(LOWER(a1.name)) LIKE '%martin%';SELECT DISTINCT a2.aid FROM academic.author AS a1 JOIN academic.domain_author AS da1 ON a1.aid = da1.aid JOIN academic.domain_author AS da2 ON da1.did = da2.did JOIN academic.author AS a2 ON da2.aid = a2.aid WHERE LOWER(LOWER(a1.name)) LIKE '%martin%';SELECT DISTINCT a2.name, a2.aid FROM academic.author AS a1 JOIN academic.domain_author AS da1 ON a1.aid = da1.aid JOIN academic.domain_author AS da2 ON da1.did = da2.did JOIN academic.author AS a2 ON da2.aid = a2.aid WHERE LOWER(LOWER(a1.name)) LIKE '%martin%';",Which authors belong to the same domain as Martin?,Always filter names using LIKE with percent sign wildcards +academic,bigquery,instruct,"SELECT DISTINCT name FROM academic.author WHERE oid IS NULL;SELECT DISTINCT aid FROM academic.author WHERE oid IS NULL;SELECT DISTINCT name, aid FROM academic.author WHERE oid IS NULL;",Which authors are not part of any organization?,Always filter names using LIKE +academic,bigquery,instruct,"SELECT DISTINCT publication.title FROM academic.domain JOIN academic.domain_author ON domain.did = domain_author.did JOIN academic.writes ON domain_author.aid = writes.aid JOIN academic.publication ON writes.pid = publication.pid JOIN academic.domain_conference ON domain.did = domain_conference.did WHERE LOWER(domain.name) LIKE '%sociology%' AND publication.year = 2020 AND publication.cid = domain_conference.cid;SELECT DISTINCT publication.pid FROM academic.domain JOIN academic.domain_author ON domain.did = domain_author.did JOIN academic.writes ON domain_author.aid = writes.aid JOIN academic.publication ON writes.pid = publication.pid JOIN academic.domain_conference ON domain.did = domain_conference.did WHERE LOWER(domain.name) LIKE '%sociology%' AND publication.year = 2020 AND publication.cid = domain_conference.cid;SELECT DISTINCT publication.pid, publication.title FROM academic.domain JOIN academic.domain_author ON domain.did = domain_author.did JOIN academic.writes ON domain_author.aid = writes.aid JOIN academic.publication ON writes.pid = publication.pid JOIN academic.domain_conference ON domain.did = domain_conference.did WHERE LOWER(domain.name) LIKE '%sociology%' AND publication.year = 2020 AND publication.cid = domain_conference.cid;",What are the publications written by authors from the 'Sociology' domain and presented at conferences in the year 2020?,"To get publications written by authors from a given domain, you would need to join domain, domain_author, author to link the domain to the author first, and then join with write to link with the publication id. Finally, to see which ones were presented at conferences, you must join the domain table with the domain_conference table. You must also filter names using LIKE." +academic,bigquery,instruct,SELECT DISTINCT author.name FROM academic.author JOIN academic.writes ON author.aid = writes.aid JOIN academic.publication ON writes.pid = publication.pid JOIN academic.domain_publication ON publication.pid = domain_publication.pid JOIN academic.domain ON domain_publication.did = domain.did WHERE LOWER(domain.name) LIKE '%computer%science%';,"What are the names of the authors who have written publications in the domain ""Computer Science""?","To get publications written by authors from a given domain, you would need to join domain, domain_author, author to link the domain to the author first, and then join with write to link with the publication id. You must also filter names using LIKE." advising,bigquery,date_functions,"SELECT TIMESTAMP_TRUNC(s.admit_term, MONTH) AS month, COUNT(*) AS total_students FROM advising.student AS s GROUP BY MONTH ORDER BY total_students DESC NULLS FIRST LIMIT 1;",What month were most students admitted?, advising,bigquery,date_functions,SELECT AVG(predicted_graduation_semester - admit_term) AS average_predicted_time_to_graduation FROM advising.student;,What's the average predicted time to graduation since admission in no. of days?, advising,bigquery,date_functions,"SELECT COUNT(*) AS num_students_graduated FROM advising.student WHERE predicted_graduation_semester >= TIMESTAMP_TRUNC(CURRENT_DATE, YEAR) - INTERVAL '10' YEAR;",How many students were predicted to graduate in the last 10 years?, @@ -57,7 +57,7 @@ advising,bigquery,table_join,"SELECT program.name, COUNT(student.student_id) AS advising,bigquery,instruct,"SELECT student.firstname, student.lastname FROM advising.student WHERE NOT student.minor IS NULL ORDER BY student.lastname NULLS LAST;",Which students have declared a minor program? List their firstname and lastname. Order the results by the students' last names.,"student.declare_major is null for students who have not declared their major. student.minor is null for students who have not declared a minor program." advising,bigquery,instruct,SELECT AVG(student.total_gpa) FROM advising.student JOIN advising.program ON student.program_id = program.program_id WHERE LOWER(program.name) = 'mathematics';,What is the average GPA of students in the program mathematics?,Match strings case-insensitively -advising,bigquery,instruct,SELECT course.name FROM advising.course WHERE LOWER(course.department) LIKE '%Computer Science%' ORDER BY course.name ASC NULLS LAST;,What are the names of all the courses offered by the department of Computer Science?,"Filter strings using ILIKE. +advising,bigquery,instruct,SELECT course.name FROM advising.course WHERE LOWER(course.department) LIKE '%Computer Science%' ORDER BY course.name ASC NULLS LAST;,What are the names of all the courses offered by the department of Computer Science?,"Filter strings using LIKE. Use the student_record table for all information relating to students' choices and their course." advising,bigquery,instruct,"SELECT course.name, course.easiness_score FROM advising.course WHERE course.department = 'Computer Science';SELECT course.course_id, course.easiness_score FROM advising.course WHERE course.department = 'Computer Science';SELECT course.number, course.easiness_score FROM advising.course WHERE course.department = 'Computer Science';SELECT course.name, course.course_id, course.easiness_score FROM advising.course WHERE course.department = 'Computer Science';SELECT course.name, course.number, course.easiness_score FROM advising.course WHERE course.department = 'Computer Science';SELECT course.course_id, course.number, course.easiness_score FROM advising.course WHERE course.department = 'Computer Science';SELECT course.name, course.course_id, course.number, course.easiness_score FROM advising.course WHERE course.department = 'Computer Science';","What are the easiness scores for courses in the ""Computer Science"" department? Show both courses and scores.",Always filter names using exact string matching advising,bigquery,instruct,"SELECT DISTINCT student_id FROM advising.student_record WHERE student_record.how = 'in-person' AND student_record.grade IN ('A', 'C');",Return the student IDs who have taken an in-person course and have gotten a grade of A or C.,"Always filter strings with an exact match. @@ -87,18 +87,18 @@ atis,bigquery,table_join,"SELECT DISTINCT airline.airline_name FROM atis.flight_ atis,bigquery,table_join,"SELECT DISTINCT airline.airline_name FROM atis.flight JOIN atis.airline ON flight.airline_code = airline.airline_code WHERE flight.from_airport = 'LAX' AND flight.to_airport = 'ORD';SELECT DISTINCT airline.airline_code FROM atis.flight JOIN atis.airline ON flight.airline_code = airline.airline_code WHERE flight.from_airport = 'LAX' AND flight.to_airport = 'ORD';SELECT DISTINCT airline.airline_name, airline.airline_code FROM atis.flight JOIN atis.airline ON flight.airline_code = airline.airline_code WHERE flight.from_airport = 'LAX' AND flight.to_airport = 'ORD';",Which airlines offer flights from LAX to ORD?, atis,bigquery,table_join,"SELECT airline.airline_name, flight.stops FROM atis.flight JOIN atis.airline ON flight.airline_code = airline.airline_code WHERE flight.from_airport = 'ORD' AND flight.to_airport = 'JFK' ORDER BY flight.stops NULLS LAST;SELECT airline.airline_code, flight.stops FROM atis.flight JOIN atis.airline ON flight.airline_code = airline.airline_code WHERE flight.from_airport = 'ORD' AND flight.to_airport = 'JFK' ORDER BY flight.stops NULLS LAST;SELECT airline.airline_name, airline.airline_code, flight.stops FROM atis.flight JOIN atis.airline ON flight.airline_code = airline.airline_code WHERE flight.from_airport = 'ORD' AND flight.to_airport = 'JFK' ORDER BY flight.stops NULLS LAST;","Which airlines offer flights from Chicago (ORD) to New York (JFK), and how many stops do they have, sorted by number of stops in ascending order?", atis,bigquery,table_join,"SELECT DISTINCT airline.airline_name FROM atis.airline WHERE NOT airline.airline_code IN (SELECT flight.airline_code FROM atis.flight WHERE flight.from_airport = 'JFK' OR flight.to_airport = 'JFK' OR flight.stops > 0);SELECT DISTINCT airline.airline_code FROM atis.airline WHERE NOT airline.airline_code IN (SELECT flight.airline_code FROM atis.flight WHERE flight.from_airport = 'JFK' OR flight.to_airport = 'JFK' OR flight.stops > 0);SELECT DISTINCT airline.airline_name, airline.airline_code FROM atis.airline WHERE NOT airline.airline_code IN (SELECT flight.airline_code FROM atis.flight WHERE flight.from_airport = 'JFK' OR flight.to_airport = 'JFK' OR flight.stops > 0);","Which airlines do not have any flights that either depart from/arrive at JFK, or have one or more stops?", -atis,bigquery,instruct,SELECT state_code FROM atis.airport WHERE LOWER(airport_name) LIKE '%Orlando International Airport%';,Which state code is Orlando International Airport in?,"Filter airport, city, country names using ILIKE. +atis,bigquery,instruct,SELECT state_code FROM atis.airport WHERE LOWER(airport_name) LIKE '%Orlando International Airport%';,Which state code is Orlando International Airport in?,"Filter airport, city, country names using LIKE. Filter state code (eg NY), airport codes (eg JFK) using case-insensitive matches. If multiple flight days are requested, use ILIKE and wildcards for each of the days separately, since they are not necessarily ordered." -atis,bigquery,instruct,"SELECT flight.flight_number FROM atis.flight WHERE LOWER(flight.flight_days) LIKE '%mon%' AND LOWER(flight.flight_days) LIKE '%wed%';SELECT flight.flight_id FROM atis.flight WHERE LOWER(flight.flight_days) LIKE '%mon%' AND LOWER(flight.flight_days) LIKE '%wed%';SELECT flight.flight_number, flight.flight_id FROM atis.flight WHERE LOWER(flight.flight_days) LIKE '%mon%' AND LOWER(flight.flight_days) LIKE '%wed%';",Which flights operate on Mondays and Wednesdays? Give me the relevant flight numbers,"Filter airport, city, country names using ILIKE. +atis,bigquery,instruct,"SELECT flight.flight_number FROM atis.flight WHERE LOWER(flight.flight_days) LIKE '%mon%' AND LOWER(flight.flight_days) LIKE '%wed%';SELECT flight.flight_id FROM atis.flight WHERE LOWER(flight.flight_days) LIKE '%mon%' AND LOWER(flight.flight_days) LIKE '%wed%';SELECT flight.flight_number, flight.flight_id FROM atis.flight WHERE LOWER(flight.flight_days) LIKE '%mon%' AND LOWER(flight.flight_days) LIKE '%wed%';",Which flights operate on Mondays and Wednesdays? Give me the relevant flight numbers,"Filter airport, city, country names using LIKE. Filter state code (eg NY), airport codes (eg JFK) using case-insensitive matches. If multiple flight days are requested, use ILIKE for each of the days separately, since they are not necessarily ordered." -atis,bigquery,instruct,SELECT SUM(fare.round_trip_cost) AS total_round_trip_cost FROM atis.fare WHERE fare.from_airport = 'JFK' AND fare.to_airport = 'LAX';,What is the total cost of all round-trip fares from New York (JFK) to Los Angeles?,"Filter airport, city, country names using ILIKE. +atis,bigquery,instruct,SELECT SUM(fare.round_trip_cost) AS total_round_trip_cost FROM atis.fare WHERE fare.from_airport = 'JFK' AND fare.to_airport = 'LAX';,What is the total cost of all round-trip fares from New York (JFK) to Los Angeles?,"Filter airport, city, country names using LIKE. Filter state code (eg NY), airport codes (eg JFK) using case-insensitive matches. fare.round_trip_required is not needed when getting the round trip cost." -atis,bigquery,instruct,SELECT minimum_connect_time FROM atis.airport WHERE airport_code = 'JFK';,What is the minimum amount of time required for a connecting flight at JFK Airport?,"Filter airport, city, country names using ILIKE. +atis,bigquery,instruct,SELECT minimum_connect_time FROM atis.airport WHERE airport_code = 'JFK';,What is the minimum amount of time required for a connecting flight at JFK Airport?,"Filter airport, city, country names using LIKE. Filter state code (eg NY) and airport codes (eg JFK) using upper-case matches." -atis,bigquery,instruct,SELECT COUNT(DISTINCT flight_fare.flight_id) FROM atis.flight_fare JOIN atis.fare ON flight_fare.fare_id = fare.fare_id WHERE fare.round_trip_required = 'Yes';,How many flights require a round-trip to purchase the fare?,"Filter airport, city, country names using ILIKE. +atis,bigquery,instruct,SELECT COUNT(DISTINCT flight_fare.flight_id) FROM atis.flight_fare JOIN atis.fare ON flight_fare.fare_id = fare.fare_id WHERE fare.round_trip_required = 'Yes';,How many flights require a round-trip to purchase the fare?,"Filter airport, city, country names using LIKE. Filter state code (eg NY) and airport codes (eg JFK) using upper-case matches." geography,bigquery,group_by,"SELECT city.country_name, SUM(city.population) AS total_population FROM geography.city GROUP BY city.country_name ORDER BY total_population DESC;",What is the total population in cities by country?, geography,bigquery,group_by,"SELECT river.country_name, AVG(river.length) AS average_length FROM geography.river GROUP BY river.country_name ORDER BY average_length DESC;",What is the average length of rivers in each country?, @@ -120,11 +120,11 @@ geography,bigquery,table_join,SELECT border_info.border FROM geography.border_in geography,bigquery,table_join,SELECT lake.lake_name FROM geography.lake JOIN geography.state ON lake.state_name = state.state_name WHERE state.area > 1000 AND LOWER(lake.lake_name) LIKE 'Lake%' ORDER BY lake.lake_name NULLS LAST;,"Which lakes have a name that starts with ""Lake""? They should be located in states with an area greater than 1000 square kilometers.", geography,bigquery,table_join,"SELECT highlow.state_name, highlow.highest_point, state.density FROM geography.highlow JOIN geography.state ON highlow.state_name = state.state_name;",What is the highest point in each state and what is the population density of that state?, geography,bigquery,table_join,"SELECT l.country_name, AVG(r.length) AS average_length FROM geography.river AS r JOIN geography.lake AS l ON r.country_name = l.country_name GROUP BY 1;",What is the average length of rivers per country in countries with a lake?, -geography,bigquery,instruct,SELECT state_name FROM geography.state WHERE population < 100000;,Which states have fewer than a hundred thousand people?,Always filter names using ILIKE -geography,bigquery,instruct,"SELECT river_name FROM geography.river WHERE traverse LIKE '%,%,%';",Which rivers traverse at least 3 cities/landmarks?,Always filter names using ILIKE -geography,bigquery,instruct,"SELECT lake_name, area FROM geography.lake WHERE LOWER(state_name) LIKE '%Michigan%';",What are the names and areas of the lakes in Michigan?,Always filter names using ILIKE -geography,bigquery,instruct,"SELECT mountain_name, mountain_altitude FROM geography.mountain WHERE LOWER(country_name) LIKE '%Nepal%';",What are the names and altitudes of the mountains in Nepal?,Always filter names using ILIKE -geography,bigquery,instruct,"SELECT city_name, population FROM geography.city WHERE LOWER(country_name) LIKE '%United States%';",Get the cities in the United States and their population,Always filter names using ILIKE +geography,bigquery,instruct,SELECT state_name FROM geography.state WHERE population < 100000;,Which states have fewer than a hundred thousand people?,Always filter names using LIKE +geography,bigquery,instruct,"SELECT river_name FROM geography.river WHERE traverse LIKE '%,%,%';",Which rivers traverse at least 3 cities/landmarks?,Always filter names using LIKE +geography,bigquery,instruct,"SELECT lake_name, area FROM geography.lake WHERE LOWER(state_name) LIKE '%Michigan%';",What are the names and areas of the lakes in Michigan?,Always filter names using LIKE +geography,bigquery,instruct,"SELECT mountain_name, mountain_altitude FROM geography.mountain WHERE LOWER(country_name) LIKE '%Nepal%';",What are the names and altitudes of the mountains in Nepal?,Always filter names using LIKE +geography,bigquery,instruct,"SELECT city_name, population FROM geography.city WHERE LOWER(country_name) LIKE '%United States%';",Get the cities in the United States and their population,Always filter names using LIKE restaurants,bigquery,group_by,"SELECT restaurant.food_type, COUNT(DISTINCT restaurant.id) AS total_number_of_restaurants FROM restaurants.restaurant GROUP BY restaurant.food_type;",What is the total number of restaurants serving each type of food?, restaurants,bigquery,group_by,"SELECT location.city_name, COUNT(DISTINCT location.restaurant_id) AS total_count FROM restaurants.location GROUP BY location.city_name;",What is the total count of restaurants in each city?, restaurants,bigquery,group_by,"SELECT restaurant.food_type, AVG(restaurant.rating) AS average_rating FROM restaurants.restaurant GROUP BY restaurant.food_type ORDER BY average_rating DESC;",What is the average rating of restaurants serving each type of food?, @@ -175,7 +175,7 @@ scholar,bigquery,table_join,"SELECT author.authorname, COUNT(DISTINCT writes.pap scholar,bigquery,table_join,SELECT COUNT(DISTINCT paperkeyphrase.keyphraseid) AS total_keyphrases FROM scholar.paper JOIN scholar.journal ON paper.journalid = journal.journalid JOIN scholar.paperkeyphrase ON paper.paperid = paperkeyphrase.paperid WHERE LOWER(journal.journalname) LIKE '%IEEE Transactions%';,"What is the total number of unique keyphrases associated with papers published in the journal with ""IEEE Transactions"" in its name?", scholar,bigquery,table_join,"SELECT journal.journalname, COUNT(DISTINCT paper.paperid) AS total_papers FROM scholar.paper JOIN scholar.journal ON paper.journalid = journal.journalid GROUP BY journal.journalname ORDER BY journal.journalname NULLS LAST;","What is the total number of papers published in each journal, ordered by the journal name?", scholar,bigquery,table_join,"SELECT paperdataset.paperid, COUNT(cite.citedpaperid) AS citation_count FROM scholar.paperdataset JOIN scholar.cite ON paperdataset.paperid = cite.citedpaperid WHERE paperdataset.datasetid = (SELECT datasetid FROM scholar.dataset WHERE LOWER(datasetname) LIKE '%COVID-19 Research%') GROUP BY paperdataset.paperid ORDER BY citation_count DESC NULLS FIRST;SELECT p.title, COUNT(c.citingpaperid) AS num_citing_papers FROM scholar.paper AS p JOIN scholar.paperdataset AS pd ON p.paperid = pd.paperid JOIN scholar.cite AS c ON p.paperid = c.citedpaperid JOIN scholar.dataset AS d ON pd.datasetid = d.datasetid WHERE d.datasetname = 'COVID-19 Research' GROUP BY p.title ORDER BY num_citing_papers DESC;","How many papers cite each paper in the dataset named ""COVID-19 Research""?", -scholar,bigquery,instruct,"SELECT venue.venuename, COUNT(DISTINCT paper.paperid) FROM scholar.paper JOIN scholar.venue ON paper.venueid = venue.venueid WHERE paper.venueid = (SELECT venueid FROM scholar.paper WHERE paperid = 2) GROUP BY venue.venuename;","What is the name of the venue where the paper with paper ID 2 was published, and how many papers were published in total in that venue?",Always filter strings using ILIKE +scholar,bigquery,instruct,"SELECT venue.venuename, COUNT(DISTINCT paper.paperid) FROM scholar.paper JOIN scholar.venue ON paper.venueid = venue.venueid WHERE paper.venueid = (SELECT venueid FROM scholar.paper WHERE paperid = 2) GROUP BY venue.venuename;","What is the name of the venue where the paper with paper ID 2 was published, and how many papers were published in total in that venue?",Always filter strings using LIKE scholar,bigquery,instruct,SELECT author.authorname FROM scholar.author JOIN scholar.writes ON author.authorid = writes.authorid JOIN scholar.paper ON writes.paperid = paper.paperid WHERE paper.title = 'The Effects of Climate Change on Agriculture';,"What are the names of the authors who wrote the paper with the title ""The Effects of Climate Change on Agriculture""?",Always filter strings with an exact match scholar,bigquery,instruct,SELECT COUNT(paper.paperid) FROM scholar.paper JOIN scholar.journal ON paper.journalid = journal.journalid WHERE paper.year = 2020 AND LOWER(journal.journalname) LIKE '%nature%';,"How many papers were published in the journal ""nature"" in the year 2020?",Filter strings with case-insensitive matching scholar,bigquery,instruct,SELECT COUNT(DISTINCT paper.paperid) FROM scholar.paper JOIN scholar.journal ON paper.journalid = journal.journalid JOIN scholar.paperkeyphrase ON paper.paperid = paperkeyphrase.paperid JOIN scholar.keyphrase ON paperkeyphrase.keyphraseid = keyphrase.keyphraseid WHERE LOWER(keyphrase.keyphrasename) LIKE '%machine learning%' AND journal.journalname = 'IEEE Transactions on Pattern Analysis and Machine Intelligence';,"How many papers are associated with the keyphrase ""machine learning"" and were published in the journal named ""IEEE Transactions on Pattern Analysis and Machine Intelligence""?","Filter paper names, journal names, using exact matches. Filter keyphrases with case-insensitive matching." @@ -205,27 +205,27 @@ yelp,bigquery,table_join,"SELECT category.category_name, SUM(business.review_cou yelp,bigquery,table_join,"SELECT category.category_name, SUM(business.review_count) AS total_reviews FROM yelp.business JOIN yelp.category ON business.business_id = category.business_id GROUP BY category.category_name ORDER BY total_reviews DESC;SELECT category.id, SUM(business.review_count) AS total_reviews FROM yelp.business JOIN yelp.category ON business.business_id = category.business_id GROUP BY category.id ORDER BY total_reviews DESC;SELECT category.category_name, category.id, SUM(business.review_count) AS total_reviews FROM yelp.business JOIN yelp.category ON business.business_id = category.business_id GROUP BY category.category_name, category.id ORDER BY total_reviews DESC;",What is the total number of reviews for each business category?, yelp,bigquery,table_join,"SELECT business.business_id, SUM(checkin.count) AS total_checkins FROM yelp.business JOIN yelp.checkin ON business.business_id = checkin.business_id WHERE business.state = 'CA' GROUP BY business.business_id ORDER BY total_checkins DESC;SELECT business.name, SUM(checkin.count) AS total_checkins FROM yelp.business JOIN yelp.checkin ON business.business_id = checkin.business_id WHERE business.state = 'CA' GROUP BY business.name ORDER BY total_checkins DESC;SELECT business.bid, SUM(checkin.count) AS total_checkins FROM yelp.business JOIN yelp.checkin ON business.business_id = checkin.business_id WHERE business.state = 'CA' GROUP BY business.bid ORDER BY total_checkins DESC;SELECT business.business_id, business.name, SUM(checkin.count) AS total_checkins FROM yelp.business JOIN yelp.checkin ON business.business_id = checkin.business_id WHERE business.state = 'CA' GROUP BY business.business_id, business.name ORDER BY total_checkins DESC;SELECT business.business_id, business.bid, SUM(checkin.count) AS total_checkins FROM yelp.business JOIN yelp.checkin ON business.business_id = checkin.business_id WHERE business.state = 'CA' GROUP BY business.business_id, business.bid ORDER BY total_checkins DESC;SELECT business.name, business.bid, SUM(checkin.count) AS total_checkins FROM yelp.business JOIN yelp.checkin ON business.business_id = checkin.business_id WHERE business.state = 'CA' GROUP BY business.name, business.bid ORDER BY total_checkins DESC;SELECT business.business_id, business.name, business.bid, SUM(checkin.count) AS total_checkins FROM yelp.business JOIN yelp.checkin ON business.business_id = checkin.business_id WHERE business.state = 'CA' GROUP BY business.business_id, business.name, business.bid ORDER BY total_checkins DESC;",What is the total number of check-ins for each business in the state of California?, yelp,bigquery,table_join,"SELECT category.category_name FROM (SELECT business.business_id, AVG(review.rating) AS average_rating FROM yelp.business JOIN yelp.review ON business.business_id = review.business_id GROUP BY business.business_id) AS business_rating JOIN yelp.category ON business_rating.business_id = category.business_id GROUP BY category.category_name ORDER BY AVG(business_rating.average_rating) DESC LIMIT 2;SELECT category.id FROM (SELECT business.business_id, AVG(review.rating) AS average_rating FROM yelp.business JOIN yelp.review ON business.business_id = review.business_id GROUP BY business.business_id) AS business_rating JOIN yelp.category ON business_rating.business_id = category.business_id GROUP BY category.id ORDER BY AVG(business_rating.average_rating) DESC LIMIT 2;SELECT category.category_name, category.id FROM (SELECT business.business_id, AVG(review.rating) AS average_rating FROM yelp.business JOIN yelp.review ON business.business_id = review.business_id GROUP BY business.business_id) AS business_rating JOIN yelp.category ON business_rating.business_id = category.business_id GROUP BY category.category_name, category.id ORDER BY AVG(business_rating.average_rating) DESC LIMIT 2;",What are the top 2 categories of businesses with the highest average rating?, -yelp,bigquery,instruct,SELECT COUNT(review.rid) AS total_reviews FROM yelp.review JOIN yelp.category ON review.business_id = category.business_id WHERE review.year = 2021 AND LOWER(category.category_name) LIKE '%Cafe%';,"What is the total number of reviews posted in the year 2021 for businesses in the category ""Cafe""?","Filter strings of users, city, address, business.name using ILIKE with wildcards. +yelp,bigquery,instruct,SELECT COUNT(review.rid) AS total_reviews FROM yelp.review JOIN yelp.category ON review.business_id = category.business_id WHERE review.year = 2021 AND LOWER(category.category_name) LIKE '%Cafe%';,"What is the total number of reviews posted in the year 2021 for businesses in the category ""Cafe""?","Filter strings of users, city, address, business.name using LIKE with wildcards. Filter strings of state using exact upper case matches. Assume the rating of a business to be its average rating, and compute it before computing other aggregates on it. Always truncate dates in the question to its current month and year (if applicable) before filtering on `review.year` and `review.month`. " -yelp,bigquery,instruct,"SELECT AVG(sf.average_rating) AS sf_average_rating FROM (SELECT business.business_id, AVG(review.rating) AS average_rating FROM yelp.business JOIN yelp.review ON business.business_id = review.business_id WHERE LOWER(LOWER(business.city)) LIKE '%san francisco%' GROUP BY business.business_id) AS sf;",What is the average rating of businesses in the city of San Francisco?,"Filter strings of users, city, address, business.name using ILIKE with wildcards. +yelp,bigquery,instruct,"SELECT AVG(sf.average_rating) AS sf_average_rating FROM (SELECT business.business_id, AVG(review.rating) AS average_rating FROM yelp.business JOIN yelp.review ON business.business_id = review.business_id WHERE LOWER(LOWER(business.city)) LIKE '%san francisco%' GROUP BY business.business_id) AS sf;",What is the average rating of businesses in the city of San Francisco?,"Filter strings of users, city, address, business.name using LIKE with wildcards. Filter strings of state using exact upper case matches. The rating of businesses in a city refers to the average rating of the businesses in that city. I.e., you must compute the average rating of each business before computing the average rating of businesses in the city. Always truncate dates in the question to its current month and year (if applicable) before filtering on `review.year` and `review.month`. " -yelp,bigquery,instruct,"SELECT review.business_id, COUNT(*) AS review_count FROM yelp.review WHERE review.year = 2021 GROUP BY review.business_id ORDER BY review_count DESC;",How many reviews were posted for each business id in the year 2021?,"Filter strings of users, city, address, business.name using ILIKE with wildcards. +yelp,bigquery,instruct,"SELECT review.business_id, COUNT(*) AS review_count FROM yelp.review WHERE review.year = 2021 GROUP BY review.business_id ORDER BY review_count DESC;",How many reviews were posted for each business id in the year 2021?,"Filter strings of users, city, address, business.name using LIKE with wildcards. Filter strings of state using exact upper case matches. Assume the rating of a business to be its average rating, and compute it before computing other aggregates on it. Always truncate dates in the question to its current month and year (if applicable) before filtering on `review.year` and `review.month`. " -yelp,bigquery,instruct,SELECT COUNT(*) FROM yelp.review JOIN yelp.users ON review.user_id = users.user_id WHERE LOWER(users.name) LIKE '%Sarah Williams%' AND review.month = 'April' AND review.year = 2021;,"How many reviews were posted by users with the name ""Sarah Williams"" in the month of April 2021?","Filter strings of users, city, address, business.name using ILIKE with wildcards. +yelp,bigquery,instruct,SELECT COUNT(*) FROM yelp.review JOIN yelp.users ON review.user_id = users.user_id WHERE LOWER(users.name) LIKE '%Sarah Williams%' AND review.month = 'April' AND review.year = 2021;,"How many reviews were posted by users with the name ""Sarah Williams"" in the month of April 2021?","Filter strings of users, city, address, business.name using LIKE with wildcards. Filter strings of state using exact upper case matches. Assume the rating of a business to be its average rating, and compute it before computing other aggregates on it. Always truncate dates in the question to its current month and year (if applicable) before filtering on `review.year` and `review.month`. " -yelp,bigquery,instruct,SELECT SUM(checkin.count) AS total_checkins FROM yelp.business JOIN yelp.checkin ON business.business_id = checkin.business_id WHERE business.state = 'CA' AND LOWER(checkin.day) LIKE '%Monday%';,How many check-ins occurred on Mondays at businesses in the state of California?,"Filter strings of users, city, address, business.name using ILIKE with wildcards. +yelp,bigquery,instruct,SELECT SUM(checkin.count) AS total_checkins FROM yelp.business JOIN yelp.checkin ON business.business_id = checkin.business_id WHERE business.state = 'CA' AND LOWER(checkin.day) LIKE '%Monday%';,How many check-ins occurred on Mondays at businesses in the state of California?,"Filter strings of users, city, address, business.name using LIKE with wildcards. Filter strings of state using exact upper case matches. Assume the rating of a business to be its average rating, and compute it before computing other aggregates on it. Always truncate dates in the question to its current month and year (if applicable) before filtering on `review.year` and `review.month`. diff --git a/data/questions_gen_tsql.csv b/data/questions_gen_tsql.csv index b83cc4a..96513e8 100644 --- a/data/questions_gen_tsql.csv +++ b/data/questions_gen_tsql.csv @@ -25,10 +25,10 @@ academic,tsql,table_join,"SELECT journal.name, COUNT(publication.pid) AS total_p academic,tsql,table_join,"SELECT conference.name, COUNT(publication.pid) AS num_publications FROM publication JOIN conference ON publication.cid = conference.cid GROUP BY conference.name, conference.cid ORDER BY num_publications DESC;","How many publications were presented at each conference, ordered by the number of publications in descending order? Give the names of the conferences and their corresponding number of publications.", academic,tsql,table_join,SELECT COUNT(DISTINCT publication.pid) FROM publication JOIN journal ON publication.jid = journal.jid WHERE journal.name LIKE 'J%';,"How many publications were published in journals whose names start with the letter ""J""?", academic,tsql,instruct,"SELECT DISTINCT organization.name FROM organization JOIN author ON organization.oid = author.oid JOIN writes ON author.aid = writes.aid JOIN domain_publication ON writes.pid = domain_publication.pid JOIN domain ON domain_publication.did = domain.did WHERE domain.name = 'Machine Learning';SELECT DISTINCT organization.oid FROM organization JOIN author ON organization.oid = author.oid JOIN writes ON author.aid = writes.aid JOIN domain_publication ON writes.pid = domain_publication.pid JOIN domain ON domain_publication.did = domain.did WHERE domain.name = 'Machine Learning';SELECT DISTINCT organization.name, organization.oid FROM organization JOIN author ON organization.oid = author.oid JOIN writes ON author.aid = writes.aid JOIN domain_publication ON writes.pid = domain_publication.pid JOIN domain ON domain_publication.did = domain.did WHERE domain.name = 'Machine Learning';","Which organizations have authors who have written publications in the domain ""Machine Learning""?",Always filter names using an exact match -academic,tsql,instruct,"SELECT DISTINCT a2.name FROM author AS a1 JOIN domain_author AS da1 ON a1.aid = da1.aid JOIN domain_author AS da2 ON da1.did = da2.did JOIN author AS a2 ON da2.aid = a2.aid WHERE LOWER(a1.name) LIKE '%martin%';SELECT DISTINCT a2.aid FROM author AS a1 JOIN domain_author AS da1 ON a1.aid = da1.aid JOIN domain_author AS da2 ON da1.did = da2.did JOIN author AS a2 ON da2.aid = a2.aid WHERE a1.name COLLATE SQL_Latin1_General_CP1_CI_AS LIKE '%martin%';SELECT DISTINCT a2.name, a2.aid FROM author AS a1 JOIN domain_author AS da1 ON a1.aid = da1.aid JOIN domain_author AS da2 ON da1.did = da2.did JOIN author AS a2 ON da2.aid = a2.aid WHERE LOWER(a1.name) LIKE LOWER('%martin%');",Which authors belong to the same domain as Martin?,Always filter names using ILIKE with percent sign wildcards -academic,tsql,instruct,"SELECT DISTINCT name FROM author WHERE oid IS NULL;SELECT DISTINCT aid FROM author WHERE oid IS NULL;SELECT DISTINCT name, aid FROM author WHERE oid IS NULL;",Which authors are not part of any organization?,Always filter names using ILIKE -academic,tsql,instruct,"SELECT DISTINCT publication.title FROM domain JOIN domain_author ON domain.did = domain_author.did JOIN writes ON domain_author.aid = writes.aid JOIN publication ON writes.pid = publication.pid JOIN domain_conference ON domain.did = domain_conference.did AND publication.cid = domain_conference.cid WHERE domain.name LIKE '%Sociology%' COLLATE SQL_Latin1_General_CP1_CI_AS AND publication.year = 2020;SELECT DISTINCT publication.pid FROM domain JOIN domain_author ON domain.did = domain_author.did JOIN author ON domain_author.aid = author.aid JOIN writes ON author.aid = writes.aid JOIN publication ON writes.pid = publication.pid JOIN domain_conference ON domain.did = domain_conference.did AND publication.cid = domain_conference.cid WHERE domain.name LIKE '%Sociology%' COLLATE SQL_Latin1_General_CP1_CI_AS AND publication.year = 2020;SELECT DISTINCT publication.title, publication.pid FROM domain JOIN domain_author ON domain.did = domain_author.did JOIN writes ON domain_author.aid = writes.aid JOIN publication ON writes.pid = publication.pid JOIN domain_conference ON domain.did = domain_conference.did WHERE domain.name LIKE '%Sociology%' COLLATE SQL_Latin1_General_CP1_CI_AS AND publication.year = 2020 AND publication.cid = domain_conference.cid;",What are the publications written by authors from the 'Sociology' domain and presented at conferences in the year 2020?,"To get publications written by authors from a given domain, you would need to join domain, domain_author, author to link the domain to the author first, and then join with write to link with the publication id. Finally, to see which ones were presented at conferences, you must join the domain table with the domain_conference table. You must also filter names using ILIKE." -academic,tsql,instruct,SELECT DISTINCT author.name FROM author JOIN domain_author ON author.aid = domain_author.aid JOIN domain ON domain_author.did = domain.did JOIN writes ON author.aid = writes.aid JOIN publication ON writes.pid = publication.pid WHERE domain.name LIKE '%computer science%' COLLATE SQL_Latin1_General_CP1_CI_AS;,"What are the names of the authors who have written publications in the domain ""Computer Science""?","To get publications written by authors from a given domain, you would need to join domain, domain_author, author to link the domain to the author first, and then join with write to link with the publication id. You must also filter names using ILIKE." +academic,tsql,instruct,"SELECT DISTINCT a2.name FROM author AS a1 JOIN domain_author AS da1 ON a1.aid = da1.aid JOIN domain_author AS da2 ON da1.did = da2.did JOIN author AS a2 ON da2.aid = a2.aid WHERE LOWER(a1.name) LIKE '%martin%';SELECT DISTINCT a2.aid FROM author AS a1 JOIN domain_author AS da1 ON a1.aid = da1.aid JOIN domain_author AS da2 ON da1.did = da2.did JOIN author AS a2 ON da2.aid = a2.aid WHERE a1.name COLLATE SQL_Latin1_General_CP1_CI_AS LIKE '%martin%';SELECT DISTINCT a2.name, a2.aid FROM author AS a1 JOIN domain_author AS da1 ON a1.aid = da1.aid JOIN domain_author AS da2 ON da1.did = da2.did JOIN author AS a2 ON da2.aid = a2.aid WHERE LOWER(a1.name) LIKE LOWER('%martin%');",Which authors belong to the same domain as Martin?,Always filter names using LIKE with percent sign wildcards +academic,tsql,instruct,"SELECT DISTINCT name FROM author WHERE oid IS NULL;SELECT DISTINCT aid FROM author WHERE oid IS NULL;SELECT DISTINCT name, aid FROM author WHERE oid IS NULL;",Which authors are not part of any organization?,Always filter names using LIKE +academic,tsql,instruct,"SELECT DISTINCT publication.title FROM domain JOIN domain_author ON domain.did = domain_author.did JOIN writes ON domain_author.aid = writes.aid JOIN publication ON writes.pid = publication.pid JOIN domain_conference ON domain.did = domain_conference.did AND publication.cid = domain_conference.cid WHERE domain.name LIKE '%Sociology%' COLLATE SQL_Latin1_General_CP1_CI_AS AND publication.year = 2020;SELECT DISTINCT publication.pid FROM domain JOIN domain_author ON domain.did = domain_author.did JOIN author ON domain_author.aid = author.aid JOIN writes ON author.aid = writes.aid JOIN publication ON writes.pid = publication.pid JOIN domain_conference ON domain.did = domain_conference.did AND publication.cid = domain_conference.cid WHERE domain.name LIKE '%Sociology%' COLLATE SQL_Latin1_General_CP1_CI_AS AND publication.year = 2020;SELECT DISTINCT publication.title, publication.pid FROM domain JOIN domain_author ON domain.did = domain_author.did JOIN writes ON domain_author.aid = writes.aid JOIN publication ON writes.pid = publication.pid JOIN domain_conference ON domain.did = domain_conference.did WHERE domain.name LIKE '%Sociology%' COLLATE SQL_Latin1_General_CP1_CI_AS AND publication.year = 2020 AND publication.cid = domain_conference.cid;",What are the publications written by authors from the 'Sociology' domain and presented at conferences in the year 2020?,"To get publications written by authors from a given domain, you would need to join domain, domain_author, author to link the domain to the author first, and then join with write to link with the publication id. Finally, to see which ones were presented at conferences, you must join the domain table with the domain_conference table. You must also filter names using LIKE." +academic,tsql,instruct,SELECT DISTINCT author.name FROM author JOIN domain_author ON author.aid = domain_author.aid JOIN domain ON domain_author.did = domain.did JOIN writes ON author.aid = writes.aid JOIN publication ON writes.pid = publication.pid WHERE domain.name LIKE '%computer science%' COLLATE SQL_Latin1_General_CP1_CI_AS;,"What are the names of the authors who have written publications in the domain ""Computer Science""?","To get publications written by authors from a given domain, you would need to join domain, domain_author, author to link the domain to the author first, and then join with write to link with the publication id. You must also filter names using LIKE." advising,tsql,date_functions,"SELECT TOP 1 DATEPART(MONTH, s.admit_term) AS month, COUNT(*) AS total_students FROM student AS s GROUP BY DATEPART(MONTH, s.admit_term) ORDER BY total_students DESC;",What month were most students admitted?, advising,tsql,date_functions,"SELECT AVG(DATEDIFF(day, admit_term, predicted_graduation_semester)) AS average_predicted_time_to_graduation FROM student;",What's the average predicted time to graduation since admission in no. of days?, advising,tsql,date_functions,"SELECT COUNT(*) AS num_students_graduated FROM student WHERE predicted_graduation_semester >= DATEADD(YEAR, -10, GETDATE());",How many students were predicted to graduate in the last 10 years?, @@ -57,7 +57,7 @@ advising,tsql,table_join,"SELECT program.name, COUNT(student.student_id) AS numb advising,tsql,instruct,"SELECT student.firstname, student.lastname FROM student WHERE NOT student.minor IS NULL ORDER BY CASE WHEN student.lastname IS NULL THEN 1 ELSE 0 END, student.lastname;",Which students have declared a minor program? List their firstname and lastname. Order the results by the students' last names.,"student.declare_major is null for students who have not declared their major. student.minor is null for students who have not declared a minor program." advising,tsql,instruct,SELECT AVG(student.total_gpa) FROM student JOIN program ON student.program_id = program.program_id WHERE LOWER(program.name) = 'mathematics';,What is the average GPA of students in the program mathematics?,Match strings case-insensitively -advising,tsql,instruct,"SELECT course.name FROM course WHERE course.department COLLATE SQL_Latin1_General_CP1_CI_AS LIKE '%Computer Science%' ORDER BY CASE WHEN course.name IS NULL THEN 1 ELSE 0 END, course.name ASC;",What are the names of all the courses offered by the department of Computer Science?,"Filter strings using ILIKE. +advising,tsql,instruct,"SELECT course.name FROM course WHERE course.department COLLATE SQL_Latin1_General_CP1_CI_AS LIKE '%Computer Science%' ORDER BY CASE WHEN course.name IS NULL THEN 1 ELSE 0 END, course.name ASC;",What are the names of all the courses offered by the department of Computer Science?,"Filter strings using LIKE. Use the student_record table for all information relating to students' choices and their course." advising,tsql,instruct,"SELECT course.name, course.easiness_score FROM course WHERE course.department = 'Computer Science';SELECT course.course_id, course.easiness_score FROM course WHERE course.department = 'Computer Science';SELECT course.number, course.easiness_score FROM course WHERE course.department = 'Computer Science';SELECT course.name, course.course_id, course.easiness_score FROM course WHERE course.department = 'Computer Science';SELECT course.name, course.number, course.easiness_score FROM course WHERE course.department = 'Computer Science';SELECT course.course_id, course.number, course.easiness_score FROM course WHERE course.department = 'Computer Science';SELECT course.name, course.course_id, course.number, course.easiness_score FROM course WHERE course.department = 'Computer Science';","What are the easiness scores for courses in the ""Computer Science"" department? Show both courses and scores.",Always filter names using exact string matching advising,tsql,instruct,"SELECT DISTINCT student_id FROM student_record WHERE student_record.how = 'in-person' AND student_record.grade IN ('A', 'C');",Return the student IDs who have taken an in-person course and have gotten a grade of A or C.,"Always filter strings with an exact match. @@ -87,18 +87,18 @@ atis,tsql,table_join,"SELECT DISTINCT airline.airline_name FROM flight_stop JOIN atis,tsql,table_join,"SELECT DISTINCT airline.airline_name FROM flight JOIN airline ON flight.airline_code = airline.airline_code WHERE flight.from_airport = 'LAX' AND flight.to_airport = 'ORD';SELECT DISTINCT airline.airline_code FROM flight JOIN airline ON flight.airline_code = airline.airline_code WHERE flight.from_airport = 'LAX' AND flight.to_airport = 'ORD';SELECT DISTINCT airline.airline_name, airline.airline_code FROM flight JOIN airline ON flight.airline_code = airline.airline_code WHERE flight.from_airport = 'LAX' AND flight.to_airport = 'ORD';",Which airlines offer flights from LAX to ORD?, atis,tsql,table_join,"SELECT airline.airline_name, flight.stops FROM flight JOIN airline ON flight.airline_code = airline.airline_code WHERE flight.from_airport = 'ORD' AND flight.to_airport = 'JFK' ORDER BY CASE WHEN flight.stops IS NULL THEN 1 ELSE 0 END, flight.stops;SELECT airline.airline_code, flight.stops FROM flight JOIN airline ON flight.airline_code = airline.airline_code WHERE flight.from_airport = 'ORD' AND flight.to_airport = 'JFK' ORDER BY CASE WHEN flight.stops IS NULL THEN 1 ELSE 0 END, flight.stops;SELECT airline.airline_name, airline.airline_code, flight.stops FROM flight JOIN airline ON flight.airline_code = airline.airline_code WHERE flight.from_airport = 'ORD' AND flight.to_airport = 'JFK' ORDER BY CASE WHEN flight.stops IS NULL THEN 1 ELSE 0 END, flight.stops;","Which airlines offer flights from Chicago (ORD) to New York (JFK), and how many stops do they have, sorted by number of stops in ascending order?", atis,tsql,table_join,"SELECT DISTINCT airline.airline_name FROM airline WHERE NOT airline.airline_code IN (SELECT flight.airline_code FROM flight WHERE flight.from_airport = 'JFK' OR flight.to_airport = 'JFK' OR flight.stops > 0);SELECT DISTINCT airline.airline_code FROM airline WHERE NOT airline.airline_code IN (SELECT flight.airline_code FROM flight WHERE flight.from_airport = 'JFK' OR flight.to_airport = 'JFK' OR flight.stops > 0);SELECT DISTINCT airline.airline_name, airline.airline_code FROM airline WHERE NOT airline.airline_code IN (SELECT flight.airline_code FROM flight WHERE flight.from_airport = 'JFK' OR flight.to_airport = 'JFK' OR flight.stops > 0);","Which airlines do not have any flights that either depart from/arrive at JFK, or have one or more stops?", -atis,tsql,instruct,SELECT state_code FROM airport WHERE airport_name COLLATE SQL_Latin1_General_CP1_CI_AS LIKE '%Orlando International Airport%';,Which state code is Orlando International Airport in?,"Filter airport, city, country names using ILIKE. +atis,tsql,instruct,SELECT state_code FROM airport WHERE airport_name COLLATE SQL_Latin1_General_CP1_CI_AS LIKE '%Orlando International Airport%';,Which state code is Orlando International Airport in?,"Filter airport, city, country names using LIKE. Filter state code (eg NY), airport codes (eg JFK) using case-insensitive matches. If multiple flight days are requested, use ILIKE and wildcards for each of the days separately, since they are not necessarily ordered." -atis,tsql,instruct,"SELECT flight.flight_number FROM flight WHERE LOWER(flight.flight_days) LIKE '%mon%' AND LOWER(flight.flight_days) LIKE '%wed%';SELECT flight.flight_id FROM flight WHERE LOWER(flight.flight_days) LIKE '%mon%' AND LOWER(flight.flight_days) LIKE '%wed%';SELECT flight.flight_number, flight.flight_id FROM flight WHERE LOWER(flight.flight_days) LIKE '%mon%' AND LOWER(flight.flight_days) LIKE '%wed%';",Which flights operate on Mondays and Wednesdays? Give me the relevant flight numbers,"Filter airport, city, country names using ILIKE. +atis,tsql,instruct,"SELECT flight.flight_number FROM flight WHERE LOWER(flight.flight_days) LIKE '%mon%' AND LOWER(flight.flight_days) LIKE '%wed%';SELECT flight.flight_id FROM flight WHERE LOWER(flight.flight_days) LIKE '%mon%' AND LOWER(flight.flight_days) LIKE '%wed%';SELECT flight.flight_number, flight.flight_id FROM flight WHERE LOWER(flight.flight_days) LIKE '%mon%' AND LOWER(flight.flight_days) LIKE '%wed%';",Which flights operate on Mondays and Wednesdays? Give me the relevant flight numbers,"Filter airport, city, country names using LIKE. Filter state code (eg NY), airport codes (eg JFK) using case-insensitive matches. If multiple flight days are requested, use ILIKE for each of the days separately, since they are not necessarily ordered." -atis,tsql,instruct,SELECT SUM(fare.round_trip_cost) AS total_round_trip_cost FROM fare WHERE fare.from_airport = 'JFK' AND fare.to_airport = 'LAX';,What is the total cost of all round-trip fares from New York (JFK) to Los Angeles?,"Filter airport, city, country names using ILIKE. +atis,tsql,instruct,SELECT SUM(fare.round_trip_cost) AS total_round_trip_cost FROM fare WHERE fare.from_airport = 'JFK' AND fare.to_airport = 'LAX';,What is the total cost of all round-trip fares from New York (JFK) to Los Angeles?,"Filter airport, city, country names using LIKE. Filter state code (eg NY), airport codes (eg JFK) using case-insensitive matches. fare.round_trip_required is not needed when getting the round trip cost." -atis,tsql,instruct,SELECT minimum_connect_time FROM airport WHERE airport_code = 'JFK';,What is the minimum amount of time required for a connecting flight at JFK Airport?,"Filter airport, city, country names using ILIKE. +atis,tsql,instruct,SELECT minimum_connect_time FROM airport WHERE airport_code = 'JFK';,What is the minimum amount of time required for a connecting flight at JFK Airport?,"Filter airport, city, country names using LIKE. Filter state code (eg NY) and airport codes (eg JFK) using upper-case matches." -atis,tsql,instruct,SELECT COUNT(DISTINCT flight_fare.flight_id) FROM flight_fare JOIN fare ON flight_fare.fare_id = fare.fare_id WHERE fare.round_trip_required = 'Yes';,How many flights require a round-trip to purchase the fare?,"Filter airport, city, country names using ILIKE. +atis,tsql,instruct,SELECT COUNT(DISTINCT flight_fare.flight_id) FROM flight_fare JOIN fare ON flight_fare.fare_id = fare.fare_id WHERE fare.round_trip_required = 'Yes';,How many flights require a round-trip to purchase the fare?,"Filter airport, city, country names using LIKE. Filter state code (eg NY) and airport codes (eg JFK) using upper-case matches." geography,tsql,group_by,"SELECT city.country_name, SUM(city.population) AS total_population FROM city GROUP BY city.country_name ORDER BY total_population DESC;",What is the total population in cities by country?, geography,tsql,group_by,"SELECT river.country_name, AVG(river.length) AS average_length FROM river GROUP BY river.country_name ORDER BY average_length DESC;",What is the average length of rivers in each country?, @@ -120,11 +120,11 @@ geography,tsql,table_join,SELECT border_info.border FROM border_info JOIN lake O geography,tsql,table_join,SELECT lake.lake_name FROM lake JOIN state ON lake.state_name = state.state_name WHERE state.area > 1000 AND lake.lake_name LIKE 'Lake%' ORDER BY lake.lake_name;,"Which lakes have a name that starts with ""Lake""? They should be located in states with an area greater than 1000 square kilometers.", geography,tsql,table_join,"SELECT highlow.state_name, highlow.highest_point, state.density FROM highlow JOIN state ON highlow.state_name = state.state_name;",What is the highest point in each state and what is the population density of that state?, geography,tsql,table_join,"SELECT l.country_name, AVG(r.length) AS average_length FROM river AS r JOIN lake AS l ON r.country_name = l.country_name GROUP BY l.country_name;",What is the average length of rivers per country in countries with a lake?, -geography,tsql,instruct,SELECT state_name FROM state WHERE population < 100000;,Which states have fewer than a hundred thousand people?,Always filter names using ILIKE -geography,tsql,instruct,"SELECT river_name FROM river WHERE traverse LIKE '%,%,%';",Which rivers traverse at least 3 cities/landmarks?,Always filter names using ILIKE -geography,tsql,instruct,"SELECT lake_name, area FROM lake WHERE state_name COLLATE SQL_Latin1_General_CP1_CI_AS LIKE '%Michigan%';",What are the names and areas of the lakes in Michigan?,Always filter names using ILIKE -geography,tsql,instruct,"SELECT mountain_name, mountain_altitude FROM mountain WHERE country_name LIKE '%Nepal%' COLLATE SQL_Latin1_General_CP1_CI_AS;",What are the names and altitudes of the mountains in Nepal?,Always filter names using ILIKE -geography,tsql,instruct,"SELECT city_name, population FROM city WHERE country_name COLLATE SQL_Latin1_General_CP1_CI_AS LIKE '%United States%';",Get the cities in the United States and their population,Always filter names using ILIKE +geography,tsql,instruct,SELECT state_name FROM state WHERE population < 100000;,Which states have fewer than a hundred thousand people?,Always filter names using LIKE +geography,tsql,instruct,"SELECT river_name FROM river WHERE traverse LIKE '%,%,%';",Which rivers traverse at least 3 cities/landmarks?,Always filter names using LIKE +geography,tsql,instruct,"SELECT lake_name, area FROM lake WHERE state_name COLLATE SQL_Latin1_General_CP1_CI_AS LIKE '%Michigan%';",What are the names and areas of the lakes in Michigan?,Always filter names using LIKE +geography,tsql,instruct,"SELECT mountain_name, mountain_altitude FROM mountain WHERE country_name LIKE '%Nepal%' COLLATE SQL_Latin1_General_CP1_CI_AS;",What are the names and altitudes of the mountains in Nepal?,Always filter names using LIKE +geography,tsql,instruct,"SELECT city_name, population FROM city WHERE country_name COLLATE SQL_Latin1_General_CP1_CI_AS LIKE '%United States%';",Get the cities in the United States and their population,Always filter names using LIKE restaurants,tsql,group_by,"SELECT restaurant.food_type, COUNT(DISTINCT restaurant.id) AS total_number_of_restaurants FROM restaurant GROUP BY restaurant.food_type;",What is the total number of restaurants serving each type of food?, restaurants,tsql,group_by,"SELECT location.city_name, COUNT(DISTINCT location.restaurant_id) AS total_count FROM LOCATION GROUP BY location.city_name;",What is the total count of restaurants in each city?, restaurants,tsql,group_by,"SELECT restaurant.food_type, AVG(restaurant.rating) AS average_rating FROM restaurant GROUP BY restaurant.food_type ORDER BY average_rating DESC;",What is the average rating of restaurants serving each type of food?, @@ -175,7 +175,7 @@ scholar,tsql,table_join,"SELECT author.authorname, COUNT(DISTINCT writes.paperid scholar,tsql,table_join,SELECT COUNT(DISTINCT paperkeyphrase.keyphraseid) AS total_keyphrases FROM paper JOIN journal ON paper.journalid = journal.journalid JOIN paperkeyphrase ON paper.paperid = paperkeyphrase.paperid WHERE LOWER(journal.journalname) LIKE LOWER('%IEEE Transactions%');,"What is the total number of unique keyphrases associated with papers published in the journal with ""IEEE Transactions"" in its name?", scholar,tsql,table_join,"SELECT journal.journalname, COUNT(DISTINCT paper.paperid) AS total_papers FROM paper JOIN journal ON paper.journalid = journal.journalid GROUP BY journal.journalname ORDER BY CASE WHEN journal.journalname IS NULL THEN 1 ELSE 0 END, journal.journalname;","What is the total number of papers published in each journal, ordered by the journal name?", scholar,tsql,table_join,"SELECT paperdataset.paperid, COUNT(cite.citedpaperid) AS citation_count FROM paperdataset JOIN cite ON paperdataset.paperid = cite.citedpaperid WHERE paperdataset.datasetid = (SELECT datasetid FROM dataset WHERE datasetname LIKE '%COVID-19 Research%') GROUP BY paperdataset.paperid ORDER BY citation_count DESC;SELECT p.title, COUNT(c.citingpaperid) AS num_citing_papers FROM paper AS p JOIN paperdataset AS pd ON p.paperid = pd.paperid JOIN cite AS c ON p.paperid = c.citedpaperid JOIN dataset AS d ON pd.datasetid = d.datasetid WHERE d.datasetname = 'COVID-19 Research' GROUP BY p.title ORDER BY num_citing_papers DESC;","How many papers cite each paper in the dataset named ""COVID-19 Research""?", -scholar,tsql,instruct,"SELECT venue.venuename, COUNT(DISTINCT paper.paperid) FROM paper JOIN venue ON paper.venueid = venue.venueid WHERE paper.venueid = (SELECT venueid FROM paper WHERE paperid = 2) GROUP BY venue.venuename;","What is the name of the venue where the paper with paper ID 2 was published, and how many papers were published in total in that venue?",Always filter strings using ILIKE +scholar,tsql,instruct,"SELECT venue.venuename, COUNT(DISTINCT paper.paperid) FROM paper JOIN venue ON paper.venueid = venue.venueid WHERE paper.venueid = (SELECT venueid FROM paper WHERE paperid = 2) GROUP BY venue.venuename;","What is the name of the venue where the paper with paper ID 2 was published, and how many papers were published in total in that venue?",Always filter strings using LIKE scholar,tsql,instruct,SELECT author.authorname FROM author JOIN writes ON author.authorid = writes.authorid JOIN paper ON writes.paperid = paper.paperid WHERE paper.title = 'The Effects of Climate Change on Agriculture';,"What are the names of the authors who wrote the paper with the title ""The Effects of Climate Change on Agriculture""?",Always filter strings with an exact match scholar,tsql,instruct,SELECT COUNT(paper.paperid) FROM paper JOIN journal ON paper.journalid = journal.journalid WHERE paper.year = 2020 AND LOWER(journal.journalname) LIKE '%nature%';,"How many papers were published in the journal ""nature"" in the year 2020?",Filter strings with case-insensitive matching scholar,tsql,instruct,SELECT COUNT(DISTINCT paper.paperid) FROM paper JOIN journal ON paper.journalid = journal.journalid JOIN paperkeyphrase ON paper.paperid = paperkeyphrase.paperid JOIN keyphrase ON paperkeyphrase.keyphraseid = keyphrase.keyphraseid WHERE LOWER(keyphrase.keyphrasename) = 'machine learning' AND journal.journalname = 'IEEE Transactions on Pattern Analysis and Machine Intelligence';,"How many papers are associated with the keyphrase ""machine learning"" and were published in the journal named ""IEEE Transactions on Pattern Analysis and Machine Intelligence""?","Filter paper names, journal names, using exact matches. Filter keyphrases with case-insensitive matching." @@ -205,27 +205,27 @@ yelp,tsql,table_join,"SELECT category.category_name, SUM(business.review_count) yelp,tsql,table_join,"SELECT category.category_name, SUM(business.review_count) AS total_reviews FROM business JOIN category ON business.business_id = category.business_id GROUP BY category.category_name ORDER BY total_reviews DESC;SELECT category.id, SUM(business.review_count) AS total_reviews FROM business JOIN category ON business.business_id = category.business_id GROUP BY category.id ORDER BY total_reviews DESC;SELECT category.category_name, category.id, SUM(business.review_count) AS total_reviews FROM business JOIN category ON business.business_id = category.business_id GROUP BY category.category_name, category.id ORDER BY total_reviews DESC;",What is the total number of reviews for each business category?, yelp,tsql,table_join,"SELECT business.business_id, SUM(checkin.count) AS total_checkins FROM business JOIN checkin ON business.business_id = checkin.business_id WHERE business.state = 'CA' GROUP BY business.business_id ORDER BY total_checkins DESC;SELECT business.name, SUM(checkin.count) AS total_checkins FROM business JOIN checkin ON business.business_id = checkin.business_id WHERE business.state = 'CA' GROUP BY business.name ORDER BY total_checkins DESC;SELECT business.bid, SUM(checkin.count) AS total_checkins FROM business JOIN checkin ON business.business_id = checkin.business_id WHERE business.state = 'CA' GROUP BY business.bid ORDER BY total_checkins DESC;SELECT business.business_id, business.name, SUM(checkin.count) AS total_checkins FROM business JOIN checkin ON business.business_id = checkin.business_id WHERE business.state = 'CA' GROUP BY business.business_id, business.name ORDER BY total_checkins DESC;SELECT business.business_id, business.bid, SUM(checkin.count) AS total_checkins FROM business JOIN checkin ON business.business_id = checkin.business_id WHERE business.state = 'CA' GROUP BY business.business_id, business.bid ORDER BY total_checkins DESC;SELECT business.name, business.bid, SUM(checkin.count) AS total_checkins FROM business JOIN checkin ON business.business_id = checkin.business_id WHERE business.state = 'CA' GROUP BY business.name, business.bid ORDER BY total_checkins DESC;SELECT business.business_id, business.name, business.bid, SUM(checkin.count) AS total_checkins FROM business JOIN checkin ON business.business_id = checkin.business_id WHERE business.state = 'CA' GROUP BY business.business_id, business.name, business.bid ORDER BY total_checkins DESC;",What is the total number of check-ins for each business in the state of California?, yelp,tsql,table_join,"SELECT TOP 2 category.category_name FROM (SELECT business.business_id AS business_id, AVG(review.rating) AS average_rating FROM business JOIN review ON business.business_id = review.business_id GROUP BY business.business_id) AS business_rating JOIN category ON business_rating.business_id = category.business_id GROUP BY category.category_name ORDER BY AVG(business_rating.average_rating) DESC;SELECT TOP 2 category.id FROM (SELECT business.business_id AS business_id, AVG(review.rating) AS average_rating FROM business JOIN review ON business.business_id = review.business_id GROUP BY business.business_id) AS business_rating JOIN category ON business_rating.business_id = category.business_id GROUP BY category.id ORDER BY AVG(business_rating.average_rating) DESC;SELECT TOP 2 category.category_name, category.id FROM (SELECT business.business_id AS business_id, AVG(review.rating) AS average_rating FROM business JOIN review ON business.business_id = review.business_id GROUP BY business.business_id) AS business_rating JOIN category ON business_rating.business_id = category.business_id GROUP BY category.category_name, category.id ORDER BY AVG(business_rating.average_rating) DESC;",What are the top 2 categories of businesses with the highest average rating?, -yelp,tsql,instruct,SELECT COUNT(review.rid) AS total_reviews FROM review JOIN category ON review.business_id = category.business_id WHERE review.year = 2021 AND category.category_name COLLATE SQL_Latin1_General_CP1_CI_AS LIKE '%Cafe%';,"What is the total number of reviews posted in the year 2021 for businesses in the category ""Cafe""?","Filter strings of users, city, address, business.name using ILIKE with wildcards. +yelp,tsql,instruct,SELECT COUNT(review.rid) AS total_reviews FROM review JOIN category ON review.business_id = category.business_id WHERE review.year = 2021 AND category.category_name COLLATE SQL_Latin1_General_CP1_CI_AS LIKE '%Cafe%';,"What is the total number of reviews posted in the year 2021 for businesses in the category ""Cafe""?","Filter strings of users, city, address, business.name using LIKE with wildcards. Filter strings of state using exact upper case matches. Assume the rating of a business to be its average rating, and compute it before computing other aggregates on it. Always truncate dates in the question to its current month and year (if applicable) before filtering on `review.year` and `review.month`. " -yelp,tsql,instruct,"SELECT AVG(sf.average_rating) AS sf_average_rating FROM (SELECT business.business_id AS business_id, AVG(review.rating) AS average_rating FROM business JOIN review ON business.business_id = review.business_id WHERE LOWER(business.city) LIKE '%san francisco%' GROUP BY business.business_id) AS sf;",What is the average rating of businesses in the city of San Francisco?,"Filter strings of users, city, address, business.name using ILIKE with wildcards. +yelp,tsql,instruct,"SELECT AVG(sf.average_rating) AS sf_average_rating FROM (SELECT business.business_id AS business_id, AVG(review.rating) AS average_rating FROM business JOIN review ON business.business_id = review.business_id WHERE LOWER(business.city) LIKE '%san francisco%' GROUP BY business.business_id) AS sf;",What is the average rating of businesses in the city of San Francisco?,"Filter strings of users, city, address, business.name using LIKE with wildcards. Filter strings of state using exact upper case matches. The rating of businesses in a city refers to the average rating of the businesses in that city. I.e., you must compute the average rating of each business before computing the average rating of businesses in the city. Always truncate dates in the question to its current month and year (if applicable) before filtering on `review.year` and `review.month`. " -yelp,tsql,instruct,"SELECT review.business_id, COUNT(*) AS review_count FROM review WHERE review.year = 2021 GROUP BY review.business_id ORDER BY review_count DESC;",How many reviews were posted for each business id in the year 2021?,"Filter strings of users, city, address, business.name using ILIKE with wildcards. +yelp,tsql,instruct,"SELECT review.business_id, COUNT(*) AS review_count FROM review WHERE review.year = 2021 GROUP BY review.business_id ORDER BY review_count DESC;",How many reviews were posted for each business id in the year 2021?,"Filter strings of users, city, address, business.name using LIKE with wildcards. Filter strings of state using exact upper case matches. Assume the rating of a business to be its average rating, and compute it before computing other aggregates on it. Always truncate dates in the question to its current month and year (if applicable) before filtering on `review.year` and `review.month`. " -yelp,tsql,instruct,SELECT COUNT(*) FROM review JOIN users ON review.user_id = users.user_id WHERE LOWER(users.name) LIKE LOWER('%Sarah Williams%') AND review.month = 'April' AND review.year = 2021;,"How many reviews were posted by users with the name ""Sarah Williams"" in the month of April 2021?","Filter strings of users, city, address, business.name using ILIKE with wildcards. +yelp,tsql,instruct,SELECT COUNT(*) FROM review JOIN users ON review.user_id = users.user_id WHERE LOWER(users.name) LIKE LOWER('%Sarah Williams%') AND review.month = 'April' AND review.year = 2021;,"How many reviews were posted by users with the name ""Sarah Williams"" in the month of April 2021?","Filter strings of users, city, address, business.name using LIKE with wildcards. Filter strings of state using exact upper case matches. Assume the rating of a business to be its average rating, and compute it before computing other aggregates on it. Always truncate dates in the question to its current month and year (if applicable) before filtering on `review.year` and `review.month`. " -yelp,tsql,instruct,SELECT SUM(checkin.count) AS total_checkins FROM business JOIN checkin ON business.business_id = checkin.business_id WHERE business.state = 'CA' AND checkin.day = 'Monday';,How many check-ins occurred on Mondays at businesses in the state of California?,"Filter strings of users, city, address, business.name using ILIKE with wildcards. +yelp,tsql,instruct,SELECT SUM(checkin.count) AS total_checkins FROM business JOIN checkin ON business.business_id = checkin.business_id WHERE business.state = 'CA' AND checkin.day = 'Monday';,How many check-ins occurred on Mondays at businesses in the state of California?,"Filter strings of users, city, address, business.name using LIKE with wildcards. Filter strings of state using exact upper case matches. Assume the rating of a business to be its average rating, and compute it before computing other aggregates on it. Always truncate dates in the question to its current month and year (if applicable) before filtering on `review.year` and `review.month`. diff --git a/translate_sql_dialect.py b/translate_sql_dialect.py index b1dd2ea..ae17575 100644 --- a/translate_sql_dialect.py +++ b/translate_sql_dialect.py @@ -43,12 +43,20 @@ df["valid"] = "" df["err_msg"] = "" -# create db_type col where if "Snowflake" in instructions of row, db_type = "Snowflake" else "postgres" +# create db_type col where if "Snowflake" in file name, db_type = "snowflake", else db_type = "postgres" if "snowflake" in dataset_file: df["db_type"] = "snowflake" else: df["db_type"] = "postgres" +# if ILIKE in instructions col, and db_type is in ["sqlite", "bigquery", "tsql"], replace ILIKE with LIKE +if "instructions" in df.columns: + df["instructions"] = df["instructions"].apply( + lambda x: x.replace("ILIKE", "LIKE") + if "ILIKE" in x and dialect in ["sqlite", "bigquery", "tsql"] + else x + ) + # if db_name is empty, use "dbname" df["db_name"] = df.apply( lambda x: (