-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathgpt3-prompts.sql
51 lines (46 loc) · 1.13 KB
/
gpt3-prompts.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
-- Builds (prompt, completion) pairs from story titles: a title is labelled
-- " good" when claps >= score_threshold and " bad" otherwise. Bad stories are
-- randomly down-sampled so the two classes come out roughly the same size.
-- NOTE(review): the file name suggests the output feeds GPT-3 fine-tuning; confirm.
DECLARE score_threshold INT64 DEFAULT 200;

WITH
  -- Only the two columns the rest of the script needs.
  base_tb AS (
    SELECT
      title,
      claps
    FROM
      `mlops-zoomcamp-361419`.gpt3_blog.medium
  ),

  -- Class sizes, computed once. Rows with NULL claps match neither condition,
  -- so they are counted in neither class (and are never selected below).
  class_counts AS (
    SELECT
      COUNTIF(claps >= score_threshold) AS good_count,
      COUNTIF(claps < score_threshold) AS bad_count
    FROM
      base_tb
  ),

  good_stories AS (
    SELECT
      title,
      claps
    FROM
      base_tb
    WHERE
      claps >= score_threshold
  ),

  bad_stories AS (
    SELECT
      title,
      claps
    FROM
      base_tb
    WHERE
      claps < score_threshold
      -- Keep each bad row with probability good_count / bad_count, so the
      -- expected number of sampled bad rows equals good_count (approximate,
      -- not exact). This is a per-row filter and needs no sort, unlike
      -- ORDER BY RAND() LIMIT n, which returns exactly n rows but must sort.
      -- SAFE_DIVIDE yields NULL instead of raising a division-by-zero error
      -- when there are no bad rows; a ratio above 1 simply keeps every bad row.
      AND RAND() < (
        SELECT
          SAFE_DIVIDE(good_count, bad_count)
        FROM
          class_counts
      )
  ),

  all_stories AS (
    SELECT
      title,
      claps
    FROM
      good_stories
    UNION ALL
    SELECT
      title,
      claps
    FROM
      bad_stories
  )

-- DISTINCT collapses stories that share both the same title and the same label.
SELECT DISTINCT
  CONCAT("Title: ", title, " ->") AS prompt,
  IF(claps >= score_threshold, " good", " bad") AS completion
FROM
  all_stories;