Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Example] Add llmrec example files #225

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

Large diffs are not rendered by default.

305 changes: 305 additions & 0 deletions examples/llmrec/LLM_augmentation_construct_prompt/gpt_ui_aug.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,305 @@

import threading
import openai
import time
import pandas as pd
import csv
import requests
import concurrent.futures
import pickle
import torch
import os
import threading
import time
import tqdm
import requests

file_path = ""
max_threads = 5
cnt = 0

# MovieLens
def construct_prompting(item_attribute, item_list, candidate_list):
# make history string
history_string = "User history:\n"
for index in item_list:
title = item_attribute['title'][index]
genre = item_attribute['genre'][index]
history_string += "["
history_string += str(index)
history_string += "] "
history_string += title + ", "
history_string += genre + "\n"
# make candidates
candidate_string = "Candidates:\n"
for index in candidate_list:
title = item_attribute['title'][index.item()]
genre = item_attribute['genre'][index.item()]
candidate_string += "["
candidate_string += str(index.item())
candidate_string += "] "
candidate_string += title + ", "
candidate_string += genre + "\n"
# output format
output_format = "Please output the index of user\'s favorite and least favorite movie only from candidate, but not user history. Please get the index from candidate, at the beginning of each line.\nOutput format:\nTwo numbers separated by '::'. Nothing else.Plese just give the index of candicates, remove [] (just output the digital value), please do not output other thing else, do not give reasoning.\n\n"
# make prompt
prompt = "You are a movie recommendation system and required to recommend user with movies based on user history that each movie with title(same topic/doctor), year(similar years), genre(similar genre).\n"
prompt += history_string
prompt += candidate_string
prompt += output_format
return prompt

# # Netflix
# def construct_prompting(item_attribute, item_list, candidate_list):
# # make history string
# history_string = "User history:\n"
# for index in item_list:
# year = item_attribute['year'][index]
# title = item_attribute['title'][index]
# history_string += "["
# history_string += str(index)
# history_string += "] "
# history_string += str(year) + ", "
# history_string += title + "\n"
# # make candidates
# candidate_string = "Candidates:\n"
# for index in candidate_list:
# year = item_attribute['year'][index.item()]
# title = item_attribute['title'][index.item()]
# candidate_string += "["
# candidate_string += str(index.item())
# candidate_string += "] "
# candidate_string += str(year) + ", "
# candidate_string += title + "\n"
# # output format
# output_format = "Please output the index of user\'s favorite and least favorite movie only from candidate, but not user history. Please get the index from candidate, at the beginning of each line.\nOutput format:\nTwo numbers separated by '::'. Nothing else.Plese just give the index of candicates, remove [] (just output the digital value), please do not output other thing else, do not give reasoning.\n\n"
# # make prompt
# # prompt = "You are a movie recommendation system and required to recommend user with movies based on user history that each movie with title(same topic/doctor), year(similar years), genre(similar genre).\n"
# prompt = ""
# prompt += history_string
# prompt += candidate_string
# prompt += output_format
# return prompt

### read candidate
candidate_indices = pickle.load(open(file_path + 'candidate_indices','rb'))
candidate_indices_dict = {}
for index in range(candidate_indices.shape[0]):
candidate_indices_dict[index] = candidate_indices[index]
### read adjacency_list
adjacency_list_dict = {}
train_mat = pickle.load(open(file_path + 'train_mat','rb'))
for index in range(train_mat.shape[0]):
data_x, data_y = train_mat[index].nonzero()
adjacency_list_dict[index] = data_y
### read item_attribute
toy_item_attribute = pd.read_csv(file_path + 'item_attribute.csv', names=['id','title', 'genre'])
### write augmented dict
augmented_sample_dict = {}
if os.path.exists(file_path + "augmented_sample_dict"):
print(f"The file augmented_sample_dict exists.")
augmented_sample_dict = pickle.load(open(file_path + 'augmented_sample_dict','rb'))
else:
print(f"The file augmented_sample_dict does not exist.")
pickle.dump(augmented_sample_dict, open(file_path + 'augmented_sample_dict','wb'))

def file_reading():
augmented_attribute_dict = pickle.load(open(file_path + 'augmented_sample_dict','rb'))
return augmented_attribute_dict

# baidu
def LLM_request(toy_item_attribute, adjacency_list_dict, candidate_indices_dict, index, model_type, augmented_sample_dict):

try:
augmented_sample_dict = file_reading()
except pickle.UnpicklingError as e:
print("Error occurred while unpickling:", e)
LLM_request(toy_item_attribute, adjacency_list_dict, candidate_indices_dict, index, model_type, augmented_sample_dict)
if index in augmented_sample_dict:
return 0
else:
try:
print(f"{index}")
prompt = construct_prompting(toy_item_attribute, adjacency_list_dict[index], candidate_indices_dict[index])
url = "http://llms-se.baidu-int.com:8200/chat/completions"
headers={
# "Content-Type": "application/json",
"Authorization": "Bearer your key"

}
params={
"model": model_type,
"messages": [{"role": "user", "content": prompt}],
"temperature":0.6,
"max_tokens": 1000,
"stream": False,
"top_p": 0.1
}

response = requests.post(url=url, headers=headers,json=params)
message = response.json()

content = message['choices'][0]['message']['content']
print(f"content: {content}, model_type: {model_type}")
samples = content.split("::")
pos_sample = int(samples[0])
neg_sample = int(samples[1])
augmented_sample_dict[index] = {}
augmented_sample_dict[index][0] = pos_sample
augmented_sample_dict[index][1] = neg_sample
pickle.dump(augmented_sample_dict, open(file_path + 'augmented_sample_dict','wb'))

# except ValueError as e:
except requests.exceptions.RequestException as e:
print("An HTTP error occurred:", str(e))
time.sleep(10)
except ValueError as ve:
print("An error occurred while parsing the response:", str(ve))
time.sleep(10)
LLM_request(toy_item_attribute, adjacency_list_dict, candidate_indices_dict, index, "gpt-3.5-turbo-0613", augmented_sample_dict)
except KeyError as ke:
print("An error occurred while accessing the response:", str(ke))
time.sleep(10)
LLM_request(toy_item_attribute, adjacency_list_dict, candidate_indices_dict, index, "gpt-3.5-turbo-0613", augmented_sample_dict)
except Exception as ex:
print("An unknown error occurred:", str(ex))
time.sleep(10)

return 1





# # chatgpt
# def LLM_request(toy_item_attribute, adjacency_list_dict, candidate_indices_dict, index, model_type, augmented_sample_dict):

# if index in augmented_sample_dict:
# print(f"g:{index}")
# return 0
# else:
# try:
# print(f"{index}")
# prompt = construct_prompting(toy_item_attribute, adjacency_list_dict[index], candidate_indices_dict[index])
# # url = "http://llms-se.baidu-int.com:8200/chat/completions"
# # url = "https://api.openai.com/v1/completions"
# url = "https://api.openai.com/v1/chat/completions"

# headers={
# # "Content-Type": "application/json",
# # "Authorization": "Bearer your key"
#
# }
# # params={
# # "model": model_type,
# # "prompt": prompt,
# # "max_tokens": 1024,
# # "temperature": 0.6,
# # "stream": False,
# # }

# params = {
# "model": "gpt-3.5-turbo",
# "messages": [{"role": "system", "content": "You are a movie recommendation system and required to recommend user with movies based on user history that each movie with title(same topic/doctor), year(similar years), genre(similar genre).\n"}, {"role": "user", "content": prompt}]
# }

# response = requests.post(url=url, headers=headers,json=params)
# message = response.json()

# content = message['choices'][0]['message']['content']
# # content = message['choices'][0]['text']
# print(f"content: {content}, model_type: {model_type}")
# samples = content.split("::")
# pos_sample = int(samples[0])
# neg_sample = int(samples[1])
# augmented_sample_dict[index] = {}
# augmented_sample_dict[index][0] = pos_sample
# augmented_sample_dict[index][1] = neg_sample
# # pickle.dump(augmented_sample_dict, open('augmented_sample_dict','wb'))
# # pickle.dump(augmented_sample_dict, open('/Users/weiwei/Documents/Datasets/ml-10m/ml-10M100K/preprocessed_raw_MovieLens/toy_MovieLens1000/augmented_sample_dict','wb'))
# pickle.dump(augmented_sample_dict, open(file_path + 'augmented_sample_dict','wb'))

# # # except ValueError as e:
# # except requests.exceptions.RequestException as e:
# # print("An HTTP error occurred:", str(e))
# # # time.sleep(40)
# # except ValueError as ve:
# # print("An error occurred while parsing the response:", str(ve))
# # # time.sleep(40)
# # LLM_request(toy_item_attribute, adjacency_list_dict, candidate_indices_dict, index, "gpt-3.5-turbo-0613", augmented_sample_dict)
# # except KeyError as ke:
# # print("An error occurred while accessing the response:", str(ke))
# # # time.sleep(40)
# # LLM_request(toy_item_attribute, adjacency_list_dict, candidate_indices_dict, index, "gpt-3.5-turbo-0613", augmented_sample_dict)
# # except Exception as ex:
# # print("An unknown error occurred:", str(ex))
# # # time.sleep(40)

# # return 1

# # except ValueError as e:
# except requests.exceptions.RequestException as e:
# print("An HTTP error occurred:", str(e))
# time.sleep(8)
# # print(content)
# # error_cnt += 1
# # if error_cnt==5:
# # return 1
# # LLM_request(toy_item_attribute, indices, "gpt-3.5-turbo-0613", augmented_attribute_dict, error_cnt)
# LLM_request(toy_item_attribute, adjacency_list_dict, candidate_indices_dict, index, model_type, augmented_sample_dict)
# except ValueError as ve:
# print("ValueError error occurred while parsing the response:", str(ve))
# time.sleep(10)
# # error_cnt += 1
# # if error_cnt==5:
# # return 1
# # print(content)
# # LLM_request(toy_item_attribute, indices, "gpt-3.5-turbo-0613", augmented_attribute_dict, error_cnt)
# LLM_request(toy_item_attribute, adjacency_list_dict, candidate_indices_dict, index, model_type, augmented_sample_dict)
# except KeyError as ke:
# print("KeyError error occurred while accessing the response:", str(ke))
# time.sleep(10)
# # error_cnt += 1
# # if error_cnt==5:
# # return 1
# # print(content)
# # LLM_request(toy_item_attribute, indices, "gpt-3.5-turbo-0613", augmented_attribute_dict, error_cnt)
# LLM_request(toy_item_attribute, adjacency_list_dict, candidate_indices_dict, index, model_type, augmented_sample_dict)
# except IndexError as ke:
# print("IndexError error occurred while accessing the response:", str(ke))
# time.sleep(10)
# # error_cnt += 1
# # if error_cnt==5:
# # return 1
# # # print(content)
# # LLM_request(toy_item_attribute, indices, "gpt-3.5-turbo-0613", augmented_attribute_dict)
# # return 1
# LLM_request(toy_item_attribute, adjacency_list_dict, candidate_indices_dict, index, model_type, augmented_sample_dict)
# except EOFError as ke:
# print("EOFError: : Ran out of input error occurred while accessing the response:", str(ke))
# time.sleep(10)
# # error_cnt += 1
# # if error_cnt==5:
# # return 1
# # print(content)
# # LLM_request(toy_item_attribute, indices, "gpt-3.5-turbo-0613", augmented_attribute_dict, error_cnt)
# LLM_request(toy_item_attribute, adjacency_list_dict, candidate_indices_dict, index, model_type, augmented_sample_dict)
# except Exception as ex:
# print("An unknown error occurred:", str(ex))
# time.sleep(10)
# # error_cnt += 1
# # if error_cnt==5:
# # return 1
# # print(content)
# # LLM_request(toy_item_attribute, indices, "gpt-3.5-turbo-0613", augmented_attribute_dict, error_cnt)
# LLM_request(toy_item_attribute, adjacency_list_dict, candidate_indices_dict, index, model_type, augmented_sample_dict)
# return 1


for index in range(0, len(adjacency_list_dict)):
# # make prompting
re = LLM_request(toy_item_attribute, adjacency_list_dict, candidate_indices_dict, index, "gpt-3.5-turbo", augmented_sample_dict)




Loading