Skip to content

Commit

Permalink
TTS cover
Browse files Browse the repository at this point in the history
  • Loading branch information
Wuyuhang11 committed Sep 21, 2024
1 parent a2cb1f2 commit 682e841
Show file tree
Hide file tree
Showing 31 changed files with 1,414 additions and 0 deletions.
162 changes: 162 additions & 0 deletions center/center.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
from flask import Flask, request, jsonify
from werkzeug.utils import secure_filename
from datetime import datetime
import os
from game.service.searchService import getPlacesFromWeb, getUrlsFromWeb # 通过外部搜索得到资料的urls和松江大学城的places
from game.service.searchService import getAroud_smallplace_On_bigPlace
from game.service.ImageService import getTraitsFrom_Images, getTraitsFrom_Image # 业务实现类:得到图像特征
from game.compute.compute_similarity import calculate_embedding_cosine_similarity, \
calculate_tfidf_cosine_similarity # 计算特征之间的相似度
from flask_cors import CORS
from gevent import pywsgi
import shutil # 导入 shutil 模块用于文件操作
import tempfile
import json
from game.utils.similarity_read import write_top_similarities_to_file # 将相似度top-3的信息写入文件
from game.model.modeljudge import judge_byModel # 进行特征的最后决策
from game.utils.process_set_tolist import convert_sets_to_lists # 将set数据转为list数据
from game.tts.text_to_video import generate_audio # 根据文本生成音频

app = Flask(__name__)
# Keep non-ASCII (Chinese) text readable in JSON responses. Older Flask
# releases read the JSON_AS_ASCII config key; newer releases ignore it and
# expose the setting on the app's JSON provider instead, so set both.
app.config['JSON_AS_ASCII'] = False
if hasattr(app, 'json'):
    app.json.ensure_ascii = False
CORS(app, resources={r"/*": {"origins": "*"}})

# Environment configuration: directory where uploaded images are stored.
# (An earlier assignment to 'uploads' was immediately overwritten and has
# been dropped; 'image_temp' is the value that was always in effect.)
app.config['UPLOAD_FOLDER'] = 'image_temp'

# API key configuration.
# SECURITY: this key is committed to source control and should be rotated and
# supplied through the environment. setdefault() lets a key provided by the
# deployment take precedence instead of being silently overwritten.
os.environ.setdefault("DASHSCOPE_API_KEY", "sk-d07d9d5c4d8d4158abbaf45a40c10042")
save_path = '..//tempfile//'

# exist_ok=True already tolerates an existing directory, so no prior check.
os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)


# Upload an image together with a question and return the matching response.
_ALLOWED_IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')


def _store_upload(file, extension):
    """Save the uploaded image plus a ``processed_`` copy; return the copy's path.

    ``secure_filename`` drops non-ASCII characters, so a name such as
    "图片.png" would collapse to "png" and lose its extension. The stem and the
    (already validated, lower-case) extension are therefore handled separately.
    """
    stem = secure_filename(file.filename[:-len(extension)]) or "upload"
    timestamp_ms = int(datetime.now().timestamp() * 1000)
    filename = f"{timestamp_ms}_{stem}{extension}"

    upload_dir = app.config['UPLOAD_FOLDER']
    # Make sure the directory still exists at request time.
    os.makedirs(upload_dir, exist_ok=True)
    original_file_path = os.path.join(upload_dir, filename)
    file.save(original_file_path)

    # Work on a copy so the original upload stays untouched.
    new_filename = f"processed_{filename}"
    new_file_path = os.path.join(upload_dir, new_filename)
    shutil.copy(original_file_path, new_file_path)
    print(f"临时文件名:{new_filename}")
    print(f"临时文件URL:{new_file_path}")
    return new_file_path


def _rank_small_places(main_traits, big_places):
    """Compare every small landmark around each big place with the main image.

    Returns a tuple ``(big_places_dict, similarities)`` where
    ``big_places_dict`` maps each big place to ``{small_name: small_traits}``
    and ``similarities`` is a list of
    ``(small_name, similarity, big_place, small_traits)`` tuples.
    """
    big_places_dict = {}
    similarities = []
    for big_place in big_places:
        # Only the name -> photos mapping is needed here.
        _, small_name_photos = getAroud_smallplace_On_bigPlace(big_place)

        small_places = {}
        for small_name, small_photos in small_name_photos.items():
            print(f"{small_name}{small_photos}")
            if not small_photos:
                print(f"{small_name} 没有可用的照片,跳过此地标")
                continue
            try:
                small_traits = getTraitsFrom_Images(small_photos)
                print(f"{small_name}的地图特征为:{small_traits}")
                similarity = calculate_embedding_cosine_similarity(main_traits, small_traits)
                print(f"{small_name}与图像特征的相似度为:{similarity}")
            except Exception as e:
                # Best effort: one failing landmark must not abort the request.
                print(f"处理 {small_name} 的图像时发生错误: {e}")
                continue

            small_places[small_name] = small_traits
            similarities.append((small_name, similarity, big_place, small_traits))

        big_places_dict[big_place] = small_places
    return big_places_dict, similarities


@app.route('/upload', methods=['POST'])
def upload_file():
    """Handle ``POST /upload``: match an uploaded image against nearby landmarks.

    Expects a multipart form with a ``file`` part (.png/.jpg/.jpeg) and a
    ``question`` field. Responds with a JSON error and HTTP 400 when either is
    missing or the file type is unsupported; otherwise returns a JSON object
    with the image traits, the candidate places, the top-3 most similar
    landmarks, the model's final judgement and the generated audio path.
    """
    print("你好呀~")
    # 1. Validate the request before doing any expensive work.
    if 'file' not in request.files:
        return jsonify({"error": "No file part"}), 400
    file = request.files['file']
    if not file.filename:
        return jsonify({"error": "No selected file"}), 400
    lowered_name = file.filename.lower()
    extension = next(
        (ext for ext in _ALLOWED_IMAGE_EXTENSIONS if lowered_name.endswith(ext)),
        None,
    )
    if extension is None:
        return jsonify({"error": "File not supported"}), 400

    # Checked up front so a missing question yields a JSON 400 and no files
    # are written for a request that cannot be processed.
    user_question = request.form.get('question')
    if user_question is None:
        return jsonify({"error": "No question provided"}), 400

    new_file_path = _store_upload(file, extension)
    print("用户问题:", user_question)

    # 2. Extract the traits of the uploaded (main) image.
    main_traits = getTraitsFrom_Image(new_file_path)
    print("该图像的特征如下:", main_traits)

    # 3. Narrow the search to landmark buildings inside Songjiang University Town.
    first_question = "上海市松江区松江大学城内有哪些建筑物?"
    # NOTE(review): the URL list is fetched but not used in the response;
    # the call is kept in case the search service relies on it — confirm.
    urls_from_web = getUrlsFromWeb(first_question)
    songjiang_places = getPlacesFromWeb(first_question)

    # 4. Score every small landmark against the main image.
    big_places_dict, max_similarities = _rank_small_places(main_traits, songjiang_places)

    # 5-6. Sort by similarity (descending) and keep the best three.
    max_similarities.sort(key=lambda x: x[1], reverse=True)
    top_3_similarities = max_similarities[:3]

    for idx, (small_name, similarity, big_place, traits) in enumerate(top_3_similarities, 1):
        print(f"第{idx}名:小地标是:{small_name},位于大地标 {big_place},相似度为:{similarity},特征为:{traits}")

    # 7. Persist the top-3 landmarks and their traits to a temporary file.
    top3_file = write_top_similarities_to_file(top_3_similarities, save_path)

    # 9. Let the decision model produce the final answer from that reference.
    judge_content = judge_byModel(main_traits, top3_file)
    print(f"最后决策:{judge_content}")

    # 10. Generate audio from the decision text.
    video_path = generate_audio(judge_content)
    print(f"音频路径:{video_path}")

    # 11. Ranked top-3 candidates; ranks start at 1.
    response = [
        {
            "rank": idx,
            "small_place": small_name,
            "big_place": big_place,
            "similarity": similarity,
            "traits": convert_sets_to_lists(traits)
        }
        for idx, (small_name, similarity, big_place, traits) in enumerate(top_3_similarities, start=1)
    ]

    return jsonify({
        "main_traits": convert_sets_to_lists(main_traits),  # traits of the uploaded image
        "first_question": first_question,  # the search question
        "candidate_big_places": convert_sets_to_lists(songjiang_places),  # big places found
        "search_around_traits": convert_sets_to_lists(big_places_dict),  # traits of small places per big place
        "candidate": convert_sets_to_lists(response),  # ranked candidates with traits
        "judge_content": judge_content,  # final decision
        "video_path": video_path  # path of the generated audio
    })


if __name__ == '__main__':
    app.debug = True
    # Serve the Flask app through gevent's WSGI server on all interfaces.
    listen_address = ('0.0.0.0', 8081)
    pywsgi.WSGIServer(listen_address, app).serve_forever()
71 changes: 71 additions & 0 deletions compute/compute_similarity.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity # 导入余弦相似度函数
from game.utils.process_image_traits import get_trait_embedding # 得到特征文本的向量
import numpy as np

"""
1.通过TF-IDF计算词频的方式来计算text1和text2之间的相似度
"""


def calculate_tfidf_cosine_similarity(text1, text2):
    """Return the cosine similarity between the TF-IDF vectors of two texts.

    Both texts are fitted together as a two-document corpus, and the pairwise
    cosine-similarity matrix of the resulting vectors is computed.
    """
    tfidf_matrix = TfidfVectorizer().fit_transform([text1, text2])
    pairwise = cosine_similarity(tfidf_matrix)
    # Row 0 / column 1 is the similarity of the first text to the second.
    return pairwise[0][1]


"""
2.计算text1和text2的相似度
"""


def calculate_embedding_cosine_similarity(text1, text2):
    """Return the cosine similarity of the embeddings of two texts.

    Each text is embedded with ``get_trait_embedding``; the result is the dot
    product of the two vectors divided by the product of their Euclidean
    norms, rounded to six decimal places. When either embedding has zero
    length (norm 0) the similarity is undefined, so ``0.0`` is returned
    instead of a NaN that would poison any later sorting by similarity.
    """
    # 1. Embed both texts and convert to numpy arrays for vector arithmetic.
    vec1 = np.array(get_trait_embedding(text1))
    vec2 = np.array(get_trait_embedding(text2))

    # 2. Product of the Euclidean norms (vector lengths).
    norm_product = np.linalg.norm(vec1) * np.linalg.norm(vec2)
    if norm_product == 0:
        return 0.0

    # 3. Cosine similarity, rounded to six decimal places.
    cosine_sim = np.dot(vec1, vec2) / norm_product
    return round(cosine_sim, 6)


# Example inputs for a manual check of calculate_embedding_cosine_similarity.
text1 = """'1. **建筑风格**:
- 这些建筑具有独特的几何形状,尤其是三角形和多边形的结构。
- 建筑物的外观设计独特,可能是由玻璃和混凝土等材料构成。
2. **环境**:
- 周围有绿化带和树木。
- 有水体和桥梁。
3. **地理位置**:
- 这些建筑位于一个较大的区域,周围有其他建筑物和设施。
- 远处可以看到山脉。'"""

text2 = """1. **建筑风格**:
- 图像中的建筑具有独特的几何形状,主要由三角形和多边形构成,呈现出现代主义和未来主义的风格。
- 建筑物的设计独特,具有明显的尖顶和斜面,给人一种科技感和创新感。
2. **环境**:
- 建筑周围有广阔的绿地和水体,环境优美,绿化覆盖率高。
- 建筑物周围有道路和桥梁,交通便利,周围还有其他建筑物和设施,形成一个完整的区域。
3. **地理位置**:
- 建筑位于一个城市或大学校园内,周围有其他建筑物和设施,表明这是一个有人居住和活动的区域。
- 建筑物的地理位置可能是一个重要的地标或文化中心,吸引了大量的游客和参观者。"""

# Run the example only when this file is executed directly. Previously the
# embedding call and print ran on every import of this module.
if __name__ == '__main__':
    similar_points = calculate_embedding_cosine_similarity(text1, text2)
    print("相似度:", similar_points)
164 changes: 164 additions & 0 deletions demo/ImageUnderstanding.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
import _thread as thread
import base64
import datetime
import hashlib
import hmac
import json
from urllib.parse import urlparse, urlencode
import ssl
from datetime import datetime
from time import mktime
from wsgiref.handlers import format_date_time
import websocket

# API credentials.
# SECURITY: these values are committed to source control; they should be
# rotated and loaded from the environment or a secrets store instead.
appid = "2ef09001"  # APPID
api_secret = "YTViOGRlNDUxYzUxNGQ1YzBiMjY1OTc4"  # APISecret
api_key = "8a8b0b84743a48f15f9ffd96d962a024"  # APIKey

# Read the image file; the context manager closes the handle deterministically
# instead of leaving it to garbage collection.
with open("SUES03.png", 'rb') as image_file:
    imagedata = image_file.read()

# WebSocket URL of the image-understanding service.
imageunderstanding_url = "wss://spark-api.cn-huabei-1.xf-yun.com/v2.1/image"
# Image bytes encoded as a base64 text string.
image_base64 = str(base64.b64encode(imagedata), 'utf-8')

class Ws_Param:
    """Holds the credentials and endpoint used to build a signed WebSocket URL."""

    def __init__(self, appid, api_key, api_secret, imageunderstanding_url):
        # Parse the endpoint once; host and path are both part of the signature.
        endpoint = urlparse(imageunderstanding_url)
        self.appid = appid
        self.api_key = api_key
        self.api_secret = api_secret
        self.host = endpoint.netloc
        self.path = endpoint.path
        self.image_understanding_url = imageunderstanding_url

    def create_url(self):
        """Return the endpoint URL with authorization, date and host query parameters."""
        # HTTP-date string for the current time.
        date = format_date_time(mktime(datetime.now().timetuple()))

        # Sign "host / date / request-line" with HMAC-SHA256 keyed by the API secret.
        signature_origin = f"host: {self.host}\ndate: {date}\nGET {self.path} HTTP/1.1"
        digest = hmac.new(
            self.api_secret.encode('utf-8'),
            signature_origin.encode('utf-8'),
            digestmod=hashlib.sha256,
        ).digest()
        signature_sha_base64 = base64.b64encode(digest).decode('utf-8')

        # The authorization value is itself base64-encoded before being sent.
        authorization_origin = (f'api_key="{self.api_key}", algorithm="hmac-sha256", headers="host date request-line", '
                                f'signature="{signature_sha_base64}"')
        authorization = base64.b64encode(authorization_origin.encode('utf-8')).decode('utf-8')

        query = urlencode({
            "authorization": authorization,
            "date": date,
            "host": self.host,
        })
        return f"{self.image_understanding_url}?{query}"

class WebSocketHandler:
    """Callbacks for one WebSocket conversation; collects the streamed answer."""

    def __init__(self, appid, messages):
        self.appid = appid
        self.messages = messages
        self.answer = ""  # grows as content fragments arrive

    def on_error(self, ws, error):
        """Print an error reported by the WebSocket."""
        print("错误:", error)

    def on_close(self, ws, one=None, two=None):
        """Print a notice when the connection closes."""
        print("\n连接已关闭")

    def on_open(self, ws):
        """Send the request from a new thread once the connection is open."""
        thread.start_new_thread(self.run, (ws,))

    def run(self, ws):
        """Serialize the request parameters to JSON and send them."""
        payload = self.gen_params(self.appid, self.messages)
        ws.send(json.dumps(payload))

    def on_message(self, ws, message):
        """Handle one server message: print and accumulate its content.

        A non-zero header code is printed as an error and closes the
        connection; a status of 2 closes the connection after the content
        has been recorded.
        """
        data = json.loads(message)
        code = data['header']['code']
        if code != 0:
            print(f'请求错误: {code}, {data}')
            ws.close()
            return

        choices = data["payload"]["choices"]
        status = choices["status"]
        content = choices["text"][0]["content"]
        print(content, end="")
        self.answer += content
        if status == 2:
            ws.close()

    @staticmethod
    def gen_params(appid, messages):
        """Build the request body for the given app id and message list."""
        chat_settings = {
            "domain": "image",
            "temperature": 0.5,
            "top_k": 4,
            "max_tokens": 2028,
            "auditing": "default",
        }
        return {
            "header": {"app_id": appid},
            "parameter": {"chat": chat_settings},
            "payload": {"message": {"text": messages}},
        }

def start_websocket(appid, api_key, api_secret, imageunderstanding_url, messages, *, verify_ssl=False):
    """Open the WebSocket, send ``messages`` and return the collected answer.

    Args:
        appid, api_key, api_secret: credentials used to sign the URL.
        imageunderstanding_url: WebSocket endpoint of the service.
        messages: message list placed in the request payload.
        verify_ssl: when True, the TLS certificate is verified. The default
            (False) keeps the previous behaviour of skipping verification.
            NOTE(review): skipping verification exposes the connection to
            man-in-the-middle attacks; prefer ``verify_ssl=True``.

    Returns:
        The answer text accumulated by the handler while the connection was
        open (previously discarded, so callers had no way to read it).
    """
    ws_param = Ws_Param(appid, api_key, api_secret, imageunderstanding_url)
    websocket.enableTrace(False)  # no WebSocket debug output
    ws_url = ws_param.create_url()

    # The handler carries appid/messages itself, so nothing needs to be
    # attached to the WebSocketApp object.
    ws_handler = WebSocketHandler(appid, messages)
    ws = websocket.WebSocketApp(ws_url,
                                on_message=ws_handler.on_message,
                                on_error=ws_handler.on_error,
                                on_close=ws_handler.on_close,
                                on_open=ws_handler.on_open)

    sslopt = None if verify_ssl else {"cert_reqs": ssl.CERT_NONE}
    ws.run_forever(sslopt=sslopt)
    return ws_handler.answer

if __name__ == '__main__':
    # Analysis instructions sent to the model alongside the image.
    prompt = """
请对提供的图像进行分析,关注以下要点:
1. **设计风格**:
- 描述建筑或风景的主要设计特点。
- 讨论其可能的文化或历史联系。
2. **具体特征与位置关系**:
- 描述图像中显著事物的特征。
- 推测其可能的地理位置。
3. **环境描述**:
- 描述自然与人造元素。
- 分析这些元素如何影响场景。
4. **文字与标志**:
- 描述可见的文字或标志。
- 推测其含义。
5. **人物活动**:
- 描述人物的活动。
- 分析其与环境的关系。
请直接、简洁地回答,避免不必要的修饰。
"""

    # The image goes first, followed by the text prompt.
    image_message = {"role": "user", "content": image_base64, "content_type": "image"}
    prompt_message = {"role": "user", "content": prompt, "content_type": "text"}
    messages = [image_message, prompt_message]

    print("回答:", end="")
    # Open the WebSocket and send the messages.
    start_websocket(appid, api_key, api_secret, imageunderstanding_url, messages)
Loading

0 comments on commit 682e841

Please sign in to comment.