Skip to content

Commit

Permalink
解决一些bug,添加番号扫描,添加百度人脸识别,添加字幕搜索
Browse files Browse the repository at this point in the history
  • Loading branch information
hejianjun committed Apr 10, 2024
1 parent ff4b98c commit 32f02d2
Show file tree
Hide file tree
Showing 13 changed files with 225 additions and 78 deletions.
65 changes: 1 addition & 64 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
"type": "python",
"request": "launch",
"program": "D:\\Workspace\\AV_Data_Capture\\Movie_Data_Capture.py",
"cwd": "H:\\test",
"cwd": "Z:\\test",
"console": "integratedTerminal",
"justMyCode": true
},
Expand All @@ -22,69 +22,6 @@
"PYTHONIOENCODING": "utf-8"
},
"program": "${workspaceFolder}/Movie_Data_capture.py",
"program1": "${workspaceFolder}/WebCrawler/javbus.py",
"program2": "${workspaceFolder}/WebCrawler/javdb.py",
"program3": "${workspaceFolder}/WebCrawler/xcity.py",
"program4": "${workspaceFolder}/number_parser.py",
"program5": "${workspaceFolder}/config.py",
"cwd0": "${fileDirname}",
"cwd1": "${workspaceFolder}/dist",
"cwd2": "${env:HOME}${env:USERPROFILE}/.mdc",
"args0": [
"-a",
"-p",
"J:/Downloads",
"-o",
"J:/log"
],
"args1": [
"-g",
"-m",
"3",
"-c",
"1",
"-d",
"0"
],
"args2": [
"-igd0",
"-m3",
"-p",
"J:/output",
"-q",
"121220_001"
],
"args3": [
"-agd0",
"-m3",
"-q",
".*",
"-p",
"J:/#output"
],
"args4": [
"-gic1",
"-d0",
"-m3",
"-o",
"avlog",
"-p",
"I:/output"
],
"args5": [
"-gic1",
"-d0",
"-m1",
"-o",
"avlog",
"-p",
"J:/Downloads"
],
"args6": [
"-z",
"-o",
"J:/log"
]
}
]
}
30 changes: 30 additions & 0 deletions ImageProcessing/baidu-v3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from aip import AipFace
import config,base64


def face_center(filename, model):
    """Locate the right-most face in an image using Baidu's face-detect API.

    Credentials are read from the ``[face]`` section of the project config
    (``appid``, ``key``, ``secret``). They must never be hard-coded here:
    literals in source both leak the secrets and silently override whatever
    the user configured.

    Args:
        filename: Path of the image file to analyse.
        model: Unused by this implementation.

    Returns:
        A ``(right, top)`` tuple: the largest ``left + width`` among the
        detected faces and the ``top`` of that same face. ``(0, 0)`` when no
        face has a right edge greater than 0.

    Raises:
        ValueError: If the API response's ``error_msg`` is not ``'SUCCESS'``.
    """
    conf = config.getInstance().conf
    app_id = conf.get("face", "appid")
    api_key = conf.get("face", "key")
    app_secret = conf.get("face", "secret")
    client = AipFace(app_id, api_key, app_secret)

    # The image is sent as a base64 string, flagged with the "BASE64" type.
    with open(filename, 'rb') as fp:
        base64_data = base64.b64encode(fp.read())
    response = client.detect(base64_data.decode('utf-8'), "BASE64")
    if response['error_msg'] != 'SUCCESS':
        raise ValueError(response['error_msg'])

    result = response['result']
    print('[+]Found face ' + str(result['face_num']))

    # Keep the face whose right edge (left + width) is furthest to the right.
    max_right = 0
    max_top = 0
    for face in result["face_list"]:
        location = face['location']
        left = int(location['left'])
        top = int(location['top'])
        width = int(location['width'])
        if left + width > max_right:
            max_right = left + width
            max_top = top
    return max_right, max_top
25 changes: 25 additions & 0 deletions ImageProcessing/baidu.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from aip import AipBodyAnalysis
import config


def face_center(filename, model):
    """Locate the right-most person in an image using Baidu body analysis.

    The horizontal centre of a person is taken to be the x coordinate of
    their nose key point.

    Args:
        filename: Path of the image file to analyse.
        model: Unused by this implementation.

    Returns:
        A ``(x, top)`` tuple: the largest nose x coordinate among the
        detected persons and the ``top`` of that person's bounding box.
        ``(0, 0)`` when no nose x coordinate is greater than 0.

    Raises:
        ValueError: If the API response contains an ``error_code`` key.
    """
    def _face_setting(option):
        # Read one option from the [face] section of the project config.
        return config.getInstance().conf.get("face", option)

    client = AipBodyAnalysis(
        _face_setting("appid"),
        _face_setting("key"),
        _face_setting("secret"),
    )

    with open(filename, 'rb') as image_file:
        image_bytes = image_file.read()

    response = client.bodyAnalysis(image_bytes)
    if 'error_code' in response:
        raise ValueError(response['error_msg'])

    person_count = response['person_num']
    print('[+]Found person ' + str(person_count))

    # Use the nose x coordinate as each person's centre point.
    best_x, best_top = 0, 0
    for person in response["person_info"]:
        nose_x = int(person['body_parts']['nose']['x'])
        box_top = int(person['location']['top'])
        if nose_x <= best_x:
            continue
        best_x, best_top = nose_x, box_top
    return best_x, best_top
6 changes: 5 additions & 1 deletion core.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
from scraper import get_data_from_json
from number_parser import is_uncensored
from ImageProcessing import cutImage
from subtitles import download_subtitles



# from WebCrawler import get_data_from_json
Expand Down Expand Up @@ -994,6 +996,8 @@ def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=N
move_status = move_subtitles(movie_path, path, multi_part, number, part, leak_word, c_word, hack_word)
if move_status:
cn_sub = True
else:
cn_sub = cn_sub or download_subtitles(movie_path, path, multi_part, number, part, leak_word, c_word, hack_word)
# 添加水印
if conf.is_watermark():
add_mark(os.path.join(path, poster_path), os.path.join(path, thumb_path), cn_sub, leak, uncensored,
Expand All @@ -1011,7 +1015,7 @@ def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=N
paste_file_to_folder_mode2(movie_path, path, multi_part, number, part, leak_word, c_word, hack_word)

# Move subtitles
move_subtitles(movie_path, path, multi_part, number, part, leak_word, c_word, hack_word)
move_subtitles(movie_path, path, multi_part, number, part, leak_word, c_word, hack_word) or download_subtitles(movie_path, path, multi_part, number, part, leak_word, c_word, hack_word)

elif conf.main_mode() == 3:
path = str(Path(movie_path).parent)
Expand Down
2 changes: 1 addition & 1 deletion number_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
G_spat = re.compile(
"^\w+\.(cc|com|net|me|club|jp|tv|xyz|biz|wiki|info|tw|us|de)@|^22-sht\.me|"
"^(fhd|hd|sd|1080p|720p|4K)(-|_)|"
"(-|_)(fhd|hd|sd|1080p|720p|4K|x264|x265|uncensored|hack|leak)",
"(-|_)(fhd|hd|sd|1080p|720p|4K|x264|x265|uncensored|hack|leaked|leak|uc|u)",
re.IGNORECASE)


Expand Down
10 changes: 10 additions & 0 deletions openssl.cnf
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Top-level entry: names the section OpenSSL reads its initialisation from.
openssl_conf = openssl_init

[openssl_init]
# SSL/TLS module settings live in [ssl_sect].
ssl_conf = ssl_sect

[ssl_sect]
# Default settings for SSL contexts are taken from [system_default_sect].
system_default = system_default_sect

[system_default_sect]
# NOTE(review): UnsafeLegacyRenegotiation re-enables legacy TLS renegotiation
# with peers that lack secure-renegotiation support. This weakens TLS security
# for every connection that uses this config - presumably added to reach an
# old server; confirm it is still required and scope it as narrowly as possible.
Options = UnsafeLegacyRenegotiation
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ urllib3==1.26.5
certifi
MechanicalSoup
opencc-python-reimplemented
chardet
2 changes: 1 addition & 1 deletion scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def get_data_from_json(
specifiedSource=specified_source, specifiedUrl=specified_url,
debug = conf.debug())
# Return if data not found in all sources
if not json_data:
if not json_data or not json_data.get('number'):
print('[-]Movie Number not found!')
return None

Expand Down
2 changes: 1 addition & 1 deletion scrapinglib/airav.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ class Airav(Parser):
def extraInit(self):
# for javbus
self.specifiedSource = None
self.addtion_Javbus = True
self.addtion_Javbus = False

def search(self, number):
self.number = number
Expand Down
5 changes: 4 additions & 1 deletion scrapinglib/api.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# -*- coding: utf-8 -*-

from functools import lru_cache
import re
import json
from .parser import Parser
Expand Down Expand Up @@ -35,7 +36,7 @@ class Scraping:
"""
adult_full_sources = ['javlibrary', 'javdb', 'javbus', 'airav', 'fanza', 'xcity', 'jav321',
'mgstage', 'fc2', 'avsox', 'dlsite', 'carib', 'madou', 'msin',
'getchu', 'gcolle', 'javday', 'pissplay', 'javmenu', 'pcolle', 'caribpr'
'getchu', 'gcolle', 'javday', 'pissplay', 'javmenu', 'pcolle', 'caribpr','madouji'
]

general_full_sources = ['tmdb', 'imdb']
Expand Down Expand Up @@ -69,6 +70,7 @@ def search(self, number, sources=None, proxies=None, verify=None, type='adult',
else:
return self.searchGeneral(number, sources)

@lru_cache(maxsize=None)
def searchGeneral(self, name, sources):
""" 查询电影电视剧
imdb,tmdb
Expand Down Expand Up @@ -115,6 +117,7 @@ def searchGeneral(self, name, sources):

return json_data

@lru_cache(maxsize=None)
def searchAdult(self, number, sources):
if self.specifiedSource:
sources = [self.specifiedSource]
Expand Down
12 changes: 7 additions & 5 deletions scrapinglib/javdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,13 +65,19 @@ def updateCore(self, core):

def search(self, number: str):
self.number = number
number = number.upper()
if 'FC2-PPV' in number or 'FC2PPV' in number:
number= number.replace('FC2-PPV', 'FC2').replace('FC2PPV', 'FC2')
print(number)
self.allow_number_change = True
self.uncensored = True
self.session = request_session(cookies=self.cookies, proxies=self.proxies, verify=self.verify)
if self.specifiedUrl:
self.detailurl = self.specifiedUrl
else:
self.detailurl = self.queryNumberUrl(number)
self.deatilpage = self.session.get(self.detailurl).text
if '此內容需要登入才能查看或操作' in self.deatilpage or '需要VIP權限才能訪問此內容' in self.deatilpage:
if '此內容需要登入才能查看或操作' in self.deatilpage or '需要VIP權限才能訪問此內容' in self.deatilpage or '開通VIP' in self.deatilpage:
self.noauth = True
self.imagecut = 0
result = self.dictformat(self.querytree)
Expand All @@ -98,10 +104,6 @@ def queryNumberUrl(self, number):
correct_url = urls[0]
else:
ids = self.getTreeAll(self.querytree, '//*[contains(@class,"movie-list")]/div/a/div[contains(@class, "video-title")]/strong/text()')
if 'fc2-ppv' in number:
number= number.replace('fc2-ppv', 'fc2')
self.allow_number_change = True
self.uncensored = True
try:
self.queryid = ids.index(number)
correct_url = urls[self.queryid]
Expand Down
8 changes: 4 additions & 4 deletions scrapinglib/madou.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,18 @@
from .parser import Parser

NUM_RULES3=[
r'(?P<eng>(tz|mmz|msd|mdm|yk|pm|pme|pmd|pmc|qdog|qqog|fsog|rs|xkg|xsj|91cm|91kcm|91ycm|tmw|tmq|misav)-?)(?P<num>\d{2,})(?P<part>-(ep\d*|av\d*|\d*|[a-d]*))?.*',
r'(?P<eng>(mm|tz|mmz|msd|mdm|yk|pm|pme|pmd|pmc|pmx|qdog|qqog|fsog|rs|xkg|xsj|91cm|91kcm|91ycm|tmw|tmq|misav|ps|gx|EMTC|KCM|DAD|JDXYX|EMX|xkyp)-?)(?P<num>\d{2,})(?P<part>-(ep\d*|av\d*|\d*|[a-d]*))?.*',
r'(?P<eng>(mky-?[a-z]{2,2})-?)(?P<num>\d{2,})(?P<part>-(ep\d*|av\d*|\d*|[a-d]*))?.*',
r'(?P<eng>xk|xkca|xktc|cz|ly)(?P<num>\d{2,})(?P<part>-(ep\d*|av\d*|\d*|[a-d]*))?.*',
r'(?P<eng>xk|xkca|xktc|cz|ly|tmtc|wmog)(?P<num>\d{2,})(?P<part>-(ep\d*|av\d*|\d*|[a-d]*))?.*',
]

NUM_RULES2=[
r'(?P<eng>xsjtc|xbfsg|xbjpg|xblw|xbtbg)(?P<num>\d{2,})(?P<part>-(ep\d*|av\d*|\d*|[a-d]*))?.*',
r'(?P<eng>wmm|xsjtc|xbfsg|xbjpg|xblw|xbtbg|da|dh)(?P<num>\d{2,})(?P<part>-(ep\d*|av\d*|\d*|[a-d]*))?.*',
]

NUM_RULES4=[
r'(?P<eng>(?<!\w)md[a-ln-z]{0,2}-?)(?P<num>\d{2,})(?P<part>-(ep\d*|av\d*|\d*|[a-d]*))?.*',
r'(?P<eng>(mcy|ras|tmp|fcd|id|tmy|xkk9|blx)-?)(?P<num>\d{2,})(?P<part>-(ep\d*|av\d*|\d*|[a-d]*))?.*',
r'(?P<eng>(mcy|ras|tmp|fcd|id|tmy|xkk9|blx|xjx|idg|MPG)-?)(?P<num>\d{2,})(?P<part>-(ep\d*|av\d*|\d*|[a-d]*))?.*',
]

# modou提取number
Expand Down
Loading

0 comments on commit 32f02d2

Please sign in to comment.