Skip to content

Commit 1841991

Browse files
committed
12.9
1 parent 04ecaaa commit 1841991

File tree

4 files changed

+58
-40
lines changed

4 files changed

+58
-40
lines changed

ModifyFilename.py

+7-6
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,10 @@
22
# Shorten the name of every entry that lives one level below the current
# working directory to at most MAX_NAME_LEN characters.
#
# NOTE(review): the slice is applied to the whole file name, so a long name
# also loses its extension -- confirm that this is intended.
MAX_NAME_LEN = 50

base_dir = os.getcwd()
subdir = os.listdir(base_dir)
for entry in subdir:
    path = os.path.join(base_dir, entry)
    if not os.path.isdir(path):
        continue
    for old_name in os.listdir(path):
        new_name = old_name[:MAX_NAME_LEN]
        if new_name == old_name:
            # Already short enough; nothing to rename.
            continue
        target = os.path.join(path, new_name)
        if os.path.exists(target):
            # Two long names can share the same first MAX_NAME_LEN
            # characters. On POSIX os.rename() would silently replace the
            # existing file, so skip instead of destroying data.
            print('skipped %s: %s already exists'
                  % (os.path.join(path, old_name), target))
            continue
        os.rename(os.path.join(path, old_name), target)

biyingSpider.py

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
import requests
2+
import re
3+
import time
4+
# Download the wallpaper referenced by the Bing (China) home page and store
# it in the current directory as "<YYYY.MM.DD>.jpg".
local = time.strftime("%Y.%m.%d")
url = 'http://cn.bing.com/'

con = requests.get(url, timeout=30)
# Fail early on an HTTP error instead of searching an error page.
con.raise_for_status()
content = con.text

# The dots are escaped so that they only match the literal dots of the host
# name and of the ".jpg" extension, not arbitrary characters.
reg = r"(http://s\.cn\.bing\.net/az/hprichbg/rb/.*?\.jpg)"
match = re.search(reg, content, re.S)
if match is None:
    # Clear message instead of an IndexError when the page layout changes.
    raise SystemExit('no wallpaper URL found on %s' % url)
a = match.group(1)
print(a)

read = requests.get(a, timeout=30)
# Do not write an error response to disk under a .jpg name.
read.raise_for_status()
# The context manager closes the file even if the write fails.
with open('%s.jpg' % local, 'wb') as f:
    f.write(read.content)

douban_movie.py

+29-26
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
#!/usr/bin/env python
22
# encoding=utf-8
3-
import requests,re
3+
import requests
4+
import re
45
import codecs
56
from bs4 import BeautifulSoup
67
from openpyxl import Workbook
78
wb = Workbook()
89
dest_filename = '电影.xlsx'
9-
ws1 = wb.active
10+
ws1 = wb.active
1011
ws1.title = "电影top250"
1112

1213
DOWNLOAD_URL = 'http://movie.douban.com/top250/'
@@ -24,56 +25,58 @@ def download_page(url):
2425
def get_li(doc):
    """Extract the movie entries of one listing page.

    ``doc`` is the HTML of the page. Every ``<li>`` inside the
    ``<ol class="grid_view">`` element contributes one item to each of the
    four returned lists.

    Returns a 5-tuple ``(names, vote_texts, ratings, quotes, next_url)``:

    * names      -- text of the first ``span.title`` in the entry header
    * vote_texts -- the text node inside ``div.star`` that contains '评价'
    * ratings    -- text of ``span.rating_num``
    * quotes     -- text of ``span.inq``, or '无' when the entry has none
    * next_url   -- ``DOWNLOAD_URL`` plus the href of the link inside
      ``span.next``, or ``None`` when that span holds no link
    """
    soup = BeautifulSoup(doc, 'html.parser')
    listing = soup.find('ol', class_='grid_view')

    titles = []      # movie names
    vote_texts = []  # number-of-ratings text
    ratings = []     # scores
    quotes = []      # one-line quotes

    for item in listing.find_all('li'):
        header = item.find('div', class_='hd')
        title = header.find('span', class_='title').get_text()
        rating = item.find('span', class_='rating_num').get_text()
        rating_box = item.find('div', class_='star')
        votes = rating_box.find(text=re.compile('评价'))

        # Not every entry carries a quote; fall back to the placeholder.
        quote_tag = item.find('span', class_='inq')
        quotes.append(quote_tag.get_text() if quote_tag else '无')

        ratings.append(rating)
        titles.append(title)
        vote_texts.append(votes)

    # The "next" span only contains a link while there is a following page.
    next_link = soup.find('span', class_='next').find('a')
    next_url = DOWNLOAD_URL + next_link['href'] if next_link else None
    return titles, vote_texts, ratings, quotes, next_url
5254

5355

5456
def main():
    """Crawl all listing pages and write the collected rows to the workbook.

    Starting at ``DOWNLOAD_URL``, each page is fetched with
    ``download_page`` and parsed with ``get_li`` until ``get_li`` returns
    ``None`` as the next URL. The results are then written to ``ws1``, one
    row per movie (A: name, B: number-of-ratings text, C: score, D: quote),
    and the workbook is saved to ``dest_filename``.
    """
    url = DOWNLOAD_URL
    name = []
    star_con = []
    score = []
    info = []
    while url:
        doc = download_page(url)
        movie, star, level_num, info_list, url = get_li(doc)
        name.extend(movie)
        star_con.extend(star)
        score.extend(level_num)
        info.extend(info_list)

    # Rows are numbered by position. Looking the row up with
    # name.index(title) returns the first match, so two movies with the same
    # title would be written to the same row (and every lookup is a linear
    # scan of the list).
    rows = zip(name, star_con, score, info)
    for row, (title, votes, rating, quote) in enumerate(rows, start=1):
        ws1['A%s' % row] = title
        ws1['B%s' % row] = votes
        ws1['C%s' % row] = rating
        ws1['D%s' % row] = quote
    wb.save(filename=dest_filename)


if __name__ == '__main__':
    main()

readExcel.py

+8-8
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,15 @@
33
from openpyxl.cell import get_column_letter
44
# Build a demo workbook with three sheets and save it as dest_filename.
# NOTE(review): newer openpyxl releases appear to provide get_column_letter
# in openpyxl.utils rather than openpyxl.cell -- confirm against the
# installed version.
wb = Workbook()
dest_filename = 'empty_book2.xlsx'

# Sheet 1: the default active sheet, renamed. range(1, 40) yields 39
# iterations, and each one appends a row holding the integers 0..59.
ws1 = wb.active
ws1.title = "range names"
for _row in range(1, 40):
    ws1.append(range(60))

# Sheet 2: a single value in cell F5.
ws2 = wb.create_sheet(title="Pi")
ws2['F5'] = 3.14

# Sheet 3: rows 10..19, columns 27..53, each cell holding the letter name of
# its own column.
ws3 = wb.create_sheet(title="Data")
for row_idx in range(10, 20):
    for col_idx in range(27, 54):
        letter = "%s" % get_column_letter(col_idx)
        ws3.cell(row=row_idx, column=col_idx, value=letter)

wb.save(filename=dest_filename)

0 commit comments

Comments
 (0)