-
Notifications
You must be signed in to change notification settings - Fork 0
/
getPhotos.py
58 lines (35 loc) · 1.44 KB
/
getPhotos.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# Markers used to locate photo entries inside an album page's HTML.
_ITEM_MARKER = 'class="item'
_LINK_MARKER = '<a href="'
_CAPTION_MARKER = '<h5>'


def _toText(page):
    """Return the response body as text, decoding it when it arrives as bytes."""
    # On Python 3 an HTTP client may return the body as bytes (httplib2 does),
    # and bytes.find() with a str needle raises TypeError.  On Python 2 `bytes`
    # is `str`, so the second check leaves the body untouched there.
    if isinstance(page, bytes) and not isinstance(page, str):
        # NOTE(review): UTF-8 is assumed; the charset is not read from the
        # response headers -- confirm against the site being scraped.
        return page.decode("utf-8", "replace")
    return page


def _parseAlbumTitle(page):
    """Return the stripped text of the first <h1>...</h1>, or None if absent."""
    openTag = '<h1>'
    start = page.find(openTag)
    if start == -1:
        return None
    start += len(openTag)
    end = page.find('</h1>', start)
    if end == -1:
        return None
    return page[start:end].strip()


def _parsePhotoItems(page, limit):
    """Return up to `limit` (name, href) pairs for the photo items in `page`.

    For each item the href is the target of the first link after the item
    marker, and the name is the text of the first link after the following
    <h5>.  Parsing stops at the first item that is missing any of these parts.
    """
    items = []
    pos = 0
    try:
        while len(items) < limit:
            itemAt = page.index(_ITEM_MARKER, pos)
            hrefStart = page.index(_LINK_MARKER, itemAt) + len(_LINK_MARKER)
            hrefEnd = page.index('"', hrefStart)
            captionAt = page.index(_CAPTION_MARKER, hrefEnd)
            nameLinkAt = page.index(_LINK_MARKER, captionAt)
            nameStart = page.index('>', nameLinkAt) + 1
            nameEnd = page.index('<', nameStart)
            items.append((page[nameStart:nameEnd].strip(),
                          page[hrefStart:hrefEnd].strip()))
            # Continue scanning after the name that was just consumed.
            pos = nameEnd
    except ValueError:
        # str.index raises when a marker is not found: either there are no
        # more items, or the current one is incomplete.  Keep what we have
        # instead of slicing with -1 offsets.
        pass
    return items


# Get the pages with photos on
def getPhotosFromAlbum(albumUrl, http, headers):
    """Get the album name and the photo page urls in this album.

    Pages are requested from ``albumUrl + "?start=<offset>"``, where the
    offset is the number of photos collected so far, until a page yields no
    complete photo item.

    Parameters:
        albumUrl: base url of the album; the "?start=" query is appended.
        http: object exposing ``request(url, method, headers=...)`` that
            returns a ``(response, body)`` pair.
        headers: headers passed through unchanged to every request.

    Returns:
        A tuple ``(albumName, photos)``.  ``photos`` maps each photo name to
        its page href; a repeated name keeps the href seen last.
        ``albumName`` is the <h1> text of the last page that contained photos,
        or "" when no such page had an <h1> (for example an empty album).
    """
    photos = {}
    itemsPerPage = 12
    totalCount = 0
    # Default so that an album without any photo page still returns cleanly
    # instead of failing on an unassigned name.
    albumName = ""
    while True:
        _response, page = http.request(
            albumUrl + "?start=" + str(totalCount), 'GET', headers=headers)
        page = _toText(page)
        items = _parsePhotoItems(page, itemsPerPage)
        if not items:
            # Nothing usable on this page: either we are past the last page
            # or the markup is not what we expect.  Stopping here also
            # guarantees the offset always advances between requests.
            break
        title = _parseAlbumTitle(page)
        if title is not None:
            albumName = title
        # Later pairs overwrite earlier ones with the same name.
        photos.update(items)
        totalCount += len(items)
    return albumName, photos