-
Notifications
You must be signed in to change notification settings - Fork 0
/
crawler_ajax_data.py
20 lines (20 loc) · 6.33 KB
/
crawler_ajax_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# Fetch article data from Medium.com (2021/06/11 version); introduces the concept of sending request data
import urllib.request as req
import json
# Endpoint that receives the GraphQL request.
url = "https://medium.com/_/graphql"
# Request payload, later serialized to JSON and sent as the request body.
# The query text is written as adjacent string literals (one per GraphQL
# definition) that Python concatenates into a single string at compile time;
# a plain "..." literal may not span several physical lines.
requestData = {
    "operationName": "WebRecommendedFeedQuery",
    "variables": {},
    "query": (
        # Top-level query: the recommended feed items plus paging information.
        "query WebRecommendedFeedQuery($paging: PagingOptions) {\n webRecommendedFeed(paging: $paging) {\n items {\n feedId\n reason\n moduleSourceEncoding\n postProviderExplanation {\n reason\n topic {\n name\n __typename\n }\n __typename\n }\n post {\n ...HomeFeedItem_post\n ...useHideFeedPosts_post\n __typename\n }\n __typename\n }\n pagingInfo {\n next {\n limit\n to\n __typename\n }\n __typename\n }\n __typename\n }\n}\n\n"
        # Fragments referenced (directly or transitively) by the query above.
        "fragment HomeFeedItem_post on Post {\n __typename\n id\n title\n firstPublishedAt\n mediumUrl\n collection {\n id\n name\n domain\n logo {\n id\n __typename\n }\n __typename\n }\n creator {\n id\n name\n username\n imageId\n mediumMemberAt\n __typename\n }\n previewImage {\n id\n __typename\n }\n previewContent {\n subtitle\n __typename\n }\n readingTime\n tags {\n ...TopicPill_tag\n __typename\n }\n ...BookmarkButton_post\n ...CreatorActionOverflowPopover_post\n ...PostPresentationTracker_post\n ...PostPreviewAvatar_post\n}\n\n"
        "fragment TopicPill_tag on Tag {\n __typename\n id\n displayTitle\n}\n\n"
        "fragment BookmarkButton_post on Post {\n ...SusiClickable_post\n ...WithSetReadingList_post\n ...AddToCatalogBookmarkButton_post\n __typename\n id\n}\n\n"
        "fragment SusiClickable_post on Post {\n id\n mediumUrl\n ...SusiContainer_post\n __typename\n}\n\n"
        "fragment SusiContainer_post on Post {\n id\n __typename\n}\n\n"
        "fragment WithSetReadingList_post on Post {\n ...ReadingList_post\n __typename\n id\n}\n\n"
        "fragment ReadingList_post on Post {\n __typename\n id\n viewerEdge {\n id\n readingList\n __typename\n }\n}\n\n"
        "fragment AddToCatalogBookmarkButton_post on Post {\n ...AddToCatalogBase_post\n __typename\n id\n}\n\n"
        "fragment AddToCatalogBase_post on Post {\n id\n __typename\n}\n\n"
        "fragment CreatorActionOverflowPopover_post on Post {\n allowResponses\n id\n statusForCollection\n isLocked\n isPublished\n clapCount\n mediumUrl\n pinnedAt\n pinnedByCreatorAt\n curationEligibleAt\n mediumUrl\n responseDistribution\n visibility\n ...useIsPinnedInContext_post\n"
        " pendingCollection {\n id\n name\n creator {\n id\n __typename\n }\n avatar {\n id\n __typename\n }\n viewerEdge {\n id\n isEditor\n __typename\n }\n domain\n slug\n __typename\n }\n"
        " creator {\n id\n viewerEdge {\n id\n isBlocking\n __typename\n }\n ...MutePopoverOptions_creator\n ...auroraHooks_publisher\n __typename\n }\n"
        " collection {\n id\n name\n creator {\n id\n __typename\n }\n avatar {\n id\n __typename\n }\n viewerEdge {\n id\n isEditor\n __typename\n }\n domain\n slug\n ...MutePopoverOptions_collection\n ...auroraHooks_publisher\n __typename\n }\n"
        " viewerEdge {\n clapCount\n id\n shareKey\n __typename\n }\n ...ClapMutation_post\n __typename\n}\n\n"
        "fragment MutePopoverOptions_creator on User {\n id\n __typename\n}\n\n"
        "fragment MutePopoverOptions_collection on Collection {\n id\n __typename\n}\n\n"
        "fragment ClapMutation_post on Post {\n __typename\n id\n clapCount\n viewerEdge {\n id\n clapCount\n __typename\n }\n ...MultiVoteCount_post\n}\n\n"
        "fragment MultiVoteCount_post on Post {\n id\n ...PostVotersNetwork_post\n __typename\n}\n\n"
        "fragment PostVotersNetwork_post on Post {\n voterCount\n viewerEdge {\n id\n clapCount\n __typename\n }\n recommenders {\n name\n __typename\n }\n __typename\n id\n}\n\n"
        "fragment useIsPinnedInContext_post on Post {\n id\n collection {\n id\n __typename\n }\n pendingCollection {\n id\n __typename\n }\n pinnedAt\n pinnedByCreatorAt\n __typename\n}\n\n"
        "fragment auroraHooks_publisher on Publisher {\n __typename\n ... on Collection {\n isAuroraEligible\n isAuroraVisible\n viewerEdge {\n id\n isEditor\n __typename\n }\n __typename\n id\n }\n ... on User {\n isAuroraVisible\n __typename\n id\n }\n}\n\n"
        "fragment PostPresentationTracker_post on Post {\n id\n visibility\n previewContent {\n isFullContent\n __typename\n }\n collection {\n id\n slug\n __typename\n }\n __typename\n}\n\n"
        "fragment PostPreviewAvatar_post on Post {\n __typename\n id\n collection {\n id\n name\n ...CollectionAvatar_collection\n ...collectionUrl_collection\n __typename\n }\n creator {\n id\n username\n name\n ...UserAvatar_user\n ...userUrl_user\n __typename\n }\n}\n\n"
        "fragment CollectionAvatar_collection on Collection {\n name\n avatar {\n id\n __typename\n }\n ...collectionUrl_collection\n __typename\n id\n}\n\n"
        "fragment collectionUrl_collection on Collection {\n id\n domain\n slug\n __typename\n}\n\n"
        "fragment UserAvatar_user on User {\n __typename\n username\n id\n name\n imageId\n mediumMemberAt\n ...userUrl_user\n}\n\n"
        "fragment userUrl_user on User {\n __typename\n id\n customDomainState {\n live {\n domain\n __typename\n }\n __typename\n }\n username\n hasSubdomain\n}\n\n"
        "fragment useHideFeedPosts_post on Post {\n __typename\n id\n collection {\n id\n __typename\n }\n creator {\n id\n __typename\n }\n}\n"
    ),
}
# Build the request object with its headers and body. The body is the JSON
# serialization of requestData, encoded to bytes as urllib requires; supplying
# `data` makes urllib send the request as a POST.
request = req.Request(
    url,
    headers={
        "Content-Type": "application/json",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36",
    },
    data=json.dumps(requestData).encode("utf-8"),
)
# Send the request. The timeout (in seconds) keeps the script from blocking
# indefinitely when the server never answers.
with req.urlopen(request, timeout=30) as response:
    result = response.read().decode("utf-8")
# Parse the JSON response and print the title of every article in the feed.
result = json.loads(result)
# A GraphQL reply may carry an "errors" entry instead of usable data; report
# that explicitly rather than failing below with an opaque KeyError/TypeError.
feed = (result.get("data") or {}).get("webRecommendedFeed")
if feed is None:
    raise RuntimeError("GraphQL request failed: {}".format(result.get("errors")))
# Keep the list and the loop variable under separate names so the loop does
# not rebind the collection it is iterating over.
items = feed["items"]
for item in items:
    # Skip entries without a post so one incomplete entry does not abort
    # the whole listing.
    post = item.get("post")
    if post:
        print(post["title"])