|
| 1 | +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> |
| 2 | +<html xmlns="http://www.w3.org/1999/xhtml"> |
| 3 | +<head> |
| 4 | + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> |
| 5 | + <title>WebTeam5 Spider</title> |
| 6 | + <style> |
| 7 | + </style> |
| 8 | + <script src="http://code.jquery.com/jquery-2.0.3.min.js"></script> |
| 9 | + <script src="usedData.js"></script> |
| 10 | +</head> |
| 11 | +<body> |
| 12 | + Laiwang Spider, powered by Bosn.<br /> |
| 13 | + <br /> |
| 14 | + <br /> |
| 15 | + 说明: 最终结果在result中...使用时要加载usedData来过滤已使用过的数据。在usedData.js中定义:var usedData = [];即可。没有则忽略。 |
| 16 | +</body> |
| 17 | + |
| 18 | +<script type="text/javascript"> |
/***********************************************************
 *                                                         *
 *                 CONFIGURATIONS BEGIN                    *
 *                                                         *
 ***********************************************************/
var ACCESS_TOKEN = '{{id}}';        // API access token
var GROUP_LIMIT = 100;              // maximum number of groups to process
var POST_IN_GROUP_LIMIT = 30;       // maximum number of page loads per group
/***********************************************************
 *                                                         *
 *                  CONFIGURATIONS END                     *
 *                                                         *
 ***********************************************************/

// The user id is whatever follows the first '_' in the token
// (the whole token when it contains no underscore).
var USER_ID = ACCESS_TOKEN.substring(ACCESS_TOKEN.indexOf('_') + 1);
console.log("token:", ACCESS_TOKEN, ", userId:", USER_ID);

// usedData.js may define a global `usedData` array of ids to exclude;
// fall back to an empty list when it does not.
var usedData = usedData || [];

// Shared crawler state.
var groups = [];            // groups still waiting to be processed
var curGroupIndex = 0;      // number of groups started so far (used for logging and the limit)
var groupsLength = 0;       // total number of groups fetched, for progress logging
var dataPool = [];          // one entry per processed group, each a list of loaded pages
var result = [];            // final list of collected user ids

// Fetch the list of favorite groups, then start processing them one by one.
$.ajax({
    type : "get",
    // The separator before "timestamp" must be a literal '&'; the original
    // text had been entity-mangled into "×tamp", which dropped the parameter.
    url : "http://api.laiwang.com/v1/event/favorite/list?access_token=" + ACCESS_TOKEN + "&size=100&timestamp=" + new Date().getTime(),
    success : function(o) {
        // Tolerate a response without a `values` array.
        var arr = (o && o.values) || [];
        var n = arr.length;
        while (n--) {
            groups.push(arr[n]);
        }
        groupsLength = groups.length;
        if (groupsLength === 0) {
            // Nothing to crawl; starting the group loop would dereference an undefined group.
            console.log("No groups returned; nothing to process.");
            return;
        }
        processGroups();
    },
    error : function(xhr, status, err) {
        console.log("Failed to load the group list:", status, err);
    }
});
| 56 | + |
| 57 | + |
| 58 | + // obtain groups data |
/**
 * Processes the pending groups one at a time, taking them from the end of
 * the global `groups` array. Each group's loaded data is appended to
 * `dataPool`. When no groups remain, or GROUP_LIMIT groups have been
 * started, `processDataPool` is invoked; otherwise the next group is
 * scheduled after a 500 ms pause.
 */
function processGroups() {
    // Guard: with nothing queued, groups[groups.length - 1] is undefined and
    // processGroup would throw when reading its id.
    if (groups.length === 0) {
        console.log("No groups left to process.");
        processDataPool();
        return;
    }

    console.log("Start processing the ", (curGroupIndex++) + 1, " of ", groupsLength , " group...");
    processGroup(groups[groups.length - 1], function(data) {
        dataPool.push(data);
        // Drop the group that was just handled.
        groups.pop();
        if (groups.length <= 0 || curGroupIndex > GROUP_LIMIT - 1) {
            console.log("Processed all groups!");
            processDataPool();
            return;
        }
        // Pause between groups before issuing the next batch of requests.
        setTimeout(processGroups, 500);
    });
}
| 74 | + |
/**
 * Starts paged loading for a single group.
 *
 * @param {Object} group - group record; only its `id` is read here.
 * @param {Function} cb - passed through to processGroupSub, which receives
 *     the accumulator array once loading for the group ends.
 */
function processGroup(group, cb) {
    var eventId = group.id;
    // Begin with an empty accumulator, no cursor and load index 0; the
    // timestamp captured here is handed to the first page request.
    processGroupSub([], new Date().getTime(), eventId, cb, null, 0);
}
| 81 | + |
/**
 * Loads one page of posts for a group and schedules itself for the next
 * page until paging ends.
 *
 * Paging stops when the response carries no usable next cursor, when
 * POST_IN_GROUP_LIMIT pages have been loaded, or when a request fails.
 * In every case `cb` is called exactly once with the pages gathered so far.
 *
 * @param {Array} data - accumulator; one array of posts is pushed per page.
 * @param {number} timestamp - value sent as the `timestamp` query parameter.
 * @param {*} groupId - value sent as the `eventId` query parameter.
 * @param {Function} cb - called with `data` when loading for the group ends.
 * @param {*} nextCursor - cursor from the previous page; falsy for the first page.
 * @param {number} index - zero-based count of pages already requested.
 */
function processGroupSub(data, timestamp, groupId, cb, nextCursor, index) {
    console.log("Performing the ", index + 1, "th load in group...");
    // "&timestamp=" had been entity-mangled into "×tamp" in the original text.
    var url = "http://api.laiwang.com/v1/feed/post/event/list?access_token=" + ACCESS_TOKEN + "&size=20&eventId=" + groupId + "&timestamp=" + timestamp;
    // Only send a cursor when there is one. The original test was inverted,
    // so it sent "nextCursor=null" on the first page and never sent a real
    // cursor afterwards, re-fetching the first page every time.
    if (nextCursor) {
        url += "&nextCursor=" + encodeURIComponent(nextCursor);
    }
    $.ajax({
        type : "get",
        url : url,
        success : function(o) {
            // Keep the accumulator well-formed even if the response has no values.
            data.push((o && o.values) || []);
            var cursor = o ? o.nextCursor : 0;
            // A missing cursor is treated like 0: without one the next request
            // would simply start over from the first page.
            if (!cursor || index >= POST_IN_GROUP_LIMIT - 1) {
                cb(data);
            } else {
                // Pause between page requests.
                setTimeout(function() {
                    processGroupSub(data, timestamp, groupId, cb, cursor, index + 1);
                }, 500);
            }
        },
        error : function(xhr, status, err) {
            // Finish this group with what was collected instead of stalling
            // the whole run on a single failed request.
            console.log("Load failed in group", groupId, ":", status, err);
            cb(data);
        }
    });
}
| 103 | + |
/**
 * Walks everything gathered in the global `dataPool` and appends the unique
 * user ids found there to the global `result` array.
 *
 * For each post the ids of its commentors (`comments[].commentor.id`),
 * its emotion authors (`emotions[].user.id`) and its publisher
 * (`publisher.id`) are collected. Ids listed in the global `usedData` are
 * skipped. Missing pages, posts or fields are ignored rather than throwing.
 */
function processDataPool() {
    console.log("Start processing dataPool...");
    // Prototype-less maps so ids such as "constructor" are not mistaken for
    // already-present entries.
    var usedMap = Object.create(null);
    var resultMap = Object.create(null);
    var n, i, j, k;
    var pages, posts, post, list;
    var key;

    // Returns holder[field].id, or undefined when any part is missing.
    function idOf(holder, field) {
        return holder && holder[field] ? holder[field].id : undefined;
    }

    // Records an id unless it is absent or already marked as used.
    function collect(id) {
        if (id === undefined || id === null) {
            return;
        }
        if (!usedMap[id]) {
            resultMap[id] = true;
        }
    }

    console.log("process used map...");
    n = usedData.length;
    while (n--) {
        usedMap[usedData[n]] = true;
    }
    console.log(usedData.length + " items in usedData. We'll filter these duplicated ids.");

    // dataPool -> per-group list of pages -> per-page list of posts.
    n = dataPool.length;
    while (n--) {
        pages = dataPool[n] || [];
        i = pages.length;
        while (i--) {
            posts = pages[i] || [];
            j = posts.length;
            while (j--) {
                post = posts[j];
                if (!post) {
                    continue;
                }
                list = post.comments || [];
                k = list.length;
                while (k--) {
                    collect(idOf(list[k], "commentor"));
                }
                list = post.emotions || [];
                k = list.length;
                while (k--) {
                    collect(idOf(list[k], "user"));
                }
                collect(idOf(post, "publisher"));
            }
        }
    }

    for (key in resultMap) {
        result.push(key);
    }
    console.log("Process complete, collected ", result.length, " userIds. Enjoy :), powered by Bosn.");
}
| 161 | +</script> |
| 162 | +</html> |
0 commit comments