This repository has been archived by the owner on Apr 7, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 6
/
scraper.go
157 lines (134 loc) · 3.64 KB
/
scraper.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
package ss13_se
import (
"crypto/sha256"
"fmt"
"io"
"log"
"net/http"
"os"
"regexp"
"strconv"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
"golang.org/x/text/encoding/charmap"
)
const (
// byondURL is the location scrapeByond reads the server list from. It is
// normally fetched over HTTP through openPage.
byondURL string = "http://www.byond.com/games/Exadv1/SpaceStation13"
// Testing alternative: when byondURL is set to this exact path, scrapeByond
// opens it as a local file with os.Open instead of making an HTTP request.
//byondURL string = "./tmp/dump.html" // For testing
// userAgent is the value openPage sends in the User-Agent request header.
userAgent string = "ss13hub/2.0pre"
)
var (
// rePlayers captures the player count (the digits) from text of the form
// "Logged in: <n> player..."; parseEntry applies it to a server's status
// text.
rePlayers = regexp.MustCompile(`Logged in: (\d+) player`)
// Disabled alternative that additionally anchors on the surrounding
// <br/> and <a href markup.
//rePlayers = regexp.MustCompile(`<br/>\s*<br/>\s*Logged in: (\d+) player.*<a href`)
)
// scrapeByond loads the page identified by byondURL and returns the server
// entries parsed from it, each stamped with now.
//
// When byondURL is the local test dump path the page is read from disk with
// os.Open; otherwise it is fetched through webClient via openPage. Any error
// from opening or parsing the page is returned unchanged with a nil slice.
func scrapeByond(webClient *http.Client, now time.Time) ([]ServerEntry, error) {
	var (
		page io.ReadCloser
		err  error
	)

	// Pick the source: a local dump (testing) or the live page.
	switch byondURL {
	case "./tmp/dump.html":
		page, err = os.Open(byondURL)
	default:
		page, err = openPage(webClient, byondURL)
	}
	if err != nil {
		return nil, err
	}
	defer page.Close()

	entries, err := parseByondPage(now, page)
	if err != nil {
		return nil, err
	}
	return entries, nil
}
// openPage issues a GET request for url through webClient, sending userAgent
// as the User-Agent header, and returns the response body.
//
// On success (HTTP 200) the caller owns the returned body and must close it.
// On failure a nil body and a non-nil error are returned: either the error
// from building/sending the request, or a "bad http.Response.Status" error
// for any non-200 status. In the non-200 case the response body is closed
// here, so nothing is left open for the caller to clean up.
func openPage(webClient *http.Client, url string) (io.ReadCloser, error) {
	req, err := http.NewRequest(http.MethodGet, url, nil)
	if err != nil {
		return nil, err
	}
	req.Header.Set("User-Agent", userAgent)

	resp, err := webClient.Do(req)
	if err != nil {
		return nil, err
	}
	if resp.StatusCode != http.StatusOK {
		// The body is not handed to the caller on this path, so it must be
		// closed here; otherwise the underlying connection is leaked.
		resp.Body.Close()
		return nil, fmt.Errorf("bad http.Response.Status: %s", resp.Status)
	}
	return resp.Body, nil
}
// parseByondPage parses the HTML read from body and returns one ServerEntry
// for every distinct server found in a ".live_game_entry" element, with the
// entry's Time set to now.
//
// Entries that parseEntry fails on are logged and skipped; entries for which
// IsZero reports true are skipped silently. An error is returned only when
// the document itself cannot be parsed. Entries are returned in page order.
func parseByondPage(now time.Time, body io.Reader) ([]ServerEntry, error) {
	// Byond serves its pages with Windows-1252 encoding, so decode the
	// stream before handing it to the HTML parser.
	decoded := charmap.Windows1252.NewDecoder().Reader(body)
	doc, err := goquery.NewDocumentFromReader(decoded)
	if err != nil {
		return nil, err
	}

	var servers []ServerEntry
	// IDs already added to servers, so each duplicate check is a single map
	// lookup instead of a scan over everything collected so far.
	seen := make(map[string]struct{})

	doc.Find(".live_game_entry").Each(func(_ int, sel *goquery.Selection) {
		entry, err := parseEntry(sel.Find(".live_game_status"))
		if err != nil {
			log.Println("Error parsing entry:", err)
			return
		}
		if entry.IsZero() {
			return
		}

		// Keep only the first instance of each server. Per the original
		// author's note, Byond lists the most popular servers first, so
		// first-wins gives some protection against someone trying to
		// influence a server's history with a look-alike entry further
		// down the page.
		if _, dup := seen[entry.ID]; dup {
			return
		}
		seen[entry.ID] = struct{}{}

		entry.Time = now
		servers = append(servers, entry)
	})

	return servers, nil
}
// parseEntry builds a ServerEntry from the given selection.
//
// It returns the zero ServerEntry with a nil error when the entry should be
// ignored: no player count could be found in the text, the count is below
// one, or the entry has no title. A non-nil error is returned only when the
// matched player count cannot be converted to an int.
func parseEntry(s *goquery.Selection) (ServerEntry, error) {
	var none ServerEntry

	// The player count is not wrapped in a tag of its own, so it has to be
	// fished out of the selection's flattened text.
	status := strings.Replace(s.Text(), "\n", "", -1)
	status = strings.TrimSpace(status)

	// A successful match yields exactly two items: the whole match and the
	// captured digits. Anything else means there is no usable count.
	match := rePlayers.FindStringSubmatch(status)
	if len(match) != 2 {
		return none, nil
	}

	players, err := strconv.Atoi(match[1])
	if err != nil {
		return none, err
	}
	// Empty servers are ignored here (they'll get updated with 0
	// players/history anyway, in a later stage).
	if players < 1 {
		return none, nil
	}

	// Grab and sanitize the server's name.
	title := strings.TrimSpace(s.Find("b").First().Text())
	title = strings.Replace(title, "\n", "", -1)
	if title == "" {
		// The byond page sometimes has server entries that are basically
		// blank: no server name or player count, just some byond url.
		return none, nil
	}

	// A bare "http://" link carries no information; treat it as absent.
	siteURL := s.Find("a").First().AttrOr("href", "")
	if siteURL == "http://" {
		siteURL = ""
	}

	entry := ServerEntry{
		ID:      makeID(title),
		Title:   title,
		SiteURL: siteURL,
		GameURL: s.Find("span.smaller").Find("nobr").Text(),
		Players: players,
	}
	return entry, nil
}
// makeID derives an identifier from title: the SHA-256 digest of the title's
// bytes, rendered as a lowercase hexadecimal string.
func makeID(title string) string {
	hasher := sha256.New()
	hasher.Write([]byte(title))
	digest := hasher.Sum(nil)
	return fmt.Sprintf("%x", digest)
}