-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathforum-poster.go
300 lines (253 loc) · 8.15 KB
/
forum-poster.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
package forumposter
import (
"context"
"errors"
"fmt"
"io"
"io/ioutil"
"net/http"
"net/http/cookiejar"
"os"
"path"
"path/filepath"
"runtime"
"strings"
"time"
"github.com/sirupsen/logrus"
log "github.com/sirupsen/logrus"
)
// Payload carries the content of a single post to send to the forum: its
// title, its message text and its tags.
type Payload struct {
	Title, Message, Tags string
}
// CollectorOption is a functional option: a function that receives a
// *Collector and changes one of its settings. NewCollector applies every
// option it is given, in order, after Init has installed the defaults.
type CollectorOption func(*Collector)
// Collector holds the configuration and the session state (cookies, session
// ID, last visited URL) shared by the HTTP requests of a scraping/posting job.
// Use NewCollector to obtain one with the defaults installed by Init.
type Collector struct {
// UserAgent is the User-Agent header value sent with HTTP requests.
// Init sets a desktop Chrome default; the UserAgent option overrides it.
UserAgent string
// Context is the context intended for HTTP requests, so that a job can be
// cancelled cleanly. Init sets it to context.Background().
Context context.Context
// LogLevel names the desired logging verbosity: info, debug or trace.
// Default is INFO.
// NOTE(review): nothing in this file reads this field; the package-level
// LogLevel function is what changes the logger level. Confirm how the two
// are meant to be connected.
LogLevel string
// LogFile tells whether logs should also be saved to a file (true) or only
// printed to the display (false, the default).
// NOTE(review): nothing in this file reads this field; the package-level
// LogFile function performs the actual configuration. Confirm the link.
LogFile bool
// Cookie is the cookie jar that stores the session cookies. Init creates it
// and hands it to Client.
Cookie *cookiejar.Jar
// Client is the HTTP client used to send requests. Init creates it with
// Cookie as its jar.
Client *http.Client
// Sid is the session ID. fetch copies it from any response cookie whose
// name contains "sid".
Sid string
// SecurityToken is the token obtained from login.
// NOTE(review): not assigned anywhere in this file; presumably filled in by
// the login code. Verify against the caller.
SecurityToken string
// FinalURL is the URL of the last request the client made during the most
// recent fetch, i.e. the address reached after any redirect.
FinalURL string
// Version is the phpBB version of the target forum: 1-3.
Version int
}
// Sentinel errors exported by the package. None of them is returned by the
// code in this file; several refer to crawler features (allowed domains, URL
// filters, limit rules, proxies, queues) that are not implemented here.
var (
	// Session and login.

	// ErrLoginFailed reports that a login to the forum did not succeed.
	ErrLoginFailed = errors.New("LoginFailed")
	// ErrNoCookieJar reports that no cookie jar is available.
	ErrNoCookieJar = errors.New("Cookie jar is not available")

	// URL and domain validation.

	// ErrMissingURL reports that a URL was required but not supplied.
	ErrMissingURL = errors.New("Missing URL")
	// ErrForbiddenDomain reports a visit to a domain that is not among the
	// allowed domains.
	ErrForbiddenDomain = errors.New("Forbidden domain")
	// ErrForbiddenURL reports a visit to a URL that the URL filters do not
	// allow.
	ErrForbiddenURL = errors.New("ForbiddenURL")
	// ErrNoURLFiltersMatch reports a visit to a URL that matches none of the
	// URL filters.
	ErrNoURLFiltersMatch = errors.New("No URLFilters match")
	// ErrRobotsTxtBlocked reports a URL that robots.txt forbids.
	ErrRobotsTxtBlocked = errors.New("URL blocked by robots.txt")

	// Crawl limits and bookkeeping.

	// ErrMaxDepth reports that the maximum depth was exceeded.
	ErrMaxDepth = errors.New("Max depth limit reached")
	// ErrAlreadyVisited reports a URL that has been visited before.
	ErrAlreadyVisited = errors.New("URL already visited")
	// ErrQueueFull reports that the queue has reached its maximum size.
	ErrQueueFull = errors.New("Queue MaxSize reached")

	// Configuration and transfer.

	// ErrNoPattern reports a limit rule that defines no pattern.
	ErrNoPattern = errors.New("No pattern defined in LimitRule")
	// ErrEmptyProxyURL reports an empty proxy URL list.
	ErrEmptyProxyURL = errors.New("Proxy URL list is empty")
	// ErrAbortedAfterHeaders reports a transfer that was aborted once the
	// response headers had been received.
	ErrAbortedAfterHeaders = errors.New("Aborted after receiving response headers")
)
// Init installs the Collector's default configuration and prepares the
// process for logging.
//
// It sets the default User-Agent and a background Context, publishes the
// working directory and the log directory through the "root" and "logFolder"
// environment variables (LogFile reads "logFolder"), makes sure the log
// directory exists, and creates a fresh cookie jar shared with a new HTTP
// client.
//
// Init terminates the process through log.Fatalf when the working directory
// cannot be determined or the cookie jar cannot be created. Failures to set
// the environment variables or to create the log directory are logged as
// errors and otherwise tolerated.
func (c *Collector) Init() {
	log.Infoln("[ForumPoster] - Starting INIT")

	// NOTE(review): the trailing "2s" looks like a typo in the default
	// User-Agent; left untouched so the header sent to servers does not change.
	c.UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.362s"
	c.Context = context.Background()

	// Root folder
	cwd, err := os.Getwd()
	if err != nil {
		log.Fatalf("Failed to determine working directory: %s", err)
	}

	// The folders are shared with the rest of the process through environment
	// variables. The log folder keeps its exact historical form (forward
	// slashes, trailing slash) because readers of the variable may depend on it.
	logFolder := fmt.Sprintf("%s/log/", cwd)
	if err := os.Setenv("root", cwd); err != nil {
		log.Errorf("Failed to set root environment variable: %s", err)
	}
	if err := os.Setenv("logFolder", logFolder); err != nil {
		log.Errorf("Failed to set logFolder environment variable: %s", err)
	}

	// MkdirAll succeeds without doing anything when the directory already
	// exists, so no prior existence check is needed; its error is reported
	// instead of being dropped.
	if err := os.MkdirAll(logFolder, os.ModePerm); err != nil {
		log.Errorf("Failed to create log folder %s: %s", logFolder, err)
	}

	// set cookie
	c.Cookie, err = cookiejar.New(nil)
	if err != nil {
		log.Fatalf("Failed to set cookie: %s", err)
	}

	// Initialize client with cookie shared. The timeout bounds the whole
	// exchange (connect, redirects, reading the body) so a stalled server
	// cannot block a request forever. Options run after Init and may replace
	// the client or change its Timeout.
	c.Client = &http.Client{
		Jar:     c.Cookie,
		Timeout: 60 * time.Second,
	}
}
// NewCollector builds a Collector, runs Init on it to install the default
// configuration and then applies the given options in the order they were
// passed, so an option can override anything Init has set.
func NewCollector(options ...CollectorOption) *Collector {
	collector := new(Collector)
	collector.Init()
	for i := range options {
		options[i](collector)
	}
	return collector
}
// LogLevel sets the verbosity of the package-wide logger and logs which level
// was selected.
//
// ll is matched case-insensitively and with surrounding whitespace ignored:
// "debug" selects the debug level, "trace" the trace level, and any other
// value (including the empty string) selects the info level.
func LogLevel(ll string) {
	// Normalise first so that "DEBUG" or " Trace " are not silently treated as
	// unknown values and downgraded to info.
	switch strings.ToLower(strings.TrimSpace(ll)) {
	case "debug":
		log.SetLevel(log.DebugLevel)
		log.Infoln("DebugLevel LOG Set")
	case "trace":
		log.SetLevel(log.TraceLevel)
		log.Infoln("TraceLevel LOG Set")
	default:
		log.SetLevel(log.InfoLevel)
		log.Infoln("InfoLevel LOG Set")
	}
}
// LogFile configures the output and the format of the package-wide logger.
//
// When f is false, logs keep going to the current output and are formatted as
// text.
//
// When f is true, a file named "run-<day>-<month>-<year>--<time>.log" is
// opened inside the directory named by the "logFolder" environment variable
// (set by Collector.Init), logs are written to both standard error and that
// file, caller reporting is switched on and the format becomes JSON. The file
// is closed by a logrus exit handler. If the file cannot be opened the process
// is terminated through log.Fatalf.
//
// In both modes the caller, when reported, is shortened to the bare function
// name and "file:line".
func LogFile(f bool) {
	if !f {
		log.SetFormatter(&log.TextFormatter{CallerPrettyfier: logCallerPrettyfier})
		return
	}

	runID := time.Now().Format("run-02-01-2006--15-04-05")
	logLocation := filepath.Join(os.Getenv("logFolder"), runID+".log")

	// O_APPEND: the file name only has one-second resolution, so the file may
	// already exist. Appending keeps earlier entries intact instead of
	// overwriting them from offset 0 and leaving a stale tail behind.
	logFile, err := os.OpenFile(logLocation, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644)
	if err != nil {
		log.Fatalf("Failed to open log file %s for output: %s", logLocation, err)
	}

	log.SetOutput(io.MultiWriter(os.Stderr, logFile))
	log.RegisterExitHandler(func() {
		// Best effort: the process is exiting, nothing useful can be done with
		// a close error.
		_ = logFile.Close()
	})
	log.WithFields(log.Fields{"at": "start", "log-location": logLocation}).Info()

	logrus.SetReportCaller(true)
	log.SetFormatter(&logrus.JSONFormatter{CallerPrettyfier: logCallerPrettyfier})
}

// logCallerPrettyfier shortens a caller frame for log output: it returns the
// function name without its package/receiver qualification (the text after
// the last ".") and the location as "<file base name>:<line>".
func logCallerPrettyfier(f *runtime.Frame) (string, string) {
	// LastIndex yields -1 when there is no ".", which makes the slice start at
	// 0 and keeps the whole name.
	funcName := f.Function[strings.LastIndex(f.Function, ".")+1:]
	return funcName, fmt.Sprintf("%s:%d", path.Base(f.File), f.Line)
}
// fetch sends the HTTP request described by r through c.Client and returns
// the response body.
//
// The request body is r.Body for every method except "GET". When r.Writer is
// set, its form-data content type is used as the Content-Type header. The
// User-Agent header comes from the Collector and the Referer header is r.URL
// itself. The request is bound to c.Context (or a background context when
// that is nil), so cancelling the context aborts the exchange.
//
// Side effects on the Collector: FinalURL is set to the URL of the last
// request the client made (the address reached after redirects), and Sid is
// set from any response cookie whose name contains "sid".
//
// A status code other than 200 is logged as an error but is not returned as
// one; the body is still returned. An error is returned when the request
// cannot be built, when the exchange fails, or when the body cannot be read
// completely. After a successful exchange fetch pauses for two seconds (or
// until the context is cancelled) before returning.
func (c *Collector) fetch(r *Request) ([]byte, error) {
	// Fields common to every log entry about this request.
	fields := log.Fields{
		"payload": r.Body,
		"writer":  r.Writer,
		"URL":     r.URL,
		"Method":  r.Method,
	}
	log.WithFields(fields).Debug("[Forum-Poster] - Value Request")

	// Only non-GET requests carry a body.
	var payload io.Reader
	if r.Method != "GET" {
		payload = r.Body
	}

	req, err := http.NewRequest(r.Method, r.URL, payload)
	if err != nil {
		log.WithFields(fields).WithField("Error", err).Error("[Forum-Poster] - Make Request")
		return nil, fmt.Errorf("[Forum-Poster] - Error in request: %s", err)
	}

	// Bind the request to the Collector's context so that cancellation works.
	// WithContext panics on a nil context, hence the fallback for a Collector
	// that was built without Init.
	ctx := c.Context
	if ctx == nil {
		ctx = context.Background()
	}
	req = req.WithContext(ctx)

	//req.Header.Add("Cookie", c.Cookie)
	if r.Writer != nil {
		req.Header.Set("Content-Type", r.Writer.FormDataContentType())
	}
	req.Header.Set("User-Agent", c.UserAgent)
	log.Tracef("User-Agent: %s", c.UserAgent)
	req.Header.Set("Referer", r.URL)
	log.Tracef("Referer: %s", req.Referer())

	res, err := c.Client.Do(req)
	if err != nil {
		log.WithFields(fields).WithField("Error", err).Error("[Forum-Poster] - Get Response")
		return nil, fmt.Errorf("[Forum-Poster] - Error in response: %s", err)
	}
	// Registered right after the error check so no later return path can leak
	// the connection.
	defer res.Body.Close()

	if res.StatusCode == 302 {
		log.Debugln("Redirect to:", res.Header.Get("Location"))
	}
	log.Debugln("[Forum-Poster] - Staus Response", res.StatusCode)
	if res.StatusCode != 200 {
		// Deliberately not an error return: callers still get the body.
		log.WithFields(fields).WithField("StatusCode", res.StatusCode).Error("[Forum-Poster] - Get Response")
	}

	body, err := ioutil.ReadAll(res.Body)
	if err != nil {
		// A failed read means a truncated body; report it instead of handing
		// partial content back as a success.
		log.WithFields(fields).WithField("Error", err).Error("[Forum-Poster] - Read Response")
		return nil, fmt.Errorf("[Forum-Poster] - Error reading response body: %s", err)
	}

	// The Request in the Response is the last URL the client tried to access.
	c.FinalURL = res.Request.URL.String()
	log.Debugf("The URL you ended up at is: %v\n", c.FinalURL)

	// Parse the Set-Cookie headers once and reuse the result.
	cookies := res.Cookies()
	log.Debugln("Cookie from", r.URL, "are:", cookies)
	log.Traceln("[Forum-Poster] - HTML response: ", string(body))
	for _, cookie := range cookies {
		log.Tracef("  %s: %s\n", cookie.Name, cookie.Value)
		if strings.Contains(cookie.Name, "sid") {
			c.Sid = cookie.Value
		}
	}

	// Sleep every request so POST will work. The pause ends early when the
	// context is cancelled; the body already received is still returned.
	pause := time.NewTimer(2 * time.Second)
	defer pause.Stop()
	select {
	case <-pause.C:
	case <-ctx.Done():
	}
	return body, nil
}
// UserAgent returns a CollectorOption that replaces the Collector's
// User-Agent string with ua.
func UserAgent(ua string) CollectorOption {
	setUserAgent := func(collector *Collector) {
		collector.UserAgent = ua
	}
	return setUserAgent
}