-
Notifications
You must be signed in to change notification settings - Fork 4
/
db.go
348 lines (307 loc) · 8.44 KB
/
db.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
package main
import (
"bytes"
"compress/gzip"
"database/sql"
"errors"
"fmt"
_ "github.com/mattn/go-sqlite3"
"io/ioutil"
"log"
"net/http"
"net/url"
"os"
"time"
)
// CreateDBScheme builds a brand-new cache database at dbPath through the
// DB_DRIVER sql driver. It creates the html cache table and then a unique
// index on its url column.
//
// A path that already exists is never touched: the function logs and returns
// an "already exists" error instead. Failures to open the database or to run
// either DDL statement are logged and returned as-is.
func CreateDBScheme(dbPath string) (err error) {
	if _, statErr := os.Stat(dbPath); nil == statErr {
		// db file already exists
		log.Printf("[ERROR] db %s already exists", dbPath)
		return errors.New("db " + dbPath + " already exists")
	}

	conn, openErr := sql.Open(DB_DRIVER, dbPath)
	if nil != openErr {
		log.Printf("[ERROR] failed to create db %s: %s", dbPath, openErr)
		return openErr
	}
	defer conn.Close()

	// The schema is applied in order; each step carries the log format used
	// when it fails.
	steps := []struct {
		ddl     string
		failFmt string
	}{
		{
			ddl: fmt.Sprintf(`
CREATE TABLE %s (
id INTEGER PRIMARY KEY AUTOINCREMENT,
url TEXT NOT NULL UNIQUE,
date TEXT NOT NULL,
cache_control TEXT,
lastmod TEXT,
etag TEXT,
expires TEXT,
html BLOB
);
`, DB_HTML_CACHE_TABLE),
			failFmt: "[ERROR] failed to create tables in db %s, sql is %s, error is %s",
		},
		{
			ddl: fmt.Sprintf(`
CREATE UNIQUE INDEX IF NOT EXISTS html_cache_url_index ON %s (url);
`, DB_HTML_CACHE_TABLE),
			failFmt: "[ERROR] failed to crate url index in db %s, sql is %s, error is %s",
		},
	}

	for _, step := range steps {
		if _, err = conn.Exec(step.ddl); nil != err {
			log.Printf(step.failFmt, dbPath, step.ddl, err)
			return err
		}
	}
	return nil
}
// ExecQuerySQL runs sqlStr with args against the database file at dbPath and
// decodes every result row into an HtmlCache.
//
// Each row must provide, in this order: url, date, cache_control, lastmod,
// etag, expires, html. The date, lastmod and expires columns are parsed with
// http.ParseTime; an empty column leaves the corresponding field nil. When
// *gGzipCompressLevel is non-zero the html column is gunzipped; data that does
// not decode as gzip is returned exactly as stored.
//
// expectSize is only a capacity hint for the result slice; values <= 0 mean
// the size is unknown.
//
// Errors: a missing db file, or a failure to open, prepare, query, scan or
// iterate, is logged and returned with a nil slice. A statement that yields no
// rows returns DBNoRecordError. Per-row decode problems (bad url, bad time
// string, undecodable gzip data) are logged but do not fail the call: the row
// is still returned, with a nil URL if the url did not parse and a zero time
// behind the pointer if a time string did not parse.
func ExecQuerySQL(dbPath string, expectSize int, sqlStr string, args ...interface{}) (caches []*HtmlCache, err error) {
	if _, err = os.Stat(dbPath); nil != err {
		// db file not exists
		log.Printf("[ERROR] db %s not exists", dbPath)
		return nil, err
	}

	db, err := sql.Open(DB_DRIVER, dbPath)
	if nil != err {
		log.Printf("[ERROR] failed to open db %s: %s", dbPath, err)
		return nil, err
	}
	defer db.Close()

	statmt, err := db.Prepare(sqlStr)
	if nil != err {
		log.Printf("[ERROR] failed to prepare statment %s for db %s: %s", sqlStr, dbPath, err)
		return nil, err
	}
	defer statmt.Close()

	rows, err := statmt.Query(args...)
	if nil != err {
		log.Printf("[ERROR] failed to query with statment %s, %s", sqlStr, err)
		return nil, err
	}
	defer rows.Close()

	if expectSize > 0 {
		caches = make([]*HtmlCache, 0, expectSize)
	}

	// parseStamp converts a stored HTTP date. Empty input yields nil. A value
	// that does not parse is logged and yields a pointer to the zero time, so
	// callers that dereference the field keep working.
	parseStamp := func(label, raw string) *time.Time {
		if "" == raw {
			return nil
		}
		stamp := new(time.Time)
		parsed, parseErr := http.ParseTime(raw)
		if nil != parseErr {
			log.Printf("[ERROR] failed to parse %s %s: %s", label, raw, parseErr)
		}
		*stamp = parsed
		return stamp
	}

	for rows.Next() {
		c := new(HtmlCache)
		var urlStr, lastmod, expires, dateStr string
		err = rows.Scan(
			&urlStr,
			&dateStr,
			&c.CacheControl,
			&lastmod,
			&c.Etag,
			&expires,
			&c.Html)
		if nil != err {
			log.Printf("[ERROR] failed to scan data from result row: %s", err)
			return nil, err
		}

		// decompress html data
		if 0 != *gGzipCompressLevel {
			gzipR, gzErr := gzip.NewReader(bytes.NewReader(c.Html))
			if nil != gzErr {
				// Not a gzip stream: keep the bytes exactly as stored.
				if *gDebug {
					log.Printf("[WARN] failed to decompress html data for %s: %s", urlStr, gzErr)
				}
			} else {
				plain, readErr := ioutil.ReadAll(gzipR)
				gzipR.Close()
				if nil != readErr {
					// Never hand out a silently truncated document; fall
					// back to the stored bytes like the branch above.
					log.Printf("[ERROR] failed to decompress html data for %s: %s", urlStr, readErr)
				} else {
					c.Html = plain
				}
			}
		}

		// Decode problems are local to the row and must not leak into the
		// function's returned error, hence the row-scoped error variables.
		parsedURL, urlErr := url.Parse(urlStr)
		if nil != urlErr {
			log.Printf("[ERROR] failed to parse url from rawurl string %s: %s", urlStr, urlErr)
		}
		c.URL = parsedURL

		c.LastModified = parseStamp("lastmod time string", lastmod)
		c.Expires = parseStamp("expires time string", expires)
		c.Date = parseStamp("cache date", dateStr)

		caches = append(caches, c)
	}

	// rows.Next also returns false when stepping the statement failed; that
	// must not be mistaken for an empty result.
	if err = rows.Err(); nil != err {
		log.Printf("[ERROR] failed to read result rows of statment %s: %s", sqlStr, err)
		return nil, err
	}

	// no result is also an error
	if 0 == len(caches) {
		return nil, DBNoRecordError{}
	}
	return caches, nil
}
// ExecInsertUpdateSQL writes every usable entry of caches to the database file
// at dbPath by executing the prepared statement sqlStr once per entry, all
// inside a single transaction.
//
// For each entry the statement receives seven positional arguments, in this
// order: url, date, cache_control, lastmod, etag, expires, html. Times are
// formatted with http.TimeFormat; lastmod and expires are empty strings when
// the entry does not carry them. When *gGzipCompressLevel is non-zero the html
// is gzip-compressed at that level before it is stored; if compression fails
// the html is stored uncompressed.
//
// Entries that are nil, or that have no Date or no URL, are logged and
// skipped, as are all entries when the gzip level is rejected by the gzip
// package. The first failing Exec aborts the batch. A missing db file, or a
// failure to open, begin, prepare, exec or commit, is logged and returned, and
// nothing from the batch is persisted.
func ExecInsertUpdateSQL(caches []*HtmlCache, dbPath string, sqlStr string) (err error) {
	if _, err = os.Stat(dbPath); nil != err {
		// db file not exists
		log.Printf("[ERROR] db %s not exists", dbPath)
		return err
	}

	db, err := sql.Open(DB_DRIVER, dbPath)
	if nil != err {
		log.Printf("[ERROR] failed to open db %s: %s", dbPath, err)
		return err
	}
	defer db.Close()

	trans, err := db.Begin()
	if nil != err {
		log.Printf("[ERROR] failed to start a new transaction for db %s: %s", dbPath, err)
		return err
	}
	// Undo partial writes on every failure path below. err is the named
	// result, so at this point it holds whatever the function returns.
	defer func() {
		if nil != err {
			// Best effort: after a failed Commit the transaction is already
			// finished and Rollback merely reports that.
			_ = trans.Rollback()
		}
	}()

	statmt, err := trans.Prepare(sqlStr)
	if nil != err {
		log.Printf("[ERROR] failed to prepare a new statement for db %s, sql %s: %s", dbPath, sqlStr, err)
		return err
	}
	defer statmt.Close()

	// Collected only for the commit-failure log line.
	var urls bytes.Buffer
	for _, c := range caches {
		if nil == c {
			log.Printf("[ERROR] cache is nil, ignore this one")
			continue
		}
		if nil == c.Date {
			log.Printf("[ERROR] cache date is nil, will not save this sucker in cache db")
			continue
		}
		if nil == c.URL {
			log.Printf("[ERROR] cache url is nil, ignore this one")
			continue
		}

		htmlData := c.Html
		if 0 != *gGzipCompressLevel {
			// compress html data
			var htmlBuff bytes.Buffer
			gzipW, gzErr := gzip.NewWriterLevel(&htmlBuff, *gGzipCompressLevel)
			if nil != gzErr {
				log.Printf("[ERROR] failed to create gzip writer: %s", gzErr)
				continue
			}
			_, gzErr = gzipW.Write(c.Html)
			if nil == gzErr {
				// Close flushes the remaining data and the gzip footer; the
				// stream is only valid if this succeeds too.
				gzErr = gzipW.Close()
			}
			if nil != gzErr {
				// on write error, cache html is saved uncompressed
				log.Printf("[ERROR] gzip failed to compress html data: %s, will not compress the html data", gzErr)
			} else {
				htmlData = htmlBuff.Bytes()
			}
		}

		var lastmod, expires string
		if nil != c.LastModified {
			lastmod = c.LastModified.Format(http.TimeFormat)
		}
		if nil != c.Expires {
			expires = c.Expires.Format(http.TimeFormat)
		}

		urlStr := c.URL.String()
		urls.WriteString(urlStr)
		urls.WriteByte(' ')

		_, err = statmt.Exec(urlStr, c.Date.Format(http.TimeFormat), c.CacheControl, lastmod, c.Etag, expires, htmlData)
		if nil != err {
			log.Printf("[ERROR] failed to exec insert/update sql %s: %s", sqlStr, err)
			return err
		}
	}

	if err = trans.Commit(); nil != err {
		log.Printf("[ERROR] failed to save urls %s: %s", urls.String(), err)
		return err
	}
	return nil
}
func DelHtmlCacheByURL(dbPath, urlStr string) error {
_, err := ExecQuerySQL(
dbPath,
0,
fmt.Sprintf("DELETE FROM %s WHERE url = ?", DB_HTML_CACHE_TABLE),
urlStr)
if nil != err {
switch err.(type) {
case DBNoRecordError:
return nil
default:
log.Printf("[ERROR] failed to delete %s from cache database %s", urlStr, dbPath)
return err
}
}
return nil
}
// GetHtmlCacheByURL looks up the cache entry stored for urlStr in the database
// at dbPath and returns the first matching row.
//
// On any lookup error the returned cache is nil and the error from
// ExecQuerySQL is passed through unchanged. A DBNoRecordError (nothing cached
// for the url) is only mentioned in the log when *gVerbose is set; every other
// error is logged as an error.
func GetHtmlCacheByURL(dbPath, urlStr string) (cache *HtmlCache, err error) {
	query := fmt.Sprintf("SELECT url, date, cache_control, lastmod, etag, expires, html FROM %s WHERE url = ?", DB_HTML_CACHE_TABLE)

	found, err := ExecQuerySQL(dbPath, 1, query, urlStr)
	if nil == err {
		return found[0], nil
	}

	if _, missing := err.(DBNoRecordError); missing {
		if *gVerbose {
			log.Printf("cache not found for %s", urlStr)
		}
	} else {
		log.Printf("[ERROR] failed to get cache from db %s by url %s: %s", dbPath, urlStr, err)
	}
	return nil, err
}
// PutHtmlCache inserts the given cache entries as new rows into the html cache
// table of the database at dbPath, delegating the actual write to
// ExecInsertUpdateSQL.
//
// A failure reported by ExecInsertUpdateSQL is logged and returned. When
// *gVerbose is set, one line is logged per entry after a successful write.
func PutHtmlCache(dbPath string, caches []*HtmlCache) (err error) {
	sqlInsertHtml := fmt.Sprintf(`
INSERT INTO %s (url, date, cache_control, lastmod, etag, expires, html) VALUES (?, ?, ?, ?, ?, ?, ?);
`, DB_HTML_CACHE_TABLE)

	if err = ExecInsertUpdateSQL(caches, dbPath, sqlInsertHtml); nil != err {
		log.Printf("[ERROR] failed to insert cache records to db %s: %s", dbPath, err)
		return err
	}

	if *gVerbose {
		for _, c := range caches {
			// ExecInsertUpdateSQL skips nil entries and entries without a
			// Date; they were not saved and dereferencing them would panic.
			if nil == c || nil == c.Date || nil == c.URL {
				continue
			}
			log.Printf("successully saved cache for %s", c.URL.String())
		}
	}
	return nil
}
// UpdateHtmlCache overwrites the stored row of every given cache entry in the
// database at dbPath, matching rows by url, and delegates the actual write to
// ExecInsertUpdateSQL.
//
// A single parameterized statement serves all entries. ExecInsertUpdateSQL
// binds url, date, cache_control, lastmod, etag, expires, html as positional
// arguments 1..7, and SQLite's numbered parameters (?N) let the url argument
// drive the WHERE clause. The url is therefore never spliced into the SQL
// text, and each entry updates its own row.
//
// A failure reported by ExecInsertUpdateSQL is logged and returned. When
// *gVerbose is set, one line is logged per entry after a successful write.
func UpdateHtmlCache(dbPath string, caches []*HtmlCache) (err error) {
	sqlUpdateHtml := fmt.Sprintf(
		"UPDATE %s SET date = ?2, cache_control = ?3, lastmod = ?4, etag = ?5, expires = ?6, html = ?7 WHERE url = ?1;",
		DB_HTML_CACHE_TABLE)

	if err = ExecInsertUpdateSQL(caches, dbPath, sqlUpdateHtml); nil != err {
		log.Printf("[ERROR] failed to update cache records to db %s: %s", dbPath, err)
		return err
	}

	if *gVerbose {
		for _, c := range caches {
			// ExecInsertUpdateSQL skips nil entries and entries without a
			// Date; they were not written and dereferencing them would panic.
			if nil == c || nil == c.Date || nil == c.URL {
				continue
			}
			log.Printf("successully updated cache for %s", c.URL.String())
		}
	}
	return nil
}
// RemoveExpiredCache deletes every cache entry in the database at dbPath whose
// stored date plus cacheLifeTime lies in the past.
//
// All rows are loaded through ExecQuerySQL and expired ones are removed one by
// one with DelHtmlCacheByURL. An empty cache (DBNoRecordError) is not a
// failure: there is simply nothing to expire. Any other load error is logged
// and returned. Entries without a Date or URL cannot be evaluated and are
// skipped. A failing delete does not stop the sweep; the first such error is
// returned once all entries have been visited.
func RemoveExpiredCache(dbPath string, cacheLifeTime time.Duration) (err error) {
	sqlLoadCache := fmt.Sprintf("SELECT url, date, cache_control, lastmod, etag, expires, html FROM %s", DB_HTML_CACHE_TABLE)
	caches, err := ExecQuerySQL(dbPath, -1, sqlLoadCache)
	if nil != err {
		if _, empty := err.(DBNoRecordError); empty {
			return nil
		}
		log.Printf("[ERROR] failed to load cache from cache db %s: %s", dbPath, err)
		return err
	}

	// One reference instant for the whole sweep.
	now := time.Now()
	for _, c := range caches {
		if nil == c || nil == c.Date || nil == c.URL {
			continue
		}
		if !c.Date.Add(cacheLifeTime).Before(now) {
			continue
		}
		if *gVerbose {
			log.Printf("[WARN] trying to remove expired cache entry %s, date %s", c.URL.String(), c.Date.Format(http.TimeFormat))
		}
		if delErr := DelHtmlCacheByURL(dbPath, c.URL.String()); nil != delErr && nil == err {
			err = delErr
		}
	}
	return err
}