-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.go
367 lines (297 loc) · 7.4 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
package main
import (
"encoding/json"
"errors"
"fmt"
"io/ioutil"
"net/http"
"os"
"path/filepath"
"time"
)
// Global configuration shared by the whole program.
var (
	// ExportType is the format requested from the export endpoint; it is
	// also used as the file extension of downloaded documents.
	ExportType = "md"
	// SleepTime is how long httpRequest waits after an HTTP 429 response
	// before trying again.
	SleepTime = 10 * time.Second
	// Retry10, Retry2 and Retry20 are the retry budgets passed to httpRequest
	// by the different API calls.
	Retry10 = 10
	Retry2  = 2
	Retry20 = 20
	// shimo_sid is the value sent as the "shimo_sid" cookie on every request.
	shimo_sid = ""
)
// API endpoints. Entries containing %s verbs are fmt.Sprintf templates.
var (
	// ROOT_URL is requested when no folder id is given.
	ROOT_URL = "https://shimo.im/lizard-api/files"
	// LIST_URL takes a folder id.
	LIST_URL = "https://shimo.im/lizard-api/files?folder=%s"
	// EXPORT_URL takes a file id and an export type, in that order.
	EXPORT_URL = "https://shimo.im/lizard-api/office-gw/files/export?fileGuid=%s&type=%s"
	// QUERY_URL takes an export task id.
	QUERY_URL = "https://shimo.im/lizard-api/office-gw/files/export/progress?taskId=%s"
)
// FileInfo describes one entry of a folder listing together with the
// bookkeeping this program attaches to it.
type FileInfo struct {
	// Path is the local destination path built by httpGetInfo from the parent
	// path and the entry title; it carries no file extension.
	Path string
	// Id is decoded from the "guid" field of the listing response.
	Id string `json:"guid"`
	// Title is decoded from the "name" field of the listing response.
	Title string `json:"name"`
	// Type is decoded from the "type" field; httpGetInfo acts on the values
	// "folder" and "newdoc".
	Type string `json:"type"`
	// TaskId is the export task id; TraverseTree stores the result of
	// httpExport here before downloading.
	TaskId string
}
// FileList is the list of documents found directly inside one folder.
type FileList []FileInfo
// DirInfo is a node of the folder tree: the folder's own FileInfo plus
// pointers to its sub-folders and documents.
type DirInfo struct {
	FileInfo
	// Dirs points to the sub-folders; nil until StructTree fills it in.
	Dirs *DirList
	// Files points to the documents; nil until StructTree fills it in.
	Files *FileList
}
// DirList is the list of sub-folders found directly inside one folder.
type DirList []DirInfo
// FileResponse is the decoding target for the folder-listing response in
// httpGetInfo: a JSON array of entries.
type FileResponse []FileInfo
// ExportResponse is the decoding target for the export request in httpExport.
// Only TaskId is read by this program.
type ExportResponse struct {
	Status int `json:"status"`
	Message string `json:"message"`
	// TaskId identifies the export task; httpExport panics when it is empty.
	TaskId string `json:"taskId,omitempty"`
}
// TaskResponse is the decoding target for the export-progress query in
// httpLinkQuery, which treats Status == 0 together with a non-empty
// Data.DownloadUrl as "export finished".
type TaskResponse struct {
	Status int `json:"status"`
	Code int `json:"code"`
	Data struct {
		// Progress is only printed by httpLinkQuery.
		Progress int `json:"progress"`
		// DownloadUrl is the link handed to httpDownload once available.
		DownloadUrl string `json:"downloadUrl"`
		FileSize int `json:"fileSize"`
		CostTime int `json:"costTime"`
	} `json:"data,omitempty"`
}
// String renders the folder node on a single line: the embedded FileInfo
// fields followed by the pointer values of its Dirs and Files lists.
func (tree DirInfo) String() string {
	return fmt.Sprintf(
		"type: %s id: %s title: %s path: %s &dirs: %p &files: %p",
		tree.Type, tree.Id, tree.Title, tree.Path, tree.Dirs, tree.Files,
	)
}
// String renders the entry's type, id, title and local path on a single line.
func (file FileInfo) String() string {
	return fmt.Sprintf(
		"type: %s id: %s title: %s path: %s",
		file.Type, file.Id, file.Title, file.Path,
	)
}
// 发起http请求, 必须设置cookie和refer
func httpRequest(uri string, retry int) ([]byte, error) {
defaultstr := []byte("http request error occur")
// 创建一个http.Client
client := &http.Client{}
//fmt.Println(uri)
// 创建一个http.Request
req, err := http.NewRequest("GET", uri, nil)
if err != nil {
return defaultstr, err
}
req.Header.Set("referer", "https://shimo.im/desktop")
// 创建一个Cookie
cookie := &http.Cookie{
Name: "shimo_sid",
Value: shimo_sid,
}
// 将Cookie添加到Request中
req.AddCookie(cookie)
// 使用Client发送Request
resp, err := client.Do(req)
if nil != err {
return defaultstr, err
}
defer resp.Body.Close()
if resp.StatusCode == 429 {
if retry > 0 {
fmt.Println("429 too many requests, retry: ", retry, "...", uri)
time.Sleep(SleepTime)
return httpRequest(uri, retry-1)
}
}
if resp.StatusCode != 200 {
return defaultstr, errors.New("status error: " + resp.Status)
}
body, err := ioutil.ReadAll(resp.Body)
if nil != err {
return defaultstr, err
}
return body, nil
}
// httpGetInfo fetches the listing of the folder identified by id (the root
// listing when id is empty) and appends what it finds to d and f.
//
// Entries of type "folder" are appended to *d and entries of type "newdoc"
// to *f; every other type is ignored. Each entry's Path is path joined with
// a file-system-safe form of its title, while Title keeps the original text.
//
// It panics when the HTTP request fails. A JSON decode error is printed and
// whatever was decoded is still processed.
func httpGetInfo(path string, id string, d *DirList, f *FileList) {
	uri := ROOT_URL
	if id != "" {
		uri = fmt.Sprintf(LIST_URL, id)
	}
	b, err := httpRequest(uri, Retry2)
	if err != nil {
		fmt.Println(err)
		panic(err)
	}
	var result FileResponse
	if err := json.Unmarshal(b, &result); err != nil {
		fmt.Println(err)
	}
	for i := range result {
		entry := FileInfo{
			// Titles come from the remote service; sanitising them keeps
			// every entry inside its parent directory.
			Path:  filepath.Join(path, safePathName(result[i].Title)),
			Id:    result[i].Id,
			Title: result[i].Title,
			Type:  result[i].Type,
		}
		switch entry.Type {
		case "folder":
			*d = append(*d, DirInfo{FileInfo: entry})
		case "newdoc":
			*f = append(*f, entry)
		default:
			// Other entry types are not exported.
		}
	}
}

// safePathName turns a remote title into a single path component: path
// separators and NUL bytes become '_', and the names "", "." and ".." are
// replaced by "_" so they cannot refer to the parent or current directory.
func safePathName(title string) string {
	switch title {
	case "", ".", "..":
		return "_"
	}
	// The replaced characters are all ASCII, so a byte-wise pass cannot
	// corrupt multi-byte UTF-8 sequences.
	name := []byte(title)
	for i, c := range name {
		if c == '/' || c == '\\' || c == 0 {
			name[i] = '_'
		}
	}
	return string(name)
}
// 获取导出文件,默认按照md格式
func httpExport(id string) (tid string) {
exportType := ExportType
fmt.Println("[httpExport]: start export:")
uri := fmt.Sprintf(EXPORT_URL, id, exportType)
b, err := httpRequest(uri, Retry10)
var result ExportResponse
if err != nil {
fmt.Println(err)
panic(err)
}
err = json.Unmarshal(b, &result)
if err != nil {
fmt.Println(err)
}
if result.TaskId == "" {
panic(errors.New("TaskId empty"))
}
fmt.Printf("[TaskId]: %+v\n", result.TaskId)
return result.TaskId
}
// httpLinkQuery polls the progress of export task tid until the service
// reports status 0 together with a download URL, and returns that URL.
//
// It waits two seconds between polls and keeps polling for as long as the
// task is unfinished. It panics when a poll request fails. A JSON decode
// error is printed and counts as "not finished yet".
func httpLinkQuery(tid string) string {
	uri := fmt.Sprintf(QUERY_URL, tid)
	for {
		fmt.Println("[httpLinkQuery]: start query progress:")
		b, err := httpRequest(uri, Retry20)
		if err != nil {
			fmt.Println(err)
			panic(err)
		}

		// A fresh value per poll so fields from an earlier response can
		// never leak into this one.
		var result TaskResponse
		if err := json.Unmarshal(b, &result); err != nil {
			fmt.Println("[httpLinkQuery]: decode progress response:", err)
		}
		fmt.Printf("[Progress]: %+v\n", result.Data.Progress)
		fmt.Printf("[DownloadUrl]: %+v\n", result.Data.DownloadUrl)

		if result.Status == 0 && result.Data.DownloadUrl != "" {
			return result.Data.DownloadUrl
		}
		fmt.Println("progress not complete, retry ... ", uri)
		time.Sleep(2 * time.Second)
	}
}
// httpDownload fetches uri and returns the response body.
//
// When the request fails the error is printed and nil is returned, so that
// callers never mistake an error placeholder for document content.
func httpDownload(uri string) []byte {
	fmt.Println("[httpDownload]: start download:", uri)
	b, err := httpRequest(uri, Retry2)
	if err != nil {
		fmt.Println(err)
		return nil
	}
	return b
}
// getDirInfo collects the folders and documents found directly under the
// folder identified by id, appending them to d and f. It delegates to
// httpGetInfo without further processing.
func getDirInfo(path, id string, d *DirList, f *FileList) {
	httpGetInfo(path, id, d, f)
}
// StructTree fills in tree.Files and tree.Dirs from the remote listing and
// then does the same for every sub-folder, depth first.
func StructTree(tree *DirInfo) {
	subdirs := DirList{}
	docs := FileList{}
	getDirInfo(tree.Path, tree.Id, &subdirs, &docs)

	tree.Files = &docs
	tree.Dirs = &subdirs

	// Recurse through the slice elements themselves (not copies) so the
	// children's Files/Dirs end up stored in the tree.
	for i := range subdirs {
		StructTree(&subdirs[i])
	}
}
// TraverseTree walks the folder tree depth first. For every document of a
// node it starts an export, records the task id on the document and
// downloads the result to disk; afterwards it descends into the sub-folders.
//
// A nil tree is ignored. A node without sub-folder list (nil pointer or nil
// slice) is reported with a "dir nil" message and not descended into.
func TraverseTree(tree *DirInfo) {
	if tree == nil {
		return
	}
	if tree.Files != nil {
		files := *tree.Files
		for i := range files {
			fmt.Println("-------------------")
			fmt.Println("[TraverseTree]: ", files[i])
			// files shares its backing array with *tree.Files, so the task
			// id is recorded in the tree itself.
			files[i].TaskId = httpExport(files[i].Id)
			DiskDownload(files[i])
			fmt.Println("-------------------")
		}
	}
	// Check the pointer before dereferencing it: a node that was never
	// populated has Dirs == nil.
	if tree.Dirs == nil || *tree.Dirs == nil {
		fmt.Println(tree.Id, "dir nil")
		return
	}
	dirs := *tree.Dirs
	for i := range dirs {
		TraverseTree(&dirs[i])
	}
}
// duplicateTitle returns a path that does not exist yet: path itself when
// nothing exists there, otherwise path with "(1)" appended as many times as
// needed.
//
// It panics when the existence check fails for any reason other than "does
// not exist"; the panic value wraps the underlying error.
func duplicateTitle(path string) string {
	for {
		_, err := os.Stat(path)
		if err != nil {
			if os.IsNotExist(err) {
				return path
			}
			panic(fmt.Errorf("duplicateTitle error: %w", err))
		}
		path += "(1)"
	}
}
// DiskDownload waits for the export task f.TaskId to finish, downloads the
// result and writes it to f.Path plus the "."+ExportType extension, creating
// parent directories as needed.
//
// When a file with that name already exists, "(1)" is appended to the name
// (before the extension) until the name is free, so documents sharing a
// title do not overwrite each other.
//
// It panics when the directory cannot be created or the existence check
// fails. A failure to create or write the file is printed and the file is
// skipped, so one bad entry does not abort the remaining downloads.
func DiskDownload(f FileInfo) {
	dl := httpLinkQuery(f.TaskId)
	b := httpDownload(dl)

	if err := os.MkdirAll(filepath.Dir(f.Path), 0755); err != nil {
		panic(err)
	}

	target := uniqueExportPath(f.Path, "."+ExportType)
	file, err := os.Create(target)
	if err != nil {
		fmt.Println("[DiskDownload]: create file:", err)
		return
	}
	if _, err := file.Write(b); err != nil {
		file.Close()
		fmt.Println("[DiskDownload]: write file:", err)
		return
	}
	// Close explicitly: a failed close can mean the data never reached disk.
	if err := file.Close(); err != nil {
		fmt.Println("[DiskDownload]: close file:", err)
	}
}

// uniqueExportPath returns base+ext, or the first of base+"(1)"+ext,
// base+"(1)(1)"+ext, ... that does not exist yet. The check includes the
// extension because that is the name the file is actually created under.
func uniqueExportPath(base, ext string) string {
	for {
		candidate := base + ext
		_, err := os.Stat(candidate)
		if os.IsNotExist(err) {
			return candidate
		}
		if err != nil {
			panic(err)
		}
		base += "(1)"
	}
}
// 测试下载f.Id="Wr3DpD6VojTG953J"
func test_download() {
tid := httpExport("Wr3DpD6VojTG953J")
dl := httpLinkQuery(tid)
fmt.Println(dl)
b := httpDownload(dl)
dir := filepath.Dir("./download/data.md")
if err := os.MkdirAll(dir, 0755); err != nil {
panic(err)
}
f, _ := os.Create("./download/data.md")
defer f.Close()
f.Write(b)
}
// main builds the remote folder tree and exports every document in it to
// the local directory ./download.
//
// The session cookie is taken from the shimo_sid variable; when that is
// empty, the SHIMO_SID environment variable is used instead, so the secret
// does not have to be written into the source.
func main() {
	rootpath := "./download" // destination directory
	ExportType = "md"        // export type options: pdf, jpg, docx, md

	if shimo_sid == "" {
		shimo_sid = os.Getenv("SHIMO_SID")
	}
	if shimo_sid == "" {
		fmt.Fprintln(os.Stderr, "warning: shimo_sid is empty; set it in the source or via the SHIMO_SID environment variable")
	}

	root := &DirInfo{
		FileInfo: FileInfo{
			Path: rootpath,
			Type: "root",
		},
	}
	StructTree(root)
	TraverseTree(root)
}