-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscrape.go
106 lines (90 loc) · 2 KB
/
scrape.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
package main
import (
"fmt"
"strconv"
"strings"
tea "github.com/charmbracelet/bubbletea"
"github.com/gocolly/colly"
)
// Dep describes a single dependent repository collected by the scraper.
// The struct tags fix the key names used when a Dep is encoded as JSON.
type Dep struct {
	// User is the owner part of the repository path.
	User string `json:"user"`
	// Repo is the repository name.
	Repo string `json:"repo"`
	// Stars is the star count parsed from the row.
	Stars int `json:"stars"`
	// Avatar is the source URL of the row's image.
	Avatar string `json:"avatar"`
	// RepoUrl is the github.com address built from User and Repo.
	RepoUrl string `json:"repoUrl"`
	// Url is encoded as "depUrl".
	// NOTE(review): nothing visible in this file assigns it — confirm where it is populated.
	Url string `json:"depUrl"`
}
// PageTick is the message emitted by the command that Scrape returns
// once a page has been scraped successfully.
type PageTick struct {
	// nextUrl is the href of the "Next" pagination link, or "" if the
	// page did not contain one.
	nextUrl string
	// deps holds the dependents found on the page.
	deps []Dep
}
// InitScrapeTick is the string-based message type produced by the command
// that InitScrape returns.
type InitScrapeTick string
// ToNum converts a human-formatted count such as "1,234" into an int.
//
// Thousands separators (commas) and every Unicode whitespace character
// (spaces, tabs, newlines, non-breaking spaces, ...) are removed before
// parsing, so text lifted straight out of HTML can be passed in as-is.
//
// Parsing is best-effort: if what remains is empty or not a base-10
// integer the result is 0.
func ToNum(s string) int {
	// strings.Fields splits on any Unicode whitespace; re-joining the
	// pieces with no separator drops all of it in one pass.
	s = strings.Join(strings.Fields(s), "")
	s = strings.ReplaceAll(s, ",", "")

	// The error is ignored on purpose: Atoi already yields 0 for
	// malformed input, which is the fallback callers expect from a
	// scraped value that could not be read.
	n, _ := strconv.Atoi(s)
	return n
}
// extractStars reads the star count out of a dependent's row.
//
// It collects the text of the children of e that match the counter-span
// selector, keeps only the part before the first space, trims it, and
// converts it with ToNum.
func extractStars(e *colly.HTMLElement) int {
	counters := e.ChildText("span.color-fg-muted.text-bold.pl-3")
	// Only the leading token is of interest; anything after the first
	// space is discarded.
	leading := strings.SplitN(counters, " ", 2)[0]
	return ToNum(strings.TrimSpace(leading))
}
// ScrapeModel is the result scrapePage produces for one page.
type ScrapeModel struct {
	// nextUrl is the href of the "Next" pagination link, or "" if the
	// page did not contain one.
	nextUrl string
	// deps holds the dependents found on the page.
	deps []Dep
}
// scrapePage visits url with a fresh collector and extracts the
// dependents listed on the page together with the link to the next page.
//
// Every div.Box-row that contains at least one link yields a Dep; rows
// without links are skipped. The returned nextUrl is the href of the
// pagination button labelled "Next", or "" when the page has none.
//
// If the visit fails, the zero ScrapeModel and the collector's error are
// returned.
func scrapePage(url string) (ScrapeModel, error) {
	c := colly.NewCollector()

	// Pagination: remember the "Next" button's target. Other buttons
	// matching the selector are ignored rather than clearing the value,
	// so the result does not depend on the order the buttons appear in.
	var nextUrl string
	c.OnHTML("a.btn.BtnGroup-item", func(e *colly.HTMLElement) {
		if strings.TrimSpace(e.Text) == "Next" {
			nextUrl = e.Attr("href")
		}
	})

	// Dependents: one Dep per row.
	var deps []Dep
	c.OnHTML("div.Box-row", func(e *colly.HTMLElement) {
		hrefs := e.ChildAttrs("a", "href")
		if len(hrefs) == 0 {
			// No link means no owner to read; indexing here would panic.
			return
		}
		// The first link is expected to look like "/<user>"; drop the
		// leading slash. TrimPrefix is safe on an empty href.
		user := strings.TrimPrefix(hrefs[0], "/")
		repo := e.ChildText("a.text-bold")

		deps = append(deps, Dep{
			User:    user,
			Repo:    repo,
			Stars:   extractStars(e),
			Avatar:  e.ChildAttr("img", "src"),
			RepoUrl: fmt.Sprintf("https://github.com/%s/%s", user, repo),
		})
	})

	if err := c.Visit(url); err != nil {
		return ScrapeModel{}, err
	}
	return ScrapeModel{nextUrl: nextUrl, deps: deps}, nil
}
// InitScrape returns a command whose only effect is to emit an
// InitScrapeTick message carrying the value "Tick".
func InitScrape() tea.Cmd {
	emit := func() tea.Msg {
		return InitScrapeTick("Tick")
	}
	return emit
}
// Scrape returns a command that scrapes url with scrapePage and reports
// the outcome as a message: an errMsg wrapping the error on failure, or a
// PageTick carrying the page's dependents and next-page link on success.
func Scrape(url string) tea.Cmd {
	return func() tea.Msg {
		page, err := scrapePage(url)
		if err != nil {
			return errMsg(err)
		}
		return PageTick{nextUrl: page.nextUrl, deps: page.deps}
	}
}