Skip to content

Commit

Permalink
Merge pull request #29 from DedSecInside/develop
Browse files Browse the repository at this point in the history
Reorganize code and update documentation
  • Loading branch information
KingAkeem authored Nov 10, 2022
2 parents d8788a5 + 250cb01 commit 60d92f1
Show file tree
Hide file tree
Showing 6 changed files with 70 additions and 84 deletions.
42 changes: 14 additions & 28 deletions docs/README.md → api/README.md
Original file line number Diff line number Diff line change
@@ -1,14 +1,11 @@
# REST API

## Get Tree

### GET `http://localhost:{port}/tree?link=`
# REST HTTP API (Examples are using localhost)

## Get Link Tree
### GET `http://localhost:{port}/tree?link=https://example.com&depth=1`
### Arguments
- link (string): the root URL of the tree
- depth (int): the depth of the tree

e.g. depth of 1
### Response
```json
{
"url": "https://www.example.com",
Expand All @@ -24,41 +21,30 @@ e.g. depth of 1
```

## Get Emails

### GET `http://localhost:{port}/emails?link=`

### GET `http://localhost:{port}/emails?link=https://random.com`
### Arguments
- link (string): the URL of the page to search for email addresses

### Response
```json
["[email protected]", "[email protected]"]
```

## Get Phone Numbers
### GET `http://localhost:{port}/phone_numbers?link=`
### GET `http://localhost:{port}/phone_numbers?link=https://example.com`
### Arguments
- link (string): the URL of the page to search for phone numbers

### Response
```json
["+1-234-567-8901", "+1-234-567-8902"]
```

## Get IP

## Get current IP of server
### GET `http://localhost:{port}/ip`

```
"Random IP Address"
```

## Get Web Content

### GET `http://localhost:{port}/content?link=`

```
"Returns the HTML content of the webpage"
### Arguments
N/A
### Response
Returns the IP address as a plain string.
```json
"127.0.0.1"
```


39 changes: 21 additions & 18 deletions api/handlers.go
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
// This file contains HTTP REST handlers for interacting with links
package api

import (
Expand All @@ -10,11 +11,11 @@ import (
"strconv"
"strings"

"github.com/KingAkeem/gotor/linktree"
"github.com/KingAkeem/gotor/pkg/linktree"
"golang.org/x/net/html"
)

// GetTreeNode writes a tree using the root and depth given
// GetTreeNode returns an HTTP handler that builds the link tree for the link and depth supplied as query parameters and writes it to the response as JSON.
func GetTreeNode(client *http.Client) func(w http.ResponseWriter, r *http.Request) {
return func(w http.ResponseWriter, r *http.Request) {
queryMap := r.URL.Query()
Expand All @@ -33,7 +34,9 @@ func GetTreeNode(client *http.Client) func(w http.ResponseWriter, r *http.Reques
log.Printf("processing link %s at a depth of %d\n", link, depth)
node := linktree.NewNode(client, link)
node.Load(depth)
log.Printf("Tree built for %s at depth %d\n", node.URL, depth)
log.Printf("tree built for %s at depth %d\n", node.URL, depth)

w.Header().Set("Content-Type", "application/json")
err = json.NewEncoder(w).Encode(node)
if err != nil {
log.Printf("Error: %+v\n", err)
Expand Down Expand Up @@ -130,7 +133,7 @@ func getWebsiteContent(client *http.Client, link string) string {
if err != nil {
log.Println("Error:", err)
return content
}
}
defer resp.Body.Close()
z := html.NewTokenizer(resp.Body)
for {
Expand All @@ -144,7 +147,7 @@ func getWebsiteContent(client *http.Client, link string) string {
}

return content

}

func GetWebsiteContent(client *http.Client) func(w http.ResponseWriter, r *http.Request) {
Expand All @@ -171,19 +174,19 @@ func getTorIP(client *http.Client) (string, error) {
for {
tokenType := tokenizer.Next()
switch tokenType {
case html.ErrorToken:
err := tokenizer.Err()
if err != io.EOF {
return "", err
}
return "", nil
case html.StartTagToken:
token := tokenizer.Token()
if token.Data == "strong" {
tokenizer.Next()
ipToken := tokenizer.Token()
return ipToken.Data, nil
}
case html.ErrorToken:
err := tokenizer.Err()
if err != io.EOF {
return "", err
}
return "", nil
case html.StartTagToken:
token := tokenizer.Token()
if token.Data == "strong" {
tokenizer.Next()
ipToken := tokenizer.Token()
return ipToken.Data, nil
}
}
}
}
Expand Down
29 changes: 13 additions & 16 deletions api/handlers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@ import (

"net/http"

"github.com/KingAkeem/gotor/linktree"
"github.com/KingAkeem/gotor/pkg/linktree"
"github.com/jarcoal/httpmock"
"github.com/stretchr/testify/assert"
)

func assertNode(t *testing.T, n *linktree.Node, link string, numChildren int) {
func assertNode(t *testing.T, n linktree.Node, link string, numChildren int) {
assert.Len(t, n.Children, numChildren, "There should be a single child.")
assert.Equal(t, n.Status, "OK", "The status should be OK.")
assert.Equal(t, n.StatusCode, 200, "The status code should be 200.")
Expand Down Expand Up @@ -117,7 +117,6 @@ func TestPhoneNumbers(t *testing.T) {
assert.Len(t, phone, 4, "There should be 4 phone numbers.")
}


func TestGetTree(t *testing.T) {
// Test getting a tree of depth 1
rootLink := "https://www.root.com"
Expand All @@ -133,7 +132,7 @@ func TestGetTree(t *testing.T) {
node.Load(1)
httpmock.DeactivateAndReset()

assertNode(t, node, rootLink, 1)
assertNode(t, *node, rootLink, 1)

// Test getting a tree of depth 2
rootLink = "https://www.root.com"
Expand All @@ -142,29 +141,27 @@ func TestGetTree(t *testing.T) {
httpmock.Activate()
page = newPage("Tree Site", fmt.Sprintf(`<a href="%s">Child Site</a>`, childLink))
childPage := newPage("Tree Site", fmt.Sprintf(`<a href="%s">Sub Child Site</a>`, subChildLink))
httpmock.RegisterResponder("GET", subChildLink,
httpmock.NewStringResponder(200, newPage("Sub Child Site", "")))
httpmock.RegisterResponder("GET", childLink,
httpmock.NewStringResponder(200, childPage))
httpmock.RegisterResponder("GET", rootLink,
httpmock.NewStringResponder(200, page))
httpmock.RegisterResponder("GET", subChildLink, httpmock.NewStringResponder(200, newPage("Sub Child Site", "")))
httpmock.RegisterResponder("GET", childLink, httpmock.NewStringResponder(200, childPage))
httpmock.RegisterResponder("GET", rootLink, httpmock.NewStringResponder(200, page))

node = linktree.NewNode(http.DefaultClient, rootLink)
node.Load(2)
httpmock.DeactivateAndReset()

assertNode(t, node, rootLink, 1)
assertNode(t, node.Children[0], childLink, 1)
assertNode(t, node.Children[0].Children[0], subChildLink, 0)
defer httpmock.DeactivateAndReset()

assertNode(t, *node, rootLink, 1)
assertNode(t, *node.Children[0], childLink, 1)
assertNode(t, *node.Children[0].Children[0], subChildLink, 0)
}

func TestGetWebsiteContent(t *testing.T) {
link := "https://www.random.com"
httpmock.Activate()
defer httpmock.DeactivateAndReset()
httpmock.RegisterResponder("GET", link,
httpmock.NewStringResponder(200, "Hello World"))
httpmock.NewStringResponder(200, "Hello World"))

content := getWebsiteContent(http.DefaultClient, link)
assert.Equal(t, "Hello World", content, "The content should be same.")
}
}
16 changes: 8 additions & 8 deletions main.go → cmd/main/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import (
"strconv"

"github.com/KingAkeem/gotor/api"
"github.com/KingAkeem/gotor/linktree"
"github.com/KingAkeem/gotor/pkg/linktree"
"github.com/gorilla/mux"
"github.com/mgutz/ansi"
"github.com/xuri/excelize/v2"
Expand Down Expand Up @@ -43,9 +43,9 @@ func writeTree(node *linktree.Node, depth int) {
node.PrintTree()
}

func writeTerminal(node *linktree.Node, depth int) {
func writeTerminal(client *http.Client, node *linktree.Node, depth int) {
printStatus := func(link string) {
n := linktree.NewNode(node.Client, link)
n := linktree.NewNode(client, link)
markError := ansi.ColorFunc("red")
markSuccess := ansi.ColorFunc("green")
if n.StatusCode != 200 {
Expand All @@ -57,7 +57,7 @@ func writeTerminal(node *linktree.Node, depth int) {
node.Crawl(depth, printStatus)
}

func writeExcel(node *linktree.Node, depth int) {
func writeExcel(client *http.Client, node *linktree.Node, depth int) {
f := excelize.NewFile()
err := f.SetCellStr(f.GetSheetName(0), "A1", "Link")
if err != nil {
Expand All @@ -71,7 +71,7 @@ func writeExcel(node *linktree.Node, depth int) {
}
row := 2
addRow := func(link string) {
node := linktree.NewNode(node.Client, link)
node := linktree.NewNode(client, link)
linkCell := fmt.Sprintf("A%d", row)
statusCell := fmt.Sprintf("B%d", row)
err = f.SetCellStr(f.GetSheetName(0), linkCell, node.URL)
Expand Down Expand Up @@ -134,7 +134,7 @@ func main() {
flag.StringVar(&depthInput, "d", "1", "Depth of search. Defaults to 1. (Must be an integer)")
flag.StringVar(&host, "h", "127.0.0.1", "The host used for the SOCKS5 proxy. Defaults to localhost (127.0.0.1.)")
flag.StringVar(&port, "p", "9050", "The port used for the SOCKS5 proxy. Defaults to 9050.")
flag.StringVar(&output, "o", "terminal", "The method of output being used. Defaults to terminal.")
flag.StringVar(&output, "o", "terminal", "The method of output being used. Defaults to terminal. Options are terminal, excel sheet (using xlsx) or tree (a tree representation will be visually printed in text)")
flag.BoolVar(&serve, "server", false, "Determines if the program will behave as an HTTP server.")
flag.Parse()

Expand Down Expand Up @@ -164,9 +164,9 @@ func main() {
node := linktree.NewNode(client, root)
switch output {
case "terminal":
writeTerminal(node, depth)
writeTerminal(client, node, depth)
case "excel":
writeExcel(node, depth)
writeExcel(client, node, depth)
case "tree":
writeTree(node, depth)
}
Expand Down
24 changes: 12 additions & 12 deletions linktree/linktree.go → pkg/linktree/linktree.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@ type Node struct {
StatusCode int `json:"status_code"`
Status string `json:"status"`
Children []*Node `json:"children"`
Client *http.Client `json:"-"`
Loaded bool `json:"-"`
LastLoaded time.Time `json:"-"`
client *http.Client `json:"-"`
loaded bool `json:"-"`
lastLoaded time.Time `json:"-"`
}

// PrintTree ...
Expand All @@ -36,7 +36,7 @@ func (n *Node) PrintTree() {

// updateStatus updates the status of the URL
func (n *Node) updateStatus() {
resp, err := n.Client.Get(n.URL)
resp, err := n.client.Get(n.URL)
if err != nil {
n.Status = "UNKNOWN"
n.StatusCode = http.StatusInternalServerError
Expand All @@ -57,7 +57,7 @@ func isValidURL(URL string) bool {
func NewNode(client *http.Client, URL string) *Node {
n := &Node{
URL: URL,
Client: client,
client: client,
}
n.updateStatus()
return n
Expand Down Expand Up @@ -147,11 +147,11 @@ func buildTree(parent *Node, depth int, childLinks chan string, wg *sync.WaitGro
defer wg.Done()
// Do not add the link as its own child
if parent.URL != link {
n := NewNode(parent.Client, link)
n := NewNode(parent.client, link)
parent.Children = append(parent.Children, n)
if depth > 1 {
depth--
tokenStream := streamTokens(n.Client, n.URL)
tokenStream := streamTokens(n.client, n.URL)
filteredStream := filterTokens(tokenStream, filter)
buildTree(n, depth, filteredStream, wg, filter)
}
Expand All @@ -163,7 +163,7 @@ func buildTree(parent *Node, depth int, childLinks chan string, wg *sync.WaitGro

// Load places the tree within memory.
func (n *Node) Load(depth int) {
tokenStream := streamTokens(n.Client, n.URL)
tokenStream := streamTokens(n.client, n.URL)
filter := &TokenFilter{
tags: map[string]bool{"a": true},
attributes: map[string]bool{"href": true},
Expand All @@ -172,8 +172,8 @@ func (n *Node) Load(depth int) {
wg := new(sync.WaitGroup)
buildTree(n, depth, filteredStream, wg, filter)
wg.Wait()
n.Loaded = true
n.LastLoaded = time.Now().UTC()
n.loaded = true
n.lastLoaded = time.Now().UTC()
}

// perform work on each token stream until the depth has been reached
Expand All @@ -195,13 +195,13 @@ func crawl(client *http.Client, wg *sync.WaitGroup, linkChan <-chan string, dept

// Crawl traverses the children of a node without storing them in memory
func (n *Node) Crawl(depth int, work func(link string)) {
tokenStream := streamTokens(n.Client, n.URL)
tokenStream := streamTokens(n.client, n.URL)
filter := &TokenFilter{
tags: map[string]bool{"a": true},
attributes: map[string]bool{"href": true},
}
filteredStream := filterTokens(tokenStream, filter)
wg := new(sync.WaitGroup)
crawl(n.Client, wg, filteredStream, depth, filter, work)
crawl(n.client, wg, filteredStream, depth, filter, work)
wg.Wait()
}
4 changes: 2 additions & 2 deletions linktree/linktree_test.go → pkg/linktree/linktree_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,15 +45,15 @@ func TestLoadNode(t *testing.T) {
link := "https://www.test.com"
n := NewNode(http.DefaultClient, link)
n.Load(1)
assert.True(t, n.Loaded)
assert.True(t, n.loaded)

page := newPage("test", `<a href="https://www.child1.com">link to child</a>`)
httpmock.RegisterResponder(http.MethodGet, link,
httpmock.NewStringResponder(http.StatusOK, page))

n = NewNode(http.DefaultClient, link)
n.Load(1)
assert.True(t, n.Loaded)
assert.True(t, n.loaded)
assert.Len(t, n.Children, 1)

httpmock.DeactivateAndReset()
Expand Down

0 comments on commit 60d92f1

Please sign in to comment.