From 97bba324dca606bc3b8ae160de637edbca580d02 Mon Sep 17 00:00:00 2001
From: CameronBadman
Date: Tue, 31 Dec 2024 21:41:43 +1000
Subject: [PATCH] ext/har: add HAR logger extension

Port HAR logging from abourget/goproxy to ext package.

Closes #609
---
 ext/har/logger.go      | 146 ++++++++++++
 ext/har/logger_test.go | 126 ++++++++++
 ext/har/types.go       | 458 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 730 insertions(+)
 create mode 100644 ext/har/logger.go
 create mode 100644 ext/har/logger_test.go
 create mode 100644 ext/har/types.go

diff --git a/ext/har/logger.go b/ext/har/logger.go
new file mode 100644
--- /dev/null
+++ b/ext/har/logger.go
@@ -0,0 +1,146 @@
+package har
+
+import (
+	"encoding/json"
+	"net/http"
+	"os"
+	"sync"
+	"time"
+
+	"github.com/elazarl/goproxy"
+)
+
+// Logger implements a HAR logging extension for goproxy.
+//
+// Register OnRequest and OnResponse as goproxy handlers. Each completed
+// request/response pair is appended to an in-memory HAR log that can be
+// written out with SaveToFile. All methods are safe for concurrent use.
+type Logger struct {
+	mu             sync.Mutex
+	har            *Har
+	captureContent bool
+}
+
+// requestRecord is what OnRequest stores in ctx.UserData for OnResponse.
+type requestRecord struct {
+	start   time.Time
+	request *Request
+}
+
+// NewLogger creates a new HAR logger with body capture disabled.
+func NewLogger() *Logger {
+	return &Logger{
+		har: New(),
+	}
+}
+
+// OnRequest handles incoming HTTP requests.
+//
+// The request is snapshotted here rather than in OnResponse because its body
+// has already been consumed by the transport once a response exists. The
+// snapshot and the start time are kept in ctx.UserData, replacing any value
+// stored there before.
+func (l *Logger) OnRequest(req *http.Request, ctx *goproxy.ProxyCtx) (*http.Request, *http.Response) {
+	if req != nil && ctx != nil {
+		ctx.UserData = &requestRecord{
+			start:   time.Now(),
+			request: ParseRequest(req, l.captureEnabled()),
+		}
+	}
+	return req, nil
+}
+
+// OnResponse handles HTTP responses.
+//
+// It pairs resp with the snapshot taken by OnRequest and appends the
+// resulting entry to the log. Responses whose context carries no such
+// snapshot are passed through without being logged.
+func (l *Logger) OnResponse(resp *http.Response, ctx *goproxy.ProxyCtx) *http.Response {
+	if resp == nil || ctx == nil || ctx.Req == nil {
+		return resp
+	}
+
+	record, ok := ctx.UserData.(*requestRecord)
+	if !ok || record == nil {
+		return resp
+	}
+
+	// Measure once so that Time and Timings.Wait always agree.
+	elapsed := time.Since(record.start).Milliseconds()
+
+	entry := Entry{
+		StartedDateTime: record.start,
+		Time:            elapsed,
+		Request:         record.request,
+		Response:        ParseResponse(resp, l.captureEnabled()),
+		Cache:           Cache{},
+		Timings: Timings{
+			Send:    0,
+			Wait:    elapsed,
+			Receive: 0,
+		},
+	}
+
+	// Resolve the server address before taking the lock: it may block on DNS.
+	entry.FillIPAddress(ctx.Req)
+
+	l.mu.Lock()
+	l.har.AppendEntry(entry)
+	l.mu.Unlock()
+
+	return resp
+}
+
+// SetCaptureContent enables or disables request/response body capture.
+func (l *Logger) SetCaptureContent(capture bool) {
+	l.mu.Lock()
+	defer l.mu.Unlock()
+	l.captureContent = capture
+}
+
+// captureEnabled reports whether body capture is currently enabled.
+func (l *Logger) captureEnabled() bool {
+	l.mu.Lock()
+	defer l.mu.Unlock()
+	return l.captureContent
+}
+
+// SaveToFile writes the current HAR log to filename as indented JSON,
+// creating the file or truncating an existing one.
+func (l *Logger) SaveToFile(filename string) (err error) {
+	l.mu.Lock()
+	defer l.mu.Unlock()
+
+	file, err := os.Create(filename)
+	if err != nil {
+		return err
+	}
+	// A failed Close can mean the data never reached the disk, so report it
+	// unless an earlier error is already being returned.
+	defer func() {
+		if cerr := file.Close(); cerr != nil && err == nil {
+			err = cerr
+		}
+	}()
+
+	encoder := json.NewEncoder(file)
+	encoder.SetIndent("", "  ")
+	return encoder.Encode(l.har)
+}
+
+// Clear resets the HAR log, discarding every recorded entry.
+func (l *Logger) Clear() {
+	l.mu.Lock()
+	defer l.mu.Unlock()
+	l.har = New()
+}
+
+// GetEntries returns a copy of the current HAR entries. The copy is shallow:
+// the Request and Response pointers are shared with the log.
+func (l *Logger) GetEntries() []Entry {
+	l.mu.Lock()
+	defer l.mu.Unlock()
+	entries := make([]Entry, len(l.har.Log.Entries))
+	copy(entries, l.har.Log.Entries)
+	return entries
+}
diff --git a/ext/har/logger_test.go b/ext/har/logger_test.go
new file mode 100644
--- /dev/null
+++ b/ext/har/logger_test.go
@@ -0,0 +1,126 @@
+package har_test
+
+import (
+	"bytes"
+	"encoding/json"
+	"io"
+	"net/http"
+	"net/http/httptest"
+	"net/url"
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/elazarl/goproxy"
+	"github.com/elazarl/goproxy/ext/har"
+)
+
+// ConstantHandler answers every request with its own string value.
+type ConstantHandler string
+
+func (h ConstantHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
+	_, _ = io.WriteString(w, string(h))
+}
+
+// oneShotProxy serves proxy from a test server and returns a client that
+// sends all of its requests through that server.
+func oneShotProxy(proxy *goproxy.ProxyHttpServer) (client *http.Client, s *httptest.Server) {
+	s = httptest.NewServer(proxy)
+
+	// httptest always hands out a well-formed URL, so the error is ignored.
+	proxyUrl, _ := url.Parse(s.URL)
+	tr := &http.Transport{Proxy: http.ProxyURL(proxyUrl)}
+	client = &http.Client{Transport: tr}
+	return
+}
+
+func TestHarLogger(t *testing.T) {
+	// Create a response we expect
+	expected := "hello world"
+	background := httptest.NewServer(ConstantHandler(expected))
+	defer background.Close()
+
+	// Set up the proxy with HAR logger
+	proxy := goproxy.NewProxyHttpServer()
+	logger := har.NewLogger()
+	logger.SetCaptureContent(true)
+
+	proxy.OnRequest().DoFunc(logger.OnRequest)
+	proxy.OnResponse().DoFunc(logger.OnResponse)
+
+	client, proxyserver := oneShotProxy(proxy)
+	defer proxyserver.Close()
+
+	// Make a request
+	resp, err := client.Get(background.URL)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Read the response
+	msg, err := io.ReadAll(resp.Body)
+	resp.Body.Close()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if string(msg) != expected {
+		t.Errorf("Expected '%s', actual '%s'", expected, string(msg))
+	}
+
+	// Test POST request with content
+	postData := "test=value"
+	req, err := http.NewRequest("POST", background.URL, bytes.NewBufferString(postData))
+	if err != nil {
+		t.Fatal(err)
+	}
+	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
+
+	resp, err = client.Do(req)
+	if err != nil {
+		t.Fatal(err)
+	}
+	resp.Body.Close()
+
+	// Save the HAR file in a per-test directory that the testing package
+	// removes automatically, instead of littering the working directory.
+	tmpfile := filepath.Join(t.TempDir(), "test.har")
+	if err := logger.SaveToFile(tmpfile); err != nil {
+		t.Fatal(err)
+	}
+
+	// Read and verify HAR content
+	harData, err := os.ReadFile(tmpfile)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	var harLog har.Har
+	if err := json.Unmarshal(harData, &harLog); err != nil {
+		t.Fatal(err)
+	}
+
+	// Verify we captured both requests. This must be fatal: the checks
+	// below index the entries and would panic on a shorter log.
+	if len(harLog.Log.Entries) != 2 {
+		t.Fatalf("Expected 2 entries in HAR log, got %d", len(harLog.Log.Entries))
+	}
+
+	// Verify GET request
+	get := harLog.Log.Entries[0]
+	if get.Request.Method != "GET" {
+		t.Errorf("Expected GET request, got %s", get.Request.Method)
+	}
+	if get.Response.Content.Text != expected {
+		t.Errorf("Expected captured response body '%s', got '%s'", expected, get.Response.Content.Text)
+	}
+
+	// Verify POST request
+	post := harLog.Log.Entries[1]
+	if post.Request.Method != "POST" {
+		t.Errorf("Expected POST request, got %s", post.Request.Method)
+	}
+	if post.Request.PostData == nil || post.Request.PostData.Text != postData {
+		t.Errorf("Expected captured request body '%s', got %+v", postData, post.Request.PostData)
+	}
+}
diff --git a/ext/har/types.go b/ext/har/types.go
new file mode 100644
--- /dev/null
+++ b/ext/har/types.go
@@ -0,0 +1,458 @@
+// Package har records HTTP traffic seen by goproxy in the HTTP Archive
+// (HAR) 1.2 format.
+//
+// Original implementation from abourget/goproxy, adapted for use as an extension.
+// HAR specification: http://www.softwareishard.com/blog/har-12-spec/
+package har
+
+import (
+	"bytes"
+	"encoding/base64"
+	"io"
+	"log"
+	"net"
+	"net/http"
+	"sort"
+	"strings"
+	"time"
+	"unicode/utf8"
+)
+
+// startingEntrySize is the initial capacity of a new log's entry list.
+const startingEntrySize = 1000
+
+// Har is the root object of a HAR document.
+type Har struct {
+	Log Log `json:"log"`
+}
+
+// Log holds the recorded pages and entries along with creator metadata.
+type Log struct {
+	Version string   `json:"version"`
+	Creator Creator  `json:"creator"`
+	Browser *Browser `json:"browser,omitempty"`
+	Pages   []Page   `json:"pages,omitempty"`
+	Entries []Entry  `json:"entries"`
+	Comment string   `json:"comment,omitempty"`
+}
+
+// New returns an empty HAR 1.2 document.
+func New() *Har {
+	return &Har{
+		Log: Log{
+			Version: "1.2",
+			Creator: Creator{
+				Name:    "GoProxy",
+				Version: "12345",
+			},
+			Pages:   make([]Page, 0, 10),
+			Entries: makeNewEntries(),
+		},
+	}
+}
+
+// AppendEntry adds the given entries to the end of the log.
+func (har *Har) AppendEntry(entry ...Entry) {
+	har.Log.Entries = append(har.Log.Entries, entry...)
+}
+
+// AppendPage adds the given pages to the end of the log.
+func (har *Har) AppendPage(page ...Page) {
+	har.Log.Pages = append(har.Log.Pages, page...)
+}
+
+// makeNewEntries returns an empty entry list with room for startingEntrySize entries.
+func makeNewEntries() []Entry {
+	return make([]Entry, 0, startingEntrySize)
+}
+
+// Creator holds the name and version of the application that wrote the log.
+type Creator struct {
+	Name    string `json:"name"`
+	Version string `json:"version"`
+	Comment string `json:"comment,omitempty"`
+}
+
+// Browser holds the name and version of the browser that made the requests.
+type Browser struct {
+	Name    string `json:"name"`
+	Version string `json:"version"`
+	Comment string `json:"comment,omitempty"`
+}
+
+// Page describes one exported page.
+type Page struct {
+	ID              string      `json:"id,omitempty"`
+	StartedDateTime time.Time   `json:"startedDateTime"`
+	Title           string      `json:"title"`
+	PageTimings     PageTimings `json:"pageTimings"`
+	Comment         string      `json:"comment,omitempty"`
+}
+
+// Entry describes one request/response exchange.
+type Entry struct {
+	PageRef         string    `json:"pageref,omitempty"`
+	StartedDateTime time.Time `json:"startedDateTime"`
+	Time            int64     `json:"time"`
+	Request         *Request  `json:"request"`
+	Response        *Response `json:"response"`
+	Cache           Cache     `json:"cache"`
+	Timings         Timings   `json:"timings"`
+	ServerIpAddress string    `json:"serverIpAddress,omitempty"`
+	Connection      string    `json:"connection,omitempty"`
+	Comment         string    `json:"comment,omitempty"`
+}
+
+// Cache describes the cache state before and after the request.
+type Cache struct {
+	BeforeRequest *CacheEntry `json:"beforeRequest,omitempty"`
+	AfterRequest  *CacheEntry `json:"afterRequest,omitempty"`
+}
+
+// CacheEntry describes a single cache record.
+type CacheEntry struct {
+	Expires    string `json:"expires,omitempty"`
+	LastAccess string `json:"lastAccess"`
+	ETag       string `json:"eTag"`
+	HitCount   int    `json:"hitCount"`
+	Comment    string `json:"comment,omitempty"`
+}
+
+// Request is the HAR representation of an HTTP request.
+type Request struct {
+	Method      string          `json:"method"`
+	Url         string          `json:"url"`
+	HttpVersion string          `json:"httpVersion"`
+	Cookies     []Cookie        `json:"cookies"`
+	Headers     []NameValuePair `json:"headers"`
+	QueryString []NameValuePair `json:"queryString"`
+	PostData    *PostData       `json:"postData,omitempty"`
+	BodySize    int64           `json:"bodySize"`
+	HeadersSize int64           `json:"headersSize"`
+}
+
+// ParseRequest converts req into its HAR representation. It returns nil when
+// req is nil.
+//
+// When captureContent is true and the method is POST, PUT or PATCH, the body
+// is read into memory and recorded; req.Body is then replaced by a reader
+// that replays the same bytes, so req can still be sent afterwards.
+func ParseRequest(req *http.Request, captureContent bool) *Request {
+	if req == nil {
+		return nil
+	}
+
+	harRequest := Request{
+		Method:      req.Method,
+		HttpVersion: req.Proto,
+		Cookies:     parseCookies(req.Cookies()),
+		Headers:     parseStringArrMap(req.Header),
+		QueryString: []NameValuePair{},
+		BodySize:    req.ContentLength,
+		HeadersSize: calcHeaderSize(req.Header),
+	}
+	if req.URL != nil {
+		harRequest.Url = req.URL.String()
+		harRequest.QueryString = parseStringArrMap(req.URL.Query())
+	}
+
+	if captureContent {
+		switch req.Method {
+		case http.MethodPost, http.MethodPut, http.MethodPatch:
+			harRequest.PostData = parsePostData(req)
+		}
+	}
+
+	return &harRequest
+}
+
+// FillIPAddress sets ServerIpAddress from the host in req.URL.
+//
+// A literal IP address is used as is. A host name is resolved with a DNS
+// lookup, preferring the first IPv4 result and falling back to the first
+// address of any family. The field is left unchanged when req has no host or
+// the lookup fails.
+func (harEntry *Entry) FillIPAddress(req *http.Request) {
+	if req == nil || req.URL == nil {
+		return
+	}
+	// Hostname strips the port and the brackets around IPv6 literals.
+	host := req.URL.Hostname()
+	if host == "" {
+		return
+	}
+
+	if ip := net.ParseIP(host); ip != nil {
+		harEntry.ServerIpAddress = ip.String()
+		return
+	}
+
+	addrs, err := net.LookupIP(host)
+	if err != nil || len(addrs) == 0 {
+		return
+	}
+	for _, ip := range addrs {
+		if ip.To4() != nil {
+			harEntry.ServerIpAddress = ip.String()
+			return
+		}
+	}
+	harEntry.ServerIpAddress = addrs[0].String()
+}
+
+// calcHeaderSize returns the number of bytes header occupies on the wire,
+// counting every "Name: value\r\n" line. The start line and the blank line
+// that ends the header block are not included.
+func calcHeaderSize(header http.Header) int64 {
+	var size int64
+	for name, values := range header {
+		for _, value := range values {
+			size += int64(len(name) + len(": ") + len(value) + len("\r\n"))
+		}
+	}
+	return size
+}
+
+// replayBody serves bytes already consumed from a body, then whatever the
+// original body still yields, and closes the original body on Close.
+type replayBody struct {
+	io.Reader
+	io.Closer
+}
+
+// restoreBody returns a body that yields consumed followed by the rest of orig.
+func restoreBody(consumed []byte, orig io.ReadCloser) io.ReadCloser {
+	return replayBody{
+		Reader: io.MultiReader(bytes.NewReader(consumed), orig),
+		Closer: orig,
+	}
+}
+
+// parsePostData builds the postData section for req. It returns nil when the
+// request has no Content-Type header, since HAR requires a MIME type.
+//
+// Form parameters are listed when req.PostForm has been populated; otherwise
+// the raw body is recorded and req.Body is restored for later readers.
+func parsePostData(req *http.Request) *PostData {
+	contentType := req.Header.Get("Content-Type")
+	if contentType == "" {
+		log.Printf("Error parsing request to %v: missing content type in request", req.URL)
+		return nil
+	}
+	harPostData := &PostData{MimeType: contentType}
+
+	if len(req.PostForm) > 0 {
+		for _, name := range sortedKeys(req.PostForm) {
+			for _, value := range req.PostForm[name] {
+				harPostData.Params = append(harPostData.Params, PostDataParam{
+					Name:  name,
+					Value: value,
+				})
+			}
+		}
+		return harPostData
+	}
+
+	if req.Body == nil || req.Body == http.NoBody {
+		return harPostData
+	}
+	body, err := io.ReadAll(req.Body)
+	if len(body) > 0 {
+		req.Body = restoreBody(body, req.Body)
+	}
+	if err != nil {
+		log.Printf("Error reading request body for %v: %v", req.URL, err)
+	}
+	harPostData.Text = string(body)
+	return harPostData
+}
+
+// sortedKeys returns the keys of m in ascending order.
+func sortedKeys(m map[string][]string) []string {
+	keys := make([]string, 0, len(m))
+	for key := range m {
+		keys = append(keys, key)
+	}
+	sort.Strings(keys)
+	return keys
+}
+
+// parseStringArrMap flattens a multi-valued map such as http.Header or
+// url.Values into name/value pairs sorted by name. Multiple values of one
+// name are joined with commas.
+//
+// Values are copied verbatim. Both kinds of input are already decoded, and
+// unescaping them again would turn "+" into a space and drop any value that
+// contains a bare "%".
+func parseStringArrMap(stringArrMap map[string][]string) []NameValuePair {
+	pairs := make([]NameValuePair, 0, len(stringArrMap))
+	for _, name := range sortedKeys(stringArrMap) {
+		pairs = append(pairs, NameValuePair{
+			Name:  name,
+			Value: strings.Join(stringArrMap[name], ","),
+		})
+	}
+	return pairs
+}
+
+// parseCookies converts cookies into their HAR representation.
+func parseCookies(cookies []*http.Cookie) []Cookie {
+	harCookies := make([]Cookie, 0, len(cookies))
+	for _, cookie := range cookies {
+		harCookie := Cookie{
+			Name:     cookie.Name,
+			Value:    cookie.Value,
+			Path:     cookie.Path,
+			Domain:   cookie.Domain,
+			HttpOnly: cookie.HttpOnly,
+			Secure:   cookie.Secure,
+		}
+		if !cookie.Expires.IsZero() {
+			// Copy the time so the HAR cookie does not alias the http.Cookie.
+			expires := cookie.Expires
+			harCookie.Expires = &expires
+		}
+		harCookies = append(harCookies, harCookie)
+	}
+	return harCookies
+}
+
+// Response is the HAR representation of an HTTP response.
+type Response struct {
+	Status      int             `json:"status"`
+	StatusText  string          `json:"statusText"`
+	HttpVersion string          `json:"httpVersion"`
+	Cookies     []Cookie        `json:"cookies"`
+	Headers     []NameValuePair `json:"headers"`
+	Content     Content         `json:"content"`
+	RedirectUrl string          `json:"redirectURL"`
+	BodySize    int64           `json:"bodySize"`
+	HeadersSize int64           `json:"headersSize"`
+	Comment     string          `json:"comment,omitempty"`
+}
+
+// ParseResponse converts resp into its HAR representation. It returns nil
+// when resp is nil.
+//
+// When captureContent is true the body is read into memory and recorded;
+// resp.Body is then replaced by a reader that replays the same bytes, so the
+// response can still be forwarded to the client.
+func ParseResponse(resp *http.Response, captureContent bool) *Response {
+	if resp == nil {
+		return nil
+	}
+
+	// resp.Status has the form "200 OK"; HAR wants the reason phrase alone.
+	statusText := http.StatusText(resp.StatusCode)
+	if _, reason, found := strings.Cut(resp.Status, " "); found {
+		statusText = reason
+	}
+
+	harResponse := Response{
+		Status:      resp.StatusCode,
+		StatusText:  statusText,
+		HttpVersion: resp.Proto,
+		Cookies:     parseCookies(resp.Cookies()),
+		Headers:     parseStringArrMap(resp.Header),
+		Content:     Content{MimeType: resp.Header.Get("Content-Type")},
+		RedirectUrl: resp.Header.Get("Location"),
+		BodySize:    resp.ContentLength,
+		HeadersSize: calcHeaderSize(resp.Header),
+	}
+	if captureContent {
+		parseContent(resp, &harResponse.Content)
+	}
+
+	return &harResponse
+}
+
+// parseContent records the body of resp in harContent and restores resp.Body
+// for later readers. Bodies that are not valid UTF-8 are stored
+// base64-encoded, as HAR allows, instead of being corrupted by JSON encoding.
+func parseContent(resp *http.Response, harContent *Content) {
+	harContent.MimeType = resp.Header.Get("Content-Type")
+	if resp.Body == nil || resp.ContentLength == 0 {
+		return
+	}
+
+	body, err := io.ReadAll(resp.Body)
+	if len(body) > 0 {
+		resp.Body = restoreBody(body, resp.Body)
+	}
+	if err != nil {
+		log.Printf("Error reading response body: %v", err)
+		return
+	}
+
+	harContent.Size = len(body)
+	if utf8.Valid(body) {
+		harContent.Text = string(body)
+		return
+	}
+	harContent.Text = base64.StdEncoding.EncodeToString(body)
+	harContent.Encoding = "base64"
+}
+
+// Cookie is the HAR representation of an HTTP cookie.
+type Cookie struct {
+	Name     string     `json:"name"`
+	Value    string     `json:"value"`
+	Path     string     `json:"path,omitempty"`
+	Domain   string     `json:"domain,omitempty"`
+	Expires  *time.Time `json:"expires,omitempty"`
+	HttpOnly bool       `json:"httpOnly,omitempty"`
+	Secure   bool       `json:"secure,omitempty"`
+}
+
+// NameValuePair is a generic name/value record used for headers and query
+// string parameters.
+type NameValuePair struct {
+	Name  string `json:"name"`
+	Value string `json:"value"`
+}
+
+// PostData describes the body sent with a request.
+type PostData struct {
+	MimeType string          `json:"mimeType"`
+	Params   []PostDataParam `json:"params,omitempty"`
+	Text     string          `json:"text,omitempty"`
+	Comment  string          `json:"comment,omitempty"`
+}
+
+// PostDataParam is one posted form parameter.
+type PostDataParam struct {
+	Name        string `json:"name"`
+	Value       string `json:"value,omitempty"`
+	FileName    string `json:"fileName,omitempty"`
+	ContentType string `json:"contentType,omitempty"`
+	Comment     string `json:"comment,omitempty"`
+}
+
+// Content describes the body returned with a response.
+type Content struct {
+	Size        int    `json:"size"`
+	Compression int    `json:"compression,omitempty"`
+	MimeType    string `json:"mimeType"`
+	Text        string `json:"text,omitempty"`
+	Encoding    string `json:"encoding,omitempty"`
+	Comment     string `json:"comment,omitempty"`
+}
+
+// PageTimings holds the load timings of a page.
+type PageTimings struct {
+	OnContentLoad int64  `json:"onContentLoad"`
+	OnLoad        int64  `json:"onLoad"`
+	Comment       string `json:"comment,omitempty"`
+}
+
+// Timings breaks down the time spent in each phase of an exchange.
+type Timings struct {
+	Dns     int64  `json:"dns,omitempty"`
+	Blocked int64  `json:"blocked,omitempty"`
+	Connect int64  `json:"connect,omitempty"`
+	Send    int64  `json:"send"`
+	Wait    int64  `json:"wait"`
+	Receive int64  `json:"receive"`
+	Ssl     int64  `json:"ssl,omitempty"`
+	Comment string `json:"comment,omitempty"`
+}