From dae2cf2977b74631a4c06a0c11ea90c1a6879f09 Mon Sep 17 00:00:00 2001 From: Caleb Brown Date: Mon, 8 Nov 2021 15:41:56 +1100 Subject: [PATCH 1/2] Use a concrete struct while parsing json to reduce memory consumption. The npm package json can be very large. Using the `map[string]interface{}` results in the entire json data structure being deserialized. This costs a lot of memory and cpu time. The feed parser only needs the "time" portion of the json data, so by restricting the struct to that data we avoid a lot of overhead. Testing shows memory peaks close to 70MB with this fix, rather than the 512MB+ previously. --- pkg/feeds/npm/npm.go | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/pkg/feeds/npm/npm.go b/pkg/feeds/npm/npm.go index 49cc6752..6ff3b616 100644 --- a/pkg/feeds/npm/npm.go +++ b/pkg/feeds/npm/npm.go @@ -90,18 +90,19 @@ func fetchPackage(baseURL, pkgTitle string) ([]*Package, error) { if err != nil { return nil, err } - var jsonMap map[string]interface{} - err = json.Unmarshal(body, &jsonMap) + + // We only care about the `time` field as it contains all the versions in + // date order, from oldest to newest. + // Using a struct for parsing also avoids the cost of deserializing data + // that is ultimately unused. + var packageDetails struct { + Time map[string]string `json:"time"` + } + err = json.Unmarshal(body, &packageDetails) if err != nil { return nil, fmt.Errorf("%w : %v for package %s", errJSON, err, pkgTitle) } - - // The json string `time` contains versions in date order, oldest to newest. 
- versions, ok := jsonMap["time"].(map[string]interface{}) - if !ok { - return nil, fmt.Errorf("%w : 'time' not found for package %s ", - errJSON, pkgTitle) - } + versions := packageDetails.Time // If `unpublished` exists in the version map then at a given point in time // the package was 'entirely' removed, the packageEvent(s) received are for package @@ -122,7 +123,7 @@ func fetchPackage(baseURL, pkgTitle string) ([]*Package, error) { // are unordered. versionSlice := []*Package{} for version, timestamp := range versions { - date, err := time.Parse(time.RFC3339, timestamp.(string)) + date, err := time.Parse(time.RFC3339, timestamp) if err != nil { return nil, err } From 7337cc10a10cf9643d5a448cc1b92d85fbab9106 Mon Sep 17 00:00:00 2001 From: Caleb Brown Date: Mon, 8 Nov 2021 15:59:30 +1100 Subject: [PATCH 2/2] Values in the time map can be either a string or a struct, so use the empty interface. --- pkg/feeds/npm/npm.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/feeds/npm/npm.go b/pkg/feeds/npm/npm.go index 6ff3b616..aacddd57 100644 --- a/pkg/feeds/npm/npm.go +++ b/pkg/feeds/npm/npm.go @@ -96,7 +96,7 @@ func fetchPackage(baseURL, pkgTitle string) ([]*Package, error) { // Using a struct for parsing also avoids the cost of deserializing data // that is ultimately unused. var packageDetails struct { - Time map[string]string `json:"time"` + Time map[string]interface{} `json:"time"` } err = json.Unmarshal(body, &packageDetails) if err != nil { @@ -123,7 +123,7 @@ func fetchPackage(baseURL, pkgTitle string) ([]*Package, error) { // are unordered. versionSlice := []*Package{} for version, timestamp := range versions { - date, err := time.Parse(time.RFC3339, timestamp) + date, err := time.Parse(time.RFC3339, timestamp.(string)) if err != nil { return nil, err }