From 6495073b47c38e32a44b261abf456f94abbbcb45 Mon Sep 17 00:00:00 2001
From: Pouria
Date: Thu, 6 Oct 2022 03:00:04 +0330
Subject: [PATCH 1/2] Now "isIDExistInIDs" function do a binary search instead of linear search It's better for performance

---
 utils.go | 32 ++++++++++++++++++++++----------
 1 file changed, 22 insertions(+), 10 deletions(-)

diff --git a/utils.go b/utils.go
index ced9702..f5137da 100644
--- a/utils.go
+++ b/utils.go
@@ -8,6 +8,7 @@ import (
 	"fmt"
 	"io"
 	"os"
+	"sort"
 	"strconv"
 	"strings"
 	"time"
@@ -80,7 +81,7 @@ func dumpTweetsToFile(path string, tweets []*Tweet) error {
 	return json.NewEncoder(f).Encode(tweets)
 }
 
-// TODO: It should fetched only ID field from file only all the values, and then ignore unwanted fields
+// TODO: It should extract only ID fields from json file.(NOT LOAD ALL DATA AND THEN FILTER THE "ID" FIELDS)
 func loadTweetsIDFromFile(path string) ([]string, error) {
 	data, err := loadTweetsFromFile(path)
 	if err != nil {
@@ -102,20 +103,31 @@ func loadTweetsIDFromFile(path string) ([]string, error) {
 // return list
 // }
 
-// TODO: Do binary search instead of linear
+// We'll do a string binary search
+// ID's are number so we can do binary sort on numbers, But then we should convert all string ID's to int (Probably it takes more cost)
 func isIDExistInIDs(id string, ids []string) bool {
-	for i := range ids {
-		if ids[i] == id {
-			return true
-		}
+	return stringBinarySearch(id, ids)
+}
+
+func stringBinarySearch(element string, slice []string) bool {
+	if !sort.StringsAreSorted(slice) {
+		sort.Strings(slice)
+	}
+	// SearchStrings return where element(input) should be inserted in slice (say returns i)
+	// If i == len(ids), means that element not exists and should be inserted in end of slice
+	// If i < len(ids), means that element exists or should be inserted in slice[i], So if slice[i] == element, then means that element exists, otherwise it's not exists
+	i := sort.SearchStrings(slice, element)
+	if i < len(slice) && slice[i] == element {
+		return true
+	} else {
+		return false
 	}
-	return false
 }
 
-// Fetch specific tweets ("برای")
-// It's recive stored tweets for don't fetch tweets that already fetched
+// Fetch ("برای") tweets
+// It's recives stored tweets id for don't fetch tweets that already fetched
 // Returns:
-// fetched tweets, Tweets, Error
+// Number of fetched tweets, Tweets, Error
 func fetchTweets(stored_tweets_id []string) (int, []*twitterscraper.TweetResult, error) {
 	// fetched count (filtered and unfiltered)
 	// It's all tweets that program fetched, not only needed ones
From d3740d4999ab09b6b874c393886bd036a9d55f06 Mon Sep 17 00:00:00 2001
From: Pouria
Date: Thu, 6 Oct 2022 03:07:37 +0330
Subject: [PATCH 2/2] Comment typo

---
 utils.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utils.go b/utils.go
index f5137da..c195c71 100644
--- a/utils.go
+++ b/utils.go
@@ -125,7 +125,7 @@ func stringBinarySearch(element string, slice []string) bool {
 }
 
 // Fetch ("برای") tweets
-// It's recives stored tweets id for don't fetch tweets that already fetched
+// It's recives stored tweets ID for don't fetch tweets that already fetched
 // Returns:
 // Number of fetched tweets, Tweets, Error
 func fetchTweets(stored_tweets_id []string) (int, []*twitterscraper.TweetResult, error) {