Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

"isIDExistInIDs" Binary search #14

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 22 additions & 10 deletions utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"fmt"
"io"
"os"
"sort"
"strconv"
"strings"
"time"
Expand Down Expand Up @@ -80,7 +81,7 @@ func dumpTweetsToFile(path string, tweets []*Tweet) error {
return json.NewEncoder(f).Encode(tweets)
}

// TODO: It should fetch only the ID field from the file, not all the values and then ignore the unwanted fields
// TODO: Extract only the ID fields from the JSON file (do NOT load all the data and then filter out the "ID" fields).
func loadTweetsIDFromFile(path string) ([]string, error) {
data, err := loadTweetsFromFile(path)
if err != nil {
Expand All @@ -102,20 +103,31 @@ func loadTweetsIDFromFile(path string) ([]string, error) {
// return list
// }

// TODO: Do binary search instead of linear
// We do a binary search on strings.
// IDs are numeric, so we could binary-search them as numbers, but then we would have to convert every string ID to an int first (which probably costs more).
func isIDExistInIDs(id string, ids []string) bool {
for i := range ids {
if ids[i] == id {
return true
}
return stringBinarySearch(id, ids)
}

// stringBinarySearch reports whether element is present in slice.
//
// Binary search needs sorted input, so when slice is not already sorted in
// ascending order it is sorted in place first; the caller's slice may be
// reordered as a side effect. The sortedness check itself walks the slice,
// so every call costs at least O(n) even though the search is O(log n).
func stringBinarySearch(element string, slice []string) bool {
	if !sort.StringsAreSorted(slice) {
		sort.Strings(slice)
	}
	// sort.SearchStrings returns the index i at which element would be
	// inserted to keep slice sorted:
	//   - i == len(slice): element is absent and would go at the end.
	//   - i <  len(slice): element is present only if slice[i] == element;
	//     otherwise slice[i] is merely its insertion point.
	i := sort.SearchStrings(slice, element)
	return i < len(slice) && slice[i] == element
}

// Fetch specific tweets ("برای")
// It receives the stored tweets so that tweets that were already fetched are not fetched again
// Fetch ("برای") tweets
// It receives the stored tweet IDs so that tweets that were already fetched are not fetched again
// Returns:
// fetched tweets, Tweets, Error
// Number of fetched tweets, Tweets, Error
func fetchTweets(stored_tweets_id []string) (int, []*twitterscraper.TweetResult, error) {
// fetched count (filtered and unfiltered)
// It's all tweets that program fetched, not only needed ones
Expand Down