Skip to content

Commit

Permalink
Merge pull request #588 from DocNow/likes_retweets
Browse files Browse the repository at this point in the history
Support likes and retweets endpoints.
  • Loading branch information
edsu authored Jan 27, 2022
2 parents 8bf04b2 + 6d0e529 commit a0afccb
Show file tree
Hide file tree
Showing 4 changed files with 335 additions and 1 deletion.
26 changes: 26 additions & 0 deletions docs/twarc2_en_us.md
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,32 @@ conversation:

twarc2 conversation 266031293945503744 > conversation.jsonl

## Likes

Twarc supports the two approaches that the Twitter API exposes for collecting likes via the `liked-tweets` and `liking-users` commands.

The `liked-tweets` command returns the tweets that have been liked by a specific account. The account is specified by its user ID; the following example uses the account of Twitter's founder:

twarc2 liked-tweets 12 jacks-likes.jsonl

In this case the output file contains all of the likes of publicly accessible tweets. Note that the order of likes is not guaranteed by the API, but is probably reverse chronological, or most recent likes by that account first. The underlying tweet objects contain no information about when the tweet was liked.

The `liking-users` command returns the user profiles of the accounts that have liked a specific tweet (specified by the ID of the tweet):

twarc2 liking-users 1460417326130421765 liking-users.jsonl

In this example the output file contains all of the user profiles of the publicly accessible accounts that have liked that specific tweet. Note that the order of profiles is not guaranteed by the API, but is probably reverse chronological, i.e. the profile of the account that most recently liked the tweet comes first. The underlying profile objects contain no information about when the tweet was liked.

Note that likes of tweets that are not publicly accessible, or likes by accounts that are protected, will not be retrieved by either of these methods. Therefore, the metrics available on a tweet object (under the `public_metrics.like_count` field) will likely be higher than the number of likes you can retrieve via the Twitter API using these endpoints.

## Retweets

You can retrieve the user profiles of publicly accessible accounts that have retweeted a specific tweet, using the `retweeted-by` command and the ID of the tweet as an identifier. For example:

twarc2 retweeted-by 1460417326130421765 retweeting-users.jsonl

Unfortunately this only returns the user profiles (presumably in reverse chronological order) of the retweeters of that tweet - this means that important information, like when the tweet was retweeted, is not present in the returned object.

## Dehydrate

The `dehydrate` command generates an id list from a file of tweets:
Expand Down
48 changes: 48 additions & 0 deletions test_twarc2.py
Original file line number Diff line number Diff line change
Expand Up @@ -505,6 +505,54 @@ def test_ensure_user_id():
assert T._ensure_user_id(1033441111677788160) == "1033441111677788160"


def test_liking_users():
    """Page through the users who liked a known tweet and check their shape."""
    seen = 0

    # This is one of @jack's tweets about the Twitter API
    for page in T.liking_users(1460417326130421765):
        assert "data" in page
        users = page["data"]
        # These should be user objects.
        assert "description" in users[0]
        seen += len(users)
        # A few pages are enough; no need to walk every like.
        if seen > 300:
            break


def test_retweeted_by():
    """Page through the users who retweeted a known tweet and check their shape."""
    seen = 0

    # This is one of @jack's tweets about the Twitter API
    for page in T.retweeted_by(1460417326130421765):
        assert "data" in page
        users = page["data"]
        # These should be user objects.
        assert "description" in users[0]
        seen += len(users)
        # A couple of pages are enough; no need to walk every retweet.
        if seen > 150:
            break


def test_liked_tweets():
    """Page through the tweets liked by a known user and check their shape."""
    seen = 0

    # What has @jack liked?
    for page in T.liked_tweets(12):
        assert "data" in page
        tweets = page["data"]
        # These should be tweet objects.
        assert "text" in tweets[0]
        seen += len(tweets)
        # A few pages are enough; no need to walk every like.
        if seen > 300:
            break


def test_twarc_metadata():

# With metadata (default)
Expand Down
113 changes: 112 additions & 1 deletion twarc/client2.py
Original file line number Diff line number Diff line change
Expand Up @@ -1034,6 +1034,109 @@ def followers(
url = f"https://api.twitter.com/2/users/{user_id}/followers"
return self.get_paginated(url, params=params)

def liking_users(
    self,
    tweet_id,
    expansions=None,
    tweet_fields=None,
    user_fields=None,
    max_results=100,
    pagination_token=None,
):
    """
    Yield pages of user profiles for the accounts that liked the given tweet.

    Pages returned by get_paginated() that have no "data" key are not
    yielded; an info-level log message is emitted for them instead.

    Note that `expansions` is only tested for truthiness: any truthy value
    requests the "pinned_tweet_id" expansion.
    """
    endpoint = f"https://api.twitter.com/2/tweets/{tweet_id}/liking_users"

    query = self._prepare_params(
        tweet_fields=tweet_fields,
        user_fields=user_fields,
        max_results=max_results,
        pagination_token=pagination_token,
    )
    if expansions:
        query["expansions"] = "pinned_tweet_id"

    for response in self.get_paginated(endpoint, params=query):
        if "data" not in response:
            log.info(
                f"Retrieved an empty page of results for liking_users of {tweet_id}"
            )
            continue
        yield response

def liked_tweets(
    self,
    user_id,
    max_results=100,
    expansions=None,
    tweet_fields=None,
    user_fields=None,
    media_fields=None,
    poll_fields=None,
    place_fields=None,
    pagination_token=None,
):
    """
    Yield pages of tweets liked by the given user_id.

    Args:
        user_id: ID of the user whose liked tweets are requested; it is
            interpolated into the endpoint URL.
        max_results: Requested page size, forwarded to _prepare_params().
        expansions, tweet_fields, user_fields, media_fields, poll_fields,
        place_fields: Field/expansion selections forwarded unchanged to
            _prepare_params() (None presumably selects that helper's
            defaults - confirm against _prepare_params).
        pagination_token: Token to resume paging from, forwarded to
            _prepare_params().

    Yields:
        Each page from get_paginated() that contains a "data" key. Pages
        without one are skipped and reported with an info-level log message.
    """
    url = f"https://api.twitter.com/2/users/{user_id}/liked_tweets"

    # Forward the caller's arguments. Previously literal defaults were passed
    # here, so max_results, pagination_token and every field selection given
    # by the caller were silently ignored.
    params = self._prepare_params(
        max_results=max_results,
        expansions=expansions,
        tweet_fields=tweet_fields,
        user_fields=user_fields,
        media_fields=media_fields,
        poll_fields=poll_fields,
        place_fields=place_fields,
        pagination_token=pagination_token,
    )

    for page in self.get_paginated(url, params=params):
        if "data" in page:
            yield page
        else:
            log.info(
                f"Retrieved an empty page of results for liked_tweets of {user_id}"
            )

def retweeted_by(
    self,
    tweet_id,
    expansions=None,
    tweet_fields=None,
    user_fields=None,
    max_results=100,
    pagination_token=None,
):
    """
    Yield pages of user profiles for the accounts that retweeted the given tweet.

    Pages returned by get_paginated() that have no "data" key are not
    yielded; an info-level log message is emitted for them instead.

    Note that `expansions` is only tested for truthiness: any truthy value
    requests the "pinned_tweet_id" expansion.
    """
    endpoint = f"https://api.twitter.com/2/tweets/{tweet_id}/retweeted_by"

    query = self._prepare_params(
        tweet_fields=tweet_fields,
        user_fields=user_fields,
        max_results=max_results,
        pagination_token=pagination_token,
    )
    if expansions:
        query["expansions"] = "pinned_tweet_id"

    for response in self.get_paginated(endpoint, params=query):
        if "data" not in response:
            log.info(
                f"Retrieved an empty page of results for retweeted_by of {tweet_id}"
            )
            continue
        yield response

@catch_request_exceptions
@rate_limit
def get(self, *args, **kwargs):
Expand Down Expand Up @@ -1078,7 +1181,15 @@ def get_paginated(self, *args, **kwargs):

yield page

endings = ["mentions", "tweets", "following", "followers"]
endings = [
"mentions",
"tweets",
"following",
"followers",
"liked_tweets",
"liking_users",
"retweeted_by",
]

# The search endpoints only take a next_token, but the timeline
# endpoints take a pagination_token instead - this is a bit of a hack,
Expand Down
149 changes: 149 additions & 0 deletions twarc/command2.py
Original file line number Diff line number Diff line change
Expand Up @@ -876,6 +876,155 @@ def following(T, user, outfile, limit, max_results, hide_progress):
break


@twarc2.command("liking-users")
@click.option(
    "--limit",
    default=0,
    help="Maximum number of liking users to retrieve. Increments of 100 or --max-results if set.",
    type=int,
)
@click.option(
    "--max-results",
    default=100,
    help="Maximum number of users (likes) per page. Default and maximum is 100.",
    type=int,
)
@command_line_progressbar_option
@click.argument("tweet_id", type=str)
@click.argument("outfile", type=click.File("w"), default="-")
@click.pass_obj
@cli_api_error
def liking_users(T, tweet_id, outfile, limit, max_results, hide_progress):
    """
    Get the users that liked a specific tweet.
    Note that the progress bar is approximate.
    """
    # NB: the docstring above is shown as the command's help text, so
    # implementation notes live in comments instead.

    # Reject anything that is not a purely numeric ID before calling the API;
    # previously the error was printed but the requests were still made.
    if not re.match(r"^\d+$", str(tweet_id)):
        click.echo(click.style("Please enter a tweet ID", fg="red"), err=True)
        return

    # A progress bar would be interleaved with the JSON written to stdout.
    if outfile is not None and (outfile.name == "<stdout>"):
        hide_progress = True

    count = 0
    lookup_total = 0

    if not hide_progress:
        # TODO: we could probably do this every time, and avoid doing any lookups
        # for tweets that don't exist anymore.
        # Only the first page is needed; fall back to an empty page so a lookup
        # that yields nothing leaves the total at 0 instead of raising.
        target_tweet = next(iter(T.tweet_lookup([tweet_id])), {})
        if "data" in target_tweet:
            lookup_total = target_tweet["data"][0]["public_metrics"]["like_count"]

    with tqdm(disable=hide_progress, total=lookup_total) as progress:
        for result in T.liking_users(tweet_id, max_results=max_results):
            _write(result, outfile)
            page_size = len(result.get("data", []))
            count += page_size
            progress.update(page_size)
            # The limit is checked per page, so it is honoured in page-sized steps.
            if limit != 0 and count >= limit:
                progress.desc = f"Set --limit of {limit} reached"
                break


@twarc2.command("retweeted-by")
@click.option(
    "--limit",
    default=0,
    help="Maximum number of retweeting users to retrieve. Increments of 100 or --max-results if set.",
    type=int,
)
@click.option(
    "--max-results",
    default=100,
    help="Maximum number of users (retweets) per page of results. Default and maximum is 100.",
    type=int,
)
@command_line_progressbar_option
@click.argument("tweet_id", type=str)
@click.argument("outfile", type=click.File("w"), default="-")
@click.pass_obj
@cli_api_error
def retweeted_by(T, tweet_id, outfile, limit, max_results, hide_progress):
    """
    Get the users that retweeted a specific tweet.
    Note that the progress bar is approximate.
    """
    # NB: the docstring above is shown as the command's help text, so
    # implementation notes live in comments instead.

    # Reject anything that is not a purely numeric ID before calling the API;
    # previously the error was printed but the requests were still made.
    if not re.match(r"^\d+$", str(tweet_id)):
        click.echo(click.style("Please enter a tweet ID", fg="red"), err=True)
        return

    # A progress bar would be interleaved with the JSON written to stdout.
    if outfile is not None and (outfile.name == "<stdout>"):
        hide_progress = True

    count = 0
    lookup_total = 0

    if not hide_progress:
        # TODO: we could probably do this every time, and avoid doing any lookups
        # for tweets that don't exist anymore.
        # Only the first page is needed; fall back to an empty page so a lookup
        # that yields nothing leaves the total at 0 instead of raising.
        target_tweet = next(iter(T.tweet_lookup([tweet_id])), {})
        if "data" in target_tweet:
            lookup_total = target_tweet["data"][0]["public_metrics"]["retweet_count"]

    with tqdm(disable=hide_progress, total=lookup_total) as progress:
        for result in T.retweeted_by(tweet_id, max_results=max_results):
            _write(result, outfile)
            page_size = len(result.get("data", []))
            count += page_size
            progress.update(page_size)
            # The limit is checked per page, so it is honoured in page-sized steps.
            if limit != 0 and count >= limit:
                progress.desc = f"Set --limit of {limit} reached"
                break


@twarc2.command("liked-tweets")
@click.option(
    "--limit",
    default=0,
    help="Maximum number of liked tweets to retrieve. Increments of 100 or --max-results if set.",
    type=int,
)
@click.option(
    "--max-results",
    default=100,
    help="Maximum number of liked tweets per page of results. Default and maximum is 100.",
    type=int,
)
@command_line_progressbar_option
@click.argument("user_id", type=str)
@click.argument("outfile", type=click.File("w"), default="-")
@click.pass_obj
@cli_api_error
def liked_tweets(T, user_id, outfile, limit, max_results, hide_progress):
    """
    Get the tweets liked by a specific user_id.
    Note that the progress bar is approximate.
    """
    # NB: the docstring above is shown as the command's help text, so
    # implementation notes live in comments instead.

    # Reject anything that is not a purely numeric ID before calling the API;
    # previously the error was printed but the requests were still made.
    if not re.match(r"^\d+$", str(user_id)):
        click.echo(click.style("Please enter a user ID", fg="red"), err=True)
        return

    # A progress bar would be interleaved with the JSON written to stdout.
    if outfile is not None and (outfile.name == "<stdout>"):
        hide_progress = True

    count = 0

    # NB: there doesn't appear to be any way to get the total count of likes
    # a user has made, so the progress bar isn't very useful in this case...

    with tqdm(disable=hide_progress) as progress:
        for result in T.liked_tweets(user_id, max_results=max_results):
            _write(result, outfile)
            page_size = len(result.get("data", []))
            count += page_size
            progress.update(page_size)
            # The limit is checked per page, so it is honoured in page-sized steps.
            if limit != 0 and count >= limit:
                progress.desc = f"Set --limit of {limit} reached"
                break


@twarc2.command("sample")
@command_line_expansions_shortcuts
@command_line_expansions_options
Expand Down

0 comments on commit a0afccb

Please sign in to comment.