Skip to content

Commit

Permalink
Excluding retweets and replies from timeline
Browse files Browse the repository at this point in the history
This commit adds the --exclude-retweets and --exclude-replies command
line options to the timeline and timelines subcommands.

Fixes #485
  • Loading branch information
edsu committed Jun 21, 2021
1 parent f1e986c commit e5da256
Show file tree
Hide file tree
Showing 2 changed files with 153 additions and 20 deletions.
61 changes: 55 additions & 6 deletions twarc/client2.py
Original file line number Diff line number Diff line change
Expand Up @@ -508,7 +508,15 @@ def stream(self, event=None, record_keep_alives=False):
yield data

def _timeline(
self, user_id, timeline_type, since_id, until_id, start_time, end_time
self,
user_id,
timeline_type,
since_id,
until_id,
start_time,
end_time,
exclude_retweets,
exclude_replies,
):
"""
Helper function for user and mention timelines
Expand All @@ -523,7 +531,8 @@ def _timeline(
until_id (int): results with a Tweet ID less than (older than) the specified ID
start_time (datetime): oldest UTC timestamp from which the Tweets will be provided
end_time (datetime): newest UTC timestamp from which the Tweets will be provided
exclude_retweets (boolean): remove retweets from timeline
exclude_replies (boolean): remove replies from timeline
Returns:
generator[dict]: A generator, dict for each page of results.
"""
Expand All @@ -533,6 +542,12 @@ def _timeline(
params = expansions.EVERYTHING.copy()
params["max_results"] = 100

excludes = []
if exclude_retweets:
excludes.append("retweets")
if exclude_replies:
excludes.append("replies")

if since_id:
params["since_id"] = since_id
if until_id:
Expand All @@ -541,6 +556,8 @@ def _timeline(
params["start_time"] = _ts(start_time)
if end_time:
params["end_time"] = _ts(end_time)
if len(excludes) > 0:
params["exclude"] = ",".join(excludes)

count = 0
for response in self.get_paginated(url, params=params):
Expand All @@ -554,7 +571,14 @@ def _timeline(
log.info(f"No more results for timeline {user_id}.")

def timeline(
self, user, since_id=None, until_id=None, start_time=None, end_time=None
self,
user,
since_id=None,
until_id=None,
start_time=None,
end_time=None,
exclude_retweets=False,
exclude_replies=False,
):
"""
Retrieve up to the 3200 most recent tweets made by the given user.
Expand All @@ -567,17 +591,33 @@ def timeline(
until_id (int): results with a Tweet ID less than (older than) the specified ID
start_time (datetime): oldest UTC timestamp from which the Tweets will be provided
end_time (datetime): newest UTC timestamp from which the Tweets will be provided
exclude_retweets (boolean): remove retweets from timeline results
exclude_replies (boolean): remove replies from timeline results
Returns:
generator[dict]: A generator, dict for each page of results.
"""
user_id = self._ensure_user_id(user)
return self._timeline(
user_id, "tweets", since_id, until_id, start_time, end_time
user_id,
"tweets",
since_id,
until_id,
start_time,
end_time,
exclude_retweets,
exclude_replies,
)

def mentions(
self, user, since_id=None, until_id=None, start_time=None, end_time=None
self,
user,
since_id=None,
until_id=None,
start_time=None,
end_time=None,
exclude_retweets=False,
exclude_replies=False,
):
"""
Retrieve up to the 800 most recent tweets mentioning the given user.
Expand All @@ -590,13 +630,22 @@ def mentions(
until_id (int): results with a Tweet ID less than (older than) the specified ID
start_time (datetime): oldest UTC timestamp from which the Tweets will be provided
end_time (datetime): newest UTC timestamp from which the Tweets will be provided
exclude_retweets (boolean): remove retweets from timeline results
exclude_replies (boolean): remove replies from timeline results
Returns:
generator[dict]: A generator, dict for each page of results.
"""
user_id = self._ensure_user_id(user)
return self._timeline(
user_id, "mentions", since_id, until_id, start_time, end_time
user_id,
"mentions",
since_id,
until_id,
start_time,
end_time,
exclude_retweets,
exclude_replies,
)

def following(self, user):
Expand Down
112 changes: 98 additions & 14 deletions twarc/command2.py
Original file line number Diff line number Diff line change
Expand Up @@ -396,6 +396,18 @@ def mentions(T, user_id, outfile, since_id, until_id, start_time, end_time):
@click.option("--limit", default=0, help="Maximum number of tweets to return")
@click.option("--since-id", type=int, help="Match tweets sent after tweet id")
@click.option("--until-id", type=int, help="Match tweets sent prior to tweet id")
@click.option(
"--exclude-retweets",
is_flag=True,
default=False,
help="Exclude retweets from timeline",
)
@click.option(
"--exclude-replies",
is_flag=True,
default=False,
help="Exclude replies from timeline",
)
@click.option(
"--start-time",
type=click.DateTime(formats=("%Y-%m-%d", "%Y-%m-%dT%H:%M:%S")),
Expand All @@ -417,17 +429,33 @@ def mentions(T, user_id, outfile, since_id, until_id, start_time, end_time):
@click.pass_obj
@cli_api_error
def timeline(
T, user_id, outfile, since_id, until_id, start_time, end_time, use_search, limit
T,
user_id,
outfile,
since_id,
until_id,
start_time,
end_time,
use_search,
limit,
exclude_retweets,
exclude_replies,
):
"""
Retrieve recent tweets for the given user.
"""

if use_search:
q = f"from:{user_id}"
tweets = T.search_all(q, since_id, until_id, start_time, end_time)
else:
tweets = T.timeline(user_id, since_id, until_id, start_time, end_time)
tweets = _timeline_tweets(
T,
use_search,
user_id,
since_id,
until_id,
start_time,
end_time,
exclude_retweets,
exclude_replies,
)

count = 0
for result in tweets:
Expand All @@ -451,10 +479,31 @@ def timeline(
default=False,
help="Use the search/all API endpoint which is not limited to the last 3200 tweets, but requires Academic Product Track access.",
)
@click.option(
"--exclude-retweets",
is_flag=True,
default=False,
help="Exclude retweets from timeline",
)
@click.option(
"--exclude-replies",
is_flag=True,
default=False,
help="Exclude replies from timeline",
)
@click.argument("infile", type=click.File("r"), default="-")
@click.argument("outfile", type=click.File("w"), default="-")
@click.pass_obj
def timelines(T, infile, outfile, limit, timeline_limit, use_search):
def timelines(
T,
infile,
outfile,
limit,
timeline_limit,
use_search,
exclude_retweets,
exclude_replies,
):
"""
Fetch the timelines of every user in an input source of tweets. If
the input is a line oriented text file of user ids or usernames that will
Expand Down Expand Up @@ -483,13 +532,17 @@ def timelines(T, infile, outfile, limit, timeline_limit, use_search):
continue
seen.add(user)

# which api endpoint to use
if use_search and since_id:
tweets = T.search_all(f"from:{user}", since_id=since_id)
elif use_search:
tweets = T.search_all(f"from:{user}")
else:
tweets = T.timeline(user)
tweets = _timeline_tweets(
T,
use_search,
user,
None,
None,
None,
None,
exclude_retweets,
exclude_replies,
)

timeline_count = 0
for response in tweets:
Expand All @@ -504,6 +557,37 @@ def timelines(T, infile, outfile, limit, timeline_limit, use_search):
return


def _timeline_tweets(
T,
use_search,
user_id,
since_id,
until_id,
start_time,
end_time,
exclude_retweets,
exclude_replies,
):
if use_search:
q = f"from:{user_id}"
if exclude_retweets and "-is:retweet" not in q:
q += " -is:retweet"
if exclude_replies and "-is:reply" not in q:
q += " -is:reply"
tweets = T.search_all(q, since_id, until_id, start_time, end_time)
else:
tweets = T.timeline(
user_id,
since_id,
until_id,
start_time,
end_time,
exclude_retweets,
exclude_replies,
)
return tweets


@twarc2.command("conversation")
@click.option(
"--archive",
Expand Down

0 comments on commit e5da256

Please sign in to comment.