From 59026f9a1d74ff5effed42911a4ad2268120503d Mon Sep 17 00:00:00 2001 From: Camilo Soto Montoya Date: Sat, 8 May 2021 13:18:37 -0500 Subject: [PATCH 1/2] shared class updates --- README.md | 4 +- src/pycoornet/shared.py | 193 +++++++++++++++++++--------------------- src/pycoornet/utils.py | 4 - tests/test_pycoornet.py | 14 ++- 4 files changed, 107 insertions(+), 108 deletions(-) diff --git a/README.md b/README.md index 6cc1baa..4e99376 100644 --- a/README.md +++ b/README.md @@ -40,8 +40,8 @@ def main(): # Init CrowdTangle with api key crowd_tangle = CrowdTangle("abc123def345") ct_df = crowd_tangle.get_shares(urls=links_df, url_column='clean_url', date_column='date',clean_urls=True, platforms='facebook', sleep_time=30) - shared = Shared() - crowtangle_shares_df, shares_graph, q = shared.coord_shares(ct_df, clean_urls=True) + shared = Shared(ct_df) + crowtangle_shares_df, shares_graph, q = shared.coord_shares(clean_urls=True) #Build Gephi File for node in shares_graph.nodes(data=True): diff --git a/src/pycoornet/shared.py b/src/pycoornet/shared.py index adf4bc8..8abb62e 100644 --- a/src/pycoornet/shared.py +++ b/src/pycoornet/shared.py @@ -9,14 +9,24 @@ logger = logging.getLogger(__name__) + + class Shared: - """Shared class docsgring - """ - # def __init__(self): - # pass + """Coordinated Link Sharing Behavor (CLSB) detector. - def __estimate_coord_interval(self, crowtangle_shares_df, q=0.1, p=0.5, clean_urls=False, keep_ourl_only=False): + Args: + crowdtangle_shares_df (pandas.DataFrame): the pandas dataframe of link posts resulting from the function + CrowdTangle shares + Returns: + 2-elementos tuple containing + - **graph** (networkx.Graph): An graph (highly_connected_g) with networks of coordinated entities whose edges also contains a t_coord_share attribute (vector) reporting the timestamps of every time the edge was detected as coordinated sharing. + - **q** (float): Percentile edge weight number of leeped repetedly coordinated link sharing. + """ + def __init__(self, crowdtangle_shares_df): + self.__crowdtangle_shares_df = crowdtangle_shares_df + + def estimate_coord_interval(self, q=0.1, p=0.5, clean_urls=False, keep_ourl_only=False): """Estimates a threshold in seconds that defines a coordinated link share. While it is common that multiple (pages/groups/account) entities share the same link, some tend to perform these actions in an unusually short period of time. Unusual is thus defined here as a function of the median co-share time difference. More specifically, the function ranks all @@ -24,7 +34,6 @@ def __estimate_coord_interval(self, crowtangle_shares_df, q=0.1, p=0.5, clean_ur The value returned is the median time in seconds spent by these URLs to cumulate the p\% (default 0.1) of their total shares. Args: - crowtangle_shares_df (pandas.Dataframe): the dataframe of link posts q (float, optional): controls the quantile of quickest URLs to be filtered. Defaults to 0.1. @@ -37,9 +46,8 @@ def __estimate_coord_interval(self, crowtangle_shares_df, q=0.1, p=0.5, clean_ur Returns: 2-element tuple containing - - **summary** (pandas.DataFrame): summary statistics of q\% quickest second share performing URLs. - - **time** (integer): time in seconds corresponding to the median time spent by these URLs to cumulate the % of their total shares. + - **coordination interval** (integer): time in seconds corresponding to the median time spent by these URLs to cumulate the % of their total shares. """ if 0 1 urls_df = urls_df[urls_df['ct_shares'] >1] - # filter the crowtangle_shares_df that join with urls_df - crowtangle_shares_df = crowtangle_shares_df[crowtangle_shares_df.set_index('expanded').index.isin(urls_df.set_index('URL').index)] + # filter the crowdtangle_shares_df that join with urls_df + crowdtangle_shares_df = crowdtangle_shares_df[crowdtangle_shares_df.set_index('expanded').index.isin(urls_df.set_index('URL').index)] #metrics creation - crowtangle_shares_df['date'] = crowtangle_shares_df['date'].astype('datetime64[ns]') - ranked_shares_df = crowtangle_shares_df[['expanded', 'date']] - shares_gb = crowtangle_shares_df.groupby('expanded') + crowdtangle_shares_df['date'] = crowdtangle_shares_df['date'].astype('datetime64[ns]') + ranked_shares_df = crowdtangle_shares_df[['expanded', 'date']] + shares_gb = crowdtangle_shares_df.groupby('expanded') ranked_shares_df['ct_shares_count']=shares_gb['id'].transform('nunique') ranked_shares_df['first_share_date'] = shares_gb['date'].transform('min') ranked_shares_df['rank'] = shares_gb['date'].rank(ascending=True, method='first') ranked_shares_df['perc_of_shares'] = ranked_shares_df['rank']/ranked_shares_df['ct_shares_count'] - #ranked_shares_df['perc_of_shares'] = shares_gb['date'].rank(ascending=True) ranked_shares_df['sec_from_first_share'] = (ranked_shares_df['date'] - ranked_shares_df['first_share_date']).dt.total_seconds() ranked_shares_df = ranked_shares_df.sort_values(by = 'expanded') @@ -126,7 +134,7 @@ def __estimate_coord_interval(self, crowtangle_shares_df, q=0.1, p=0.5, clean_ur return coord_interval - def __buid_graph(self, crowtangle_shares_df, coordinated_shares_df, percentile_edge_weight = 90, timestamps = False): + def __buid_graph(self, crowdtangle_shares_df, coordinated_shares_df, percentile_edge_weight = 90, timestamps = False): logger.info("Bulding graph") coord_df = coordinated_shares_df[['account_url', 'url', 'share_date']].reset_index(drop=True) coord_graph = nx.from_pandas_edgelist(coord_df, 'account_url', 'url', create_using=nx.DiGraph()) @@ -148,19 +156,19 @@ def __buid_graph(self, crowtangle_shares_df, coordinated_shares_df, percentile_e full_graph = bipartite.weighted_projected_graph(bipartite_graph, account_urls) #pandas helper dataframe to calcule graph node attribues - crowtangle_shares_df['account_name'] = crowtangle_shares_df['account_name'].astype(str) - crowtangle_shares_df['account_handle'] = crowtangle_shares_df['account_handle'].astype(str) - crowtangle_shares_df['account_pageAdminTopCountry'] = crowtangle_shares_df['account_pageAdminTopCountry'].astype(str) - crowtangle_shares_gb = crowtangle_shares_df.groupby('account_url') - crowtangle_shares_df['name_changed']=(crowtangle_shares_gb['account_name'].transform("nunique"))>1 - crowtangle_shares_df['handle_changed']=(crowtangle_shares_gb['account_handle'].transform("nunique"))>1 - crowtangle_shares_df['page_admin_top_country_changed']=(crowtangle_shares_gb['account_pageAdminTopCountry'].transform("nunique"))>1 - crowtangle_shares_df['account_name'] = crowtangle_shares_gb['account_name'].transform(lambda col: '|'.join(col.unique())) - crowtangle_shares_df['account_handle'] = crowtangle_shares_gb['account_handle'].transform(lambda col: '|'.join(col.unique())) - crowtangle_shares_df['account_pageAdminTopCountry'] = crowtangle_shares_gb['account_pageAdminTopCountry'].transform(lambda col: '|'.join(col.unique())) - crowtangle_shares_df[['account_name','account_handle','account_pageAdminTopCountry','name_changed','handle_changed','page_admin_top_country_changed']] - - crowtangle_shares_gb = crowtangle_shares_df.reset_index().groupby(['account_url']) + crowdtangle_shares_df['account_name'] = crowdtangle_shares_df['account_name'].astype(str) + crowdtangle_shares_df['account_handle'] = crowdtangle_shares_df['account_handle'].astype(str) + crowdtangle_shares_df['account_pageAdminTopCountry'] = crowdtangle_shares_df['account_pageAdminTopCountry'].astype(str) + crowtangle_shares_gb = crowdtangle_shares_df.groupby('account_url') + crowdtangle_shares_df['name_changed']=(crowtangle_shares_gb['account_name'].transform("nunique"))>1 + crowdtangle_shares_df['handle_changed']=(crowtangle_shares_gb['account_handle'].transform("nunique"))>1 + crowdtangle_shares_df['page_admin_top_country_changed']=(crowtangle_shares_gb['account_pageAdminTopCountry'].transform("nunique"))>1 + crowdtangle_shares_df['account_name'] = crowtangle_shares_gb['account_name'].transform(lambda col: '|'.join(col.unique())) + crowdtangle_shares_df['account_handle'] = crowtangle_shares_gb['account_handle'].transform(lambda col: '|'.join(col.unique())) + crowdtangle_shares_df['account_pageAdminTopCountry'] = crowtangle_shares_gb['account_pageAdminTopCountry'].transform(lambda col: '|'.join(col.unique())) + crowdtangle_shares_df[['account_name','account_handle','account_pageAdminTopCountry','name_changed','handle_changed','page_admin_top_country_changed']] + + crowtangle_shares_gb = crowdtangle_shares_df.reset_index().groupby(['account_url']) account_info_df = crowtangle_shares_gb['index'].agg([('shares','count')]) account_info_df = account_info_df.merge(pd.DataFrame(crowtangle_shares_gb['is_coordinated'].apply(lambda x: (x==True).sum())).rename(columns={'is_coordinated':'coord_shares'}), left_index=True, right_index=True) @@ -249,22 +257,15 @@ def __buid_graph(self, crowtangle_shares_df, coordinated_shares_df, percentile_e return highly_connected_graph, q - def coord_shares(self, dataframe, coordination_interval=None, parallel=False, percentile_edge_weight=90, clean_urls=False, keep_ourl_only=False, gtimestamps=False): + def coord_shares(self, coordination_interval=None, percentile_edge_weight=90, clean_urls=False, keep_ourl_only=False, gtimestamps=False): """Given a dataframe of CrowdTangle shares and a time threshold, this function detects networks of entities (pages, accounts and groups) that performed coordinated link sharing behavior. Args: - dataframe (pandas.DataFrame): the pandas dataframe of link posts resulting from the function - CrowdTangle shares - coordination_interval (int, optional): a threshold in seconds that defines a coordinated share. Given a dataset of CrowdTangle shares, this threshold is automatically estimated. Alternatively it can be manually passed to the function in seconds. Defaults to None. - parallel (bool, optional): enables parallel processing to speed up the process taking advantage - of multiple cores. The number of cores is automatically set to all the available cores minus one. - Defaults to False. - percentile_edge_weight (float, optional): defines the percentile of the edge distribution to keep in order to identify a network of coordinated entities. In other terms, this value determines the minimum number of times that two entities had to coordinate in order to be considered part of a network. Defaults to 0.90. @@ -278,14 +279,16 @@ def coord_shares(self, dataframe, coordination_interval=None, parallel=False, pe Slow on large networks. Defaults to False. Returns: - 2-element tuple containing + 3-element tuple containing - **coordinated_df** (pandas.DataFrame): The input dataframe of shares with an additional boolean variable (coordinated) that identifies coordinated shares. - **graph** (networkx.Graph): An graph (highly_connected_g) with networks of coordinated entities whose edges also contains a t_coord_share attribute (vector) reporting the timestamps of every time the edge was detected as coordinated sharing. + - **q** (networkx.Graph): Percentile edge weight number of leeped repetedly coordinated link sharing. """ # estimate the coordination interval if not specified by the users + dataframe = self.__crowdtangle_shares_df.copy(deep=True) if coordination_interval == None: - coordination_interval = self.__estimate_coord_interval(dataframe, clean_urls=clean_urls, keep_ourl_only=keep_ourl_only) + coordination_interval = self.estimate_coord_interval(clean_urls=clean_urls, keep_ourl_only=keep_ourl_only) coordination_interval = coordination_interval[1] if coordination_interval == 0: @@ -305,59 +308,51 @@ def coord_shares(self, dataframe, coordination_interval=None, parallel=False, pe urls_df = urls_df[urls_df['ct_shares'] >1] urls_df = urls_df.sort_values('URL') - crowtangle_shares_df = dataframe[dataframe.set_index('expanded').index.isin(urls_df.set_index('URL').index)] - - if parallel: - pass - else: - data_list = [] - urls_count = urls_df.shape[0] - i=0 - - with tqdm(total=urls_df.shape[0]) as pbar: - for index, row in urls_df.iterrows(): - pbar.update(1) - i=i+1 - logger.debug(f"processing {i} of {urls_count}, url={row['URL']}") - summary_df = crowtangle_shares_df[crowtangle_shares_df['expanded'] == row['URL']].copy(deep=True) - if summary_df.groupby('account_url')['account_url'].nunique().shape[0]>1: - summary_df['date'] = summary_df['date'].astype('datetime64[ns]') - #summary_df['cut'] = pd.cut(summary_df['date'], int(coordination_interval)) - date_serie = summary_df['date'].astype('int64') // 10 ** 9 - max = date_serie.max() - min = date_serie.min() - div = (max-min)/coordination_interval + 1 - summary_df["cut"] = pd.cut(summary_df['date'],int(div)).apply(lambda x: x.left).astype('datetime64[ns]') - cut_gb = summary_df.groupby('cut') - summary_df.loc[:,'count'] = cut_gb['cut'].transform('count') - #summary_df = summary_df[['cut', 'count']].copy(deep=True) - # summary_df = summary_df.rename(columns = {'date': 'share_date'}) - summary_df.loc[:,'url'] = row['URL'] - summary_df.loc[:,'account_url'] = cut_gb['account_url'].transform(lambda x: [x.tolist()]*len(x)) - summary_df.loc[:,'share_date'] = cut_gb['date'].transform(lambda x: [x.tolist()]*len(x)) - summary_df = summary_df[['cut', 'count', 'account_url','share_date', 'url']] - summary_df = summary_df[summary_df['count']>1] - if summary_df.shape[0]>1: - summary_df = summary_df.loc[summary_df.astype(str).drop_duplicates().index] - #summary_df['account_url'] = [account_url] * summary_df.shape[0] - #summary_df['share_date'] = [dates] * summary_df.shape[0] - data_list.append(summary_df) - - data_df = pd.concat(data_list) - if data_df.shape[0] == 0: - logger.info('there are not enough shares!') - return None - - coordinated_shares_df = data_df.reset_index(drop=True).apply(pd.Series.explode).reset_index(drop=True) - - crowtangle_shares_df = crowtangle_shares_df.reset_index(drop=True) - crowtangle_shares_df.loc[:,'coord_expanded']=crowtangle_shares_df['expanded'].isin(coordinated_shares_df['url']) - crowtangle_shares_df.loc[:,'coord_date']=crowtangle_shares_df['date'].astype('datetime64[ns]').isin(coordinated_shares_df['share_date']) - crowtangle_shares_df.loc[:,'coord_account_url']=crowtangle_shares_df['account_url'].isin(coordinated_shares_df['account_url']) - - crowtangle_shares_df.loc[:,'is_coordinated'] = crowtangle_shares_df.apply(lambda x : True if (x['coord_expanded'] and x['coord_date'] and x['coord_account_url']) else False, axis=1) - crowtangle_shares_df.drop(['coord_expanded','coord_date', 'coord_account_url'], inplace = True, axis=1) - - highly_connected_graph, q = self.__buid_graph(crowtangle_shares_df, coordinated_shares_df, percentile_edge_weight=percentile_edge_weight, timestamps=gtimestamps) - - return crowtangle_shares_df, highly_connected_graph, q + crowdtangle_shares_df = dataframe[dataframe.set_index('expanded').index.isin(urls_df.set_index('URL').index)] + + data_list = [] + urls_count = urls_df.shape[0] + i=0 + + with tqdm(total=urls_df.shape[0]) as pbar: + for index, row in urls_df.iterrows(): + pbar.update(1) + i=i+1 + logger.debug(f"processing {i} of {urls_count}, url={row['URL']}") + summary_df = crowdtangle_shares_df[crowdtangle_shares_df['expanded'] == row['URL']].copy(deep=True) + if summary_df.groupby('account_url')['account_url'].nunique().shape[0]>1: + summary_df['date'] = summary_df['date'].astype('datetime64[ns]') + date_serie = summary_df['date'].astype('int64') // 10 ** 9 + max = date_serie.max() + min = date_serie.min() + div = (max-min)/coordination_interval + 1 + summary_df["cut"] = pd.cut(summary_df['date'],int(div)).apply(lambda x: x.left).astype('datetime64[ns]') + cut_gb = summary_df.groupby('cut') + summary_df.loc[:,'count'] = cut_gb['cut'].transform('count') + summary_df.loc[:,'url'] = row['URL'] + summary_df.loc[:,'account_url'] = cut_gb['account_url'].transform(lambda x: [x.tolist()]*len(x)) + summary_df.loc[:,'share_date'] = cut_gb['date'].transform(lambda x: [x.tolist()]*len(x)) + summary_df = summary_df[['cut', 'count', 'account_url','share_date', 'url']] + summary_df = summary_df[summary_df['count']>1] + if summary_df.shape[0]>1: + summary_df = summary_df.loc[summary_df.astype(str).drop_duplicates().index] + data_list.append(summary_df) + + data_df = pd.concat(data_list) + if data_df.shape[0] == 0: + logger.info('there are not enough shares!') + return None + + coordinated_shares_df = data_df.reset_index(drop=True).apply(pd.Series.explode).reset_index(drop=True) + + crowdtangle_shares_df = crowdtangle_shares_df.reset_index(drop=True) + crowdtangle_shares_df.loc[:,'coord_expanded']=crowdtangle_shares_df['expanded'].isin(coordinated_shares_df['url']) + crowdtangle_shares_df.loc[:,'coord_date']=crowdtangle_shares_df['date'].astype('datetime64[ns]').isin(coordinated_shares_df['share_date']) + crowdtangle_shares_df.loc[:,'coord_account_url']=crowdtangle_shares_df['account_url'].isin(coordinated_shares_df['account_url']) + + crowdtangle_shares_df.loc[:,'is_coordinated'] = crowdtangle_shares_df.apply(lambda x : True if (x['coord_expanded'] and x['coord_date'] and x['coord_account_url']) else False, axis=1) + crowdtangle_shares_df.drop(['coord_expanded','coord_date', 'coord_account_url'], inplace = True, axis=1) + + highly_connected_graph, q = self.__buid_graph(crowdtangle_shares_df, coordinated_shares_df, percentile_edge_weight=percentile_edge_weight, timestamps=gtimestamps) + + return crowdtangle_shares_df, highly_connected_graph, q diff --git a/src/pycoornet/utils.py b/src/pycoornet/utils.py index ad67226..623a8c3 100644 --- a/src/pycoornet/utils.py +++ b/src/pycoornet/utils.py @@ -66,10 +66,6 @@ def clean_urls(dataframe, url_column): dataframe = dataframe.loc[dataframe[url_column].str.contains('^http://127.0.0.1|^https://127.0.0.1|http://localhost|https://localhost') == False] dataframe = dataframe.loc[dataframe[url_column].str.contains('http://|https://')] - #Remove the URL query parameters and set NaN to protocols that aren't http or https - #dataframe[url_column] = dataframe[url_column].apply(lambda x: urljoin(x, urlparse(x).path) if (urlparse(x).scheme == 'http' or urlparse(x).scheme == 'https') and urlparse(x).netloc != '127.0.0.1' and urlparse(x).netloc != 'localhost' else np.NaN) - #Drop NaN values - #dataframe=dataframe.dropna() dataframe=dataframe.reset_index(drop=True) return dataframe diff --git a/tests/test_pycoornet.py b/tests/test_pycoornet.py index 22f1572..1e00564 100644 --- a/tests/test_pycoornet.py +++ b/tests/test_pycoornet.py @@ -27,10 +27,18 @@ def test_crowdtangle(crowd_token, sample_source_df): def test_shared(sample_ct_df): - shared = Shared() - crowtangle_shares_df, highly_connected_graph, q = shared.coord_shares( - sample_ct_df, clean_urls=True) + shared = Shared(sample_ct_df) + crowtangle_shares_df, highly_connected_graph, q = shared.coord_shares(clean_urls=True) if crowtangle_shares_df.shape[0] > 0 and highly_connected_graph != None and q > 0: assert True else: assert False + +def test_estimate_coord_interval(sample_ct_df): + shared = Shared(sample_ct_df) + summary, coord_interval = shared.estimate_coord_interval(True) + if coord_interval > 0: + assert True + else: + assert False + From 3d7278af86ca5af5666e6dc2b7b8c6b11ecc29a3 Mon Sep 17 00:00:00 2001 From: Camilo Soto Montoya Date: Sat, 8 May 2021 14:45:20 -0500 Subject: [PATCH 2/2] docs --- docs/source/conf.py | 6 +++--- src/pycoornet/crowdtangle.py | 8 +++----- src/pycoornet/shared.py | 19 ++++++++----------- src/pycoornet/utils.py | 2 +- 4 files changed, 15 insertions(+), 20 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index 31403ed..7c88d53 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -12,13 +12,13 @@ # import os import sys -sys.path.insert(0, os.path.abspath('../..')) +sys.path.insert(0, os.path.abspath('../../src')) # -- Project information ----------------------------------------------------- project = 'PyCooRnet' -copyright = '2020, PyCooRnet Developers' +copyright = '2021, PyCooRnet Developers' author = 'PyCooRnet Developers' @@ -53,4 +53,4 @@ # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +#html_static_path = ['_static'] diff --git a/src/pycoornet/crowdtangle.py b/src/pycoornet/crowdtangle.py index 31796c5..78eea8f 100644 --- a/src/pycoornet/crowdtangle.py +++ b/src/pycoornet/crowdtangle.py @@ -13,8 +13,7 @@ class CrowdTangle: """DescripciĆ³n de la clase. - :param api_key: CrowdTangle API key. - :type api_key: string + api_key (str): CrowdTangle API key. """ def __init__(self, api_key): @@ -29,7 +28,7 @@ def get_shares(self, urls, url_column='url', date_column='date', platforms=('fac """ Get the URLs shares from CrowdTangle from a list of URLs with publish datetime Args: - urls (dataframe): a dataframe with at least a column "url" containing the URLs, and a column "date" with their published date + urls (pandas.DataFrame): a dataframe with at least a column "url" containing the URLs, and a column "date" with their published date url_column (str, optional): name of the column (placed inside quote marks) where the URLs are stored. Defaults to 'url'. date_column (str, optional): name of the column (placed inside quote marks) where the date of the URLs are stored. Defaults to 'date'. platforms (tuple, optional): a tuple of platforms to search. You can specify only facebook to search on Facebook, or only instagram to @@ -48,8 +47,7 @@ def get_shares(self, urls, url_column='url', date_column='date', platforms=('fac SystemExit: [description] Returns: - [pandas.Dataframe]: [A pandas dataframe of posts that shared the URLs and a number of variables returned by the https://github.com/CrowdTangle/API/wiki/Links - CrowdTangle API links endpoint and the original data set of news. + pandas.DataFrame: A pandas dataframe of posts that shared the URLs and a number of variables returned by the https://github.com/CrowdTangle/API/wiki/Links CrowdTangle API links endpoint and the original data set of news. """ try: diff --git a/src/pycoornet/shared.py b/src/pycoornet/shared.py index 8abb62e..136f3c0 100644 --- a/src/pycoornet/shared.py +++ b/src/pycoornet/shared.py @@ -18,10 +18,6 @@ class Shared: Args: crowdtangle_shares_df (pandas.DataFrame): the pandas dataframe of link posts resulting from the function CrowdTangle shares - Returns: - 2-elementos tuple containing - - **graph** (networkx.Graph): An graph (highly_connected_g) with networks of coordinated entities whose edges also contains a t_coord_share attribute (vector) reporting the timestamps of every time the edge was detected as coordinated sharing. - - **q** (float): Percentile edge weight number of leeped repetedly coordinated link sharing. """ def __init__(self, crowdtangle_shares_df): self.__crowdtangle_shares_df = crowdtangle_shares_df @@ -45,9 +41,10 @@ def estimate_coord_interval(self, q=0.1, p=0.5, clean_urls=False, keep_ourl_only keep_ourl_only (bool, optional): restrict the analysis to CrownTangle shares links matching the original URLs. Defaults to False. Returns: - 2-element tuple containing - - **summary** (pandas.DataFrame): summary statistics of q\% quickest second share performing URLs. - - **coordination interval** (integer): time in seconds corresponding to the median time spent by these URLs to cumulate the % of their total shares. + (tuple): 2-element tuple containing + + - **summary** (pandas.DataFrame): summary statistics of q\% quickest second share performing URLs. + - **coordination interval** (integer): time in seconds corresponding to the median time spent by these URLs to cumulate the % of their total shares. """ if 0