From f2fb6700a2b01321fa70a11638dae69626550f98 Mon Sep 17 00:00:00 2001 From: audun myers Date: Tue, 9 Apr 2024 16:00:43 -0400 Subject: [PATCH 1/4] updating factory to fix bugs Fixes: 1. Added documentation to create_df 2. Fixed bug in dictionary factory method to move all other attributes to misc properties and deals with variable length properties. 3. Added fix to create_df where 'misc_properties' for the misc properties col name was not being used. --- hypernetx/classes/factory.py | 379 +++++------------------------------ 1 file changed, 45 insertions(+), 334 deletions(-) diff --git a/hypernetx/classes/factory.py b/hypernetx/classes/factory.py index e1694770..77f0f8cd 100644 --- a/hypernetx/classes/factory.py +++ b/hypernetx/classes/factory.py @@ -1,29 +1,9 @@ import pandas as pd -import numpy as np -from hypernetx.classes.helpers import dict_depth - - -# In[ ]: -# ------------------------------------------------------------------------------------------------- -# Individual factory methods for property stores -# ------------------------------------------------------------------------------------------------- - - -def remove_property_store_duplicates(PS, default_uid_col_names, aggregation_methods={}): - agg_methods = {} - for col in PS.columns: - if col not in aggregation_methods: - agg_methods[col] = "first" - else: - agg_methods[col] = aggregation_methods[col] - return PS.groupby(level=default_uid_col_names).agg(agg_methods) - - -### Alternate code for creating dataframe for PS -import ast, json def mkdict(x): + #function to create a dictionary from object x if it is not already a dicitonary. + import ast, json if isinstance(x, dict): return x else: @@ -50,136 +30,59 @@ def create_df( misc_properties_col=None, aggregation_methods=None, ): + if not isinstance(dfp, pd.DataFrame): raise TypeError("method requires a Pandas DataFrame") else: - # dfp = deepcopy(properties) ### not sure if this is wise - + #checks if the use index variable is called. if it is then use the existing indices. if it is not then an index is set based on the uid columns. if use_index == False: + #if uid cols are specified make those columns the index columns if uid_cols != None: + #create chk function to check if the column specified is a string. if it is not a string then it assumes it is an integer and grabs that columns name. chk = lambda c: c if isinstance(c, str) else dfp.columns[c] + #set indices using the column names in uid_cols using the chk function. dfp = dfp.set_index([chk(c) for c in uid_cols]) - else: + else: #if uid_cols are not specified then assume the first one or two columns (depending on level) are the index columns and set the index. if level == 2: - dfp = dfp.set_index(dfp.columns[0], dfp.columns[1]) + dfp = dfp.set_index([dfp.columns[0], dfp.columns[1]]) else: - dfp = dfp.set_index(dfp.columns[0]) - - if ( - misc_properties_col in dfp.columns - and misc_properties_col != "misc_properties" - ): - dfp = dfp.rename(columns={misc_properties_col: "misc_properties"}) + dfp = dfp.set_index([dfp.columns[0]]) + + # if the misc prop col is in the column names + if misc_properties_col in dfp.columns: + # rename the misc properties column to the default name if it isn't + if misc_properties_col != "misc_properties": + dfp = dfp.rename(columns={misc_properties_col: "misc_properties"}) + # force misc properties to be a dictionary if it is not. dfp.misc_properties = dfp.misc_properties.map(mkdict) - else: + else:#if the column is not specified then create the misc properties column of empty dicitonaries. dfp["misc_properties"] = [{} for row in dfp.index] - + + # check if weight property column name was specified. if weight_prop in dfp.columns: + # if it was specified and it exists then rename to default weight name and fill in the NA weights with the default. dfp = dfp.rename(columns={weight_prop: "weight"}) dfp = dfp.fillna({"weight": default_weight}) + #if weight column is not None and the weight column name was not in the column names then check in the misc properties. elif weight_prop is not None: - def grabweight(cell): + #function to grab weights from the misc properties column. if isinstance(cell, dict): return cell.get(weight_prop, default_weight) else: return default_weight - + #set the weight column to the weights grabbed from the misc properties dictionary (if any). dfp["weight"] = dfp["misc_properties"].map(grabweight) - + + # reorder columns in standard order cols = [c for c in dfp.columns if c not in ["weight", "misc_properties"]] dfp = dfp[["weight"] + cols + ["misc_properties"]] + + #remove duplicate indices and aggregate using aggregation methods specified. dfp = dfp[~dfp.index.duplicated(keep="first")] return dfp -# def create_df(properties, uid_cols, use_indices, -# default_uid_col_names, weight_prop_col, -# misc_prop_col, default_weight, aggregation_methods): - -# #get length of dataframe once to be used throughout this function. -# length_of_dataframe = len(properties) - -# #get column names if integer was provided instead -# if isinstance(weight_prop_col, int): -# weight_prop_col = properties.columns[weight_prop_col] -# if isinstance(misc_prop_col, int): -# misc_prop_col = properties.columns[misc_prop_col] - -# #get list of all column names in properties dataframe -# column_names = list(properties.columns) - - -# # set weight column code: -# # default to use weight column if it exists before looking for default weight array or in misc properties column. -# if weight_prop_col in column_names: -# #do nothing since this is the format we expect by default. -# pass -# #check to see if an array of weights was provided to use for weights column -# elif not isinstance(default_weight, int) and not isinstance(default_weight, float): -# properties[weight_prop_col] = default_weight - -# #check if the weight column name exists in the misc properties. -# elif misc_prop_col in column_names: #check if misc properties exists -# #check if weight_prop_col is a key in any of the misc properties dicitonary. -# if any(weight_prop_col in misc_dict for misc_dict in properties[misc_prop_col]): -# #create list of cell weights from misc properties dictionaries and use default value if not in keys -# weights_from_misc_dicts = [] -# for misc_dict in properties[misc_prop_col]: -# if weight_prop_col in misc_dict: -# weights_from_misc_dicts.append(misc_dict[weight_prop_col]) -# else: -# weights_from_misc_dicts.append(default_weight) -# properties[weight_prop_col] = weights_from_misc_dicts - -# #if not provided anywhere then add in as default value -# else: -# properties[weight_prop_col] = [default_weight]*length_of_dataframe - -# #rename the columns where needed -# #start by defining dictionary of column renaming with uid columns. -# if not use_indices: #include uid columns if they are not indices. -# col_rename_dict = {uid_cols[i]: default_uid_col_names[i] for i in range(len(uid_cols))} #renaming dictionary -# else: -# col_rename_dict = {} -# #add weight column renaming -# col_rename_dict[weight_prop_col] = 'weight' -# #set misc properties column if not already provided and if set then update renaming dictionary. -# if misc_prop_col not in column_names: -# properties['misc_properties'] = [{}]*length_of_dataframe -# else: -# col_rename_dict[misc_prop_col] = 'misc_properties' -# #rename the columns -# properties.rename(columns = col_rename_dict, inplace = True) #rename the columns - - -# #set index for dataframe using the default uid column names that are dependent on the level if indices flag not on. -# if not use_indices: -# properties = properties.set_index(default_uid_col_names) -# else: #otherwise just rename the incides to the default names. -# properties.index.names = default_uid_col_names - - -# #remove any NaN values or missing values in weight column -# properties['weight'].fillna(default_weight, inplace = True) - - -# # remove any duplicate indices and combine using aggregation methods (defaults to 'first' if none provided). -# properties = remove_property_store_duplicates(properties, default_uid_col_names, aggregation_methods = aggregation_methods) - - -# #reorder columns to have properties last -# # Get the column names and the specific column -# specific_col = 'misc_properties' -# # Create a new order for the columns -# updated_column_names = list(properties.columns) -# new_order = [col for col in updated_column_names if col != specific_col] + [specific_col] -# # Reorder the dataframe using reindex -# properties = properties.reindex(columns=new_order) - -# return properties - - def dataframe_factory_method( DF, level, @@ -235,53 +138,18 @@ def dataframe_factory_method( PS = None else: - if use_indices: - uid_cols = DF.index.names - else: - # uid column name setting if they are not provided - if ( - uid_cols is None - ): # if none are provided set to the names of the first or first two columns depending on level - if level == 0 or level == 1: - uid_cols = [DF.columns[0]] - elif level == 2: - uid_cols = [DF.columns[0], DF.columns[1]] - - # get column names if integer was provided instead and create new uid_cols with string names. - uid_cols_to_str = [] - for col in uid_cols: - if isinstance(col, int): - uid_cols_to_str.append(DF.columns[col]) - else: - uid_cols_to_str.append(col) - uid_cols = uid_cols_to_str - - # set default uid column name(s) - if level == 0 or level == 1: - default_uid_col_names = ["uid"] - elif level == 2: - default_uid_col_names = ["edges", "nodes"] - - # PS = create_df(DF, uid_cols = uid_cols, use_indices = use_indices, - # default_uid_col_names = default_uid_col_names, - # weight_prop_col = weight_col, - # misc_prop_col = misc_properties_col, - # default_weight = default_weight, - # aggregation_methods = aggregate_by) - PS = create_df( DF, uid_cols=uid_cols, + level=level, use_index=use_indices, weight_prop=weight_col, misc_properties_col=misc_properties_col, default_weight=default_weight, - aggregation_methods=aggregate_by, - ) + aggregation_methods=aggregate_by,) return PS - def dict_factory_method( D, level, @@ -339,23 +207,27 @@ def dict_factory_method( DF = None # if the dictionary data provided is for the setsystem (incidence data) elif level == 2: - # explode list of lists into incidence pairs as a pandas dataframe using pandas series explode. DF = pd.DataFrame(pd.Series(D).explode()).reset_index() # rename columns to correct column names for edges and nodes DF = DF.rename(columns=dict(zip(DF.columns, ["edges", "nodes"]))) - # if attributes are stored on the dictionary (ie, it has a depth greater than 2) - if dict_depth(D) > 2: - attribute_data = [] - for _, incidence_pair in DF.iterrows(): - edge, node = incidence_pair + attribute_data = {weight_col: [], misc_properties_col: []} + for _, incidence_pair in DF.iterrows(): + edge, node = incidence_pair + if isinstance(D[edge], dict): attributes_of_incidence_pair = D[edge][node] - attribute_data.append(attributes_of_incidence_pair) - attribute_df = pd.DataFrame(attribute_data) - DF = pd.concat([DF, attribute_df], axis=1) - - else: + if weight_col in attributes_of_incidence_pair: + weight_val = attributes_of_incidence_pair.pop(weight_col) + attribute_data[weight_col] += [weight_val] + else: + attribute_data[weight_col] += [default_weight] + attribute_data[misc_properties_col] += [attributes_of_incidence_pair] + attribute_df = pd.DataFrame(attribute_data) + DF = pd.concat([DF, attribute_df], axis=1) + + #id the dataeframe is for edges or nodes. + elif level == 1 or level == 0: attribute_data = [] for key in D: attributes_of_key = D[key] @@ -379,7 +251,6 @@ def dict_factory_method( return PS - def list_factory_method( L, level, @@ -452,163 +323,3 @@ def list_factory_method( ) return PS - - -""" -# In[ ]: testing code -# Only runs if running from this file (This will show basic examples and testing of the code) - - -if __name__ == "__main__": - - run_list_example = False - if run_list_example: - - list_of_iterables = [[1, 1, 2], {1, 2}, {1, 2, 3}] - display(list_of_iterables) - - IPS = list_factory_method(list_of_iterables, level = 2, - aggregate_by = {'weight': 'sum'}) - display(IPS) - print('-'*100) - - - - run_simple_dict_example = True - if run_simple_dict_example: - - cell_dict = {'e1':[1,2],'e2':[1,2],'e3':[1,2,3]} - - print('Provided Dataframes') - print('-'*100) - display(cell_dict) - - print('\n \nRestructured Dataframes using single factory method for property store repeated') - print('-'*100) - - IPS = dict_factory_method(cell_dict, level = 2) - - display(IPS) - print('-'*100) - - - run_dict_example = True - if run_dict_example: - - cell_prop_dict = {'e1':{ 1: {'w':0.5, 'name': 'related_to'}, - 2: {'w':0.1, 'name': 'related_to','startdate': '05.13.2020'}}, - 'e2':{ 1: {'w':0.52, 'name': 'owned_by'}, - 2: {'w':0.2}}, - 'e3':{ 1: {'w':0.5, 'name': 'related_to'}, - 2: {'w':0.2, 'name': 'owner_of'}, - 3: {'w':1, 'type': 'relationship'}}} - - edge_prop_dict = {'e1': {'number': 1}, - 'e2': {'number': 2}, - 'e3': {'number': 3}} - - print('Provided Dataframes') - print('-'*100) - display(cell_prop_dict) - - print('\n \nRestructured Dataframes using single factory method for property store repeated') - print('-'*100) - - IPS = dict_factory_method(cell_prop_dict, level = 2, weight_col = 'w') - display(IPS) - - - EPS = dict_factory_method(edge_prop_dict, level = 0) - display(EPS) - - - NPS = dict_factory_method(None, level = 1, weight_col = 'w') - display(NPS) - print('-'*100) - - - run_simple_dataframe_example = False - if run_simple_dataframe_example: - - incidence_dataframe = pd.DataFrame({'e': ['a', 'a', 'a', 'b', 'c', 'c'], 'n': [1, 1, 2, 3, 2, 3],}) - - - print('Provided Dataframes') - print('-'*100) - display(incidence_dataframe) - - - - print('\n \nRestructured Dataframes using single factory method for property store repeated') - print('-'*100) - - - - IPS = dataframe_factory_method(incidence_dataframe, level = 2, - uid_cols = ['e', 'n'], - aggregate_by = {'weight': 'sum'},) - IS = IPS.index - - display(IS) - display(IPS) - - EPS = dataframe_factory_method(None, level = 0) - display(EPS) - - NPS = dataframe_factory_method(None, level = 1, uid_cols = ['nodes']) - display(NPS) - print('-'*100) - - - run_dataframe_example = True - if run_dataframe_example: - print('') - print('='*100) - print('='*100) - print('='*100) - print('') - - cell_prop_dataframe = pd.DataFrame({'E': ['a', 'a', 'a', 'b', 'c', 'c'], 'nodes': [1, 1, 2, 3, 2, 3], - 'color': ['red', 'red', 'red', 'red', 'red', 'blue'], - 'other_properties': [{}, {}, {'weight': 5}, {'time': 3}, {}, {}]}) - - edge_prop_dataframe = pd.DataFrame({'edges': ['a', 'b', 'c'], - 'strength': [2, np.nan, 3]}) - - node_prop_dataframe = pd.DataFrame({'N': [1], - 'temperature': [60]}) - node_prop_dataframe.set_index(['N'], inplace = True) - - print(list(node_prop_dataframe.columns)) - - print('Provided Dataframes') - print('-'*100) - display(cell_prop_dataframe) - display(edge_prop_dataframe) - display(node_prop_dataframe) - - print('\n \nRestructured Dataframes using single factory method for property store repeated') - print('-'*100) - - - IPS = dataframe_factory_method(cell_prop_dataframe, level = 2, - uid_cols = ['E', 'nodes'], - misc_properties_col = 'other_properties', - aggregate_by = {'weight': 'sum'},) - IS = IPS.index - - display(IS) - - display(IPS) - - - EPS = dataframe_factory_method(edge_prop_dataframe, level = 0, - weight_col = 1, uid_cols = [0]) - display(EPS) - - - NPS = dataframe_factory_method(node_prop_dataframe, level = 1, - use_indices = True) - display(NPS) - print('-'*100) -""" From a1bd87365eac8c2e5fa389470bc2b18e927509ec Mon Sep 17 00:00:00 2001 From: audun myers Date: Tue, 9 Apr 2024 16:10:13 -0400 Subject: [PATCH 2/4] Create testing_factory.ipynb --- hypernetx/classes/testing_factory.ipynb | 1606 +++++++++++++++++++++++ 1 file changed, 1606 insertions(+) create mode 100644 hypernetx/classes/testing_factory.ipynb diff --git a/hypernetx/classes/testing_factory.ipynb b/hypernetx/classes/testing_factory.ipynb new file mode 100644 index 00000000..4d196c63 --- /dev/null +++ b/hypernetx/classes/testing_factory.ipynb @@ -0,0 +1,1606 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "c371e4d7", + "metadata": { + "toc": true + }, + "source": [ + "

Table of Contents

\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "72589a65", + "metadata": {}, + "outputs": [], + "source": [ + "from factory import *" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "e8abfb3d", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np" + ] + }, + { + "cell_type": "markdown", + "id": "ca44245b", + "metadata": { + "heading_collapsed": true + }, + "source": [ + "# iterable of iterables example" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "b0894957", + "metadata": { + "hidden": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[[1, 1, 2], {1, 2}, {1, 2, 3}]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
weightmisc_properties
edgesnodes
011.0{}
21.0{}
111.0{}
21.0{}
211.0{}
21.0{}
31.0{}
\n", + "
" + ], + "text/plain": [ + " weight misc_properties\n", + "edges nodes \n", + "0 1 1.0 {}\n", + " 2 1.0 {}\n", + "1 1 1.0 {}\n", + " 2 1.0 {}\n", + "2 1 1.0 {}\n", + " 2 1.0 {}\n", + " 3 1.0 {}" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "----------------------------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "list_of_iterables = [[1, 1, 2], {1, 2}, {1, 2, 3}]\n", + "display(list_of_iterables)\n", + "\n", + "IPS = list_factory_method(list_of_iterables, level = 2,\n", + " aggregate_by = {'weight': 'sum'})\n", + "display(IPS)\n", + "print('-'*100)\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "55c657e1", + "metadata": { + "heading_collapsed": true + }, + "source": [ + "# simple dictionary" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "37a4268e", + "metadata": { + "hidden": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Provided Dataframes\n", + "----------------------------------------------------------------------------------------------------\n" + ] + }, + { + "data": { + "text/plain": [ + "{'e1': [1, 2], 'e2': [1, 2], 'e3': [1, 2, 3]}" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " \n", + "Restructured Dataframes using single factory method for property store repeated\n", + "----------------------------------------------------------------------------------------------------\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
weightmisc_properties
edgesnodes
e111.0{}
21.0{}
e211.0{}
21.0{}
e311.0{}
21.0{}
31.0{}
\n", + "
" + ], + "text/plain": [ + " weight misc_properties\n", + "edges nodes \n", + "e1 1 1.0 {}\n", + " 2 1.0 {}\n", + "e2 1 1.0 {}\n", + " 2 1.0 {}\n", + "e3 1 1.0 {}\n", + " 2 1.0 {}\n", + " 3 1.0 {}" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "----------------------------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "\n", + "cell_dict = {'e1':[1,2],'e2':[1,2],'e3':[1,2,3]}\n", + "\n", + "print('Provided Dataframes')\n", + "print('-'*100)\n", + "display(cell_dict)\n", + "\n", + "print('\\n \\nRestructured Dataframes using single factory method for property store repeated')\n", + "print('-'*100)\n", + "\n", + "IPS = dict_factory_method(cell_dict, level = 2)\n", + "\n", + "display(IPS)\n", + "print('-'*100)" + ] + }, + { + "cell_type": "markdown", + "id": "1514b86b", + "metadata": { + "heading_collapsed": true + }, + "source": [ + "# complex dicitonary" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "10a6177e", + "metadata": { + "hidden": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Provided Dataframes\n", + "----------------------------------------------------------------------------------------------------\n" + ] + }, + { + "data": { + "text/plain": [ + "{'e1': {1: {'w': 0.5, 'name': 'related_to'},\n", + " 2: {'w': 0.1, 'name': 'related_to', 'startdate': '05.13.2020'}},\n", + " 'e2': {1: {'w': 0.52, 'name': 'owned_by'}, 2: {'w': 0.2}},\n", + " 'e3': {1: {'w': 0.5, 'name': 'related_to'},\n", + " 2: {'w': 0.2, 'name': 'owner_of'},\n", + " 3: {'w': 1, 'type': 'relationship'}}}" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " \n", + "Restructured Dataframes using single factory method for property store repeated\n", + "----------------------------------------------------------------------------------------------------\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
weightmisc_properties
edgesnodes
e110.50{'name': 'related_to'}
20.10{'name': 'related_to', 'startdate': '05.13.2020'}
e210.52{'name': 'owned_by'}
20.20{}
e310.50{'name': 'related_to'}
20.20{'name': 'owner_of'}
31.00{'type': 'relationship'}
\n", + "
" + ], + "text/plain": [ + " weight misc_properties\n", + "edges nodes \n", + "e1 1 0.50 {'name': 'related_to'}\n", + " 2 0.10 {'name': 'related_to', 'startdate': '05.13.2020'}\n", + "e2 1 0.52 {'name': 'owned_by'}\n", + " 2 0.20 {}\n", + "e3 1 0.50 {'name': 'related_to'}\n", + " 2 0.20 {'name': 'owner_of'}\n", + " 3 1.00 {'type': 'relationship'}" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
weightnumbermisc_properties
0
e11.01{}
e21.02{}
e31.03{}
\n", + "
" + ], + "text/plain": [ + " weight number misc_properties\n", + "0 \n", + "e1 1.0 1 {}\n", + "e2 1.0 2 {}\n", + "e3 1.0 3 {}" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "None" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "----------------------------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "\n", + "cell_prop_dict = {'e1':{ 1: {'w':0.5, 'name': 'related_to'},\n", + " 2: {'w':0.1, 'name': 'related_to','startdate': '05.13.2020'}},\n", + " 'e2':{ 1: {'w':0.52, 'name': 'owned_by'},\n", + " 2: {'w':0.2}},\n", + " 'e3':{ 1: {'w':0.5, 'name': 'related_to'},\n", + " 2: {'w':0.2, 'name': 'owner_of'},\n", + " 3: {'w':1, 'type': 'relationship'}}}\n", + "\n", + "edge_prop_dict = {'e1': {'number': 1},\n", + " 'e2': {'number': 2},\n", + " 'e3': {'number': 3}}\n", + "\n", + "print('Provided Dataframes')\n", + "print('-'*100)\n", + "display(cell_prop_dict)\n", + "\n", + "print('\\n \\nRestructured Dataframes using single factory method for property store repeated')\n", + "print('-'*100)\n", + "\n", + "IPS = dict_factory_method(cell_prop_dict, level = 2, weight_col = 'w')\n", + "display(IPS)\n", + "\n", + "\n", + "EPS = dict_factory_method(edge_prop_dict, level = 0)\n", + "display(EPS)\n", + "\n", + "\n", + "NPS = dict_factory_method(None, level = 1, weight_col = 'w')\n", + "display(NPS)\n", + "print('-'*100)" + ] + }, + { + "cell_type": "markdown", + "id": "76b8565e", + "metadata": { + "heading_collapsed": true + }, + "source": [ + "# simple dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "6ec431ae", + "metadata": { + "hidden": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Provided Dataframes\n", + "----------------------------------------------------------------------------------------------------\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
en
0a1
1a1
2a2
3b3
4c2
5c3
\n", + "
" + ], + "text/plain": [ + " e n\n", + "0 a 1\n", + "1 a 1\n", + "2 a 2\n", + "3 b 3\n", + "4 c 2\n", + "5 c 3" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " \n", + "Restructured Dataframes using single factory method for property store repeated\n", + "----------------------------------------------------------------------------------------------------\n" + ] + }, + { + "data": { + "text/plain": [ + "MultiIndex([('a', 1),\n", + " ('a', 2),\n", + " ('b', 3),\n", + " ('c', 2),\n", + " ('c', 3)],\n", + " names=['e', 'n'])" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
weightmisc_properties
en
a11.0{}
21.0{}
b31.0{}
c21.0{}
31.0{}
\n", + "
" + ], + "text/plain": [ + " weight misc_properties\n", + "e n \n", + "a 1 1.0 {}\n", + " 2 1.0 {}\n", + "b 3 1.0 {}\n", + "c 2 1.0 {}\n", + " 3 1.0 {}" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "None" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "None" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "----------------------------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "incidence_dataframe = pd.DataFrame({'e': ['a', 'a', 'a', 'b', 'c', 'c'], \n", + " 'n': [1, 1, 2, 3, 2, 3],})\n", + "\n", + "\n", + "print('Provided Dataframes')\n", + "print('-'*100)\n", + "display(incidence_dataframe)\n", + "\n", + "\n", + "\n", + "print('\\n \\nRestructured Dataframes using single factory method for property store repeated')\n", + "print('-'*100)\n", + "\n", + "\n", + "\n", + "IPS = dataframe_factory_method(incidence_dataframe, level = 2,\n", + " uid_cols = ['e', 'n'],\n", + " aggregate_by = {'weight': 'sum'},)\n", + "IS = IPS.index\n", + "\n", + "display(IS)\n", + "display(IPS)\n", + "\n", + "EPS = dataframe_factory_method(None, level = 0)\n", + "display(EPS)\n", + "\n", + "NPS = dataframe_factory_method(None, level = 1, uid_cols = ['nodes'])\n", + "display(NPS)\n", + "print('-'*100)\n" + ] + }, + { + "cell_type": "markdown", + "id": "73cbb837", + "metadata": { + "heading_collapsed": true + }, + "source": [ + "# complex dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "68cf96df", + "metadata": { + "hidden": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Provided Dataframes\n", + "----------------------------------------------------------------------------------------------------\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Enodescolorother_properties
0a1red{}
1a1red{}
2a2red{'weight': 5}
3b3red{'time': 3}
4c2red{}
5c3blue{}
\n", + "
" + ], + "text/plain": [ + " E nodes color other_properties\n", + "0 a 1 red {}\n", + "1 a 1 red {}\n", + "2 a 2 red {'weight': 5}\n", + "3 b 3 red {'time': 3}\n", + "4 c 2 red {}\n", + "5 c 3 blue {}" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
edgesstrength
0a2.0
1bNaN
2c3.0
\n", + "
" + ], + "text/plain": [ + " edges strength\n", + "0 a 2.0\n", + "1 b NaN\n", + "2 c 3.0" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Ntemperature
0160
\n", + "
" + ], + "text/plain": [ + " N temperature\n", + "0 1 60" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " \n", + "Restructured Dataframes using single factory method for property store repeated\n", + "----------------------------------------------------------------------------------------------------\n" + ] + }, + { + "data": { + "text/plain": [ + "MultiIndex([('a', 1),\n", + " ('a', 2),\n", + " ('b', 3),\n", + " ('c', 2),\n", + " ('c', 3)],\n", + " names=['E', 'nodes'])" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
weightcolormisc_properties
Enodes
a11.0red{}
25.0red{'weight': 5}
b31.0red{'time': 3}
c21.0red{}
31.0blue{}
\n", + "
" + ], + "text/plain": [ + " weight color misc_properties\n", + "E nodes \n", + "a 1 1.0 red {}\n", + " 2 5.0 red {'weight': 5}\n", + "b 3 1.0 red {'time': 3}\n", + "c 2 1.0 red {}\n", + " 3 1.0 blue {}" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
weightstrengthmisc_properties
edges
a1.02.0{}
b1.0NaN{}
c1.03.0{}
\n", + "
" + ], + "text/plain": [ + " weight strength misc_properties\n", + "edges \n", + "a 1.0 2.0 {}\n", + "b 1.0 NaN {}\n", + "c 1.0 3.0 {}" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "None" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "----------------------------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "cell_prop_dataframe = pd.DataFrame({'E': ['a', 'a', 'a', 'b', 'c', 'c'], 'nodes': [1, 1, 2, 3, 2, 3],\n", + " 'color': ['red', 'red', 'red', 'red', 'red', 'blue'],\n", + " 'other_properties': [{}, {}, {'weight': 5}, {'time': 3}, {}, {}]})\n", + "\n", + "edge_prop_dataframe = pd.DataFrame({'edges': ['a', 'b', 'c'],\n", + " 'strength': [2, np.nan, 3]})\n", + "\n", + "node_prop_dataframe = pd.DataFrame({'N': [1],\n", + " 'temperature': [60]})\n", + "\n", + "print('Provided Dataframes')\n", + "print('-'*100)\n", + "display(cell_prop_dataframe)\n", + "display(edge_prop_dataframe)\n", + "display(node_prop_dataframe)\n", + "\n", + "print('\\n \\nRestructured Dataframes using single factory method for property store repeated')\n", + "print('-'*100)\n", + "\n", + "\n", + "IPS = dataframe_factory_method(cell_prop_dataframe, level = 2,\n", + " uid_cols = ['E', 'nodes'],\n", + " misc_properties_col = 'other_properties',\n", + " aggregate_by = {'weight': 'sum'},)\n", + "IS = IPS.index\n", + "\n", + "display(IS)\n", + "\n", + "display(IPS)\n", + "\n", + "\n", + "EPS = dataframe_factory_method(edge_prop_dataframe, level = 0,\n", + " weight_col = 1, uid_cols = [0])\n", + "display(EPS)\n", + "\n", + "\n", + "NPS = dataframe_factory_method(None, level = 1,)\n", + "display(NPS)\n", + "print('-'*100)\n" + ] + }, + { + "cell_type": "markdown", + "id": "944095a5", + "metadata": { + "heading_collapsed": true + }, + "source": [ + "# meetings dictionary " + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "7e429de7", + "metadata": { + "hidden": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Provided Dataframes\n", + "----------------------------------------------------------------------------------------------------\n" + ] + }, + { + "data": { + "text/plain": [ + "{'M1': {'P4': {'topics': ['A, B']}, 'P5': {'topics': ['A', 'C']}},\n", + " 'M2': {'P5': {'topics': ['E', 'F']}, 'P6': {'topics': ['F']}},\n", + " 'M3': {'P2': {'topics': ['C', 'D']},\n", + " 'P3': {'topics': ['B', 'C', 'D']},\n", + " 'P4': {'topics': ['D']}},\n", + " 'M4': {'P1': {'topics': ['C']},\n", + " 'P2': {'topics': ['C']},\n", + " 'P3': {'topics': ['C']}},\n", + " 'M5': {'P1'}}" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "{'M1': {'interval': [0, 1]},\n", + " 'M2': {'interval': [2, 6]},\n", + " 'M3': {'interval': [3, 5]},\n", + " 'M4': {'interval': [7, 9]}}" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " \n", + "Restructured Dataframes using single factory method for property store repeated\n", + "----------------------------------------------------------------------------------------------------\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
weightmisc_properties
edgesnodes
M1P41.0{'topics': ['A, B']}
P51.0{'topics': ['A', 'C']}
M2P51.0{'topics': ['E', 'F']}
P61.0{'topics': ['F']}
M3P21.0{'topics': ['C', 'D']}
P31.0{'topics': ['B', 'C', 'D']}
P41.0{'topics': ['D']}
M4P11.0{'topics': ['C']}
P21.0{'topics': ['C']}
P31.0{'topics': ['C']}
M5P11.0{}
\n", + "
" + ], + "text/plain": [ + " weight misc_properties\n", + "edges nodes \n", + "M1 P4 1.0 {'topics': ['A, B']}\n", + " P5 1.0 {'topics': ['A', 'C']}\n", + "M2 P5 1.0 {'topics': ['E', 'F']}\n", + " P6 1.0 {'topics': ['F']}\n", + "M3 P2 1.0 {'topics': ['C', 'D']}\n", + " P3 1.0 {'topics': ['B', 'C', 'D']}\n", + " P4 1.0 {'topics': ['D']}\n", + "M4 P1 1.0 {'topics': ['C']}\n", + " P2 1.0 {'topics': ['C']}\n", + " P3 1.0 {'topics': ['C']}\n", + "M5 P1 1.0 {}" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "----------------------------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "NS = 'P'\n", + "ES = 'M'\n", + "\n", + "cell_props = {\n", + " ES+'1': {NS+'4': {'topics': ['A, B']}, \n", + " NS+'5': {'topics': ['A', 'C']}}, \n", + " ES+'2': {NS+'5': {'topics': ['E', 'F']}, \n", + " NS+'6': {'topics': ['F']}}, \n", + " ES+'3': {NS+'2': {'topics': ['C', 'D']}, \n", + " NS+'3': {'topics': ['B', 'C', 'D']}, \n", + " NS+'4': {'topics': ['D']}}, \n", + " ES+'4': {NS+'1': {'topics': ['C']}, \n", + " NS+'2': {'topics': ['C']}, \n", + " NS+'3': {'topics': ['C']}}, \n", + " ES+'5': {NS+'1'}\n", + " }\n", + " \n", + "edge_props = {\n", + " ES+'1': {'interval': [0, 1]}, \n", + " ES+'2': {'interval': [2, 6]}, \n", + " ES+'3': {'interval': [3, 5]}, \n", + " ES+'4': {'interval': [7, 9]}, \n", + " }\n", + "\n", + "print('Provided Dataframes')\n", + "print('-'*100)\n", + "display(cell_props)\n", + "display(edge_props)\n", + "\n", + "print('\\n \\nRestructured Dataframes using single factory method for property store repeated')\n", + "print('-'*100)\n", + "\n", + "\n", + "IPS = dict_factory_method(cell_props, level = 2)\n", + "\n", + "display(IPS)\n", + "\n", + "print('-'*100)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a39142a0", + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "985ae9fd", + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": false, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": true, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": true + }, + "varInspector": { + "cols": { + "lenName": 16, + "lenType": 16, + "lenVar": 40 + }, + "kernels_config": { + "python": { + "delete_cmd_postfix": "", + "delete_cmd_prefix": "del ", + "library": "var_list.py", + "varRefreshCmd": "print(var_dic_list())" + }, + "r": { + "delete_cmd_postfix": ") ", + "delete_cmd_prefix": "rm(", + "library": "var_list.r", + "varRefreshCmd": "cat(var_dic_list()) " + } + }, + "types_to_exclude": [ + "module", + "function", + "builtin_function_or_method", + "instance", + "_Feature" + ], + "window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 15ebb130c7d2a21b831996fb565b1cc0480023d0 Mon Sep 17 00:00:00 2001 From: audun myers Date: Tue, 9 Apr 2024 16:14:46 -0400 Subject: [PATCH 3/4] Delete testing_factory.ipynb --- hypernetx/classes/testing_factory.ipynb | 1606 ----------------------- 1 file changed, 1606 deletions(-) delete mode 100644 hypernetx/classes/testing_factory.ipynb diff --git a/hypernetx/classes/testing_factory.ipynb b/hypernetx/classes/testing_factory.ipynb deleted file mode 100644 index 4d196c63..00000000 --- a/hypernetx/classes/testing_factory.ipynb +++ /dev/null @@ -1,1606 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "c371e4d7", - "metadata": { - "toc": true - }, - "source": [ - "

Table of Contents

\n", - "" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "72589a65", - "metadata": {}, - "outputs": [], - "source": [ - "from factory import *" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "e8abfb3d", - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import numpy as np" - ] - }, - { - "cell_type": "markdown", - "id": "ca44245b", - "metadata": { - "heading_collapsed": true - }, - "source": [ - "# iterable of iterables example" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "b0894957", - "metadata": { - "hidden": true - }, - "outputs": [ - { - "data": { - "text/plain": [ - "[[1, 1, 2], {1, 2}, {1, 2, 3}]" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
weightmisc_properties
edgesnodes
011.0{}
21.0{}
111.0{}
21.0{}
211.0{}
21.0{}
31.0{}
\n", - "
" - ], - "text/plain": [ - " weight misc_properties\n", - "edges nodes \n", - "0 1 1.0 {}\n", - " 2 1.0 {}\n", - "1 1 1.0 {}\n", - " 2 1.0 {}\n", - "2 1 1.0 {}\n", - " 2 1.0 {}\n", - " 3 1.0 {}" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "----------------------------------------------------------------------------------------------------\n" - ] - } - ], - "source": [ - "list_of_iterables = [[1, 1, 2], {1, 2}, {1, 2, 3}]\n", - "display(list_of_iterables)\n", - "\n", - "IPS = list_factory_method(list_of_iterables, level = 2,\n", - " aggregate_by = {'weight': 'sum'})\n", - "display(IPS)\n", - "print('-'*100)\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "id": "55c657e1", - "metadata": { - "heading_collapsed": true - }, - "source": [ - "# simple dictionary" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "37a4268e", - "metadata": { - "hidden": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Provided Dataframes\n", - "----------------------------------------------------------------------------------------------------\n" - ] - }, - { - "data": { - "text/plain": [ - "{'e1': [1, 2], 'e2': [1, 2], 'e3': [1, 2, 3]}" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - " \n", - "Restructured Dataframes using single factory method for property store repeated\n", - "----------------------------------------------------------------------------------------------------\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
weightmisc_properties
edgesnodes
e111.0{}
21.0{}
e211.0{}
21.0{}
e311.0{}
21.0{}
31.0{}
\n", - "
" - ], - "text/plain": [ - " weight misc_properties\n", - "edges nodes \n", - "e1 1 1.0 {}\n", - " 2 1.0 {}\n", - "e2 1 1.0 {}\n", - " 2 1.0 {}\n", - "e3 1 1.0 {}\n", - " 2 1.0 {}\n", - " 3 1.0 {}" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "----------------------------------------------------------------------------------------------------\n" - ] - } - ], - "source": [ - "\n", - "cell_dict = {'e1':[1,2],'e2':[1,2],'e3':[1,2,3]}\n", - "\n", - "print('Provided Dataframes')\n", - "print('-'*100)\n", - "display(cell_dict)\n", - "\n", - "print('\\n \\nRestructured Dataframes using single factory method for property store repeated')\n", - "print('-'*100)\n", - "\n", - "IPS = dict_factory_method(cell_dict, level = 2)\n", - "\n", - "display(IPS)\n", - "print('-'*100)" - ] - }, - { - "cell_type": "markdown", - "id": "1514b86b", - "metadata": { - "heading_collapsed": true - }, - "source": [ - "# complex dicitonary" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "10a6177e", - "metadata": { - "hidden": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Provided Dataframes\n", - "----------------------------------------------------------------------------------------------------\n" - ] - }, - { - "data": { - "text/plain": [ - "{'e1': {1: {'w': 0.5, 'name': 'related_to'},\n", - " 2: {'w': 0.1, 'name': 'related_to', 'startdate': '05.13.2020'}},\n", - " 'e2': {1: {'w': 0.52, 'name': 'owned_by'}, 2: {'w': 0.2}},\n", - " 'e3': {1: {'w': 0.5, 'name': 'related_to'},\n", - " 2: {'w': 0.2, 'name': 'owner_of'},\n", - " 3: {'w': 1, 'type': 'relationship'}}}" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - " \n", - "Restructured Dataframes using single factory method for property store repeated\n", - "----------------------------------------------------------------------------------------------------\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
weightmisc_properties
edgesnodes
e110.50{'name': 'related_to'}
20.10{'name': 'related_to', 'startdate': '05.13.2020'}
e210.52{'name': 'owned_by'}
20.20{}
e310.50{'name': 'related_to'}
20.20{'name': 'owner_of'}
31.00{'type': 'relationship'}
\n", - "
" - ], - "text/plain": [ - " weight misc_properties\n", - "edges nodes \n", - "e1 1 0.50 {'name': 'related_to'}\n", - " 2 0.10 {'name': 'related_to', 'startdate': '05.13.2020'}\n", - "e2 1 0.52 {'name': 'owned_by'}\n", - " 2 0.20 {}\n", - "e3 1 0.50 {'name': 'related_to'}\n", - " 2 0.20 {'name': 'owner_of'}\n", - " 3 1.00 {'type': 'relationship'}" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
weightnumbermisc_properties
0
e11.01{}
e21.02{}
e31.03{}
\n", - "
" - ], - "text/plain": [ - " weight number misc_properties\n", - "0 \n", - "e1 1.0 1 {}\n", - "e2 1.0 2 {}\n", - "e3 1.0 3 {}" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "None" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "----------------------------------------------------------------------------------------------------\n" - ] - } - ], - "source": [ - "\n", - "cell_prop_dict = {'e1':{ 1: {'w':0.5, 'name': 'related_to'},\n", - " 2: {'w':0.1, 'name': 'related_to','startdate': '05.13.2020'}},\n", - " 'e2':{ 1: {'w':0.52, 'name': 'owned_by'},\n", - " 2: {'w':0.2}},\n", - " 'e3':{ 1: {'w':0.5, 'name': 'related_to'},\n", - " 2: {'w':0.2, 'name': 'owner_of'},\n", - " 3: {'w':1, 'type': 'relationship'}}}\n", - "\n", - "edge_prop_dict = {'e1': {'number': 1},\n", - " 'e2': {'number': 2},\n", - " 'e3': {'number': 3}}\n", - "\n", - "print('Provided Dataframes')\n", - "print('-'*100)\n", - "display(cell_prop_dict)\n", - "\n", - "print('\\n \\nRestructured Dataframes using single factory method for property store repeated')\n", - "print('-'*100)\n", - "\n", - "IPS = dict_factory_method(cell_prop_dict, level = 2, weight_col = 'w')\n", - "display(IPS)\n", - "\n", - "\n", - "EPS = dict_factory_method(edge_prop_dict, level = 0)\n", - "display(EPS)\n", - "\n", - "\n", - "NPS = dict_factory_method(None, level = 1, weight_col = 'w')\n", - "display(NPS)\n", - "print('-'*100)" - ] - }, - { - "cell_type": "markdown", - "id": "76b8565e", - "metadata": { - "heading_collapsed": true - }, - "source": [ - "# simple dataframe" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "6ec431ae", - "metadata": { - "hidden": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Provided Dataframes\n", - "----------------------------------------------------------------------------------------------------\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
en
0a1
1a1
2a2
3b3
4c2
5c3
\n", - "
" - ], - "text/plain": [ - " e n\n", - "0 a 1\n", - "1 a 1\n", - "2 a 2\n", - "3 b 3\n", - "4 c 2\n", - "5 c 3" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - " \n", - "Restructured Dataframes using single factory method for property store repeated\n", - "----------------------------------------------------------------------------------------------------\n" - ] - }, - { - "data": { - "text/plain": [ - "MultiIndex([('a', 1),\n", - " ('a', 2),\n", - " ('b', 3),\n", - " ('c', 2),\n", - " ('c', 3)],\n", - " names=['e', 'n'])" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
weightmisc_properties
en
a11.0{}
21.0{}
b31.0{}
c21.0{}
31.0{}
\n", - "
" - ], - "text/plain": [ - " weight misc_properties\n", - "e n \n", - "a 1 1.0 {}\n", - " 2 1.0 {}\n", - "b 3 1.0 {}\n", - "c 2 1.0 {}\n", - " 3 1.0 {}" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "None" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "None" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "----------------------------------------------------------------------------------------------------\n" - ] - } - ], - "source": [ - "incidence_dataframe = pd.DataFrame({'e': ['a', 'a', 'a', 'b', 'c', 'c'], \n", - " 'n': [1, 1, 2, 3, 2, 3],})\n", - "\n", - "\n", - "print('Provided Dataframes')\n", - "print('-'*100)\n", - "display(incidence_dataframe)\n", - "\n", - "\n", - "\n", - "print('\\n \\nRestructured Dataframes using single factory method for property store repeated')\n", - "print('-'*100)\n", - "\n", - "\n", - "\n", - "IPS = dataframe_factory_method(incidence_dataframe, level = 2,\n", - " uid_cols = ['e', 'n'],\n", - " aggregate_by = {'weight': 'sum'},)\n", - "IS = IPS.index\n", - "\n", - "display(IS)\n", - "display(IPS)\n", - "\n", - "EPS = dataframe_factory_method(None, level = 0)\n", - "display(EPS)\n", - "\n", - "NPS = dataframe_factory_method(None, level = 1, uid_cols = ['nodes'])\n", - "display(NPS)\n", - "print('-'*100)\n" - ] - }, - { - "cell_type": "markdown", - "id": "73cbb837", - "metadata": { - "heading_collapsed": true - }, - "source": [ - "# complex dataframe" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "68cf96df", - "metadata": { - "hidden": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Provided Dataframes\n", - "----------------------------------------------------------------------------------------------------\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Enodescolorother_properties
0a1red{}
1a1red{}
2a2red{'weight': 5}
3b3red{'time': 3}
4c2red{}
5c3blue{}
\n", - "
" - ], - "text/plain": [ - " E nodes color other_properties\n", - "0 a 1 red {}\n", - "1 a 1 red {}\n", - "2 a 2 red {'weight': 5}\n", - "3 b 3 red {'time': 3}\n", - "4 c 2 red {}\n", - "5 c 3 blue {}" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
edgesstrength
0a2.0
1bNaN
2c3.0
\n", - "
" - ], - "text/plain": [ - " edges strength\n", - "0 a 2.0\n", - "1 b NaN\n", - "2 c 3.0" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Ntemperature
0160
\n", - "
" - ], - "text/plain": [ - " N temperature\n", - "0 1 60" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - " \n", - "Restructured Dataframes using single factory method for property store repeated\n", - "----------------------------------------------------------------------------------------------------\n" - ] - }, - { - "data": { - "text/plain": [ - "MultiIndex([('a', 1),\n", - " ('a', 2),\n", - " ('b', 3),\n", - " ('c', 2),\n", - " ('c', 3)],\n", - " names=['E', 'nodes'])" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
weightcolormisc_properties
Enodes
a11.0red{}
25.0red{'weight': 5}
b31.0red{'time': 3}
c21.0red{}
31.0blue{}
\n", - "
" - ], - "text/plain": [ - " weight color misc_properties\n", - "E nodes \n", - "a 1 1.0 red {}\n", - " 2 5.0 red {'weight': 5}\n", - "b 3 1.0 red {'time': 3}\n", - "c 2 1.0 red {}\n", - " 3 1.0 blue {}" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
weightstrengthmisc_properties
edges
a1.02.0{}
b1.0NaN{}
c1.03.0{}
\n", - "
" - ], - "text/plain": [ - " weight strength misc_properties\n", - "edges \n", - "a 1.0 2.0 {}\n", - "b 1.0 NaN {}\n", - "c 1.0 3.0 {}" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "None" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "----------------------------------------------------------------------------------------------------\n" - ] - } - ], - "source": [ - "cell_prop_dataframe = pd.DataFrame({'E': ['a', 'a', 'a', 'b', 'c', 'c'], 'nodes': [1, 1, 2, 3, 2, 3],\n", - " 'color': ['red', 'red', 'red', 'red', 'red', 'blue'],\n", - " 'other_properties': [{}, {}, {'weight': 5}, {'time': 3}, {}, {}]})\n", - "\n", - "edge_prop_dataframe = pd.DataFrame({'edges': ['a', 'b', 'c'],\n", - " 'strength': [2, np.nan, 3]})\n", - "\n", - "node_prop_dataframe = pd.DataFrame({'N': [1],\n", - " 'temperature': [60]})\n", - "\n", - "print('Provided Dataframes')\n", - "print('-'*100)\n", - "display(cell_prop_dataframe)\n", - "display(edge_prop_dataframe)\n", - "display(node_prop_dataframe)\n", - "\n", - "print('\\n \\nRestructured Dataframes using single factory method for property store repeated')\n", - "print('-'*100)\n", - "\n", - "\n", - "IPS = dataframe_factory_method(cell_prop_dataframe, level = 2,\n", - " uid_cols = ['E', 'nodes'],\n", - " misc_properties_col = 'other_properties',\n", - " aggregate_by = {'weight': 'sum'},)\n", - "IS = IPS.index\n", - "\n", - "display(IS)\n", - "\n", - "display(IPS)\n", - "\n", - "\n", - "EPS = dataframe_factory_method(edge_prop_dataframe, level = 0,\n", - " weight_col = 1, uid_cols = [0])\n", - "display(EPS)\n", - "\n", - "\n", - "NPS = dataframe_factory_method(None, level = 1,)\n", - "display(NPS)\n", - "print('-'*100)\n" - ] - }, - { - "cell_type": "markdown", - "id": "944095a5", - "metadata": { - "heading_collapsed": true - }, - "source": [ - "# meetings dictionary " - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "7e429de7", - "metadata": { - "hidden": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Provided Dataframes\n", - "----------------------------------------------------------------------------------------------------\n" - ] - }, - { - "data": { - "text/plain": [ - "{'M1': {'P4': {'topics': ['A, B']}, 'P5': {'topics': ['A', 'C']}},\n", - " 'M2': {'P5': {'topics': ['E', 'F']}, 'P6': {'topics': ['F']}},\n", - " 'M3': {'P2': {'topics': ['C', 'D']},\n", - " 'P3': {'topics': ['B', 'C', 'D']},\n", - " 'P4': {'topics': ['D']}},\n", - " 'M4': {'P1': {'topics': ['C']},\n", - " 'P2': {'topics': ['C']},\n", - " 'P3': {'topics': ['C']}},\n", - " 'M5': {'P1'}}" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "{'M1': {'interval': [0, 1]},\n", - " 'M2': {'interval': [2, 6]},\n", - " 'M3': {'interval': [3, 5]},\n", - " 'M4': {'interval': [7, 9]}}" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - " \n", - "Restructured Dataframes using single factory method for property store repeated\n", - "----------------------------------------------------------------------------------------------------\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
weightmisc_properties
edgesnodes
M1P41.0{'topics': ['A, B']}
P51.0{'topics': ['A', 'C']}
M2P51.0{'topics': ['E', 'F']}
P61.0{'topics': ['F']}
M3P21.0{'topics': ['C', 'D']}
P31.0{'topics': ['B', 'C', 'D']}
P41.0{'topics': ['D']}
M4P11.0{'topics': ['C']}
P21.0{'topics': ['C']}
P31.0{'topics': ['C']}
M5P11.0{}
\n", - "
" - ], - "text/plain": [ - " weight misc_properties\n", - "edges nodes \n", - "M1 P4 1.0 {'topics': ['A, B']}\n", - " P5 1.0 {'topics': ['A', 'C']}\n", - "M2 P5 1.0 {'topics': ['E', 'F']}\n", - " P6 1.0 {'topics': ['F']}\n", - "M3 P2 1.0 {'topics': ['C', 'D']}\n", - " P3 1.0 {'topics': ['B', 'C', 'D']}\n", - " P4 1.0 {'topics': ['D']}\n", - "M4 P1 1.0 {'topics': ['C']}\n", - " P2 1.0 {'topics': ['C']}\n", - " P3 1.0 {'topics': ['C']}\n", - "M5 P1 1.0 {}" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "----------------------------------------------------------------------------------------------------\n" - ] - } - ], - "source": [ - "NS = 'P'\n", - "ES = 'M'\n", - "\n", - "cell_props = {\n", - " ES+'1': {NS+'4': {'topics': ['A, B']}, \n", - " NS+'5': {'topics': ['A', 'C']}}, \n", - " ES+'2': {NS+'5': {'topics': ['E', 'F']}, \n", - " NS+'6': {'topics': ['F']}}, \n", - " ES+'3': {NS+'2': {'topics': ['C', 'D']}, \n", - " NS+'3': {'topics': ['B', 'C', 'D']}, \n", - " NS+'4': {'topics': ['D']}}, \n", - " ES+'4': {NS+'1': {'topics': ['C']}, \n", - " NS+'2': {'topics': ['C']}, \n", - " NS+'3': {'topics': ['C']}}, \n", - " ES+'5': {NS+'1'}\n", - " }\n", - " \n", - "edge_props = {\n", - " ES+'1': {'interval': [0, 1]}, \n", - " ES+'2': {'interval': [2, 6]}, \n", - " ES+'3': {'interval': [3, 5]}, \n", - " ES+'4': {'interval': [7, 9]}, \n", - " }\n", - "\n", - "print('Provided Dataframes')\n", - "print('-'*100)\n", - "display(cell_props)\n", - "display(edge_props)\n", - "\n", - "print('\\n \\nRestructured Dataframes using single factory method for property store repeated')\n", - "print('-'*100)\n", - "\n", - "\n", - "IPS = dict_factory_method(cell_props, level = 2)\n", - "\n", - "display(IPS)\n", - "\n", - "print('-'*100)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a39142a0", - "metadata": { - "hidden": true - }, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "985ae9fd", - "metadata": { - "hidden": true - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.5" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": false, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": true, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": true - }, - "varInspector": { - "cols": { - "lenName": 16, - "lenType": 16, - "lenVar": 40 - }, - "kernels_config": { - "python": { - "delete_cmd_postfix": "", - "delete_cmd_prefix": "del ", - "library": "var_list.py", - "varRefreshCmd": "print(var_dic_list())" - }, - "r": { - "delete_cmd_postfix": ") ", - "delete_cmd_prefix": "rm(", - "library": "var_list.r", - "varRefreshCmd": "cat(var_dic_list()) " - } - }, - "types_to_exclude": [ - "module", - "function", - "builtin_function_or_method", - "instance", - "_Feature" - ], - "window_display": false - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} From bcfe9d067373c311d56da80e14b917cae2f83a89 Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Tue, 9 Apr 2024 13:44:41 -0700 Subject: [PATCH 4/4] Run linter --- hypernetx/classes/factory.py | 46 ++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/hypernetx/classes/factory.py b/hypernetx/classes/factory.py index 77f0f8cd..c3f99f52 100644 --- a/hypernetx/classes/factory.py +++ b/hypernetx/classes/factory.py @@ -2,8 +2,9 @@ def mkdict(x): - #function to create a dictionary from object x if it is not already a dicitonary. + # function to create a dictionary from object x if it is not already a dicitonary. import ast, json + if isinstance(x, dict): return x else: @@ -30,24 +31,24 @@ def create_df( misc_properties_col=None, aggregation_methods=None, ): - + if not isinstance(dfp, pd.DataFrame): raise TypeError("method requires a Pandas DataFrame") else: - #checks if the use index variable is called. if it is then use the existing indices. if it is not then an index is set based on the uid columns. + # checks if the use index variable is called. if it is then use the existing indices. if it is not then an index is set based on the uid columns. if use_index == False: - #if uid cols are specified make those columns the index columns + # if uid cols are specified make those columns the index columns if uid_cols != None: - #create chk function to check if the column specified is a string. if it is not a string then it assumes it is an integer and grabs that columns name. + # create chk function to check if the column specified is a string. if it is not a string then it assumes it is an integer and grabs that columns name. chk = lambda c: c if isinstance(c, str) else dfp.columns[c] - #set indices using the column names in uid_cols using the chk function. + # set indices using the column names in uid_cols using the chk function. dfp = dfp.set_index([chk(c) for c in uid_cols]) - else: #if uid_cols are not specified then assume the first one or two columns (depending on level) are the index columns and set the index. + else: # if uid_cols are not specified then assume the first one or two columns (depending on level) are the index columns and set the index. if level == 2: dfp = dfp.set_index([dfp.columns[0], dfp.columns[1]]) else: dfp = dfp.set_index([dfp.columns[0]]) - + # if the misc prop col is in the column names if misc_properties_col in dfp.columns: # rename the misc properties column to the default name if it isn't @@ -55,30 +56,32 @@ def create_df( dfp = dfp.rename(columns={misc_properties_col: "misc_properties"}) # force misc properties to be a dictionary if it is not. dfp.misc_properties = dfp.misc_properties.map(mkdict) - else:#if the column is not specified then create the misc properties column of empty dicitonaries. + else: # if the column is not specified then create the misc properties column of empty dicitonaries. dfp["misc_properties"] = [{} for row in dfp.index] - + # check if weight property column name was specified. if weight_prop in dfp.columns: # if it was specified and it exists then rename to default weight name and fill in the NA weights with the default. dfp = dfp.rename(columns={weight_prop: "weight"}) dfp = dfp.fillna({"weight": default_weight}) - #if weight column is not None and the weight column name was not in the column names then check in the misc properties. + # if weight column is not None and the weight column name was not in the column names then check in the misc properties. elif weight_prop is not None: + def grabweight(cell): - #function to grab weights from the misc properties column. + # function to grab weights from the misc properties column. if isinstance(cell, dict): return cell.get(weight_prop, default_weight) else: return default_weight - #set the weight column to the weights grabbed from the misc properties dictionary (if any). + + # set the weight column to the weights grabbed from the misc properties dictionary (if any). dfp["weight"] = dfp["misc_properties"].map(grabweight) - + # reorder columns in standard order cols = [c for c in dfp.columns if c not in ["weight", "misc_properties"]] dfp = dfp[["weight"] + cols + ["misc_properties"]] - - #remove duplicate indices and aggregate using aggregation methods specified. + + # remove duplicate indices and aggregate using aggregation methods specified. dfp = dfp[~dfp.index.duplicated(keep="first")] return dfp @@ -146,10 +149,12 @@ def dataframe_factory_method( weight_prop=weight_col, misc_properties_col=misc_properties_col, default_weight=default_weight, - aggregation_methods=aggregate_by,) + aggregation_methods=aggregate_by, + ) return PS + def dict_factory_method( D, level, @@ -224,9 +229,9 @@ def dict_factory_method( attribute_data[weight_col] += [default_weight] attribute_data[misc_properties_col] += [attributes_of_incidence_pair] attribute_df = pd.DataFrame(attribute_data) - DF = pd.concat([DF, attribute_df], axis=1) - - #id the dataeframe is for edges or nodes. + DF = pd.concat([DF, attribute_df], axis=1) + + # id the dataeframe is for edges or nodes. elif level == 1 or level == 0: attribute_data = [] for key in D: @@ -251,6 +256,7 @@ def dict_factory_method( return PS + def list_factory_method( L, level,