generated from csinva/cookiecutter-ml-research
Showing 7 changed files with 541 additions and 223 deletions.
notebooks_stories/3_analyze_pilot2/01_load_results_pilot3.ipynb (196 additions, 0 deletions)
@@ -0,0 +1,196 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The autoreload extension is already loaded. To reload it, use:\n",
      "  %reload_ext autoreload\n"
     ]
    }
   ],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "import os\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "from os.path import join\n",
    "from tqdm import tqdm\n",
    "import pandas as pd\n",
    "import sys\n",
    "import joblib\n",
    "from scipy.special import softmax\n",
    "import sasc.config\n",
    "import numpy as np\n",
    "from collections import defaultdict\n",
    "from copy import deepcopy\n",
    "# import story_helper\n",
    "from sasc.modules.fmri_module import convert_module_num_to_voxel_num\n",
    "from sasc.config import FMRI_DIR"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Read all the info from stories into a single pickle file"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# load stuff\n",
    "# double check all of these, intro paragraph may be the same...\n",
    "output_file = join(sasc.config.RESULTS_DIR, \"pilot3_story_data.pkl\")\n",
    "STORIES_DIR = join(sasc.config.RESULTS_DIR, \"stories\")\n",
    "story_mapping = {\n",
    "    'default/uts03___jun14___seed=5': 'GenStory12_resps.npy',\n",
    "    'default/uts03___jun14___seed=1': 'GenStory13_resps.npy',\n",
    "\n",
    "    'interactions/uts03___jun14___seed=5': 'GenStory14_resps.npy',\n",
    "    'interactions/uts03___jun14___seed=6': 'GenStory15_resps.npy',\n",
    "\n",
    "    'polysemantic/uts03___jun14___seed=3': 'GenStory16_resps.npy',\n",
    "    'polysemantic/uts03___jun14___seed=7': 'GenStory17_resps.npy',\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['/home/chansingh/automated-explanations/results/pilot3_story_data.pkl']"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# cluster_neighbors = joblib.load(join(FMRI_DIR, \"voxel_neighbors_and_pcs\", \"cluster_neighbors_v1.pkl\"))\n",
    "perfs = joblib.load(join(sasc.config.FMRI_DIR, 'rj_models',\n",
    "                         'opt_model', 'new_setup_performance.jbl'))\n",
    "\n",
    "# add keys\n",
    "stories_data_dict = defaultdict(list)\n",
    "for story_name in story_mapping:\n",
    "    # add scalar story descriptions\n",
    "    stories_data_dict[\"story_name_original\"].append(story_name)\n",
    "    stories_data_dict[\"story_setting\"].append(story_name.split(\"/\")[0])\n",
    "    stories_data_dict[\"story_name_new\"].append(story_mapping[story_name])\n",
    "    stories_data_dict[\"story_text\"].append(\n",
    "        open(join(STORIES_DIR, story_name, \"story.txt\"), \"r\").read()\n",
    "    )\n",
    "    prompts_paragraphs = joblib.load(\n",
    "        join(STORIES_DIR, story_name, \"prompts_paragraphs.pkl\")\n",
    "    )\n",
    "\n",
    "    # add paragraph-level descriptions\n",
    "    stories_data_dict[\"timing\"].append(\n",
    "        pd.read_csv(join(STORIES_DIR, story_name, \"timings_processed.csv\"))\n",
    "    )\n",
    "    stories_data_dict[\"prompts\"].append(prompts_paragraphs[\"prompts\"])\n",
    "    stories_data_dict[\"paragraphs\"].append(prompts_paragraphs[\"paragraphs\"])\n",
    "\n",
    "    # add paragraph-level metadata\n",
    "    # rows = pd.read_csv(join(STORIES_DIR, story_name, \"rows.csv\"))\n",
    "    story_metadata_per_paragraph = pd.read_pickle(\n",
    "        join(STORIES_DIR, story_name, \"rows.pkl\"))\n",
    "    story_metadata_per_paragraph[\"voxel_num\"] = story_metadata_per_paragraph.apply(\n",
    "        lambda row: convert_module_num_to_voxel_num(\n",
    "            row[\"module_num\"], row[\"subject\"]),\n",
    "        axis=1,\n",
    "    )\n",
    "    story_metadata_per_paragraph = story_metadata_per_paragraph[\n",
    "        [\n",
    "            \"expl\",\n",
    "            \"module_num\",\n",
    "            \"top_explanation_init_strs\",\n",
    "            \"subject\",\n",
    "            \"fmri_test_corr\",\n",
    "            # \"top_score_synthetic\",\n",
    "            \"top_score_normalized\",\n",
    "            \"roi_anat\",\n",
    "            \"roi_func\",\n",
    "            \"voxel_num\",\n",
    "        ]\n",
    "    ]\n",
    "    story_metadata_per_paragraph['test_corr_new'] = story_metadata_per_paragraph['voxel_num'].apply(\n",
    "        lambda x: perfs[x])\n",
    "    # rows['cluster_nums'] = rows['voxel_num'].map(cluster_neighbors)\n",
    "    stories_data_dict[\"rows\"].append(story_metadata_per_paragraph)\n",
    "\n",
    "    if \"interactions\" in story_name:\n",
    "        rows1 = pd.read_pickle(join(STORIES_DIR, story_name, \"rows1.pkl\"))\n",
    "        rows2 = pd.read_pickle(join(STORIES_DIR, story_name, \"rows2.pkl\"))\n",
    "        rows1[\"voxel_num\"] = rows1.apply(\n",
    "            lambda row: convert_module_num_to_voxel_num(\n",
    "                row[\"module_num\"], row[\"subject\"]\n",
    "            ),\n",
    "            axis=1,\n",
    "        )\n",
    "        rows2[\"voxel_num\"] = rows2.apply(\n",
    "            lambda row: convert_module_num_to_voxel_num(\n",
    "                row[\"module_num\"], row[\"subject\"]\n",
    "            ),\n",
    "            axis=1,\n",
    "        )\n",
    "        stories_data_dict['voxel_num1'].append(rows1['voxel_num'])\n",
    "        stories_data_dict['voxel_num2'].append(rows2['voxel_num'])\n",
    "        stories_data_dict['expl1'].append(rows1['expl'])\n",
    "        stories_data_dict['expl2'].append(rows2['expl'])\n",
    "    else:\n",
    "        stories_data_dict['voxel_num1'].append([])\n",
    "        stories_data_dict['voxel_num2'].append([])\n",
    "        stories_data_dict['expl1'].append([])\n",
    "        stories_data_dict['expl2'].append([])\n",
    "\n",
    "\n",
    "joblib.dump(stories_data_dict, output_file)"
   ]
  }
 ],
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": ".llm", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.11.5" | ||
}, | ||
"orig_nbformat": 4, | ||
"vscode": { | ||
"interpreter": { | ||
"hash": "a9ff692d44ea03fd8a03facee7621117bbbb82def09bacaacf0a2cbc238b7b91" | ||
} | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
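The notebook's last cell writes `stories_data_dict` to `pilot3_story_data.pkl` with `joblib.dump`. As a minimal sketch of how downstream analysis code might read that pickle back (it assumes the same `sasc.config.RESULTS_DIR` used above; the loop variable names are illustrative):

```python
from os.path import join

import joblib
import sasc.config

# load the dict dumped by the notebook; each key maps to a parallel list
# with one entry per story, in the order of story_mapping
stories_data_dict = joblib.load(
    join(sasc.config.RESULTS_DIR, "pilot3_story_data.pkl"))

for name, setting, rows in zip(
    stories_data_dict["story_name_new"],
    stories_data_dict["story_setting"],
    stories_data_dict["rows"],
):
    # "rows" is a DataFrame of per-paragraph voxel metadata
    # (expl, module_num, voxel_num, test_corr_new, ...)
    print(name, setting, rows.shape)
```

Because every key holds a parallel list, indexing any key by story position lines up across keys, e.g. `stories_data_dict["expl1"][i]` is non-empty only for the "interactions" stories.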