Commit
Initial commit of all the Jupyter Notebooks used in the decoding, encoding, and hierarchical analyses for the Cell Reports manuscript entitled "Sparse Ensemble Neural Code for a Complete Vocal Repertoire" by Hermina Robotka et al.
@@ -0,0 +1,162 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# First load the data\n",
    "import pickle as pk\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "# NOTE: the extension was truncated in the original; .pkl is assumed from\n",
    "# the naming of the per-unit files loaded below\n",
    "dbHermina = pd.read_pickle('../data/ephys_Germany/HerminaDatabase.pkl')\n",
    "\n",
    "numUnits = [1, 2, 3, 4, 5, 10, 20]\n",
    "groupings = []\n",
    "pcc = []\n",
    "confMat = []\n",
    "dataPath = '/auto/zdrive/lthomas/Code/zebra-finch-categories/data/ephys_Germany/'\n",
    "for nUnit in numUnits:\n",
    "    inPath = dataPath + 'HerminaDatabase%dGoodUnitPC.pkl' % nUnit\n",
    "    fileIn = open(inPath, 'rb')\n",
    "    # groupings of units\n",
    "    groupings.append(pk.load(fileIn))\n",
    "    # confusion matrices\n",
    "    confMat.append(pk.load(fileIn))\n",
    "    # percent-correct scores for each grouping\n",
    "    pcc.append(pk.load(fileIn))\n",
    "    fileIn.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# For each group size, compute an ARI for every grouping of units by\n",
    "# comparing the neural clustering against call type, then save the\n",
    "# results to a pkl file.\n",
    "\n",
    "import multiprocessing\n",
    "import time\n",
    "\n",
    "stims = np.unique(dbHermina.stim)\n",
    "identities_orig = [x.split('_')[0] for x in stims]\n",
    "calltypes_orig = [x.split('_')[2] for x in stims]\n",
    "n_pcs = 5\n",
    "\n",
    "def cluster_analysis_short(db_sites):\n",
    "    # The response vector per stimulus is (z, pc1, pc2, ..., pcn) per unit.\n",
    "    avg_responses = np.zeros((len(stims), len(db_sites) * (1 + n_pcs)))\n",
    "    n_trials = np.zeros((len(stims), len(db_sites)))\n",
    "    for ind, db_site in enumerate(db_sites):\n",
    "        for stim in np.unique(db_site.stim):\n",
    "            stim_ind = np.where(stims == stim)[0]\n",
    "            db_site_stim = db_site[db_site.stim == stim]\n",
    "            avg_responses[stim_ind, ind * (1 + n_pcs)] = np.mean(db_site_stim.z)\n",
    "            n_trials[stim_ind, ind] = len(db_site_stim.z)\n",
    "            for i in range(n_pcs):\n",
    "                avg_responses[stim_ind, ind * (1 + n_pcs) + i + 1] = np.mean([x[i] for x in db_site_stim.PC])\n",
    "    # cluster_analysis is assumed to be defined earlier in the notebook\n",
    "    return cluster_analysis(avg_responses, calltypes_orig, score='ari')\n",
    "\n",
    "for i in range(len(groupings)):\n",
    "    pccs_rand = []\n",
    "    print(i)\n",
    "    # accumulate all groupings of sites for this group size\n",
    "    g_sites = []\n",
    "\n",
    "    # go through each grouping to generate a collection of sites\n",
    "    for g in range(len(groupings[i])):\n",
    "        # get the grouping and percent-correct score for this grouping\n",
    "        grp = groupings[i][g]\n",
    "        pccs_rand.append(pcc[i][g])\n",
    "\n",
    "        sites = []\n",
    "        for unit_name in grp:\n",
    "            # each entry names a site and a unit: '<site>_goodPlayback<n>'\n",
    "            s, u = unit_name.split('_goodPlayback')\n",
    "            query = (dbHermina.site == s) & (dbHermina.unit == 'goodPlayback%s' % u)\n",
    "            sites.append(dbHermina[query].copy())\n",
    "\n",
    "        # append this collection of sites to the collection of groups of sites\n",
    "        g_sites.append(sites)\n",
    "\n",
    "    print('starting multiproc for %s' % i)\n",
    "    tst = time.time()\n",
    "    pool = multiprocessing.Pool(12)\n",
    "    results = pool.map(cluster_analysis_short, g_sites)\n",
    "    print('ending %s' % (time.time() - tst))\n",
    "    pool.close()\n",
    "    rd = dict()\n",
    "    rd['pccs'] = pccs_rand\n",
    "    rd['results'] = results\n",
    "    rd['group'] = groupings[i]\n",
    "    with open('../data/ephys_Germany/Groupings/Groupings_%s.pkl' % len(g_sites[0]), 'wb') as handle:\n",
    "        pk.dump(rd, handle, protocol=pk.HIGHEST_PROTOCOL)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Calculate ari_neural_vs_aud: the ARI between each neural grouping and\n",
    "# the acoustic (UMAP-based) grouping of the stimuli.\n",
    "\n",
    "import glob\n",
    "import time\n",
    "import pickle as pk\n",
    "from sklearn.metrics import adjusted_rand_score, adjusted_mutual_info_score\n",
    "\n",
    "groups = glob.glob('../data/ephys_Germany/Groupings/Groupings_*.pkl')\n",
    "# make a dataframe for easy processing\n",
    "dfout_add = dict({\n",
    "    'neural_grouping': [],\n",
    "    'ari_neural_vs_aud': []})\n",
    "for gi, group_fp in enumerate(groups):\n",
    "    print('Starting %s' % group_fp)\n",
    "    tst = time.time()\n",
    "    with open(group_fp, 'rb') as fh:\n",
    "        group = pk.load(fh)\n",
    "    pccs = group['pccs']\n",
    "    results = group['results']\n",
    "    # the cluster labels sit at index 4 of each cluster_analysis result\n",
    "    neural_groupings = [r[4] for r in results]\n",
    "\n",
    "    # has_wavs and aud_lbl_umap are assumed to be defined in earlier cells:\n",
    "    # a mask of stimuli with audio and their acoustic (UMAP) cluster labels\n",
    "    n_v_a_aris = [adjusted_rand_score(ng[has_wavs], aud_lbl_umap) if ng is not None else 0.0 for ng in neural_groupings]\n",
    "    dfout_add['neural_grouping'].extend(neural_groupings)\n",
    "    dfout_add['ari_neural_vs_aud'].extend(n_v_a_aris)\n",
    "    print('Finished %s in %s' % (group_fp, time.time() - tst))\n",
    "\n",
    "# dfout_f is assumed to be an existing metadata DataFrame from earlier cells\n",
    "dfout_f['neural_grouping'] = dfout_add['neural_grouping']\n",
    "dfout_f['ari_neural_vs_aud'] = dfout_add['ari_neural_vs_aud']\n",
    "dfout_f.to_hdf('../data/ephys_Germany/Groupings_metadata_with_ng.hdf', 'data')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "py35",
   "language": "python",
   "name": "py35"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
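
The analysis cells above rely on a cluster_analysis helper that is not part of this commit. Below is a minimal sketch of the interface the notebook appears to assume, not the authors' actual implementation: k-means with one cluster per call type stands in for the clustering step, scikit-learn's adjusted Rand index serves the score='ari' path, and the predicted labels are returned at index 4 because the third cell reads r[4] from each result.

import numpy as np
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score

def cluster_analysis(avg_responses, labels, score='ari'):
    # Cluster the (n_stims x n_features) response matrix into one cluster
    # per distinct call type, then score agreement with the true labels.
    n_clusters = len(set(labels))
    pred = KMeans(n_clusters=n_clusters, n_init=10).fit_predict(avg_responses)
    ari = adjusted_rand_score(labels, pred)
    # Positions 0-3 are placeholders for whatever the real helper returns
    # there; the notebook only reads the cluster labels at index 4.
    return ari, None, None, None, pred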
@@ -0,0 +1,23 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Jun  4 10:13:58 2021

@author: frederictheunissen
"""
import pickle as pk

# Load the database
rootPath = '/Users/frederictheunissen/Code/songephys/'
dataPath = 'data/birds/'
outPath = rootPath + dataPath + 'HerminaDataBase.pkl'

# rootPath = '/Users/frederictheunissen/Google Drive/My Drive/julie/'
# outPath = rootPath + 'JulieDataBase.pkl'

with open(outPath, 'rb') as fileIn:
    DFAll = pk.load(fileIn)

# Optionally exclude 'Wh' calls:
# DF = DFAll[DFAll['call'] != 'Wh']
DF = DFAll
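
For orientation, a quick sanity check after loading; the column names used here (site, call) are inferred from the notebook above and are an assumption about this DataFrame:

# Columns (site, call) are inferred from the notebook above; they are an
# assumption and may differ in the actual pickle.
print(DF.shape)
print(DF.columns.tolist())
print(DF['call'].value_counts())   # distribution of call types
print(DF['site'].nunique(), 'recording sites')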