Skip to content

Commit

Permalink
Add results for the skewed marginal distributions ablation experiment
Browse files Browse the repository at this point in the history
  • Loading branch information
yukw777 committed Jun 11, 2024
1 parent c215ac1 commit 892f03f
Showing 1 changed file with 114 additions and 0 deletions.
114 changes: 114 additions & 0 deletions figures/ablation_figures.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,120 @@
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Skewed Marginal Distributions (Top 100 Common Actions)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Scores for the EILEV models. The shared plotting record (shots + solid line)\n",
"# is attached per model below instead of being repeated in every entry.\n",
"skewed_eilev_data = {\n",
"    model_name: {**scores, \"meta\": {\"shots\": shots, \"linestyle\": \"-\"}}\n",
"    for model_name, scores in {\n",
"        \"EILEV BLIP-2 OPT-2.7B\": {\n",
"            \"STS-CE\": [0.222, 0.5117, 0.51, 0.5939, 0.6602, 0.6775, 0.6814],\n",
"            \"STS-BE\": [0.3672, 0.5864, 0.5978, 0.6727, 0.7311, 0.7466, 0.7512],\n",
"            \"BERTScore-F1\": [0.5451, 0.6229, 0.6323, 0.639, 0.6444, 0.6449, 0.6448],\n",
"            \"ROUGE-L\": [0.1916, 0.5245, 0.5601, 0.6069, 0.6424, 0.6547, 0.6606],\n",
"            \"BLEU\": [0.01493, 0.1183, 0.2028, 0.2526, 0.2888, 0.3037, 0.3145],\n",
"        },\n",
"        \"EILEV BLIP-2 Flan-T5-xl\": {\n",
"            \"STS-CE\": [0.3368, 0.5243, 0.5319, 0.5983, 0.6553, 0.6794, 0.6889],\n",
"            \"STS-BE\": [0.4282, 0.595, 0.6037, 0.6606, 0.726, 0.7484, 0.7569],\n",
"            \"BERTScore-F1\": [0.5189, 0.6147, 0.6184, 0.6258, 0.6347, 0.6396, 0.642],\n",
"            \"ROUGE-L\": [0.3103, 0.5236, 0.5448, 0.592, 0.644, 0.6605, 0.6653],\n",
"            \"BLEU\": [0.05684, 0.1503, 0.1947, 0.258, 0.3148, 0.3278, 0.3266],\n",
"        },\n",
"    }.items()\n",
"}\n",
"# Scores for the T100 (top 100 common actions) ablation models, drawn dashed.\n",
"# NOTE(review): the last three BERTScore-F1 values for T100 Flan-T5-xl fall\n",
"# sharply while its other metrics keep rising - confirm against the raw results.\n",
"skewed_ablation_data = {\n",
"    model_name: {**scores, \"meta\": {\"shots\": shots, \"linestyle\": \"--\"}}\n",
"    for model_name, scores in {\n",
"        \"T100 BLIP-2 OPT-2.7B\": {\n",
"            \"STS-CE\": [0.2118, 0.4958, 0.4682, 0.5153, 0.5623, 0.5765, 0.5643],\n",
"            \"STS-BE\": [0.3817, 0.5728, 0.5564, 0.5961, 0.6348, 0.6451, 0.6344],\n",
"            \"BERTScore-F1\": [0.5512, 0.6255, 0.6386, 0.6444, 0.6516, 0.6535, 0.6549],\n",
"            \"ROUGE-L\": [0.18, 0.5134, 0.5329, 0.5574, 0.5807, 0.5899, 0.588],\n",
"            \"BLEU\": [0.007519, 0.1243, 0.1654, 0.1842, 0.2059, 0.2184, 0.2175],\n",
"        },\n",
"        \"T100 BLIP-2 Flan-T5-xl\": {\n",
"            \"STS-CE\": [0.2485, 0.4929, 0.4881, 0.517, 0.5625, 0.5845, 0.5917],\n",
"            \"STS-BE\": [0.3826, 0.5686, 0.5672, 0.5945, 0.6401, 0.6603, 0.6666],\n",
"            \"BERTScore-F1\": [0.4717, 0.622, 0.6329, 0.6319, 0.5809, 0.491, 0.3668],\n",
"            \"ROUGE-L\": [0.2245, 0.5222, 0.5396, 0.5663, 0.6004, 0.6126, 0.6151],\n",
"            \"BLEU\": [0.0309, 0.1564, 0.185, 0.2212, 0.2615, 0.2736, 0.2757],\n",
"        },\n",
"    }.items()\n",
"}\n",
"# Model order follows dict insertion order: EILEV entries first, then T100.\n",
"models = [*skewed_eilev_data, *skewed_ablation_data]\n",
"\n",
"draw_graphs(\n",
"    {**skewed_eilev_data, **skewed_ablation_data},\n",
"    models,\n",
"    metrics,\n",
"    \"skewed-t100-ablation.pdf\",\n",
"    [0, 4, 8, 12, 16],\n",
"    2,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Skewed Marginal Distributions (Top 500 Common Actions)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Scores for the T500 (top 500 common actions) ablation models, drawn dashed.\n",
"# The shared plotting record is attached per model instead of being repeated.\n",
"skewed_ablation_data = {\n",
"    model_name: {**scores, \"meta\": {\"shots\": shots, \"linestyle\": \"--\"}}\n",
"    for model_name, scores in {\n",
"        \"T500 BLIP-2 OPT-2.7B\": {\n",
"            \"STS-CE\": [0.3136, 0.5058, 0.5086, 0.584, 0.6448, 0.6634, 0.6635],\n",
"            \"STS-BE\": [0.4181, 0.5807, 0.5952, 0.6633, 0.7177, 0.7346, 0.7352],\n",
"            \"BERTScore-F1\": [0.3434, 0.6046, 0.6316, 0.6384, 0.644, 0.6445, 0.6431],\n",
"            \"ROUGE-L\": [0.2508, 0.5095, 0.5556, 0.6027, 0.6387, 0.6515, 0.6543],\n",
"            \"BLEU\": [0.02951, 0.1266, 0.2012, 0.2551, 0.2948, 0.3161, 0.3247],\n",
"        },\n",
"        \"T500 BLIP-2 Flan-T5-xl\": {\n",
"            \"STS-CE\": [0.3934, 0.5232, 0.5302, 0.5815, 0.6482, 0.6694, 0.6761],\n",
"            \"STS-BE\": [0.4617, 0.5957, 0.6051, 0.6554, 0.7207, 0.7402, 0.746],\n",
"            \"BERTScore-F1\": [0.2386, 0.6129, 0.6165, 0.6273, 0.6378, 0.639, 0.6263],\n",
"            \"ROUGE-L\": [0.3651, 0.5258, 0.5493, 0.5925, 0.6432, 0.6562, 0.6589],\n",
"            \"BLEU\": [0.07617, 0.1538, 0.2025, 0.2591, 0.312, 0.3184, 0.3147],\n",
"        },\n",
"    }.items()\n",
"}\n",
"# skewed_eilev_data comes from the T100 cell above, so that cell must run first.\n",
"# Model order follows dict insertion order: EILEV entries first, then T500.\n",
"models = [*skewed_eilev_data, *skewed_ablation_data]\n",
"\n",
"draw_graphs(\n",
"    {**skewed_eilev_data, **skewed_ablation_data},\n",
"    models,\n",
"    metrics,\n",
"    \"skewed-t500-ablation.pdf\",\n",
"    [0, 4, 8, 12, 16],\n",
"    2,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down

0 comments on commit 892f03f

Please sign in to comment.