Skip to content

Commit

Permalink
notebook status
Browse files Browse the repository at this point in the history
  • Loading branch information
Dylan Bourgeois committed Mar 8, 2019
1 parent 7d1d498 commit 251616c
Show file tree
Hide file tree
Showing 9 changed files with 9,993 additions and 19,070 deletions.
498 changes: 0 additions & 498 deletions graph_cls/Untitled.ipynb

This file was deleted.

19 changes: 14 additions & 5 deletions large-corpus/Merge Vocab.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -33,24 +33,33 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"files = ['pytorch_mlm_vocab-code.txt', 'sklearn_mlm_vocab-code.txt', 'keras_mlm_vocab-code.txt']\n",
"corpus_members = ['pytorch', 'sklearn', 'keras', 'ansible', 'youtube-dl', 'flask', 'httpie', 'requests', 'django', 'bert']"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"files = [f+'_mlm_vocab-code.txt' for f in corpus_members]\n",
"out_file = 'global_vocab.csv'"
]
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"3823\n"
"9769\n"
]
}
],
Expand Down
131 changes: 131 additions & 0 deletions mid-corpus/Merge Vocab.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import csv\n"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"def merge_vocab(files, out):\n",
"    \"\"\"Merge the first column of several vocab files into one de-duplicated file.\n",
"\n",
"    Every input file is parsed with csv.reader (space-delimited, '|' as the\n",
"    quote character). The first field of each non-empty row is collected in\n",
"    first-seen order, the number of distinct entries is printed, and the\n",
"    entries are written to `out`, one per row, using the same dialect.\n",
"\n",
"    Args:\n",
"        files: iterable of paths to the vocab files to merge.\n",
"        out: path of the merged file; it is overwritten if it already exists.\n",
"    \"\"\"\n",
"    global_vocab = []  # unique entries, in the order they were first seen\n",
"    seen = set()       # O(1) membership test instead of scanning the list\n",
"    for path in files:\n",
"        # newline='' lets the csv module do its own line-ending handling.\n",
"        with open(path, 'r', newline='') as csvfile:\n",
"            reader = csv.reader(csvfile, delimiter=' ', quotechar='|')\n",
"            for row in reader:\n",
"                if not row:\n",
"                    # csv.reader yields [] for blank lines; row[0] would raise.\n",
"                    continue\n",
"                token = row[0]\n",
"                if token not in seen:\n",
"                    seen.add(token)\n",
"                    global_vocab.append(token)\n",
"    print(len(global_vocab))\n",
"    # newline='' keeps the writer's own line terminator from being translated.\n",
"    with open(out, 'w', newline='') as csvfile:\n",
"        writer = csv.writer(csvfile, delimiter=' ',\n",
"                            quotechar='|', quoting=csv.QUOTE_MINIMAL)\n",
"        writer.writerows([v] for v in global_vocab)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"files = ['pytorch_mlm_vocab-code.txt', 'sklearn_mlm_vocab-code.txt', 'keras_mlm_vocab-code.txt']\n",
"out_file = 'global_vocab.csv'"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"3823\n"
]
}
],
"source": [
"merge_vocab(files, out_file)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"label_files = ['pytorch_cls_vocab-label.txt', 'sklearn_cls_vocab-label.txt', 'keras_cls_vocab-label.txt']\n",
"label_out = 'label_vocab.csv'"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2490\n"
]
}
],
"source": [
"merge_vocab(label_files, label_out)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
File renamed without changes.
Loading

0 comments on commit 251616c

Please sign in to comment.