-
Notifications
You must be signed in to change notification settings - Fork 0
/
wordcount_richard.py
48 lines (41 loc) · 1.82 KB
/
wordcount_richard.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import json
import sys
import zipfile
# import io
# from pathlib import Path

import numpy as np
import requests
# Usage: put the projects zip in the same folder as this script (or pass its
# path as the first command-line argument) to print one "<name> <wordcount>"
# line per notebook found in the archive.
DEFAULT_ZIP = "FinalProjects-Sp21-main.zip"

# Stand-alone tokens that are markdown markup rather than words: the heading
# markers "#" through "######" and the "-" list bullet.
_NON_WORDS = frozenset(["-"] + ["#" * level for level in range(1, 7)])


def count_words(text):
    """Return the number of words in *text*, ignoring bare markdown markers.

    Tokens are separated by any run of whitespace, so repeated spaces, tabs,
    newlines and empty strings never contribute phantom "words".
    """
    return sum(1 for token in text.split() if token not in _NON_WORDS)


def count_markdown_words(notebook):
    """Return the total word count over all markdown cells of a notebook.

    *notebook* is the parsed JSON object of an ``.ipynb`` file. Only cells
    whose ``cell_type`` is ``"markdown"`` are counted; a notebook without a
    top-level ``cells`` list counts as zero words.
    """
    total = 0
    for cell in notebook.get("cells", []):
        if cell.get("cell_type") != "markdown":
            continue
        source = cell.get("source", "")
        # ``source`` may be stored as one string or as a list of strings;
        # normalise to a single string so both forms are counted the same way.
        if not isinstance(source, str):
            source = "".join(source)
        total += count_words(source)
    return total


def main(zip_path=DEFAULT_ZIP):
    """Print ``<name> <wordcount>`` for every notebook inside *zip_path*.

    Entries that end in ``.ipynb`` but cannot be parsed as JSON are reported
    on stderr and skipped, so one bad file does not abort the whole report.
    """
    with zipfile.ZipFile(zip_path, "r") as archive:
        for name in archive.namelist():
            # Test the extension, not a substring: directory entries such as
            # ".ipynb_checkpoints/" contain ".ipynb" but are not notebooks.
            if not name.endswith(".ipynb"):
                continue
            try:
                notebook = json.loads(archive.read(name))
            except ValueError as err:
                # Covers both malformed JSON and undecodable bytes.
                print("skipping {}: {}".format(name, err), file=sys.stderr)
                continue
            print(name, count_markdown_words(notebook))


if __name__ == "__main__":
    main(sys.argv[1] if len(sys.argv) > 1 else DEFAULT_ZIP)