Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Spelling #1

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Doc2Vec.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@
"\n",
"def remove_punctuation(data):\n",
" \"\"\"\n",
" Removes various punctation from Hebrew text.\n",
" Removes various punctuation from Hebrew text.\n",
" :param data: String of Hebrew Text\n",
" :return: String without punctuation\n",
" \"\"\"\n",
Expand Down Expand Up @@ -281,7 +281,7 @@
"\n",
"def get_segments(filename):\n",
" \"\"\"\n",
" Combs through the entire Sefaris Hebrew Library and cleans the text for Doc2Vec.\n",
" Combs through the entire Sefaria Hebrew Library and cleans the text for Doc2Vec.\n",
" Creates a dict:\n",
" Key: Ref\n",
" Value: The text of that ref cleaned and ready for Doc2Vec\n",
Expand Down
2 changes: 1 addition & 1 deletion Hebrew Spellcheck.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@
"\n",
"def determine_beginning_stopword(beginning_stopword):\n",
" \"\"\"\n",
" Appends the approproate beginning stopword. There are three possibilites, \n",
" Appends the appropriate beginning stopword. There are three possibilities, \n",
" therefore the correct option needs to be determined\n",
" :param beginning_stopword: The modified word\n",
" :return: Appropriate beginning stopword\n",
Expand Down
2 changes: 1 addition & 1 deletion RAKE Hebrew.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1078,7 +1078,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# This just gets a list of every word and the number of occurences within the entire corpus. Used to help build a stopword list"
"# This just gets a list of every word and the number of occurrences within the entire corpus. Used to help build a stopword list"
]
},
{
Expand Down
10 changes: 5 additions & 5 deletions create_docs_for_doc2vec.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ def remove_dicta_prefix(string, marker):

def remove_punctuation(data):
"""
Removes various punctation from Hebrew text.
Removes various punctuation from Hebrew text.
:param data: String of Hebrew Text
:return: String without punctuation
"""
Expand Down Expand Up @@ -236,9 +236,9 @@ def extract_reference(data):
return data.split(u'~')[0]


def concatenate_sematically_linked_segments(topic_ranged_refs, ranged_to_segment):
def concatenate_semantically_linked_segments(topic_ranged_refs, ranged_to_segment):
"""
Combines multiple Sefaria Segments into one larger segment based on sematical meaning
Combines multiple Sefaria Segments into one larger segment based on semantical meaning
:param topic_ranged_refs: List of ranged trefs that define the semantic separation
:param ranged_to_segment: Nested dict. First layer points from Ranged Refs to all sub-seg-refs. The nested dict points from the sub_seg_ref to the text of said sub_seg_ref
:return: Dict containing semantically defined ranged refs corresponding to their concatenated text
Expand Down Expand Up @@ -294,8 +294,8 @@ def get_segments(filename):
else:
all_data[ref] = data

all_data.update(concatenate_sematically_linked_segments(tanakh_topic_ranged_refs, tanakh_ranged_to_segment))
all_data.update(concatenate_sematically_linked_segments(talmud_topic_ranged_refs, talmud_ranged_to_segment))
all_data.update(concatenate_semantically_linked_segments(tanakh_topic_ranged_refs, tanakh_ranged_to_segment))
all_data.update(concatenate_semantically_linked_segments(talmud_topic_ranged_refs, talmud_ranged_to_segment))

return all_data

Expand Down
2 changes: 1 addition & 1 deletion hebrew_spellcheck.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def we_havent_added_prefix_or_suffix(word):

def determine_beginning_stopword(beginning_stopword):
"""
Appends the approproate beginning stopword. There are three possibilites,
Appends the appropriate beginning stopword. There are three possibilities,
therefore the correct option needs to be determined
:param beginning_stopword: The modified word
:return: Appropriate beginning stopword
Expand Down
10 changes: 5 additions & 5 deletions high_conf_links.json
Original file line number Diff line number Diff line change
Expand Up @@ -55691,7 +55691,7 @@
],
[
"Daniel 7:9",
"Likutei Halachot, Yoreh Deah, Laws of Fobidden Fabric Blends 3:2:1",
"Likutei Halachot, Yoreh Deah, Laws of Forbidden Fabric Blends 3:2:1",
146.76882352941178
],
[
Expand Down Expand Up @@ -279761,7 +279761,7 @@
],
[
"Chagigah 13a:9",
"Likutei Halachot, Yoreh Deah, Laws of Fobidden Fabric Blends 4:3:1",
"Likutei Halachot, Yoreh Deah, Laws of Forbidden Fabric Blends 4:3:1",
163.4555
],
[
Expand Down Expand Up @@ -389716,7 +389716,7 @@
],
[
"Berakhot 6a:5",
"Likutei Halachot, Yoreh Deah, Laws of Fobidden Fabric Blends 2:2:1",
"Likutei Halachot, Yoreh Deah, Laws of Forbidden Fabric Blends 2:2:1",
83.0
],
[
Expand Down Expand Up @@ -391401,7 +391401,7 @@
],
[
"Proverbs 25:2",
"Likutei Halachot, Yoreh Deah, Laws of Fobidden Fabric Blends 3:5:1",
"Likutei Halachot, Yoreh Deah, Laws of Forbidden Fabric Blends 3:5:1",
66.27947368421053
],
[
Expand Down Expand Up @@ -563561,7 +563561,7 @@
],
[
"Psalms 99:6",
"Divrei Emet, Divrei Emet on Torah, A Collection on Sciptures 5",
"Divrei Emet, Divrei Emet on Torah, A Collection on Scriptures 5",
102.61833333333334
],
[
Expand Down
2 changes: 1 addition & 1 deletion test_topics.json
Original file line number Diff line number Diff line change
Expand Up @@ -695,7 +695,7 @@
0.7636483311653137
],
[
"Siddur Sefard, Additional Prayers , Six Rememberances 10",
"Siddur Sefard, Additional Prayers , Six Remembrances 10",
0.7589909434318542
],
[
Expand Down