diff --git a/Doc2Vec.ipynb b/Doc2Vec.ipynb index 4b0160b..7a3348b 100644 --- a/Doc2Vec.ipynb +++ b/Doc2Vec.ipynb @@ -222,7 +222,7 @@ "\n", "def remove_punctuation(data):\n", " \"\"\"\n", - " Removes various punctation from Hebrew text.\n", + " Removes various punctuation from Hebrew text.\n", " :param data: String of Hebrew Text\n", " :return: String without punctuation\n", " \"\"\"\n", @@ -281,7 +281,7 @@ "\n", "def get_segments(filename):\n", " \"\"\"\n", - " Combs through the entire Sefaris Hebrew Library and cleans the text for Doc2Vec.\n", + " Combs through the entire Sefaria Hebrew Library and cleans the text for Doc2Vec.\n", " Creates a dict:\n", " Key: Ref\n", " Value: The text of that ref cleaned and ready for Doc2Vec\n", diff --git a/Hebrew Spellcheck.ipynb b/Hebrew Spellcheck.ipynb index 49e068c..c89e2ca 100644 --- a/Hebrew Spellcheck.ipynb +++ b/Hebrew Spellcheck.ipynb @@ -68,7 +68,7 @@ "\n", "def determine_beginning_stopword(beginning_stopword):\n", " \"\"\"\n", - " Appends the approproate beginning stopword. There are three possibilites, \n", + " Appends the appropriate beginning stopword. There are three possibilities, \n", " therefore the correct option needs to be determine\n", " :param beginning_stopword: The modified word\n", " :return: Appropriate beginning stopword\n", diff --git a/RAKE Hebrew.ipynb b/RAKE Hebrew.ipynb index a5c3922..06c4424 100644 --- a/RAKE Hebrew.ipynb +++ b/RAKE Hebrew.ipynb @@ -1078,7 +1078,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# This just gets a list of every word and the number of occurences within the entire corpus. Used to help build a stopword list" + "# This just gets a list of every word and the number of occurrences within the entire corpus. 
Used to help build a stopword list" ] }, { diff --git a/create_docs_for_doc2vec.py b/create_docs_for_doc2vec.py index 93d9fb8..7ab9497 100644 --- a/create_docs_for_doc2vec.py +++ b/create_docs_for_doc2vec.py @@ -168,7 +168,7 @@ def remove_dicta_prefix(string, marker): def remove_punctuation(data): """ - Removes various punctation from Hebrew text. + Removes various punctuation from Hebrew text. :param data: String of Hebrew Text :return: String without punctuation """ @@ -236,9 +236,9 @@ def extract_reference(data): return data.split(u'~')[0] -def concatenate_sematically_linked_segments(topic_ranged_refs, ranged_to_segment): +def concatenate_semantically_linked_segments(topic_ranged_refs, ranged_to_segment): """ - Combines multiple Sefaria Segments into one larger segment based on sematical meaning + Combines multiple Sefaria Segments into one larger segment based on semantic meaning :param topic_ranged_refs: List of ranged trefs that define the semantic separation :param ranged_to_segment: Nested dict. First layer points from Ranged Refs to all sub-seg-refs. 
The nested dict points from the sub_seg_ref to the text of said sub_seg_ref :return: Dict containing semantically define ranged refs corresponding to their concatenated text @@ -294,8 +294,8 @@ def get_segments(filename): else: all_data[ref] = data - all_data.update(concatenate_sematically_linked_segments(tanakh_topic_ranged_refs, tanakh_ranged_to_segment)) - all_data.update(concatenate_sematically_linked_segments(talmud_topic_ranged_refs, talmud_ranged_to_segment)) + all_data.update(concatenate_semantically_linked_segments(tanakh_topic_ranged_refs, tanakh_ranged_to_segment)) + all_data.update(concatenate_semantically_linked_segments(talmud_topic_ranged_refs, talmud_ranged_to_segment)) return all_data diff --git a/hebrew_spellcheck.py b/hebrew_spellcheck.py index 42c709c..a50cdcb 100644 --- a/hebrew_spellcheck.py +++ b/hebrew_spellcheck.py @@ -47,7 +47,7 @@ def we_havent_added_prefix_or_suffix(word): def determine_beginning_stopword(beginning_stopword): """ - Appends the approproate beginning stopword. There are three possibilites, + Appends the appropriate beginning stopword. 
There are three possibilities, therefore the correct option needs to be determine :param beginning_stopword: The modified word :return: Appropriate beginning stopword diff --git a/high_conf_links.json b/high_conf_links.json index ac3fb22..5fc6493 100644 --- a/high_conf_links.json +++ b/high_conf_links.json @@ -55691,7 +55691,7 @@ ], [ "Daniel 7:9", - "Likutei Halachot, Yoreh Deah, Laws of Fobidden Fabric Blends 3:2:1", + "Likutei Halachot, Yoreh Deah, Laws of Forbidden Fabric Blends 3:2:1", 146.76882352941178 ], [ @@ -279761,7 +279761,7 @@ ], [ "Chagigah 13a:9", - "Likutei Halachot, Yoreh Deah, Laws of Fobidden Fabric Blends 4:3:1", + "Likutei Halachot, Yoreh Deah, Laws of Forbidden Fabric Blends 4:3:1", 163.4555 ], [ @@ -389716,7 +389716,7 @@ ], [ "Berakhot 6a:5", - "Likutei Halachot, Yoreh Deah, Laws of Fobidden Fabric Blends 2:2:1", + "Likutei Halachot, Yoreh Deah, Laws of Forbidden Fabric Blends 2:2:1", 83.0 ], [ @@ -391401,7 +391401,7 @@ ], [ "Proverbs 25:2", - "Likutei Halachot, Yoreh Deah, Laws of Fobidden Fabric Blends 3:5:1", + "Likutei Halachot, Yoreh Deah, Laws of Forbidden Fabric Blends 3:5:1", 66.27947368421053 ], [ @@ -563561,7 +563561,7 @@ ], [ "Psalms 99:6", - "Divrei Emet, Divrei Emet on Torah, A Collection on Sciptures 5", + "Divrei Emet, Divrei Emet on Torah, A Collection on Scriptures 5", 102.61833333333334 ], [ diff --git a/test_topics.json b/test_topics.json index f0657a3..d17f765 100644 --- a/test_topics.json +++ b/test_topics.json @@ -695,7 +695,7 @@ 0.7636483311653137 ], [ - "Siddur Sefard, Additional Prayers , Six Rememberances 10", + "Siddur Sefard, Additional Prayers , Six Remembrances 10", 0.7589909434318542 ], [