
Commit bdfc38b

tinyboxvk and Narsil authored

Fix typos (#1715)

* Fix typos

Signed-off-by: tinyboxvk <[email protected]>

* Update docs/source/quicktour.rst
* Update docs/source-doc-builder/quicktour.mdx

---------

Co-authored-by: Nicolas Patry <[email protected]>

1 parent 6945933 · commit bdfc38b

File tree

25 files changed: +50 -50 lines changed


bindings/python/examples/custom_components.py

Lines changed: 1 addition & 1 deletion
@@ -49,7 +49,7 @@ class CustomNormalizer:
     def normalize(self, normalized: NormalizedString):
         # Most of these can be replaced by a `Sequence` combining some provided Normalizer,
         # (ie Sequence([ NFKC(), Replace(Regex("\s+"), " "), Lowercase() ])
-        # and it should be the prefered way. That being said, here is an example of the kind
+        # and it should be the preferred way. That being said, here is an example of the kind
         # of things that can be done here:
         normalized.nfkc()
         normalized.filter(lambda char: not char.isnumeric())

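For reference, the `Sequence` composition that comment recommends can be spelled out directly. This is a minimal sketch against the public `tokenizers.normalizers` API (not part of the diff); the trailing comment shows what NFKC, whitespace collapsing, and lowercasing should produce together:

from tokenizers import Regex, normalizers

# The preferred alternative named in the comment: compose provided
# normalizers instead of writing a custom component.
norm = normalizers.Sequence(
    [normalizers.NFKC(), normalizers.Replace(Regex(r"\s+"), " "), normalizers.Lowercase()]
)
print(norm.normalize_str("Héllo   WORLD"))  # expected: "héllo world"
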
bindings/python/py_src/tokenizers/decoders/__init__.pyi

Lines changed: 1 addition & 1 deletion
@@ -57,7 +57,7 @@ class ByteFallback(Decoder):
     ByteFallback Decoder
     ByteFallback is a simple trick which converts tokens looking like `<0x61>`
     to pure bytes, and attempts to make them into a string. If the tokens
-    cannot be decoded you will get � instead for each inconvertable byte token
+    cannot be decoded you will get � instead for each inconvertible byte token

     """
     def __init__(self):

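The behaviour the corrected docstring describes can be checked standalone, assuming the decoder is driven through its `decode(tokens)` method (a sketch, not from the diff): `<0x61>` and `<0x62>` are the bytes for "a" and "b", while a lone continuation byte such as `<0x80>` is not valid UTF-8 and should come back as the replacement character.

from tokenizers.decoders import ByteFallback

decoder = ByteFallback()
print(decoder.decode(["<0x61>", "<0x62>"]))  # expected: "ab"
print(decoder.decode(["<0x80>"]))            # expected: "�" (inconvertible byte)
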
bindings/python/py_src/tokenizers/normalizers/__init__.pyi

Lines changed: 1 addition & 1 deletion
@@ -389,7 +389,7 @@ class Nmt(Normalizer):
 class Precompiled(Normalizer):
     """
     Precompiled normalizer
-    Don't use manually it is used for compatiblity for SentencePiece.
+    Don't use manually it is used for compatibility for SentencePiece.
     """
     def __init__(self, precompiled_charsmap):
         pass

bindings/python/py_src/tokenizers/pre_tokenizers/__init__.pyi

Lines changed: 1 addition & 1 deletion
@@ -48,7 +48,7 @@ class BertPreTokenizer(PreTokenizer):
     BertPreTokenizer

     This pre-tokenizer splits tokens on spaces, and also on punctuation.
-    Each occurence of a punctuation character will be treated separately.
+    Each occurrence of a punctuation character will be treated separately.
     """
     def __init__(self):
         pass

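A quick illustration of the corrected sentence, sketched against the public API: each punctuation occurrence becomes its own piece, and the offsets shown are what the whitespace-and-punctuation split should yield.

from tokenizers.pre_tokenizers import BertPreTokenizer

pre = BertPreTokenizer()
print(pre.pre_tokenize_str("Hey, friend!!"))
# expected: [('Hey', (0, 3)), (',', (3, 4)), ('friend', (5, 11)), ('!', (11, 12)), ('!', (12, 13))]
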
bindings/python/py_src/tokenizers/tools/visualizer.py

Lines changed: 1 addition & 1 deletion
@@ -325,7 +325,7 @@ def __make_anno_map(text: str, annotations: AnnotationList) -> PartialIntList:

     Returns:
         A list of length len(text) whose entry at index i is None if there is no annotation on
-        charachter i or k, the index of the annotation that covers index i where k is with
+        character i or k, the index of the annotation that covers index i where k is with
         respect to the list of annotations
     """
     annotation_map = [None] * len(text)

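The mapping that docstring describes can be restated as a tiny standalone function. This is a hypothetical simplification (annotations reduced to bare `(start, end)` spans, whereas the real `AnnotationList` carries richer objects), not the library's implementation:

from typing import List, Optional, Tuple

def make_anno_map(text: str, annotations: List[Tuple[int, int]]) -> List[Optional[int]]:
    # entry i is None when no annotation covers character i,
    # otherwise the index k of the covering annotation
    annotation_map: List[Optional[int]] = [None] * len(text)
    for k, (start, end) in enumerate(annotations):
        for i in range(start, min(end, len(text))):
            annotation_map[i] = k
    return annotation_map

print(make_anno_map("hello world", [(0, 5), (6, 11)]))
# [0, 0, 0, 0, 0, None, 1, 1, 1, 1, 1]
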
bindings/python/src/decoders.rs

Lines changed: 1 addition & 1 deletion
@@ -263,7 +263,7 @@ impl PyWordPieceDec {
 /// ByteFallback Decoder
 /// ByteFallback is a simple trick which converts tokens looking like `<0x61>`
 /// to pure bytes, and attempts to make them into a string. If the tokens
-/// cannot be decoded you will get � instead for each inconvertable byte token
+/// cannot be decoded you will get � instead for each inconvertible byte token
 ///
 #[pyclass(extends=PyDecoder, module = "tokenizers.decoders", name = "ByteFallback")]
 pub struct PyByteFallbackDec {}

bindings/python/src/lib.rs

Lines changed: 1 addition & 1 deletion
@@ -23,7 +23,7 @@ use pyo3::wrap_pymodule;
 pub const VERSION: &str = env!("CARGO_PKG_VERSION");

 // For users using multiprocessing in python, it is quite easy to fork the process running
-// tokenizers, ending up with a deadlock because we internaly make use of multithreading. So
+// tokenizers, ending up with a deadlock because we internally make use of multithreading. So
 // we register a callback to be called in the event of a fork so that we can warn the user.
 #[cfg(target_family = "unix")]
 static mut REGISTERED_FORK_CALLBACK: bool = false;

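The comment's scenario, from the user's side: forking after the internal thread pool has been used can deadlock, so the library warns. Setting the `TOKENIZERS_PARALLELISM` environment variable (which the library reads) makes the choice explicit up front. The snippet below is a sketch of that pattern with a trivial worker body, not code from this commit:

import multiprocessing
import os

# Declare up front whether the Rust-side thread pool may be used, so the
# fork callback mentioned in the comment has nothing to warn about.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

def worker(idx: int) -> int:
    # each child process would construct and use its own tokenizer here
    return idx

if __name__ == "__main__":
    with multiprocessing.Pool(2) as pool:
        print(pool.map(worker, range(4)))
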
bindings/python/src/normalizers.rs

Lines changed: 1 addition & 1 deletion
@@ -534,7 +534,7 @@ impl PyNmt {
 }

 /// Precompiled normalizer
-/// Don't use manually it is used for compatiblity for SentencePiece.
+/// Don't use manually it is used for compatibility for SentencePiece.
 #[pyclass(extends=PyNormalizer, module = "tokenizers.normalizers", name = "Precompiled")]
 pub struct PyPrecompiled {}
 #[pymethods]

bindings/python/src/pre_tokenizers.rs

Lines changed: 1 addition & 1 deletion
@@ -430,7 +430,7 @@ impl PyCharDelimiterSplit {
 /// BertPreTokenizer
 ///
 /// This pre-tokenizer splits tokens on spaces, and also on punctuation.
-/// Each occurence of a punctuation character will be treated separately.
+/// Each occurrence of a punctuation character will be treated separately.
 #[pyclass(extends=PyPreTokenizer, module = "tokenizers.pre_tokenizers", name = "BertPreTokenizer")]
 pub struct PyBertPreTokenizer {}
 #[pymethods]

bindings/python/stub.py

Lines changed: 1 addition & 1 deletion
@@ -100,7 +100,7 @@ def pyi_file(obj, indent=""):
         string += function(obj, indent)

     elif inspect.isgetsetdescriptor(obj):
-        # TODO it would be interesing to add the setter maybe ?
+        # TODO it would be interesting to add the setter maybe ?
         string += f"{indent}@property\n"
         string += function(obj, indent, text_signature="(self)")
     else:

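For context, `inspect.isgetsetdescriptor` is what makes native (pyo3-generated) getters render as `@property` stubs in the generated .pyi files. The miniature below imitates that branch; `property_stub` is a hypothetical helper, and the stdlib getset descriptor `float.real` stands in for a pyo3 attribute:

import inspect

def property_stub(name: str, obj, indent: str = "    ") -> str:
    # mirror stub.py's branch: getset descriptors become @property stubs
    assert inspect.isgetsetdescriptor(obj)
    return f"{indent}@property\n{indent}def {name}(self): ...\n"

print(property_stub("real", float.real))
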
bindings/python/tests/bindings/test_trainers.py

Lines changed: 1 addition & 1 deletion
@@ -287,7 +287,7 @@ def test_can_modify(self):
         trainer.initial_alphabet = ["d", "z"]
         assert sorted(trainer.initial_alphabet) == ["d", "z"]

-    def test_continuing_prefix_trainer_mistmatch(self):
+    def test_continuing_prefix_trainer_mismatch(self):
         UNK = "[UNK]"
         special_tokens = [UNK]
         tokenizer = Tokenizer(models.BPE(unk_token=UNK, continuing_subword_prefix="##"))

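The renamed test's subject, sketched with the public training API on a toy corpus (an illustration, not the test body): the `continuing_subword_prefix` configured on the model and on the trainer should agree, and the mismatch case is what the test exercises.

from tokenizers import Tokenizer, models, trainers

tokenizer = Tokenizer(models.BPE(unk_token="[UNK]", continuing_subword_prefix="##"))
trainer = trainers.BpeTrainer(special_tokens=["[UNK]"], continuing_subword_prefix="##")
# keeping the two "##" prefixes in sync avoids the mismatch the test covers
tokenizer.train_from_iterator(["a tiny toy corpus", "another tiny line"], trainer=trainer)
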