
Commit bdfc38b

tinyboxvk and Narsil authored

Fix typos (#1715)

* Fix typos

Signed-off-by: tinyboxvk <[email protected]>

* Update docs/source/quicktour.rst
* Update docs/source-doc-builder/quicktour.mdx

---------

Co-authored-by: Nicolas Patry <[email protected]>

1 parent 6945933 · commit bdfc38b

File tree

25 files changed: +50 -50 lines changed


bindings/python/examples/custom_components.py

Lines changed: 1 addition & 1 deletion
@@ -49,7 +49,7 @@ class CustomNormalizer:
     def normalize(self, normalized: NormalizedString):
         # Most of these can be replaced by a `Sequence` combining some provided Normalizer,
         # (ie Sequence([ NFKC(), Replace(Regex("\s+"), " "), Lowercase() ])
-        # and it should be the prefered way. That being said, here is an example of the kind
+        # and it should be the preferred way. That being said, here is an example of the kind
         # of things that can be done here:
         normalized.nfkc()
         normalized.filter(lambda char: not char.isnumeric())

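For reference, the `Sequence` composition that comment recommends can be spelled out directly. This is a minimal sketch against the public `tokenizers.normalizers` API (not part of the diff); the trailing comment shows what NFKC, whitespace collapsing, and lowercasing should produce together:

from tokenizers import Regex, normalizers

# The preferred alternative named in the comment: compose provided
# normalizers instead of writing a custom component.
norm = normalizers.Sequence(
    [normalizers.NFKC(), normalizers.Replace(Regex(r"\s+"), " "), normalizers.Lowercase()]
)
print(norm.normalize_str("Héllo   WORLD"))  # expected: "héllo world"
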
bindings/python/py_src/tokenizers/decoders/__init__.pyi

Lines changed: 1 addition & 1 deletion
@@ -57,7 +57,7 @@ class ByteFallback(Decoder):
     ByteFallback Decoder
     ByteFallback is a simple trick which converts tokens looking like `<0x61>`
     to pure bytes, and attempts to make them into a string. If the tokens
-    cannot be decoded you will get � instead for each inconvertable byte token
+    cannot be decoded you will get � instead for each inconvertible byte token

     """
     def __init__(self):

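The behaviour the corrected docstring describes can be checked standalone, assuming the decoder is driven through its `decode(tokens)` method (a sketch, not from the diff): `<0x61>` and `<0x62>` are the bytes for "a" and "b", while a lone continuation byte such as `<0x80>` is not valid UTF-8 and should come back as the replacement character.

from tokenizers.decoders import ByteFallback

decoder = ByteFallback()
print(decoder.decode(["<0x61>", "<0x62>"]))  # expected: "ab"
print(decoder.decode(["<0x80>"]))            # expected: "�" (inconvertible byte)
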
bindings/python/py_src/tokenizers/normalizers/__init__.pyi

Lines changed: 1 addition & 1 deletion
@@ -389,7 +389,7 @@ class Nmt(Normalizer):
 class Precompiled(Normalizer):
     """
     Precompiled normalizer
-    Don't use manually it is used for compatiblity for SentencePiece.
+    Don't use manually it is used for compatibility for SentencePiece.
     """
     def __init__(self, precompiled_charsmap):
         pass

bindings/python/py_src/tokenizers/pre_tokenizers/__init__.pyi

Lines changed: 1 addition & 1 deletion
@@ -48,7 +48,7 @@ class BertPreTokenizer(PreTokenizer):
     BertPreTokenizer

     This pre-tokenizer splits tokens on spaces, and also on punctuation.
-    Each occurence of a punctuation character will be treated separately.
+    Each occurrence of a punctuation character will be treated separately.
     """
     def __init__(self):
         pass

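A quick illustration of the corrected sentence, sketched against the public API: each punctuation occurrence becomes its own piece, and the offsets shown are what the whitespace-and-punctuation split should yield.

from tokenizers.pre_tokenizers import BertPreTokenizer

pre = BertPreTokenizer()
print(pre.pre_tokenize_str("Hey, friend!!"))
# expected: [('Hey', (0, 3)), (',', (3, 4)), ('friend', (5, 11)), ('!', (11, 12)), ('!', (12, 13))]
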
bindings/python/py_src/tokenizers/tools/visualizer.py

Lines changed: 1 addition & 1 deletion
@@ -325,7 +325,7 @@ def __make_anno_map(text: str, annotations: AnnotationList) -> PartialIntList:

     Returns:
         A list of length len(text) whose entry at index i is None if there is no annotation on
-        charachter i or k, the index of the annotation that covers index i where k is with
+        character i or k, the index of the annotation that covers index i where k is with
         respect to the list of annotations
     """
     annotation_map = [None] * len(text)

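The mapping that docstring describes can be restated as a tiny standalone function. This is a hypothetical simplification (annotations reduced to bare `(start, end)` spans, whereas the real `AnnotationList` carries richer objects), not the library's implementation:

from typing import List, Optional, Tuple

def make_anno_map(text: str, annotations: List[Tuple[int, int]]) -> List[Optional[int]]:
    # entry i is None when no annotation covers character i,
    # otherwise the index k of the covering annotation
    annotation_map: List[Optional[int]] = [None] * len(text)
    for k, (start, end) in enumerate(annotations):
        for i in range(start, min(end, len(text))):
            annotation_map[i] = k
    return annotation_map

print(make_anno_map("hello world", [(0, 5), (6, 11)]))
# [0, 0, 0, 0, 0, None, 1, 1, 1, 1, 1]
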
bindings/python/src/decoders.rs

Lines changed: 1 addition & 1 deletion
@@ -263,7 +263,7 @@ impl PyWordPieceDec {
 /// ByteFallback Decoder
 /// ByteFallback is a simple trick which converts tokens looking like `<0x61>`
 /// to pure bytes, and attempts to make them into a string. If the tokens
-/// cannot be decoded you will get � instead for each inconvertable byte token
+/// cannot be decoded you will get � instead for each inconvertible byte token
 ///
 #[pyclass(extends=PyDecoder, module = "tokenizers.decoders", name = "ByteFallback")]
 pub struct PyByteFallbackDec {}

bindings/python/src/lib.rs

Lines changed: 1 addition & 1 deletion
@@ -23,7 +23,7 @@ use pyo3::wrap_pymodule;
 pub const VERSION: &str = env!("CARGO_PKG_VERSION");

 // For users using multiprocessing in python, it is quite easy to fork the process running
-// tokenizers, ending up with a deadlock because we internaly make use of multithreading. So
+// tokenizers, ending up with a deadlock because we internally make use of multithreading. So
 // we register a callback to be called in the event of a fork so that we can warn the user.
 #[cfg(target_family = "unix")]
 static mut REGISTERED_FORK_CALLBACK: bool = false;

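The comment's scenario, from the user's side: forking after the internal thread pool has been used can deadlock, so the library warns. Setting the `TOKENIZERS_PARALLELISM` environment variable (which the library reads) makes the choice explicit up front. The snippet below is a sketch of that pattern with a trivial worker body, not code from this commit:

import multiprocessing
import os

# Declare up front whether the Rust-side thread pool may be used, so the
# fork callback mentioned in the comment has nothing to warn about.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

def worker(idx: int) -> int:
    # each child process would construct and use its own tokenizer here
    return idx

if __name__ == "__main__":
    with multiprocessing.Pool(2) as pool:
        print(pool.map(worker, range(4)))
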
bindings/python/src/normalizers.rs

Lines changed: 1 addition & 1 deletion
@@ -534,7 +534,7 @@ impl PyNmt {
 }

 /// Precompiled normalizer
-/// Don't use manually it is used for compatiblity for SentencePiece.
+/// Don't use manually it is used for compatibility for SentencePiece.
 #[pyclass(extends=PyNormalizer, module = "tokenizers.normalizers", name = "Precompiled")]
 pub struct PyPrecompiled {}
 #[pymethods]

bindings/python/src/pre_tokenizers.rs

Lines changed: 1 addition & 1 deletion
@@ -430,7 +430,7 @@ impl PyCharDelimiterSplit {
 /// BertPreTokenizer
 ///
 /// This pre-tokenizer splits tokens on spaces, and also on punctuation.
-/// Each occurence of a punctuation character will be treated separately.
+/// Each occurrence of a punctuation character will be treated separately.
 #[pyclass(extends=PyPreTokenizer, module = "tokenizers.pre_tokenizers", name = "BertPreTokenizer")]
 pub struct PyBertPreTokenizer {}
 #[pymethods]

bindings/python/stub.py

Lines changed: 1 addition & 1 deletion
@@ -100,7 +100,7 @@ def pyi_file(obj, indent=""):
         string += function(obj, indent)

     elif inspect.isgetsetdescriptor(obj):
-        # TODO it would be interesing to add the setter maybe ?
+        # TODO it would be interesting to add the setter maybe ?
         string += f"{indent}@property\n"
         string += function(obj, indent, text_signature="(self)")
     else:

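For context, `inspect.isgetsetdescriptor` is what makes native (pyo3-generated) getters render as `@property` stubs in the generated .pyi files. The miniature below imitates that branch; `property_stub` is a hypothetical helper, and the stdlib getset descriptor `float.real` stands in for a pyo3 attribute:

import inspect

def property_stub(name: str, obj, indent: str = "    ") -> str:
    # mirror stub.py's branch: getset descriptors become @property stubs
    assert inspect.isgetsetdescriptor(obj)
    return f"{indent}@property\n{indent}def {name}(self): ...\n"

print(property_stub("real", float.real))
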
bindings/python/tests/bindings/test_trainers.py

Lines changed: 1 addition & 1 deletion
@@ -287,7 +287,7 @@ def test_can_modify(self):
         trainer.initial_alphabet = ["d", "z"]
         assert sorted(trainer.initial_alphabet) == ["d", "z"]

-    def test_continuing_prefix_trainer_mistmatch(self):
+    def test_continuing_prefix_trainer_mismatch(self):
         UNK = "[UNK]"
         special_tokens = [UNK]
         tokenizer = Tokenizer(models.BPE(unk_token=UNK, continuing_subword_prefix="##"))

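The renamed test's subject, sketched with the public training API on a toy corpus (an illustration, not the test body): the `continuing_subword_prefix` configured on the model and on the trainer should agree, and the mismatch case is what the test exercises.

from tokenizers import Tokenizer, models, trainers

tokenizer = Tokenizer(models.BPE(unk_token="[UNK]", continuing_subword_prefix="##"))
trainer = trainers.BpeTrainer(special_tokens=["[UNK]"], continuing_subword_prefix="##")
# keeping the two "##" prefixes in sync avoids the mismatch the test covers
tokenizer.train_from_iterator(["a tiny toy corpus", "another tiny line"], trainer=trainer)
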