-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpyproject.toml
85 lines (72 loc) · 2.14 KB
/
pyproject.toml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# Poetry package metadata for the kl3m-tokenizers distribution.
[tool.poetry]
name = "kl3m-tokenizers"
version = "0.1.0"
description = "Tokenizers for the KL3M project from the ALEA Institute"
license = "MIT"
authors = ["ALEA Institute <[email protected]>"]
readme = "README.md"
homepage = "https://aleainstitute.ai/"
repository = "https://github.com/alea-institute/kl3m-tokenizers"
keywords = ["alea"]
# Paths kept out of the built distributions.
exclude = [
    "tests",
    "docs",
    "examples",
    "docker",
]
# Trove classifiers published with the package metadata.
classifiers = [
    "Development Status :: 3 - Alpha",
    "Intended Audience :: Science/Research",
    "Intended Audience :: Developers",
    "License :: OSI Approved :: MIT License",
    "Operating System :: OS Independent",
    "Programming Language :: Python",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
    "Topic :: Software Development :: Libraries :: Python Modules",
    "Topic :: Scientific/Engineering :: Information Analysis",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
    "Topic :: Utilities",
]
# Runtime requirements: the interpreter constraint first, then packages in
# alphabetical order.
[tool.poetry.dependencies]
python = ">=3.10,<4.0.0"
alea-preprocess = "^0.1.12"
httpx = "^0.27.2"
orjsonl = "^1.0.0"
tiktoken = "^0.8.0"
tokenizers = "^0.20.1"
transformers = "^4.45.2"
# Development-only dependency group (linting, testing, coverage, type stubs),
# in alphabetical order.
[tool.poetry.group.dev.dependencies]
pylint = "^3.2.7"
pytest = "^8.3.2"
pytest-asyncio = "^0.23.8"
pytest-benchmark = "^4.0.0"
pytest-cov = "^5.0.0"
types-lxml = "^2024.8.7"
# Optional extras. No extras are declared; this table is an empty placeholder.
[tool.poetry.extras]
# PEP 517 build configuration: the package is built with the poetry-core backend.
[build-system]
build-backend = "poetry.core.masonry.api"
requires = ["poetry-core"]
# Pylint configuration.
#
# Every option lives in a sub-table of `tool.pylint`. Pylint's TOML loader only
# reads options from sub-tables; values placed directly under `[tool.pylint]`
# are reported as `bad-configuration-section` (E0014) and ignored.
[tool.pylint.main]
extension-pkg-allow-list = ["lxml"]
# Test modules and pytest fixtures are not linted.
ignore-patterns = ["test_.*?.py", "conftest.py"]

[tool.pylint.format]
max-line-length = 120

[tool.pylint.messages_control]
disable = [
    "line-too-long",
    "too-few-public-methods",
    "no-self-argument",
    "cyclic-import",
]

# Thresholds for the design checker.
[tool.pylint.design]
max-args = 10
max-attributes = 15
max-branches = 15
max-locals = 25
min-public-methods = 0
# isort: the "black" profile with a 120-column line length.
[tool.isort]
line_length = 120
profile = "black"
# pytest: every run measures coverage of kl3m_tokenizers and emits terminal,
# XML and HTML reports. The line-ending backslashes fold the string below into
# a single space-separated line.
[tool.pytest.ini_options]
addopts = """\
--cov=kl3m_tokenizers \
--cov-report=term-missing \
--cov-report=xml \
--cov-report=html\
"""
# mypy: do not report errors for imports whose type information cannot be found.
[tool.mypy]
ignore_missing_imports = true