-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathutils.py
41 lines (29 loc) · 1.08 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import tiktoken
def dict_to_cheat_sheet(data: dict, depth: int = 1, *, indent: str = " ") -> str:
    """Convert a nested dictionary to a markdown cheat sheet.

    Keys at ``depth`` 1 are rendered as ``# key`` headings followed by a
    blank line; keys at deeper levels are rendered as ``- key`` bullets.
    A ``dict`` value is rendered recursively one level deeper; any other
    value is rendered as a ``- value`` bullet.

    Args:
        data: Dictionary whose values are nested dictionaries or leaf values.
        depth: Nesting level of ``data``; 1 is the top (heading) level.
        indent: String repeated once per nesting step in front of bullets.
            The default reproduces the historical single-space indentation.

    Returns:
        The rendered markdown text, or an empty string when ``data`` is empty.
    """
    # Collect fragments and join once instead of growing a string with +=.
    pieces = []
    for key, value in data.items():
        if depth == 1:
            pieces.append(f"# {key}\n\n")
        else:
            # Level-2 keys sit flush left, hence the (depth - 2) offset.
            pieces.append(f"{indent * (depth - 2)}- {key}\n")
        if isinstance(value, dict):
            pieces.append(dict_to_cheat_sheet(value, depth + 1, indent=indent))
        else:
            # A leaf value is one indent step deeper than its key's bullet.
            pieces.append(f"{indent * (depth - 1)}- {value}\n")
    return "".join(pieces)
def print_markdown(md_str: str):
    """Print markdown text line by line, with a dashed rule above headings.

    The text is split on ``"\\n"``; every resulting line is printed, and a
    line that starts with ``#`` is preceded by a rule of 50 dashes.
    """
    rule = "-" * 50
    for text_line in md_str.split("\n"):
        is_heading = text_line.startswith("#")
        if is_heading:
            print(rule)
        print(text_line)
def count_tokens(text: str, model_name: str) -> int:
    """Return the number of tokens ``text`` encodes to for ``model_name``.

    The encoding is looked up with ``tiktoken.encoding_for_model`` and the
    count is the length of the encoded token sequence.
    """
    tokens = tiktoken.encoding_for_model(model_name).encode(text)
    return len(tokens)
def strip_text_fragments(text: str, fragments: set[str]) -> str:
    """Remove one leading ``"<fragment>:"`` label from ``text``.

    Each fragment is tried as a prefix with a colon appended. On the first
    match the prefix is removed and leading whitespace is stripped from the
    remainder. At most one prefix is removed per call.

    Args:
        text: The text to clean.
        fragments: Candidate labels, given without the trailing colon.

    Returns:
        ``text`` without the matching prefix, or ``text`` unchanged when no
        fragment matches.
    """
    # Try longer fragments first: when several prefixes match (e.g. "A" and
    # "A:B"), the result must not depend on the set's iteration order.
    for fragment in sorted(fragments, key=len, reverse=True):
        prefix = f"{fragment}:"
        if text.startswith(prefix):
            return text[len(prefix):].lstrip()
    return text