-
-
Notifications
You must be signed in to change notification settings - Fork 238
/
Copy pathtest_summarize.py
146 lines (133 loc) · 6.95 KB
/
test_summarize.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
from copy import deepcopy
from deepdiff.summarize import summarize, _truncate
class TestSummarize:
    """Tests for ``deepdiff.summarize.summarize`` and its ``_truncate`` helper.

    Several tests pin exact output lengths that are larger than the
    ``max_length`` passed in: as the assertion messages themselves say,
    ``max_length`` behaves as a guide rather than a hard limit.
    """

    def test_empty_dict(self):
        """An empty dict is summarized as the literal ``{}``."""
        summary = summarize({}, max_length=50)
        assert summary == "{}", "Empty dict should be summarized as {}"

    def test_empty_list(self):
        """An empty list is summarized as the literal ``[]``."""
        summary = summarize([], max_length=50)
        assert summary == "[]", "Empty list should be summarized as []"

    def test_primitive_int_truncation(self):
        """A long integer yields a string no longer than ``max_length``."""
        summary = summarize(1234567890123, max_length=10)
        # The summary should be the string representation, truncated to max_length
        assert isinstance(summary, str)
        assert len(summary) <= 10

    def test_primitive_string_no_truncation(self):
        """A short string is returned whole, wrapped in double quotes."""
        summary = summarize("short", max_length=50)
        assert '"short"' == summary, "Short strings should not be truncated, but we are adding double quotes to it."

    def test_small_dict_summary(self):
        """A small dict produces a brace-delimited summary within the limit."""
        data = {"a": "alpha", "b": "beta"}
        summary = summarize(data, max_length=50)
        # Should be JSON-like, start with { and end with } and not exceed the max length.
        assert summary.startswith("{") and summary.endswith("}")
        assert len(summary) <= 50

    def test_long_value_truncation_in_dict(self):
        """Long string values are shortened and marked with an ellipsis."""
        data = {
            "key1": "a" * 100,
            "key2": "b" * 50,
            "key3": "c" * 150
        }
        summary = summarize(data, max_length=100)
        # max_length is a soft target here: the pinned length (113) overshoots
        # the requested 100, and the ellipsis shows that values were truncated.
        assert len(summary) == 113, "Yes we are going slightly above"
        assert "..." in summary

    def test_nested_structure_summary1(self):
        """A deeply nested record is summarized without mutating the input."""
        data = {
            "RecordType": "CID",
            "RecordNumber": 2719,
            "RecordTitle": "Chloroquine",
            "Section": [
                {
                    "TOCHeading": "Structures",
                    "Description": "Structure depictions and information for 2D, 3D, and crystal related",
                    "Section": [
                        {
                            "TOCHeading": "2D Structure",
                            "Description": "A two-dimensional representation of the compound",
                            "DisplayControls": {"MoveToTop": True},
                            "Information": [
                                {
                                    "ReferenceNumber": 69,
                                    "Value": {"Boolean": [True]}
                                }
                            ]
                        },
                        {
                            "TOCHeading": "3D Conformer",
                            "Description": ("A three-dimensional representation of the compound. "
                                            "The 3D structure is not experimentally determined, but computed by PubChem. "
                                            "More detailed information on this conformer model is described in the PubChem3D thematic series published in the Journal of Cheminformatics."),
                            "DisplayControls": {"MoveToTop": True},
                            "Information": [
                                {
                                    "ReferenceNumber": 69,
                                    "Description": "Chloroquine",
                                    "Value": {"Number": [2719]}
                                }
                            ]
                        }
                    ]
                },
                {
                    "TOCHeading": "Chemical Safety",
                    "Description": "Launch the Laboratory Chemical Safety Summary datasheet, and link to the safety and hazard section",
                    "DisplayControls": {"HideThisSection": True, "MoveToTop": True},
                    "Information": [
                        {
                            "ReferenceNumber": 69,
                            "Name": "Chemical Safety",
                            "Value": {
                                "StringWithMarkup": [
                                    {
                                        "String": "          ",
                                        "Markup": [
                                            {
                                                "Start": 0,
                                                "Length": 1,
                                                "URL": "https://pubchem.ncbi.nlm.nih.gov/images/ghs/GHS07.svg",
                                                "Type": "Icon",
                                                "Extra": "Irritant"
                                            }
                                        ]
                                    }
                                ]
                            }
                        }
                    ]
                }
            ]
        }
        # Summarize a copy so that `data` stays as the pristine reference the
        # copy is compared against at the end.
        data_copy = deepcopy(data)
        summary = summarize(data_copy, max_length=200)
        assert len(summary) == 240, "Yes slightly above"
        # Check that some expected keys are in the summary
        assert '"RecordType"' in summary
        assert '"RecordNumber"' in summary
        assert '"RecordTitle"' in summary
        expected = '{"Section":[{"Section":[{"Description":""},{"Description":""}],"Description":"Structure depictions a...ed"},{"Information":[{"Name":"C"}],"Description":"Launch the ...on"}],"RecordTitle":"Chloroquine","RecordNumber":2719,"RecordType":"CID"}'
        assert expected == summary
        assert data_copy == data, "We should not have modified the original data"

    def test_nested_structure_summary2(self, compounds):
        """The ``compounds`` fixture is summarized without being mutated."""
        # Snapshot the input BEFORE calling summarize. Copying afterwards would
        # capture any mutation in the snapshot too, so the final equality
        # check could never fail.
        data_copy = deepcopy(compounds)
        summary = summarize(compounds, max_length=200)
        assert len(summary) == 319, "Ok yeah max_length is more like a guide"
        expected = '{"Section":[{"Section":[{"Description":""},{"Description":""}],"Description":"Toxicity information r...y."},{"Section":[{"Section":["..."]},{"Section":["..."]}],"Description":"Spectral ...ds"},"..."],"Reference":[{"LicenseNote":"Use of th...e.","Description":"T...s."},{"LicenseNote":"U...e.","Description":"T"},"..."]}'
        assert expected == summary
        assert data_copy == compounds, "We should not have modified the original data"

    def test_list_summary(self):
        """Short lists carry no ellipsis; long lists end with a ``"..."`` marker."""
        data = [1, 2, 3, 4]
        summary = summarize(data, max_length=50)
        # The summary should start with '[' and end with ']'
        assert summary.startswith("[") and summary.endswith("]")
        # This short list is expected to be summarized without an ellipsis marker.
        assert "..." not in summary

        data2 = list(range(1, 200))
        summary2 = summarize(data2, max_length=14)
        assert "..." in summary2
        expected = '[100,101,102,103,10,"..."]'
        assert expected == summary2

    def test_direct_truncate_function(self):
        """``_truncate`` returns exactly the requested length, with an ellipsis."""
        s = "abcdefghijklmnopqrstuvwxyz"
        truncated = _truncate(s, 20)
        assert len(truncated) == 20
        assert "..." in truncated