1
1
"""Module containing DataSource sub class responsible for JSON ingestion."""
2
2
3
3
import os
4
- from typing import Iterator
4
+ from typing import Generator , Iterator , Any
5
5
import json
6
6
from logging import getLogger
7
+ from io import TextIOWrapper
7
8
8
9
from tqdm import tqdm
9
10
from pydantic import ValidationError
13
14
from .json_jq_converter import (
14
15
generate_records_from_compiled_jq ,
15
16
compile_jq_query ,
16
- get_jq_query_from_config
17
+ get_jq_query_from_config ,
17
18
)
18
19
from .json_config import JSONDataSourceConfig
19
20
20
-
21
21
LOGGER = getLogger (__name__ )
22
22
23
23
@@ -40,9 +40,7 @@ def __init__(self, config: JSONDataSourceConfig) -> None:
40
40
)
41
41
self .file_list = self .get_file_list ()
42
42
self .jq_query = get_jq_query_from_config (self .config )
43
- self .compiled_jq = compile_jq_query (
44
- self .jq_query
45
- )
43
+ self .compiled_jq = compile_jq_query (self .jq_query )
46
44
self .file_pbar = tqdm (
47
45
total = len (self .file_list ),
48
46
desc = "Ingesting JSON files" ,
@@ -117,10 +115,9 @@ def parse_json_stream(self, filepath: str) -> Iterator[OTelEvent]:
117
115
:rtype: `Iterator`[:class:`OTelEvent`]
118
116
"""
119
117
with open (filepath , "r" , encoding = "utf-8" ) as file :
120
- if self .config .json_per_line :
121
- jsons = (json .loads (line , strict = False ) for line in file )
122
- else :
123
- jsons = (data for data in [json .load (file , strict = False )])
118
+ jsons = get_jsons_from_file (
119
+ file , filepath , self .config .json_per_line
120
+ )
124
121
for data in jsons :
125
122
for record in generate_records_from_compiled_jq (
126
123
data , self .compiled_jq
@@ -160,3 +157,38 @@ def __next__(self) -> OTelEvent:
160
157
self .events_pbar .close ()
161
158
self .event_error_pbar .close ()
162
159
raise StopIteration
160
+
161
+
162
def get_jsons_from_file(
    file_io: TextIOWrapper, filepath: str, json_per_line: bool = False
) -> Generator[Any, Any, None]:
    """Generator function to yield JSON data from a file.

    When `json_per_line` is `True` each line of the file is decoded as its
    own JSON document. If a line cannot be decoded, line-by-line decoding
    stops and the file is instead decoded as a single JSON document:

    * if no line had been decoded yet, the stream is rewound to the start
      (when it is seekable) so that the whole file is parsed, which lets a
      regular multi-line JSON file still be ingested;
    * if some lines had already been decoded and yielded, only the part of
      the file after the failing line is parsed, so that no record is
      yielded twice.

    If that single-document decode fails as well, an error is logged and
    the generator finishes without raising.

    :param file_io: The file object to read from
    :type file_io: :class:`TextIOWrapper`
    :param filepath: The path to the file (only used in the log message)
    :type filepath: `str`
    :param json_per_line: Whether the JSON data is formatted with one JSON
    object per line. Defaults to `False`.
    :type json_per_line: `bool`
    :return: A generator yielding JSON objects
    :rtype: `Generator`[:class:`Any`, `Any`, `None`]
    """
    counter = 0
    if json_per_line:
        for line in file_io:
            # Keep the ``try`` limited to the decode itself so that errors
            # raised by the consumer of the generator are never swallowed.
            try:
                record = json.loads(line, strict=False)
            except json.JSONDecodeError:
                break
            yield record
            counter += 1
        else:
            # Every line decoded cleanly: nothing left to fall back to.
            return
        # The failing line has already been consumed from the stream. If
        # nothing was yielded yet, start over so the fallback sees the whole
        # document instead of a file with its first line missing.
        if counter == 0 and file_io.seekable():
            file_io.seek(0)
    try:
        yield json.load(file_io, strict=False)
    except json.JSONDecodeError:
        LOGGER.error(
            "Error decoding JSON data in file: %s\n"
            "However, if the current line counter for the file is "
            "greater than 0 then that number of lines was able to"
            " be decoded: %s lines.",
            filepath,
            counter,
        )
0 commit comments