From a1f6a2dd72a9c69214092f675eb4fd570f85fa0e Mon Sep 17 00:00:00 2001 From: gaainf Date: Wed, 13 Mar 2019 13:09:59 +0300 Subject: [PATCH] improve timestamp parsing --- README.rst | 12 ++++--- pcaper/_version.py | 2 +- pcaper/har_gen.py | 29 ++-------------- pcaper/pcaper.py | 7 ++-- setup.py | 3 +- tests/test_pcaper.py | 82 ++++++++++++++++++++++++++++++++++++++++++-- 6 files changed, 96 insertions(+), 39 deletions(-) diff --git a/README.rst b/README.rst index 85927d5..8d79327 100644 --- a/README.rst +++ b/README.rst @@ -21,10 +21,12 @@ pcaper :target: https://github.com/gaainf/pcaper/blob/master/LICENSE The package helps to assemble and iterate HTTP requests. -Pcaper provides class to read traffic files in pcap or har formats, executable converters - `pcap2txt` and `har2txt`. +Pcaper provides class to read traffic files in pcap or har formats, +executable converters - `pcap2txt` and `har2txt`. `PcapParser` based on `dpkt `_. `HarParser` uses built-in json package. -`pcaper` extends dpkt.http.Request class. Following fields of HTTP request are available: +`pcaper` extends dpkt.http.Request class. +Following fields of HTTP request are available: - `timestamp` - timestamp of the last packet of original HTTP request - `src` - source IP address @@ -242,9 +244,9 @@ Use excluding filters also har2txt -F '"rambler.ru" not in http.uri' file.har -Filter packets with destination IP. -`pcaper` extracts data from har file, -which contains destination IP (`dst` filed), but doesn't contain source IP, source and destination ports. +Filter packets with destination IP. +`pcaper` extracts data from har file, which contains destination IP +(`dst` field), but doesn't contain source IP, source and destination ports. .. code:: bash diff --git a/pcaper/_version.py b/pcaper/_version.py index 479a755..45564f2 100644 --- a/pcaper/_version.py +++ b/pcaper/_version.py @@ -7,4 +7,4 @@ # See LICENSE file in the project root for full license information. 
# -__version__ = '1.0.6' +__version__ = '1.0.8' diff --git a/pcaper/har_gen.py b/pcaper/har_gen.py index fa55a84..25a7607 100644 --- a/pcaper/har_gen.py +++ b/pcaper/har_gen.py @@ -9,21 +9,6 @@ # Additional methods for har file generating import pcaper -from datetime import datetime - - -def format_microseconds(date, accuracy=3): - """Format datetime in microseconds - Args: - date (datetime): date - accuracy (int): round accuaracy - - Returns: - datetime: formated string - """ - - return datetime.strftime(date, '%Y-%m-%dT%H:%M:%S.') + \ - datetime.strftime(date, '%f')[:accuracy] + 'Z' def get_har_file_struct(): @@ -42,12 +27,7 @@ def get_har_file_struct(): }, "pages": [ { - "startedDateTime": format_microseconds( - datetime.strptime( - '2019-03-08T21:10:26.123Z', - '%Y-%m-%dT%H:%M:%S.%fZ' - ) - ), + "startedDateTime": "2019-03-08T21:10:26.123Z", "id": "page_1", "title": "http://example.com/", "pageTimings": { @@ -78,12 +58,7 @@ def generate_http_request_har_object(http_request): request_dict = parser.parse_request(http_request) har_structure["log"]["entries"].append({ - "startedDateTime": format_microseconds( - datetime.strptime( - '2019-03-08T21:10:26.123Z', - '%Y-%m-%dT%H:%M:%S.%fZ' - ) - ), + "startedDateTime": '2019-03-08T21:10:26.123Z', "time": 0.0, "request": { "method": request_dict["method"], diff --git a/pcaper/pcaper.py b/pcaper/pcaper.py index fcd0656..822045e 100644 --- a/pcaper/pcaper.py +++ b/pcaper/pcaper.py @@ -19,6 +19,7 @@ from .HTTPRequest import HTTPRequest import json from datetime import datetime +from dateutil import parser as date_parser class HTTPParser: @@ -521,12 +522,14 @@ def read_har(self, params): http_request['headers'][pair['name'].lower()] = \ pair['value'] if 'postData' in entry['request']: + if 'text' not in entry['request']['postData']: + continue http_request['body'] = \ entry['request']['postData']['text'] if 'startedDateTime' in entry: - timestamp = datetime.strptime( + timestamp = date_parser.parse( entry['startedDateTime'], 
- '%Y-%m-%dT%H:%M:%S.%fZ' + ignoretz=True ) http_request['timestamp'] = \ (timestamp - datetime(1970, 1, 1)).total_seconds() diff --git a/setup.py b/setup.py index c1eb152..6d9f386 100755 --- a/setup.py +++ b/setup.py @@ -20,7 +20,8 @@ 'install_requires': [ 'dpkt>=1.9.1', 'flake8>=3.5.0', - 'six>=1.11.0' + 'six>=1.11.0', + 'python-dateutil>=2.8.0' ], 'setup_requires': 'pytest-runner', 'tests_require': [ diff --git a/tests/test_pcaper.py b/tests/test_pcaper.py index 2840636..28d0be1 100644 --- a/tests/test_pcaper.py +++ b/tests/test_pcaper.py @@ -986,6 +986,8 @@ def test_read_pcap_parse_pcapng_format(self, capsys, remove_data_file): # HarParser + # Fixtures + def set_har_file(self, filename, data): """Prepare har file""" @@ -1135,7 +1137,7 @@ def test_read_har_parse_absent_url(self, prepare_har_file): @pytest.mark.negative def test_read_har_parse_absent_method(self, prepare_har_file): - """Check read_har method handles incorrect json object correctly""" + """Check read_har method handles absent method as expected""" http_request = "GET https://rambler.ru/\r\n" + \ "Host: rambler.ru\r\n" + \ @@ -1155,7 +1157,7 @@ def test_read_har_parse_absent_method(self, prepare_har_file): @pytest.mark.negative def test_read_har_parse_empty_json(self, prepare_har_file, capsys): - """Check read_har method handles empty json correctly""" + """Check read_har method handles empty json as expected""" filename = prepare_har_file({}) reader = pcaper.HarParser() @@ -1175,7 +1177,7 @@ def test_read_har_parse_empty_json(self, prepare_har_file, capsys): @pytest.mark.positive def test_read_har_parse_incorrect_json(self, prepare_har_file): - """Check read_har method handles incorrect json correctly""" + """Check read_har method handles incorrect json as expected""" filename = prepare_har_file({"log": {"entries": ['REQ']}}) reader = pcaper.HarParser() @@ -1228,6 +1230,80 @@ def test_read_har_empty_http_filter(self, prepare_har_file): packets = packets + 1 assert packets == 1, "unexpected packets 
count" + @pytest.mark.negative + def test_read_har_parse_unexpected_post_data(self, prepare_har_file): + """Check read_har method handles unexpected postData + format correctly""" + + http_request = "POST https://rambler.ru/\r\n" + \ + "Host: rambler.ru\r\n" + \ + "Content-Length: 7\r\n\r\n" + \ + "param=0" + data = har_gen.generate_http_request_har_object(http_request) + del data['log']['entries'][-1]['request']['postData']['text'] + filename = prepare_har_file(data) + reader = pcaper.HarParser() + + packets = 0 + for request in reader.read_har({ + 'input': filename + } + ): + packets = packets + 1 + assert packets == 0, "unexpected packets count" + + @pytest.mark.positive + def test_read_har_parse_another_timestamp_format( + self, + prepare_har_file + ): + """Check read_har method handles another timestamp + format correctly""" + + http_request = "POST https://rambler.ru/\r\n" + \ + "Host: rambler.ru\r\n" + \ + "Content-Length: 7\r\n\r\n" + \ + "param=0" + data = har_gen.generate_http_request_har_object(http_request) + data['log']['entries'][-1]['startedDateTime'] = \ + '2018-11-15T19:14:11.930+03:00' + filename = prepare_har_file(data) + reader = pcaper.HarParser() + + packets = 0 + for request in reader.read_har({ + 'input': filename + } + ): + packets = packets + 1 + assert packets == 1, "unexpected packets count" + + @pytest.mark.negative + def test_read_har_parse_unexpected_timestamp_format( + self, + prepare_har_file + ): + """Check read_har method handles unexpected timestamp + format correctly""" + + http_request = "POST https://rambler.ru/\r\n" + \ + "Host: rambler.ru\r\n" + \ + "Content-Length: 7\r\n\r\n" + \ + "param=0" + data = har_gen.generate_http_request_har_object(http_request) + data['log']['entries'][-1]['startedDateTime'] = \ + '2018-11-15T19:14' + filename = prepare_har_file(data) + reader = pcaper.HarParser() + + packets = 0 + for request in reader.read_har({ + 'input': filename + } + ): + packets = packets + 1 + assert packets == 1, 
"unexpected packets count" + # HTTPParser @pytest.mark.negative