Skip to content

Commit

Permalink
improve timestamp parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
gaainf committed Mar 13, 2019
1 parent 31a1866 commit a1f6a2d
Show file tree
Hide file tree
Showing 6 changed files with 96 additions and 39 deletions.
12 changes: 7 additions & 5 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,12 @@ pcaper
:target: https://github.com/gaainf/pcaper/blob/master/LICENSE

The package helps to assemble and iterate HTTP requests.
Pcaper provides a class to read traffic files in pcap or har formats, and executable converters - `pcap2txt` and `har2txt`.
Pcaper provides a class to read traffic files in pcap or har formats,
and executable converters - `pcap2txt` and `har2txt`.
`PcapParser` is based on `dpkt <https://github.com/kbandla/dpkt/>`_. `HarParser` uses the built-in json package.

`pcaper` extends dpkt.http.Request class. Following fields of HTTP request are available:
`pcaper` extends dpkt.http.Request class.
Following fields of HTTP request are available:

- `timestamp` - timestamp of the last packet of original HTTP request
- `src` - source IP address
Expand Down Expand Up @@ -242,9 +244,9 @@ Use excluding filters also
har2txt -F '"rambler.ru" not in http.uri' file.har
Filter packets with destination IP.
`pcaper` extracts data from har file,
which contains destination IP (`dst` field), but doesn't contain source IP, source and destination ports.
Filter packets with destination IP.
`pcaper` extracts data from har file, which contains destination IP
(`dst` field), but doesn't contain source IP, source and destination ports.

.. code:: bash
Expand Down
2 changes: 1 addition & 1 deletion pcaper/_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@
# See LICENSE file in the project root for full license information.
#

__version__ = '1.0.6'
__version__ = '1.0.8'
29 changes: 2 additions & 27 deletions pcaper/har_gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,21 +9,6 @@
# Additional methods for har file generating

import pcaper
from datetime import datetime


def format_microseconds(date, accuracy=3):
    """Format datetime as a string with fractional seconds

    The result looks like '2019-03-08T21:10:26.123Z'. The trailing 'Z'
    is appended literally; no timezone conversion is performed.

    Args:
        date (datetime): date to format
        accuracy (int): number of fractional-second digits to keep,
            clamped to the range 0..6. Extra digits are truncated,
            not rounded.

    Returns:
        str: formatted string
    """

    # '%f' always renders exactly six digits, so anything outside 0..6
    # would slice unpredictably (negative) or be meaningless (> 6).
    digits = max(0, min(accuracy, 6))
    base = date.strftime('%Y-%m-%dT%H:%M:%S')
    if digits == 0:
        # Avoid a dangling '.' right before the 'Z' suffix.
        return base + 'Z'
    return base + '.' + date.strftime('%f')[:digits] + 'Z'


def get_har_file_struct():
Expand All @@ -42,12 +27,7 @@ def get_har_file_struct():
},
"pages": [
{
"startedDateTime": format_microseconds(
datetime.strptime(
'2019-03-08T21:10:26.123Z',
'%Y-%m-%dT%H:%M:%S.%fZ'
)
),
"startedDateTime": "2019-03-08T21:10:26.123Z",
"id": "page_1",
"title": "http://example.com/",
"pageTimings": {
Expand Down Expand Up @@ -78,12 +58,7 @@ def generate_http_request_har_object(http_request):
request_dict = parser.parse_request(http_request)

har_structure["log"]["entries"].append({
"startedDateTime": format_microseconds(
datetime.strptime(
'2019-03-08T21:10:26.123Z',
'%Y-%m-%dT%H:%M:%S.%fZ'
)
),
"startedDateTime": '2019-03-08T21:10:26.123Z',
"time": 0.0,
"request": {
"method": request_dict["method"],
Expand Down
7 changes: 5 additions & 2 deletions pcaper/pcaper.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from .HTTPRequest import HTTPRequest
import json
from datetime import datetime
from dateutil import parser as date_parser


class HTTPParser:
Expand Down Expand Up @@ -521,12 +522,14 @@ def read_har(self, params):
http_request['headers'][pair['name'].lower()] = \
pair['value']
if 'postData' in entry['request']:
if 'text' not in entry['request']['postData']:
continue
http_request['body'] = \
entry['request']['postData']['text']
if 'startedDateTime' in entry:
timestamp = datetime.strptime(
timestamp = date_parser.parse(
entry['startedDateTime'],
'%Y-%m-%dT%H:%M:%S.%fZ'
ignoretz=True
)
http_request['timestamp'] = \
(timestamp - datetime(1970, 1, 1)).total_seconds()
Expand Down
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@
'install_requires': [
'dpkt>=1.9.1',
'flake8>=3.5.0',
'six>=1.11.0'
'six>=1.11.0',
'python-dateutil>=2.8.0'
],
'setup_requires': 'pytest-runner',
'tests_require': [
Expand Down
82 changes: 79 additions & 3 deletions tests/test_pcaper.py
Original file line number Diff line number Diff line change
Expand Up @@ -986,6 +986,8 @@ def test_read_pcap_parse_pcapng_format(self, capsys, remove_data_file):

# HarParser

# Fixtures

def set_har_file(self, filename, data):
"""Prepare har file"""

Expand Down Expand Up @@ -1135,7 +1137,7 @@ def test_read_har_parse_absent_url(self, prepare_har_file):

@pytest.mark.negative
def test_read_har_parse_absent_method(self, prepare_har_file):
"""Check read_har method handles incorrect json object correctly"""
"""Check read_har method handles absent method as expected"""

http_request = "GET https://rambler.ru/\r\n" + \
"Host: rambler.ru\r\n" + \
Expand All @@ -1155,7 +1157,7 @@ def test_read_har_parse_absent_method(self, prepare_har_file):

@pytest.mark.negative
def test_read_har_parse_empty_json(self, prepare_har_file, capsys):
"""Check read_har method handles empty json correctly"""
"""Check read_har method handles empty json as expected"""

filename = prepare_har_file({})
reader = pcaper.HarParser()
Expand All @@ -1175,7 +1177,7 @@ def test_read_har_parse_empty_json(self, prepare_har_file, capsys):

@pytest.mark.positive
def test_read_har_parse_incorrect_json(self, prepare_har_file):
"""Check read_har method handles incorrect json correctly"""
"""Check read_har method handles incorrect json as expected"""

filename = prepare_har_file({"log": {"entries": ['REQ']}})
reader = pcaper.HarParser()
Expand Down Expand Up @@ -1228,6 +1230,80 @@ def test_read_har_empty_http_filter(self, prepare_har_file):
packets = packets + 1
assert packets == 1, "unexpected packets count"

@pytest.mark.negative
def test_read_har_parse_unexpected_post_data(self, prepare_har_file):
    """Check read_har method yields no requests when postData
    of the entry has no 'text' field"""

    raw_request = (
        "POST https://rambler.ru/\r\n"
        "Host: rambler.ru\r\n"
        "Content-Length: 7\r\n\r\n"
        "param=0"
    )
    har_data = har_gen.generate_http_request_har_object(raw_request)
    last_entry = har_data['log']['entries'][-1]
    # Drop the body text so postData is present but incomplete
    del last_entry['request']['postData']['text']
    har_path = prepare_har_file(har_data)
    parser = pcaper.HarParser()

    packets = sum(1 for _ in parser.read_har({'input': har_path}))
    assert packets == 0, "unexpected packets count"

@pytest.mark.positive
def test_read_har_parse_another_timestamp_format(
    self,
    prepare_har_file
):
    """Check read_har method yields the request when startedDateTime
    carries an explicit UTC offset instead of the 'Z' suffix"""

    raw_request = (
        "POST https://rambler.ru/\r\n"
        "Host: rambler.ru\r\n"
        "Content-Length: 7\r\n\r\n"
        "param=0"
    )
    har_data = har_gen.generate_http_request_har_object(raw_request)
    last_entry = har_data['log']['entries'][-1]
    last_entry['startedDateTime'] = '2018-11-15T19:14:11.930+03:00'
    har_path = prepare_har_file(har_data)
    parser = pcaper.HarParser()

    packets = sum(1 for _ in parser.read_har({'input': har_path}))
    assert packets == 1, "unexpected packets count"

@pytest.mark.negative
def test_read_har_parse_unexpected_timestamp_format(
    self,
    prepare_har_file
):
    """Check read_har method still yields the request when
    startedDateTime has no seconds and no timezone part"""

    raw_request = (
        "POST https://rambler.ru/\r\n"
        "Host: rambler.ru\r\n"
        "Content-Length: 7\r\n\r\n"
        "param=0"
    )
    har_data = har_gen.generate_http_request_har_object(raw_request)
    last_entry = har_data['log']['entries'][-1]
    last_entry['startedDateTime'] = '2018-11-15T19:14'
    har_path = prepare_har_file(har_data)
    parser = pcaper.HarParser()

    packets = sum(1 for _ in parser.read_har({'input': har_path}))
    assert packets == 1, "unexpected packets count"

# HTTPParser

@pytest.mark.negative
Expand Down

0 comments on commit a1f6a2d

Please sign in to comment.