-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add datapackage.json generation in WACZ (#15)
- Loading branch information
1 parent
ba6dcaa
commit 9ea5f71
Showing
9 changed files
with
253 additions
and
20 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
# Version string for this package; "0.0.1.dev2" has the shape of a PEP 440 development release.
__version__ = "0.0.1.dev2" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
import pytest | ||
|
||
|
||
@pytest.fixture
def warc_example():
    """Return the raw bytes of a single WARC/1.0 ``request`` record.

    The record consists of CRLF-terminated header lines, an empty line, a
    10-byte ``Helloworld`` payload (matching the declared ``Content-Length``)
    and a trailing pair of CRLFs.
    """
    # Adjacent bytes literals are concatenated at compile time, so this yields
    # one contiguous bytes object without any line-continuation backslashes.
    return (
        b"WARC/1.0\r\n"
        b"Content-Length: 10\r\n"
        b"WARC-Date: 2024-02-10T16:15:52Z\r\n"
        b"Content-Type: application/http; msgtype=request\r\n"
        b"WARC-Type: request\r\n"
        b"WARC-Record-ID: <urn:uuid:80fb9262-5402-11e1-8206-545200690126>\r\n"
        b"WARC-Target-URI: http://example.com/\r\n"
        b"\r\n"
        b"Helloworld"
        b"\r\n\r\n"
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
import hashlib | ||
import io | ||
|
||
import pytest | ||
|
||
from scrapy_webarchive.utils import BUFF_SIZE, hash_stream | ||
|
||
|
||
def test_hash_stream_with_empty_stream():
    """An empty stream must report zero bytes and the SHA-256 digest of ``b""``."""
    payload = b""
    expected_digest = hashlib.sha256(payload).hexdigest()

    byte_count, digest = hash_stream("sha256", io.BytesIO(payload))

    assert byte_count == 0
    assert digest == f"sha256:{expected_digest}"
|
||
def test_hash_stream_with_md5_algorithm():
    """The ``md5`` algorithm must yield the byte count and an ``md5:``-prefixed digest."""
    payload = b"Hello world"
    reference_digest = hashlib.md5(payload).hexdigest()

    byte_count, digest = hash_stream("md5", io.BytesIO(payload))

    assert byte_count == len(payload)
    assert digest == f"md5:{reference_digest}"
|
||
def test_hash_stream_with_sha256_algorithm():
    """The ``sha256`` algorithm must yield the byte count and a ``sha256:``-prefixed digest."""
    payload = b"Hello world"
    reference_digest = hashlib.sha256(payload).hexdigest()

    byte_count, digest = hash_stream("sha256", io.BytesIO(payload))

    assert byte_count == len(payload)
    assert digest == f"sha256:{reference_digest}"
|
||
def test_hash_stream_with_unsupported_hash_type():
    """An unrecognised algorithm name must make ``hash_stream`` raise ``ValueError``."""
    source = io.BytesIO(b"Hello world")

    # Only the call under test sits inside the context manager, so a failure
    # while building the stream cannot be mistaken for the expected error.
    with pytest.raises(ValueError):
        hash_stream("unsupported_hash", source)
|
||
def test_hash_stream_with_large_stream():
    """A stream of twice ``BUFF_SIZE`` bytes must still hash to the whole-payload digest."""
    # Twice the buffer size.
    payload = b"a" * (BUFF_SIZE * 2)
    reference_digest = hashlib.sha256(payload).hexdigest()

    byte_count, digest = hash_stream("sha256", io.BytesIO(payload))

    assert byte_count == len(payload)
    assert digest == f"sha256:{reference_digest}"
Oops, something went wrong.