Skip to content

Commit

Permalink
feature: implement zip remote file support
Browse files Browse the repository at this point in the history
  • Loading branch information
mutantsan committed Sep 1, 2023
1 parent c7d9f11 commit 7eab34a
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 21 deletions.
45 changes: 41 additions & 4 deletions ckanext/unfold/adapters/zip.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,24 +2,34 @@

import logging
from datetime import datetime as dt
from typing import Any
from io import BytesIO
from typing import Any, Optional
from zipfile import BadZipFile, LargeZipFile, ZipFile, ZipInfo

import ckan.plugins.toolkit as tk
import requests

import ckanext.unfold.types as unf_types
import ckanext.unfold.utils as unf_utils

log = logging.getLogger(__name__)


def build_directory_tree(filepath: str) -> list[unf_types.Node]:
def build_directory_tree(
filepath: str, remote: Optional[bool] = False
) -> list[unf_types.Node]:
try:
with ZipFile(filepath) as archive:
file_list: list[ZipInfo] = archive.infolist()
if remote:
file_list = get_ziplist_from_url(filepath)
else:
with ZipFile(filepath) as archive:
file_list: list[ZipInfo] = archive.infolist()
except (LargeZipFile, BadZipFile) as e:
log.error(f"Error openning zip archive: {e}")
return []
except requests.RequestException as e:
log.error(f"Error fetching remote zip archive: {e}")
return []

nodes: list[unf_types.Node] = []

Expand Down Expand Up @@ -59,3 +69,30 @@ def _prepare_table_data(entry: ZipInfo) -> dict[str, Any]:
"format": fmt,
"modified_at": modified_at,
}


def get_ziplist_from_url(
    url, tail_size: int = 65536, timeout: float = 30.0
) -> list[ZipInfo]:
    """List the entries of a remote zip archive without downloading it fully.

    The archive size is discovered with a HEAD request, then only the last
    ``tail_size`` bytes are fetched with a ranged GET and parsed as a zip.
    This relies on the zip central directory being stored at the end of the
    file; if it does not fit into the fetched tail, parsing fails with
    ``BadZipFile``.

    Args:
        url: location of the remote zip archive.
        tail_size: how many trailing bytes of the archive to download.
        timeout: seconds to wait for the server before giving up.

    Returns:
        The archive's ``ZipInfo`` entries, or an empty list when the server
        does not report a usable size.

    Raises:
        requests.RequestException: on connection problems, timeouts or an
            HTTP error status.
        zipfile.BadZipFile: when the fetched tail is not a readable archive.
    """
    # requests.head() does not follow redirects unless asked to, which would
    # make us read the size of the redirect response instead of the archive.
    # A timeout keeps an unresponsive host from blocking the caller forever.
    head = requests.head(url, allow_redirects=True, timeout=timeout)
    head.raise_for_status()

    # "Content-Range: bytes 0-99/1234" carries the full size after the slash
    # and takes precedence over Content-Length when it is usable.
    _, slash, range_total = head.headers.get("content-range", "").rpartition("/")
    candidates = (
        range_total if slash else None,
        head.headers.get("content-length"),
    )

    end = None
    for raw_size in candidates:
        try:
            end = int(raw_size)
        except (TypeError, ValueError):
            # Header is missing or holds something like "*"; try the next one.
            continue
        break

    if end is None or end <= 0:
        return []

    # Archives smaller than the window must not produce a negative offset,
    # and byte ranges are inclusive, so the last valid position is end - 1.
    start = max(0, end - tail_size)

    return _get_remote_zip_infolist(url, start, end - 1)


def _get_remote_zip_infolist(
    url: str, start: int, end: int, timeout: float = 30.0
) -> list[ZipInfo]:
    """Fetch a byte range of a remote zip archive and list its entries.

    Args:
        url: location of the remote zip archive.
        start: first byte of the range to request; negative values are
            clamped to 0 because they would form an invalid Range header.
        end: last byte of the range to request.
        timeout: seconds to wait for the server before giving up.

    Returns:
        The ``ZipInfo`` entries found in the downloaded part of the archive.

    Raises:
        requests.RequestException: on connection problems, timeouts or an
            HTTP error status.
        zipfile.BadZipFile: when the downloaded bytes are not a readable
            archive.
    """
    resp = requests.get(
        url,
        headers={"Range": f"bytes={max(start, 0)}-{end}"},
        timeout=timeout,
    )
    # Fail on 4xx/5xx instead of trying to parse an error page as a zip.
    resp.raise_for_status()

    # Close the archive explicitly; the ZipInfo objects stay usable afterwards.
    with ZipFile(BytesIO(resp.content)) as archive:
        return archive.infolist()
1 change: 0 additions & 1 deletion ckanext/unfold/assets/unfold-init-jstree.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ ckan.module("unfold-init-jstree", function ($, _) {
$.proxyAll(this, /_on/);

this.tree = $(this.el)
const tree = this.tree;

$("#jstree-search").on("change", this._onSearch);
$("#jstree-search-clear").click(this._onClearSearch);
Expand Down
29 changes: 13 additions & 16 deletions ckanext/unfold/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,9 @@

import json
import logging
from typing import Any
from typing import Any, Optional

import ckan.lib.uploader as uploader
import requests
from requests.exceptions import RequestException

import ckanext.unfold.adapters as unf_adapters
import ckanext.unfold.types as unf_types
Expand All @@ -16,32 +14,31 @@


def get_archive_tree(resource: dict[str, Any]) -> str | None:
remote = False

if resource.get("url_type") == "upload":
upload = uploader.get_resource_uploader(resource)
filepath = upload.get_path(resource["id"])
else:
# TODO: implement remote resource support
if resource.get("url"):
try:
resp = requests.get(resource["url"])
except RequestException as e:
log.error("Error fetching data for resource: %s", resource["url"])
else:
data = resp.text
if not resource.get("url"):
return

filepath = resource["url"]
remote = True

tree = unf_utils.get_archive_structure(resource["id"])

if not tree:
tree = parse_archive(
resource["format"].lower(), filepath
)
tree = parse_archive(resource["format"].lower(), filepath, remote)
unf_utils.save_archive_structure(tree, resource["id"])

return json.dumps(tree) if tree else None


def parse_archive(fmt: str, filepath) -> list[unf_types.Node]:
def parse_archive(
fmt: str, filepath: str, remote: Optional[bool] = False
) -> list[unf_types.Node]:
if fmt not in unf_adapters.ADAPTERS:
raise TypeError(f"No adapter for `{fmt}` archives")

return unf_adapters.ADAPTERS[fmt](filepath)
return unf_adapters.ADAPTERS[fmt](filepath, remote)

0 comments on commit 7eab34a

Please sign in to comment.