Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add "AIPs by file format" and "AIPs by PUID" reports #76

Merged
merged 1 commit into from
Nov 30, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ config.pyc
aipscan.db
celerytasks.db
AIPscan/Aggregator/downloads/
.tox/
96 changes: 74 additions & 22 deletions AIPscan/API/namespace_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,19 @@
remixed as the caller desires. Data is 'unkempt' not raw. No data is
raw. No data is without bias.
"""
from distutils.util import strtobool

from flask import request
from flask_restx import Namespace, Resource

from AIPscan.helpers import parse_bool
from AIPscan.Data import data

# Names of the query-string parameters used by the endpoints in this module,
# both as keys in the documented ``params`` and when reading ``request.args``.
FILE_FORMAT_FIELD = "file_format"
FILE_TYPE_FIELD = "file_type"
LIMIT_FIELD = "limit"
ORIGINAL_FILES_FIELD = "original_files"
PUID_FIELD = "puid"

api = Namespace("data", description="Retrieve data from AIPscan to shape as you desire")

"""
Expand All @@ -22,19 +29,12 @@
"""


def parse_bool(val, default=True):
    """Interpret a query-string style value as a boolean.

    Recognises the same case-insensitive tokens as the former
    ``distutils.util.strtobool`` helper: "y", "yes", "t", "true", "on"
    and "1" are true; "n", "no", "f", "false", "off" and "0" are false.

    :param val: Value to interpret, normally a str (or None when the
        parameter was not supplied)
    :param default: Value returned when val is not a str or is not a
        recognised token (bool)

    :returns: Parsed boolean, or default if val cannot be interpreted
    """
    # distutils was deprecated by PEP 632 and removed in Python 3.12, so
    # the token matching is done here instead of via strtobool.
    if not isinstance(val, str):
        # Previously signalled by AttributeError from val.lower().
        return default
    token = val.lower()
    if token in ("y", "yes", "t", "true", "on", "1"):
        return True
    if token in ("n", "no", "f", "false", "off", "0"):
        return False
    # Previously signalled by ValueError from strtobool.
    return default


@api.route("/aip-overview/<storage_service_id>")
class FMTList(Resource):
@api.doc(
"list_formats",
params={
"original_files": {
ORIGINAL_FILES_FIELD: {
"description": "Return data for original files or copies",
"in": "query",
"type": "bool",
Expand All @@ -43,10 +43,7 @@ class FMTList(Resource):
)
def get(self, storage_service_id):
"""AIP overview One"""
try:
original_files = parse_bool(request.args.get("original_files"), True)
except TypeError:
pass
original_files = parse_bool(request.args.get(ORIGINAL_FILES_FIELD, True))
aip_data = data.aip_overview(
storage_service_id=storage_service_id, original_files=original_files
)
Expand All @@ -58,19 +55,16 @@ class AIPList(Resource):
@api.doc(
"list_formats",
params={
"original_files": {
"description": "Return data for original files or copies",
ORIGINAL_FILES_FIELD: {
"description": "Return data for original files or preservation derivatives",
"in": "query",
"type": "bool",
}
},
)
def get(self, storage_service_id):
"""AIP overview two"""
try:
original_files = parse_bool(request.args.get("original_files"), True)
except TypeError:
pass
original_files = parse_bool(request.args.get(ORIGINAL_FILES_FIELD, True))
aip_data = data.aip_overview_two(
storage_service_id=storage_service_id, original_files=original_files
)
Expand All @@ -91,12 +85,12 @@ class LargestFileList(Resource):
@api.doc(
"list_formats",
params={
"file_type": {
FILE_TYPE_FIELD: {
"description": "Optional file type filter (original or preservation)",
"in": "query",
"type": "str",
},
"limit": {
LIMIT_FIELD: {
"description": "Number of results to return (default is 20)",
"in": "query",
"type": "int",
Expand All @@ -105,7 +99,7 @@ class LargestFileList(Resource):
)
def get(self, storage_service_id, file_type=None, limit=20):
"""Largest files"""
file_type = request.args.get("file_type", None)
file_type = request.args.get(FILE_TYPE_FIELD)
try:
limit = int(request.args.get("limit", 20))
except ValueError:
Expand All @@ -114,3 +108,61 @@ def get(self, storage_service_id, file_type=None, limit=20):
storage_service_id=storage_service_id, file_type=file_type, limit=limit
)
return file_data


@api.route("/aips-by-file-format/<storage_service_id>")
class AIPsByFormatList(Resource):
    # Endpoint reporting the AIPs that contain files of a named file format.
    @api.doc(
        "list_aips_by_format",
        params={
            FILE_FORMAT_FIELD: {
                "description": "File format name (must be exact match)",
                "in": "query",
                "type": "str",
            },
            ORIGINAL_FILES_FIELD: {
                "description": "Return data for original files or preservation derivatives",
                "in": "query",
                "type": "bool",
            },
        },
    )
    def get(self, storage_service_id):
        """AIPs containing given file format."""
        query_args = request.args
        # A missing format name becomes an empty string; a missing
        # original_files flag is passed to parse_bool as True.
        return data.aips_by_file_format(
            storage_service_id=storage_service_id,
            file_format=query_args.get(FILE_FORMAT_FIELD, ""),
            original_files=parse_bool(query_args.get(ORIGINAL_FILES_FIELD, True)),
        )


@api.route("/aips-by-puid/<storage_service_id>")
class AIPsByPUIDList(Resource):
    # Endpoint reporting the AIPs that contain files with a given PUID.
    @api.doc(
        "list_aips_by_puid",
        params={
            PUID_FIELD: {
                "description": "PRONOM ID (PUID)",
                "in": "query",
                "type": "str",
            },
            ORIGINAL_FILES_FIELD: {
                "description": "Return data for original files or preservation derivatives",
                "in": "query",
                "type": "bool",
            },
        },
    )
    def get(self, storage_service_id):
        """AIPs containing given format version, specified by PUID."""
        query_args = request.args
        # A missing PUID becomes an empty string; a missing original_files
        # flag is passed to parse_bool as True.
        return data.aips_by_puid(
            storage_service_id=storage_service_id,
            puid=query_args.get(PUID_FIELD, ""),
            original_files=parse_bool(query_args.get(ORIGINAL_FILES_FIELD, True)),
        )
2 changes: 1 addition & 1 deletion AIPscan/Aggregator/templates/edit_storage_service.html
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,6 @@
</table>
<button type="submit" class="btn btn-success">Save</button>
</form>
<a href="{{ url_for('aggregator.ss') }}"><button class="btn btn-danger" style="margin-top: 10px;">Cancel</button></a>
<a href="{{ url_for('aggregator.storage_services') }}"><button class="btn btn-danger" style="margin-top: 10px;">Cancel</button></a>

{% endblock %}
10 changes: 5 additions & 5 deletions AIPscan/Aggregator/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def ss_default():
storage_service = StorageService.query.first()
# there are no storage services defined at all
if storage_service is None:
return redirect(url_for("aggregator.ss"))
return redirect(url_for("aggregator.storage_services"))
ross-spencer marked this conversation as resolved.
Show resolved Hide resolved
mets_fetch_jobs = FetchJob.query.filter_by(
storage_service_id=storage_service.id
).all()
Expand All @@ -71,7 +71,7 @@ def storage_service(id):


@aggregator.route("/storage_services", methods=["GET"])
def ss():
def storage_services():
storage_services = StorageService.query.all()
return render_template("storage_services.html", storage_services=storage_services)

Expand Down Expand Up @@ -102,7 +102,7 @@ def edit_storage_service(id):
storage_service.default = form.default.data
db.session.commit()
flash("Storage service {} updated".format(form.name.data))
return redirect(url_for("aggregator.ss"))
return redirect(url_for("aggregator.storage_services"))
return render_template(
"edit_storage_service.html", title="Storage Service", form=form
)
Expand All @@ -124,7 +124,7 @@ def new_storage_service():
db.session.add(ss)
db.session.commit()
flash("New storage service {} created".format(form.name.data))
return redirect(url_for("aggregator.ss"))
return redirect(url_for("aggregator.storage_services"))
return render_template(
"edit_storage_service.html", title="Storage Service", form=form
)
Expand All @@ -140,7 +140,7 @@ def delete_storage_service(id):
db.session.delete(storage_service)
db.session.commit()
flash("Storage service '{}' is deleted".format(storage_service.name))
return redirect(url_for("aggregator.ss"))
return redirect(url_for("aggregator.storage_services"))


@aggregator.route("/new_fetch_job/<id>", methods=["POST"])
Expand Down
142 changes: 136 additions & 6 deletions AIPscan/Data/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@

from datetime import datetime

from AIPscan import db
from AIPscan.models import AIP, File, FileType, StorageService

# Every value defined by the FileType enum, collected once at import time.
VALID_FILE_TYPES = {member.value for member in FileType}
ross-spencer marked this conversation as resolved.
Show resolved Hide resolved

FIELD_AIP = "AIP"
FIELD_AIP_ID = "AIPID"
Expand Down Expand Up @@ -47,11 +49,17 @@


def _get_storage_service(storage_service_id):
DEFAULT_STORAGE_SERVICE_ID = 1
if storage_service_id == 0 or storage_service_id is None:
storage_service_id = DEFAULT_STORAGE_SERVICE_ID
storage_service = StorageService.query.get(storage_service_id)
return StorageService.query.first() if not storage_service else storage_service
"""Return Storage Service with ID or None.

Unlike elsewhere in our application, here we do not fall back to
a different StorageService if the user-supplied ID is invalid to
prevent inaccurate information from being returned.
ross-spencer marked this conversation as resolved.
Show resolved Hide resolved

:param storage_service_id: Storage Service ID

:returns: StorageService object or None
"""
return StorageService.query.get(storage_service_id)


def _split_ms(date_string):
Expand Down Expand Up @@ -221,7 +229,6 @@ def _largest_files_query(storage_service_id, file_type, limit):

This is separated into its own helper function to aid in testing.
"""
VALID_FILE_TYPES = set(item.value for item in FileType)
if file_type is not None and file_type in VALID_FILE_TYPES:
files = (
File.query.join(AIP)
Expand Down Expand Up @@ -292,3 +299,126 @@ def largest_files(storage_service_id, file_type=None, limit=20):
report[FIELD_FILES].append(file_info)

return report


def _query_aips_by_file_format_or_puid(
    storage_service_id, search_string, original_files=True, file_format=True
):
    """Build the query for AIPs holding files of a given format or PUID.

    Each result row describes one AIP and exposes the attributes ``id``,
    ``name`` (the AIP's transfer name), ``uuid``, ``file_count`` and
    ``total_size``. Rows are ordered by file count and then by total
    size, both descending.

    :param storage_service_id: Storage Service ID (int)
    :param search_string: File format or PUID (str)
    :param original_files: Flag indicating whether returned data
        describes original (default) or preservation files (bool)
    :param file_format: Flag indicating whether to filter on file
        format (default) or PUID (bool)

    :returns: SQLAlchemy query that yields the rows described above
    """
    per_aip = (
        db.session.query(
            AIP.id.label("id"),
            AIP.transfer_name.label("name"),
            AIP.uuid.label("uuid"),
            db.func.count(File.id).label("file_count"),
            db.func.sum(File.size).label("total_size"),
        )
        .join(File)
        .join(StorageService)
        .filter(StorageService.id == storage_service_id)
        .group_by(AIP.id)
        .order_by(db.func.count(File.id).desc(), db.func.sum(File.size).desc())
    )

    # Only an explicit False selects preservation files; any other value
    # (including None) keeps the default of original files.
    wanted_type = FileType.preservation if original_files is False else FileType.original
    per_aip = per_aip.filter(File.file_type == wanted_type.value)

    match_column = File.file_format if file_format else File.puid
    return per_aip.filter(match_column == search_string)


def _aips_by_file_format_or_puid(
    storage_service_id, search_string, original_files=True, file_format=True
):
    """Return overview of all AIPs containing files in a format or PUID.

    :param storage_service_id: Storage Service ID (int)
    :param search_string: File format name or PUID (str)
    :param original_files: Flag indicating whether returned data
        describes original (default) or preservation files (bool)
    :param file_format: Flag indicating whether to filter on file
        format (default) or PUID (bool)

    :returns: "report" dict containing following fields:
        report[FIELD_STORAGE_NAME]: Name of Storage Service queried, or
            None if no matching Storage Service exists
        report[FIELD_FORMAT] or report[FIELD_PUID]: The search string,
            keyed according to the file_format flag
        report[FIELD_AIPS]: List of result AIPs ordered desc by count
            (empty if no matching Storage Service exists)
    """
    report = {}

    storage_service = _get_storage_service(storage_service_id)
    # The lookup can come back empty (e.g. an unknown ID); report that as
    # a None name rather than failing on the attribute access.
    report[FIELD_STORAGE_NAME] = (
        storage_service.name if storage_service is not None else None
    )

    if file_format:
        report[FIELD_FORMAT] = search_string
    else:
        report[FIELD_PUID] = search_string

    report[FIELD_AIPS] = []
    if storage_service is None:
        # Nothing can match a Storage Service that does not exist.
        return report

    results = _query_aips_by_file_format_or_puid(
        storage_service_id, search_string, original_files, file_format
    )
    report[FIELD_AIPS] = [
        {
            "id": result.id,
            FIELD_AIP_NAME: result.name,
            FIELD_UUID: result.uuid,
            FIELD_COUNT: result.file_count,
            FIELD_SIZE: result.total_size,
        }
        for result in results
    ]

    return report


def aips_by_file_format(storage_service_id, file_format, original_files=True):
    """Report on the AIPs that contain files in the named file format.

    :param storage_service_id: Storage Service ID (int)
    :param file_format: File format name (str)
    :param original_files: Flag indicating whether returned data
        describes original (default) or preservation files (bool)

    :returns: Report dict provided by _aips_by_file_format_or_puid
    """
    # The format name is the search string; file_format=True selects
    # matching on file format rather than PUID.
    return _aips_by_file_format_or_puid(
        storage_service_id,
        file_format,
        original_files=original_files,
        file_format=True,
    )


def aips_by_puid(storage_service_id, puid, original_files=True):
    """Report on the AIPs that contain files identified by the given PUID.

    :param storage_service_id: Storage Service ID (int)
    :param puid: PUID (str)
    :param original_files: Flag indicating whether returned data
        describes original (default) or preservation files (bool)

    :returns: Report dict provided by _aips_by_file_format_or_puid
    """
    # The PUID is the search string; file_format=False selects matching
    # on PUID rather than file format name.
    return _aips_by_file_format_or_puid(
        storage_service_id,
        puid,
        original_files=original_files,
        file_format=False,
    )
Loading