diff --git a/CODEOWNERS b/CODEOWNERS new file mode 100644 index 00000000..07e76c37 --- /dev/null +++ b/CODEOWNERS @@ -0,0 +1 @@ +* @IndicoDataSolutions/pr-be-indicodata-ai \ No newline at end of file diff --git a/examples/submission-filters.py b/examples/submission-filters.py new file mode 100644 index 00000000..fefff9d5 --- /dev/null +++ b/examples/submission-filters.py @@ -0,0 +1,52 @@ +from indico import IndicoClient, IndicoConfig +from indico.filters import DateRangeFilter, SubmissionFilter, and_, or_ +from indico.queries import ListSubmissions + +# Create an Indico API client +my_config = IndicoConfig( + host="try.indico.io", api_token_path="./path/to/indico_api_token.txt" +) +client = IndicoClient(config=my_config) + +workflow_id = 5 + +""" +Example 1 +List all submissions that are COMPLETE or FAILED +""" +sub_filter = or_(SubmissionFilter(status="COMPLETE"), SubmissionFilter(status="FAILED")) +submissions = client.call(ListSubmissions(filters=sub_filter)) + +""" +Example 2 +List all submissions that are COMPLETE and FAILED +""" +sub_filter = and_( + SubmissionFilter(status="COMPLETE"), SubmissionFilter(status="FAILED") +) +submisions = client.call(ListSubmissions(filters=sub_filter)) + +""" +Example 3 +List all submissions that are retrieved and have a filename that contains 'property' +""" +sub_filter = SubmissionFilter(retrieved=True, input_filename="property") +submissions = client.call(ListSubmissions(filters=sub_filter)) + +""" +Example 4 +List all submissions that are created and updated within a certain date range +""" +date_filter = DateRangeFilter(filter_from="2022-01-01", filter_to="2023-01-01") +sub_filter = SubmissionFilter(created_at=date_filter, updated_at=date_filter) +submissions = client.call(ListSubmissions(filters=sub_filter)) + +""" +Example 5 +List all submissions that are COMPLETE and do not have a review in progress +""" +submissions = client.call( + ListSubmissions( + filters=SubmissionFilter(status="COMPLETE", 
review_in_progress=False) + ) +) diff --git a/indico/filters/__init__.py b/indico/filters/__init__.py index b24af746..44d495be 100644 --- a/indico/filters/__init__.py +++ b/indico/filters/__init__.py @@ -1,5 +1,5 @@ import datetime -from typing import Any, Iterable, Mapping +from typing import Any, Iterable, List, Mapping, Union from indico.errors import IndicoInputError @@ -48,7 +48,10 @@ class SubmissionReviewFilter(Filter): __options__ = ("rejected", "created_by", "review_type") def __init__( - self, rejected: bool = None, created_by: int = None, review_type: str = None + self, + rejected: Union[bool, None] = None, + created_by: Union[int, None] = None, + review_type: Union[str, None] = None, ): kwargs = { "rejected": rejected, @@ -59,33 +62,75 @@ def __init__( super().__init__(**kwargs) +class DateRangeFilter(dict): + """ + Create a Filter when querying for Submissions within a certain date range + Args: + filter_from (str): A valid string representation of the datetime at which the filtered range starts + filter_to (str): A valid string representation of the datetime at which the filtered range ends + """ + + def __init__( + self, filter_from: Union[str, None] = None, filter_to: Union[str, None] = None + ): + kwargs = {"from": filter_from, "to": filter_to} + self.update(kwargs) + + class SubmissionFilter(Filter): """ Create a Filter when querying for WorkflowSubmissions. Args: + file_type (list): submissions with a file type in this list. Options: + [CSV, PDF, EXCEL, DOC, DOCX, PPT, PPTX, PNG, JPG, TIFF, TXT, RTF, XLS, XLSX, UNKNOWN, MSG, EML] input_filename (str): submissions with input file names containing this string status (str): submissions in this status. 
Options: [PROCESSING, PENDING_REVIEW, PENDING_ADMIN_REVIEW, COMPLETE, FAILED] - retrieved(bool): Filter submissions on the retrieved flag + retrieved (bool): submissions that have been retrieved (True) or not (False) + reviews (SubmissionReviewFilter): submissions whose completed reviews match this review filter + review_in_progress (bool): submissions where a review is in progress (True) or not (False) + files_deleted (bool): submissions that have had their internal files removed (True) or not (False) + created_at (DateRangeFilter): submissions created during given time range + updated_at (DateRangeFilter): submissions updated during given time range Returns: dict containing query filter parameters """ - __options__ = ("input_filename", "status", "retrieved") + __options__ = ( + "file_type", + "input_filename", + "status", + "retrieved", + "reviews", + "review_in_progress", + "files_deleted", + "created_at", + "updated_at", + ) def __init__( self, - input_filename: str = None, - status: str = None, - retrieved: bool = None, - reviews: SubmissionReviewFilter = None, + file_type: Union[List[str], None] = None, + input_filename: Union[str, None] = None, + status: Union[str, None] = None, + retrieved: Union[bool, None] = None, + reviews: Union[SubmissionReviewFilter, None] = None, + review_in_progress: Union[bool, None] = None, + files_deleted: Union[bool, None] = None, + created_at: Union[DateRangeFilter, None] = None, + updated_at: Union[DateRangeFilter, None] = None, ): kwargs = { + "filetype": file_type, "inputFilename": input_filename, "status": status.upper() if status else status, "retrieved": retrieved, "reviews": reviews, + "reviewInProgress": review_in_progress, + "filesDeleted": files_deleted, + "createdAt": created_at, + "updatedAt": updated_at, } super().__init__(**kwargs) @@ -110,10 +155,10 @@ class ModelGroupExampleFilter(Filter): def __init__( self, - file_name: str = None, - partial: bool = None, - status: str = None, - text_search: str = None, + 
file_name: Union[str, None] = None, + partial: Union[bool, None] = None, + status: Union[str, None] = None, + text_search: Union[str, None] = None, ): kwargs = { "fileName": file_name, @@ -138,7 +183,9 @@ class UserMetricsFilter(Filter): __options__ = ("user_id", "user_email") - def __init__(self, user_id: int = None, user_email: str = None): + def __init__( + self, user_id: Union[int, None] = None, user_email: Union[str, None] = None + ): kwargs = {"userId": user_id, "userEmail": user_email} super().__init__(**kwargs) @@ -172,13 +219,13 @@ class DocumentReportFilter(Filter): def __init__( self, - submission_id: int = None, - workflow_id: int = None, - status: str = None, - created_at_start_date: datetime = None, - created_at_end_date: datetime = None, - updated_at_start_date: datetime = None, - updated_at_end_date: datetime = None, + submission_id: Union[int, None] = None, + workflow_id: Union[int, None] = None, + status: Union[str, None] = None, + created_at_start_date: Union[datetime.datetime, None] = None, + created_at_end_date: Union[datetime.datetime, None] = None, + updated_at_start_date: Union[datetime.datetime, None] = None, + updated_at_end_date: Union[datetime.datetime, None] = None, ): kwargs = {"workflowId": workflow_id, "id": submission_id, "status": status} if created_at_end_date and not created_at_start_date: @@ -186,9 +233,11 @@ def __init__( if created_at_start_date: kwargs["createdAt"] = { "from": created_at_start_date.strftime("%Y-%m-%d"), - "to": created_at_end_date.strftime("%Y-%m-%d") - if created_at_end_date is not None - else datetime.datetime.now().strftime("%Y-%m-%d"), + "to": ( + created_at_end_date.strftime("%Y-%m-%d") + if created_at_end_date is not None + else datetime.datetime.now().strftime("%Y-%m-%d") + ), } if updated_at_end_date and not updated_at_start_date: @@ -196,8 +245,10 @@ def __init__( if updated_at_start_date is not None: kwargs["updatedAt"] = { "from": updated_at_start_date.strftime("%Y-%m-%d"), - "to": 
updated_at_end_date.strftime("%Y-%m-%d") - if updated_at_end_date is not None - else datetime.datetime.now().strftime("%Y-%m-%d"), + "to": ( + updated_at_end_date.strftime("%Y-%m-%d") + if updated_at_end_date is not None + else datetime.datetime.now().strftime("%Y-%m-%d") + ), } super().__init__(**kwargs) diff --git a/indico/queries/submission.py b/indico/queries/submission.py index e1a316f0..832a3542 100644 --- a/indico/queries/submission.py +++ b/indico/queries/submission.py @@ -33,57 +33,91 @@ class ListSubmissions(PagedRequest): query = """ query ListSubmissions( - $submissionIds: [Int], - $workflowIds: [Int], - $filters: SubmissionFilter, - $limit: Int, - $orderBy: SUBMISSION_COLUMN_ENUM, - $desc: Boolean, + $submissionIds: [Int] + $workflowIds: [Int] + $filters: SubmissionFilter + $limit: Int + $orderBy: SUBMISSION_COLUMN_ENUM + $desc: Boolean $after: Int - - ){ + ) { submissions( - submissionIds: $submissionIds, - workflowIds: $workflowIds, - filters: $filters, + submissionIds: $submissionIds + workflowIds: $workflowIds + filters: $filters limit: $limit - orderBy: $orderBy, - desc: $desc, + orderBy: $orderBy + desc: $desc after: $after - - ){ + ) { submissions { + id + datasetId + workflowId + status + createdAt + updatedAt + createdBy + updatedBy + completedAt + errors + filesDeleted + inputFiles { id - datasetId - workflowId - status - inputFiles { - id - filename - filepath - filetype - fileSize - numPages - } - inputFile - inputFilename - resultFile - deleted - retrieved - errors - reviews { - id - createdAt - createdBy - completedAt - rejected - reviewType - notes - } + filepath + filename + filetype + submissionId + fileSize + numPages + } + inputFile + inputFilename + resultFile + outputFiles { + id + filepath + submissionId + componentId + createdAt + } + retrieved + autoReview { + id + submissionId + createdAt + createdBy + startedAt + completedAt + rejected + reviewType + notes + } + retries { + id + submissionId + previousErrors + previousStatus + 
retryErrors + } + reviews { + id + submissionId + createdAt + createdBy + startedAt + completedAt + rejected + reviewType + notes + } + reviewInProgress } pageInfo { - endCursor - hasNextPage + startCursor + endCursor + hasNextPage + aggregateCount } } } diff --git a/indico/types/__init__.py b/indico/types/__init__.py index 4477ab1d..d434b223 100644 --- a/indico/types/__init__.py +++ b/indico/types/__init__.py @@ -3,6 +3,7 @@ from .jobs import * from .model_group import * from .model import * +from .output_file import * from .submission_file import * from .submission import * from .workflow import * diff --git a/indico/types/base.py b/indico/types/base.py index 266bba95..82229a88 100644 --- a/indico/types/base.py +++ b/indico/types/base.py @@ -44,7 +44,7 @@ def __init__(self, **kwargs): k = cc_to_snake(k) if k in attrs: attr_type = attrs[k] - if inspect.isclass(attr_type) and issubclass(attr_type, BaseType): + if v is not None and inspect.isclass(attr_type) and issubclass(attr_type, BaseType): v = attrs[k](**v) if attr_type == JSONType: diff --git a/indico/types/output_file.py b/indico/types/output_file.py new file mode 100644 index 00000000..36f1fc8f --- /dev/null +++ b/indico/types/output_file.py @@ -0,0 +1,22 @@ +import datetime + +from indico.types import BaseType + + +class OutputFile(BaseType): + """ + An Output File in the Indico Platform. + + Attributes: + id (int): The Output file id + filepath (str): URL of the output datafile within the Indico Platform. 
+ submission_id (int): The parent Submission id + component_id (int): The id of the corresponding component the output is linked to + created_at (datetime): The date the output file was created + """ + + id: int + filepath: str + submission_id: int + component_id: int + created_at: datetime diff --git a/indico/types/submission.py b/indico/types/submission.py index 1b8c9afd..8bbd4816 100644 --- a/indico/types/submission.py +++ b/indico/types/submission.py @@ -1,5 +1,9 @@ +import datetime +from typing import Optional + from indico.types import BaseType, List -from . import SubmissionFile + +from . import OutputFile, SubmissionFile VALID_SUBMISSION_STATUSES = [ "COMPLETE", @@ -30,6 +34,7 @@ class SubmissionRetries(BaseType): retry_errors (str): Errors encountered on this retry. submission_id (int): The ID of the submission being retried. """ + id: int previous_errors: str previous_status: str @@ -37,7 +42,7 @@ class SubmissionRetries(BaseType): submission_id: int -class SubmissionReviews(BaseType): +class SubmissionReview(BaseType): f""" Information about a submission's Reviews. @@ -60,6 +65,7 @@ class SubmissionReviews(BaseType): review_type: str notes: str + class Submission(BaseType): f""" A Submission in the Indico Platform. @@ -71,32 +77,49 @@ class Submission(BaseType): to generate the result file. Attributes: - id (int): the Submission id - dataset_id (int): the Dataset id - workflow_id (int): the Workflow id + id (int): The Submission id + dataset_id (int): The Dataset id + workflow_id (int): The Workflow id status (str): status of the submission. 
One of {VALID_SUBMISSION_STATUSES} - input_files (list[SubmissionFile]): the SubmissionFiles for the Submission + created_at (datetime): Datetime the submission was created + updated_at (datetime): Datetime the submission was updated + created_by (int): Id of the user who created the submission + updated_by (int): Id of the user who updated the submission + completed_at (datetime): Datetime the submission reached a completed state + files_deleted (bool): Submission files have been deleted (True) or not deleted (False) from file store + input_files (List[SubmissionFile]): The SubmissionFiles for the Submission input_file (str): URL of the first input datafile within the Indico Platform. - input_filename (str): name of the first original file - result_file (str): URL of the result datafile within the Indico Platform + input_filename (str): Name of the first original file + result_file (str): URL of the latest result file for this submission + output_files (List[OutputFile]): List of output files from submission retrieved (bool): Whether the submission has been retrieved by a user This flag is set manually by users. - deleted (bool): Whether the submission result has been deleted from the server + auto_review (SubmissionReview): Latest auto review for submission errors (str): Any errors raised while processing the submission retries (List[SubmissionRetries]): If requested, information about previous retries of this submission. 
+ reviews (List[SubmissionReview]): Completed reviews of this submission, without changes + review_in_progress (bool): True if the submission is being actively reviewed """ id: int dataset_id: int workflow_id: int status: str + created_at: datetime + updated_at: datetime + created_by: int + updated_by: int + completed_at: datetime + files_deleted: bool input_files: List[SubmissionFile] input_file: str input_filename: str result_file: str + output_files: List[OutputFile] retrieved: bool - deleted: bool + auto_review: SubmissionReview errors: str retries: List[SubmissionRetries] - reviews: List[SubmissionReviews] + reviews: List[SubmissionReview] + review_in_progress: bool diff --git a/tests/integration/queries/test_submission.py b/tests/integration/queries/test_submission.py new file mode 100644 index 00000000..b2f0499e --- /dev/null +++ b/tests/integration/queries/test_submission.py @@ -0,0 +1,66 @@ +import pytest +from datetime import datetime + +from indico.client import IndicoClient, IndicoConfig +from indico.filters import DateRangeFilter, SubmissionFilter, SubmissionReviewFilter +from indico.queries import ListSubmissions + + +def test_list_submissions(indico): + client = IndicoClient() + + subs = client.call(ListSubmissions(limit=10)) + assert len(subs) == 10 + + +def test_list_submissions_filter_filetype(indico): + client = IndicoClient() + + subs = client.call(ListSubmissions(filters=SubmissionFilter(file_type=["PDF"]), limit=10)) + assert len(subs) > 0 + for sub in subs: + sub_filetype = sub.input_filename.split(".")[-1].upper() + assert sub_filetype == "PDF" or sub_filetype.lower() == sub.input_filename + + +@pytest.mark.parametrize( + "_input_filename, _should_contain", + [ + ("pdf", True), + ("randomstring", False) + ], +) +def test_list_submissions_filter_filename(indico, _input_filename, _should_contain): + client = IndicoClient() + + subs = client.call(ListSubmissions(filters=SubmissionFilter(input_filename=_input_filename), limit=10)) + for sub in 
subs: + assert (_input_filename in sub.input_filename) == _should_contain + + +def test_list_submissions_filter_reviews(indico): + client = IndicoClient() + + review_filter = SubmissionReviewFilter(rejected=False) + + subs = client.call(ListSubmissions(filters=SubmissionFilter(reviews=review_filter), limit=10)) + assert len(subs) >= 0 + +def test_list_submissions_filter_reviews_in_progress(indico): + client = IndicoClient() + + subs = client.call(ListSubmissions(filters=SubmissionFilter(review_in_progress=False), limit=10)) + assert len(subs) > 0 + + +def test_list_submissions_filter_created_at(indico): + client = IndicoClient() + + date_filter = DateRangeFilter( + filter_from=datetime(year=2020, month=2, day=2).strftime("%Y-%m-%d"), + filter_to=datetime.now().strftime("%Y-%m-%d") + ) + subs = client.call(ListSubmissions(filters=SubmissionFilter(created_at=date_filter), limit=10)) + assert len(subs) > 0 + subs = client.call(ListSubmissions(filters=SubmissionFilter(updated_at=date_filter), limit=10)) + assert len(subs) > 0