Skip to content

Commit

Permalink
DASB-434 - Added upload stats (#109)
Browse files Browse the repository at this point in the history
* DASB-434 - Update upload statistics endpoint to include more info

* DASB-434 - Updated duplicate filenames count field name

* Added completed uploads count to the upload stats endpoint result

* Updated openapi spec

---------

Co-authored-by: Matt B Krystof <[email protected]>
  • Loading branch information
mkrystof and Matt B Krystof authored May 20, 2024
1 parent 47ac9c0 commit a07b501
Show file tree
Hide file tree
Showing 4 changed files with 203 additions and 48 deletions.
133 changes: 103 additions & 30 deletions docs/processing-status-openapi-spec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -452,34 +452,42 @@ paths:
content:
application/json:
schema:
anyOf:
- $ref: '#/components/schemas/HL7Report'
- $ref: '#/components/schemas/CSVReport'
type: array
items:
anyOf:
- $ref: '#/components/schemas/HL7Report'
- $ref: '#/components/schemas/CSVReport'
'400':
description: Bad request. The identifier provided was not found.
'401':
description: Authorization information is missing or invalid.
'/report/ids/{dataStreamId}':
'/report/stage/{dataStreamId}/{stageName}':
get:
tags:
- DEX Reports
summary: Provides list of report identifiers for given criteria. (Not yet implemented)
summary: Report counts for the given criteria data stream ID and stage name.
description: >-
Provides list of report identifiers for the given criteria. **(Not yet implemented)**
Provides report counts for the given criteria.
parameters:
- name: dataStreamId
in: path
required: true
schema:
type: string
description: Filter by the data stream identifier to retrieve the report counts for.
- name: dataStreamRoute
in: query
required: false
schema:
type: string
description: >-
If specified, also filter by dataStreamRoute that was provided with the upload.
example: RoutineImmunization
- name: dataStreamId
description: If specified, also filter by data_stream_route that was provided with the upload.
example: routineImmunization
- name: stageName
in: path
required: true
schema:
type: string
description: Data Stream identifier to retrieve all the upload statuses for.
description: Filter by the stage name to retrieve the report counts for.
- name: date_start
in: query
schema:
Expand All @@ -496,28 +504,23 @@ paths:
End date of filter. Format is YYYYMMDDTHHMMSSZ (UTC format,
exclusive). Defaults to today.
example: 20231030T14103000Z
- name: days_interval
in: query
schema:
type: string
description: >-
Number of days for which the query should be executed.
responses:
'200':
description: List of report identifiers
description: Report counts for the given criteria
content:
application/json:
schema:
type: array
items:
type: object
properties:
report_id:
type: string
format: uuid
timestamp:
type: string
format: date-time
destination_id:
type: string
event_type:
type: string
anyOf:
- $ref: '#/components/schemas/HL7Report'
- $ref: '#/components/schemas/CSVReport'
'400':
description: Bad request. The identifier provided was not found.
description: Bad request. The destination identifier provided was not found.
'401':
description: Authorization information is missing or invalid.
'/info/dataStreamId':
Expand Down Expand Up @@ -690,7 +693,7 @@ paths:
description: Bad request. The destination identifier provided was not found.
'401':
description: Authorization information is missing or invalid.
'/report/counts/{uploadId}':
'/report/counts/id/{uploadId}':
get:
tags:
- DEX Status
Expand All @@ -717,6 +720,75 @@ paths:
description: Bad request. The destination identifier provided was not found.
'401':
description: Authorization information is missing or invalid.
'/report/counts/uploadStats':
get:
tags:
- DEX Status
summary: Get upload stats for the data stream ID and route.
description: >-
Get upload stats for the given data stream ID and route over the provided date range or days interval.
parameters:
- name: data_stream_id
in: query
required: true
schema:
type: string
description: Filter by the data stream identifier to retrieve the report counts for.
- name: data_stream_route
in: query
required: true
schema:
type: string
description: Filter by the data stream route that was provided with the upload.
example: routineImmunization
- name: date_start
in: query
schema:
type: string
description: >-
Start date of filter. Format is YYYYMMDDTHHMMSSZ (UTC format,
inclusive). Defaults to 3 months prior to today.
example: 20231020T14103000Z
- name: date_end
in: query
schema:
type: string
description: >-
End date of filter. Format is YYYYMMDDTHHMMSSZ (UTC format,
exclusive). Defaults to today.
example: 20231030T14103000Z
- name: days_interval
in: query
schema:
type: string
description: >-
Number of days for which the query should be executed.
responses:
'200':
description: Upload stats for the given data stream ID and route
content:
application/json:
schema:
type: object
properties:
unique_upload_ids_count:
type: integer
uploads_with_status_count:
type: integer
bad_metadata_count:
type: integer
in_progress_upload_count:
type: integer
completed_upload_count:
type: integer
uploads_with_duplicate_filenames:
type: integer
query_time_millis:
type: integer
'400':
description: Bad request. The destination identifier provided was not found.
'401':
description: Authorization information is missing or invalid.
'/report/counts':
get:
tags:
Expand Down Expand Up @@ -759,7 +831,7 @@ paths:
schema:
type: string
description: >-
Number of days for which the query should be executed.
Number of days for which the query should be executed.
responses:
'200':
description: Report counts for the given upload identifier
Expand Down Expand Up @@ -813,7 +885,7 @@ paths:
schema:
type: string
description: >-
Number of days for which the query should be executed.
Number of days for which the query should be executed.
responses:
'200':
description: Report counts for the given criteria
Expand Down Expand Up @@ -1117,6 +1189,7 @@ paths:
description: Bad request. The destination identifier provided was not found.
'401':
description: Authorization information is missing or invalid.

'/health':
get:
tags:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -191,13 +191,13 @@ public HttpResponseMessage getReportByStage(
@HttpTrigger(
name = "req",
methods = {HttpMethod.GET},
route = "report/{dataStreamId}/{stageName}",
route = "report/stage/{dataStreamId}/{stageName}",
authLevel = AuthorizationLevel.ANONYMOUS
) HttpRequestMessage<Optional<String>> request,
@BindingName("dataStreamId") String dataStreamId,
@BindingName("stageName") String stageName,
final ExecutionContext context) {
context.getLogger().info("getReportByStage: dataStreamId=" + Encode.forJava(dataStreamId) + ", stageName=" + Encode.forJava(stageName));
context.getLogger().info("GetReportForStage: dataStreamId=" + Encode.forJava(dataStreamId) + ", stageName=" + Encode.forJava(stageName));
return new GetReportFunction(request).withDataStreamId(dataStreamId, stageName);
}

Expand All @@ -206,7 +206,7 @@ public HttpResponseMessage getReportCountsByUploadId(
@HttpTrigger(
name = "req",
methods = {HttpMethod.GET},
route = "report/counts/{uploadId}",
route = "report/counts/id/{uploadId}",
authLevel = AuthorizationLevel.ANONYMOUS
) HttpRequestMessage<Optional<String>> request,
@BindingName("uploadId") String uploadId) {
Expand Down Expand Up @@ -257,15 +257,15 @@ public HttpResponseMessage getHL7DirectIndirectMessageCounts(
return new GetReportCountsFunction(request).getHL7DirectIndirectMessageCounts();
}

@FunctionName("GetBadMetadataAndUnfinishedUploadCounts")
public HttpResponseMessage GetUnfinishedUploadCounts(
@FunctionName("GetUploadStats")
public HttpResponseMessage GetUploadStats(
@HttpTrigger(
name = "req",
methods = {HttpMethod.GET},
route = "report/counts/badMetadataAndUnfinishedUploadCounts",
route = "report/counts/uploadStats",
authLevel = AuthorizationLevel.ANONYMOUS
) HttpRequestMessage<Optional<String>> request) {
return new GetReportCountsFunction(request).getBadMetadataAndUnfinishedUploadCounts();
return new GetReportCountsFunction(request).getUploadStats();
}

@FunctionName("GetInvalidMessageCounts")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -655,11 +655,16 @@ class GetReportCountsFunction(
}

/**
* Get the number of uploads stopped due to metadata issues and also unfinished upload counts.
* Get various upload stats including:
* - Unique upload ids
* - Uploads that have at least one upload status
* - Number of uploads that were rejected for bad metadata
* - Unfinished uploads or uploads in progress
* - Completed uploads
* - List of uploads with duplicate filenames.
*
* @return HttpResponseMessage
*/
fun getBadMetadataAndUnfinishedUploadCounts(): HttpResponseMessage {
fun getUploadStats(): HttpResponseMessage {
val queryParams = prepareQueryParameters(request)

// Verify the request is complete
Expand Down Expand Up @@ -689,40 +694,109 @@ class GetReportCountsFunction(

val startTime = System.currentTimeMillis()

val badMetadataCountsQuery = (
val numUniqueUploadsQuery = (
"select r.uploadId from $reportsContainerName r "
+ "where r.dataStreamId = '${queryParams["dataStreamId"]}' and r.dataStreamRoute = '${queryParams["dataStreamRoute"]}' and "
+ "$timeRangeWhereClause group by r.uploadId"
// "select value count(1) from(select r.uploadId from Reports r "
// + "where r.dataStreamId = '${queryParams["dataStreamId"]}' and r.dataStreamRoute = '${queryParams["dataStreamRoute"]}' and "
// + "$timeRangeWhereClause group by r.uploadId)"
)

val numUploadsWithStatusQuery = (
"select value count(1) "
+ "from $reportsContainerName r "
+ "where r.dataStreamId = '${queryParams["dataStreamId"]}' and r.dataStreamRoute = '${queryParams["dataStreamRoute"]}' and "
+ "r.content.schema_name = 'upload' and "
+ timeRangeWhereClause
)

val badMetadataCountQuery = (
"select value count(1) "
+ "from $reportsContainerName r "
+ "where r.dataStreamId = '${queryParams["dataStreamId"]}' and r.dataStreamRoute = '${queryParams["dataStreamRoute"]}' and "
+ "r.content.schema_name = 'dex-metadata-verify' and "
+ "ARRAY_LENGTH(r.content.issues) > 0 and $timeRangeWhereClause"
)

val unfinishedUploadsCountsQuery = (
val inProgressUploadsCountQuery = (
"select value count(1) "
+ "from $reportsContainerName r "
+ "where r.dataStreamId = '${queryParams["dataStreamId"]}' and r.dataStreamRoute = '${queryParams["dataStreamRoute"]}' and "
+ "r.content.schema_name = 'upload' and "
+ "r.content['offset'] < r.content['size'] and $timeRangeWhereClause"
)

val badMetadataCountsResult = reportsContainer.queryItems(
badMetadataCountsQuery, CosmosQueryRequestOptions(),
val completedUploadsCountQuery = (
"select value count(1) "
+ "from $reportsContainerName r "
+ "where r.dataStreamId = '${queryParams["dataStreamId"]}' and r.dataStreamRoute = '${queryParams["dataStreamRoute"]}' and "
+ "r.content.schema_name = 'upload' and "
+ "r.content['offset'] = r.content['size'] and $timeRangeWhereClause"
)

val duplicateFilenameCountQuery = (
"select * from "
+ "(select r.content.metadata.filename, count(1) as totalCount "
+ "from $reportsContainerName r "
+ "where r.dataStreamId = '${queryParams["dataStreamId"]}' and r.dataStreamRoute = '${queryParams["dataStreamRoute"]}' and "
+ "r.content.schema_name = 'dex-metadata-verify' and "
+ "$timeRangeWhereClause "
+ "group by r.content.metadata.filename"
+ ") r where r.totalCount > 1"
)

val uniqueUploadIdsResult = reportsContainer.queryItems(
numUniqueUploadsQuery, CosmosQueryRequestOptions(),
UploadCounts::class.java
)

val uniqueUploadIdsCount = uniqueUploadIdsResult.count()

val uploadsWithStatusResult = reportsContainer.queryItems(
numUploadsWithStatusQuery, CosmosQueryRequestOptions(),
Float::class.java
)

val badMetadataCount = badMetadataCountsResult.firstOrNull() ?: 0
val uploadsWithStatusCount = uploadsWithStatusResult.firstOrNull() ?: 0

val unfinishedUploadsCountsResult = reportsContainer.queryItems(
unfinishedUploadsCountsQuery, CosmosQueryRequestOptions(),
val badMetadataCountResult = reportsContainer.queryItems(
badMetadataCountQuery, CosmosQueryRequestOptions(),
Float::class.java
)

val unfinishedUploadsCount = unfinishedUploadsCountsResult.firstOrNull() ?: 0
val badMetadataCount = badMetadataCountResult.firstOrNull() ?: 0

val inProgressUploadCountResult = reportsContainer.queryItems(
inProgressUploadsCountQuery, CosmosQueryRequestOptions(),
Float::class.java
)

val inProgressUploadsCount = inProgressUploadCountResult.firstOrNull() ?: 0

val completedUploadsCountResult = reportsContainer.queryItems(
completedUploadsCountQuery, CosmosQueryRequestOptions(),
Float::class.java
)

val completedUploadsCount = completedUploadsCountResult.firstOrNull() ?: 0

val duplicateFilenameCountResult = reportsContainer.queryItems(
duplicateFilenameCountQuery, CosmosQueryRequestOptions(),
DuplicateFilenameCounts::class.java
)

val duplicateFilenames = if (duplicateFilenameCountResult.count() > 0)
duplicateFilenameCountResult.toList() else listOf()

val endTime = System.currentTimeMillis()
val countsJson = JSONObject()
.put("unique_upload_ids_count", uniqueUploadIdsCount)
.put("uploads_with_status_count", uploadsWithStatusCount)
.put("bad_metadata_count", badMetadataCount)
.put("unfinished_upload_count", unfinishedUploadsCount)
.put("in_progress_upload_count", inProgressUploadsCount)
.put("completed_upload_count", completedUploadsCount)
.put("uploads_with_duplicate_filenames", duplicateFilenames)
.put("query_time_millis", endTime - startTime)

return request
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
package gov.cdc.ocio.processingstatusapi.model.reports

/**
 * Pairs a filename with the number of times it was counted.
 *
 * Both properties are mutable and have defaults, so on the JVM the class can be
 * created with no arguments and populated afterwards.
 * NOTE(review): presumably this is what lets a query result row be deserialized
 * into the class; confirm against the caller before tightening to `val`.
 *
 * @property filename the filename the count refers to; `null` until set
 * @property totalCount how many times [filename] was counted; `0` until set
 */
data class DuplicateFilenameCounts(
    var filename: String? = null,
    var totalCount: Long = 0,
)

0 comments on commit a07b501

Please sign in to comment.