-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added README.md for the data_quality_tool.
- Loading branch information
1 parent
f8cbc2f
commit a906590
Showing
37 changed files
with
1,955 additions
and
182 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,7 +26,7 @@ jobs: | |
run: | | ||
poetry config virtualenvs.create false | ||
poetry install | ||
working-directory: converter | ||
working-directory: data_quality_tool | ||
|
||
- name: Set PYTHONPATH | ||
run: echo "PYTHONPATH=${{ github.workspace }}" >> $GITHUB_ENV | ||
|
@@ -35,7 +35,7 @@ jobs: | |
run: | | ||
poetry run coverage run -m pytest | ||
poetry run coverage xml | ||
working-directory: converter/tests | ||
working-directory: data_quality_tool/tests | ||
|
||
- name: Upload Coverage to Codecov | ||
uses: codecov/[email protected] | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
# Image for the data_quality_tool Flask service, served by Gunicorn.
# Build from the directory that contains this Dockerfile and pyproject.toml.
FROM python:3.9-slim

# Poetry release to install. Pinned so that builds are reproducible and do not
# silently pick up a release that drops CLI options used below.
# Override with: docker build --build-arg POETRY_VERSION=<version> .
ARG POETRY_VERSION=1.8.3

# Install dependencies into the image's interpreter instead of a virtualenv.
# Set through the environment (equivalent to `poetry config virtualenvs.create
# false`) so the setting applies both at build time and to `poetry run` in CMD.
ENV POETRY_VIRTUALENVS_CREATE=false

WORKDIR /app

# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir "poetry==${POETRY_VERSION}"

# Copy only the dependency manifests first so the dependency layer stays cached
# until they change. The glob lets the build succeed without a poetry.lock.
COPY pyproject.toml poetry.lock* ./

# `--only main` replaces `--no-dev`, which is deprecated since Poetry 1.2 and
# removed in Poetry 2.0. `--no-root` skips the project itself, whose sources
# have not been copied yet.
RUN poetry install --only main --no-root --no-interaction --no-ansi

# Copy the application sources, then install the project itself (dependencies
# are already present, so this step is quick).
COPY . .
RUN poetry install --only main --no-interaction --no-ansi

# Port the service listens on (documentation only; publish it with `-p`).
EXPOSE 8000

# Gunicorn target and port. MODULE_NAME:VARIABLE_NAME is the WSGI application;
# the defaults resolve to `controller:app`, the target the command used before
# it read these variables.
ENV MODULE_NAME=controller \
    VARIABLE_NAME=app \
    PORT=8000

# NOTE(review): the container still runs as root. Adding a dedicated USER needs
# confirmation that the service does not write under /app at runtime.

# `sh -c` is needed for variable expansion; `exec` replaces the shell so that
# it does not remain as PID 1 in front of the server and absorb `docker stop`.
CMD ["sh", "-c", "exec poetry run gunicorn --bind 0.0.0.0:${PORT} ${MODULE_NAME}:${VARIABLE_NAME}"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
## Data quality tool service | ||
|
||
### Build docker image | ||
|
||
To build a new image, you must be in the `datacatalogue/data_quality_tool` folder; then run: | ||
|
||
``` | ||
docker build -t <USERNAME>/data_quality_tool:<IMAGETAG> . | ||
Example: | ||
docker build -t madgik/data_quality_tool:latest . | ||
``` | ||
|
||
|
||
Then start the container with: | ||
|
||
``` | ||
docker run -d -p 8000:8000 --name <CONTAINER_NAME> <USERNAME>/data_quality_tool:<IMAGETAG> | ||
Example: | ||
docker run -d -p 8000:8000 --name data_quality_tool madgik/data_quality_tool:latest | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
# Excel column name -> JSON key. Adjust as necessary.
# Note that "values" is not a strict 1-to-1 field: in JSON_EXCEL_FIELDS_MAP
# below, the JSON keys "enumerations", "minValue" and "maxValue" all map back
# to the single Excel column "values".
EXCEL_JSON_FIELDS_MAP = {
    "csvFile": "csvFile",
    "name": "label",
    "code": "code",
    "type": "type",
    "values": "enumerations",
    "unit": "units",
    "description": "description",
    "canBeNull": "canBeNull",
    "comments": "comments",
    "conceptPath": "conceptPath",
    "methodology": "methodology",
}


# Excel "type" value -> (SQL type, is-categorical flag).
EXCEL_TYPE_2_SQL_TYPE_ISCATEGORICAL_MAP = {
    "nominal": ("text", True),
    "real": ("real", False),
    "integer": ("int", False),
    "text": ("text", False),
}

# Excel column names, in order.
EXCEL_COLUMNS = [
    "csvFile",
    "name",
    "code",
    "type",
    "values",
    "unit",
    "description",
    "canBeNull",
    "comments",
    "conceptPath",
    "methodology",
]
# Subset of EXCEL_COLUMNS treated as required.
REQUIRED_COLUMNS = ["name", "code", "type", "conceptPath"]


class InvalidDataModelError(Exception):
    """Exception raised for errors in the input data model."""


# JSON key -> Excel column name. Several JSON keys share the "values" column.
JSON_EXCEL_FIELDS_MAP = {
    "label": "name",
    "code": "code",
    "type": "type",
    "enumerations": "values",
    "minValue": "values",
    "maxValue": "values",
    # The Excel column is "unit" (see EXCEL_COLUMNS and EXCEL_JSON_FIELDS_MAP);
    # this entry previously pointed at "units", which is not an Excel column.
    "units": "unit",
    "description": "description",
}

# "<number>-<number>": each number is an optionally signed integer or decimal,
# captured as groups 1 and 2 (e.g. "0.5-10").
MIN_MAX_PATTERN = r"^([-+]?\d*\.?\d+)-([-+]?\d*\.?\d+)$"
# One or more comma-separated pairs of double-quoted strings in braces,
# e.g. {"a", "b"}, {"c", "d"}.
ENUMERATION_PATTERN = r'^\{"[^"]+",\s*"[^"]+"\}(,\s*\{"[^"]+",\s*"[^"]+"\})*$'
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.