Skip to content

Commit

Permalink
Merge branch 'batch-validation' of https://github.com/AuScope/ckan-do…
Browse files Browse the repository at this point in the history
…cker into dev-sample
  • Loading branch information
NTaherifar committed Aug 22, 2024
2 parents ebe4b25 + 7797803 commit 082f168
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 56 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,37 +7,10 @@
import logging
import json
import re
from ckanext.igsn_theme.logic.batch_validation import validate_parent_samples, is_numeric, is_cell_empty, is_url, validate_related_resources, validate_user_keywords, validate_authors, validate_samples
from ckanext.igsn_theme.logic.batch_validation import validate_parent_samples, is_numeric, is_cell_empty, is_url, validate_related_resources, validate_user_keywords, validate_authors, validate_samples, generate_sample_name, generate_sample_title
log = logging.getLogger(__name__)


def generate_sample_name(org_id, sample_type, sample_number):
    """Build the URL-safe dataset name for a sample.

    The name has the form ``<org>-<type>-sample-<number>``: spaces in each
    component become underscores, the result is lower-cased, and every
    character other than ``a-z``, ``0-9``, ``-`` and ``_`` is dropped.

    Args:
        org_id: Identifier passed to ``get_organization_name``.
        sample_type: Sample type as a string.
        sample_number: Sample number as a string.

    Returns:
        The sanitised sample name.

    Raises:
        ValueError: If the organization name cannot be resolved.
    """
    org_name = get_organization_name(org_id)
    if org_name is None:
        # get_organization_name returns None when the lookup fails; fail with
        # a clear message instead of an AttributeError on None.replace().
        raise ValueError(
            f"Could not resolve organization name for id {org_id!r}"
        )

    org_name = org_name.replace(' ', '_')
    sample_type = sample_type.replace(' ', '_')
    sample_number = sample_number.replace(' ', '_')

    name = f"{org_name}-{sample_type}-Sample-{sample_number}"
    # The hyphen is placed last in the class so it is unambiguously literal.
    return re.sub(r'[^a-z0-9_-]', '', name.lower())

def generate_sample_title(org_id, sample_type, sample_number):
    """Build the human-readable title for a sample.

    Args:
        org_id: Identifier passed to ``get_organization_name``.
        sample_type: Sample type, interpolated as-is.
        sample_number: Sample number, interpolated as-is.

    Returns:
        A string of the form ``"<org> - <type> Sample <number>"``.
    """
    org_name = get_organization_name(org_id)
    # NOTE(review): get_organization_name returns None when the lookup fails,
    # which renders as the text "None" here - confirm whether that is wanted.
    return f"{org_name} - {sample_type} Sample {sample_number}"
def get_organization_name(organization_id):
    """Return the ``name`` field of an organization, or ``None`` on failure.

    Args:
        organization_id: Value passed as ``id`` to the ``organization_show``
            action.

    Returns:
        The organization's ``name``, or ``None`` if the action call or the
        field lookup raises.
    """
    try:
        organization = get_action('organization_show')({}, {'id': organization_id})
        return organization['name']
    except Exception:
        # Best-effort lookup: any ordinary failure maps to None, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        return None
def generate_location_geojson(coordinates_list):
features = []
for lat, lng in coordinates_list:
Expand All @@ -64,8 +37,6 @@ def process_author_emails(sample, authors_df):

def prepare_samples_data(samples_df, authors_df, related_resources_df, funding_df, org_id):
samples_data = []
existing_names = set()
errors = []
for _, row in samples_df.iterrows():
sample = row.to_dict()
sample["author"] = process_author_emails(sample, authors_df)
Expand Down Expand Up @@ -104,23 +75,8 @@ def prepare_samples_data(samples_df, authors_df, related_resources_df, funding_d

sample["name"] = generate_sample_name(org_id, sample['sample_type'], sample['sample_number'])
sample["title"] = generate_sample_title(org_id, sample['sample_type'], sample['sample_number'])
# Check for uniqueness
if sample["name"] in existing_names:
errors.append(f"Duplicate sample name: {sample['name']}")
else:
existing_names.add(sample["name"])

samples_data.append(sample)
try:
package_list = toolkit.get_action('package_list')({}, {})
for package in package_list:
package_data = toolkit.get_action('package_show')({}, {'id': package})
existing_name = package_data.get('name')
if existing_name in existing_names:
errors.append(f"Sample name {existing_name} already exists in CKAN")
except Exception as e:
errors.append(f"Error fetching CKAN data: {str(e)}")
return samples_data, errors
return samples_data

def process_related_resources(sample, related_resources_df):
related_resources_urls = sample.get("related_resources_urls")
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import ckan.plugins.toolkit as toolkit
from ckan.plugins.toolkit import get_action
import re
import pandas as pd


def validate_sample_depth(sample_df):
errors = []

Expand Down Expand Up @@ -443,7 +443,52 @@ def validate_authors(authors_df):
errors.extend(validate_affiliation_identifier(authors_df, valid_affiliation_identifier_types))
errors.extend(validate_author_identifier(authors_df, valid_identifier_types))
return errors

def generate_sample_name(org_id, sample_type, sample_number):
    """Build the URL-safe dataset name for a sample.

    The name has the form ``<org>-<type>-sample-<number>``: spaces in each
    component become underscores, the result is lower-cased, and every
    character other than ``a-z``, ``0-9``, ``-`` and ``_`` is dropped.

    Args:
        org_id: Identifier passed to ``get_organization_name``.
        sample_type: Sample type as a string.
        sample_number: Sample number as a string.

    Returns:
        The sanitised sample name.

    Raises:
        ValueError: If the organization name cannot be resolved.
    """
    org_name = get_organization_name(org_id)
    if org_name is None:
        # get_organization_name returns None when the lookup fails; fail with
        # a clear message instead of an AttributeError on None.replace().
        raise ValueError(
            f"Could not resolve organization name for id {org_id!r}"
        )

    org_name = org_name.replace(' ', '_')
    sample_type = sample_type.replace(' ', '_')
    sample_number = sample_number.replace(' ', '_')

    name = f"{org_name}-{sample_type}-Sample-{sample_number}"
    # The hyphen is placed last in the class so it is unambiguously literal.
    return re.sub(r'[^a-z0-9_-]', '', name.lower())
def generate_sample_title(org_id, sample_type, sample_number):
    """Build the human-readable title for a sample.

    Args:
        org_id: Identifier passed to ``get_organization_name``.
        sample_type: Sample type, interpolated as-is.
        sample_number: Sample number, interpolated as-is.

    Returns:
        A string of the form ``"<org> - <type> Sample <number>"``.
    """
    org_name = get_organization_name(org_id)
    # NOTE(review): get_organization_name returns None when the lookup fails,
    # which renders as the text "None" here - confirm whether that is wanted.
    return f"{org_name} - {sample_type} Sample {sample_number}"
def get_organization_name(organization_id):
    """Return the ``name`` field of an organization, or ``None`` on failure.

    Args:
        organization_id: Value passed as ``id`` to the ``organization_show``
            action.

    Returns:
        The organization's ``name``, or ``None`` if the action call or the
        field lookup raises.
    """
    try:
        organization = get_action('organization_show')({}, {'id': organization_id})
        return organization['name']
    except Exception:
        # Best-effort lookup: any ordinary failure maps to None, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        return None
def validate_sample_names(samples_df, org_id):
    """Check that generated sample names are unique in the batch and in CKAN.

    A name is generated for every row of ``samples_df`` with
    ``generate_sample_name``. Names repeated within the batch are reported,
    as are names that already appear in the ``package_list`` action result.

    Args:
        samples_df: DataFrame whose rows provide ``sample_type`` and
            ``sample_number``.
        org_id: Organization identifier forwarded to ``generate_sample_name``.

    Returns:
        A list of error message strings; empty when no problem was found.
    """
    errors = []
    batch_names = set()

    for _, row in samples_df.iterrows():
        sample = row.to_dict()
        name = generate_sample_name(org_id, sample['sample_type'], sample['sample_number'])
        # Check for uniqueness within the uploaded batch.
        if name in batch_names:
            errors.append(f"Duplicate sample name: {name}")
        else:
            batch_names.add(name)

    try:
        # package_list already yields dataset names, so a package_show call
        # per dataset (one request each) is not needed to read the name.
        for existing_name in toolkit.get_action('package_list')({}, {}):
            if existing_name in batch_names:
                errors.append(f"Sample name {existing_name} already exists in CKAN")
    except Exception as e:
        # Surface lookup failures as a validation error rather than aborting.
        errors.append(f"Error fetching CKAN data: {str(e)}")

    return errors

def validate_samples(samples_df, related_resources_df, authors_df, funding_df):
errors = []
Expand Down
13 changes: 5 additions & 8 deletions ckan/src/ckanext-igsn-theme/ckanext/igsn_theme/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
import pandas as pd
from datetime import date
import re
from ckanext.igsn_theme.logic.batch_validation import validate_parent_samples, is_numeric, is_cell_empty, is_url, validate_related_resources, validate_user_keywords, validate_authors, validate_samples
from ckanext.igsn_theme.logic.batch_process import generate_sample_name, generate_sample_title, get_organization_name, generate_location_geojson, process_author_emails, prepare_samples_data, process_related_resources, process_funding_info, get_epsg_name, set_parent_sample, find_parent_package, get_created_sample_id, read_excel_sheets
from ckanext.igsn_theme.logic.batch_validation import validate_parent_samples, validate_related_resources, validate_authors, validate_samples, validate_sample_names
from ckanext.igsn_theme.logic.batch_process import prepare_samples_data, set_parent_sample, read_excel_sheets
from ckanext.igsn_theme.logic import (
email_notifications
)
Expand Down Expand Up @@ -84,19 +84,16 @@ def process_excel(self, uploaded_file, org_id):
all_errors.extend(validate_authors(authors_df))
all_errors.extend(validate_related_resources(related_resources_df))
all_errors.extend(validate_parent_samples(samples_df))



samples_data, errors = prepare_samples_data(samples_df, authors_df, related_resources_df, funding_df, org_id)
all_errors.extend(errors)

all_errors.extend(validate_sample_names(samples_df, org_id))
if all_errors:
error_list = "\n".join(f"Error {i+1}. {error}. " for i, error in enumerate(all_errors))
# format the error list to be displayed in human readable format
formatted_errors = f"<pre style='white-space: pre-wrap;'>{error_list}</pre>"
raise ValueError(f"""The following errors were found:
{formatted_errors}""")

samples_data = prepare_samples_data(samples_df, authors_df, related_resources_df, funding_df, org_id)

return_value = {
"samples": samples_data,
"authors": authors_df.to_dict("records"),
Expand Down

0 comments on commit 082f168

Please sign in to comment.