diff --git a/ckan/src/ckanext-igsn-theme/ckanext/igsn_theme/logic/batch_process.py b/ckan/src/ckanext-igsn-theme/ckanext/igsn_theme/logic/batch_process.py index f2fc9757..df16f5b8 100644 --- a/ckan/src/ckanext-igsn-theme/ckanext/igsn_theme/logic/batch_process.py +++ b/ckan/src/ckanext-igsn-theme/ckanext/igsn_theme/logic/batch_process.py @@ -7,37 +7,10 @@ import logging import json import re -from ckanext.igsn_theme.logic.batch_validation import validate_parent_samples, is_numeric, is_cell_empty, is_url, validate_related_resources, validate_user_keywords, validate_authors, validate_samples +from ckanext.igsn_theme.logic.batch_validation import validate_parent_samples, is_numeric, is_cell_empty, is_url, validate_related_resources, validate_user_keywords, validate_authors, validate_samples, generate_sample_name, generate_sample_title log = logging.getLogger(__name__) -def generate_sample_name(org_id, sample_type, sample_number): - - org_name= get_organization_name(org_id) - org_name = org_name.replace(' ', '_') - sample_type = sample_type.replace(' ', '_') - sample_number = sample_number.replace(' ', '_') - - name = f"{org_name}-{sample_type}-Sample-{sample_number}" - name = re.sub(r'[^a-z0-9-_]', '', name.lower()) - return name - -def generate_sample_title(org_id, sample_type, sample_number): - - org_name= get_organization_name(org_id) - org_name = org_name - sample_type = sample_type - sample_number = sample_number - - title= f"{org_name} - {sample_type} Sample {sample_number}" - return title -def get_organization_name(organization_id): - try: - organization = get_action('organization_show')({}, {'id': organization_id}) - organization_name = organization['name'] - return organization_name - except: - return None def generate_location_geojson(coordinates_list): features = [] for lat, lng in coordinates_list: @@ -64,8 +37,6 @@ def process_author_emails(sample, authors_df): def prepare_samples_data(samples_df, authors_df, related_resources_df, funding_df, org_id): 
samples_data = [] - existing_names = set() - errors = [] for _, row in samples_df.iterrows(): sample = row.to_dict() sample["author"] = process_author_emails(sample, authors_df) @@ -104,23 +75,8 @@ def prepare_samples_data(samples_df, authors_df, related_resources_df, funding_d sample["name"] = generate_sample_name(org_id, sample['sample_type'], sample['sample_number']) sample["title"] = generate_sample_title(org_id, sample['sample_type'], sample['sample_number']) - # Check for uniqueness - if sample["name"] in existing_names: - errors.append(f"Duplicate sample name: {sample['name']}") - else: - existing_names.add(sample["name"]) - samples_data.append(sample) - try: - package_list = toolkit.get_action('package_list')({}, {}) - for package in package_list: - package_data = toolkit.get_action('package_show')({}, {'id': package}) - existing_name = package_data.get('name') - if existing_name in existing_names: - errors.append(f"Sample name {existing_name} already exists in CKAN") - except Exception as e: - errors.append(f"Error fetching CKAN data: {str(e)}") - return samples_data, errors + return samples_data def process_related_resources(sample, related_resources_df): related_resources_urls = sample.get("related_resources_urls") diff --git a/ckan/src/ckanext-igsn-theme/ckanext/igsn_theme/logic/batch_validation.py b/ckan/src/ckanext-igsn-theme/ckanext/igsn_theme/logic/batch_validation.py index bed15c65..32494efe 100644 --- a/ckan/src/ckanext-igsn-theme/ckanext/igsn_theme/logic/batch_validation.py +++ b/ckan/src/ckanext-igsn-theme/ckanext/igsn_theme/logic/batch_validation.py @@ -1,8 +1,8 @@ import ckan.plugins.toolkit as toolkit +from ckan.plugins.toolkit import get_action import re import pandas as pd - def validate_sample_depth(sample_df): errors = [] @@ -443,7 +443,52 @@ def validate_authors(authors_df): errors.extend(validate_affiliation_identifier(authors_df, valid_affiliation_identifier_types)) errors.extend(validate_author_identifier(authors_df, 
# Characters that may not appear in a generated dataset name: anything other
# than lowercase ASCII letters, digits, "_" and "-".
_INVALID_NAME_CHARS = re.compile(r'[^a-z0-9_-]')


def _as_text(value):
    """Render a spreadsheet cell value as text.

    Integral floats such as ``12.0`` are rendered as ``"12"`` so that the
    decimal point (which is later stripped from names) cannot turn ``12.0``
    into ``"120"``. Every other value goes through ``str()``.
    """
    # NOTE(review): assumes numeric Excel cells may arrive as int/float
    # rather than str -- confirm against read_excel_sheets.
    if isinstance(value, float) and value.is_integer():
        return str(int(value))
    return str(value)


def _build_sample_name(org_name, sample_type, sample_number):
    """Build the dataset name from an already-resolved organization name."""
    org, kind, number = (
        _as_text(part).replace(' ', '_')
        for part in (org_name, sample_type, sample_number)
    )
    name = f"{org}-{kind}-Sample-{number}"
    return _INVALID_NAME_CHARS.sub('', name.lower())


def _require_organization_name(org_id):
    """Return the organization name or raise ``ValueError`` if it is unknown."""
    org_name = get_organization_name(org_id)
    if org_name is None:
        raise ValueError(f"Organization '{org_id}' could not be found")
    return org_name


def generate_sample_name(org_id, sample_type, sample_number):
    """Return the dataset name for a sample.

    The name has the form ``<org>-<type>-sample-<number>``: spaces become
    underscores, the result is lowercased, and every character other than
    ``a-z``, ``0-9``, ``_`` and ``-`` is removed.

    Args:
        org_id: Identifier passed to ``organization_show``.
        sample_type: Sample type cell value.
        sample_number: Sample number cell value.

    Raises:
        ValueError: If the organization cannot be looked up. Previously this
            surfaced as an ``AttributeError`` on ``None``.
    """
    org_name = _require_organization_name(org_id)
    return _build_sample_name(org_name, sample_type, sample_number)


def generate_sample_title(org_id, sample_type, sample_number):
    """Return the human-readable title for a sample.

    Args:
        org_id: Identifier passed to ``organization_show``.
        sample_type: Sample type cell value.
        sample_number: Sample number cell value.

    Raises:
        ValueError: If the organization cannot be looked up. Previously the
            title silently started with the text "None".
    """
    org_name = _require_organization_name(org_id)
    return f"{org_name} - {sample_type} Sample {sample_number}"


def get_organization_name(organization_id):
    """Return the ``name`` of a CKAN organization, or ``None`` if unavailable.

    Only the lookup failures raised by the CKAN action layer are treated as
    "organization unavailable"; any other exception is a real error and is
    allowed to propagate instead of being swallowed.
    """
    try:
        organization = toolkit.get_action('organization_show')(
            {}, {'id': organization_id}
        )
    except (
        toolkit.ObjectNotFound,
        toolkit.NotAuthorized,
        toolkit.ValidationError,
    ):
        return None
    return organization['name']


def validate_sample_names(samples_df, org_id):
    """Check that generated sample names are unique.

    A name must be unique within the uploaded sheet and must not already be
    used by a dataset returned from ``package_list``.

    Args:
        samples_df: DataFrame with ``sample_type`` and ``sample_number``
            columns.
        org_id: Identifier of the organization the samples belong to.

    Returns:
        A list of error messages; empty when every name is usable.
    """
    # No rows means no names to check, so skip the CKAN round trips.
    if samples_df.empty:
        return []

    # Resolve the organization once instead of once per row.
    org_name = get_organization_name(org_id)
    if org_name is None:
        return [f"Organization '{org_id}' could not be found"]

    errors = []
    existing_names = set()
    for _, row in samples_df.iterrows():
        name = _build_sample_name(
            org_name, row['sample_type'], row['sample_number']
        )
        if name in existing_names:
            errors.append(f"Duplicate sample name: {name}")
        else:
            existing_names.add(name)

    try:
        # package_list already yields dataset names (per the CKAN action
        # API), so a package_show call per dataset is unnecessary.
        package_list = toolkit.get_action('package_list')({}, {})
    except Exception as e:
        # Validation boundary: report the failure to the user as an error
        # entry rather than aborting the whole upload with a traceback.
        errors.append(f"Error fetching CKAN data: {str(e)}")
    else:
        errors.extend(
            f"Sample name {existing_name} already exists in CKAN"
            for existing_name in package_list
            if existing_name in existing_names
        )
    return errors
b/ckan/src/ckanext-igsn-theme/ckanext/igsn_theme/views.py index 8071d132..184fbcea 100644 --- a/ckan/src/ckanext-igsn-theme/ckanext/igsn_theme/views.py +++ b/ckan/src/ckanext-igsn-theme/ckanext/igsn_theme/views.py @@ -15,8 +15,8 @@ import pandas as pd from datetime import date import re -from ckanext.igsn_theme.logic.batch_validation import validate_parent_samples, is_numeric, is_cell_empty, is_url, validate_related_resources, validate_user_keywords, validate_authors, validate_samples -from ckanext.igsn_theme.logic.batch_process import generate_sample_name, generate_sample_title, get_organization_name, generate_location_geojson, process_author_emails, prepare_samples_data, process_related_resources, process_funding_info, get_epsg_name, set_parent_sample, find_parent_package, get_created_sample_id, read_excel_sheets +from ckanext.igsn_theme.logic.batch_validation import validate_parent_samples, validate_related_resources, validate_authors, validate_samples, validate_sample_names +from ckanext.igsn_theme.logic.batch_process import prepare_samples_data, set_parent_sample, read_excel_sheets from ckanext.igsn_theme.logic import ( email_notifications ) @@ -84,12 +84,7 @@ def process_excel(self, uploaded_file, org_id): all_errors.extend(validate_authors(authors_df)) all_errors.extend(validate_related_resources(related_resources_df)) all_errors.extend(validate_parent_samples(samples_df)) - - - - samples_data, errors = prepare_samples_data(samples_df, authors_df, related_resources_df, funding_df, org_id) - all_errors.extend(errors) - + all_errors.extend(validate_sample_names(samples_df, org_id)) if all_errors: error_list = "\n".join(f"Error {i+1}. {error}. 
" for i, error in enumerate(all_errors)) # format the error list to be displayed in human readable format @@ -97,6 +92,8 @@ def process_excel(self, uploaded_file, org_id): raise ValueError(f"""The following errors were found: {formatted_errors}""") + samples_data = prepare_samples_data(samples_df, authors_df, related_resources_df, funding_df, org_id) + return_value = { "samples": samples_data, "authors": authors_df.to_dict("records"),