Skip to content

Commit

Permalink
Merge pull request #273 from AuScope/dev-sample
Browse files Browse the repository at this point in the history
Merge Dev sample in master
  • Loading branch information
bmotevalli authored Aug 26, 2024
2 parents f961518 + 082f168 commit f68f623
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 57 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,37 +7,10 @@
import logging
import json
import re
from ckanext.igsn_theme.logic.batch_validation import validate_parent_samples, is_numeric, is_cell_empty, is_url, validate_related_resources, validate_user_keywords, validate_authors, validate_samples
from ckanext.igsn_theme.logic.batch_validation import validate_parent_samples, is_numeric, is_cell_empty, is_url, validate_related_resources, validate_user_keywords, validate_authors, validate_samples, generate_sample_name, generate_sample_title
log = logging.getLogger(__name__)


def generate_sample_name(org_id, sample_type, sample_number):
    """Build the URL-safe dataset name for a sample.

    The pieces are joined as ``<org>-<type>-Sample-<number>`` after spaces
    have been turned into underscores; the result is lower-cased and every
    character other than ``a-z``, ``0-9``, ``-`` and ``_`` is removed.

    Args:
        org_id: Identifier forwarded to ``get_organization_name``.
        sample_type: Sample type as a string.
        sample_number: Sample number as a string.

    Returns:
        The sanitised, lower-case name string.

    Raises:
        ValueError: If the organization name cannot be resolved.
    """
    org_name = get_organization_name(org_id)
    if org_name is None:
        # get_organization_name signals a failed lookup with None; fail with
        # a clear message instead of an AttributeError on None.replace().
        raise ValueError(
            f"Could not resolve organization name for id: {org_id}"
        )

    parts = (org_name, sample_type, "Sample", sample_number)
    name = "-".join(part.replace(' ', '_') for part in parts)
    return re.sub(r'[^a-z0-9-_]', '', name.lower())

def generate_sample_title(org_id, sample_type, sample_number):
    """Compose the human-readable title for a sample.

    Args:
        org_id: Identifier forwarded to ``get_organization_name``.
        sample_type: Sample type, inserted into the title unchanged.
        sample_number: Sample number, inserted into the title unchanged.

    Returns:
        A string of the form
        ``"<organization name> - <sample_type> Sample <sample_number>"``.
    """
    organization = get_organization_name(org_id)
    return f"{organization} - {sample_type} Sample {sample_number}"
def get_organization_name(organization_id):
    """Look up an organization's ``name`` through the ``organization_show`` action.

    Args:
        organization_id: Value passed as ``id`` to ``organization_show``.

    Returns:
        The ``name`` entry of the returned organization, or ``None`` when the
        lookup fails.
    """
    try:
        organization = get_action('organization_show')({}, {'id': organization_id})
        return organization['name']
    except Exception:
        # Best-effort lookup: any ordinary failure yields None. Catching
        # Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        return None
def generate_location_geojson(coordinates_list):
features = []
for lat, lng in coordinates_list:
Expand All @@ -64,8 +37,6 @@ def process_author_emails(sample, authors_df):

def prepare_samples_data(samples_df, authors_df, related_resources_df, funding_df, org_id):
samples_data = []
existing_names = set()
errors = []
for _, row in samples_df.iterrows():
sample = row.to_dict()
sample["author"] = process_author_emails(sample, authors_df)
Expand Down Expand Up @@ -104,23 +75,8 @@ def prepare_samples_data(samples_df, authors_df, related_resources_df, funding_d

sample["name"] = generate_sample_name(org_id, sample['sample_type'], sample['sample_number'])
sample["title"] = generate_sample_title(org_id, sample['sample_type'], sample['sample_number'])
# Check for uniqueness
if sample["name"] in existing_names:
errors.append(f"Duplicate sample name: {sample['name']}")
else:
existing_names.add(sample["name"])

samples_data.append(sample)
try:
package_list = toolkit.get_action('package_list')({}, {})
for package in package_list:
package_data = toolkit.get_action('package_show')({}, {'id': package})
existing_name = package_data.get('name')
if existing_name in existing_names:
errors.append(f"Sample name {existing_name} already exists in CKAN")
except Exception as e:
errors.append(f"Error fetching CKAN data: {str(e)}")
return samples_data, errors
return samples_data

def process_related_resources(sample, related_resources_df):
related_resources_urls = sample.get("related_resources_urls")
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import ckan.plugins.toolkit as toolkit
from ckan.plugins.toolkit import get_action
import re
import pandas as pd


def validate_sample_depth(sample_df):
errors = []

Expand Down Expand Up @@ -443,7 +443,52 @@ def validate_authors(authors_df):
errors.extend(validate_affiliation_identifier(authors_df, valid_affiliation_identifier_types))
errors.extend(validate_author_identifier(authors_df, valid_identifier_types))
return errors

def generate_sample_name(org_id, sample_type, sample_number):
    """Build the URL-safe dataset name for a sample.

    The pieces are joined as ``<org>-<type>-Sample-<number>`` after spaces
    have been turned into underscores; the result is lower-cased and every
    character other than ``a-z``, ``0-9``, ``-`` and ``_`` is removed.

    Args:
        org_id: Identifier forwarded to ``get_organization_name``.
        sample_type: Sample type as a string.
        sample_number: Sample number as a string.

    Returns:
        The sanitised, lower-case name string.

    Raises:
        ValueError: If the organization name cannot be resolved.
    """
    org_name = get_organization_name(org_id)
    if org_name is None:
        # get_organization_name signals a failed lookup with None; fail with
        # a clear message instead of an AttributeError on None.replace().
        raise ValueError(
            f"Could not resolve organization name for id: {org_id}"
        )

    parts = (org_name, sample_type, "Sample", sample_number)
    name = "-".join(part.replace(' ', '_') for part in parts)
    return re.sub(r'[^a-z0-9-_]', '', name.lower())
def generate_sample_title(org_id, sample_type, sample_number):
    """Compose the human-readable title for a sample.

    Args:
        org_id: Identifier forwarded to ``get_organization_name``.
        sample_type: Sample type, inserted into the title unchanged.
        sample_number: Sample number, inserted into the title unchanged.

    Returns:
        A string of the form
        ``"<organization name> - <sample_type> Sample <sample_number>"``.
    """
    organization = get_organization_name(org_id)
    return f"{organization} - {sample_type} Sample {sample_number}"
def get_organization_name(organization_id):
    """Look up an organization's ``name`` through the ``organization_show`` action.

    Args:
        organization_id: Value passed as ``id`` to ``organization_show``.

    Returns:
        The ``name`` entry of the returned organization, or ``None`` when the
        lookup fails.
    """
    try:
        organization = get_action('organization_show')({}, {'id': organization_id})
        return organization['name']
    except Exception:
        # Best-effort lookup: any ordinary failure yields None. Catching
        # Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        return None
def validate_sample_names(samples_df, org_id):
    """Check that generated sample names are unique in the batch and in CKAN.

    A name is generated for every row with ``generate_sample_name``. Repeats
    within the batch are reported first, then every name that is also
    returned by the ``package_list`` action.

    Args:
        samples_df: DataFrame of samples; each row must provide
            ``sample_type`` and ``sample_number``.
        org_id: Organization identifier forwarded to ``generate_sample_name``.

    Returns:
        A list of error message strings; empty when no conflict was found.
    """
    errors = []
    batch_names = set()
    for _, row in samples_df.iterrows():
        sample = row.to_dict()
        name = generate_sample_name(org_id, sample['sample_type'], sample['sample_number'])
        if name in batch_names:
            errors.append(f"Duplicate sample name: {name}")
        else:
            batch_names.add(name)

    try:
        # package_list already yields dataset names, so they are compared
        # directly; a package_show round-trip per dataset is not needed.
        for existing_name in toolkit.get_action('package_list')({}, {}):
            if existing_name in batch_names:
                errors.append(f"Sample name {existing_name} already exists in CKAN")
    except Exception as e:
        # Report lookup failures as a validation error rather than aborting.
        errors.append(f"Error fetching CKAN data: {str(e)}")
    return errors

def validate_samples(samples_df, related_resources_df, authors_df, funding_df):
errors = []
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

<p> The AuScope Virtual Research Environment (AVRE) is an AuScope program that focuses on delivering data, services, and tools to support the future research of the Australian geoscience research community. AVRE provides a rich ecosystem of Findable, Accessible, Interoperable and Reusable (FAIR) data and tools to a diverse range of Australian research organisations, government geological surveys and the international community. The AVRE program has contributed to the development and spearheaded the adoption of the International Generic Sample Numbers (IGSN) system in Australia in collaboration with the IGSN e.V. Organisation, DataCite, and the Australian Research Data Commons (ARDC).</p>
<p>
The AuScope Sample Repository aims to offer a digital repository for the AuScope community to register persistent identifiers for specimen data and publish specimen metadata following the
The AuScope Sample Repository aims to offer a digital repository for the AuScope communities to register persistent identifiers for specimens and publish specimen metadata following the
<a href="https://www.go-fair.org/fair-principles/" class="custom-link" target="_blank"> FAIR Data Guiding Principles.</a>
The AuScope communities comprise NCRIS-funded data projects and Australian geoscience research communities. The AuScope Sample Repository is operated as a self-service facility for the storage, dissemination, and publication of metadata from AuScope-funded projects and instruments.
</p>
Expand Down
13 changes: 5 additions & 8 deletions ckan/src/ckanext-igsn-theme/ckanext/igsn_theme/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
import pandas as pd
from datetime import date
import re
from ckanext.igsn_theme.logic.batch_validation import validate_parent_samples, is_numeric, is_cell_empty, is_url, validate_related_resources, validate_user_keywords, validate_authors, validate_samples
from ckanext.igsn_theme.logic.batch_process import generate_sample_name, generate_sample_title, get_organization_name, generate_location_geojson, process_author_emails, prepare_samples_data, process_related_resources, process_funding_info, get_epsg_name, set_parent_sample, find_parent_package, get_created_sample_id, read_excel_sheets
from ckanext.igsn_theme.logic.batch_validation import validate_parent_samples, validate_related_resources, validate_authors, validate_samples, validate_sample_names
from ckanext.igsn_theme.logic.batch_process import prepare_samples_data, set_parent_sample, read_excel_sheets
from ckanext.igsn_theme.logic import (
email_notifications
)
Expand Down Expand Up @@ -84,19 +84,16 @@ def process_excel(self, uploaded_file, org_id):
all_errors.extend(validate_authors(authors_df))
all_errors.extend(validate_related_resources(related_resources_df))
all_errors.extend(validate_parent_samples(samples_df))



samples_data, errors = prepare_samples_data(samples_df, authors_df, related_resources_df, funding_df, org_id)
all_errors.extend(errors)

all_errors.extend(validate_sample_names(samples_df, org_id))
if all_errors:
error_list = "\n".join(f"Error {i+1}. {error}. " for i, error in enumerate(all_errors))
# format the error list to be displayed in human readable format
formatted_errors = f"<pre style='white-space: pre-wrap;'>{error_list}</pre>"
raise ValueError(f"""The following errors were found:
{formatted_errors}""")

samples_data = prepare_samples_data(samples_df, authors_df, related_resources_df, funding_df, org_id)

return_value = {
"samples": samples_data,
"authors": authors_df.to_dict("records"),
Expand Down

0 comments on commit f68f623

Please sign in to comment.