-
Notifications
You must be signed in to change notification settings - Fork 10
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add FEFF schema #41
base: main
Are you sure you want to change the base?
Add FEFF schema #41
Changes from 1 commit
aecd753
afe5b50
69b7b8d
aac4c9e
6269e1b
edf3720
96ce6d7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
import copy | ||
import pandas as pd | ||
from pathlib import Path | ||
|
||
from aimmdb.ingest import load_feff_data | ||
|
||
|
||
DATA_PATH = Path("aimmdb/_tests/data/feff/65272_C_007") | ||
|
||
|
||
def test_load_feff_data():
    """Check the types of what ``load_feff_data`` returns for the sample directory.

    The loader is expected to hand back a ``(data, metadata)`` pair in which
    ``data`` is a pandas DataFrame and ``metadata`` is a dict whose
    ``"feff.inp"``, ``"feff.out"`` and ``"xmu.dat-comments"`` entries are
    strings.
    """
    loaded = load_feff_data(DATA_PATH)
    data, metadata = loaded

    assert isinstance(data, pd.DataFrame)
    assert isinstance(metadata, dict)

    # Each of these metadata entries must be present and be a string.
    for key in ("feff.inp", "feff.out", "xmu.dat-comments"):
        assert isinstance(metadata[key], str)
|
||
|
||
def copy_feff_data():
    """Check that the loaded FEFF data and metadata survive ``copy.deepcopy``.

    Loads the sample FEFF directory, deep-copies both the DataFrame and the
    metadata dict, and asserts that each copy compares equal to its original
    and still has the expected types.
    """
    data, metadata = load_feff_data(DATA_PATH)

    data_copy = copy.deepcopy(data)
    metadata_copy = copy.deepcopy(metadata)

    # DataFrames are compared with ``.equals`` (element-wise, NaN-aware).
    assert data.equals(data_copy)

    # ``metadata`` is a dict, which has no ``.equals`` method; calling it
    # raised AttributeError. Plain ``==`` performs the intended comparison.
    # NOTE(review): assumes every metadata value supports ``==`` returning a
    # bool (the entries checked below are strings) -- confirm against the loader.
    assert metadata == metadata_copy

    assert isinstance(data_copy, pd.DataFrame)
    assert isinstance(metadata_copy, dict)
    for key in ("feff.inp", "feff.out", "xmu.dat-comments"):
        assert isinstance(metadata_copy[key], str)


# pytest only collects functions whose names start with ``test``; without this
# alias the check above is never executed. The original name is kept so any
# existing reference to ``copy_feff_data`` continues to work.
test_copy_feff_data = copy_feff_data
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,170 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import copy\n", | ||
"from pathlib import Path\n", | ||
"\n", | ||
"import numpy as np\n", | ||
"import pandas as pd" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from tiled.client import from_uri\n", | ||
"client = from_uri(\"http://localhost:8000/api\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"\n", | ||
"import os\n", | ||
"from pathlib import Path\n", | ||
"\n", | ||
"\n", | ||
"DATA_PATH = Path(\"aimmdb/data/feff/65272_C_007\")\n", | ||
"print(\"Data Path:\", DATA_PATH)\n", | ||
"\n", | ||
"contents = os.listdir(DATA_PATH)\n", | ||
"print(\"Contents:\", contents)\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from load_FEFF_Data import load_feff_data\n", | ||
"\n", | ||
"data, metadata = load_feff_data(DATA_PATH)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"def ingest_feff(client, df, verbose=False):\n", | ||
" \"\"\"\n", | ||
" Upload the FEFF dataset to database\n", | ||
" \"\"\"\n", | ||
"\n", | ||
" for (name, prep), g in df.groupby([\"sample.name\", \"sample.prep\"]):\n", | ||
" if verbose:\n", | ||
" print(f\"{name}: {prep}, {len(g)}\")\n", | ||
"\n", | ||
" sample_id = client.write_sample({\"name\" : name, \"prep\" : prep})\n", | ||
"\n", | ||
" for i, row in g.iterrows():\n", | ||
" feff_df, _ = read_dat(row.file)\n", | ||
" metadata = row.metadata\n", | ||
" metadata[\"dataset\"] = \"feff\"\n", | ||
" metadata[\"sample_id\"] = sample_id\n", | ||
" client[\"uid\"].write_dataframe(feff_df, metadata=metadata, specs=[\"FEFF\"])" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"print(\"starting ingestion...\")\n", | ||
"ingest_feff(client, feff, verbose=True)\n", | ||
"print(\"finished.\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"FEFFInputMetadata.schema()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"FEFFOutputMetadata.schema()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# we will enforce that XAS metadata satisfies the following schema\n", | ||
"ExperimentalXASMetadata.schema()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"client[\"uid\"]" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# with the correct metadata we can write to the server\n", | ||
"# NOTE this doesn't prevent you from writing garbage but does help\n", | ||
"df = pd.DataFrame({\"a\" : np.random.rand(100), \"b\" : np.random.rand(100)})\n", | ||
"metadata = {\"dataset\" : \"feff\", \"foo\" : \"bar\", \"element\" : {\"symbol\" : \"Au\", \"edge\" : \"K\"}, \"facility\" : {\"name\" : \"ALS\"}, \"beamline\" : {\"name\" : \"8.0.1\"}}\n", | ||
"node = client[\"uid\"].write_dataframe(df, metadata=metadata, specs=[\"FEFF\"])\n", | ||
"node" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3.10.4 ('my_pymatgen')", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.10.4" | ||
}, | ||
"orig_nbformat": 4, | ||
"vscode": { | ||
"interpreter": { | ||
"hash": "8cf392b7cd98023928c855fd79964086ca343b5f82a42ebb28f5e83ba8cfe45c" | ||
} | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,7 +2,7 @@ | |
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"execution_count": 1, | ||
"id": "3d0110f7-ba1d-4817-95e0-1a9ec70770a0", | ||
"metadata": {}, | ||
"outputs": [], | ||
|
@@ -34,10 +34,47 @@ | |
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"execution_count": 2, | ||
"id": "1ff6333b-690f-4119-a2b2-bbe5c94c3112", | ||
"metadata": {}, | ||
"outputs": [], | ||
"outputs": [ | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. All of the notebooks we store in GitHub should have their outputs stripped. This is because outputs can inadvertently contain images or large amounts of text, which can be very space-intensive. There are a variety of ways to do this, including by using the command palette in Jupyter or by doing it from the command line, e.g. this. |
||
{ | ||
"data": { | ||
"text/plain": [ | ||
"{'title': 'ExperimentalXASMetadata',\n", | ||
" 'type': 'object',\n", | ||
" 'properties': {'element': {'$ref': '#/definitions/XDIElement'},\n", | ||
" 'measurement_type': {'default': 'xas',\n", | ||
" 'allOf': [{'$ref': '#/definitions/MeasurementEnum'}]},\n", | ||
" 'dataset': {'title': 'Dataset', 'type': 'string'},\n", | ||
" 'sample_id': {'title': 'Sample Id', 'type': 'string'},\n", | ||
" 'facility': {'$ref': '#/definitions/FacilityMetadata'},\n", | ||
" 'beamline': {'$ref': '#/definitions/BeamlineMetadata'}},\n", | ||
" 'required': ['element', 'dataset', 'facility', 'beamline'],\n", | ||
" 'definitions': {'XDIElement': {'title': 'XDIElement',\n", | ||
" 'type': 'object',\n", | ||
" 'properties': {'symbol': {'title': 'Symbol', 'type': 'string'},\n", | ||
" 'edge': {'title': 'Edge', 'type': 'string'}},\n", | ||
" 'required': ['symbol', 'edge']},\n", | ||
" 'MeasurementEnum': {'title': 'MeasurementEnum',\n", | ||
" 'description': 'An enumeration.',\n", | ||
" 'enum': ['xas', 'rixs'],\n", | ||
" 'type': 'string'},\n", | ||
" 'FacilityMetadata': {'title': 'FacilityMetadata',\n", | ||
" 'type': 'object',\n", | ||
" 'properties': {'name': {'title': 'Name', 'type': 'string'}},\n", | ||
" 'required': ['name']},\n", | ||
" 'BeamlineMetadata': {'title': 'BeamlineMetadata',\n", | ||
" 'type': 'object',\n", | ||
" 'properties': {'name': {'title': 'Name', 'type': 'string'}},\n", | ||
" 'required': ['name']}}}" | ||
] | ||
}, | ||
"execution_count": 2, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"# we will enforce that XAS metadata satisfies the following schema\n", | ||
"ExperimentalXASMetadata.schema()" | ||
|
@@ -56,10 +93,21 @@ | |
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"execution_count": 3, | ||
"id": "cbf5e6a8-77b8-4c1d-80f5-f773cbcc5681", | ||
"metadata": {}, | ||
"outputs": [], | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"ExperimentalXASMetadata(element=XDIElement(symbol='Fe', edge='K'), measurement_type='xas', dataset='example', sample_id=None, facility=FacilityMetadata(name=None), beamline=BeamlineMetadata(name='8.0.1'))" | ||
] | ||
}, | ||
"execution_count": 3, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"# example of valid metadata\n", | ||
"metadata = {\n", | ||
|
@@ -1056,9 +1104,9 @@ | |
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python [conda env:aimm]", | ||
"display_name": "Python 3.9.13 ('aimm')", | ||
"language": "python", | ||
"name": "conda-env-aimm-py" | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
|
@@ -1070,7 +1118,12 @@ | |
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.10.4" | ||
"version": "3.9.13" | ||
}, | ||
"vscode": { | ||
"interpreter": { | ||
"hash": "189d756eac8438d33e11f8e23aa09bdc4c99760ed11a3bfefa464e31dcca4c4a" | ||
} | ||
} | ||
}, | ||
"nbformat": 4, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Same as my previous comment: