"""Tests for loading FEFF calculation output via ``load_feff_data``."""

import copy
from pathlib import Path

import pandas as pd

# ``aimmdb/ingest/__init__.py`` is empty, so the loader has to be imported
# from the module that actually defines it.
from aimmdb.ingest.load_FEFF_Data import load_feff_data


# Resolve the fixture directory relative to this test file
# (aimmdb/_tests/ingest/test_feff.py -> aimmdb/_tests/data/feff/...) so the
# tests do not depend on the directory pytest is launched from.
DATA_PATH = Path(__file__).resolve().parents[1] / "data" / "feff" / "65272_C_007"

# Metadata keys that ``load_feff_data`` is expected to populate with text.
_TEXT_KEYS = ("feff.inp", "feff.out", "xmu.dat-comments")


def test_load_feff_data():
    """The loader returns a DataFrame plus a dict of string metadata."""
    # verbose=False keeps the test output free of the full data dump.
    data, metadata = load_feff_data(DATA_PATH, verbose=False)

    assert isinstance(data, pd.DataFrame)
    assert isinstance(metadata, dict)
    for key in _TEXT_KEYS:
        assert isinstance(metadata[key], str)


def test_copy_feff_data():
    """Deep copies of the loaded data and metadata compare equal to the originals."""
    data, metadata = load_feff_data(DATA_PATH, verbose=False)

    data_copy = copy.deepcopy(data)
    assert data.equals(data_copy)

    metadata_copy = copy.deepcopy(metadata)
    # ``metadata`` is a plain dict: dicts have no ``.equals`` method, so
    # compare with ``==`` instead.
    assert metadata == metadata_copy

    assert isinstance(data_copy, pd.DataFrame)
    assert isinstance(metadata_copy, dict)
    for key in _TEXT_KEYS:
        assert isinstance(metadata_copy[key], str)


# Backward-compatible alias for the original name. The original lacked the
# ``test_`` prefix and was therefore never collected by pytest.
copy_feff_data = test_copy_feff_data
def load_feff_data(data_path, verbose=True):
    """Load one FEFF calculation directory into a dataframe and metadata.

    Reads ``xmu.dat`` as whitespace-separated numeric columns and collects the
    raw text of ``feff.inp``, ``feff.out`` and the ``#`` comment lines of
    ``xmu.dat`` as metadata.

    Parameters
    ----------
    data_path : os.PathLike
        Directory containing the feff.inp, feff.out, and xmu.dat files.
    verbose : bool, optional
        Prints the file paths, the parsed data and the metadata if True.

    Returns
    -------
    feff_data : pandas.DataFrame
        Contents of xmu.dat with columns
        ``omega``, ``e``, ``k``, ``mu``, ``mu0``, ``chi``.
    metadata : dict
        Keys ``"feff.inp"`` and ``"feff.out"`` hold the full text of those
        files; ``"xmu.dat-comments"`` holds the newline-joined lines of
        xmu.dat that start with ``#``.

    Raises
    ------
    FileNotFoundError
        If any of the three expected files is missing from ``data_path``.
    """
    data_path = Path(data_path)

    feff_inp = data_path / "feff.inp"
    feff_out = data_path / "feff.out"
    xmu_dat = data_path / "xmu.dat"

    # Fail early with one message naming every missing file, instead of a
    # partial failure after some of the inputs have already been parsed.
    missing = [p.name for p in (feff_inp, feff_out, xmu_dat) if not p.is_file()]
    if missing:
        raise FileNotFoundError(
            f"missing FEFF file(s) in {data_path}: {', '.join(missing)}"
        )

    data = pd.read_csv(
        xmu_dat,
        sep=r"\s+",  # raw string: "\s" is an invalid escape in a normal literal
        header=None,
        names=["omega", "e", "k", "mu", "mu0", "chi"],
        comment="#",
    )

    # Decode explicitly as UTF-8 (what read_csv uses by default) so the
    # metadata does not depend on the platform's locale encoding.
    metadata = {
        "feff.inp": feff_inp.read_text(encoding="utf-8"),
        "feff.out": feff_out.read_text(encoding="utf-8"),
    }

    # read_csv discards the comment header, so keep it alongside the data.
    comment_lines = [
        line
        for line in xmu_dat.read_text(encoding="utf-8").splitlines()
        if line.startswith("#")
    ]
    metadata["xmu.dat-comments"] = "\n".join(comment_lines)

    if verbose:
        print("FEFF Input:", feff_inp)
        print("FEFF Output:", feff_out)
        print("FEFF Data:", xmu_dat)
        print(data)
        print(metadata)

    return data, metadata
b/notebooks/ingest_FEFF.ipynb new file mode 100644 index 0000000..bddaaa5 --- /dev/null +++ b/notebooks/ingest_FEFF.ipynb @@ -0,0 +1,189 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import copy\n", + "from pathlib import Path\n", + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "import nbconvert" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from tiled.client import from_uri\n", + "client = from_uri(\"http://localhost:8000/api\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "import os\n", + "from pathlib import Path\n", + "\n", + "\n", + "DATA_PATH = Path(\"C:/Users/msega/aimmdb/aimmdb/_tests/data/feff/65272_C_007/\")\n", + "\n", + "#DATA_PATH = Path(\"aimmdb/data/feff/65272_C_007\")\n", + "print(\"Data Path:\", DATA_PATH)\n", + "\n", + "contents = os.listdir(DATA_PATH)\n", + "print(\"Contents:\", contents)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from aimmdb.ingest.load_FEFF_Data import load_feff_data\n", + "\n", + "data, metadata = load_feff_data(DATA_PATH)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def ingest_feff(client, data_path, verbose=False):\n", + " \"\"\"\n", + " Parameters\n", + " ----------\n", + " client : tiled.client.Client\n", + " The client to use to connect to the server.\n", + " data : os.PathLike\n", + " path to the feff.inp, feff.out, and xmu.dat files.\n", + " verbose : bool, optional\n", + " Prints debug information if True.\n", + " \"\"\"\n", + " client = from_uri(\"http://localhost:8000/api\")\n", + " data_path = Path(\"C:/Users/msega/aimmdb/aimmdb/_tests/data/feff/65272_C_007/\")\n", + "\n", + " files = list(data_path.rglob(\"*\"))\n", + " print(\"found 
{len(files)} files to ingest\")\n", + " print(\"Ingesting FEFF data from:\", data_path)\n", + "\n", + "\n", + " feff_data, metadata = load_feff_data(data_path, verbose=False)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from aimmdb.ingest.load_FEFF_Data import load_feff_data\n", + "from tiled.client import from_uri\n", + "from aimmdb.ingest.load_FEFF_Data import load_feff_data\n", + "\n", + "client = from_uri(\"http://localhost:8000/api\")\n", + "\n", + "data, metadata = load_feff_data(DATA_PATH, verbose=False)\n", + "\n", + "client[\"uid\"].write_dataframe(data, metadata, specs=[\"FEFF\"])\n", + "\n", + "#ingest_feff(client, DATA_PATH, verbose=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"starting ingestion...\")\n", + "ingest_feff(client, DATA_PATH, verbose=True)\n", + "print(\"finished.\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import aimmdb.schemas as schemas\n", + "\n", + "schemas.FEFFInputMetadata.schema()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import aimmdb.schemas as schemas\n", + "\n", + "schemas.FEFFOutputMetadata.schema()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "import aimmdb.schemas as schemas\n", + "schemas.FEFFcards.schema()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#show feff.inp as a schema\n", + "import aimmdb.schemas as schemas\n", + "\n", + "schemas.FEFFInputMetadata.schema()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.9.13 ('aimm')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + 
"version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "189d756eac8438d33e11f8e23aa09bdc4c99760ed11a3bfefa464e31dcca4c4a" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/ingest/ingest_newville_example.ipynb b/notebooks/ingest_newville_example.ipynb similarity index 92% rename from ingest/ingest_newville_example.ipynb rename to notebooks/ingest_newville_example.ipynb index f178340..5851cf8 100644 --- a/ingest/ingest_newville_example.ipynb +++ b/notebooks/ingest_newville_example.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "3d0110f7-ba1d-4817-95e0-1a9ec70770a0", "metadata": {}, "outputs": [], @@ -34,10 +34,47 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "1ff6333b-690f-4119-a2b2-bbe5c94c3112", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{'title': 'ExperimentalXASMetadata',\n", + " 'type': 'object',\n", + " 'properties': {'element': {'$ref': '#/definitions/XDIElement'},\n", + " 'measurement_type': {'default': 'xas',\n", + " 'allOf': [{'$ref': '#/definitions/MeasurementEnum'}]},\n", + " 'dataset': {'title': 'Dataset', 'type': 'string'},\n", + " 'sample_id': {'title': 'Sample Id', 'type': 'string'},\n", + " 'facility': {'$ref': '#/definitions/FacilityMetadata'},\n", + " 'beamline': {'$ref': '#/definitions/BeamlineMetadata'}},\n", + " 'required': ['element', 'dataset', 'facility', 'beamline'],\n", + " 'definitions': {'XDIElement': {'title': 'XDIElement',\n", + " 'type': 'object',\n", + " 'properties': {'symbol': {'title': 'Symbol', 'type': 'string'},\n", + " 'edge': {'title': 'Edge', 'type': 'string'}},\n", + " 'required': ['symbol', 'edge']},\n", + " 'MeasurementEnum': {'title': 'MeasurementEnum',\n", + " 
'description': 'An enumeration.',\n", + " 'enum': ['xas', 'rixs'],\n", + " 'type': 'string'},\n", + " 'FacilityMetadata': {'title': 'FacilityMetadata',\n", + " 'type': 'object',\n", + " 'properties': {'name': {'title': 'Name', 'type': 'string'}},\n", + " 'required': ['name']},\n", + " 'BeamlineMetadata': {'title': 'BeamlineMetadata',\n", + " 'type': 'object',\n", + " 'properties': {'name': {'title': 'Name', 'type': 'string'}},\n", + " 'required': ['name']}}}" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# we will enforce that XAS metadata satisfies the following schema\n", "ExperimentalXASMetadata.schema()" @@ -56,10 +93,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "cbf5e6a8-77b8-4c1d-80f5-f773cbcc5681", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "ExperimentalXASMetadata(element=XDIElement(symbol='Fe', edge='K'), measurement_type='xas', dataset='example', sample_id=None, facility=FacilityMetadata(name=None), beamline=BeamlineMetadata(name='8.0.1'))" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# example of valid metadata\n", "metadata = {\n", @@ -1056,9 +1104,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python [conda env:aimm]", + "display_name": "Python 3.9.13 ('aimm')", "language": "python", - "name": "conda-env-aimm-py" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -1070,7 +1118,12 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.4" + "version": "3.9.13" + }, + "vscode": { + "interpreter": { + "hash": "189d756eac8438d33e11f8e23aa09bdc4c99760ed11a3bfefa464e31dcca4c4a" + } } }, "nbformat": 4,