Add FEFF schema #41

Open
wants to merge 7 commits into main
Changes from 1 commit
38 changes: 38 additions & 0 deletions aimmdb/_tests/ingest/test_feff.py
@@ -0,0 +1,38 @@
import copy
import pandas as pd
from pathlib import Path

from aimmdb.ingest import load_feff_data


DATA_PATH = Path("aimmdb/_tests/data/feff/65272_C_007")


def test_load_feff_data():

    data, metadata = load_feff_data(DATA_PATH)

    assert isinstance(data, pd.DataFrame)
    assert isinstance(metadata, dict)
    assert isinstance(metadata["feff.inp"], str)
    assert isinstance(metadata["feff.out"], str)
    assert isinstance(metadata["xmu.dat-comments"], str)


def test_copy_feff_data():

    data, metadata = load_feff_data(DATA_PATH)

    data_copy = copy.deepcopy(data)

    assert data.equals(data_copy)

    metadata_copy = copy.deepcopy(metadata)

    # metadata is a plain dict, so compare with == rather than DataFrame.equals
    assert metadata == metadata_copy

    assert isinstance(data_copy, pd.DataFrame)
    assert isinstance(metadata_copy, dict)
    assert isinstance(metadata_copy["feff.inp"], str)
    assert isinstance(metadata_copy["feff.out"], str)
    assert isinstance(metadata_copy["xmu.dat-comments"], str)
Empty file added aimmdb/ingest/__init__.py
Empty file.
File renamed without changes.
8 changes: 0 additions & 8 deletions aimmdb/schemas.py
@@ -146,14 +146,6 @@ class BatteryChargeMetadataInternal(pydantic.BaseModel):
class BatteryChargeMetadata(pydantic.BaseModel, extra=pydantic.Extra.allow):
    charge: BatteryChargeMetadataInternal

class FEFFpotentials(pydantic.BaseModel, extra=pydantic.Extra.allow):
    x: Optional[str]
    ipot: int
    Z: str
    element: int
    l_scmt: int
    l_fms: int


class FEFFcards(pydantic.BaseModel, extra=pydantic.Extra.allow):
Contributor

Same as my previous comment:

This is too fine-grained. We can simply remove it for now. All of this is going into input_script.

    atoms: float
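
A minimal, hypothetical sketch of the coarser-grained shape the comment above suggests, following the pydantic pattern already used in schemas.py; the class name and field set here are illustrative only, not part of this PR:

import pydantic


class FEFFInputMetadataSketch(pydantic.BaseModel, extra=pydantic.Extra.allow):
    # keep the raw feff.inp contents verbatim instead of modeling each
    # potentials entry field-by-field as the removed FEFFpotentials model did
    input_script: str
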
407 changes: 0 additions & 407 deletions ingest/ingest_FEFF.ipynb

This file was deleted.

File renamed without changes.
170 changes: 170 additions & 0 deletions notebooks/ingest_FEFF.ipynb
@@ -0,0 +1,170 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import copy\n",
"from pathlib import Path\n",
"\n",
"import numpy as np\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from tiled.client import from_uri\n",
"client = from_uri(\"http://localhost:8000/api\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"import os\n",
"from pathlib import Path\n",
"\n",
"\n",
"DATA_PATH = Path(\"aimmdb/data/feff/65272_C_007\")\n",
"print(\"Data Path:\", DATA_PATH)\n",
"\n",
"contents = os.listdir(DATA_PATH)\n",
"print(\"Contents:\", contents)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from load_FEFF_Data import load_feff_data\n",
"\n",
"data, metadata = load_feff_data(DATA_PATH)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def ingest_feff(client, df, verbose=False):\n",
" \"\"\"\n",
" Upload the FEFF dataset to database\n",
" \"\"\"\n",
"\n",
" for (name, prep), g in df.groupby([\"sample.name\", \"sample.prep\"]):\n",
" if verbose:\n",
" print(f\"{name}: {prep}, {len(g)}\")\n",
"\n",
" sample_id = client.write_sample({\"name\" : name, \"prep\" : prep})\n",
"\n",
" for i, row in g.iterrows():\n",
" feff_df, _ = read_dat(row.file)\n",
" metadata = row.metadata\n",
" metadata[\"dataset\"] = \"feff\"\n",
" metadata[\"sample_id\"] = sample_id\n",
" client[\"uid\"].write_dataframe(feff_df, metadata=metadata, specs=[\"FEFF\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(\"starting ingestion...\")\n",
"ingest_feff(client, feff, verbose=True)\n",
"print(\"finished.\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"FEFFInputMetadata.schema()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"FEFFOutputMetadata.schema()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# we will enforce that XAS metadata satisfies the following schema\n",
"ExperimentalXASMetadata.schema()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"client[\"uid\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# with the correct metadata we can write to the server\n",
"# NOTE this doesn't prevent you from writing garbage but does help\n",
"df = pd.DataFrame({\"a\" : np.random.rand(100), \"b\" : np.random.rand(100)})\n",
"metadata = {\"dataset\" : \"feff\", \"foo\" : \"bar\", \"element\" : {\"symbol\" : \"Au\", \"edge\" : \"K\"}, \"facility\" : {\"name\" : \"ALS\"}, \"beamline\" : {\"name\" : \"8.0.1\"}}\n",
"node = client[\"uid\"].write_dataframe(df, metadata=metadata, specs=[\"FEFF\"])\n",
"node"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.10.4 ('my_pymatgen')",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "8cf392b7cd98023928c855fd79964086ca343b5f82a42ebb28f5e83ba8cfe45c"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"id": "3d0110f7-ba1d-4817-95e0-1a9ec70770a0",
"metadata": {},
"outputs": [],
@@ -34,10 +34,47 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"id": "1ff6333b-690f-4119-a2b2-bbe5c94c3112",
"metadata": {},
"outputs": [],
"outputs": [
Contributor

All of the notebooks we store in GitHub should have their outputs stripped. This is because often outputs can inadvertently contain images or just lots of text, and this can be very space-intensive. There are a variety of ways to do this, including by using the command palette in Jupyter or by doing it from the command line, e.g. this.
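
One common command-line option (assuming nbconvert is available) is:

jupyter nbconvert --clear-output --inplace notebooks/ingest_FEFF.ipynb

nbstripout is another widely used tool; running nbstripout --install inside the repository registers a git filter that strips outputs automatically on commit.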

{
"data": {
"text/plain": [
"{'title': 'ExperimentalXASMetadata',\n",
" 'type': 'object',\n",
" 'properties': {'element': {'$ref': '#/definitions/XDIElement'},\n",
" 'measurement_type': {'default': 'xas',\n",
" 'allOf': [{'$ref': '#/definitions/MeasurementEnum'}]},\n",
" 'dataset': {'title': 'Dataset', 'type': 'string'},\n",
" 'sample_id': {'title': 'Sample Id', 'type': 'string'},\n",
" 'facility': {'$ref': '#/definitions/FacilityMetadata'},\n",
" 'beamline': {'$ref': '#/definitions/BeamlineMetadata'}},\n",
" 'required': ['element', 'dataset', 'facility', 'beamline'],\n",
" 'definitions': {'XDIElement': {'title': 'XDIElement',\n",
" 'type': 'object',\n",
" 'properties': {'symbol': {'title': 'Symbol', 'type': 'string'},\n",
" 'edge': {'title': 'Edge', 'type': 'string'}},\n",
" 'required': ['symbol', 'edge']},\n",
" 'MeasurementEnum': {'title': 'MeasurementEnum',\n",
" 'description': 'An enumeration.',\n",
" 'enum': ['xas', 'rixs'],\n",
" 'type': 'string'},\n",
" 'FacilityMetadata': {'title': 'FacilityMetadata',\n",
" 'type': 'object',\n",
" 'properties': {'name': {'title': 'Name', 'type': 'string'}},\n",
" 'required': ['name']},\n",
" 'BeamlineMetadata': {'title': 'BeamlineMetadata',\n",
" 'type': 'object',\n",
" 'properties': {'name': {'title': 'Name', 'type': 'string'}},\n",
" 'required': ['name']}}}"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# we will enforce that XAS metadata satisfies the following schema\n",
"ExperimentalXASMetadata.schema()"
@@ -56,10 +56,21 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"id": "cbf5e6a8-77b8-4c1d-80f5-f773cbcc5681",
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"ExperimentalXASMetadata(element=XDIElement(symbol='Fe', edge='K'), measurement_type='xas', dataset='example', sample_id=None, facility=FacilityMetadata(name=None), beamline=BeamlineMetadata(name='8.0.1'))"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# example of valid metadata\n",
"metadata = {\n",
@@ -1056,9 +1104,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:aimm]",
"display_name": "Python 3.9.13 ('aimm')",
"language": "python",
"name": "conda-env-aimm-py"
"name": "python3"
},
"language_info": {
"codemirror_mode": {
@@ -1070,7 +1118,12 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
"version": "3.9.13"
},
"vscode": {
"interpreter": {
"hash": "189d756eac8438d33e11f8e23aa09bdc4c99760ed11a3bfefa464e31dcca4c4a"
}
}
},
"nbformat": 4,