From aecd753e83d9e12202480dc1188f4732b0af16a4 Mon Sep 17 00:00:00 2001 From: msegal347 Date: Fri, 14 Oct 2022 09:21:16 -0400 Subject: [PATCH 1/7] update 2022OCT14 0921 --- aimmdb/adapters/feff.py | 6 + aimmdb/schemas.py | 63 ++++++ aimmdb/validation.py | 13 +- ingest/ingest_FEFF.ipynb | 416 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 497 insertions(+), 1 deletion(-) create mode 100644 aimmdb/adapters/feff.py create mode 100644 ingest/ingest_FEFF.ipynb diff --git a/aimmdb/adapters/feff.py b/aimmdb/adapters/feff.py new file mode 100644 index 0000000..d166f84 --- /dev/null +++ b/aimmdb/adapters/feff.py @@ -0,0 +1,6 @@ +from tiled.adapters.dataframe import DataFrameAdapter + + +# dataframe adapter representing FEFF data +class FEFFAdapter(DataFrameAdapter): + specs = ["FEFF"] diff --git a/aimmdb/schemas.py b/aimmdb/schemas.py index 0471325..57ca102 100644 --- a/aimmdb/schemas.py +++ b/aimmdb/schemas.py @@ -145,3 +145,66 @@ class BatteryChargeMetadataInternal(pydantic.BaseModel): class BatteryChargeMetadata(pydantic.BaseModel, extra=pydantic.Extra.allow): charge: BatteryChargeMetadataInternal + +class FEFFpotentials(pydantic.BaseModel, extra=pydantic.Extra.allow): + x: Optional[str] + ipot: int + Z: str + element: int + l_scmt: int + l_fms: int + #FEFFpotentials = (x, ipot, Z, element, l_scmt, l_fms) + #converted_potentials = str(FEFFpotentials) + #smiles = string + +class FEFFcards(pydantic.BaseModel, extra=pydantic.Extra.allow): + atoms: float + control: int + exchange: float + title: Optional[str] + rpath: int + potentials: FEFFpotentials + xanes: float + edge: str + scf: float + fms: float + S02: float + corehole: str + #smiles = string + +#class FEFFDataFrame(DataFrameStructure): + #file_input = "xmu.dat" + #omega: float + #e: float + #k: float + #mu: float + #mu0: float + #chi: float + #FEFFDataFrame_inputs = (omega, e, k, mu, mu0, chi) + #smiles = string + + #need to write validation for the Dataframe + +#class ExperimentalFEFFMetadata(pydantic.BaseModel, extra=pydantic.Extra.allow): + #FileType = "feff.out"; "feff.inp" + #measurement_type: MeasurementEnum = pydantic.Field("feff", const=True) + #title = measurement_type(pydantic.field("title")) + #absorbing_atom = measurement_type(pydantic.field("edge")) + #cards = measurement_type(FEFFcards) + #smiles = string + +class FEFFInputMetadata(pydantic.BaseModel, extra=pydantic.Extra.allow): + element: XDIElement + measurement_type: MeasurementEnum = pydantic.Field("FEFF", const=True) + dataset: str + sample_id: str + input_script: str + +class FEFFOutputMetadata(pydantic.BaseModel, extra=pydantic.Extra.allow): + element: XDIElement + measurement_type: MeasurementEnum = pydantic.Field("FEFF", const=True) + dataset: str + sample_id: str + #change to output log + output_script: str + diff --git a/aimmdb/validation.py b/aimmdb/validation.py index ae07c45..168663b 100644 --- a/aimmdb/validation.py +++ b/aimmdb/validation.py @@ -1,7 +1,7 @@ import pydantic from tiled.validation_registration import ValidationError -from .schemas import BatteryChargeMetadata, ExperimentalXASMetadata +from .schemas import BatteryChargeMetadata, ExperimentalXASMetadata, ExperimentalFEFFMetadata def validate_xas_metadata(metadata, structure_family, structure, spec): @@ -45,3 +45,14 @@ def validate_battery_charge_data(metadata, structure_family, structure, spec): metadata = BatteryChargeMetadata.parse_obj(metadata) except pydantic.ValidationError as e: raise ValidationError(str(e)) + +def validate_feff(metadata, structure_family, structure, spec): + #validate_xas_metadata(metadata, structure_family, structure, spec) + + columns = set(structure.macro.columns) + + required_columns = {"omega", "e", "k", "mu", "mu0", "chi"} + + + if not required_columns.issubset(columns): + raise ValidationError(f"columns {columns} must contain {required_columns}") \ No newline at end of file diff --git a/ingest/ingest_FEFF.ipynb b/ingest/ingest_FEFF.ipynb new file mode 100644 index 0000000..0fdf0f0 --- /dev/null +++ b/ingest/ingest_FEFF.ipynb @@ -0,0 +1,416 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "import copy\n", + "import pathlib\n", + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "from tiled.client import from_uri\n", + "from tiled.examples.xdi import read_xdi\n", + "from tiled.queries import Key\n", + "\n", + "from aimmdb.schemas import ExperimentalXASMetadata\n", + "from aimmdb.schemas import FEFFInputMetadata\n", + "from aimmdb.schemas import FEFFOutputMetadata" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "ename": "ConnectError", + "evalue": "[WinError 10061] No connection could be made because the target machine actively refused it", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mConnectionRefusedError\u001b[0m Traceback (most recent call last)", + "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpcore\\_exceptions.py:8\u001b[0m, in \u001b[0;36mmap_exceptions\u001b[1;34m(map)\u001b[0m\n\u001b[0;32m 7\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m----> 8\u001b[0m \u001b[39myield\u001b[39;00m\n\u001b[0;32m 9\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m \u001b[39mas\u001b[39;00m exc: \u001b[39m# noqa: PIE786\u001b[39;00m\n", + "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpcore\\backends\\sync.py:86\u001b[0m, in \u001b[0;36mSyncBackend.connect_tcp\u001b[1;34m(self, host, port, timeout, local_address)\u001b[0m\n\u001b[0;32m 85\u001b[0m \u001b[39mwith\u001b[39;00m map_exceptions(exc_map):\n\u001b[1;32m---> 86\u001b[0m sock \u001b[39m=\u001b[39m socket\u001b[39m.\u001b[39;49mcreate_connection(\n\u001b[0;32m 87\u001b[0m address, timeout, source_address\u001b[39m=\u001b[39;49msource_address\n\u001b[0;32m 88\u001b[0m )\n\u001b[0;32m 89\u001b[0m \u001b[39mreturn\u001b[39;00m SyncStream(sock)\n", + "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\socket.py:844\u001b[0m, in \u001b[0;36mcreate_connection\u001b[1;34m(address, timeout, source_address)\u001b[0m\n\u001b[0;32m 843\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m--> 844\u001b[0m \u001b[39mraise\u001b[39;00m err\n\u001b[0;32m 845\u001b[0m \u001b[39mfinally\u001b[39;00m:\n\u001b[0;32m 846\u001b[0m \u001b[39m# Break explicitly a reference cycle\u001b[39;00m\n", + "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\socket.py:832\u001b[0m, in \u001b[0;36mcreate_connection\u001b[1;34m(address, timeout, source_address)\u001b[0m\n\u001b[0;32m 831\u001b[0m sock\u001b[39m.\u001b[39mbind(source_address)\n\u001b[1;32m--> 832\u001b[0m sock\u001b[39m.\u001b[39;49mconnect(sa)\n\u001b[0;32m 833\u001b[0m \u001b[39m# Break explicitly a reference cycle\u001b[39;00m\n", + "\u001b[1;31mConnectionRefusedError\u001b[0m: [WinError 10061] No connection could be made because the target machine actively refused it", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[1;31mConnectError\u001b[0m Traceback (most recent call last)", + "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpx\\_transports\\default.py:60\u001b[0m, in \u001b[0;36mmap_httpcore_exceptions\u001b[1;34m()\u001b[0m\n\u001b[0;32m 59\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m---> 60\u001b[0m \u001b[39myield\u001b[39;00m\n\u001b[0;32m 61\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m \u001b[39mas\u001b[39;00m exc: \u001b[39m# noqa: PIE-786\u001b[39;00m\n", + "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpx\\_transports\\default.py:218\u001b[0m, in \u001b[0;36mHTTPTransport.handle_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 217\u001b[0m \u001b[39mwith\u001b[39;00m map_httpcore_exceptions():\n\u001b[1;32m--> 218\u001b[0m resp \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_pool\u001b[39m.\u001b[39;49mhandle_request(req)\n\u001b[0;32m 220\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39misinstance\u001b[39m(resp\u001b[39m.\u001b[39mstream, typing\u001b[39m.\u001b[39mIterable)\n", + "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpcore\\_sync\\connection_pool.py:253\u001b[0m, in \u001b[0;36mConnectionPool.handle_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 252\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mresponse_closed(status)\n\u001b[1;32m--> 253\u001b[0m \u001b[39mraise\u001b[39;00m exc\n\u001b[0;32m 254\u001b[0m \u001b[39melse\u001b[39;00m:\n", + "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpcore\\_sync\\connection_pool.py:237\u001b[0m, in \u001b[0;36mConnectionPool.handle_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 236\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m--> 237\u001b[0m response \u001b[39m=\u001b[39m connection\u001b[39m.\u001b[39;49mhandle_request(request)\n\u001b[0;32m 238\u001b[0m \u001b[39mexcept\u001b[39;00m ConnectionNotAvailable:\n\u001b[0;32m 239\u001b[0m \u001b[39m# The ConnectionNotAvailable exception is a special case, that\u001b[39;00m\n\u001b[0;32m 240\u001b[0m \u001b[39m# indicates we need to retry the request on a new connection.\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 244\u001b[0m \u001b[39m# might end up as an HTTP/2 connection, but which actually ends\u001b[39;00m\n\u001b[0;32m 245\u001b[0m \u001b[39m# up as HTTP/1.1.\u001b[39;00m\n", + "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpcore\\_sync\\connection.py:86\u001b[0m, in \u001b[0;36mHTTPConnection.handle_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 85\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_connect_failed \u001b[39m=\u001b[39m \u001b[39mTrue\u001b[39;00m\n\u001b[1;32m---> 86\u001b[0m \u001b[39mraise\u001b[39;00m exc\n\u001b[0;32m 87\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_connection\u001b[39m.\u001b[39mis_available():\n", + "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpcore\\_sync\\connection.py:63\u001b[0m, in \u001b[0;36mHTTPConnection.handle_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 62\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m---> 63\u001b[0m stream \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_connect(request)\n\u001b[0;32m 65\u001b[0m ssl_object \u001b[39m=\u001b[39m stream\u001b[39m.\u001b[39mget_extra_info(\u001b[39m\"\u001b[39m\u001b[39mssl_object\u001b[39m\u001b[39m\"\u001b[39m)\n", + "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpcore\\_sync\\connection.py:111\u001b[0m, in \u001b[0;36mHTTPConnection._connect\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 108\u001b[0m \u001b[39mwith\u001b[39;00m Trace(\n\u001b[0;32m 109\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mconnection.connect_tcp\u001b[39m\u001b[39m\"\u001b[39m, request, kwargs\n\u001b[0;32m 110\u001b[0m ) \u001b[39mas\u001b[39;00m trace:\n\u001b[1;32m--> 111\u001b[0m stream \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_network_backend\u001b[39m.\u001b[39mconnect_tcp(\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[0;32m 112\u001b[0m trace\u001b[39m.\u001b[39mreturn_value \u001b[39m=\u001b[39m stream\n", + "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpcore\\backends\\sync.py:86\u001b[0m, in \u001b[0;36mSyncBackend.connect_tcp\u001b[1;34m(self, host, port, timeout, local_address)\u001b[0m\n\u001b[0;32m 85\u001b[0m \u001b[39mwith\u001b[39;00m map_exceptions(exc_map):\n\u001b[1;32m---> 86\u001b[0m sock \u001b[39m=\u001b[39m socket\u001b[39m.\u001b[39mcreate_connection(\n\u001b[0;32m 87\u001b[0m address, timeout, source_address\u001b[39m=\u001b[39msource_address\n\u001b[0;32m 88\u001b[0m )\n\u001b[0;32m 89\u001b[0m \u001b[39mreturn\u001b[39;00m SyncStream(sock)\n", + "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\contextlib.py:137\u001b[0m, in \u001b[0;36m_GeneratorContextManager.__exit__\u001b[1;34m(self, typ, value, traceback)\u001b[0m\n\u001b[0;32m 136\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m--> 137\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mgen\u001b[39m.\u001b[39;49mthrow(typ, value, traceback)\n\u001b[0;32m 138\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mStopIteration\u001b[39;00m \u001b[39mas\u001b[39;00m exc:\n\u001b[0;32m 139\u001b[0m \u001b[39m# Suppress StopIteration *unless* it's the same exception that\u001b[39;00m\n\u001b[0;32m 140\u001b[0m \u001b[39m# was passed to throw(). This prevents a StopIteration\u001b[39;00m\n\u001b[0;32m 141\u001b[0m \u001b[39m# raised inside the \"with\" statement from being suppressed.\u001b[39;00m\n", + "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpcore\\_exceptions.py:12\u001b[0m, in \u001b[0;36mmap_exceptions\u001b[1;34m(map)\u001b[0m\n\u001b[0;32m 11\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(exc, from_exc):\n\u001b[1;32m---> 12\u001b[0m \u001b[39mraise\u001b[39;00m to_exc(exc)\n\u001b[0;32m 13\u001b[0m \u001b[39mraise\u001b[39;00m\n", + "\u001b[1;31mConnectError\u001b[0m: [WinError 10061] No connection could be made because the target machine actively refused it", + "\nThe above exception was the direct cause of the following exception:\n", + "\u001b[1;31mConnectError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn [6], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mtiled\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mclient\u001b[39;00m \u001b[39mimport\u001b[39;00m from_uri\n\u001b[1;32m----> 2\u001b[0m client \u001b[39m=\u001b[39m from_uri(\u001b[39m\"\u001b[39;49m\u001b[39mhttp://localhost:8000/api\u001b[39;49m\u001b[39m\"\u001b[39;49m)\n", + "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\tiled\\client\\constructors.py:101\u001b[0m, in \u001b[0;36mfrom_uri\u001b[1;34m(uri, structure_clients, cache, offline, username, auth_provider, api_key, token_cache, verify, prompt_for_reauthentication, headers, timeout)\u001b[0m\n\u001b[0;32m 91\u001b[0m timeout \u001b[39m=\u001b[39m httpx\u001b[39m.\u001b[39mTimeout(\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mDEFAULT_TIMEOUT_PARAMS)\n\u001b[0;32m 93\u001b[0m client \u001b[39m=\u001b[39m httpx\u001b[39m.\u001b[39mClient(\n\u001b[0;32m 94\u001b[0m base_url\u001b[39m=\u001b[39mbase_uri,\n\u001b[0;32m 95\u001b[0m verify\u001b[39m=\u001b[39mverify,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 99\u001b[0m params\u001b[39m=\u001b[39mparams,\n\u001b[0;32m 100\u001b[0m )\n\u001b[1;32m--> 101\u001b[0m context \u001b[39m=\u001b[39m Context(\n\u001b[0;32m 102\u001b[0m client,\n\u001b[0;32m 103\u001b[0m username\u001b[39m=\u001b[39;49musername,\n\u001b[0;32m 104\u001b[0m auth_provider\u001b[39m=\u001b[39;49mauth_provider,\n\u001b[0;32m 105\u001b[0m api_key\u001b[39m=\u001b[39;49mapi_key,\n\u001b[0;32m 106\u001b[0m cache\u001b[39m=\u001b[39;49mcache,\n\u001b[0;32m 107\u001b[0m offline\u001b[39m=\u001b[39;49moffline,\n\u001b[0;32m 108\u001b[0m token_cache\u001b[39m=\u001b[39;49mtoken_cache,\n\u001b[0;32m 109\u001b[0m prompt_for_reauthentication\u001b[39m=\u001b[39;49mprompt_for_reauthentication,\n\u001b[0;32m 110\u001b[0m )\n\u001b[0;32m 111\u001b[0m \u001b[39mreturn\u001b[39;00m from_context(context, structure_clients\u001b[39m=\u001b[39mstructure_clients)\n", + "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\tiled\\client\\context.py:228\u001b[0m, in \u001b[0;36mContext.__init__\u001b[1;34m(self, client, username, auth_provider, api_key, cache, offline, token_cache, prompt_for_reauthentication, app)\u001b[0m\n\u001b[0;32m 223\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mdisable_cache(allow_read\u001b[39m=\u001b[39m\u001b[39mFalse\u001b[39;00m, allow_write\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m):\n\u001b[0;32m 224\u001b[0m \u001b[39m# Make this request manually to inject custom error handling.\u001b[39;00m\n\u001b[0;32m 225\u001b[0m request \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_client\u001b[39m.\u001b[39mbuild_request(\n\u001b[0;32m 226\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mGET\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39m/\u001b[39m\u001b[39m\"\u001b[39m, params\u001b[39m=\u001b[39m{\u001b[39m\"\u001b[39m\u001b[39mroot_path\u001b[39m\u001b[39m\"\u001b[39m: \u001b[39mTrue\u001b[39;00m}\n\u001b[0;32m 227\u001b[0m )\n\u001b[1;32m--> 228\u001b[0m response \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_client\u001b[39m.\u001b[39;49msend(request)\n\u001b[0;32m 229\u001b[0m \u001b[39m# Handle case where user pastes in a link like\u001b[39;00m\n\u001b[0;32m 230\u001b[0m \u001b[39m# https://example.com/some/subpath/node/metadata/a/b/c\u001b[39;00m\n\u001b[0;32m 231\u001b[0m \u001b[39m# and it requires authentication. The 401 response includes a header\u001b[39;00m\n\u001b[0;32m 232\u001b[0m \u001b[39m# that points us to https://examples.com/some/subpath where we\u001b[39;00m\n\u001b[0;32m 233\u001b[0m \u001b[39m# can see the authentication providers and their endpoints.\u001b[39;00m\n\u001b[0;32m 234\u001b[0m \u001b[39mif\u001b[39;00m response\u001b[39m.\u001b[39mstatus_code \u001b[39m==\u001b[39m \u001b[39m401\u001b[39m:\n", + "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpx\\_client.py:902\u001b[0m, in \u001b[0;36mClient.send\u001b[1;34m(self, request, stream, auth, follow_redirects)\u001b[0m\n\u001b[0;32m 894\u001b[0m follow_redirects \u001b[39m=\u001b[39m (\n\u001b[0;32m 895\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mfollow_redirects\n\u001b[0;32m 896\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(follow_redirects, UseClientDefault)\n\u001b[0;32m 897\u001b[0m \u001b[39melse\u001b[39;00m follow_redirects\n\u001b[0;32m 898\u001b[0m )\n\u001b[0;32m 900\u001b[0m auth \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_build_request_auth(request, auth)\n\u001b[1;32m--> 902\u001b[0m response \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_send_handling_auth(\n\u001b[0;32m 903\u001b[0m request,\n\u001b[0;32m 904\u001b[0m auth\u001b[39m=\u001b[39;49mauth,\n\u001b[0;32m 905\u001b[0m follow_redirects\u001b[39m=\u001b[39;49mfollow_redirects,\n\u001b[0;32m 906\u001b[0m history\u001b[39m=\u001b[39;49m[],\n\u001b[0;32m 907\u001b[0m )\n\u001b[0;32m 908\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 909\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m stream:\n", + "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpx\\_client.py:930\u001b[0m, in \u001b[0;36mClient._send_handling_auth\u001b[1;34m(self, request, auth, follow_redirects, history)\u001b[0m\n\u001b[0;32m 927\u001b[0m request \u001b[39m=\u001b[39m \u001b[39mnext\u001b[39m(auth_flow)\n\u001b[0;32m 929\u001b[0m \u001b[39mwhile\u001b[39;00m \u001b[39mTrue\u001b[39;00m:\n\u001b[1;32m--> 930\u001b[0m response \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_send_handling_redirects(\n\u001b[0;32m 931\u001b[0m request,\n\u001b[0;32m 932\u001b[0m follow_redirects\u001b[39m=\u001b[39;49mfollow_redirects,\n\u001b[0;32m 933\u001b[0m history\u001b[39m=\u001b[39;49mhistory,\n\u001b[0;32m 934\u001b[0m )\n\u001b[0;32m 935\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 936\u001b[0m \u001b[39mtry\u001b[39;00m:\n", + "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpx\\_client.py:967\u001b[0m, in \u001b[0;36mClient._send_handling_redirects\u001b[1;34m(self, request, follow_redirects, history)\u001b[0m\n\u001b[0;32m 964\u001b[0m \u001b[39mfor\u001b[39;00m hook \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_event_hooks[\u001b[39m\"\u001b[39m\u001b[39mrequest\u001b[39m\u001b[39m\"\u001b[39m]:\n\u001b[0;32m 965\u001b[0m hook(request)\n\u001b[1;32m--> 967\u001b[0m response \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_send_single_request(request)\n\u001b[0;32m 968\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 969\u001b[0m \u001b[39mfor\u001b[39;00m hook \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_event_hooks[\u001b[39m\"\u001b[39m\u001b[39mresponse\u001b[39m\u001b[39m\"\u001b[39m]:\n", + "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpx\\_client.py:1003\u001b[0m, in \u001b[0;36mClient._send_single_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 998\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mRuntimeError\u001b[39;00m(\n\u001b[0;32m 999\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mAttempted to send an async request with a sync Client instance.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m 1000\u001b[0m )\n\u001b[0;32m 1002\u001b[0m \u001b[39mwith\u001b[39;00m request_context(request\u001b[39m=\u001b[39mrequest):\n\u001b[1;32m-> 1003\u001b[0m response \u001b[39m=\u001b[39m transport\u001b[39m.\u001b[39;49mhandle_request(request)\n\u001b[0;32m 1005\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39misinstance\u001b[39m(response\u001b[39m.\u001b[39mstream, SyncByteStream)\n\u001b[0;32m 1007\u001b[0m response\u001b[39m.\u001b[39mrequest \u001b[39m=\u001b[39m request\n", + "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpx\\_transports\\default.py:218\u001b[0m, in \u001b[0;36mHTTPTransport.handle_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 205\u001b[0m req \u001b[39m=\u001b[39m httpcore\u001b[39m.\u001b[39mRequest(\n\u001b[0;32m 206\u001b[0m method\u001b[39m=\u001b[39mrequest\u001b[39m.\u001b[39mmethod,\n\u001b[0;32m 207\u001b[0m url\u001b[39m=\u001b[39mhttpcore\u001b[39m.\u001b[39mURL(\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 215\u001b[0m extensions\u001b[39m=\u001b[39mrequest\u001b[39m.\u001b[39mextensions,\n\u001b[0;32m 216\u001b[0m )\n\u001b[0;32m 217\u001b[0m \u001b[39mwith\u001b[39;00m map_httpcore_exceptions():\n\u001b[1;32m--> 218\u001b[0m resp \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_pool\u001b[39m.\u001b[39mhandle_request(req)\n\u001b[0;32m 220\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39misinstance\u001b[39m(resp\u001b[39m.\u001b[39mstream, typing\u001b[39m.\u001b[39mIterable)\n\u001b[0;32m 222\u001b[0m \u001b[39mreturn\u001b[39;00m Response(\n\u001b[0;32m 223\u001b[0m status_code\u001b[39m=\u001b[39mresp\u001b[39m.\u001b[39mstatus,\n\u001b[0;32m 224\u001b[0m headers\u001b[39m=\u001b[39mresp\u001b[39m.\u001b[39mheaders,\n\u001b[0;32m 225\u001b[0m stream\u001b[39m=\u001b[39mResponseStream(resp\u001b[39m.\u001b[39mstream),\n\u001b[0;32m 226\u001b[0m extensions\u001b[39m=\u001b[39mresp\u001b[39m.\u001b[39mextensions,\n\u001b[0;32m 227\u001b[0m )\n", + "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\contextlib.py:137\u001b[0m, in \u001b[0;36m_GeneratorContextManager.__exit__\u001b[1;34m(self, typ, value, traceback)\u001b[0m\n\u001b[0;32m 135\u001b[0m value \u001b[39m=\u001b[39m typ()\n\u001b[0;32m 136\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m--> 137\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mgen\u001b[39m.\u001b[39;49mthrow(typ, value, traceback)\n\u001b[0;32m 138\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mStopIteration\u001b[39;00m \u001b[39mas\u001b[39;00m exc:\n\u001b[0;32m 139\u001b[0m \u001b[39m# Suppress StopIteration *unless* it's the same exception that\u001b[39;00m\n\u001b[0;32m 140\u001b[0m \u001b[39m# was passed to throw(). This prevents a StopIteration\u001b[39;00m\n\u001b[0;32m 141\u001b[0m \u001b[39m# raised inside the \"with\" statement from being suppressed.\u001b[39;00m\n\u001b[0;32m 142\u001b[0m \u001b[39mreturn\u001b[39;00m exc \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m value\n", + "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpx\\_transports\\default.py:77\u001b[0m, in \u001b[0;36mmap_httpcore_exceptions\u001b[1;34m()\u001b[0m\n\u001b[0;32m 74\u001b[0m \u001b[39mraise\u001b[39;00m\n\u001b[0;32m 76\u001b[0m message \u001b[39m=\u001b[39m \u001b[39mstr\u001b[39m(exc)\n\u001b[1;32m---> 77\u001b[0m \u001b[39mraise\u001b[39;00m mapped_exc(message) \u001b[39mfrom\u001b[39;00m \u001b[39mexc\u001b[39;00m\n", + "\u001b[1;31mConnectError\u001b[0m: [WinError 10061] No connection could be made because the target machine actively refused it" + ] + } + ], + "source": [ + "from tiled.client import from_uri\n", + "client = from_uri(\"http://localhost:8000/api\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# data path\n", + "data_path = pathlib.Path(\"D:/BNL/AIMMDB/65272_C_007\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "def read_dat(path):\n", + " return pd.read_csv(path, sep=\"\\t\", header=None, names=[\"omega, e, k, mu, mu0, chi\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "def lower_case_dict(d):\n", + " out = {}\n", + " modified = False\n", + "\n", + " for k, v in d.items():\n", + " if isinstance(v, dict):\n", + " v, modified_ = lower_case_dict(v)\n", + " modified = modified or modified_\n", + " if isinstance(k, str) and not k.islower():\n", + " out[k.lower()] = v\n", + " modified = True\n", + " else:\n", + " out[k] = v\n", + "\n", + " return out, modified\n", + "\n", + "def load_FeffData(data_path):\n", + " \"\"\"\n", + " Load FEFF dataset into a dataframe parsing sample information from metadata\n", + " \"\"\"\n", + " files = list(data_path.rglob(\"*.dat\"))\n", + " print(f\"found {len(files)} dat files to ingest\")\n", + "\n", + " data_list = []\n", + "\n", + " for f in files:\n", + " name = f.stem\n", + " #need to parse metadata from the file name\n", + " df_feff, metadata = read_dat(str(f))\n", + " fields = metadata.pop(\"fields\")\n", + " fields, _ = lower_case_dict(fields)\n", + " metadata.update({k.lower(): v for k, v in fields.items()})\n", + " metadata[\"sample\"].setdefault(\"prep\", None)\n", + " \n", + " try:\n", + " facility_name = metadata[\"facility\"][\"name\"]\n", + " except:\n", + " metadata[\"facility\"] = {\"name\" : None}\n", + " try:\n", + " beamline_name = metadata[\"beamline\"][\"name\"]\n", + " except:\n", + " metadata[\"beamline\"] = {\"name\" : None}\n", + "\n", + " data_list.append(\n", + " {\n", + " \"name\": f.stem,\n", + " \"file\": str(f),\n", + " \"metadata\": metadata,\n", + " \"columns\" : tuple(df_feff)\n", + " }\n", + " )\n", + "\n", + " df = pd.DataFrame(data_list)\n", + "\n", + " return df" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "found 1 dat files to ingest\n" + ] + }, + { + "ename": "ValueError", + "evalue": "not enough values to unpack (expected 2, got 1)", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn [10], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[39m# read through all the files and extract some metadata\u001b[39;00m\n\u001b[1;32m----> 2\u001b[0m feff \u001b[39m=\u001b[39m load_FeffData(pathlib\u001b[39m.\u001b[39;49mPath(\u001b[39m\"\u001b[39;49m\u001b[39mD:/BNL/AIMMDB/65272_C_007\u001b[39;49m\u001b[39m\"\u001b[39;49m))\n\u001b[0;32m 3\u001b[0m feff\n", + "Cell \u001b[1;32mIn [9], line 29\u001b[0m, in \u001b[0;36mload_FeffData\u001b[1;34m(data_path)\u001b[0m\n\u001b[0;32m 27\u001b[0m name \u001b[39m=\u001b[39m f\u001b[39m.\u001b[39mstem\n\u001b[0;32m 28\u001b[0m \u001b[39m#need to parse metadata from the file name\u001b[39;00m\n\u001b[1;32m---> 29\u001b[0m df_xas, metadata \u001b[39m=\u001b[39m read_dat(\u001b[39mstr\u001b[39m(f))\n\u001b[0;32m 30\u001b[0m fields \u001b[39m=\u001b[39m metadata\u001b[39m.\u001b[39mpop(\u001b[39m\"\u001b[39m\u001b[39mfields\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m 31\u001b[0m fields, _ \u001b[39m=\u001b[39m lower_case_dict(fields)\n", + "\u001b[1;31mValueError\u001b[0m: not enough values to unpack (expected 2, got 1)" + ] + } + ], + "source": [ + "# read through all the files and extract some metadata\n", + "feff = load_FeffData(pathlib.Path(\"D:/BNL/AIMMDB/65272_C_007\"))\n", + "feff" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "def ingest_feff(client, df, verbose=False):\n", + " \"\"\"\n", + " Upload the FEFF dataset to database\n", + " \"\"\"\n", + "\n", + " for (name, prep), g in df.groupby([\"sample.name\", \"sample.prep\"]):\n", + " if verbose:\n", + " print(f\"{name}: {prep}, {len(g)}\")\n", + "\n", + " sample_id = client.write_sample({\"name\" : name, \"prep\" : prep})\n", + "\n", + " for i, row in g.iterrows():\n", + " feff_df, _ = read_dat(row.file)\n", + " metadata = row.metadata\n", + " metadata[\"dataset\"] = \"feff\"\n", + " metadata[\"sample_id\"] = sample_id\n", + " client[\"uid\"].write_dataframe(feff_df, metadata=metadata, specs=[\"FEFF\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "starting ingestion...\n" + ] + }, + { + "ename": "NameError", + "evalue": "name 'client' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn [12], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39mstarting ingestion...\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m----> 2\u001b[0m ingest_feff(client, feff, verbose\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m)\n\u001b[0;32m 3\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39mfinished.\u001b[39m\u001b[39m\"\u001b[39m)\n", + "\u001b[1;31mNameError\u001b[0m: name 'client' is not defined" + ] + } + ], + "source": [ + "print(\"starting ingestion...\")\n", + "ingest_feff(client, feff, verbose=True)\n", + "print(\"finished.\")" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'title': 'FEFFInputMetadata',\n", + " 'type': 'object',\n", + " 'properties': {'element': {'$ref': '#/definitions/XDIElement'},\n", + " 'measurement_type': {'default': 'xas',\n", + " 'allOf': [{'$ref': '#/definitions/MeasurementEnum'}]},\n", + " 'dataset': {'title': 'Dataset', 'type': 'string'},\n", + " 'sample_id': {'title': 'Sample Id', 'type': 'string'},\n", + " 'input_script': {'title': 'Input Script', 'type': 'string'}},\n", + " 'required': ['element', 'dataset', 'sample_id', 'input_script'],\n", + " 'definitions': {'XDIElement': {'title': 'XDIElement',\n", + " 'type': 'object',\n", + " 'properties': {'symbol': {'title': 'Symbol', 'type': 'string'},\n", + " 'edge': {'title': 'Edge', 'type': 'string'}},\n", + " 'required': ['symbol', 'edge']},\n", + " 'MeasurementEnum': {'title': 'MeasurementEnum',\n", + " 'description': 'An enumeration.',\n", + " 'enum': ['xas', 'rixs'],\n", + " 'type': 'string'}}}" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "FEFFInputMetadata.schema()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'title': 'FEFFOutputMetadata',\n", + " 'type': 'object',\n", + " 'properties': {'element': {'$ref': '#/definitions/XDIElement'},\n", + " 'measurement_type': {'default': 'xas',\n", + " 'allOf': [{'$ref': '#/definitions/MeasurementEnum'}]},\n", + " 'dataset': {'title': 'Dataset', 'type': 'string'},\n", + " 'sample_id': {'title': 'Sample Id', 'type': 'string'},\n", + " 'output_script': {'title': 'Output Script', 'type': 'string'}},\n", + " 'required': ['element', 'dataset', 'sample_id', 'output_script'],\n", + " 'definitions': {'XDIElement': {'title': 'XDIElement',\n", + " 'type': 'object',\n", + " 'properties': {'symbol': {'title': 'Symbol', 'type': 'string'},\n", + " 'edge': {'title': 'Edge', 'type': 'string'}},\n", + " 'required': ['symbol', 'edge']},\n", + " 'MeasurementEnum': {'title': 'MeasurementEnum',\n", + " 'description': 'An enumeration.',\n", + " 'enum': ['xas', 'rixs'],\n", + " 'type': 'string'}}}" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "FEFFOutputMetadata.schema()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'title': 'ExperimentalXASMetadata',\n", + " 'type': 'object',\n", + " 'properties': {'element': {'$ref': '#/definitions/XDIElement'},\n", + " 'measurement_type': {'default': 'xas',\n", + " 'allOf': [{'$ref': '#/definitions/MeasurementEnum'}]},\n", + " 'dataset': {'title': 'Dataset', 'type': 'string'},\n", + " 'sample_id': {'title': 'Sample Id', 'type': 'string'},\n", + " 'facility': {'$ref': '#/definitions/FacilityMetadata'},\n", + " 'beamline': {'$ref': '#/definitions/BeamlineMetadata'}},\n", + " 'required': ['element', 'dataset', 'facility', 'beamline'],\n", + " 'definitions': {'XDIElement': {'title': 'XDIElement',\n", + " 'type': 'object',\n", + " 'properties': {'symbol': {'title': 'Symbol', 'type': 'string'},\n", + " 'edge': {'title': 'Edge', 'type': 'string'}},\n", + " 'required': ['symbol', 'edge']},\n", + " 'MeasurementEnum': {'title': 'MeasurementEnum',\n", + " 'description': 'An enumeration.',\n", + " 'enum': ['xas', 'rixs'],\n", + " 'type': 'string'},\n", + " 'FacilityMetadata': {'title': 'FacilityMetadata',\n", + " 'type': 'object',\n", + " 'properties': {'name': {'title': 'Name', 'type': 'string'}},\n", + " 'required': ['name']},\n", + " 'BeamlineMetadata': {'title': 'BeamlineMetadata',\n", + " 'type': 'object',\n", + " 'properties': {'name': {'title': 'Name', 'type': 'string'}},\n", + " 'required': ['name']}}}" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# we will enforce that XAS metadata satisfies the following schema\n", + "ExperimentalXASMetadata.schema()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "client[\"uid\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# with the correct metadata we can write to the server\n", + "# NOTE this doesn't prevent you from writing garbage but does help\n", + "df = pd.DataFrame({\"a\" : np.random.rand(100), \"b\" : np.random.rand(100)})\n", + "metadata = {\"dataset\" : \"feff\", \"foo\" : \"bar\", \"element\" : {\"symbol\" : \"Au\", \"edge\" : \"K\"}, \"facility\" : {\"name\" : \"ALS\"}, \"beamline\" : {\"name\" : \"8.0.1\"}}\n", + "node = client[\"uid\"].write_dataframe(df, metadata=metadata, specs=[\"FEFF\"])\n", + "node" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.9.13 ('aimm')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "189d756eac8438d33e11f8e23aa09bdc4c99760ed11a3bfefa464e31dcca4c4a" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From afe5b50c357a47e1b913ccc8892043f9cbe41f1c Mon Sep 17 00:00:00 2001 From: msegal347 Date: Sat, 22 Oct 2022 11:41:16 -0400 Subject: [PATCH 2/7] Updated feff schemas and ingestion 2022OCT22 --- aimmdb/schemas.py | 29 +----- ingest/ingest_FEFF.ipynb | 187 +++++++++++++++++++-------------------- ingest/load_FEFF_Data.py | 65 ++++++++++++++ 3 files changed, 156 insertions(+), 125 deletions(-) create mode 100644 ingest/load_FEFF_Data.py diff --git a/aimmdb/schemas.py b/aimmdb/schemas.py index 57ca102..7479e2e 100644 --- a/aimmdb/schemas.py +++ b/aimmdb/schemas.py @@ -153,9 +153,7 @@ class FEFFpotentials(pydantic.BaseModel, extra=pydantic.Extra.allow): element: int l_scmt: int l_fms: int - #FEFFpotentials = (x, ipot, Z, element, l_scmt, l_fms) - #converted_potentials = str(FEFFpotentials) - #smiles = string + class FEFFcards(pydantic.BaseModel, extra=pydantic.Extra.allow): atoms: float @@ -170,28 +168,6 @@ class FEFFcards(pydantic.BaseModel, extra=pydantic.Extra.allow): fms: float S02: float corehole: str - #smiles = string - -#class FEFFDataFrame(DataFrameStructure): - #file_input = "xmu.dat" - #omega: float - #e: float - #k: float - #mu: float - #mu0: float - #chi: float - #FEFFDataFrame_inputs = (omega, e, k, mu, mu0, chi) - #smiles = string - - #need to write validation for the Dataframe - -#class ExperimentalFEFFMetadata(pydantic.BaseModel, extra=pydantic.Extra.allow): - #FileType = "feff.out"; "feff.inp" - #measurement_type: MeasurementEnum = pydantic.Field("feff", const=True) - #title = measurement_type(pydantic.field("title")) - #absorbing_atom = measurement_type(pydantic.field("edge")) - #cards = measurement_type(FEFFcards) - #smiles = string class FEFFInputMetadata(pydantic.BaseModel, extra=pydantic.Extra.allow): element: XDIElement @@ -205,6 +181,5 @@ class FEFFOutputMetadata(pydantic.BaseModel, extra=pydantic.Extra.allow): measurement_type: MeasurementEnum = pydantic.Field("FEFF", const=True) dataset: str sample_id: str - #change to output log - output_script: str + output_log: str diff --git a/ingest/ingest_FEFF.ipynb b/ingest/ingest_FEFF.ipynb index 0fdf0f0..9a0f732 100644 --- a/ingest/ingest_FEFF.ipynb +++ b/ingest/ingest_FEFF.ipynb @@ -2,28 +2,20 @@ "cells": [ { "cell_type": "code", - "execution_count": 5, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import copy\n", - "import pathlib\n", + "from pathlib import Path\n", "\n", "import numpy as np\n", - "import pandas as pd\n", - "\n", - "from tiled.client import from_uri\n", - "from tiled.examples.xdi import read_xdi\n", - "from tiled.queries import Key\n", - "\n", - "from aimmdb.schemas import ExperimentalXASMetadata\n", - "from aimmdb.schemas import FEFFInputMetadata\n", - "from aimmdb.schemas import FEFFOutputMetadata" + "import pandas as pd" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -53,7 +45,7 @@ "\u001b[1;31mConnectError\u001b[0m: [WinError 10061] No connection could be made because the target machine actively refused it", "\nThe above exception was the direct cause of the following exception:\n", "\u001b[1;31mConnectError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[1;32mIn [6], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mtiled\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mclient\u001b[39;00m \u001b[39mimport\u001b[39;00m from_uri\n\u001b[1;32m----> 2\u001b[0m client \u001b[39m=\u001b[39m from_uri(\u001b[39m\"\u001b[39;49m\u001b[39mhttp://localhost:8000/api\u001b[39;49m\u001b[39m\"\u001b[39;49m)\n", + "Cell \u001b[1;32mIn [2], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mtiled\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mclient\u001b[39;00m \u001b[39mimport\u001b[39;00m from_uri\n\u001b[1;32m----> 2\u001b[0m client \u001b[39m=\u001b[39m from_uri(\u001b[39m\"\u001b[39;49m\u001b[39mhttp://localhost:8000/api\u001b[39;49m\u001b[39m\"\u001b[39;49m)\n", "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\tiled\\client\\constructors.py:101\u001b[0m, in \u001b[0;36mfrom_uri\u001b[1;34m(uri, structure_clients, cache, offline, username, auth_provider, api_key, token_cache, verify, prompt_for_reauthentication, headers, timeout)\u001b[0m\n\u001b[0;32m 91\u001b[0m timeout \u001b[39m=\u001b[39m httpx\u001b[39m.\u001b[39mTimeout(\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mDEFAULT_TIMEOUT_PARAMS)\n\u001b[0;32m 93\u001b[0m client \u001b[39m=\u001b[39m httpx\u001b[39m.\u001b[39mClient(\n\u001b[0;32m 94\u001b[0m base_url\u001b[39m=\u001b[39mbase_uri,\n\u001b[0;32m 95\u001b[0m verify\u001b[39m=\u001b[39mverify,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 99\u001b[0m params\u001b[39m=\u001b[39mparams,\n\u001b[0;32m 100\u001b[0m )\n\u001b[1;32m--> 101\u001b[0m context \u001b[39m=\u001b[39m Context(\n\u001b[0;32m 102\u001b[0m client,\n\u001b[0;32m 103\u001b[0m username\u001b[39m=\u001b[39;49musername,\n\u001b[0;32m 104\u001b[0m auth_provider\u001b[39m=\u001b[39;49mauth_provider,\n\u001b[0;32m 105\u001b[0m api_key\u001b[39m=\u001b[39;49mapi_key,\n\u001b[0;32m 106\u001b[0m cache\u001b[39m=\u001b[39;49mcache,\n\u001b[0;32m 107\u001b[0m offline\u001b[39m=\u001b[39;49moffline,\n\u001b[0;32m 108\u001b[0m token_cache\u001b[39m=\u001b[39;49mtoken_cache,\n\u001b[0;32m 109\u001b[0m prompt_for_reauthentication\u001b[39m=\u001b[39;49mprompt_for_reauthentication,\n\u001b[0;32m 110\u001b[0m )\n\u001b[0;32m 111\u001b[0m \u001b[39mreturn\u001b[39;00m from_context(context, structure_clients\u001b[39m=\u001b[39mstructure_clients)\n", "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\tiled\\client\\context.py:228\u001b[0m, in \u001b[0;36mContext.__init__\u001b[1;34m(self, client, username, auth_provider, api_key, cache, offline, token_cache, prompt_for_reauthentication, app)\u001b[0m\n\u001b[0;32m 223\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mdisable_cache(allow_read\u001b[39m=\u001b[39m\u001b[39mFalse\u001b[39;00m, allow_write\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m):\n\u001b[0;32m 224\u001b[0m \u001b[39m# Make this request manually to inject custom error handling.\u001b[39;00m\n\u001b[0;32m 225\u001b[0m request \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_client\u001b[39m.\u001b[39mbuild_request(\n\u001b[0;32m 226\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mGET\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39m/\u001b[39m\u001b[39m\"\u001b[39m, params\u001b[39m=\u001b[39m{\u001b[39m\"\u001b[39m\u001b[39mroot_path\u001b[39m\u001b[39m\"\u001b[39m: \u001b[39mTrue\u001b[39;00m}\n\u001b[0;32m 227\u001b[0m )\n\u001b[1;32m--> 228\u001b[0m response \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_client\u001b[39m.\u001b[39;49msend(request)\n\u001b[0;32m 229\u001b[0m \u001b[39m# Handle case where user pastes in a link like\u001b[39;00m\n\u001b[0;32m 230\u001b[0m \u001b[39m# https://example.com/some/subpath/node/metadata/a/b/c\u001b[39;00m\n\u001b[0;32m 231\u001b[0m \u001b[39m# and it requires authentication. The 401 response includes a header\u001b[39;00m\n\u001b[0;32m 232\u001b[0m \u001b[39m# that points us to https://examples.com/some/subpath where we\u001b[39;00m\n\u001b[0;32m 233\u001b[0m \u001b[39m# can see the authentication providers and their endpoints.\u001b[39;00m\n\u001b[0;32m 234\u001b[0m \u001b[39mif\u001b[39;00m response\u001b[39m.\u001b[39mstatus_code \u001b[39m==\u001b[39m \u001b[39m401\u001b[39m:\n", "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpx\\_client.py:902\u001b[0m, in \u001b[0;36mClient.send\u001b[1;34m(self, request, stream, auth, follow_redirects)\u001b[0m\n\u001b[0;32m 894\u001b[0m follow_redirects \u001b[39m=\u001b[39m (\n\u001b[0;32m 895\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mfollow_redirects\n\u001b[0;32m 896\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(follow_redirects, UseClientDefault)\n\u001b[0;32m 897\u001b[0m \u001b[39melse\u001b[39;00m follow_redirects\n\u001b[0;32m 898\u001b[0m )\n\u001b[0;32m 900\u001b[0m auth \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_build_request_auth(request, auth)\n\u001b[1;32m--> 902\u001b[0m response \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_send_handling_auth(\n\u001b[0;32m 903\u001b[0m request,\n\u001b[0;32m 904\u001b[0m auth\u001b[39m=\u001b[39;49mauth,\n\u001b[0;32m 905\u001b[0m follow_redirects\u001b[39m=\u001b[39;49mfollow_redirects,\n\u001b[0;32m 906\u001b[0m history\u001b[39m=\u001b[39;49m[],\n\u001b[0;32m 907\u001b[0m )\n\u001b[0;32m 908\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 909\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m stream:\n", @@ -74,116 +66,115 @@ }, { "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "# data path\n", - "data_path = pathlib.Path(\"D:/BNL/AIMMDB/65272_C_007\")" - ] - }, - { - "cell_type": "code", - "execution_count": 8, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Data Path: AIMMDB\\data\\feff\\65272_C_007\n" + ] + } + ], "source": [ - "def read_dat(path):\n", - " return pd.read_csv(path, sep=\"\\t\", header=None, names=[\"omega, e, k, mu, mu0, chi\"])" + "from pathlib import Path\n", + "DATA_PATH = Path(\"aimmdb/data/feff/65272_C_007\")\n", + "print(\"Data Path:\", DATA_PATH)" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "FileNotFoundError", + "evalue": "[Errno 2] No such file or directory: 'AIMMDB\\\\data\\\\feff\\\\65272_C_007\\\\xmu.dat'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn [7], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mload_FEFF_Data\u001b[39;00m \u001b[39mimport\u001b[39;00m load_feff_data\n\u001b[1;32m----> 3\u001b[0m load_feff_data(DATA_PATH)\n", + "File \u001b[1;32mc:\\Users\\msega\\aimmdb\\ingest\\load_FEFF_Data.py:37\u001b[0m, in \u001b[0;36mload_feff_data\u001b[1;34m(data_path, verbose)\u001b[0m\n\u001b[0;32m 34\u001b[0m feff_out \u001b[39m=\u001b[39m data_path \u001b[39m/\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mfeff.out\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m 35\u001b[0m xmu_dat \u001b[39m=\u001b[39m data_path \u001b[39m/\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mxmu.dat\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m---> 37\u001b[0m data \u001b[39m=\u001b[39m pd\u001b[39m.\u001b[39;49mread_csv(\n\u001b[0;32m 38\u001b[0m xmu_dat,\n\u001b[0;32m 39\u001b[0m sep\u001b[39m=\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39m\\\u001b[39;49m\u001b[39ms+\u001b[39;49m\u001b[39m\"\u001b[39;49m,\n\u001b[0;32m 40\u001b[0m header\u001b[39m=\u001b[39;49m\u001b[39mNone\u001b[39;49;00m,\n\u001b[0;32m 41\u001b[0m names\u001b[39m=\u001b[39;49m[\u001b[39m\"\u001b[39;49m\u001b[39momega\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39m\"\u001b[39;49m\u001b[39me\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39m\"\u001b[39;49m\u001b[39mk\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39m\"\u001b[39;49m\u001b[39mmu\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39m\"\u001b[39;49m\u001b[39mmu0\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39m\"\u001b[39;49m\u001b[39mchi\u001b[39;49m\u001b[39m\"\u001b[39;49m],\n\u001b[0;32m 42\u001b[0m comment\u001b[39m=\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39m#\u001b[39;49m\u001b[39m\"\u001b[39;49m,\n\u001b[0;32m 43\u001b[0m )\n\u001b[0;32m 45\u001b[0m metadata \u001b[39m=\u001b[39m {\n\u001b[0;32m 46\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mfeff.inp\u001b[39m\u001b[39m\"\u001b[39m: feff_inp\u001b[39m.\u001b[39mread_text(),\n\u001b[0;32m 47\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mfeff.out\u001b[39m\u001b[39m\"\u001b[39m: feff_out\u001b[39m.\u001b[39mread_text(),\n\u001b[0;32m 48\u001b[0m }\n\u001b[0;32m 50\u001b[0m dat \u001b[39m=\u001b[39m [\n\u001b[0;32m 51\u001b[0m line\n\u001b[0;32m 52\u001b[0m \u001b[39mfor\u001b[39;00m line \u001b[39min\u001b[39;00m xmu_dat\u001b[39m.\u001b[39mread_text()\u001b[39m.\u001b[39msplitlines()\n\u001b[0;32m 53\u001b[0m \u001b[39mif\u001b[39;00m line\u001b[39m.\u001b[39mstartswith(\u001b[39m\"\u001b[39m\u001b[39m#\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m 54\u001b[0m ]\n", + "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\pandas\\util\\_decorators.py:211\u001b[0m, in \u001b[0;36mdeprecate_kwarg.._deprecate_kwarg..wrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 209\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m 210\u001b[0m kwargs[new_arg_name] \u001b[39m=\u001b[39m new_arg_value\n\u001b[1;32m--> 211\u001b[0m \u001b[39mreturn\u001b[39;00m func(\u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n", + "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\pandas\\util\\_decorators.py:317\u001b[0m, in \u001b[0;36mdeprecate_nonkeyword_arguments..decorate..wrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 311\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(args) \u001b[39m>\u001b[39m num_allow_args:\n\u001b[0;32m 312\u001b[0m warnings\u001b[39m.\u001b[39mwarn(\n\u001b[0;32m 313\u001b[0m msg\u001b[39m.\u001b[39mformat(arguments\u001b[39m=\u001b[39marguments),\n\u001b[0;32m 314\u001b[0m \u001b[39mFutureWarning\u001b[39;00m,\n\u001b[0;32m 315\u001b[0m stacklevel\u001b[39m=\u001b[39mfind_stack_level(inspect\u001b[39m.\u001b[39mcurrentframe()),\n\u001b[0;32m 316\u001b[0m )\n\u001b[1;32m--> 317\u001b[0m \u001b[39mreturn\u001b[39;00m func(\u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n", + "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\pandas\\io\\parsers\\readers.py:950\u001b[0m, in \u001b[0;36mread_csv\u001b[1;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, error_bad_lines, warn_bad_lines, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options)\u001b[0m\n\u001b[0;32m 935\u001b[0m kwds_defaults \u001b[39m=\u001b[39m _refine_defaults_read(\n\u001b[0;32m 936\u001b[0m dialect,\n\u001b[0;32m 937\u001b[0m delimiter,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 946\u001b[0m defaults\u001b[39m=\u001b[39m{\u001b[39m\"\u001b[39m\u001b[39mdelimiter\u001b[39m\u001b[39m\"\u001b[39m: \u001b[39m\"\u001b[39m\u001b[39m,\u001b[39m\u001b[39m\"\u001b[39m},\n\u001b[0;32m 947\u001b[0m )\n\u001b[0;32m 948\u001b[0m kwds\u001b[39m.\u001b[39mupdate(kwds_defaults)\n\u001b[1;32m--> 950\u001b[0m \u001b[39mreturn\u001b[39;00m _read(filepath_or_buffer, kwds)\n", + "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\pandas\\io\\parsers\\readers.py:605\u001b[0m, in \u001b[0;36m_read\u001b[1;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[0;32m 602\u001b[0m _validate_names(kwds\u001b[39m.\u001b[39mget(\u001b[39m\"\u001b[39m\u001b[39mnames\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39mNone\u001b[39;00m))\n\u001b[0;32m 604\u001b[0m \u001b[39m# Create the parser.\u001b[39;00m\n\u001b[1;32m--> 605\u001b[0m parser \u001b[39m=\u001b[39m TextFileReader(filepath_or_buffer, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwds)\n\u001b[0;32m 607\u001b[0m \u001b[39mif\u001b[39;00m chunksize \u001b[39mor\u001b[39;00m iterator:\n\u001b[0;32m 608\u001b[0m \u001b[39mreturn\u001b[39;00m parser\n", + "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\pandas\\io\\parsers\\readers.py:1442\u001b[0m, in \u001b[0;36mTextFileReader.__init__\u001b[1;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[0;32m 1439\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39moptions[\u001b[39m\"\u001b[39m\u001b[39mhas_index_names\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39m=\u001b[39m kwds[\u001b[39m\"\u001b[39m\u001b[39mhas_index_names\u001b[39m\u001b[39m\"\u001b[39m]\n\u001b[0;32m 1441\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mhandles: IOHandles \u001b[39m|\u001b[39m \u001b[39mNone\u001b[39;00m \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n\u001b[1;32m-> 1442\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_engine \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_make_engine(f, \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mengine)\n", + "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\pandas\\io\\parsers\\readers.py:1729\u001b[0m, in \u001b[0;36mTextFileReader._make_engine\u001b[1;34m(self, f, engine)\u001b[0m\n\u001b[0;32m 1727\u001b[0m is_text \u001b[39m=\u001b[39m \u001b[39mFalse\u001b[39;00m\n\u001b[0;32m 1728\u001b[0m mode \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mrb\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m-> 1729\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mhandles \u001b[39m=\u001b[39m get_handle(\n\u001b[0;32m 1730\u001b[0m f,\n\u001b[0;32m 1731\u001b[0m mode,\n\u001b[0;32m 1732\u001b[0m encoding\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49moptions\u001b[39m.\u001b[39;49mget(\u001b[39m\"\u001b[39;49m\u001b[39mencoding\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39mNone\u001b[39;49;00m),\n\u001b[0;32m 1733\u001b[0m compression\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49moptions\u001b[39m.\u001b[39;49mget(\u001b[39m\"\u001b[39;49m\u001b[39mcompression\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39mNone\u001b[39;49;00m),\n\u001b[0;32m 1734\u001b[0m memory_map\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49moptions\u001b[39m.\u001b[39;49mget(\u001b[39m\"\u001b[39;49m\u001b[39mmemory_map\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39mFalse\u001b[39;49;00m),\n\u001b[0;32m 1735\u001b[0m is_text\u001b[39m=\u001b[39;49mis_text,\n\u001b[0;32m 1736\u001b[0m errors\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49moptions\u001b[39m.\u001b[39;49mget(\u001b[39m\"\u001b[39;49m\u001b[39mencoding_errors\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39m\"\u001b[39;49m\u001b[39mstrict\u001b[39;49m\u001b[39m\"\u001b[39;49m),\n\u001b[0;32m 1737\u001b[0m storage_options\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49moptions\u001b[39m.\u001b[39;49mget(\u001b[39m\"\u001b[39;49m\u001b[39mstorage_options\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39mNone\u001b[39;49;00m),\n\u001b[0;32m 1738\u001b[0m )\n\u001b[0;32m 1739\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mhandles \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m\n\u001b[0;32m 1740\u001b[0m f \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mhandles\u001b[39m.\u001b[39mhandle\n", + "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\pandas\\io\\common.py:857\u001b[0m, in \u001b[0;36mget_handle\u001b[1;34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[0m\n\u001b[0;32m 852\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39misinstance\u001b[39m(handle, \u001b[39mstr\u001b[39m):\n\u001b[0;32m 853\u001b[0m \u001b[39m# Check whether the filename is to be opened in binary mode.\u001b[39;00m\n\u001b[0;32m 854\u001b[0m \u001b[39m# Binary mode does not support 'encoding' and 'newline'.\u001b[39;00m\n\u001b[0;32m 855\u001b[0m \u001b[39mif\u001b[39;00m ioargs\u001b[39m.\u001b[39mencoding \u001b[39mand\u001b[39;00m \u001b[39m\"\u001b[39m\u001b[39mb\u001b[39m\u001b[39m\"\u001b[39m \u001b[39mnot\u001b[39;00m \u001b[39min\u001b[39;00m ioargs\u001b[39m.\u001b[39mmode:\n\u001b[0;32m 856\u001b[0m \u001b[39m# Encoding\u001b[39;00m\n\u001b[1;32m--> 857\u001b[0m handle \u001b[39m=\u001b[39m \u001b[39mopen\u001b[39;49m(\n\u001b[0;32m 858\u001b[0m handle,\n\u001b[0;32m 859\u001b[0m ioargs\u001b[39m.\u001b[39;49mmode,\n\u001b[0;32m 860\u001b[0m encoding\u001b[39m=\u001b[39;49mioargs\u001b[39m.\u001b[39;49mencoding,\n\u001b[0;32m 861\u001b[0m errors\u001b[39m=\u001b[39;49merrors,\n\u001b[0;32m 862\u001b[0m newline\u001b[39m=\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39m\"\u001b[39;49m,\n\u001b[0;32m 863\u001b[0m )\n\u001b[0;32m 864\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m 865\u001b[0m \u001b[39m# Binary mode\u001b[39;00m\n\u001b[0;32m 866\u001b[0m handle \u001b[39m=\u001b[39m \u001b[39mopen\u001b[39m(handle, ioargs\u001b[39m.\u001b[39mmode)\n", + "\u001b[1;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'AIMMDB\\\\data\\\\feff\\\\65272_C_007\\\\xmu.dat'" + ] + } + ], "source": [ - "def lower_case_dict(d):\n", - " out = {}\n", - " modified = False\n", - "\n", - " for k, v in d.items():\n", - " if isinstance(v, dict):\n", - " v, modified_ = lower_case_dict(v)\n", - " modified = modified or modified_\n", - " if isinstance(k, str) and not k.islower():\n", - " out[k.lower()] = v\n", - " modified = True\n", - " else:\n", - " out[k] = v\n", - "\n", - " return out, modified\n", - "\n", - "def load_FeffData(data_path):\n", - " \"\"\"\n", - " Load FEFF dataset into a dataframe parsing sample information from metadata\n", - " \"\"\"\n", - " files = list(data_path.rglob(\"*.dat\"))\n", - " print(f\"found {len(files)} dat files to ingest\")\n", - "\n", - " data_list = []\n", - "\n", - " for f in files:\n", - " name = f.stem\n", - " #need to parse metadata from the file name\n", - " df_feff, metadata = read_dat(str(f))\n", - " fields = metadata.pop(\"fields\")\n", - " fields, _ = lower_case_dict(fields)\n", - " metadata.update({k.lower(): v for k, v in fields.items()})\n", - " metadata[\"sample\"].setdefault(\"prep\", None)\n", - " \n", - " try:\n", - " facility_name = metadata[\"facility\"][\"name\"]\n", - " except:\n", - " metadata[\"facility\"] = {\"name\" : None}\n", - " try:\n", - " beamline_name = metadata[\"beamline\"][\"name\"]\n", - " except:\n", - " metadata[\"beamline\"] = {\"name\" : None}\n", - "\n", - " data_list.append(\n", - " {\n", - " \"name\": f.stem,\n", - " \"file\": str(f),\n", - " \"metadata\": metadata,\n", - " \"columns\" : tuple(df_feff)\n", - " }\n", - " )\n", - "\n", - " df = pd.DataFrame(data_list)\n", + "from load_FEFF_Data import load_feff_data\n", "\n", - " return df" + "load_feff_data(DATA_PATH)" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 51, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "found 1 dat files to ingest\n" + "Files opened: D:\\BNL\\AIMMDB\\FEFF_Data\\65272_C_007\\feff.inp D:\\BNL\\AIMMDB\\FEFF_Data\\65272_C_007\\feff.out D:\\BNL\\AIMMDB\\FEFF_Data\\65272_C_007\\xmu.dat\n" ] }, { - "ename": "ValueError", - "evalue": "not enough values to unpack (expected 2, got 1)", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[1;32mIn [10], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[39m# read through all the files and extract some metadata\u001b[39;00m\n\u001b[1;32m----> 2\u001b[0m feff \u001b[39m=\u001b[39m load_FeffData(pathlib\u001b[39m.\u001b[39;49mPath(\u001b[39m\"\u001b[39;49m\u001b[39mD:/BNL/AIMMDB/65272_C_007\u001b[39;49m\u001b[39m\"\u001b[39;49m))\n\u001b[0;32m 3\u001b[0m feff\n", - "Cell \u001b[1;32mIn [9], line 29\u001b[0m, in \u001b[0;36mload_FeffData\u001b[1;34m(data_path)\u001b[0m\n\u001b[0;32m 27\u001b[0m name \u001b[39m=\u001b[39m f\u001b[39m.\u001b[39mstem\n\u001b[0;32m 28\u001b[0m \u001b[39m#need to parse metadata from the file name\u001b[39;00m\n\u001b[1;32m---> 29\u001b[0m df_xas, metadata \u001b[39m=\u001b[39m read_dat(\u001b[39mstr\u001b[39m(f))\n\u001b[0;32m 30\u001b[0m fields \u001b[39m=\u001b[39m metadata\u001b[39m.\u001b[39mpop(\u001b[39m\"\u001b[39m\u001b[39mfields\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m 31\u001b[0m fields, _ \u001b[39m=\u001b[39m lower_case_dict(fields)\n", - "\u001b[1;31mValueError\u001b[0m: not enough values to unpack (expected 2, got 1)" - ] + "data": { + "text/plain": [ + "( omega e k mu mu0 chi\n", + " 0 274.278 -11.491 -0.80 0.008794 0.011493 -0.002699\n", + " 1 274.741 -11.028 -0.72 0.010453 0.014542 -0.004089\n", + " 2 275.156 -10.613 -0.64 0.012638 0.018913 -0.006275\n", + " 3 275.521 -10.247 -0.56 0.015644 0.025455 -0.009811\n", + " 4 275.838 -9.930 -0.48 0.020026 0.035790 -0.015765\n", + " .. ... ... ... ... ... ...\n", + " 95 325.002 39.234 3.56 0.981847 1.006860 -0.025010\n", + " 96 326.094 40.325 3.60 0.986115 1.001850 -0.015738\n", + " 97 327.197 41.428 3.64 0.989862 0.996706 -0.006844\n", + " 98 328.313 42.544 3.68 0.992953 0.991381 0.001572\n", + " 99 329.440 43.672 3.72 0.995197 0.985933 0.009264\n", + " \n", + " [100 rows x 6 columns],\n", + " {'feff.inp': 'TITLE 65272_C_007\\n\\nEDGE K\\nS02 1.0\\nCOREHOLE RPA\\nCONTROL 1 1 1 1 1 1\\n\\nXANES 4 0.04 0.1\\n\\nFMS 9.0\\nEXCHANGE 0 0.0 0.0 2\\nSCF 7.0 1 100 0.2 3\\nRPATH -1\\n\\nPOTENTIALS\\n*\\tipot\\tZ\\telement\\tl_scmt\\tl_fms\\n0\\t6\\tC\\t-1\\t-1\\t8\\n1\\t8\\tO\\t-1\\t-1\\t1\\n2\\t1\\tH\\t-1\\t-1\\t14\\n3\\t6\\tC\\t-1\\t-1\\t8\\n\\nATOMS\\n0.11604509 1.38784311 0.13505013 1\\n0.20666659 -0.06290667 0.03415119 3\\n0.94990656 -0.55278239 -1.02579164 3\\n1.29005187 -0.84456617 0.37110379 3\\n-1.29324947 -0.72339127 0.13453590 3\\n-1.97009972 -0.32847353 1.56467581 3\\n-3.42889944 -0.78651651 1.14860359 3\\n-3.77383154 0.00398307 -0.11424171 0\\n-2.22576479 0.04473701 -0.80950366 3\\n0.04259256 1.50349635 1.04473692 2\\n0.73134101 -1.30930483 -1.56407524 2\\n1.61476561 0.19846610 -1.46760323 2\\n1.99713078 -0.31513827 1.00205616 2\\n1.06157229 -1.96780200 0.86736120 2\\n-1.18655214 -1.78808606 -0.07935984 2\\n-1.57211827 -1.09624803 2.18470682 2\\n-1.77161235 0.63742451 1.86021944 2\\n-3.72126728 -1.82168908 1.09559373 2\\n-4.04110075 -0.32200572 1.99098428 2\\n-4.48166510 -0.68255787 -0.81378802 2\\n-4.07898758 1.08233018 -0.08022361 2\\n-1.91391821 0.99342510 -1.10212711 2\\n-2.35106517 -0.44231706 -1.87892007 2\\nEND\\n',\n", + " 'feff.out': \"### feff starts at Mon Aug 15 11:00:04 EDT 2022\\n### Serial version\\n### FeffPath is /sdcc/u/mcarbone/software/JFEFF/hsw/feff90/linux\\n \\nLaunching FEFF version FEFF 9.9.1\\nCore hole lifetime is 0.087 eV.\\nYour calculation:\\n 65272_C_007\\nC K edge XANES using RPA corehole.\\nUsing: * Self-Consistent Field potentials\\nUsing cards: ATOMS CONTROL EXCHANGE TITLE RPATH POTENTIALS XANES EDGE SCF FMS S02 COREHOLE\\n\\n :WARNING TWO ATOMS VERY CLOSE TOGETHER. CHECK INPUT.\\n atoms 1 10 distance 0.91995E+00 Angstrom\\n 1 1.16045E-01 1.38784E+00 1.35050E-01 Z= 8\\n 10 4.25926E-02 1.50350E+00 1.04474E+00 Z= 1\\nCalculating atomic potentials ...\\n overlapped atomic potential and density for unique potential 0\\n overlapped atomic potential and density for unique potential 1\\n overlapped atomic potential and density for unique potential 2\\n overlapped atomic potential and density for unique potential 3\\nDone with module: atomic potentials.\\n\\nCalculating SCF potentials ...\\nFEFF-serial using 1 thread.\\nMuffin tin radii and interstitial parameters [bohr]:\\ntype, norman radius, muffin tin, overlap factor\\n 0 1.13638E+00 9.29428E-01 1.15000E+00\\n 1 1.06795E+00 8.35957E-01 1.15000E+00\\n 2 9.74431E-01 8.01033E-01 1.15000E+00\\n 3 1.11495E+00 9.03613E-01 1.15000E+00\\nCore-valence separation energy: ecv= -40.000 eV\\nInitial Fermi level: mu= 4.233 eV\\nSCF ITERATION NUMBER 1\\n point # 1 energy = -40.000\\nFMS for a cluster of 23 atoms around atom type 0\\n point # 20 energy = -28.920\\n point # 40 energy = -12.611\\n point # 60 energy = -9.617\\n point # 80 energy = -9.317\\nNew Fermi level: mu= -9.303 eV Charge distance= 0.0299 (partial c.d.= 4.0449)\\nnegative density 2 -24.517 - usually harmless precision error, but check DOS if it persists\\nnegative density 2 -6.869 - usually harmless precision error, but check DOS if it persists\\nnegative density 2 -1.302 - usually harmless precision error, but check DOS if it persists\\nSCF ITERATION NUMBER 1\\n point # 1 energy = -40.000\\nFMS for a cluster of 23 atoms around atom type 0\\n point # 20 energy = -28.745\\n point # 40 energy = -9.303\\n point # 60 energy = -9.241\\nNew Fermi level: mu= -9.191 eV Charge distance= 0.0199 (partial c.d.= 0.0449)\\nnegative density 2 -47.433 - usually harmless precision error, but check DOS if it persists\\nnegative density 2 -15.613 - usually harmless precision error, but check DOS if it persists\\nnegative density 2 -5.581 - usually harmless precision error, but check DOS if it persists\\nnegative density 2 -0.912 - usually harmless precision error, but check DOS if it persists\\nSCF ITERATION NUMBER 1\\n point # 1 energy = -40.000\\nFMS for a cluster of 23 atoms around atom type 0\\n point # 20 energy = -28.704\\n point # 40 energy = -9.191\\n point # 60 energy = -9.129\\nNew Fermi level: mu= -9.121 eV Charge distance= 0.0134 (partial c.d.= 0.0120)\\nnegative density 2 -65.691 - usually harmless precision error, but check DOS if it persists\\nnegative density 2 -22.544 - usually harmless precision error, but check DOS if it persists\\nnegative density 2 -8.961 - usually harmless precision error, but check DOS if it persists\\nnegative density 2 -2.641 - usually harmless precision error, but check DOS if it persists\\nSCF ITERATION NUMBER 2\\n point # 1 energy = -40.000\\n point # 20 energy = -28.678\\n point # 40 energy = -9.121\\nNew Fermi level: mu= -9.080 eV Charge distance= 0.0411 (partial c.d.= 0.0078)\\nnegative density 2 -131.390 - usually harmless precision error, but check DOS if it persists\\nnegative density 2 -47.430 - usually harmless precision error, but check DOS if it persists\\nnegative density 2 -21.071 - usually harmless precision error, but check DOS if it persists\\nnegative density 2 -8.813 - usually harmless precision error, but check DOS if it persists\\nnegative density 2 -2.852 - usually harmless precision error, but check DOS if it persists\\nnegative density 3 -551.622 - usually harmless precision error, but check DOS if it persists\\nSCF ITERATION NUMBER 3\\n point # 1 energy = -40.000\\n point # 20 energy = -29.028\\n point # 40 energy = -9.080\\n point # 60 energy = -9.030\\nNew Fermi level: mu= -8.975 eV Charge distance= 0.0038 (partial c.d.= 0.0229)\\nnegative density 2 -132.230 - usually harmless precision error, but check DOS if it persists\\nnegative density 2 -47.722 - usually harmless precision error, but check DOS if it persists\\nnegative density 2 -21.195 - usually harmless precision error, but check DOS if it persists\\nnegative density 2 -8.861 - usually harmless precision error, but check DOS if it persists\\nnegative density 2 -2.864 - usually harmless precision error, but check DOS if it persists\\nnegative density 3 -574.716 - usually harmless precision error, but check DOS if it persists\\nSCF ITERATION NUMBER 4\\n point # 1 energy = -40.000\\n point # 20 energy = -28.991\\n point # 40 energy = -8.975\\nNew Fermi level: mu= -9.001 eV Charge distance= 0.0077 (partial c.d.= 0.0025)\\nnegative density 2 -132.973 - usually harmless precision error, but check DOS if it persists\\nnegative density 2 -47.991 - usually harmless precision error, but check DOS if it persists\\nnegative density 2 -21.294 - usually harmless precision error, but check DOS if it persists\\nnegative density 2 -8.882 - usually harmless precision error, but check DOS if it persists\\nnegative density 2 -2.851 - usually harmless precision error, but check DOS if it persists\\nnegative density 3 -616.926 - usually harmless precision error, but check DOS if it persists\\nSCF ITERATION NUMBER 5\\n point # 1 energy = -40.000\\n point # 20 energy = -29.000\\n point # 40 energy = -9.001\\n point # 60 energy = -9.051\\nNew Fermi level: mu= -9.053 eV Charge distance= 0.0002 (partial c.d.= 0.0050)\\nnegative density 2 -132.433 - usually harmless precision error, but check DOS if it persists\\nnegative density 2 -47.816 - usually harmless precision error, but check DOS if it persists\\nnegative density 2 -21.210 - usually harmless precision error, but check DOS if it persists\\nnegative density 2 -8.839 - usually harmless precision error, but check DOS if it persists\\nnegative density 2 -2.828 - usually harmless precision error, but check DOS if it persists\\nnegative density 3 -617.614 - usually harmless precision error, but check DOS if it persists\\nSCF ITERATION NUMBER 6\\n point # 1 energy = -40.000\\n point # 20 energy = -29.019\\n point # 40 energy = -9.053\\nNew Fermi level: mu= -9.052 eV Charge distance= 0.0004 (partial c.d.= 0.0001)\\nElectronic configuration\\n type l N_el\\n 0 0 1.408\\n 0 1 2.506\\n 0 2 0.179\\n 0 3 0.000\\n 1 0 1.845\\n 1 1 4.086\\n 1 2 0.087\\n 1 3 0.000\\n 2 0 0.795\\n 2 1 0.113\\n 2 2 0.000\\n 2 3 0.000\\n 3 0 1.396\\n 3 1 2.473\\n 3 2 0.197\\n 3 3 0.000\\nCharge transfer: type charge\\n 0 0.093\\n 1 0.018\\n 2 -0.092\\n 3 0.066\\nConvergence reached in 6 iterations.\\nDone with module: potentials.\\n\\nCalculating screened core-hole potential ...\\nFMS for a cluster of 15 atoms around iph = 0\\n 0% of energy integration\\n 20%\\n 40%\\n 60%\\n 80%\\n 100%\\nPreparing response function.\\nComputing (1 - K Chi0)^-1 v_ch\\nDone with module: screened core-hole potential.\\n\\nCalculating cross-section and phases ...\\n absorption cross section\\n x0,dx,rnrm,inrm,jnrm 8.80000000000000 5.000000000000000E-002\\n 2.14744258699983 192 193\\n phase shifts for unique potential 0\\n phase shifts for unique potential 1\\n phase shifts for unique potential 2\\n phase shifts for unique potential 3\\nDone with module: cross-section and phases (XSPH).\\n\\nFMS calculation of full Green's function ...\\nFEFF-serial using 1 thread.\\nUsing 114 energy points.\\nxprep done\\nFMS for a cluster of 23 atoms\\nEnergy point 1/ 114\\nEnergy point 10/ 114\\nEnergy point 20/ 114\\nEnergy point 30/ 114\\nEnergy point 40/ 114\\nEnergy point 50/ 114\\nEnergy point 60/ 114\\nEnergy point 70/ 114\\nEnergy point 80/ 114\\nEnergy point 90/ 114\\nEnergy point 100/ 114\\nEnergy point 110/ 114\\nDone with module: FMS.\\n\\nMKGTR: Tracing over Green's function ...\\nDone with module: MKGTR.\\n\\nPathfinder: finding scattering paths...\\nPreparing plane wave scattering amplitudes\\nSearching for paths\\nEliminating path degeneracies\\n0 paths retained.\\nDone with module: pathfinder.\\n\\nCalculating EXAFS parameters ...\\nDone with module: EXAFS parameters (GENFMT).\\n\\nCalculating XAS spectra ...\\nDone with module: XAS spectra (FF2X: DW + final sum over paths).\\n\\n \\n### feff ends at Mon Aug 15 11:00:21 EDT 2022\\n \\n\",\n", + " 'xmu.dat': ['# # 65272_C_007 FEFF 9.9.1 ',\n", + " '# # POT SCF 100 7.0000 1, screened core-hole, AFOLP (folp(0)= 1.150)',\n", + " '# # Abs Z= 6 Rmt= 0.929 Rnm= 1.136 K shell',\n", + " '# # Pot 1 Z= 8 Rmt= 0.836 Rnm= 1.068',\n", + " '# # Pot 2 Z= 1 Rmt= 0.801 Rnm= 0.974',\n", + " '# # Pot 3 Z= 6 Rmt= 0.904 Rnm= 1.115',\n", + " '# # Gam_ch=8.676E-02 H-L exch Vi= 0.000E+00 Vr= 0.000E+00',\n", + " '# # Mu=-9.052E+00eV kf=2.206E+00 Vint=-1.429E+01eV Rs_int= 1.644',\n", + " '# FMS rfms= 9.0000',\n", + " '# # PATH Rmax=-1.000, Keep_limit= 0.00, Heap_limit 0.00 Pwcrit= 2.50%',\n", + " '# S02=1.000 Temp= 0.00 Debye_temp= 0.00 Global_sig2= 0.00000',\n", + " '# Energy zero shift, vr, vi 0.00000E+00 0.00000E+00',\n", + " '# Curved wave amplitude ratio filter 4.000%',\n", + " '# file sig2 tot cw amp ratio deg nlegs reff inp sig2',\n", + " '# 0/ 0 paths used',\n", + " '# xsedge+ 50, used to normalize mu 7.4830E-03',\n", + " '# -----------------------------------------------------------------------']})" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "# read through all the files and extract some metadata\n", - "feff = load_FeffData(pathlib.Path(\"D:/BNL/AIMMDB/65272_C_007\"))\n", - "feff" + "# data path\n", + "import pathlib\n", + "\n", + "feff = load_feff_data(pathlib.Path(\"D:/BNL/AIMMDB/FEFF_Data/65272_C_007\"))\n", + "feff\n" ] }, { diff --git a/ingest/load_FEFF_Data.py b/ingest/load_FEFF_Data.py new file mode 100644 index 0000000..e354cea --- /dev/null +++ b/ingest/load_FEFF_Data.py @@ -0,0 +1,65 @@ +import copy +import pathlib + +import numpy as np +import pandas as pd + +from tiled.client import from_uri +from tiled.examples.xdi import read_xdi +from tiled.queries import Key + +import pandas as pd +from pathlib import Path + +def load_feff_data(data_path, verbose=True): + """ + Parameters + ---------- + data_path : os.PathLike + path to the feff.inp, feff.out, and xmu.dat file. + verbose : bool, optional + Prints debug information if True. + + Returns + ------- + feff_data : pandas.Dataframe + dataframe containing the xmu.dat data + metadata : dict + dictionary containing the feff.inp, feff.out, and xmu.dat metadata + """ + + data_path = Path(data_path) + + feff_inp = data_path / "feff.inp" + feff_out = data_path / "feff.out" + xmu_dat = data_path / "xmu.dat" + + data = pd.read_csv( + xmu_dat, + sep="\s+", + header=None, + names=["omega", "e", "k", "mu", "mu0", "chi"], + comment="#", + ) + + metadata = { + "feff.inp": feff_inp.read_text(), + "feff.out": feff_out.read_text(), + } + + dat = [ + line + for line in xmu_dat.read_text().splitlines() + if line.startswith("#") + ] + metadata["xmu.dat-comments"] = "\n".join(dat) + + if verbose: + print("FEFF Input:", feff_inp) + print("FEFF Output:", feff_out) + print("FEFF Data:", xmu_dat) + print(data) + print(metadata) + + # returns data and metadata, a pd.DataFrame and dict, respectively. + return data, metadata \ No newline at end of file From 69b7b8d35c22f320e9d3eed40791ad08350ad38c Mon Sep 17 00:00:00 2001 From: msegal347 Date: Sat, 22 Oct 2022 13:26:07 -0400 Subject: [PATCH 3/7] moved notebooks, included tests --- aimmdb/_tests/ingest/test_feff.py | 38 ++ aimmdb/ingest/__init__.py | 0 {ingest => aimmdb/ingest}/load_FEFF_Data.py | 0 aimmdb/schemas.py | 8 - ingest/ingest_FEFF.ipynb | 407 ------------------ {ingest => notebooks}/ingest.ipynb | 0 notebooks/ingest_FEFF.ipynb | 170 ++++++++ .../ingest_newville_example.ipynb | 69 ++- 8 files changed, 269 insertions(+), 423 deletions(-) create mode 100644 aimmdb/_tests/ingest/test_feff.py create mode 100644 aimmdb/ingest/__init__.py rename {ingest => aimmdb/ingest}/load_FEFF_Data.py (100%) delete mode 100644 ingest/ingest_FEFF.ipynb rename {ingest => notebooks}/ingest.ipynb (100%) create mode 100644 notebooks/ingest_FEFF.ipynb rename {ingest => notebooks}/ingest_newville_example.ipynb (92%) diff --git a/aimmdb/_tests/ingest/test_feff.py b/aimmdb/_tests/ingest/test_feff.py new file mode 100644 index 0000000..b24124d --- /dev/null +++ b/aimmdb/_tests/ingest/test_feff.py @@ -0,0 +1,38 @@ +import copy +import pandas as pd +from pathlib import Path + +from aimmdb.ingest import load_feff_data + + +DATA_PATH = Path("aimmdb/_tests/data/feff/65272_C_007") + + +def test_load_feff_data(): + + data, metadata = load_feff_data(DATA_PATH) + + assert isinstance(data, pd.DataFrame) + assert isinstance(metadata, dict) + assert isinstance(metadata["feff.inp"], str) + assert isinstance(metadata["feff.out"], str) + assert isinstance(metadata["xmu.dat-comments"], str) + + +def copy_feff_data(): + + data, metadata = load_feff_data(DATA_PATH) + + data_copy = copy.deepcopy(data) + + assert data.equals(data_copy) + + metadata_copy = copy.deepcopy(metadata) + + assert metadata.equals(metadata_copy) + + assert isinstance(data_copy, pd.DataFrame) + assert isinstance(metadata_copy, dict) + assert isinstance(metadata_copy["feff.inp"], str) + assert isinstance(metadata_copy["feff.out"], str) + assert isinstance(metadata_copy["xmu.dat-comments"], str) diff --git a/aimmdb/ingest/__init__.py b/aimmdb/ingest/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/ingest/load_FEFF_Data.py b/aimmdb/ingest/load_FEFF_Data.py similarity index 100% rename from ingest/load_FEFF_Data.py rename to aimmdb/ingest/load_FEFF_Data.py diff --git a/aimmdb/schemas.py b/aimmdb/schemas.py index 7479e2e..35e64f5 100644 --- a/aimmdb/schemas.py +++ b/aimmdb/schemas.py @@ -146,14 +146,6 @@ class BatteryChargeMetadataInternal(pydantic.BaseModel): class BatteryChargeMetadata(pydantic.BaseModel, extra=pydantic.Extra.allow): charge: BatteryChargeMetadataInternal -class FEFFpotentials(pydantic.BaseModel, extra=pydantic.Extra.allow): - x: Optional[str] - ipot: int - Z: str - element: int - l_scmt: int - l_fms: int - class FEFFcards(pydantic.BaseModel, extra=pydantic.Extra.allow): atoms: float diff --git a/ingest/ingest_FEFF.ipynb b/ingest/ingest_FEFF.ipynb deleted file mode 100644 index 9a0f732..0000000 --- a/ingest/ingest_FEFF.ipynb +++ /dev/null @@ -1,407 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import copy\n", - "from pathlib import Path\n", - "\n", - "import numpy as np\n", - "import pandas as pd" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "ename": "ConnectError", - "evalue": "[WinError 10061] No connection could be made because the target machine actively refused it", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mConnectionRefusedError\u001b[0m Traceback (most recent call last)", - "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpcore\\_exceptions.py:8\u001b[0m, in \u001b[0;36mmap_exceptions\u001b[1;34m(map)\u001b[0m\n\u001b[0;32m 7\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m----> 8\u001b[0m \u001b[39myield\u001b[39;00m\n\u001b[0;32m 9\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m \u001b[39mas\u001b[39;00m exc: \u001b[39m# noqa: PIE786\u001b[39;00m\n", - "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpcore\\backends\\sync.py:86\u001b[0m, in \u001b[0;36mSyncBackend.connect_tcp\u001b[1;34m(self, host, port, timeout, local_address)\u001b[0m\n\u001b[0;32m 85\u001b[0m \u001b[39mwith\u001b[39;00m map_exceptions(exc_map):\n\u001b[1;32m---> 86\u001b[0m sock \u001b[39m=\u001b[39m socket\u001b[39m.\u001b[39;49mcreate_connection(\n\u001b[0;32m 87\u001b[0m address, timeout, source_address\u001b[39m=\u001b[39;49msource_address\n\u001b[0;32m 88\u001b[0m )\n\u001b[0;32m 89\u001b[0m \u001b[39mreturn\u001b[39;00m SyncStream(sock)\n", - "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\socket.py:844\u001b[0m, in \u001b[0;36mcreate_connection\u001b[1;34m(address, timeout, source_address)\u001b[0m\n\u001b[0;32m 843\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m--> 844\u001b[0m \u001b[39mraise\u001b[39;00m err\n\u001b[0;32m 845\u001b[0m \u001b[39mfinally\u001b[39;00m:\n\u001b[0;32m 846\u001b[0m \u001b[39m# Break explicitly a reference cycle\u001b[39;00m\n", - "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\socket.py:832\u001b[0m, in \u001b[0;36mcreate_connection\u001b[1;34m(address, timeout, source_address)\u001b[0m\n\u001b[0;32m 831\u001b[0m sock\u001b[39m.\u001b[39mbind(source_address)\n\u001b[1;32m--> 832\u001b[0m sock\u001b[39m.\u001b[39;49mconnect(sa)\n\u001b[0;32m 833\u001b[0m \u001b[39m# Break explicitly a reference cycle\u001b[39;00m\n", - "\u001b[1;31mConnectionRefusedError\u001b[0m: [WinError 10061] No connection could be made because the target machine actively refused it", - "\nDuring handling of the above exception, another exception occurred:\n", - "\u001b[1;31mConnectError\u001b[0m Traceback (most recent call last)", - "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpx\\_transports\\default.py:60\u001b[0m, in \u001b[0;36mmap_httpcore_exceptions\u001b[1;34m()\u001b[0m\n\u001b[0;32m 59\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m---> 60\u001b[0m \u001b[39myield\u001b[39;00m\n\u001b[0;32m 61\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m \u001b[39mas\u001b[39;00m exc: \u001b[39m# noqa: PIE-786\u001b[39;00m\n", - "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpx\\_transports\\default.py:218\u001b[0m, in \u001b[0;36mHTTPTransport.handle_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 217\u001b[0m \u001b[39mwith\u001b[39;00m map_httpcore_exceptions():\n\u001b[1;32m--> 218\u001b[0m resp \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_pool\u001b[39m.\u001b[39;49mhandle_request(req)\n\u001b[0;32m 220\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39misinstance\u001b[39m(resp\u001b[39m.\u001b[39mstream, typing\u001b[39m.\u001b[39mIterable)\n", - "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpcore\\_sync\\connection_pool.py:253\u001b[0m, in \u001b[0;36mConnectionPool.handle_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 252\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mresponse_closed(status)\n\u001b[1;32m--> 253\u001b[0m \u001b[39mraise\u001b[39;00m exc\n\u001b[0;32m 254\u001b[0m \u001b[39melse\u001b[39;00m:\n", - "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpcore\\_sync\\connection_pool.py:237\u001b[0m, in \u001b[0;36mConnectionPool.handle_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 236\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m--> 237\u001b[0m response \u001b[39m=\u001b[39m connection\u001b[39m.\u001b[39;49mhandle_request(request)\n\u001b[0;32m 238\u001b[0m \u001b[39mexcept\u001b[39;00m ConnectionNotAvailable:\n\u001b[0;32m 239\u001b[0m \u001b[39m# The ConnectionNotAvailable exception is a special case, that\u001b[39;00m\n\u001b[0;32m 240\u001b[0m \u001b[39m# indicates we need to retry the request on a new connection.\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 244\u001b[0m \u001b[39m# might end up as an HTTP/2 connection, but which actually ends\u001b[39;00m\n\u001b[0;32m 245\u001b[0m \u001b[39m# up as HTTP/1.1.\u001b[39;00m\n", - "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpcore\\_sync\\connection.py:86\u001b[0m, in \u001b[0;36mHTTPConnection.handle_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 85\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_connect_failed \u001b[39m=\u001b[39m \u001b[39mTrue\u001b[39;00m\n\u001b[1;32m---> 86\u001b[0m \u001b[39mraise\u001b[39;00m exc\n\u001b[0;32m 87\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_connection\u001b[39m.\u001b[39mis_available():\n", - "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpcore\\_sync\\connection.py:63\u001b[0m, in \u001b[0;36mHTTPConnection.handle_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 62\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m---> 63\u001b[0m stream \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_connect(request)\n\u001b[0;32m 65\u001b[0m ssl_object \u001b[39m=\u001b[39m stream\u001b[39m.\u001b[39mget_extra_info(\u001b[39m\"\u001b[39m\u001b[39mssl_object\u001b[39m\u001b[39m\"\u001b[39m)\n", - "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpcore\\_sync\\connection.py:111\u001b[0m, in \u001b[0;36mHTTPConnection._connect\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 108\u001b[0m \u001b[39mwith\u001b[39;00m Trace(\n\u001b[0;32m 109\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mconnection.connect_tcp\u001b[39m\u001b[39m\"\u001b[39m, request, kwargs\n\u001b[0;32m 110\u001b[0m ) \u001b[39mas\u001b[39;00m trace:\n\u001b[1;32m--> 111\u001b[0m stream \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_network_backend\u001b[39m.\u001b[39mconnect_tcp(\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[0;32m 112\u001b[0m trace\u001b[39m.\u001b[39mreturn_value \u001b[39m=\u001b[39m stream\n", - "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpcore\\backends\\sync.py:86\u001b[0m, in \u001b[0;36mSyncBackend.connect_tcp\u001b[1;34m(self, host, port, timeout, local_address)\u001b[0m\n\u001b[0;32m 85\u001b[0m \u001b[39mwith\u001b[39;00m map_exceptions(exc_map):\n\u001b[1;32m---> 86\u001b[0m sock \u001b[39m=\u001b[39m socket\u001b[39m.\u001b[39mcreate_connection(\n\u001b[0;32m 87\u001b[0m address, timeout, source_address\u001b[39m=\u001b[39msource_address\n\u001b[0;32m 88\u001b[0m )\n\u001b[0;32m 89\u001b[0m \u001b[39mreturn\u001b[39;00m SyncStream(sock)\n", - "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\contextlib.py:137\u001b[0m, in \u001b[0;36m_GeneratorContextManager.__exit__\u001b[1;34m(self, typ, value, traceback)\u001b[0m\n\u001b[0;32m 136\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m--> 137\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mgen\u001b[39m.\u001b[39;49mthrow(typ, value, traceback)\n\u001b[0;32m 138\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mStopIteration\u001b[39;00m \u001b[39mas\u001b[39;00m exc:\n\u001b[0;32m 139\u001b[0m \u001b[39m# Suppress StopIteration *unless* it's the same exception that\u001b[39;00m\n\u001b[0;32m 140\u001b[0m \u001b[39m# was passed to throw(). This prevents a StopIteration\u001b[39;00m\n\u001b[0;32m 141\u001b[0m \u001b[39m# raised inside the \"with\" statement from being suppressed.\u001b[39;00m\n", - "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpcore\\_exceptions.py:12\u001b[0m, in \u001b[0;36mmap_exceptions\u001b[1;34m(map)\u001b[0m\n\u001b[0;32m 11\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(exc, from_exc):\n\u001b[1;32m---> 12\u001b[0m \u001b[39mraise\u001b[39;00m to_exc(exc)\n\u001b[0;32m 13\u001b[0m \u001b[39mraise\u001b[39;00m\n", - "\u001b[1;31mConnectError\u001b[0m: [WinError 10061] No connection could be made because the target machine actively refused it", - "\nThe above exception was the direct cause of the following exception:\n", - "\u001b[1;31mConnectError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[1;32mIn [2], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mtiled\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mclient\u001b[39;00m \u001b[39mimport\u001b[39;00m from_uri\n\u001b[1;32m----> 2\u001b[0m client \u001b[39m=\u001b[39m from_uri(\u001b[39m\"\u001b[39;49m\u001b[39mhttp://localhost:8000/api\u001b[39;49m\u001b[39m\"\u001b[39;49m)\n", - "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\tiled\\client\\constructors.py:101\u001b[0m, in \u001b[0;36mfrom_uri\u001b[1;34m(uri, structure_clients, cache, offline, username, auth_provider, api_key, token_cache, verify, prompt_for_reauthentication, headers, timeout)\u001b[0m\n\u001b[0;32m 91\u001b[0m timeout \u001b[39m=\u001b[39m httpx\u001b[39m.\u001b[39mTimeout(\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mDEFAULT_TIMEOUT_PARAMS)\n\u001b[0;32m 93\u001b[0m client \u001b[39m=\u001b[39m httpx\u001b[39m.\u001b[39mClient(\n\u001b[0;32m 94\u001b[0m base_url\u001b[39m=\u001b[39mbase_uri,\n\u001b[0;32m 95\u001b[0m verify\u001b[39m=\u001b[39mverify,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 99\u001b[0m params\u001b[39m=\u001b[39mparams,\n\u001b[0;32m 100\u001b[0m )\n\u001b[1;32m--> 101\u001b[0m context \u001b[39m=\u001b[39m Context(\n\u001b[0;32m 102\u001b[0m client,\n\u001b[0;32m 103\u001b[0m username\u001b[39m=\u001b[39;49musername,\n\u001b[0;32m 104\u001b[0m auth_provider\u001b[39m=\u001b[39;49mauth_provider,\n\u001b[0;32m 105\u001b[0m api_key\u001b[39m=\u001b[39;49mapi_key,\n\u001b[0;32m 106\u001b[0m cache\u001b[39m=\u001b[39;49mcache,\n\u001b[0;32m 107\u001b[0m offline\u001b[39m=\u001b[39;49moffline,\n\u001b[0;32m 108\u001b[0m token_cache\u001b[39m=\u001b[39;49mtoken_cache,\n\u001b[0;32m 109\u001b[0m prompt_for_reauthentication\u001b[39m=\u001b[39;49mprompt_for_reauthentication,\n\u001b[0;32m 110\u001b[0m )\n\u001b[0;32m 111\u001b[0m \u001b[39mreturn\u001b[39;00m from_context(context, structure_clients\u001b[39m=\u001b[39mstructure_clients)\n", - "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\tiled\\client\\context.py:228\u001b[0m, in \u001b[0;36mContext.__init__\u001b[1;34m(self, client, username, auth_provider, api_key, cache, offline, token_cache, prompt_for_reauthentication, app)\u001b[0m\n\u001b[0;32m 223\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mdisable_cache(allow_read\u001b[39m=\u001b[39m\u001b[39mFalse\u001b[39;00m, allow_write\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m):\n\u001b[0;32m 224\u001b[0m \u001b[39m# Make this request manually to inject custom error handling.\u001b[39;00m\n\u001b[0;32m 225\u001b[0m request \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_client\u001b[39m.\u001b[39mbuild_request(\n\u001b[0;32m 226\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mGET\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39m/\u001b[39m\u001b[39m\"\u001b[39m, params\u001b[39m=\u001b[39m{\u001b[39m\"\u001b[39m\u001b[39mroot_path\u001b[39m\u001b[39m\"\u001b[39m: \u001b[39mTrue\u001b[39;00m}\n\u001b[0;32m 227\u001b[0m )\n\u001b[1;32m--> 228\u001b[0m response \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_client\u001b[39m.\u001b[39;49msend(request)\n\u001b[0;32m 229\u001b[0m \u001b[39m# Handle case where user pastes in a link like\u001b[39;00m\n\u001b[0;32m 230\u001b[0m \u001b[39m# https://example.com/some/subpath/node/metadata/a/b/c\u001b[39;00m\n\u001b[0;32m 231\u001b[0m \u001b[39m# and it requires authentication. The 401 response includes a header\u001b[39;00m\n\u001b[0;32m 232\u001b[0m \u001b[39m# that points us to https://examples.com/some/subpath where we\u001b[39;00m\n\u001b[0;32m 233\u001b[0m \u001b[39m# can see the authentication providers and their endpoints.\u001b[39;00m\n\u001b[0;32m 234\u001b[0m \u001b[39mif\u001b[39;00m response\u001b[39m.\u001b[39mstatus_code \u001b[39m==\u001b[39m \u001b[39m401\u001b[39m:\n", - "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpx\\_client.py:902\u001b[0m, in \u001b[0;36mClient.send\u001b[1;34m(self, request, stream, auth, follow_redirects)\u001b[0m\n\u001b[0;32m 894\u001b[0m follow_redirects \u001b[39m=\u001b[39m (\n\u001b[0;32m 895\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mfollow_redirects\n\u001b[0;32m 896\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(follow_redirects, UseClientDefault)\n\u001b[0;32m 897\u001b[0m \u001b[39melse\u001b[39;00m follow_redirects\n\u001b[0;32m 898\u001b[0m )\n\u001b[0;32m 900\u001b[0m auth \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_build_request_auth(request, auth)\n\u001b[1;32m--> 902\u001b[0m response \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_send_handling_auth(\n\u001b[0;32m 903\u001b[0m request,\n\u001b[0;32m 904\u001b[0m auth\u001b[39m=\u001b[39;49mauth,\n\u001b[0;32m 905\u001b[0m follow_redirects\u001b[39m=\u001b[39;49mfollow_redirects,\n\u001b[0;32m 906\u001b[0m history\u001b[39m=\u001b[39;49m[],\n\u001b[0;32m 907\u001b[0m )\n\u001b[0;32m 908\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 909\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m stream:\n", - "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpx\\_client.py:930\u001b[0m, in \u001b[0;36mClient._send_handling_auth\u001b[1;34m(self, request, auth, follow_redirects, history)\u001b[0m\n\u001b[0;32m 927\u001b[0m request \u001b[39m=\u001b[39m \u001b[39mnext\u001b[39m(auth_flow)\n\u001b[0;32m 929\u001b[0m \u001b[39mwhile\u001b[39;00m \u001b[39mTrue\u001b[39;00m:\n\u001b[1;32m--> 930\u001b[0m response \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_send_handling_redirects(\n\u001b[0;32m 931\u001b[0m request,\n\u001b[0;32m 932\u001b[0m follow_redirects\u001b[39m=\u001b[39;49mfollow_redirects,\n\u001b[0;32m 933\u001b[0m history\u001b[39m=\u001b[39;49mhistory,\n\u001b[0;32m 934\u001b[0m )\n\u001b[0;32m 935\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 936\u001b[0m \u001b[39mtry\u001b[39;00m:\n", - "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpx\\_client.py:967\u001b[0m, in \u001b[0;36mClient._send_handling_redirects\u001b[1;34m(self, request, follow_redirects, history)\u001b[0m\n\u001b[0;32m 964\u001b[0m \u001b[39mfor\u001b[39;00m hook \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_event_hooks[\u001b[39m\"\u001b[39m\u001b[39mrequest\u001b[39m\u001b[39m\"\u001b[39m]:\n\u001b[0;32m 965\u001b[0m hook(request)\n\u001b[1;32m--> 967\u001b[0m response \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_send_single_request(request)\n\u001b[0;32m 968\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 969\u001b[0m \u001b[39mfor\u001b[39;00m hook \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_event_hooks[\u001b[39m\"\u001b[39m\u001b[39mresponse\u001b[39m\u001b[39m\"\u001b[39m]:\n", - "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpx\\_client.py:1003\u001b[0m, in \u001b[0;36mClient._send_single_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 998\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mRuntimeError\u001b[39;00m(\n\u001b[0;32m 999\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mAttempted to send an async request with a sync Client instance.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m 1000\u001b[0m )\n\u001b[0;32m 1002\u001b[0m \u001b[39mwith\u001b[39;00m request_context(request\u001b[39m=\u001b[39mrequest):\n\u001b[1;32m-> 1003\u001b[0m response \u001b[39m=\u001b[39m transport\u001b[39m.\u001b[39;49mhandle_request(request)\n\u001b[0;32m 1005\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39misinstance\u001b[39m(response\u001b[39m.\u001b[39mstream, SyncByteStream)\n\u001b[0;32m 1007\u001b[0m response\u001b[39m.\u001b[39mrequest \u001b[39m=\u001b[39m request\n", - "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpx\\_transports\\default.py:218\u001b[0m, in \u001b[0;36mHTTPTransport.handle_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 205\u001b[0m req \u001b[39m=\u001b[39m httpcore\u001b[39m.\u001b[39mRequest(\n\u001b[0;32m 206\u001b[0m method\u001b[39m=\u001b[39mrequest\u001b[39m.\u001b[39mmethod,\n\u001b[0;32m 207\u001b[0m url\u001b[39m=\u001b[39mhttpcore\u001b[39m.\u001b[39mURL(\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 215\u001b[0m extensions\u001b[39m=\u001b[39mrequest\u001b[39m.\u001b[39mextensions,\n\u001b[0;32m 216\u001b[0m )\n\u001b[0;32m 217\u001b[0m \u001b[39mwith\u001b[39;00m map_httpcore_exceptions():\n\u001b[1;32m--> 218\u001b[0m resp \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_pool\u001b[39m.\u001b[39mhandle_request(req)\n\u001b[0;32m 220\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39misinstance\u001b[39m(resp\u001b[39m.\u001b[39mstream, typing\u001b[39m.\u001b[39mIterable)\n\u001b[0;32m 222\u001b[0m \u001b[39mreturn\u001b[39;00m Response(\n\u001b[0;32m 223\u001b[0m status_code\u001b[39m=\u001b[39mresp\u001b[39m.\u001b[39mstatus,\n\u001b[0;32m 224\u001b[0m headers\u001b[39m=\u001b[39mresp\u001b[39m.\u001b[39mheaders,\n\u001b[0;32m 225\u001b[0m stream\u001b[39m=\u001b[39mResponseStream(resp\u001b[39m.\u001b[39mstream),\n\u001b[0;32m 226\u001b[0m extensions\u001b[39m=\u001b[39mresp\u001b[39m.\u001b[39mextensions,\n\u001b[0;32m 227\u001b[0m )\n", - "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\contextlib.py:137\u001b[0m, in \u001b[0;36m_GeneratorContextManager.__exit__\u001b[1;34m(self, typ, value, traceback)\u001b[0m\n\u001b[0;32m 135\u001b[0m value \u001b[39m=\u001b[39m typ()\n\u001b[0;32m 136\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m--> 137\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mgen\u001b[39m.\u001b[39;49mthrow(typ, value, traceback)\n\u001b[0;32m 138\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mStopIteration\u001b[39;00m \u001b[39mas\u001b[39;00m exc:\n\u001b[0;32m 139\u001b[0m \u001b[39m# Suppress StopIteration *unless* it's the same exception that\u001b[39;00m\n\u001b[0;32m 140\u001b[0m \u001b[39m# was passed to throw(). This prevents a StopIteration\u001b[39;00m\n\u001b[0;32m 141\u001b[0m \u001b[39m# raised inside the \"with\" statement from being suppressed.\u001b[39;00m\n\u001b[0;32m 142\u001b[0m \u001b[39mreturn\u001b[39;00m exc \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m value\n", - "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpx\\_transports\\default.py:77\u001b[0m, in \u001b[0;36mmap_httpcore_exceptions\u001b[1;34m()\u001b[0m\n\u001b[0;32m 74\u001b[0m \u001b[39mraise\u001b[39;00m\n\u001b[0;32m 76\u001b[0m message \u001b[39m=\u001b[39m \u001b[39mstr\u001b[39m(exc)\n\u001b[1;32m---> 77\u001b[0m \u001b[39mraise\u001b[39;00m mapped_exc(message) \u001b[39mfrom\u001b[39;00m \u001b[39mexc\u001b[39;00m\n", - "\u001b[1;31mConnectError\u001b[0m: [WinError 10061] No connection could be made because the target machine actively refused it" - ] - } - ], - "source": [ - "from tiled.client import from_uri\n", - "client = from_uri(\"http://localhost:8000/api\")" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Data Path: AIMMDB\\data\\feff\\65272_C_007\n" - ] - } - ], - "source": [ - "from pathlib import Path\n", - "DATA_PATH = Path(\"aimmdb/data/feff/65272_C_007\")\n", - "print(\"Data Path:\", DATA_PATH)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "ename": "FileNotFoundError", - "evalue": "[Errno 2] No such file or directory: 'AIMMDB\\\\data\\\\feff\\\\65272_C_007\\\\xmu.dat'", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[1;32mIn [7], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mload_FEFF_Data\u001b[39;00m \u001b[39mimport\u001b[39;00m load_feff_data\n\u001b[1;32m----> 3\u001b[0m load_feff_data(DATA_PATH)\n", - "File \u001b[1;32mc:\\Users\\msega\\aimmdb\\ingest\\load_FEFF_Data.py:37\u001b[0m, in \u001b[0;36mload_feff_data\u001b[1;34m(data_path, verbose)\u001b[0m\n\u001b[0;32m 34\u001b[0m feff_out \u001b[39m=\u001b[39m data_path \u001b[39m/\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mfeff.out\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m 35\u001b[0m xmu_dat \u001b[39m=\u001b[39m data_path \u001b[39m/\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mxmu.dat\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m---> 37\u001b[0m data \u001b[39m=\u001b[39m pd\u001b[39m.\u001b[39;49mread_csv(\n\u001b[0;32m 38\u001b[0m xmu_dat,\n\u001b[0;32m 39\u001b[0m sep\u001b[39m=\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39m\\\u001b[39;49m\u001b[39ms+\u001b[39;49m\u001b[39m\"\u001b[39;49m,\n\u001b[0;32m 40\u001b[0m header\u001b[39m=\u001b[39;49m\u001b[39mNone\u001b[39;49;00m,\n\u001b[0;32m 41\u001b[0m names\u001b[39m=\u001b[39;49m[\u001b[39m\"\u001b[39;49m\u001b[39momega\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39m\"\u001b[39;49m\u001b[39me\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39m\"\u001b[39;49m\u001b[39mk\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39m\"\u001b[39;49m\u001b[39mmu\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39m\"\u001b[39;49m\u001b[39mmu0\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39m\"\u001b[39;49m\u001b[39mchi\u001b[39;49m\u001b[39m\"\u001b[39;49m],\n\u001b[0;32m 42\u001b[0m comment\u001b[39m=\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39m#\u001b[39;49m\u001b[39m\"\u001b[39;49m,\n\u001b[0;32m 43\u001b[0m )\n\u001b[0;32m 45\u001b[0m metadata \u001b[39m=\u001b[39m {\n\u001b[0;32m 46\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mfeff.inp\u001b[39m\u001b[39m\"\u001b[39m: feff_inp\u001b[39m.\u001b[39mread_text(),\n\u001b[0;32m 47\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mfeff.out\u001b[39m\u001b[39m\"\u001b[39m: feff_out\u001b[39m.\u001b[39mread_text(),\n\u001b[0;32m 48\u001b[0m }\n\u001b[0;32m 50\u001b[0m dat \u001b[39m=\u001b[39m [\n\u001b[0;32m 51\u001b[0m line\n\u001b[0;32m 52\u001b[0m \u001b[39mfor\u001b[39;00m line \u001b[39min\u001b[39;00m xmu_dat\u001b[39m.\u001b[39mread_text()\u001b[39m.\u001b[39msplitlines()\n\u001b[0;32m 53\u001b[0m \u001b[39mif\u001b[39;00m line\u001b[39m.\u001b[39mstartswith(\u001b[39m\"\u001b[39m\u001b[39m#\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m 54\u001b[0m ]\n", - "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\pandas\\util\\_decorators.py:211\u001b[0m, in \u001b[0;36mdeprecate_kwarg.._deprecate_kwarg..wrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 209\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m 210\u001b[0m kwargs[new_arg_name] \u001b[39m=\u001b[39m new_arg_value\n\u001b[1;32m--> 211\u001b[0m \u001b[39mreturn\u001b[39;00m func(\u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n", - "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\pandas\\util\\_decorators.py:317\u001b[0m, in \u001b[0;36mdeprecate_nonkeyword_arguments..decorate..wrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 311\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(args) \u001b[39m>\u001b[39m num_allow_args:\n\u001b[0;32m 312\u001b[0m warnings\u001b[39m.\u001b[39mwarn(\n\u001b[0;32m 313\u001b[0m msg\u001b[39m.\u001b[39mformat(arguments\u001b[39m=\u001b[39marguments),\n\u001b[0;32m 314\u001b[0m \u001b[39mFutureWarning\u001b[39;00m,\n\u001b[0;32m 315\u001b[0m stacklevel\u001b[39m=\u001b[39mfind_stack_level(inspect\u001b[39m.\u001b[39mcurrentframe()),\n\u001b[0;32m 316\u001b[0m )\n\u001b[1;32m--> 317\u001b[0m \u001b[39mreturn\u001b[39;00m func(\u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n", - "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\pandas\\io\\parsers\\readers.py:950\u001b[0m, in \u001b[0;36mread_csv\u001b[1;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, error_bad_lines, warn_bad_lines, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options)\u001b[0m\n\u001b[0;32m 935\u001b[0m kwds_defaults \u001b[39m=\u001b[39m _refine_defaults_read(\n\u001b[0;32m 936\u001b[0m dialect,\n\u001b[0;32m 937\u001b[0m delimiter,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 946\u001b[0m defaults\u001b[39m=\u001b[39m{\u001b[39m\"\u001b[39m\u001b[39mdelimiter\u001b[39m\u001b[39m\"\u001b[39m: \u001b[39m\"\u001b[39m\u001b[39m,\u001b[39m\u001b[39m\"\u001b[39m},\n\u001b[0;32m 947\u001b[0m )\n\u001b[0;32m 948\u001b[0m kwds\u001b[39m.\u001b[39mupdate(kwds_defaults)\n\u001b[1;32m--> 950\u001b[0m \u001b[39mreturn\u001b[39;00m _read(filepath_or_buffer, kwds)\n", - "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\pandas\\io\\parsers\\readers.py:605\u001b[0m, in \u001b[0;36m_read\u001b[1;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[0;32m 602\u001b[0m _validate_names(kwds\u001b[39m.\u001b[39mget(\u001b[39m\"\u001b[39m\u001b[39mnames\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39mNone\u001b[39;00m))\n\u001b[0;32m 604\u001b[0m \u001b[39m# Create the parser.\u001b[39;00m\n\u001b[1;32m--> 605\u001b[0m parser \u001b[39m=\u001b[39m TextFileReader(filepath_or_buffer, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwds)\n\u001b[0;32m 607\u001b[0m \u001b[39mif\u001b[39;00m chunksize \u001b[39mor\u001b[39;00m iterator:\n\u001b[0;32m 608\u001b[0m \u001b[39mreturn\u001b[39;00m parser\n", - "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\pandas\\io\\parsers\\readers.py:1442\u001b[0m, in \u001b[0;36mTextFileReader.__init__\u001b[1;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[0;32m 1439\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39moptions[\u001b[39m\"\u001b[39m\u001b[39mhas_index_names\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39m=\u001b[39m kwds[\u001b[39m\"\u001b[39m\u001b[39mhas_index_names\u001b[39m\u001b[39m\"\u001b[39m]\n\u001b[0;32m 1441\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mhandles: IOHandles \u001b[39m|\u001b[39m \u001b[39mNone\u001b[39;00m \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n\u001b[1;32m-> 1442\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_engine \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_make_engine(f, \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mengine)\n", - "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\pandas\\io\\parsers\\readers.py:1729\u001b[0m, in \u001b[0;36mTextFileReader._make_engine\u001b[1;34m(self, f, engine)\u001b[0m\n\u001b[0;32m 1727\u001b[0m is_text \u001b[39m=\u001b[39m \u001b[39mFalse\u001b[39;00m\n\u001b[0;32m 1728\u001b[0m mode \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mrb\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m-> 1729\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mhandles \u001b[39m=\u001b[39m get_handle(\n\u001b[0;32m 1730\u001b[0m f,\n\u001b[0;32m 1731\u001b[0m mode,\n\u001b[0;32m 1732\u001b[0m encoding\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49moptions\u001b[39m.\u001b[39;49mget(\u001b[39m\"\u001b[39;49m\u001b[39mencoding\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39mNone\u001b[39;49;00m),\n\u001b[0;32m 1733\u001b[0m compression\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49moptions\u001b[39m.\u001b[39;49mget(\u001b[39m\"\u001b[39;49m\u001b[39mcompression\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39mNone\u001b[39;49;00m),\n\u001b[0;32m 1734\u001b[0m memory_map\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49moptions\u001b[39m.\u001b[39;49mget(\u001b[39m\"\u001b[39;49m\u001b[39mmemory_map\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39mFalse\u001b[39;49;00m),\n\u001b[0;32m 1735\u001b[0m is_text\u001b[39m=\u001b[39;49mis_text,\n\u001b[0;32m 1736\u001b[0m errors\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49moptions\u001b[39m.\u001b[39;49mget(\u001b[39m\"\u001b[39;49m\u001b[39mencoding_errors\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39m\"\u001b[39;49m\u001b[39mstrict\u001b[39;49m\u001b[39m\"\u001b[39;49m),\n\u001b[0;32m 1737\u001b[0m storage_options\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49moptions\u001b[39m.\u001b[39;49mget(\u001b[39m\"\u001b[39;49m\u001b[39mstorage_options\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39mNone\u001b[39;49;00m),\n\u001b[0;32m 1738\u001b[0m )\n\u001b[0;32m 1739\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mhandles \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m\n\u001b[0;32m 1740\u001b[0m f \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mhandles\u001b[39m.\u001b[39mhandle\n", - "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\pandas\\io\\common.py:857\u001b[0m, in \u001b[0;36mget_handle\u001b[1;34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[0m\n\u001b[0;32m 852\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39misinstance\u001b[39m(handle, \u001b[39mstr\u001b[39m):\n\u001b[0;32m 853\u001b[0m \u001b[39m# Check whether the filename is to be opened in binary mode.\u001b[39;00m\n\u001b[0;32m 854\u001b[0m \u001b[39m# Binary mode does not support 'encoding' and 'newline'.\u001b[39;00m\n\u001b[0;32m 855\u001b[0m \u001b[39mif\u001b[39;00m ioargs\u001b[39m.\u001b[39mencoding \u001b[39mand\u001b[39;00m \u001b[39m\"\u001b[39m\u001b[39mb\u001b[39m\u001b[39m\"\u001b[39m \u001b[39mnot\u001b[39;00m \u001b[39min\u001b[39;00m ioargs\u001b[39m.\u001b[39mmode:\n\u001b[0;32m 856\u001b[0m \u001b[39m# Encoding\u001b[39;00m\n\u001b[1;32m--> 857\u001b[0m handle \u001b[39m=\u001b[39m \u001b[39mopen\u001b[39;49m(\n\u001b[0;32m 858\u001b[0m handle,\n\u001b[0;32m 859\u001b[0m ioargs\u001b[39m.\u001b[39;49mmode,\n\u001b[0;32m 860\u001b[0m encoding\u001b[39m=\u001b[39;49mioargs\u001b[39m.\u001b[39;49mencoding,\n\u001b[0;32m 861\u001b[0m errors\u001b[39m=\u001b[39;49merrors,\n\u001b[0;32m 862\u001b[0m newline\u001b[39m=\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39m\"\u001b[39;49m,\n\u001b[0;32m 863\u001b[0m )\n\u001b[0;32m 864\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m 865\u001b[0m \u001b[39m# Binary mode\u001b[39;00m\n\u001b[0;32m 866\u001b[0m handle \u001b[39m=\u001b[39m \u001b[39mopen\u001b[39m(handle, ioargs\u001b[39m.\u001b[39mmode)\n", - "\u001b[1;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'AIMMDB\\\\data\\\\feff\\\\65272_C_007\\\\xmu.dat'" - ] - } - ], - "source": [ - "from load_FEFF_Data import load_feff_data\n", - "\n", - "load_feff_data(DATA_PATH)" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Files opened: D:\\BNL\\AIMMDB\\FEFF_Data\\65272_C_007\\feff.inp D:\\BNL\\AIMMDB\\FEFF_Data\\65272_C_007\\feff.out D:\\BNL\\AIMMDB\\FEFF_Data\\65272_C_007\\xmu.dat\n" - ] - }, - { - "data": { - "text/plain": [ - "( omega e k mu mu0 chi\n", - " 0 274.278 -11.491 -0.80 0.008794 0.011493 -0.002699\n", - " 1 274.741 -11.028 -0.72 0.010453 0.014542 -0.004089\n", - " 2 275.156 -10.613 -0.64 0.012638 0.018913 -0.006275\n", - " 3 275.521 -10.247 -0.56 0.015644 0.025455 -0.009811\n", - " 4 275.838 -9.930 -0.48 0.020026 0.035790 -0.015765\n", - " .. ... ... ... ... ... ...\n", - " 95 325.002 39.234 3.56 0.981847 1.006860 -0.025010\n", - " 96 326.094 40.325 3.60 0.986115 1.001850 -0.015738\n", - " 97 327.197 41.428 3.64 0.989862 0.996706 -0.006844\n", - " 98 328.313 42.544 3.68 0.992953 0.991381 0.001572\n", - " 99 329.440 43.672 3.72 0.995197 0.985933 0.009264\n", - " \n", - " [100 rows x 6 columns],\n", - " {'feff.inp': 'TITLE 65272_C_007\\n\\nEDGE K\\nS02 1.0\\nCOREHOLE RPA\\nCONTROL 1 1 1 1 1 1\\n\\nXANES 4 0.04 0.1\\n\\nFMS 9.0\\nEXCHANGE 0 0.0 0.0 2\\nSCF 7.0 1 100 0.2 3\\nRPATH -1\\n\\nPOTENTIALS\\n*\\tipot\\tZ\\telement\\tl_scmt\\tl_fms\\n0\\t6\\tC\\t-1\\t-1\\t8\\n1\\t8\\tO\\t-1\\t-1\\t1\\n2\\t1\\tH\\t-1\\t-1\\t14\\n3\\t6\\tC\\t-1\\t-1\\t8\\n\\nATOMS\\n0.11604509 1.38784311 0.13505013 1\\n0.20666659 -0.06290667 0.03415119 3\\n0.94990656 -0.55278239 -1.02579164 3\\n1.29005187 -0.84456617 0.37110379 3\\n-1.29324947 -0.72339127 0.13453590 3\\n-1.97009972 -0.32847353 1.56467581 3\\n-3.42889944 -0.78651651 1.14860359 3\\n-3.77383154 0.00398307 -0.11424171 0\\n-2.22576479 0.04473701 -0.80950366 3\\n0.04259256 1.50349635 1.04473692 2\\n0.73134101 -1.30930483 -1.56407524 2\\n1.61476561 0.19846610 -1.46760323 2\\n1.99713078 -0.31513827 1.00205616 2\\n1.06157229 -1.96780200 0.86736120 2\\n-1.18655214 -1.78808606 -0.07935984 2\\n-1.57211827 -1.09624803 2.18470682 2\\n-1.77161235 0.63742451 1.86021944 2\\n-3.72126728 -1.82168908 1.09559373 2\\n-4.04110075 -0.32200572 1.99098428 2\\n-4.48166510 -0.68255787 -0.81378802 2\\n-4.07898758 1.08233018 -0.08022361 2\\n-1.91391821 0.99342510 -1.10212711 2\\n-2.35106517 -0.44231706 -1.87892007 2\\nEND\\n',\n", - " 'feff.out': \"### feff starts at Mon Aug 15 11:00:04 EDT 2022\\n### Serial version\\n### FeffPath is /sdcc/u/mcarbone/software/JFEFF/hsw/feff90/linux\\n \\nLaunching FEFF version FEFF 9.9.1\\nCore hole lifetime is 0.087 eV.\\nYour calculation:\\n 65272_C_007\\nC K edge XANES using RPA corehole.\\nUsing: * Self-Consistent Field potentials\\nUsing cards: ATOMS CONTROL EXCHANGE TITLE RPATH POTENTIALS XANES EDGE SCF FMS S02 COREHOLE\\n\\n :WARNING TWO ATOMS VERY CLOSE TOGETHER. CHECK INPUT.\\n atoms 1 10 distance 0.91995E+00 Angstrom\\n 1 1.16045E-01 1.38784E+00 1.35050E-01 Z= 8\\n 10 4.25926E-02 1.50350E+00 1.04474E+00 Z= 1\\nCalculating atomic potentials ...\\n overlapped atomic potential and density for unique potential 0\\n overlapped atomic potential and density for unique potential 1\\n overlapped atomic potential and density for unique potential 2\\n overlapped atomic potential and density for unique potential 3\\nDone with module: atomic potentials.\\n\\nCalculating SCF potentials ...\\nFEFF-serial using 1 thread.\\nMuffin tin radii and interstitial parameters [bohr]:\\ntype, norman radius, muffin tin, overlap factor\\n 0 1.13638E+00 9.29428E-01 1.15000E+00\\n 1 1.06795E+00 8.35957E-01 1.15000E+00\\n 2 9.74431E-01 8.01033E-01 1.15000E+00\\n 3 1.11495E+00 9.03613E-01 1.15000E+00\\nCore-valence separation energy: ecv= -40.000 eV\\nInitial Fermi level: mu= 4.233 eV\\nSCF ITERATION NUMBER 1\\n point # 1 energy = -40.000\\nFMS for a cluster of 23 atoms around atom type 0\\n point # 20 energy = -28.920\\n point # 40 energy = -12.611\\n point # 60 energy = -9.617\\n point # 80 energy = -9.317\\nNew Fermi level: mu= -9.303 eV Charge distance= 0.0299 (partial c.d.= 4.0449)\\nnegative density 2 -24.517 - usually harmless precision error, but check DOS if it persists\\nnegative density 2 -6.869 - usually harmless precision error, but check DOS if it persists\\nnegative density 2 -1.302 - usually harmless precision error, but check DOS if it persists\\nSCF ITERATION NUMBER 1\\n point # 1 energy = -40.000\\nFMS for a cluster of 23 atoms around atom type 0\\n point # 20 energy = -28.745\\n point # 40 energy = -9.303\\n point # 60 energy = -9.241\\nNew Fermi level: mu= -9.191 eV Charge distance= 0.0199 (partial c.d.= 0.0449)\\nnegative density 2 -47.433 - usually harmless precision error, but check DOS if it persists\\nnegative density 2 -15.613 - usually harmless precision error, but check DOS if it persists\\nnegative density 2 -5.581 - usually harmless precision error, but check DOS if it persists\\nnegative density 2 -0.912 - usually harmless precision error, but check DOS if it persists\\nSCF ITERATION NUMBER 1\\n point # 1 energy = -40.000\\nFMS for a cluster of 23 atoms around atom type 0\\n point # 20 energy = -28.704\\n point # 40 energy = -9.191\\n point # 60 energy = -9.129\\nNew Fermi level: mu= -9.121 eV Charge distance= 0.0134 (partial c.d.= 0.0120)\\nnegative density 2 -65.691 - usually harmless precision error, but check DOS if it persists\\nnegative density 2 -22.544 - usually harmless precision error, but check DOS if it persists\\nnegative density 2 -8.961 - usually harmless precision error, but check DOS if it persists\\nnegative density 2 -2.641 - usually harmless precision error, but check DOS if it persists\\nSCF ITERATION NUMBER 2\\n point # 1 energy = -40.000\\n point # 20 energy = -28.678\\n point # 40 energy = -9.121\\nNew Fermi level: mu= -9.080 eV Charge distance= 0.0411 (partial c.d.= 0.0078)\\nnegative density 2 -131.390 - usually harmless precision error, but check DOS if it persists\\nnegative density 2 -47.430 - usually harmless precision error, but check DOS if it persists\\nnegative density 2 -21.071 - usually harmless precision error, but check DOS if it persists\\nnegative density 2 -8.813 - usually harmless precision error, but check DOS if it persists\\nnegative density 2 -2.852 - usually harmless precision error, but check DOS if it persists\\nnegative density 3 -551.622 - usually harmless precision error, but check DOS if it persists\\nSCF ITERATION NUMBER 3\\n point # 1 energy = -40.000\\n point # 20 energy = -29.028\\n point # 40 energy = -9.080\\n point # 60 energy = -9.030\\nNew Fermi level: mu= -8.975 eV Charge distance= 0.0038 (partial c.d.= 0.0229)\\nnegative density 2 -132.230 - usually harmless precision error, but check DOS if it persists\\nnegative density 2 -47.722 - usually harmless precision error, but check DOS if it persists\\nnegative density 2 -21.195 - usually harmless precision error, but check DOS if it persists\\nnegative density 2 -8.861 - usually harmless precision error, but check DOS if it persists\\nnegative density 2 -2.864 - usually harmless precision error, but check DOS if it persists\\nnegative density 3 -574.716 - usually harmless precision error, but check DOS if it persists\\nSCF ITERATION NUMBER 4\\n point # 1 energy = -40.000\\n point # 20 energy = -28.991\\n point # 40 energy = -8.975\\nNew Fermi level: mu= -9.001 eV Charge distance= 0.0077 (partial c.d.= 0.0025)\\nnegative density 2 -132.973 - usually harmless precision error, but check DOS if it persists\\nnegative density 2 -47.991 - usually harmless precision error, but check DOS if it persists\\nnegative density 2 -21.294 - usually harmless precision error, but check DOS if it persists\\nnegative density 2 -8.882 - usually harmless precision error, but check DOS if it persists\\nnegative density 2 -2.851 - usually harmless precision error, but check DOS if it persists\\nnegative density 3 -616.926 - usually harmless precision error, but check DOS if it persists\\nSCF ITERATION NUMBER 5\\n point # 1 energy = -40.000\\n point # 20 energy = -29.000\\n point # 40 energy = -9.001\\n point # 60 energy = -9.051\\nNew Fermi level: mu= -9.053 eV Charge distance= 0.0002 (partial c.d.= 0.0050)\\nnegative density 2 -132.433 - usually harmless precision error, but check DOS if it persists\\nnegative density 2 -47.816 - usually harmless precision error, but check DOS if it persists\\nnegative density 2 -21.210 - usually harmless precision error, but check DOS if it persists\\nnegative density 2 -8.839 - usually harmless precision error, but check DOS if it persists\\nnegative density 2 -2.828 - usually harmless precision error, but check DOS if it persists\\nnegative density 3 -617.614 - usually harmless precision error, but check DOS if it persists\\nSCF ITERATION NUMBER 6\\n point # 1 energy = -40.000\\n point # 20 energy = -29.019\\n point # 40 energy = -9.053\\nNew Fermi level: mu= -9.052 eV Charge distance= 0.0004 (partial c.d.= 0.0001)\\nElectronic configuration\\n type l N_el\\n 0 0 1.408\\n 0 1 2.506\\n 0 2 0.179\\n 0 3 0.000\\n 1 0 1.845\\n 1 1 4.086\\n 1 2 0.087\\n 1 3 0.000\\n 2 0 0.795\\n 2 1 0.113\\n 2 2 0.000\\n 2 3 0.000\\n 3 0 1.396\\n 3 1 2.473\\n 3 2 0.197\\n 3 3 0.000\\nCharge transfer: type charge\\n 0 0.093\\n 1 0.018\\n 2 -0.092\\n 3 0.066\\nConvergence reached in 6 iterations.\\nDone with module: potentials.\\n\\nCalculating screened core-hole potential ...\\nFMS for a cluster of 15 atoms around iph = 0\\n 0% of energy integration\\n 20%\\n 40%\\n 60%\\n 80%\\n 100%\\nPreparing response function.\\nComputing (1 - K Chi0)^-1 v_ch\\nDone with module: screened core-hole potential.\\n\\nCalculating cross-section and phases ...\\n absorption cross section\\n x0,dx,rnrm,inrm,jnrm 8.80000000000000 5.000000000000000E-002\\n 2.14744258699983 192 193\\n phase shifts for unique potential 0\\n phase shifts for unique potential 1\\n phase shifts for unique potential 2\\n phase shifts for unique potential 3\\nDone with module: cross-section and phases (XSPH).\\n\\nFMS calculation of full Green's function ...\\nFEFF-serial using 1 thread.\\nUsing 114 energy points.\\nxprep done\\nFMS for a cluster of 23 atoms\\nEnergy point 1/ 114\\nEnergy point 10/ 114\\nEnergy point 20/ 114\\nEnergy point 30/ 114\\nEnergy point 40/ 114\\nEnergy point 50/ 114\\nEnergy point 60/ 114\\nEnergy point 70/ 114\\nEnergy point 80/ 114\\nEnergy point 90/ 114\\nEnergy point 100/ 114\\nEnergy point 110/ 114\\nDone with module: FMS.\\n\\nMKGTR: Tracing over Green's function ...\\nDone with module: MKGTR.\\n\\nPathfinder: finding scattering paths...\\nPreparing plane wave scattering amplitudes\\nSearching for paths\\nEliminating path degeneracies\\n0 paths retained.\\nDone with module: pathfinder.\\n\\nCalculating EXAFS parameters ...\\nDone with module: EXAFS parameters (GENFMT).\\n\\nCalculating XAS spectra ...\\nDone with module: XAS spectra (FF2X: DW + final sum over paths).\\n\\n \\n### feff ends at Mon Aug 15 11:00:21 EDT 2022\\n \\n\",\n", - " 'xmu.dat': ['# # 65272_C_007 FEFF 9.9.1 ',\n", - " '# # POT SCF 100 7.0000 1, screened core-hole, AFOLP (folp(0)= 1.150)',\n", - " '# # Abs Z= 6 Rmt= 0.929 Rnm= 1.136 K shell',\n", - " '# # Pot 1 Z= 8 Rmt= 0.836 Rnm= 1.068',\n", - " '# # Pot 2 Z= 1 Rmt= 0.801 Rnm= 0.974',\n", - " '# # Pot 3 Z= 6 Rmt= 0.904 Rnm= 1.115',\n", - " '# # Gam_ch=8.676E-02 H-L exch Vi= 0.000E+00 Vr= 0.000E+00',\n", - " '# # Mu=-9.052E+00eV kf=2.206E+00 Vint=-1.429E+01eV Rs_int= 1.644',\n", - " '# FMS rfms= 9.0000',\n", - " '# # PATH Rmax=-1.000, Keep_limit= 0.00, Heap_limit 0.00 Pwcrit= 2.50%',\n", - " '# S02=1.000 Temp= 0.00 Debye_temp= 0.00 Global_sig2= 0.00000',\n", - " '# Energy zero shift, vr, vi 0.00000E+00 0.00000E+00',\n", - " '# Curved wave amplitude ratio filter 4.000%',\n", - " '# file sig2 tot cw amp ratio deg nlegs reff inp sig2',\n", - " '# 0/ 0 paths used',\n", - " '# xsedge+ 50, used to normalize mu 7.4830E-03',\n", - " '# -----------------------------------------------------------------------']})" - ] - }, - "execution_count": 51, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# data path\n", - "import pathlib\n", - "\n", - "feff = load_feff_data(pathlib.Path(\"D:/BNL/AIMMDB/FEFF_Data/65272_C_007\"))\n", - "feff\n" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "def ingest_feff(client, df, verbose=False):\n", - " \"\"\"\n", - " Upload the FEFF dataset to database\n", - " \"\"\"\n", - "\n", - " for (name, prep), g in df.groupby([\"sample.name\", \"sample.prep\"]):\n", - " if verbose:\n", - " print(f\"{name}: {prep}, {len(g)}\")\n", - "\n", - " sample_id = client.write_sample({\"name\" : name, \"prep\" : prep})\n", - "\n", - " for i, row in g.iterrows():\n", - " feff_df, _ = read_dat(row.file)\n", - " metadata = row.metadata\n", - " metadata[\"dataset\"] = \"feff\"\n", - " metadata[\"sample_id\"] = sample_id\n", - " client[\"uid\"].write_dataframe(feff_df, metadata=metadata, specs=[\"FEFF\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "starting ingestion...\n" - ] - }, - { - "ename": "NameError", - "evalue": "name 'client' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[1;32mIn [12], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39mstarting ingestion...\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m----> 2\u001b[0m ingest_feff(client, feff, verbose\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m)\n\u001b[0;32m 3\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39mfinished.\u001b[39m\u001b[39m\"\u001b[39m)\n", - "\u001b[1;31mNameError\u001b[0m: name 'client' is not defined" - ] - } - ], - "source": [ - "print(\"starting ingestion...\")\n", - "ingest_feff(client, feff, verbose=True)\n", - "print(\"finished.\")" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'title': 'FEFFInputMetadata',\n", - " 'type': 'object',\n", - " 'properties': {'element': {'$ref': '#/definitions/XDIElement'},\n", - " 'measurement_type': {'default': 'xas',\n", - " 'allOf': [{'$ref': '#/definitions/MeasurementEnum'}]},\n", - " 'dataset': {'title': 'Dataset', 'type': 'string'},\n", - " 'sample_id': {'title': 'Sample Id', 'type': 'string'},\n", - " 'input_script': {'title': 'Input Script', 'type': 'string'}},\n", - " 'required': ['element', 'dataset', 'sample_id', 'input_script'],\n", - " 'definitions': {'XDIElement': {'title': 'XDIElement',\n", - " 'type': 'object',\n", - " 'properties': {'symbol': {'title': 'Symbol', 'type': 'string'},\n", - " 'edge': {'title': 'Edge', 'type': 'string'}},\n", - " 'required': ['symbol', 'edge']},\n", - " 'MeasurementEnum': {'title': 'MeasurementEnum',\n", - " 'description': 'An enumeration.',\n", - " 'enum': ['xas', 'rixs'],\n", - " 'type': 'string'}}}" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "FEFFInputMetadata.schema()" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'title': 'FEFFOutputMetadata',\n", - " 'type': 'object',\n", - " 'properties': {'element': {'$ref': '#/definitions/XDIElement'},\n", - " 'measurement_type': {'default': 'xas',\n", - " 'allOf': [{'$ref': '#/definitions/MeasurementEnum'}]},\n", - " 'dataset': {'title': 'Dataset', 'type': 'string'},\n", - " 'sample_id': {'title': 'Sample Id', 'type': 'string'},\n", - " 'output_script': {'title': 'Output Script', 'type': 'string'}},\n", - " 'required': ['element', 'dataset', 'sample_id', 'output_script'],\n", - " 'definitions': {'XDIElement': {'title': 'XDIElement',\n", - " 'type': 'object',\n", - " 'properties': {'symbol': {'title': 'Symbol', 'type': 'string'},\n", - " 'edge': {'title': 'Edge', 'type': 'string'}},\n", - " 'required': ['symbol', 'edge']},\n", - " 'MeasurementEnum': {'title': 'MeasurementEnum',\n", - " 'description': 'An enumeration.',\n", - " 'enum': ['xas', 'rixs'],\n", - " 'type': 'string'}}}" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "FEFFOutputMetadata.schema()" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'title': 'ExperimentalXASMetadata',\n", - " 'type': 'object',\n", - " 'properties': {'element': {'$ref': '#/definitions/XDIElement'},\n", - " 'measurement_type': {'default': 'xas',\n", - " 'allOf': [{'$ref': '#/definitions/MeasurementEnum'}]},\n", - " 'dataset': {'title': 'Dataset', 'type': 'string'},\n", - " 'sample_id': {'title': 'Sample Id', 'type': 'string'},\n", - " 'facility': {'$ref': '#/definitions/FacilityMetadata'},\n", - " 'beamline': {'$ref': '#/definitions/BeamlineMetadata'}},\n", - " 'required': ['element', 'dataset', 'facility', 'beamline'],\n", - " 'definitions': {'XDIElement': {'title': 'XDIElement',\n", - " 'type': 'object',\n", - " 'properties': {'symbol': {'title': 'Symbol', 'type': 'string'},\n", - " 'edge': {'title': 'Edge', 'type': 'string'}},\n", - " 'required': ['symbol', 'edge']},\n", - " 'MeasurementEnum': {'title': 'MeasurementEnum',\n", - " 'description': 'An enumeration.',\n", - " 'enum': ['xas', 'rixs'],\n", - " 'type': 'string'},\n", - " 'FacilityMetadata': {'title': 'FacilityMetadata',\n", - " 'type': 'object',\n", - " 'properties': {'name': {'title': 'Name', 'type': 'string'}},\n", - " 'required': ['name']},\n", - " 'BeamlineMetadata': {'title': 'BeamlineMetadata',\n", - " 'type': 'object',\n", - " 'properties': {'name': {'title': 'Name', 'type': 'string'}},\n", - " 'required': ['name']}}}" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# we will enforce that XAS metadata satisfies the following schema\n", - "ExperimentalXASMetadata.schema()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "client[\"uid\"]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# with the correct metadata we can write to the server\n", - "# NOTE this doesn't prevent you from writing garbage but does help\n", - "df = pd.DataFrame({\"a\" : np.random.rand(100), \"b\" : np.random.rand(100)})\n", - "metadata = {\"dataset\" : \"feff\", \"foo\" : \"bar\", \"element\" : {\"symbol\" : \"Au\", \"edge\" : \"K\"}, \"facility\" : {\"name\" : \"ALS\"}, \"beamline\" : {\"name\" : \"8.0.1\"}}\n", - "node = client[\"uid\"].write_dataframe(df, metadata=metadata, specs=[\"FEFF\"])\n", - "node" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3.9.13 ('aimm')", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.13" - }, - "orig_nbformat": 4, - "vscode": { - "interpreter": { - "hash": "189d756eac8438d33e11f8e23aa09bdc4c99760ed11a3bfefa464e31dcca4c4a" - } - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/ingest/ingest.ipynb b/notebooks/ingest.ipynb similarity index 100% rename from ingest/ingest.ipynb rename to notebooks/ingest.ipynb diff --git a/notebooks/ingest_FEFF.ipynb b/notebooks/ingest_FEFF.ipynb new file mode 100644 index 0000000..0577106 --- /dev/null +++ b/notebooks/ingest_FEFF.ipynb @@ -0,0 +1,170 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import copy\n", + "from pathlib import Path\n", + "\n", + "import numpy as np\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from tiled.client import from_uri\n", + "client = from_uri(\"http://localhost:8000/api\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "import os\n", + "from pathlib import Path\n", + "\n", + "\n", + "DATA_PATH = Path(\"aimmdb/data/feff/65272_C_007\")\n", + "print(\"Data Path:\", DATA_PATH)\n", + "\n", + "contents = os.listdir(DATA_PATH)\n", + "print(\"Contents:\", contents)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from load_FEFF_Data import load_feff_data\n", + "\n", + "data, metadata = load_feff_data(DATA_PATH)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def ingest_feff(client, df, verbose=False):\n", + " \"\"\"\n", + " Upload the FEFF dataset to database\n", + " \"\"\"\n", + "\n", + " for (name, prep), g in df.groupby([\"sample.name\", \"sample.prep\"]):\n", + " if verbose:\n", + " print(f\"{name}: {prep}, {len(g)}\")\n", + "\n", + " sample_id = client.write_sample({\"name\" : name, \"prep\" : prep})\n", + "\n", + " for i, row in g.iterrows():\n", + " feff_df, _ = read_dat(row.file)\n", + " metadata = row.metadata\n", + " metadata[\"dataset\"] = \"feff\"\n", + " metadata[\"sample_id\"] = sample_id\n", + " client[\"uid\"].write_dataframe(feff_df, metadata=metadata, specs=[\"FEFF\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"starting ingestion...\")\n", + "ingest_feff(client, feff, verbose=True)\n", + "print(\"finished.\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "FEFFInputMetadata.schema()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "FEFFOutputMetadata.schema()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# we will enforce that XAS metadata satisfies the following schema\n", + "ExperimentalXASMetadata.schema()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "client[\"uid\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# with the correct metadata we can write to the server\n", + "# NOTE this doesn't prevent you from writing garbage but does help\n", + "df = pd.DataFrame({\"a\" : np.random.rand(100), \"b\" : np.random.rand(100)})\n", + "metadata = {\"dataset\" : \"feff\", \"foo\" : \"bar\", \"element\" : {\"symbol\" : \"Au\", \"edge\" : \"K\"}, \"facility\" : {\"name\" : \"ALS\"}, \"beamline\" : {\"name\" : \"8.0.1\"}}\n", + "node = client[\"uid\"].write_dataframe(df, metadata=metadata, specs=[\"FEFF\"])\n", + "node" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.10.4 ('my_pymatgen')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.4" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "8cf392b7cd98023928c855fd79964086ca343b5f82a42ebb28f5e83ba8cfe45c" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/ingest/ingest_newville_example.ipynb b/notebooks/ingest_newville_example.ipynb similarity index 92% rename from ingest/ingest_newville_example.ipynb rename to notebooks/ingest_newville_example.ipynb index f178340..5851cf8 100644 --- a/ingest/ingest_newville_example.ipynb +++ b/notebooks/ingest_newville_example.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "3d0110f7-ba1d-4817-95e0-1a9ec70770a0", "metadata": {}, "outputs": [], @@ -34,10 +34,47 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "1ff6333b-690f-4119-a2b2-bbe5c94c3112", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{'title': 'ExperimentalXASMetadata',\n", + " 'type': 'object',\n", + " 'properties': {'element': {'$ref': '#/definitions/XDIElement'},\n", + " 'measurement_type': {'default': 'xas',\n", + " 'allOf': [{'$ref': '#/definitions/MeasurementEnum'}]},\n", + " 'dataset': {'title': 'Dataset', 'type': 'string'},\n", + " 'sample_id': {'title': 'Sample Id', 'type': 'string'},\n", + " 'facility': {'$ref': '#/definitions/FacilityMetadata'},\n", + " 'beamline': {'$ref': '#/definitions/BeamlineMetadata'}},\n", + " 'required': ['element', 'dataset', 'facility', 'beamline'],\n", + " 'definitions': {'XDIElement': {'title': 'XDIElement',\n", + " 'type': 'object',\n", + " 'properties': {'symbol': {'title': 'Symbol', 'type': 'string'},\n", + " 'edge': {'title': 'Edge', 'type': 'string'}},\n", + " 'required': ['symbol', 'edge']},\n", + " 'MeasurementEnum': {'title': 'MeasurementEnum',\n", + " 'description': 'An enumeration.',\n", + " 'enum': ['xas', 'rixs'],\n", + " 'type': 'string'},\n", + " 'FacilityMetadata': {'title': 'FacilityMetadata',\n", + " 'type': 'object',\n", + " 'properties': {'name': {'title': 'Name', 'type': 'string'}},\n", + " 'required': ['name']},\n", + " 'BeamlineMetadata': {'title': 'BeamlineMetadata',\n", + " 'type': 'object',\n", + " 'properties': {'name': {'title': 'Name', 'type': 'string'}},\n", + " 'required': ['name']}}}" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# we will enforce that XAS metadata satisfies the following schema\n", "ExperimentalXASMetadata.schema()" @@ -56,10 +93,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "cbf5e6a8-77b8-4c1d-80f5-f773cbcc5681", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "ExperimentalXASMetadata(element=XDIElement(symbol='Fe', edge='K'), measurement_type='xas', dataset='example', sample_id=None, facility=FacilityMetadata(name=None), beamline=BeamlineMetadata(name='8.0.1'))" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# example of valid metadata\n", "metadata = {\n", @@ -1056,9 +1104,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python [conda env:aimm]", + "display_name": "Python 3.9.13 ('aimm')", "language": "python", - "name": "conda-env-aimm-py" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -1070,7 +1118,12 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.4" + "version": "3.9.13" + }, + "vscode": { + "interpreter": { + "hash": "189d756eac8438d33e11f8e23aa09bdc4c99760ed11a3bfefa464e31dcca4c4a" + } } }, "nbformat": 4, From aac4c9edc1ac654f074f480dd2b032a3eb2b88f2 Mon Sep 17 00:00:00 2001 From: msegal347 Date: Mon, 24 Oct 2022 10:25:15 -0400 Subject: [PATCH 4/7] updated notebook with output cleaning --- aimmdb/schemas.py | 2 +- notebooks/ingest_FEFF.ipynb | 200 +++++++++++++++++++++++++++++++----- 2 files changed, 178 insertions(+), 24 deletions(-) diff --git a/aimmdb/schemas.py b/aimmdb/schemas.py index 35e64f5..34e4b20 100644 --- a/aimmdb/schemas.py +++ b/aimmdb/schemas.py @@ -153,7 +153,7 @@ class FEFFcards(pydantic.BaseModel, extra=pydantic.Extra.allow): exchange: float title: Optional[str] rpath: int - potentials: FEFFpotentials + potentials: float xanes: float edge: str scf: float diff --git a/notebooks/ingest_FEFF.ipynb b/notebooks/ingest_FEFF.ipynb index 0577106..db9dd04 100644 --- a/notebooks/ingest_FEFF.ipynb +++ b/notebooks/ingest_FEFF.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -10,7 +10,8 @@ "from pathlib import Path\n", "\n", "import numpy as np\n", - "import pandas as pd" + "import pandas as pd\n", + "import nbconvert" ] }, { @@ -25,9 +26,28 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Data Path: aimmdb\\data\\feff\\65272_C_007\n" + ] + }, + { + "ename": "FileNotFoundError", + "evalue": "[WinError 3] The system cannot find the path specified: 'aimmdb\\\\data\\\\feff\\\\65272_C_007'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn [5], line 8\u001b[0m\n\u001b[0;32m 5\u001b[0m DATA_PATH \u001b[39m=\u001b[39m Path(\u001b[39m\"\u001b[39m\u001b[39maimmdb/data/feff/65272_C_007\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m 6\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39mData Path:\u001b[39m\u001b[39m\"\u001b[39m, DATA_PATH)\n\u001b[1;32m----> 8\u001b[0m contents \u001b[39m=\u001b[39m os\u001b[39m.\u001b[39;49mlistdir(DATA_PATH)\n\u001b[0;32m 9\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39mContents:\u001b[39m\u001b[39m\"\u001b[39m, contents)\n", + "\u001b[1;31mFileNotFoundError\u001b[0m: [WinError 3] The system cannot find the path specified: 'aimmdb\\\\data\\\\feff\\\\65272_C_007'" + ] + } + ], "source": [ "\n", "import os\n", @@ -47,14 +67,14 @@ "metadata": {}, "outputs": [], "source": [ - "from load_FEFF_Data import load_feff_data\n", + "from aimmdb.ingest.load_FEFF_Data import load_feff_data\n", "\n", "data, metadata = load_feff_data(DATA_PATH)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -70,7 +90,7 @@ " sample_id = client.write_sample({\"name\" : name, \"prep\" : prep})\n", "\n", " for i, row in g.iterrows():\n", - " feff_df, _ = read_dat(row.file)\n", + " feff_df, _ = load_feff_data.dat(row.file)\n", " metadata = row.metadata\n", " metadata[\"dataset\"] = \"feff\"\n", " metadata[\"sample_id\"] = sample_id\n", @@ -79,41 +99,156 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "starting ingestion...\n" + ] + }, + { + "ename": "NameError", + "evalue": "name 'client' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn [9], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39mstarting ingestion...\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m----> 2\u001b[0m ingest_feff(client, load_feff_data\u001b[39m.\u001b[39mdata, verbose\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m)\n\u001b[0;32m 3\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39mfinished.\u001b[39m\u001b[39m\"\u001b[39m)\n", + "\u001b[1;31mNameError\u001b[0m: name 'client' is not defined" + ] + } + ], "source": [ "print(\"starting ingestion...\")\n", - "ingest_feff(client, feff, verbose=True)\n", + "ingest_feff(client, load_feff_data.data, verbose=True)\n", "print(\"finished.\")" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{'title': 'FEFFInputMetadata',\n", + " 'type': 'object',\n", + " 'properties': {'element': {'$ref': '#/definitions/XDIElement'},\n", + " 'measurement_type': {'default': 'FEFF',\n", + " 'allOf': [{'$ref': '#/definitions/MeasurementEnum'}]},\n", + " 'dataset': {'title': 'Dataset', 'type': 'string'},\n", + " 'sample_id': {'title': 'Sample Id', 'type': 'string'},\n", + " 'input_script': {'title': 'Input Script', 'type': 'string'}},\n", + " 'required': ['element', 'dataset', 'sample_id', 'input_script'],\n", + " 'definitions': {'XDIElement': {'title': 'XDIElement',\n", + " 'type': 'object',\n", + " 'properties': {'symbol': {'title': 'Symbol', 'type': 'string'},\n", + " 'edge': {'title': 'Edge', 'type': 'string'}},\n", + " 'required': ['symbol', 'edge']},\n", + " 'MeasurementEnum': {'title': 'MeasurementEnum',\n", + " 'description': 'An enumeration.',\n", + " 'enum': ['xas', 'rixs'],\n", + " 'type': 'string'}}}" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "FEFFInputMetadata.schema()" + "import aimmdb.schemas as schemas\n", + "\n", + "schemas.FEFFInputMetadata.schema()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{'title': 'FEFFOutputMetadata',\n", + " 'type': 'object',\n", + " 'properties': {'element': {'$ref': '#/definitions/XDIElement'},\n", + " 'measurement_type': {'default': 'FEFF',\n", + " 'allOf': [{'$ref': '#/definitions/MeasurementEnum'}]},\n", + " 'dataset': {'title': 'Dataset', 'type': 'string'},\n", + " 'sample_id': {'title': 'Sample Id', 'type': 'string'},\n", + " 'output_log': {'title': 'Output Log', 'type': 'string'}},\n", + " 'required': ['element', 'dataset', 'sample_id', 'output_log'],\n", + " 'definitions': {'XDIElement': {'title': 'XDIElement',\n", + " 'type': 'object',\n", + " 'properties': {'symbol': {'title': 'Symbol', 'type': 'string'},\n", + " 'edge': {'title': 'Edge', 'type': 'string'}},\n", + " 'required': ['symbol', 'edge']},\n", + " 'MeasurementEnum': {'title': 'MeasurementEnum',\n", + " 'description': 'An enumeration.',\n", + " 'enum': ['xas', 'rixs'],\n", + " 'type': 'string'}}}" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "FEFFOutputMetadata.schema()" + "import aimmdb.schemas as schemas\n", + "\n", + "schemas.FEFFOutputMetadata.schema()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{'title': 'FEFFcards',\n", + " 'type': 'object',\n", + " 'properties': {'atoms': {'title': 'Atoms', 'type': 'number'},\n", + " 'control': {'title': 'Control', 'type': 'integer'},\n", + " 'exchange': {'title': 'Exchange', 'type': 'number'},\n", + " 'title': {'title': 'Title', 'type': 'string'},\n", + " 'rpath': {'title': 'Rpath', 'type': 'integer'},\n", + " 'potentials': {'title': 'Potentials', 'type': 'number'},\n", + " 'xanes': {'title': 'Xanes', 'type': 'number'},\n", + " 'edge': {'title': 'Edge', 'type': 'string'},\n", + " 'scf': {'title': 'Scf', 'type': 'number'},\n", + " 'fms': {'title': 'Fms', 'type': 'number'},\n", + " 'S02': {'title': 'S02', 'type': 'number'},\n", + " 'corehole': {'title': 'Corehole', 'type': 'string'}},\n", + " 'required': ['atoms',\n", + " 'control',\n", + " 'exchange',\n", + " 'rpath',\n", + " 'potentials',\n", + " 'xanes',\n", + " 'edge',\n", + " 'scf',\n", + " 'fms',\n", + " 'S02',\n", + " 'corehole']}" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# we will enforce that XAS metadata satisfies the following schema\n", - "ExperimentalXASMetadata.schema()" + "\n", + "import aimmdb.schemas as schemas\n", + "schemas.FEFFcards.schema()" ] }, { @@ -138,11 +273,30 @@ "node = client[\"uid\"].write_dataframe(df, metadata=metadata, specs=[\"FEFF\"])\n", "node" ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[NbConvertApp] Converting notebook ingest_FEFF.ipynb to notebook\n", + "[NbConvertApp] Writing 4109 bytes to ingest_FEFF.ipynb\n" + ] + } + ], + "source": [ + "# clear outputs\n", + "!jupyter nbconvert --ClearOutputPreprocessor.enabled=True --inplace ingest_FEFF.ipynb" + ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3.10.4 ('my_pymatgen')", + "display_name": "Python 3.9.13 ('aimm')", "language": "python", "name": "python3" }, @@ -156,12 +310,12 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.4" + "version": "3.9.13" }, "orig_nbformat": 4, "vscode": { "interpreter": { - "hash": "8cf392b7cd98023928c855fd79964086ca343b5f82a42ebb28f5e83ba8cfe45c" + "hash": "189d756eac8438d33e11f8e23aa09bdc4c99760ed11a3bfefa464e31dcca4c4a" } } }, From 6269e1bcf3da8e2fb290759e08dbbf697b402538 Mon Sep 17 00:00:00 2001 From: msegal347 Date: Mon, 24 Oct 2022 10:44:00 -0400 Subject: [PATCH 5/7] cleared outputs --- notebooks/ingest_FEFF.ipynb | 324 ------------------------------------ 1 file changed, 324 deletions(-) diff --git a/notebooks/ingest_FEFF.ipynb b/notebooks/ingest_FEFF.ipynb index db9dd04..e69de29 100644 --- a/notebooks/ingest_FEFF.ipynb +++ b/notebooks/ingest_FEFF.ipynb @@ -1,324 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import copy\n", - "from pathlib import Path\n", - "\n", - "import numpy as np\n", - "import pandas as pd\n", - "import nbconvert" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from tiled.client import from_uri\n", - "client = from_uri(\"http://localhost:8000/api\")" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Data Path: aimmdb\\data\\feff\\65272_C_007\n" - ] - }, - { - "ename": "FileNotFoundError", - "evalue": "[WinError 3] The system cannot find the path specified: 'aimmdb\\\\data\\\\feff\\\\65272_C_007'", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[1;32mIn [5], line 8\u001b[0m\n\u001b[0;32m 5\u001b[0m DATA_PATH \u001b[39m=\u001b[39m Path(\u001b[39m\"\u001b[39m\u001b[39maimmdb/data/feff/65272_C_007\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m 6\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39mData Path:\u001b[39m\u001b[39m\"\u001b[39m, DATA_PATH)\n\u001b[1;32m----> 8\u001b[0m contents \u001b[39m=\u001b[39m os\u001b[39m.\u001b[39;49mlistdir(DATA_PATH)\n\u001b[0;32m 9\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39mContents:\u001b[39m\u001b[39m\"\u001b[39m, contents)\n", - "\u001b[1;31mFileNotFoundError\u001b[0m: [WinError 3] The system cannot find the path specified: 'aimmdb\\\\data\\\\feff\\\\65272_C_007'" - ] - } - ], - "source": [ - "\n", - "import os\n", - "from pathlib import Path\n", - "\n", - "\n", - "DATA_PATH = Path(\"aimmdb/data/feff/65272_C_007\")\n", - "print(\"Data Path:\", DATA_PATH)\n", - "\n", - "contents = os.listdir(DATA_PATH)\n", - "print(\"Contents:\", contents)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from aimmdb.ingest.load_FEFF_Data import load_feff_data\n", - "\n", - "data, metadata = load_feff_data(DATA_PATH)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "def ingest_feff(client, df, verbose=False):\n", - " \"\"\"\n", - " Upload the FEFF dataset to database\n", - " \"\"\"\n", - "\n", - " for (name, prep), g in df.groupby([\"sample.name\", \"sample.prep\"]):\n", - " if verbose:\n", - " print(f\"{name}: {prep}, {len(g)}\")\n", - "\n", - " sample_id = client.write_sample({\"name\" : name, \"prep\" : prep})\n", - "\n", - " for i, row in g.iterrows():\n", - " feff_df, _ = load_feff_data.dat(row.file)\n", - " metadata = row.metadata\n", - " metadata[\"dataset\"] = \"feff\"\n", - " metadata[\"sample_id\"] = sample_id\n", - " client[\"uid\"].write_dataframe(feff_df, metadata=metadata, specs=[\"FEFF\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "starting ingestion...\n" - ] - }, - { - "ename": "NameError", - "evalue": "name 'client' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[1;32mIn [9], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39mstarting ingestion...\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m----> 2\u001b[0m ingest_feff(client, load_feff_data\u001b[39m.\u001b[39mdata, verbose\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m)\n\u001b[0;32m 3\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39mfinished.\u001b[39m\u001b[39m\"\u001b[39m)\n", - "\u001b[1;31mNameError\u001b[0m: name 'client' is not defined" - ] - } - ], - "source": [ - "print(\"starting ingestion...\")\n", - "ingest_feff(client, load_feff_data.data, verbose=True)\n", - "print(\"finished.\")" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'title': 'FEFFInputMetadata',\n", - " 'type': 'object',\n", - " 'properties': {'element': {'$ref': '#/definitions/XDIElement'},\n", - " 'measurement_type': {'default': 'FEFF',\n", - " 'allOf': [{'$ref': '#/definitions/MeasurementEnum'}]},\n", - " 'dataset': {'title': 'Dataset', 'type': 'string'},\n", - " 'sample_id': {'title': 'Sample Id', 'type': 'string'},\n", - " 'input_script': {'title': 'Input Script', 'type': 'string'}},\n", - " 'required': ['element', 'dataset', 'sample_id', 'input_script'],\n", - " 'definitions': {'XDIElement': {'title': 'XDIElement',\n", - " 'type': 'object',\n", - " 'properties': {'symbol': {'title': 'Symbol', 'type': 'string'},\n", - " 'edge': {'title': 'Edge', 'type': 'string'}},\n", - " 'required': ['symbol', 'edge']},\n", - " 'MeasurementEnum': {'title': 'MeasurementEnum',\n", - " 'description': 'An enumeration.',\n", - " 'enum': ['xas', 'rixs'],\n", - " 'type': 'string'}}}" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import aimmdb.schemas as schemas\n", - "\n", - "schemas.FEFFInputMetadata.schema()" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'title': 'FEFFOutputMetadata',\n", - " 'type': 'object',\n", - " 'properties': {'element': {'$ref': '#/definitions/XDIElement'},\n", - " 'measurement_type': {'default': 'FEFF',\n", - " 'allOf': [{'$ref': '#/definitions/MeasurementEnum'}]},\n", - " 'dataset': {'title': 'Dataset', 'type': 'string'},\n", - " 'sample_id': {'title': 'Sample Id', 'type': 'string'},\n", - " 'output_log': {'title': 'Output Log', 'type': 'string'}},\n", - " 'required': ['element', 'dataset', 'sample_id', 'output_log'],\n", - " 'definitions': {'XDIElement': {'title': 'XDIElement',\n", - " 'type': 'object',\n", - " 'properties': {'symbol': {'title': 'Symbol', 'type': 'string'},\n", - " 'edge': {'title': 'Edge', 'type': 'string'}},\n", - " 'required': ['symbol', 'edge']},\n", - " 'MeasurementEnum': {'title': 'MeasurementEnum',\n", - " 'description': 'An enumeration.',\n", - " 'enum': ['xas', 'rixs'],\n", - " 'type': 'string'}}}" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import aimmdb.schemas as schemas\n", - "\n", - "schemas.FEFFOutputMetadata.schema()" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'title': 'FEFFcards',\n", - " 'type': 'object',\n", - " 'properties': {'atoms': {'title': 'Atoms', 'type': 'number'},\n", - " 'control': {'title': 'Control', 'type': 'integer'},\n", - " 'exchange': {'title': 'Exchange', 'type': 'number'},\n", - " 'title': {'title': 'Title', 'type': 'string'},\n", - " 'rpath': {'title': 'Rpath', 'type': 'integer'},\n", - " 'potentials': {'title': 'Potentials', 'type': 'number'},\n", - " 'xanes': {'title': 'Xanes', 'type': 'number'},\n", - " 'edge': {'title': 'Edge', 'type': 'string'},\n", - " 'scf': {'title': 'Scf', 'type': 'number'},\n", - " 'fms': {'title': 'Fms', 'type': 'number'},\n", - " 'S02': {'title': 'S02', 'type': 'number'},\n", - " 'corehole': {'title': 'Corehole', 'type': 'string'}},\n", - " 'required': ['atoms',\n", - " 'control',\n", - " 'exchange',\n", - " 'rpath',\n", - " 'potentials',\n", - " 'xanes',\n", - " 'edge',\n", - " 'scf',\n", - " 'fms',\n", - " 'S02',\n", - " 'corehole']}" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "\n", - "import aimmdb.schemas as schemas\n", - "schemas.FEFFcards.schema()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "client[\"uid\"]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# with the correct metadata we can write to the server\n", - "# NOTE this doesn't prevent you from writing garbage but does help\n", - "df = pd.DataFrame({\"a\" : np.random.rand(100), \"b\" : np.random.rand(100)})\n", - "metadata = {\"dataset\" : \"feff\", \"foo\" : \"bar\", \"element\" : {\"symbol\" : \"Au\", \"edge\" : \"K\"}, \"facility\" : {\"name\" : \"ALS\"}, \"beamline\" : {\"name\" : \"8.0.1\"}}\n", - "node = client[\"uid\"].write_dataframe(df, metadata=metadata, specs=[\"FEFF\"])\n", - "node" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[NbConvertApp] Converting notebook ingest_FEFF.ipynb to notebook\n", - "[NbConvertApp] Writing 4109 bytes to ingest_FEFF.ipynb\n" - ] - } - ], - "source": [ - "# clear outputs\n", - "!jupyter nbconvert --ClearOutputPreprocessor.enabled=True --inplace ingest_FEFF.ipynb" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3.9.13 ('aimm')", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.13" - }, - "orig_nbformat": 4, - "vscode": { - "interpreter": { - "hash": "189d756eac8438d33e11f8e23aa09bdc4c99760ed11a3bfefa464e31dcca4c4a" - } - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} From edf372049c961260fd194d6c2effd26640c6547a Mon Sep 17 00:00:00 2001 From: msegal347 Date: Mon, 24 Oct 2022 14:56:47 -0400 Subject: [PATCH 6/7] need help fixing the ingestion to client --- aimmdb/schemas.py | 4 +- aimmdb/validation.py | 4 +- notebooks/ingest_FEFF.ipynb | 483 ++++++++++++++++++++++++++++++++++++ 3 files changed, 487 insertions(+), 4 deletions(-) diff --git a/aimmdb/schemas.py b/aimmdb/schemas.py index 34e4b20..fa02f9e 100644 --- a/aimmdb/schemas.py +++ b/aimmdb/schemas.py @@ -166,12 +166,12 @@ class FEFFInputMetadata(pydantic.BaseModel, extra=pydantic.Extra.allow): measurement_type: MeasurementEnum = pydantic.Field("FEFF", const=True) dataset: str sample_id: str - input_script: str + #input_script: str class FEFFOutputMetadata(pydantic.BaseModel, extra=pydantic.Extra.allow): element: XDIElement measurement_type: MeasurementEnum = pydantic.Field("FEFF", const=True) dataset: str sample_id: str - output_log: str + #output_log: str diff --git a/aimmdb/validation.py b/aimmdb/validation.py index 168663b..491fa3a 100644 --- a/aimmdb/validation.py +++ b/aimmdb/validation.py @@ -1,7 +1,7 @@ import pydantic from tiled.validation_registration import ValidationError -from .schemas import BatteryChargeMetadata, ExperimentalXASMetadata, ExperimentalFEFFMetadata +from .schemas import BatteryChargeMetadata, ExperimentalXASMetadata def validate_xas_metadata(metadata, structure_family, structure, spec): @@ -46,7 +46,7 @@ def validate_battery_charge_data(metadata, structure_family, structure, spec): except pydantic.ValidationError as e: raise ValidationError(str(e)) -def validate_feff(metadata, structure_family, structure, spec): +def validate_feff_data(data, structure): #validate_xas_metadata(metadata, structure_family, structure, spec) columns = set(structure.macro.columns) diff --git a/notebooks/ingest_FEFF.ipynb b/notebooks/ingest_FEFF.ipynb index e69de29..c23d7a3 100644 --- a/notebooks/ingest_FEFF.ipynb +++ b/notebooks/ingest_FEFF.ipynb @@ -0,0 +1,483 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "import copy\n", + "from pathlib import Path\n", + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "import nbconvert" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "from tiled.client import from_uri\n", + "client = from_uri(\"http://localhost:8000/api\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Data Path: C:\\Users\\msega\\aimmdb\\aimmdb\\_tests\\data\\feff\\65272_C_007\n", + "Contents: ['feff.inp', 'feff.out', 'xmu.dat']\n" + ] + } + ], + "source": [ + "\n", + "import os\n", + "from pathlib import Path\n", + "\n", + "\n", + "DATA_PATH = Path(\"C:/Users/msega/aimmdb/aimmdb/_tests/data/feff/65272_C_007/\")\n", + "\n", + "#DATA_PATH = Path(\"aimmdb/data/feff/65272_C_007\")\n", + "print(\"Data Path:\", DATA_PATH)\n", + "\n", + "contents = os.listdir(DATA_PATH)\n", + "print(\"Contents:\", contents)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from aimmdb.ingest.load_FEFF_Data import load_feff_data\n", + "\n", + "data, metadata = load_feff_data(DATA_PATH)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#print dataframe from xmu.dat\n", + "\n", + "print(data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#print metadata from feff.inp and feff.out\n", + "\n", + "print(metadata)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#load feff data from data_path\n", + "\n", + "data, metadata = load_feff_data(DATA_PATH)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "def ingest_feff(client, data_path, verbose=False):\n", + " \"\"\"\n", + " Parameters\n", + " ----------\n", + " client : tiled.client.Client\n", + " The client to use to connect to the server.\n", + " data : os.PathLike\n", + " path to the feff.inp, feff.out, and xmu.dat files.\n", + " verbose : bool, optional\n", + " Prints debug information if True.\n", + " \"\"\"\n", + " client = from_uri(\"http://localhost:8000/api\")\n", + " data_path = Path(\"C:/Users/msega/aimmdb/aimmdb/_tests/data/feff/65272_C_007/\")\n", + "\n", + " files = list(data_path.rglob(\"*\"))\n", + " print(\"found {len(files)} files to ingest\")\n", + " print(\"Ingesting FEFF data from:\", data_path)\n", + "\n", + "\n", + " feff_data, metadata = load_feff_data(data_path, verbose=False)\n", + "\n", + " print(feff_data)\n", + "\n", + "\n", + " #client[\"uid\"].write_dataframe(feff_data, metadata, specs=[\"FEFF\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "found {len(files)} files to ingest\n", + "Ingesting FEFF data from: C:\\Users\\msega\\aimmdb\\aimmdb\\_tests\\data\\feff\\65272_C_007\n" + ] + } + ], + "source": [ + "from aimmdb.ingest.load_FEFF_Data import load_feff_data\n", + "from tiled.client import from_uri\n", + "from aimmdb.ingest.load_FEFF_Data import load_feff_data\n", + "\n", + "client = from_uri(\"http://localhost:8000/api\")\n", + "\n", + "data, metadata = load_feff_data(DATA_PATH, verbose=False)\n", + "\n", + "\n", + "\n", + "ingest_feff(client, DATA_PATH, verbose=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 91, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "starting ingestion...\n" + ] + }, + { + "ename": "KeyError", + "evalue": "'sample.name'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn [91], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39mstarting ingestion...\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m----> 2\u001b[0m ingest_feff(client, data, verbose\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m)\n\u001b[0;32m 3\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39mfinished.\u001b[39m\u001b[39m\"\u001b[39m)\n", + "Cell \u001b[1;32mIn [88], line 6\u001b[0m, in \u001b[0;36mingest_feff\u001b[1;34m(client, df, verbose)\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mingest_feff\u001b[39m(client, df, verbose\u001b[39m=\u001b[39m\u001b[39mFalse\u001b[39;00m):\n\u001b[0;32m 2\u001b[0m \u001b[39m\"\"\"\u001b[39;00m\n\u001b[0;32m 3\u001b[0m \u001b[39m Upload the FEFF dataset to database\u001b[39;00m\n\u001b[0;32m 4\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[1;32m----> 6\u001b[0m \u001b[39mfor\u001b[39;00m (name, prep), g \u001b[39min\u001b[39;00m df\u001b[39m.\u001b[39;49mgroupby([\u001b[39m\"\u001b[39;49m\u001b[39msample.name\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39m\"\u001b[39;49m\u001b[39msample.prep\u001b[39;49m\u001b[39m\"\u001b[39;49m]):\n\u001b[0;32m 7\u001b[0m \u001b[39mif\u001b[39;00m verbose:\n\u001b[0;32m 8\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m{\u001b[39;00mname\u001b[39m}\u001b[39;00m\u001b[39m: \u001b[39m\u001b[39m{\u001b[39;00mprep\u001b[39m}\u001b[39;00m\u001b[39m, \u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mlen\u001b[39m(g)\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m)\n", + "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\pandas\\core\\frame.py:8392\u001b[0m, in \u001b[0;36mDataFrame.groupby\u001b[1;34m(self, by, axis, level, as_index, sort, group_keys, squeeze, observed, dropna)\u001b[0m\n\u001b[0;32m 8389\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mTypeError\u001b[39;00m(\u001b[39m\"\u001b[39m\u001b[39mYou have to supply one of \u001b[39m\u001b[39m'\u001b[39m\u001b[39mby\u001b[39m\u001b[39m'\u001b[39m\u001b[39m and \u001b[39m\u001b[39m'\u001b[39m\u001b[39mlevel\u001b[39m\u001b[39m'\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m 8390\u001b[0m axis \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_get_axis_number(axis)\n\u001b[1;32m-> 8392\u001b[0m \u001b[39mreturn\u001b[39;00m DataFrameGroupBy(\n\u001b[0;32m 8393\u001b[0m obj\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m,\n\u001b[0;32m 8394\u001b[0m keys\u001b[39m=\u001b[39;49mby,\n\u001b[0;32m 8395\u001b[0m axis\u001b[39m=\u001b[39;49maxis,\n\u001b[0;32m 8396\u001b[0m level\u001b[39m=\u001b[39;49mlevel,\n\u001b[0;32m 8397\u001b[0m as_index\u001b[39m=\u001b[39;49mas_index,\n\u001b[0;32m 8398\u001b[0m sort\u001b[39m=\u001b[39;49msort,\n\u001b[0;32m 8399\u001b[0m group_keys\u001b[39m=\u001b[39;49mgroup_keys,\n\u001b[0;32m 8400\u001b[0m squeeze\u001b[39m=\u001b[39;49msqueeze,\n\u001b[0;32m 8401\u001b[0m observed\u001b[39m=\u001b[39;49mobserved,\n\u001b[0;32m 8402\u001b[0m dropna\u001b[39m=\u001b[39;49mdropna,\n\u001b[0;32m 8403\u001b[0m )\n", + "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\pandas\\core\\groupby\\groupby.py:959\u001b[0m, in \u001b[0;36mGroupBy.__init__\u001b[1;34m(self, obj, keys, axis, level, grouper, exclusions, selection, as_index, sort, group_keys, squeeze, observed, mutated, dropna)\u001b[0m\n\u001b[0;32m 956\u001b[0m \u001b[39mif\u001b[39;00m grouper \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m 957\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mpandas\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mcore\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mgroupby\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mgrouper\u001b[39;00m \u001b[39mimport\u001b[39;00m get_grouper\n\u001b[1;32m--> 959\u001b[0m grouper, exclusions, obj \u001b[39m=\u001b[39m get_grouper(\n\u001b[0;32m 960\u001b[0m obj,\n\u001b[0;32m 961\u001b[0m keys,\n\u001b[0;32m 962\u001b[0m axis\u001b[39m=\u001b[39;49maxis,\n\u001b[0;32m 963\u001b[0m level\u001b[39m=\u001b[39;49mlevel,\n\u001b[0;32m 964\u001b[0m sort\u001b[39m=\u001b[39;49msort,\n\u001b[0;32m 965\u001b[0m observed\u001b[39m=\u001b[39;49mobserved,\n\u001b[0;32m 966\u001b[0m mutated\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mmutated,\n\u001b[0;32m 967\u001b[0m dropna\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mdropna,\n\u001b[0;32m 968\u001b[0m )\n\u001b[0;32m 970\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mobj \u001b[39m=\u001b[39m obj\n\u001b[0;32m 971\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39maxis \u001b[39m=\u001b[39m obj\u001b[39m.\u001b[39m_get_axis_number(axis)\n", + "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\pandas\\core\\groupby\\grouper.py:889\u001b[0m, in \u001b[0;36mget_grouper\u001b[1;34m(obj, key, axis, level, sort, observed, mutated, validate, dropna)\u001b[0m\n\u001b[0;32m 887\u001b[0m in_axis, level, gpr \u001b[39m=\u001b[39m \u001b[39mFalse\u001b[39;00m, gpr, \u001b[39mNone\u001b[39;00m\n\u001b[0;32m 888\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m--> 889\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mKeyError\u001b[39;00m(gpr)\n\u001b[0;32m 890\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39misinstance\u001b[39m(gpr, Grouper) \u001b[39mand\u001b[39;00m gpr\u001b[39m.\u001b[39mkey \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m 891\u001b[0m \u001b[39m# Add key to exclusions\u001b[39;00m\n\u001b[0;32m 892\u001b[0m exclusions\u001b[39m.\u001b[39madd(gpr\u001b[39m.\u001b[39mkey)\n", + "\u001b[1;31mKeyError\u001b[0m: 'sample.name'" + ] + } + ], + "source": [ + "print(\"starting ingestion...\")\n", + "ingest_feff(client, data, verbose=True)\n", + "print(\"finished.\")" + ] + }, + { + "cell_type": "code", + "execution_count": 92, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'title': 'FEFFInputMetadata',\n", + " 'type': 'object',\n", + " 'properties': {'element': {'$ref': '#/definitions/XDIElement'},\n", + " 'measurement_type': {'default': 'FEFF',\n", + " 'allOf': [{'$ref': '#/definitions/MeasurementEnum'}]},\n", + " 'dataset': {'title': 'Dataset', 'type': 'string'},\n", + " 'sample_id': {'title': 'Sample Id', 'type': 'string'},\n", + " 'input_script': {'title': 'Input Script', 'type': 'string'}},\n", + " 'required': ['element', 'dataset', 'sample_id', 'input_script'],\n", + " 'definitions': {'XDIElement': {'title': 'XDIElement',\n", + " 'type': 'object',\n", + " 'properties': {'symbol': {'title': 'Symbol', 'type': 'string'},\n", + " 'edge': {'title': 'Edge', 'type': 'string'}},\n", + " 'required': ['symbol', 'edge']},\n", + " 'MeasurementEnum': {'title': 'MeasurementEnum',\n", + " 'description': 'An enumeration.',\n", + " 'enum': ['xas', 'rixs'],\n", + " 'type': 'string'}}}" + ] + }, + "execution_count": 92, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import aimmdb.schemas as schemas\n", + "\n", + "schemas.FEFFInputMetadata.schema()" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'title': 'FEFFOutputMetadata',\n", + " 'type': 'object',\n", + " 'properties': {'element': {'$ref': '#/definitions/XDIElement'},\n", + " 'measurement_type': {'default': 'FEFF',\n", + " 'allOf': [{'$ref': '#/definitions/MeasurementEnum'}]},\n", + " 'dataset': {'title': 'Dataset', 'type': 'string'},\n", + " 'sample_id': {'title': 'Sample Id', 'type': 'string'},\n", + " 'output_log': {'title': 'Output Log', 'type': 'string'}},\n", + " 'required': ['element', 'dataset', 'sample_id', 'output_log'],\n", + " 'definitions': {'XDIElement': {'title': 'XDIElement',\n", + " 'type': 'object',\n", + " 'properties': {'symbol': {'title': 'Symbol', 'type': 'string'},\n", + " 'edge': {'title': 'Edge', 'type': 'string'}},\n", + " 'required': ['symbol', 'edge']},\n", + " 'MeasurementEnum': {'title': 'MeasurementEnum',\n", + " 'description': 'An enumeration.',\n", + " 'enum': ['xas', 'rixs'],\n", + " 'type': 'string'}}}" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import aimmdb.schemas as schemas\n", + "\n", + "schemas.FEFFOutputMetadata.schema()" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'title': 'FEFFcards',\n", + " 'type': 'object',\n", + " 'properties': {'atoms': {'title': 'Atoms', 'type': 'number'},\n", + " 'control': {'title': 'Control', 'type': 'integer'},\n", + " 'exchange': {'title': 'Exchange', 'type': 'number'},\n", + " 'title': {'title': 'Title', 'type': 'string'},\n", + " 'rpath': {'title': 'Rpath', 'type': 'integer'},\n", + " 'potentials': {'title': 'Potentials', 'type': 'number'},\n", + " 'xanes': {'title': 'Xanes', 'type': 'number'},\n", + " 'edge': {'title': 'Edge', 'type': 'string'},\n", + " 'scf': {'title': 'Scf', 'type': 'number'},\n", + " 'fms': {'title': 'Fms', 'type': 'number'},\n", + " 'S02': {'title': 'S02', 'type': 'number'},\n", + " 'corehole': {'title': 'Corehole', 'type': 'string'}},\n", + " 'required': ['atoms',\n", + " 'control',\n", + " 'exchange',\n", + " 'rpath',\n", + " 'potentials',\n", + " 'xanes',\n", + " 'edge',\n", + " 'scf',\n", + " 'fms',\n", + " 'S02',\n", + " 'corehole']}" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "import aimmdb.schemas as schemas\n", + "schemas.FEFFcards.schema()" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'title': 'FEFFInputMetadata',\n", + " 'type': 'object',\n", + " 'properties': {'element': {'$ref': '#/definitions/XDIElement'},\n", + " 'measurement_type': {'default': 'FEFF',\n", + " 'allOf': [{'$ref': '#/definitions/MeasurementEnum'}]},\n", + " 'dataset': {'title': 'Dataset', 'type': 'string'},\n", + " 'sample_id': {'title': 'Sample Id', 'type': 'string'},\n", + " 'input_script': {'title': 'Input Script', 'type': 'string'}},\n", + " 'required': ['element', 'dataset', 'sample_id', 'input_script'],\n", + " 'definitions': {'XDIElement': {'title': 'XDIElement',\n", + " 'type': 'object',\n", + " 'properties': {'symbol': {'title': 'Symbol', 'type': 'string'},\n", + " 'edge': {'title': 'Edge', 'type': 'string'}},\n", + " 'required': ['symbol', 'edge']},\n", + " 'MeasurementEnum': {'title': 'MeasurementEnum',\n", + " 'description': 'An enumeration.',\n", + " 'enum': ['xas', 'rixs'],\n", + " 'type': 'string'}}}" + ] + }, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#show feff.inp as a schema\n", + "import aimmdb.schemas as schemas\n", + "\n", + "schemas.FEFFInputMetadata.schema()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 93, + "metadata": {}, + "outputs": [], + "source": [ + "DATA_PATH = Path(\"C:/Users/msega/aimmdb/aimmdb/_tests/data/feff/65272_C_007/\")\n", + "\n", + "feff_inp = DATA_PATH / \"feff.inp\"\n", + "feff_out = DATA_PATH / \"feff.out\"\n", + "xmu_dat = DATA_PATH / \"xmu.dat\"\n", + "\n", + "metadata = {\n", + " \"feff.inp\": feff_inp.read_text(),\n", + " \"feff.out\": feff_out.read_text(),\n", + "}\n", + "\n", + "dat = [\n", + " line\n", + " for line in xmu_dat.read_text().splitlines()\n", + " if line.startswith(\"#\")\n", + "]\n", + "\n", + "metadata[\"xmu.dat-comments\"] = \"\\n\".join(dat)" + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "metadata": {}, + "outputs": [ + { + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mtimeout\u001b[0m Traceback (most recent call last)", + "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpcore\\backends\\sync.py:26\u001b[0m, in \u001b[0;36mSyncStream.read\u001b[1;34m(self, max_bytes, timeout)\u001b[0m\n\u001b[0;32m 25\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_sock\u001b[39m.\u001b[39msettimeout(timeout)\n\u001b[1;32m---> 26\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_sock\u001b[39m.\u001b[39;49mrecv(max_bytes)\n", + "\u001b[1;31mtimeout\u001b[0m: timed out", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn [94], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m client[\u001b[39m\"\u001b[39;49m\u001b[39muid\u001b[39;49m\u001b[39m\"\u001b[39;49m]\n", + "File \u001b[1;32mc:\\users\\msega\\aimmdb\\aimmdb\\client.py:71\u001b[0m, in \u001b[0;36mAIMMCatalog.__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 69\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39msuper\u001b[39m()\u001b[39m.\u001b[39m\u001b[39m__getitem__\u001b[39m(key\u001b[39m.\u001b[39muid)\n\u001b[0;32m 70\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m---> 71\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39msuper\u001b[39;49m()\u001b[39m.\u001b[39;49m\u001b[39m__getitem__\u001b[39;49m(key)\n", + "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\tiled\\client\\node.py:355\u001b[0m, in \u001b[0;36mNode.__getitem__\u001b[1;34m(self, key, _ignore_inlined_contents)\u001b[0m\n\u001b[0;32m 353\u001b[0m \u001b[39mif\u001b[39;00m self_link\u001b[39m.\u001b[39mendswith(\u001b[39m\"\u001b[39m\u001b[39m/\u001b[39m\u001b[39m\"\u001b[39m):\n\u001b[0;32m 354\u001b[0m self_link \u001b[39m=\u001b[39m self_link[:\u001b[39m-\u001b[39m\u001b[39m1\u001b[39m]\n\u001b[1;32m--> 355\u001b[0m content \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mcontext\u001b[39m.\u001b[39;49mget_json(\n\u001b[0;32m 356\u001b[0m self_link \u001b[39m+\u001b[39;49m \u001b[39mf\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39m/\u001b[39;49m\u001b[39m{\u001b[39;49;00mkey\u001b[39m}\u001b[39;49;00m\u001b[39m\"\u001b[39;49m,\n\u001b[0;32m 357\u001b[0m )\n\u001b[0;32m 358\u001b[0m \u001b[39mexcept\u001b[39;00m ClientError \u001b[39mas\u001b[39;00m err:\n\u001b[0;32m 359\u001b[0m \u001b[39mif\u001b[39;00m err\u001b[39m.\u001b[39mresponse\u001b[39m.\u001b[39mstatus_code \u001b[39m==\u001b[39m \u001b[39m404\u001b[39m:\n", + "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\tiled\\client\\context.py:518\u001b[0m, in \u001b[0;36mContext.get_json\u001b[1;34m(self, path, stream, **kwargs)\u001b[0m\n\u001b[0;32m 516\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mget_json\u001b[39m(\u001b[39mself\u001b[39m, path, stream\u001b[39m=\u001b[39m\u001b[39mFalse\u001b[39;00m, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs):\n\u001b[0;32m 517\u001b[0m \u001b[39mreturn\u001b[39;00m msgpack\u001b[39m.\u001b[39munpackb(\n\u001b[1;32m--> 518\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mget_content(\n\u001b[0;32m 519\u001b[0m path, accept\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mapplication/x-msgpack\u001b[39m\u001b[39m\"\u001b[39m, stream\u001b[39m=\u001b[39mstream, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs\n\u001b[0;32m 520\u001b[0m ),\n\u001b[0;32m 521\u001b[0m timestamp\u001b[39m=\u001b[39m\u001b[39m3\u001b[39m, \u001b[39m# Decode msgpack Timestamp as datetime.datetime object.\u001b[39;00m\n\u001b[0;32m 522\u001b[0m )\n", + "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\tiled\\client\\context.py:444\u001b[0m, in \u001b[0;36mContext.get_content\u001b[1;34m(self, path, accept, stream, revalidate, **kwargs)\u001b[0m\n\u001b[0;32m 441\u001b[0m \u001b[39mreturn\u001b[39;00m content\n\u001b[0;32m 442\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_cache \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m 443\u001b[0m \u001b[39m# No cache, so we can use the client straightforwardly.\u001b[39;00m\n\u001b[1;32m--> 444\u001b[0m response \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_send(request, stream\u001b[39m=\u001b[39;49mstream)\n\u001b[0;32m 445\u001b[0m handle_error(response)\n\u001b[0;32m 446\u001b[0m \u001b[39mif\u001b[39;00m response\u001b[39m.\u001b[39mheaders\u001b[39m.\u001b[39mget(\u001b[39m\"\u001b[39m\u001b[39mcontent-encoding\u001b[39m\u001b[39m\"\u001b[39m) \u001b[39m==\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mblosc\u001b[39m\u001b[39m\"\u001b[39m:\n", + "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\tiled\\client\\context.py:602\u001b[0m, in \u001b[0;36mContext._send\u001b[1;34m(self, request, stream, attempts)\u001b[0m\n\u001b[0;32m 598\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_send\u001b[39m(\u001b[39mself\u001b[39m, request, stream\u001b[39m=\u001b[39m\u001b[39mFalse\u001b[39;00m, attempts\u001b[39m=\u001b[39m\u001b[39m0\u001b[39m):\n\u001b[0;32m 599\u001b[0m \u001b[39m\"\"\"\u001b[39;00m\n\u001b[0;32m 600\u001b[0m \u001b[39m If sending results in an authentication error, reauthenticate.\u001b[39;00m\n\u001b[0;32m 601\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[1;32m--> 602\u001b[0m response \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_client\u001b[39m.\u001b[39;49msend(request, stream\u001b[39m=\u001b[39;49mstream)\n\u001b[0;32m 603\u001b[0m \u001b[39mif\u001b[39;00m (\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mapi_key \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m) \u001b[39mand\u001b[39;00m (response\u001b[39m.\u001b[39mstatus_code \u001b[39m==\u001b[39m \u001b[39m401\u001b[39m) \u001b[39mand\u001b[39;00m (attempts \u001b[39m==\u001b[39m \u001b[39m0\u001b[39m):\n\u001b[0;32m 604\u001b[0m \u001b[39m# Try refreshing the token.\u001b[39;00m\n\u001b[0;32m 605\u001b[0m tokens \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mreauthenticate()\n", + "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpx\\_client.py:902\u001b[0m, in \u001b[0;36mClient.send\u001b[1;34m(self, request, stream, auth, follow_redirects)\u001b[0m\n\u001b[0;32m 894\u001b[0m follow_redirects \u001b[39m=\u001b[39m (\n\u001b[0;32m 895\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mfollow_redirects\n\u001b[0;32m 896\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(follow_redirects, UseClientDefault)\n\u001b[0;32m 897\u001b[0m \u001b[39melse\u001b[39;00m follow_redirects\n\u001b[0;32m 898\u001b[0m )\n\u001b[0;32m 900\u001b[0m auth \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_build_request_auth(request, auth)\n\u001b[1;32m--> 902\u001b[0m response \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_send_handling_auth(\n\u001b[0;32m 903\u001b[0m request,\n\u001b[0;32m 904\u001b[0m auth\u001b[39m=\u001b[39;49mauth,\n\u001b[0;32m 905\u001b[0m follow_redirects\u001b[39m=\u001b[39;49mfollow_redirects,\n\u001b[0;32m 906\u001b[0m history\u001b[39m=\u001b[39;49m[],\n\u001b[0;32m 907\u001b[0m )\n\u001b[0;32m 908\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 909\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m stream:\n", + "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpx\\_client.py:930\u001b[0m, in \u001b[0;36mClient._send_handling_auth\u001b[1;34m(self, request, auth, follow_redirects, history)\u001b[0m\n\u001b[0;32m 927\u001b[0m request \u001b[39m=\u001b[39m \u001b[39mnext\u001b[39m(auth_flow)\n\u001b[0;32m 929\u001b[0m \u001b[39mwhile\u001b[39;00m \u001b[39mTrue\u001b[39;00m:\n\u001b[1;32m--> 930\u001b[0m response \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_send_handling_redirects(\n\u001b[0;32m 931\u001b[0m request,\n\u001b[0;32m 932\u001b[0m follow_redirects\u001b[39m=\u001b[39;49mfollow_redirects,\n\u001b[0;32m 933\u001b[0m history\u001b[39m=\u001b[39;49mhistory,\n\u001b[0;32m 934\u001b[0m )\n\u001b[0;32m 935\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 936\u001b[0m \u001b[39mtry\u001b[39;00m:\n", + "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpx\\_client.py:967\u001b[0m, in \u001b[0;36mClient._send_handling_redirects\u001b[1;34m(self, request, follow_redirects, history)\u001b[0m\n\u001b[0;32m 964\u001b[0m \u001b[39mfor\u001b[39;00m hook \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_event_hooks[\u001b[39m\"\u001b[39m\u001b[39mrequest\u001b[39m\u001b[39m\"\u001b[39m]:\n\u001b[0;32m 965\u001b[0m hook(request)\n\u001b[1;32m--> 967\u001b[0m response \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_send_single_request(request)\n\u001b[0;32m 968\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 969\u001b[0m \u001b[39mfor\u001b[39;00m hook \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_event_hooks[\u001b[39m\"\u001b[39m\u001b[39mresponse\u001b[39m\u001b[39m\"\u001b[39m]:\n", + "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpx\\_client.py:1003\u001b[0m, in \u001b[0;36mClient._send_single_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 998\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mRuntimeError\u001b[39;00m(\n\u001b[0;32m 999\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mAttempted to send an async request with a sync Client instance.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m 1000\u001b[0m )\n\u001b[0;32m 1002\u001b[0m \u001b[39mwith\u001b[39;00m request_context(request\u001b[39m=\u001b[39mrequest):\n\u001b[1;32m-> 1003\u001b[0m response \u001b[39m=\u001b[39m transport\u001b[39m.\u001b[39;49mhandle_request(request)\n\u001b[0;32m 1005\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39misinstance\u001b[39m(response\u001b[39m.\u001b[39mstream, SyncByteStream)\n\u001b[0;32m 1007\u001b[0m response\u001b[39m.\u001b[39mrequest \u001b[39m=\u001b[39m request\n", + "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpx\\_transports\\default.py:218\u001b[0m, in \u001b[0;36mHTTPTransport.handle_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 205\u001b[0m req \u001b[39m=\u001b[39m httpcore\u001b[39m.\u001b[39mRequest(\n\u001b[0;32m 206\u001b[0m method\u001b[39m=\u001b[39mrequest\u001b[39m.\u001b[39mmethod,\n\u001b[0;32m 207\u001b[0m url\u001b[39m=\u001b[39mhttpcore\u001b[39m.\u001b[39mURL(\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 215\u001b[0m extensions\u001b[39m=\u001b[39mrequest\u001b[39m.\u001b[39mextensions,\n\u001b[0;32m 216\u001b[0m )\n\u001b[0;32m 217\u001b[0m \u001b[39mwith\u001b[39;00m map_httpcore_exceptions():\n\u001b[1;32m--> 218\u001b[0m resp \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_pool\u001b[39m.\u001b[39;49mhandle_request(req)\n\u001b[0;32m 220\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39misinstance\u001b[39m(resp\u001b[39m.\u001b[39mstream, typing\u001b[39m.\u001b[39mIterable)\n\u001b[0;32m 222\u001b[0m \u001b[39mreturn\u001b[39;00m Response(\n\u001b[0;32m 223\u001b[0m status_code\u001b[39m=\u001b[39mresp\u001b[39m.\u001b[39mstatus,\n\u001b[0;32m 224\u001b[0m headers\u001b[39m=\u001b[39mresp\u001b[39m.\u001b[39mheaders,\n\u001b[0;32m 225\u001b[0m stream\u001b[39m=\u001b[39mResponseStream(resp\u001b[39m.\u001b[39mstream),\n\u001b[0;32m 226\u001b[0m extensions\u001b[39m=\u001b[39mresp\u001b[39m.\u001b[39mextensions,\n\u001b[0;32m 227\u001b[0m )\n", + "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpcore\\_sync\\connection_pool.py:253\u001b[0m, in \u001b[0;36mConnectionPool.handle_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 251\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mBaseException\u001b[39;00m \u001b[39mas\u001b[39;00m exc:\n\u001b[0;32m 252\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mresponse_closed(status)\n\u001b[1;32m--> 253\u001b[0m \u001b[39mraise\u001b[39;00m exc\n\u001b[0;32m 254\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m 255\u001b[0m \u001b[39mbreak\u001b[39;00m\n", + "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpcore\\_sync\\connection_pool.py:237\u001b[0m, in \u001b[0;36mConnectionPool.handle_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 234\u001b[0m \u001b[39mraise\u001b[39;00m exc\n\u001b[0;32m 236\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m--> 237\u001b[0m response \u001b[39m=\u001b[39m connection\u001b[39m.\u001b[39;49mhandle_request(request)\n\u001b[0;32m 238\u001b[0m \u001b[39mexcept\u001b[39;00m ConnectionNotAvailable:\n\u001b[0;32m 239\u001b[0m \u001b[39m# The ConnectionNotAvailable exception is a special case, that\u001b[39;00m\n\u001b[0;32m 240\u001b[0m \u001b[39m# indicates we need to retry the request on a new connection.\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 244\u001b[0m \u001b[39m# might end up as an HTTP/2 connection, but which actually ends\u001b[39;00m\n\u001b[0;32m 245\u001b[0m \u001b[39m# up as HTTP/1.1.\u001b[39;00m\n\u001b[0;32m 246\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_pool_lock:\n\u001b[0;32m 247\u001b[0m \u001b[39m# Maintain our position in the request queue, but reset the\u001b[39;00m\n\u001b[0;32m 248\u001b[0m \u001b[39m# status so that the request becomes queued again.\u001b[39;00m\n", + "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpcore\\_sync\\connection.py:90\u001b[0m, in \u001b[0;36mHTTPConnection.handle_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 87\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_connection\u001b[39m.\u001b[39mis_available():\n\u001b[0;32m 88\u001b[0m \u001b[39mraise\u001b[39;00m ConnectionNotAvailable()\n\u001b[1;32m---> 90\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_connection\u001b[39m.\u001b[39;49mhandle_request(request)\n", + "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpcore\\_sync\\http11.py:105\u001b[0m, in \u001b[0;36mHTTP11Connection.handle_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 103\u001b[0m \u001b[39mwith\u001b[39;00m Trace(\u001b[39m\"\u001b[39m\u001b[39mhttp11.response_closed\u001b[39m\u001b[39m\"\u001b[39m, request) \u001b[39mas\u001b[39;00m trace:\n\u001b[0;32m 104\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_response_closed()\n\u001b[1;32m--> 105\u001b[0m \u001b[39mraise\u001b[39;00m exc\n", + "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpcore\\_sync\\http11.py:84\u001b[0m, in \u001b[0;36mHTTP11Connection.handle_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 75\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_send_request_body(\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[0;32m 76\u001b[0m \u001b[39mwith\u001b[39;00m Trace(\n\u001b[0;32m 77\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mhttp11.receive_response_headers\u001b[39m\u001b[39m\"\u001b[39m, request, kwargs\n\u001b[0;32m 78\u001b[0m ) \u001b[39mas\u001b[39;00m trace:\n\u001b[0;32m 79\u001b[0m (\n\u001b[0;32m 80\u001b[0m http_version,\n\u001b[0;32m 81\u001b[0m status,\n\u001b[0;32m 82\u001b[0m reason_phrase,\n\u001b[0;32m 83\u001b[0m headers,\n\u001b[1;32m---> 84\u001b[0m ) \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_receive_response_headers(\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[0;32m 85\u001b[0m trace\u001b[39m.\u001b[39mreturn_value \u001b[39m=\u001b[39m (\n\u001b[0;32m 86\u001b[0m http_version,\n\u001b[0;32m 87\u001b[0m status,\n\u001b[0;32m 88\u001b[0m reason_phrase,\n\u001b[0;32m 89\u001b[0m headers,\n\u001b[0;32m 90\u001b[0m )\n\u001b[0;32m 92\u001b[0m \u001b[39mreturn\u001b[39;00m Response(\n\u001b[0;32m 93\u001b[0m status\u001b[39m=\u001b[39mstatus,\n\u001b[0;32m 94\u001b[0m headers\u001b[39m=\u001b[39mheaders,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 100\u001b[0m },\n\u001b[0;32m 101\u001b[0m )\n", + "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpcore\\_sync\\http11.py:148\u001b[0m, in \u001b[0;36mHTTP11Connection._receive_response_headers\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 145\u001b[0m timeout \u001b[39m=\u001b[39m timeouts\u001b[39m.\u001b[39mget(\u001b[39m\"\u001b[39m\u001b[39mread\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39mNone\u001b[39;00m)\n\u001b[0;32m 147\u001b[0m \u001b[39mwhile\u001b[39;00m \u001b[39mTrue\u001b[39;00m:\n\u001b[1;32m--> 148\u001b[0m event \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_receive_event(timeout\u001b[39m=\u001b[39;49mtimeout)\n\u001b[0;32m 149\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(event, h11\u001b[39m.\u001b[39mResponse):\n\u001b[0;32m 150\u001b[0m \u001b[39mbreak\u001b[39;00m\n", + "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpcore\\_sync\\http11.py:177\u001b[0m, in \u001b[0;36mHTTP11Connection._receive_event\u001b[1;34m(self, timeout)\u001b[0m\n\u001b[0;32m 174\u001b[0m event \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_h11_state\u001b[39m.\u001b[39mnext_event()\n\u001b[0;32m 176\u001b[0m \u001b[39mif\u001b[39;00m event \u001b[39mis\u001b[39;00m h11\u001b[39m.\u001b[39mNEED_DATA:\n\u001b[1;32m--> 177\u001b[0m data \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_network_stream\u001b[39m.\u001b[39;49mread(\n\u001b[0;32m 178\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mREAD_NUM_BYTES, timeout\u001b[39m=\u001b[39;49mtimeout\n\u001b[0;32m 179\u001b[0m )\n\u001b[0;32m 181\u001b[0m \u001b[39m# If we feed this case through h11 we'll raise an exception like:\u001b[39;00m\n\u001b[0;32m 182\u001b[0m \u001b[39m#\u001b[39;00m\n\u001b[0;32m 183\u001b[0m \u001b[39m# httpcore.RemoteProtocolError: can't handle event type\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 187\u001b[0m \u001b[39m# perspective. Instead we handle this case distinctly and treat\u001b[39;00m\n\u001b[0;32m 188\u001b[0m \u001b[39m# it as a ConnectError.\u001b[39;00m\n\u001b[0;32m 189\u001b[0m \u001b[39mif\u001b[39;00m data \u001b[39m==\u001b[39m \u001b[39mb\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\"\u001b[39m \u001b[39mand\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_h11_state\u001b[39m.\u001b[39mtheir_state \u001b[39m==\u001b[39m h11\u001b[39m.\u001b[39mSEND_RESPONSE:\n", + "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpcore\\backends\\sync.py:26\u001b[0m, in \u001b[0;36mSyncStream.read\u001b[1;34m(self, max_bytes, timeout)\u001b[0m\n\u001b[0;32m 24\u001b[0m \u001b[39mwith\u001b[39;00m map_exceptions(exc_map):\n\u001b[0;32m 25\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_sock\u001b[39m.\u001b[39msettimeout(timeout)\n\u001b[1;32m---> 26\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_sock\u001b[39m.\u001b[39;49mrecv(max_bytes)\n", + "\u001b[1;31mKeyboardInterrupt\u001b[0m: " + ] + } + ], + "source": [ + "client[\"uid\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'client' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn [51], line 5\u001b[0m\n\u001b[0;32m 3\u001b[0m df \u001b[39m=\u001b[39m pd\u001b[39m.\u001b[39mDataFrame({\u001b[39m\"\u001b[39m\u001b[39ma\u001b[39m\u001b[39m\"\u001b[39m : np\u001b[39m.\u001b[39mrandom\u001b[39m.\u001b[39mrand(\u001b[39m100\u001b[39m), \u001b[39m\"\u001b[39m\u001b[39mb\u001b[39m\u001b[39m\"\u001b[39m : np\u001b[39m.\u001b[39mrandom\u001b[39m.\u001b[39mrand(\u001b[39m100\u001b[39m)})\n\u001b[0;32m 4\u001b[0m metadata \u001b[39m=\u001b[39m {\u001b[39m\"\u001b[39m\u001b[39mdataset\u001b[39m\u001b[39m\"\u001b[39m : \u001b[39m\"\u001b[39m\u001b[39mfeff\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mfoo\u001b[39m\u001b[39m\"\u001b[39m : \u001b[39m\"\u001b[39m\u001b[39mbar\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39melement\u001b[39m\u001b[39m\"\u001b[39m : {\u001b[39m\"\u001b[39m\u001b[39msymbol\u001b[39m\u001b[39m\"\u001b[39m : \u001b[39m\"\u001b[39m\u001b[39mAu\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39medge\u001b[39m\u001b[39m\"\u001b[39m : \u001b[39m\"\u001b[39m\u001b[39mK\u001b[39m\u001b[39m\"\u001b[39m}, \u001b[39m\"\u001b[39m\u001b[39mfacility\u001b[39m\u001b[39m\"\u001b[39m : {\u001b[39m\"\u001b[39m\u001b[39mname\u001b[39m\u001b[39m\"\u001b[39m : \u001b[39m\"\u001b[39m\u001b[39mALS\u001b[39m\u001b[39m\"\u001b[39m}, \u001b[39m\"\u001b[39m\u001b[39mbeamline\u001b[39m\u001b[39m\"\u001b[39m : {\u001b[39m\"\u001b[39m\u001b[39mname\u001b[39m\u001b[39m\"\u001b[39m : \u001b[39m\"\u001b[39m\u001b[39m8.0.1\u001b[39m\u001b[39m\"\u001b[39m}}\n\u001b[1;32m----> 5\u001b[0m node \u001b[39m=\u001b[39m client[\u001b[39m\"\u001b[39m\u001b[39muid\u001b[39m\u001b[39m\"\u001b[39m]\u001b[39m.\u001b[39mwrite_dataframe(df, metadata\u001b[39m=\u001b[39mmetadata, specs\u001b[39m=\u001b[39m[\u001b[39m\"\u001b[39m\u001b[39mFEFF\u001b[39m\u001b[39m\"\u001b[39m])\n\u001b[0;32m 6\u001b[0m node\n", + "\u001b[1;31mNameError\u001b[0m: name 'client' is not defined" + ] + } + ], + "source": [ + "# with the correct metadata we can write to the server\n", + "# NOTE this doesn't prevent you from writing garbage but does help\n", + "df = pd.DataFrame({\"a\" : np.random.rand(100), \"b\" : np.random.rand(100)})\n", + "metadata = {\"dataset\" : \"feff\", \"foo\" : \"bar\", \"element\" : {\"symbol\" : \"Au\", \"edge\" : \"K\"}, \"facility\" : {\"name\" : \"ALS\"}, \"beamline\" : {\"name\" : \"8.0.1\"}}\n", + "node = client[\"uid\"].write_dataframe(df, metadata=metadata, specs=[\"FEFF\"])\n", + "node" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.9.13 ('aimm')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "189d756eac8438d33e11f8e23aa09bdc4c99760ed11a3bfefa464e31dcca4c4a" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 96ce6d7695cf6b32da3749673be5431eee370e8f Mon Sep 17 00:00:00 2001 From: msegal347 Date: Mon, 24 Oct 2022 15:03:01 -0400 Subject: [PATCH 7/7] cleared outputs --- notebooks/ingest_FEFF.ipynb | 334 +++--------------------------------- 1 file changed, 20 insertions(+), 314 deletions(-) diff --git a/notebooks/ingest_FEFF.ipynb b/notebooks/ingest_FEFF.ipynb index c23d7a3..bddaaa5 100644 --- a/notebooks/ingest_FEFF.ipynb +++ b/notebooks/ingest_FEFF.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -16,7 +16,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -26,18 +26,9 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Data Path: C:\\Users\\msega\\aimmdb\\aimmdb\\_tests\\data\\feff\\65272_C_007\n", - "Contents: ['feff.inp', 'feff.out', 'xmu.dat']\n" - ] - } - ], + "outputs": [], "source": [ "\n", "import os\n", @@ -69,40 +60,6 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "#print dataframe from xmu.dat\n", - "\n", - "print(data)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#print metadata from feff.inp and feff.out\n", - "\n", - "print(metadata)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#load feff data from data_path\n", - "\n", - "data, metadata = load_feff_data(DATA_PATH)\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [], "source": [ "def ingest_feff(client, data_path, verbose=False):\n", " \"\"\"\n", @@ -123,28 +80,14 @@ " print(\"Ingesting FEFF data from:\", data_path)\n", "\n", "\n", - " feff_data, metadata = load_feff_data(data_path, verbose=False)\n", - "\n", - " print(feff_data)\n", - "\n", - "\n", - " #client[\"uid\"].write_dataframe(feff_data, metadata, specs=[\"FEFF\"])" + " feff_data, metadata = load_feff_data(data_path, verbose=False)\n" ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "found {len(files)} files to ingest\n", - "Ingesting FEFF data from: C:\\Users\\msega\\aimmdb\\aimmdb\\_tests\\data\\feff\\65272_C_007\n" - ] - } - ], + "outputs": [], "source": [ "from aimmdb.ingest.load_FEFF_Data import load_feff_data\n", "from tiled.client import from_uri\n", @@ -154,78 +97,27 @@ "\n", "data, metadata = load_feff_data(DATA_PATH, verbose=False)\n", "\n", + "client[\"uid\"].write_dataframe(data, metadata, specs=[\"FEFF\"])\n", "\n", - "\n", - "ingest_feff(client, DATA_PATH, verbose=True)" + "#ingest_feff(client, DATA_PATH, verbose=True)" ] }, { "cell_type": "code", - "execution_count": 91, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "starting ingestion...\n" - ] - }, - { - "ename": "KeyError", - "evalue": "'sample.name'", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[1;32mIn [91], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39mstarting ingestion...\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m----> 2\u001b[0m ingest_feff(client, data, verbose\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m)\n\u001b[0;32m 3\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39mfinished.\u001b[39m\u001b[39m\"\u001b[39m)\n", - "Cell \u001b[1;32mIn [88], line 6\u001b[0m, in \u001b[0;36mingest_feff\u001b[1;34m(client, df, verbose)\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mingest_feff\u001b[39m(client, df, verbose\u001b[39m=\u001b[39m\u001b[39mFalse\u001b[39;00m):\n\u001b[0;32m 2\u001b[0m \u001b[39m\"\"\"\u001b[39;00m\n\u001b[0;32m 3\u001b[0m \u001b[39m Upload the FEFF dataset to database\u001b[39;00m\n\u001b[0;32m 4\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[1;32m----> 6\u001b[0m \u001b[39mfor\u001b[39;00m (name, prep), g \u001b[39min\u001b[39;00m df\u001b[39m.\u001b[39;49mgroupby([\u001b[39m\"\u001b[39;49m\u001b[39msample.name\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39m\"\u001b[39;49m\u001b[39msample.prep\u001b[39;49m\u001b[39m\"\u001b[39;49m]):\n\u001b[0;32m 7\u001b[0m \u001b[39mif\u001b[39;00m verbose:\n\u001b[0;32m 8\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m{\u001b[39;00mname\u001b[39m}\u001b[39;00m\u001b[39m: \u001b[39m\u001b[39m{\u001b[39;00mprep\u001b[39m}\u001b[39;00m\u001b[39m, \u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mlen\u001b[39m(g)\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m)\n", - "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\pandas\\core\\frame.py:8392\u001b[0m, in \u001b[0;36mDataFrame.groupby\u001b[1;34m(self, by, axis, level, as_index, sort, group_keys, squeeze, observed, dropna)\u001b[0m\n\u001b[0;32m 8389\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mTypeError\u001b[39;00m(\u001b[39m\"\u001b[39m\u001b[39mYou have to supply one of \u001b[39m\u001b[39m'\u001b[39m\u001b[39mby\u001b[39m\u001b[39m'\u001b[39m\u001b[39m and \u001b[39m\u001b[39m'\u001b[39m\u001b[39mlevel\u001b[39m\u001b[39m'\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m 8390\u001b[0m axis \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_get_axis_number(axis)\n\u001b[1;32m-> 8392\u001b[0m \u001b[39mreturn\u001b[39;00m DataFrameGroupBy(\n\u001b[0;32m 8393\u001b[0m obj\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m,\n\u001b[0;32m 8394\u001b[0m keys\u001b[39m=\u001b[39;49mby,\n\u001b[0;32m 8395\u001b[0m axis\u001b[39m=\u001b[39;49maxis,\n\u001b[0;32m 8396\u001b[0m level\u001b[39m=\u001b[39;49mlevel,\n\u001b[0;32m 8397\u001b[0m as_index\u001b[39m=\u001b[39;49mas_index,\n\u001b[0;32m 8398\u001b[0m sort\u001b[39m=\u001b[39;49msort,\n\u001b[0;32m 8399\u001b[0m group_keys\u001b[39m=\u001b[39;49mgroup_keys,\n\u001b[0;32m 8400\u001b[0m squeeze\u001b[39m=\u001b[39;49msqueeze,\n\u001b[0;32m 8401\u001b[0m observed\u001b[39m=\u001b[39;49mobserved,\n\u001b[0;32m 8402\u001b[0m dropna\u001b[39m=\u001b[39;49mdropna,\n\u001b[0;32m 8403\u001b[0m )\n", - "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\pandas\\core\\groupby\\groupby.py:959\u001b[0m, in \u001b[0;36mGroupBy.__init__\u001b[1;34m(self, obj, keys, axis, level, grouper, exclusions, selection, as_index, sort, group_keys, squeeze, observed, mutated, dropna)\u001b[0m\n\u001b[0;32m 956\u001b[0m \u001b[39mif\u001b[39;00m grouper \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m 957\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mpandas\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mcore\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mgroupby\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mgrouper\u001b[39;00m \u001b[39mimport\u001b[39;00m get_grouper\n\u001b[1;32m--> 959\u001b[0m grouper, exclusions, obj \u001b[39m=\u001b[39m get_grouper(\n\u001b[0;32m 960\u001b[0m obj,\n\u001b[0;32m 961\u001b[0m keys,\n\u001b[0;32m 962\u001b[0m axis\u001b[39m=\u001b[39;49maxis,\n\u001b[0;32m 963\u001b[0m level\u001b[39m=\u001b[39;49mlevel,\n\u001b[0;32m 964\u001b[0m sort\u001b[39m=\u001b[39;49msort,\n\u001b[0;32m 965\u001b[0m observed\u001b[39m=\u001b[39;49mobserved,\n\u001b[0;32m 966\u001b[0m mutated\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mmutated,\n\u001b[0;32m 967\u001b[0m dropna\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mdropna,\n\u001b[0;32m 968\u001b[0m )\n\u001b[0;32m 970\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mobj \u001b[39m=\u001b[39m obj\n\u001b[0;32m 971\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39maxis \u001b[39m=\u001b[39m obj\u001b[39m.\u001b[39m_get_axis_number(axis)\n", - "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\pandas\\core\\groupby\\grouper.py:889\u001b[0m, in \u001b[0;36mget_grouper\u001b[1;34m(obj, key, axis, level, sort, observed, mutated, validate, dropna)\u001b[0m\n\u001b[0;32m 887\u001b[0m in_axis, level, gpr \u001b[39m=\u001b[39m \u001b[39mFalse\u001b[39;00m, gpr, \u001b[39mNone\u001b[39;00m\n\u001b[0;32m 888\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m--> 889\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mKeyError\u001b[39;00m(gpr)\n\u001b[0;32m 890\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39misinstance\u001b[39m(gpr, Grouper) \u001b[39mand\u001b[39;00m gpr\u001b[39m.\u001b[39mkey \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m 891\u001b[0m \u001b[39m# Add key to exclusions\u001b[39;00m\n\u001b[0;32m 892\u001b[0m exclusions\u001b[39m.\u001b[39madd(gpr\u001b[39m.\u001b[39mkey)\n", - "\u001b[1;31mKeyError\u001b[0m: 'sample.name'" - ] - } - ], + "outputs": [], "source": [ "print(\"starting ingestion...\")\n", - "ingest_feff(client, data, verbose=True)\n", + "ingest_feff(client, DATA_PATH, verbose=True)\n", "print(\"finished.\")" ] }, { "cell_type": "code", - "execution_count": 92, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'title': 'FEFFInputMetadata',\n", - " 'type': 'object',\n", - " 'properties': {'element': {'$ref': '#/definitions/XDIElement'},\n", - " 'measurement_type': {'default': 'FEFF',\n", - " 'allOf': [{'$ref': '#/definitions/MeasurementEnum'}]},\n", - " 'dataset': {'title': 'Dataset', 'type': 'string'},\n", - " 'sample_id': {'title': 'Sample Id', 'type': 'string'},\n", - " 'input_script': {'title': 'Input Script', 'type': 'string'}},\n", - " 'required': ['element', 'dataset', 'sample_id', 'input_script'],\n", - " 'definitions': {'XDIElement': {'title': 'XDIElement',\n", - " 'type': 'object',\n", - " 'properties': {'symbol': {'title': 'Symbol', 'type': 'string'},\n", - " 'edge': {'title': 'Edge', 'type': 'string'}},\n", - " 'required': ['symbol', 'edge']},\n", - " 'MeasurementEnum': {'title': 'MeasurementEnum',\n", - " 'description': 'An enumeration.',\n", - " 'enum': ['xas', 'rixs'],\n", - " 'type': 'string'}}}" - ] - }, - "execution_count": 92, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import aimmdb.schemas as schemas\n", "\n", @@ -234,37 +126,9 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'title': 'FEFFOutputMetadata',\n", - " 'type': 'object',\n", - " 'properties': {'element': {'$ref': '#/definitions/XDIElement'},\n", - " 'measurement_type': {'default': 'FEFF',\n", - " 'allOf': [{'$ref': '#/definitions/MeasurementEnum'}]},\n", - " 'dataset': {'title': 'Dataset', 'type': 'string'},\n", - " 'sample_id': {'title': 'Sample Id', 'type': 'string'},\n", - " 'output_log': {'title': 'Output Log', 'type': 'string'}},\n", - " 'required': ['element', 'dataset', 'sample_id', 'output_log'],\n", - " 'definitions': {'XDIElement': {'title': 'XDIElement',\n", - " 'type': 'object',\n", - " 'properties': {'symbol': {'title': 'Symbol', 'type': 'string'},\n", - " 'edge': {'title': 'Edge', 'type': 'string'}},\n", - " 'required': ['symbol', 'edge']},\n", - " 'MeasurementEnum': {'title': 'MeasurementEnum',\n", - " 'description': 'An enumeration.',\n", - " 'enum': ['xas', 'rixs'],\n", - " 'type': 'string'}}}" - ] - }, - "execution_count": 49, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import aimmdb.schemas as schemas\n", "\n", @@ -273,44 +137,9 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'title': 'FEFFcards',\n", - " 'type': 'object',\n", - " 'properties': {'atoms': {'title': 'Atoms', 'type': 'number'},\n", - " 'control': {'title': 'Control', 'type': 'integer'},\n", - " 'exchange': {'title': 'Exchange', 'type': 'number'},\n", - " 'title': {'title': 'Title', 'type': 'string'},\n", - " 'rpath': {'title': 'Rpath', 'type': 'integer'},\n", - " 'potentials': {'title': 'Potentials', 'type': 'number'},\n", - " 'xanes': {'title': 'Xanes', 'type': 'number'},\n", - " 'edge': {'title': 'Edge', 'type': 'string'},\n", - " 'scf': {'title': 'Scf', 'type': 'number'},\n", - " 'fms': {'title': 'Fms', 'type': 'number'},\n", - " 'S02': {'title': 'S02', 'type': 'number'},\n", - " 'corehole': {'title': 'Corehole', 'type': 'string'}},\n", - " 'required': ['atoms',\n", - " 'control',\n", - " 'exchange',\n", - " 'rpath',\n", - " 'potentials',\n", - " 'xanes',\n", - " 'edge',\n", - " 'scf',\n", - " 'fms',\n", - " 'S02',\n", - " 'corehole']}" - ] - }, - "execution_count": 50, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "\n", "import aimmdb.schemas as schemas\n", @@ -319,138 +148,15 @@ }, { "cell_type": "code", - "execution_count": 73, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'title': 'FEFFInputMetadata',\n", - " 'type': 'object',\n", - " 'properties': {'element': {'$ref': '#/definitions/XDIElement'},\n", - " 'measurement_type': {'default': 'FEFF',\n", - " 'allOf': [{'$ref': '#/definitions/MeasurementEnum'}]},\n", - " 'dataset': {'title': 'Dataset', 'type': 'string'},\n", - " 'sample_id': {'title': 'Sample Id', 'type': 'string'},\n", - " 'input_script': {'title': 'Input Script', 'type': 'string'}},\n", - " 'required': ['element', 'dataset', 'sample_id', 'input_script'],\n", - " 'definitions': {'XDIElement': {'title': 'XDIElement',\n", - " 'type': 'object',\n", - " 'properties': {'symbol': {'title': 'Symbol', 'type': 'string'},\n", - " 'edge': {'title': 'Edge', 'type': 'string'}},\n", - " 'required': ['symbol', 'edge']},\n", - " 'MeasurementEnum': {'title': 'MeasurementEnum',\n", - " 'description': 'An enumeration.',\n", - " 'enum': ['xas', 'rixs'],\n", - " 'type': 'string'}}}" - ] - }, - "execution_count": 73, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "#show feff.inp as a schema\n", "import aimmdb.schemas as schemas\n", "\n", "schemas.FEFFInputMetadata.schema()\n" ] - }, - { - "cell_type": "code", - "execution_count": 93, - "metadata": {}, - "outputs": [], - "source": [ - "DATA_PATH = Path(\"C:/Users/msega/aimmdb/aimmdb/_tests/data/feff/65272_C_007/\")\n", - "\n", - "feff_inp = DATA_PATH / \"feff.inp\"\n", - "feff_out = DATA_PATH / \"feff.out\"\n", - "xmu_dat = DATA_PATH / \"xmu.dat\"\n", - "\n", - "metadata = {\n", - " \"feff.inp\": feff_inp.read_text(),\n", - " \"feff.out\": feff_out.read_text(),\n", - "}\n", - "\n", - "dat = [\n", - " line\n", - " for line in xmu_dat.read_text().splitlines()\n", - " if line.startswith(\"#\")\n", - "]\n", - "\n", - "metadata[\"xmu.dat-comments\"] = \"\\n\".join(dat)" - ] - }, - { - "cell_type": "code", - "execution_count": 94, - "metadata": {}, - "outputs": [ - { - "ename": "KeyboardInterrupt", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mtimeout\u001b[0m Traceback (most recent call last)", - "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpcore\\backends\\sync.py:26\u001b[0m, in \u001b[0;36mSyncStream.read\u001b[1;34m(self, max_bytes, timeout)\u001b[0m\n\u001b[0;32m 25\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_sock\u001b[39m.\u001b[39msettimeout(timeout)\n\u001b[1;32m---> 26\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_sock\u001b[39m.\u001b[39;49mrecv(max_bytes)\n", - "\u001b[1;31mtimeout\u001b[0m: timed out", - "\nDuring handling of the above exception, another exception occurred:\n", - "\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", - "Cell \u001b[1;32mIn [94], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m client[\u001b[39m\"\u001b[39;49m\u001b[39muid\u001b[39;49m\u001b[39m\"\u001b[39;49m]\n", - "File \u001b[1;32mc:\\users\\msega\\aimmdb\\aimmdb\\client.py:71\u001b[0m, in \u001b[0;36mAIMMCatalog.__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 69\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39msuper\u001b[39m()\u001b[39m.\u001b[39m\u001b[39m__getitem__\u001b[39m(key\u001b[39m.\u001b[39muid)\n\u001b[0;32m 70\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m---> 71\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39msuper\u001b[39;49m()\u001b[39m.\u001b[39;49m\u001b[39m__getitem__\u001b[39;49m(key)\n", - "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\tiled\\client\\node.py:355\u001b[0m, in \u001b[0;36mNode.__getitem__\u001b[1;34m(self, key, _ignore_inlined_contents)\u001b[0m\n\u001b[0;32m 353\u001b[0m \u001b[39mif\u001b[39;00m self_link\u001b[39m.\u001b[39mendswith(\u001b[39m\"\u001b[39m\u001b[39m/\u001b[39m\u001b[39m\"\u001b[39m):\n\u001b[0;32m 354\u001b[0m self_link \u001b[39m=\u001b[39m self_link[:\u001b[39m-\u001b[39m\u001b[39m1\u001b[39m]\n\u001b[1;32m--> 355\u001b[0m content \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mcontext\u001b[39m.\u001b[39;49mget_json(\n\u001b[0;32m 356\u001b[0m self_link \u001b[39m+\u001b[39;49m \u001b[39mf\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39m/\u001b[39;49m\u001b[39m{\u001b[39;49;00mkey\u001b[39m}\u001b[39;49;00m\u001b[39m\"\u001b[39;49m,\n\u001b[0;32m 357\u001b[0m )\n\u001b[0;32m 358\u001b[0m \u001b[39mexcept\u001b[39;00m ClientError \u001b[39mas\u001b[39;00m err:\n\u001b[0;32m 359\u001b[0m \u001b[39mif\u001b[39;00m err\u001b[39m.\u001b[39mresponse\u001b[39m.\u001b[39mstatus_code \u001b[39m==\u001b[39m \u001b[39m404\u001b[39m:\n", - "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\tiled\\client\\context.py:518\u001b[0m, in \u001b[0;36mContext.get_json\u001b[1;34m(self, path, stream, **kwargs)\u001b[0m\n\u001b[0;32m 516\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mget_json\u001b[39m(\u001b[39mself\u001b[39m, path, stream\u001b[39m=\u001b[39m\u001b[39mFalse\u001b[39;00m, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs):\n\u001b[0;32m 517\u001b[0m \u001b[39mreturn\u001b[39;00m msgpack\u001b[39m.\u001b[39munpackb(\n\u001b[1;32m--> 518\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mget_content(\n\u001b[0;32m 519\u001b[0m path, accept\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mapplication/x-msgpack\u001b[39m\u001b[39m\"\u001b[39m, stream\u001b[39m=\u001b[39mstream, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs\n\u001b[0;32m 520\u001b[0m ),\n\u001b[0;32m 521\u001b[0m timestamp\u001b[39m=\u001b[39m\u001b[39m3\u001b[39m, \u001b[39m# Decode msgpack Timestamp as datetime.datetime object.\u001b[39;00m\n\u001b[0;32m 522\u001b[0m )\n", - "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\tiled\\client\\context.py:444\u001b[0m, in \u001b[0;36mContext.get_content\u001b[1;34m(self, path, accept, stream, revalidate, **kwargs)\u001b[0m\n\u001b[0;32m 441\u001b[0m \u001b[39mreturn\u001b[39;00m content\n\u001b[0;32m 442\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_cache \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m 443\u001b[0m \u001b[39m# No cache, so we can use the client straightforwardly.\u001b[39;00m\n\u001b[1;32m--> 444\u001b[0m response \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_send(request, stream\u001b[39m=\u001b[39;49mstream)\n\u001b[0;32m 445\u001b[0m handle_error(response)\n\u001b[0;32m 446\u001b[0m \u001b[39mif\u001b[39;00m response\u001b[39m.\u001b[39mheaders\u001b[39m.\u001b[39mget(\u001b[39m\"\u001b[39m\u001b[39mcontent-encoding\u001b[39m\u001b[39m\"\u001b[39m) \u001b[39m==\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mblosc\u001b[39m\u001b[39m\"\u001b[39m:\n", - "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\tiled\\client\\context.py:602\u001b[0m, in \u001b[0;36mContext._send\u001b[1;34m(self, request, stream, attempts)\u001b[0m\n\u001b[0;32m 598\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_send\u001b[39m(\u001b[39mself\u001b[39m, request, stream\u001b[39m=\u001b[39m\u001b[39mFalse\u001b[39;00m, attempts\u001b[39m=\u001b[39m\u001b[39m0\u001b[39m):\n\u001b[0;32m 599\u001b[0m \u001b[39m\"\"\"\u001b[39;00m\n\u001b[0;32m 600\u001b[0m \u001b[39m If sending results in an authentication error, reauthenticate.\u001b[39;00m\n\u001b[0;32m 601\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[1;32m--> 602\u001b[0m response \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_client\u001b[39m.\u001b[39;49msend(request, stream\u001b[39m=\u001b[39;49mstream)\n\u001b[0;32m 603\u001b[0m \u001b[39mif\u001b[39;00m (\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mapi_key \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m) \u001b[39mand\u001b[39;00m (response\u001b[39m.\u001b[39mstatus_code \u001b[39m==\u001b[39m \u001b[39m401\u001b[39m) \u001b[39mand\u001b[39;00m (attempts \u001b[39m==\u001b[39m \u001b[39m0\u001b[39m):\n\u001b[0;32m 604\u001b[0m \u001b[39m# Try refreshing the token.\u001b[39;00m\n\u001b[0;32m 605\u001b[0m tokens \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mreauthenticate()\n", - "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpx\\_client.py:902\u001b[0m, in \u001b[0;36mClient.send\u001b[1;34m(self, request, stream, auth, follow_redirects)\u001b[0m\n\u001b[0;32m 894\u001b[0m follow_redirects \u001b[39m=\u001b[39m (\n\u001b[0;32m 895\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mfollow_redirects\n\u001b[0;32m 896\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(follow_redirects, UseClientDefault)\n\u001b[0;32m 897\u001b[0m \u001b[39melse\u001b[39;00m follow_redirects\n\u001b[0;32m 898\u001b[0m )\n\u001b[0;32m 900\u001b[0m auth \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_build_request_auth(request, auth)\n\u001b[1;32m--> 902\u001b[0m response \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_send_handling_auth(\n\u001b[0;32m 903\u001b[0m request,\n\u001b[0;32m 904\u001b[0m auth\u001b[39m=\u001b[39;49mauth,\n\u001b[0;32m 905\u001b[0m follow_redirects\u001b[39m=\u001b[39;49mfollow_redirects,\n\u001b[0;32m 906\u001b[0m history\u001b[39m=\u001b[39;49m[],\n\u001b[0;32m 907\u001b[0m )\n\u001b[0;32m 908\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 909\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m stream:\n", - "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpx\\_client.py:930\u001b[0m, in \u001b[0;36mClient._send_handling_auth\u001b[1;34m(self, request, auth, follow_redirects, history)\u001b[0m\n\u001b[0;32m 927\u001b[0m request \u001b[39m=\u001b[39m \u001b[39mnext\u001b[39m(auth_flow)\n\u001b[0;32m 929\u001b[0m \u001b[39mwhile\u001b[39;00m \u001b[39mTrue\u001b[39;00m:\n\u001b[1;32m--> 930\u001b[0m response \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_send_handling_redirects(\n\u001b[0;32m 931\u001b[0m request,\n\u001b[0;32m 932\u001b[0m follow_redirects\u001b[39m=\u001b[39;49mfollow_redirects,\n\u001b[0;32m 933\u001b[0m history\u001b[39m=\u001b[39;49mhistory,\n\u001b[0;32m 934\u001b[0m )\n\u001b[0;32m 935\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 936\u001b[0m \u001b[39mtry\u001b[39;00m:\n", - "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpx\\_client.py:967\u001b[0m, in \u001b[0;36mClient._send_handling_redirects\u001b[1;34m(self, request, follow_redirects, history)\u001b[0m\n\u001b[0;32m 964\u001b[0m \u001b[39mfor\u001b[39;00m hook \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_event_hooks[\u001b[39m\"\u001b[39m\u001b[39mrequest\u001b[39m\u001b[39m\"\u001b[39m]:\n\u001b[0;32m 965\u001b[0m hook(request)\n\u001b[1;32m--> 967\u001b[0m response \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_send_single_request(request)\n\u001b[0;32m 968\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 969\u001b[0m \u001b[39mfor\u001b[39;00m hook \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_event_hooks[\u001b[39m\"\u001b[39m\u001b[39mresponse\u001b[39m\u001b[39m\"\u001b[39m]:\n", - "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpx\\_client.py:1003\u001b[0m, in \u001b[0;36mClient._send_single_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 998\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mRuntimeError\u001b[39;00m(\n\u001b[0;32m 999\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mAttempted to send an async request with a sync Client instance.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m 1000\u001b[0m )\n\u001b[0;32m 1002\u001b[0m \u001b[39mwith\u001b[39;00m request_context(request\u001b[39m=\u001b[39mrequest):\n\u001b[1;32m-> 1003\u001b[0m response \u001b[39m=\u001b[39m transport\u001b[39m.\u001b[39;49mhandle_request(request)\n\u001b[0;32m 1005\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39misinstance\u001b[39m(response\u001b[39m.\u001b[39mstream, SyncByteStream)\n\u001b[0;32m 1007\u001b[0m response\u001b[39m.\u001b[39mrequest \u001b[39m=\u001b[39m request\n", - "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpx\\_transports\\default.py:218\u001b[0m, in \u001b[0;36mHTTPTransport.handle_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 205\u001b[0m req \u001b[39m=\u001b[39m httpcore\u001b[39m.\u001b[39mRequest(\n\u001b[0;32m 206\u001b[0m method\u001b[39m=\u001b[39mrequest\u001b[39m.\u001b[39mmethod,\n\u001b[0;32m 207\u001b[0m url\u001b[39m=\u001b[39mhttpcore\u001b[39m.\u001b[39mURL(\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 215\u001b[0m extensions\u001b[39m=\u001b[39mrequest\u001b[39m.\u001b[39mextensions,\n\u001b[0;32m 216\u001b[0m )\n\u001b[0;32m 217\u001b[0m \u001b[39mwith\u001b[39;00m map_httpcore_exceptions():\n\u001b[1;32m--> 218\u001b[0m resp \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_pool\u001b[39m.\u001b[39;49mhandle_request(req)\n\u001b[0;32m 220\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39misinstance\u001b[39m(resp\u001b[39m.\u001b[39mstream, typing\u001b[39m.\u001b[39mIterable)\n\u001b[0;32m 222\u001b[0m \u001b[39mreturn\u001b[39;00m Response(\n\u001b[0;32m 223\u001b[0m status_code\u001b[39m=\u001b[39mresp\u001b[39m.\u001b[39mstatus,\n\u001b[0;32m 224\u001b[0m headers\u001b[39m=\u001b[39mresp\u001b[39m.\u001b[39mheaders,\n\u001b[0;32m 225\u001b[0m stream\u001b[39m=\u001b[39mResponseStream(resp\u001b[39m.\u001b[39mstream),\n\u001b[0;32m 226\u001b[0m extensions\u001b[39m=\u001b[39mresp\u001b[39m.\u001b[39mextensions,\n\u001b[0;32m 227\u001b[0m )\n", - "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpcore\\_sync\\connection_pool.py:253\u001b[0m, in \u001b[0;36mConnectionPool.handle_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 251\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mBaseException\u001b[39;00m \u001b[39mas\u001b[39;00m exc:\n\u001b[0;32m 252\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mresponse_closed(status)\n\u001b[1;32m--> 253\u001b[0m \u001b[39mraise\u001b[39;00m exc\n\u001b[0;32m 254\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m 255\u001b[0m \u001b[39mbreak\u001b[39;00m\n", - "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpcore\\_sync\\connection_pool.py:237\u001b[0m, in \u001b[0;36mConnectionPool.handle_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 234\u001b[0m \u001b[39mraise\u001b[39;00m exc\n\u001b[0;32m 236\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m--> 237\u001b[0m response \u001b[39m=\u001b[39m connection\u001b[39m.\u001b[39;49mhandle_request(request)\n\u001b[0;32m 238\u001b[0m \u001b[39mexcept\u001b[39;00m ConnectionNotAvailable:\n\u001b[0;32m 239\u001b[0m \u001b[39m# The ConnectionNotAvailable exception is a special case, that\u001b[39;00m\n\u001b[0;32m 240\u001b[0m \u001b[39m# indicates we need to retry the request on a new connection.\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 244\u001b[0m \u001b[39m# might end up as an HTTP/2 connection, but which actually ends\u001b[39;00m\n\u001b[0;32m 245\u001b[0m \u001b[39m# up as HTTP/1.1.\u001b[39;00m\n\u001b[0;32m 246\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_pool_lock:\n\u001b[0;32m 247\u001b[0m \u001b[39m# Maintain our position in the request queue, but reset the\u001b[39;00m\n\u001b[0;32m 248\u001b[0m \u001b[39m# status so that the request becomes queued again.\u001b[39;00m\n", - "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpcore\\_sync\\connection.py:90\u001b[0m, in \u001b[0;36mHTTPConnection.handle_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 87\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_connection\u001b[39m.\u001b[39mis_available():\n\u001b[0;32m 88\u001b[0m \u001b[39mraise\u001b[39;00m ConnectionNotAvailable()\n\u001b[1;32m---> 90\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_connection\u001b[39m.\u001b[39;49mhandle_request(request)\n", - "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpcore\\_sync\\http11.py:105\u001b[0m, in \u001b[0;36mHTTP11Connection.handle_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 103\u001b[0m \u001b[39mwith\u001b[39;00m Trace(\u001b[39m\"\u001b[39m\u001b[39mhttp11.response_closed\u001b[39m\u001b[39m\"\u001b[39m, request) \u001b[39mas\u001b[39;00m trace:\n\u001b[0;32m 104\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_response_closed()\n\u001b[1;32m--> 105\u001b[0m \u001b[39mraise\u001b[39;00m exc\n", - "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpcore\\_sync\\http11.py:84\u001b[0m, in \u001b[0;36mHTTP11Connection.handle_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 75\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_send_request_body(\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[0;32m 76\u001b[0m \u001b[39mwith\u001b[39;00m Trace(\n\u001b[0;32m 77\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mhttp11.receive_response_headers\u001b[39m\u001b[39m\"\u001b[39m, request, kwargs\n\u001b[0;32m 78\u001b[0m ) \u001b[39mas\u001b[39;00m trace:\n\u001b[0;32m 79\u001b[0m (\n\u001b[0;32m 80\u001b[0m http_version,\n\u001b[0;32m 81\u001b[0m status,\n\u001b[0;32m 82\u001b[0m reason_phrase,\n\u001b[0;32m 83\u001b[0m headers,\n\u001b[1;32m---> 84\u001b[0m ) \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_receive_response_headers(\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[0;32m 85\u001b[0m trace\u001b[39m.\u001b[39mreturn_value \u001b[39m=\u001b[39m (\n\u001b[0;32m 86\u001b[0m http_version,\n\u001b[0;32m 87\u001b[0m status,\n\u001b[0;32m 88\u001b[0m reason_phrase,\n\u001b[0;32m 89\u001b[0m headers,\n\u001b[0;32m 90\u001b[0m )\n\u001b[0;32m 92\u001b[0m \u001b[39mreturn\u001b[39;00m Response(\n\u001b[0;32m 93\u001b[0m status\u001b[39m=\u001b[39mstatus,\n\u001b[0;32m 94\u001b[0m headers\u001b[39m=\u001b[39mheaders,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 100\u001b[0m },\n\u001b[0;32m 101\u001b[0m )\n", - "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpcore\\_sync\\http11.py:148\u001b[0m, in \u001b[0;36mHTTP11Connection._receive_response_headers\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 145\u001b[0m timeout \u001b[39m=\u001b[39m timeouts\u001b[39m.\u001b[39mget(\u001b[39m\"\u001b[39m\u001b[39mread\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39mNone\u001b[39;00m)\n\u001b[0;32m 147\u001b[0m \u001b[39mwhile\u001b[39;00m \u001b[39mTrue\u001b[39;00m:\n\u001b[1;32m--> 148\u001b[0m event \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_receive_event(timeout\u001b[39m=\u001b[39;49mtimeout)\n\u001b[0;32m 149\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(event, h11\u001b[39m.\u001b[39mResponse):\n\u001b[0;32m 150\u001b[0m \u001b[39mbreak\u001b[39;00m\n", - "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpcore\\_sync\\http11.py:177\u001b[0m, in \u001b[0;36mHTTP11Connection._receive_event\u001b[1;34m(self, timeout)\u001b[0m\n\u001b[0;32m 174\u001b[0m event \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_h11_state\u001b[39m.\u001b[39mnext_event()\n\u001b[0;32m 176\u001b[0m \u001b[39mif\u001b[39;00m event \u001b[39mis\u001b[39;00m h11\u001b[39m.\u001b[39mNEED_DATA:\n\u001b[1;32m--> 177\u001b[0m data \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_network_stream\u001b[39m.\u001b[39;49mread(\n\u001b[0;32m 178\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mREAD_NUM_BYTES, timeout\u001b[39m=\u001b[39;49mtimeout\n\u001b[0;32m 179\u001b[0m )\n\u001b[0;32m 181\u001b[0m \u001b[39m# If we feed this case through h11 we'll raise an exception like:\u001b[39;00m\n\u001b[0;32m 182\u001b[0m \u001b[39m#\u001b[39;00m\n\u001b[0;32m 183\u001b[0m \u001b[39m# httpcore.RemoteProtocolError: can't handle event type\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 187\u001b[0m \u001b[39m# perspective. Instead we handle this case distinctly and treat\u001b[39;00m\n\u001b[0;32m 188\u001b[0m \u001b[39m# it as a ConnectError.\u001b[39;00m\n\u001b[0;32m 189\u001b[0m \u001b[39mif\u001b[39;00m data \u001b[39m==\u001b[39m \u001b[39mb\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\"\u001b[39m \u001b[39mand\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_h11_state\u001b[39m.\u001b[39mtheir_state \u001b[39m==\u001b[39m h11\u001b[39m.\u001b[39mSEND_RESPONSE:\n", - "File \u001b[1;32mc:\\Users\\msega\\anaconda3\\envs\\aimm\\lib\\site-packages\\httpcore\\backends\\sync.py:26\u001b[0m, in \u001b[0;36mSyncStream.read\u001b[1;34m(self, max_bytes, timeout)\u001b[0m\n\u001b[0;32m 24\u001b[0m \u001b[39mwith\u001b[39;00m map_exceptions(exc_map):\n\u001b[0;32m 25\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_sock\u001b[39m.\u001b[39msettimeout(timeout)\n\u001b[1;32m---> 26\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_sock\u001b[39m.\u001b[39;49mrecv(max_bytes)\n", - "\u001b[1;31mKeyboardInterrupt\u001b[0m: " - ] - } - ], - "source": [ - "client[\"uid\"]" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "metadata": {}, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'client' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[1;32mIn [51], line 5\u001b[0m\n\u001b[0;32m 3\u001b[0m df \u001b[39m=\u001b[39m pd\u001b[39m.\u001b[39mDataFrame({\u001b[39m\"\u001b[39m\u001b[39ma\u001b[39m\u001b[39m\"\u001b[39m : np\u001b[39m.\u001b[39mrandom\u001b[39m.\u001b[39mrand(\u001b[39m100\u001b[39m), \u001b[39m\"\u001b[39m\u001b[39mb\u001b[39m\u001b[39m\"\u001b[39m : np\u001b[39m.\u001b[39mrandom\u001b[39m.\u001b[39mrand(\u001b[39m100\u001b[39m)})\n\u001b[0;32m 4\u001b[0m metadata \u001b[39m=\u001b[39m {\u001b[39m\"\u001b[39m\u001b[39mdataset\u001b[39m\u001b[39m\"\u001b[39m : \u001b[39m\"\u001b[39m\u001b[39mfeff\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mfoo\u001b[39m\u001b[39m\"\u001b[39m : \u001b[39m\"\u001b[39m\u001b[39mbar\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39melement\u001b[39m\u001b[39m\"\u001b[39m : {\u001b[39m\"\u001b[39m\u001b[39msymbol\u001b[39m\u001b[39m\"\u001b[39m : \u001b[39m\"\u001b[39m\u001b[39mAu\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39medge\u001b[39m\u001b[39m\"\u001b[39m : \u001b[39m\"\u001b[39m\u001b[39mK\u001b[39m\u001b[39m\"\u001b[39m}, \u001b[39m\"\u001b[39m\u001b[39mfacility\u001b[39m\u001b[39m\"\u001b[39m : {\u001b[39m\"\u001b[39m\u001b[39mname\u001b[39m\u001b[39m\"\u001b[39m : \u001b[39m\"\u001b[39m\u001b[39mALS\u001b[39m\u001b[39m\"\u001b[39m}, \u001b[39m\"\u001b[39m\u001b[39mbeamline\u001b[39m\u001b[39m\"\u001b[39m : {\u001b[39m\"\u001b[39m\u001b[39mname\u001b[39m\u001b[39m\"\u001b[39m : \u001b[39m\"\u001b[39m\u001b[39m8.0.1\u001b[39m\u001b[39m\"\u001b[39m}}\n\u001b[1;32m----> 5\u001b[0m node \u001b[39m=\u001b[39m client[\u001b[39m\"\u001b[39m\u001b[39muid\u001b[39m\u001b[39m\"\u001b[39m]\u001b[39m.\u001b[39mwrite_dataframe(df, metadata\u001b[39m=\u001b[39mmetadata, specs\u001b[39m=\u001b[39m[\u001b[39m\"\u001b[39m\u001b[39mFEFF\u001b[39m\u001b[39m\"\u001b[39m])\n\u001b[0;32m 6\u001b[0m node\n", - "\u001b[1;31mNameError\u001b[0m: name 'client' is not defined" - ] - } - ], - "source": [ - "# with the correct metadata we can write to the server\n", - "# NOTE this doesn't prevent you from writing garbage but does help\n", - "df = pd.DataFrame({\"a\" : np.random.rand(100), \"b\" : np.random.rand(100)})\n", - "metadata = {\"dataset\" : \"feff\", \"foo\" : \"bar\", \"element\" : {\"symbol\" : \"Au\", \"edge\" : \"K\"}, \"facility\" : {\"name\" : \"ALS\"}, \"beamline\" : {\"name\" : \"8.0.1\"}}\n", - "node = client[\"uid\"].write_dataframe(df, metadata=metadata, specs=[\"FEFF\"])\n", - "node" - ] } ], "metadata": {