Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add FEFF schema #41

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions aimmdb/_tests/ingest/test_feff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import copy
import pandas as pd
from pathlib import Path

from aimmdb.ingest import load_feff_data


DATA_PATH = Path("aimmdb/_tests/data/feff/65272_C_007")


def test_load_feff_data():
    """Check the return types of ``load_feff_data`` for the sample directory."""
    frame, meta = load_feff_data(DATA_PATH)

    assert isinstance(frame, pd.DataFrame)
    assert isinstance(meta, dict)

    # Each of these metadata entries must be a string.
    for key in ("feff.inp", "feff.out", "xmu.dat-comments"):
        assert isinstance(meta[key], str)


def copy_feff_data():
    """Check that deep copies of the loaded FEFF data equal the originals.

    Loads the sample directory, deep-copies both the dataframe and the
    metadata dict, and asserts that the copies compare equal and keep the
    same types as the originals.
    """
    data, metadata = load_feff_data(DATA_PATH)

    data_copy = copy.deepcopy(data)
    metadata_copy = copy.deepcopy(metadata)

    assert data.equals(data_copy)
    # metadata is a plain dict, which has no ``.equals`` method; use ``==``.
    assert metadata == metadata_copy

    assert isinstance(data_copy, pd.DataFrame)
    assert isinstance(metadata_copy, dict)
    for key in ("feff.inp", "feff.out", "xmu.dat-comments"):
        assert isinstance(metadata_copy[key], str)


# pytest's default discovery only collects functions whose names start with
# "test", so expose the check under a collectable name while keeping the
# original name available.
test_copy_feff_data = copy_feff_data
6 changes: 6 additions & 0 deletions aimmdb/adapters/feff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from tiled.adapters.dataframe import DataFrameAdapter


class FEFFAdapter(DataFrameAdapter):
    """Dataframe adapter representing FEFF data, tagged with the "FEFF" spec."""

    specs = ["FEFF"]
Empty file added aimmdb/ingest/__init__.py
Empty file.
65 changes: 65 additions & 0 deletions aimmdb/ingest/load_FEFF_Data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import copy
import pathlib

import numpy as np
import pandas as pd

from tiled.client import from_uri
from tiled.examples.xdi import read_xdi
from tiled.queries import Key

import pandas as pd
from pathlib import Path

def load_feff_data(data_path, verbose=True):
    """
    Load the FEFF files in a directory into a dataframe plus text metadata.

    Parameters
    ----------
    data_path : os.PathLike
        Directory containing the feff.inp, feff.out, and xmu.dat files.
    verbose : bool, optional
        Prints debug information if True.

    Returns
    -------
    feff_data : pandas.DataFrame
        The non-comment rows of xmu.dat, with columns
        omega, e, k, mu, mu0, chi.
    metadata : dict
        "feff.inp" and "feff.out" hold the full text of those files;
        "xmu.dat-comments" holds the lines of xmu.dat that start with "#",
        joined with newlines.
    """
    data_path = Path(data_path)

    feff_inp = data_path / "feff.inp"
    feff_out = data_path / "feff.out"
    xmu_dat = data_path / "xmu.dat"

    data = pd.read_csv(
        xmu_dat,
        # Raw string: "\s" is not a valid escape in a normal string literal.
        sep=r"\s+",
        header=None,
        names=["omega", "e", "k", "mu", "mu0", "chi"],
        comment="#",
    )

    metadata = {
        "feff.inp": feff_inp.read_text(),
        "feff.out": feff_out.read_text(),
    }

    # read_csv drops the "#" lines, so collect them separately as metadata.
    comment_lines = [
        line
        for line in xmu_dat.read_text().splitlines()
        if line.startswith("#")
    ]
    metadata["xmu.dat-comments"] = "\n".join(comment_lines)

    if verbose:
        print("FEFF Input:", feff_inp)
        print("FEFF Output:", feff_out)
        print("FEFF Data:", xmu_dat)
        print(data)
        print(metadata)

    # returns data and metadata, a pd.DataFrame and dict, respectively.
    return data, metadata
30 changes: 30 additions & 0 deletions aimmdb/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,3 +145,33 @@ class BatteryChargeMetadataInternal(pydantic.BaseModel):

class BatteryChargeMetadata(pydantic.BaseModel, extra=pydantic.Extra.allow):
    """Metadata model carrying a ``charge`` entry; extra fields are allowed."""

    charge: BatteryChargeMetadataInternal


class FEFFcards(pydantic.BaseModel, extra=pydantic.Extra.allow):
    """Typed per-field FEFF metadata model; extra fields are allowed."""

    # Review note (from the pull-request discussion): this model is too
    # fine-grained and can simply be removed for now, since all of this is
    # going into ``input_script``.

    atoms: float
    control: int
    exchange: float
    title: Optional[str]
    rpath: int
    potentials: float
    xanes: float
    edge: str
    scf: float
    fms: float
    S02: float
    corehole: str

class FEFFInputMetadata(pydantic.BaseModel, extra=pydantic.Extra.allow):
    """Metadata model for a FEFF input entry; extra fields are allowed.

    ``measurement_type`` is a constant field fixed to "FEFF".
    """

    element: XDIElement
    measurement_type: MeasurementEnum = pydantic.Field("FEFF", const=True)
    dataset: str
    sample_id: str
    # input_script: str

class FEFFOutputMetadata(pydantic.BaseModel, extra=pydantic.Extra.allow):
    """Metadata model for a FEFF output entry; extra fields are allowed.

    ``measurement_type`` is a constant field fixed to "FEFF".
    """

    element: XDIElement
    measurement_type: MeasurementEnum = pydantic.Field("FEFF", const=True)
    dataset: str
    sample_id: str
    # output_log: str

11 changes: 11 additions & 0 deletions aimmdb/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,14 @@ def validate_battery_charge_data(metadata, structure_family, structure, spec):
metadata = BatteryChargeMetadata.parse_obj(metadata)
except pydantic.ValidationError as e:
raise ValidationError(str(e))

def validate_feff_data(data, structure):
    """Check that a dataframe structure contains every required FEFF column.

    Parameters
    ----------
    data
        Not used by this function.
    structure
        Object whose ``macro.columns`` lists the dataframe's column names.

    Raises
    ------
    ValidationError
        If any of omega, e, k, mu, mu0, chi is missing from the columns.
    """
    # NOTE(review): validate_battery_charge_data in this module takes
    # (metadata, structure_family, structure, spec); confirm this signature
    # matches how the validator is invoked.
    # validate_xas_metadata(metadata, structure_family, structure, spec)
    required_columns = {"omega", "e", "k", "mu", "mu0", "chi"}
    columns = set(structure.macro.columns)

    if required_columns <= columns:
        return

    raise ValidationError(f"columns {columns} must contain {required_columns}")
File renamed without changes.
189 changes: 189 additions & 0 deletions notebooks/ingest_FEFF.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import copy\n",
"from pathlib import Path\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"import nbconvert"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from tiled.client import from_uri\n",
"client = from_uri(\"http://localhost:8000/api\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"import os\n",
"from pathlib import Path\n",
"\n",
"\n",
"DATA_PATH = Path(\"C:/Users/msega/aimmdb/aimmdb/_tests/data/feff/65272_C_007/\")\n",
"\n",
"#DATA_PATH = Path(\"aimmdb/data/feff/65272_C_007\")\n",
"print(\"Data Path:\", DATA_PATH)\n",
"\n",
"contents = os.listdir(DATA_PATH)\n",
"print(\"Contents:\", contents)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from aimmdb.ingest.load_FEFF_Data import load_feff_data\n",
"\n",
"data, metadata = load_feff_data(DATA_PATH)"
]
},
{
 "cell_type": "code",
 "execution_count": null,
 "metadata": {},
 "outputs": [],
 "source": [
  "def ingest_feff(client, data_path, verbose=False):\n",
  "    \"\"\"\n",
  "    Load the FEFF files found under ``data_path``.\n",
  "\n",
  "    Parameters\n",
  "    ----------\n",
  "    client : tiled.client.Client\n",
  "        The client to use to connect to the server.\n",
  "        Currently unused: this function only loads the files.\n",
  "    data_path : os.PathLike\n",
  "        path to the feff.inp, feff.out, and xmu.dat files.\n",
  "    verbose : bool, optional\n",
  "        Prints debug information if True.\n",
  "    \"\"\"\n",
  "    # Use the caller's arguments instead of overwriting them with a\n",
  "    # hard-coded server URI and a machine-specific directory.\n",
  "    data_path = Path(data_path)\n",
  "\n",
  "    files = list(data_path.rglob(\"*\"))\n",
  "    print(f\"found {len(files)} files to ingest\")\n",
  "    print(\"Ingesting FEFF data from:\", data_path)\n",
  "\n",
  "\n",
  "    feff_data, metadata = load_feff_data(data_path, verbose=verbose)\n"
 ]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from aimmdb.ingest.load_FEFF_Data import load_feff_data\n",
"from tiled.client import from_uri\n",
"from aimmdb.ingest.load_FEFF_Data import load_feff_data\n",
"\n",
"client = from_uri(\"http://localhost:8000/api\")\n",
"\n",
"data, metadata = load_feff_data(DATA_PATH, verbose=False)\n",
"\n",
"client[\"uid\"].write_dataframe(data, metadata, specs=[\"FEFF\"])\n",
"\n",
"#ingest_feff(client, DATA_PATH, verbose=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(\"starting ingestion...\")\n",
"ingest_feff(client, DATA_PATH, verbose=True)\n",
"print(\"finished.\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import aimmdb.schemas as schemas\n",
"\n",
"schemas.FEFFInputMetadata.schema()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import aimmdb.schemas as schemas\n",
"\n",
"schemas.FEFFOutputMetadata.schema()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"import aimmdb.schemas as schemas\n",
"schemas.FEFFcards.schema()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#show feff.inp as a schema\n",
"import aimmdb.schemas as schemas\n",
"\n",
"schemas.FEFFInputMetadata.schema()\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.9.13 ('aimm')",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "189d756eac8438d33e11f8e23aa09bdc4c99760ed11a3bfefa464e31dcca4c4a"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading