diff --git a/source/schema/begin_run_event.json b/source/schema/begin_run_event.json new file mode 100644 index 0000000..cc5783a --- /dev/null +++ b/source/schema/begin_run_event.json @@ -0,0 +1,50 @@ +{ + "properties": { + "beamline_config": { + "type": ["object", "string"], + "description": "Meta-data regarding how the beamline is configured. May be a UID into another collection" + }, + "project": { + "type": "string", + "description": "Name of project that this run is part of" + }, + "sample": { + "type": ["object", "string"], + "description": "Information about the sample, may be a UID to another collection" + }, + "beamline_id": { + "type": "string", + "description": "The beamline ID" + }, + "scan_id": { + "type": "integer", + "description": "Scan ID number, not globally unique" + }, + "time": { + "type": "number", + "description": "Time the run started. Unix epoch time" + }, + "uid": { + "type": "string", + "description": "Globally unique ID for this run" + }, + "group": { + "type": ["string", "integer"], + "description": "Unix group to associate this data with" + }, + "owner": { + "type": ["string", "integer"], + "description": "Unix owner to associate this data with" + } + }, + "required": [ + "uid", + "time", + "group", + "owner", + "beamline_config", + "beamline_id" + ], + "type": "object", + "description": "Document created at the start of run. Provides a search target and later documents link to it" +} diff --git a/source/schema/end_run_event.json b/source/schema/end_run_event.json new file mode 100644 index 0000000..9a259ef --- /dev/null +++ b/source/schema/end_run_event.json @@ -0,0 +1,35 @@ + +{ + "properties": { + "begin_run_event": { + "type": "string", + "description": "Reference back to the begin run document that this document is paired with." + }, + "reason": { + "type": "string", + "description": "Long-form description of why the run ended" + }, + "time": { + "type": "number", + "description": "The time the run ended. 
Unix epoch" + }, + "completion_state": { + "type": "string", + "enum": ["success", "abort", "fail"], + "description": "State of the run when it ended" + }, + "uid": { + "type": "string", + "description": "Globally unique ID for this run" + } + }, + "required": [ + "uid", + "begin_run_event", + "time", + "completion_state" + ], + "type": "object", + "description": "Document for the end of a run indicating the success/fail state of the run and the end time" + +} diff --git a/source/schema/ev_desc_doc.json b/source/schema/ev_desc_doc.json new file mode 100644 index 0000000..2cf9cb6 --- /dev/null +++ b/source/schema/ev_desc_doc.json @@ -0,0 +1,44 @@ +{ +"definitions": { + "data_key": { + "properties": { + "external": { + "pattern": "^[A-Z]+:?", + "type": "string" + }, + "source": { + "pattern": "^[A-Z]+:", + "type": "string" + } + }, + "required": [ + "source" + ], + "type": "object" + } +}, +"properties": { + "begin_run_event": { + "type": "string" + }, + "keys": { + "additionalProperties": { + "$ref": "#/definitions/data_key" + }, + "type": "object" + }, + "time": { + "type": "number" + }, + "uid": { + "type": "string" + } +}, +"required": [ + "uid", + "keys", + "time", + "begin_run_event" +], +"type": "object" +} diff --git a/source/schema/ev_desc_impl.json b/source/schema/ev_desc_impl.json new file mode 100644 index 0000000..0d0783f --- /dev/null +++ b/source/schema/ev_desc_impl.json @@ -0,0 +1,47 @@ +{ + "definitions": { + "data_key": { + "properties": { + "external": { + "pattern": "^[A-Z]+:?", + "type": "string" + }, + "source": { + "pattern": "^[A-Z]+:", + "type": "string" + } + }, + "required": [ + "source" + ], + "type": "object" + } + }, + "properties": { + "begin_run_event": { + "type": "string" + }, + "data_keys": { + "additionalProperties": { + "$ref": "#/definitions/data_key" + }, + "type": "object" + }, + "time": { + "type": "number" + }, + "id": { + "type": "string" + }, + "event_type": { + "type": "string" + } + }, + "required": [ + "id", + "keys", + 
"time", + "begin_run_event" + ], + "type": "object" +} diff --git a/source/schema/ev_desc_prop.json b/source/schema/ev_desc_prop.json new file mode 100644 index 0000000..8cad762 --- /dev/null +++ b/source/schema/ev_desc_prop.json @@ -0,0 +1,75 @@ + { + "definitions": { + "data_key": { + "title": "data_key", + "description": "Describes the objects in the data property of Event documents", + "properties": { + "dtype": { + "enum": [ + "string", + "number", + "array", + "boolean", + "integer" + ], + "type": "string", + "description": "The type of the data in the event." + }, + "external": { + "pattern": "^[A-Z]+:?", + "type": "string", + "description": "Where the data is stored if it is stored external to the events." + }, + "shape": { + "items": { + "type": "integer" + }, + "type": ["array", "null"], + "description": "The shape of the data. Null and empty list mean scalar data." + }, + "source": { + "pattern": "^[A-Z]+:", + "type": "string", + "description": "The source (e.g. a piece of hardware) of the data." + } + }, + "required": [ + "source", + "dtype", + "shape" + ], + "type": "object" + } + }, + "properties": { + "keys": { + "additionalProperties": { + "$ref": "#/definitions/data_key" + }, + "type": "object", + "description": "This describes the data to be in the event Documents", + "title": "keys" + }, + "uid": { + "type": "string", + "description": "Globally unique ID for this event descriptor.", + "title": "uid" + }, + "begin_run_event": { + "type": "string", + "description": "Globally unique ID to the begin_run document this descriptor is associated with." + }, + "time": { + "type": "number", + "description": "Creation time of the document as unix epoch time." 
+ } + }, + "required": [ + "uid", + "keys", + "time" + ], + "type": "object", + "title": "event_descriptor", + "description": "Document to describe the data captured in the associated event documents" + } diff --git a/source/schema/event.json b/source/schema/event.json new file mode 100644 index 0000000..d98ad61 --- /dev/null +++ b/source/schema/event.json @@ -0,0 +1,66 @@ +{ + "definitions": { + "data_field": { + "title": "data_field", + "description": "Schema for entries in the event data property", + "items": [ + { + "type": [ + "string", + "number", + "boolean", + "integer" + ], + "description": "The measured value or UID to look up external data", + "title": "value" + + }, + { + "type": "number", + "description": "The hardware time stamp of this measurement. [s] from unix epoch", + "title": "timestamp" + } + + ], + "type": "array", + "additionalItems": false, + "minItems": 2 + } + }, + "properties": { + "data": { + "additionalProperties": { + "$ref": "#/definitions/data_field" + }, + "type": "object", + "description": "The actual measurement data" + }, + "descriptor": { + "type": "string", + "description": "UID to point back to Descriptor for this event stream" + }, + "seq_num": { + "type": "integer", + "description": "Sequence number to identify the location of this Event in the Event stream" + }, + "time": { + "type": "number", + "description": "The event time. This may be different than the timestamps on each of the data entries" + }, + "uid": { + "type": "string", + "description": "Globally unique identifier for this Event" + } + }, + "required": [ + "uid", + "data", + "time", + "descriptor", + "seq_num" + ], + "additionalProperties": false, + "type": "object", + "title": "event", + "description": "Document to record a quantum of collected data" +} diff --git a/source/schema/filestore-format.rst b/source/schema/filestore-format.rst new file mode 100644 index 0000000..90b55cc --- /dev/null +++ b/source/schema/filestore-format.rst @@ -0,0 +1,94 @@ +.. 
_filestore-format: + +**************************** +Format of File Store Entries +**************************** + +Introduction +============ + +The fileStore is for abstracting access to the contents of +data stored in files. It is important to make the distinction between +*files* and *datasets*, as a single *file* can contain more than one *dataset*. +There are two core collections, ``FileBase`` +which stores information about the *files* (where the file physically is, +what format it is) and ``FileEventLink`` which stores which file and how +to extract a single *dataset* from that file. Thus, by storing a single +uuid, MetadataStore can keep track of non-scalar data in external storage. +By only linking to ``FileStore`` through a *dataset* ID the grouping of +data by file is de-coupled from any other logical grouping (by run or by +detector). + +Associated with the database there needs to be a set of ``Handler`` classes +which deal with opening files and extracting the *dataset* from the file. There +should also be a set of symmetric classes which handle file/dataset creation +and the generation/insertion of the relevant documents. + +There will be one or more secondary (derived) collections which store meta-data +like file size, data shape/type etc. + + +Collections +=========== + +FileBase +-------- + +The ``FileBase`` collection stores the bare minimum required to locate and +open a file. The documents have the following structure :: + + FileBase : { + spec: , + file_path: , + custom: + } + +Which mean: + + - ``spec`` : a string identifier to control the dispatch to the correct + ``Handler`` class. By doing dispatch this way multiple types of files (or + even different views into the same file) can be stored in the same database. + + - ``file_path`` : path to where the file exists on the physical disk. + + - ``custom`` : dictionary of arguments which are passed to the ``Handler`` along + with the ``file_path`` when the file is opened. 
+ + +There is not intended to be any reference to ``FileBase`` documents in the +analysis client code, but DAQ will need to both create ``FileBase`` entries and +keep track of them to create ``FileEntryLink`` documents. + + +FileEntryLink +------------- +The ``FileEntryLink`` collection holds the information required to extract a single +data set from a *file*. The documents have the following structure :: + + FileEntryLink : { + file_base : , + event_id : , + link_parameters : , + } + +which mean : + + - ``file_base`` : a link back to what file the *dataset* is in. + - ``event_id`` : a globally unique identifier for the dataset. This string is + exposed to ``metadataStore`` + - ``link_parameters`` : a dictionary of kwargs to pass to the ``Handler`` instance + +Handlers +======== + +The handler classes are what hold all of the data retrieval logic together. The API on +``Handlers`` is such that, given an ``event_id`` :: + + + fel_doc = get_event_link_document(event_id) + fb_doc = get_file_base_doc(fel_doc) + # use the spec value to look up what handler to use + h_class = handler_dispatch(fb_doc.spec) + # use the file_path and custom values to create a handler + h = h_class(fb_doc.file_path, **fb_doc.custom) + dataset = h(**fel_doc.link_parameters) diff --git a/source/schema/mds.fig b/source/schema/mds.fig new file mode 100644 index 0000000..245fde4 --- /dev/null +++ b/source/schema/mds.fig @@ -0,0 +1,87 @@ +#FIG 3.2 Produced by xfig version 3.2.5c +Landscape +Center +Metric +A4 +100.00 +Single +-2 +1200 2 +6 6750 3690 7560 3960 +2 2 0 1 0 7 50 -1 20 0.000 0 0 -1 0 0 5 + 6750 3690 7560 3690 7560 3960 6750 3960 6750 3690 +4 1 0 50 -1 0 12 0.0000 4 150 795 7154 3900 File Base\001 +-6 +6 5269 2565 6214 2835 +2 2 0 1 0 7 50 -1 20 0.000 0 0 -1 0 0 5 + 5269 2565 6214 2565 6214 2835 5269 2835 5269 2565 +4 1 0 50 -1 0 12 0.0000 4 150 915 5741 2775 Snowflake\001 +-6 +6 4320 2565 5130 2835 +2 2 0 1 0 7 50 -1 20 0.000 0 0 -1 0 0 5 + 4320 2565 5130 2565 5130 2835 4320 2835 
4320 2565 +4 1 0 50 -1 0 12 0.0000 4 195 645 4724 2752 Sample\001 +-6 +6 3420 3195 4950 3420 +2 2 0 1 0 7 50 -1 20 0.000 0 0 -1 0 0 5 + 3420 3195 4950 3195 4950 3420 3420 3420 3420 3195 +4 1 0 50 -1 0 12 0.0000 4 195 1425 4184 3360 Beamline Config\001 +-6 +6 4612 3600 6097 3825 +2 2 0 1 0 7 50 -1 20 0.000 0 0 -1 0 0 5 + 4612 3600 6097 3600 6097 3825 4612 3825 4612 3600 +4 1 0 50 -1 0 12 0.0000 4 195 1440 5355 3765 Begin Run Event\001 +-6 +6 3015 4140 4455 4365 +2 2 0 1 0 7 50 -1 20 0.000 0 0 -1 0 0 5 + 3015 4140 4455 4140 4455 4365 3015 4365 3015 4140 +4 1 0 50 -1 0 12 0.0000 4 150 1290 3735 4327 End Run Event\001 +-6 +6 4620 4140 6180 4365 +2 2 0 1 0 7 50 -1 20 0.000 0 0 -1 0 0 5 + 4646 4140 6153 4140 6153 4365 4646 4365 4646 4140 +4 1 0 50 -1 0 12 0.0000 4 195 1560 5400 4305 Event Descriptors\001 +-6 +6 5490 4725 6075 4950 +2 2 0 1 0 7 50 -1 20 0.000 0 0 -1 0 0 5 + 5490 4725 6075 4725 6075 4950 5490 4950 5490 4725 +4 1 0 50 -1 0 12 0.0000 4 150 510 5782 4912 Event\001 +-6 +6 6570 4275 7875 4500 +2 2 0 1 0 7 50 -1 20 0.000 0 0 -1 0 0 5 + 6592 4275 7852 4275 7852 4500 6592 4500 6592 4275 +4 1 0 50 -1 0 12 0.0000 4 150 1305 7222 4462 File Event Link\001 +-6 +2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 + 1 1 1.00 60.00 120.00 + 7110 4275 7110 3960 +2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 + 1 1 1.00 60.00 120.00 + 5805 4725 5805 4365 +2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 + 1 1 1.00 60.00 120.00 + 5445 4140 5445 3825 +2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 + 1 1 1.00 60.00 120.00 + 4095 4140 5400 3825 +2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 + 1 1 1.00 60.00 120.00 + 5265 3600 4230 3420 +2 2 1 1 0 7 50 -1 -1 4.000 0 0 -1 0 0 5 + 6345 2250 2925 2250 2925 4455 6345 4455 6345 2250 +2 2 0 1 0 7 51 -1 18 0.000 0 0 -1 0 0 5 + 2835 3015 6300 3015 6300 5085 2835 5085 2835 3015 +2 1 3 1 0 7 50 -1 -1 3.000 0 0 -1 1 0 2 + 1 1 1.00 60.00 120.00 + 5265 3600 5670 2835 +2 1 3 1 0 7 50 -1 -1 3.000 0 0 -1 1 0 2 + 1 1 1.00 60.00 120.00 + 5265 3600 4770 2835 +2 1 3 1 0 7 50 -1 -1 3.000 0 
0 -1 1 0 2 + 1 1 1.00 60.00 120.00 + 6075 4815 7110 4500 +2 2 0 1 0 7 51 -1 18 0.000 0 0 -1 0 0 5 + 6480 3330 7920 3330 7920 4590 6480 4590 6480 3330 +4 0 0 50 -1 0 12 0.0000 4 150 645 3060 2520 Header\001 +4 0 0 50 -1 0 12 0.0000 4 150 465 2925 4905 MDS\001 +4 0 0 50 -1 0 12 0.0000 4 150 825 6525 3555 File Store\001 diff --git a/source/schema/metadatastore-format.rst b/source/schema/metadatastore-format.rst new file mode 100644 index 0000000..44584df --- /dev/null +++ b/source/schema/metadatastore-format.rst @@ -0,0 +1,415 @@ +******************************* +Format of Metadatastore Entries +******************************* + +Introduction +============ + +The metadatastore is based on the concept of documents which are either +events, or descriptions of events. An ``event`` is a quantum of data +stored in the metadata store and represents an *action* at a given time. For +example: "*measurement of 8 scaler channels*", "*trigger detectors*" or +"*start run*". + +Expanding the notion of **events**, these can also be used for derived data. +For example, an event could be the result of a data analysis or reduction +routine which was run at a certain time. + +The document schemas in this document are written using ``jsonschema``. The +full spec http://json-schema.org/ is basically un-readable. A more readable +introduction is at https://spacetelescope.github.io/understanding-json-schema/index.html. + + +.. todo:: + Expand this section + + + + +Time +==== + +One of the cornerstones of this data acquisition and analysis method is the use +of *time* as the method by which data can be aligned and correlated. A single +``event`` should have happened at a certain quantum of time with the +determination of what a time *quantum* is left to the details of the +experiment. Time, however, can be horrendously messy. Throughout this +section we use two terms, ``timestamp`` and ``time``. These mean + + +time + The date/time as found by the client when an ``event`` is + created. 
This could be a date-time format as determined by the underlying + storage method (for example a database). + +timestamp + A (usually *float*) representation of the hardware time when a + certain value was obtained. Wherever possible this should be read from + hardware. For example, this could be the *EPICS* timestamp from when the + record processed which provides the value. + + +We use the literal ``