Skip to content

Commit

Permalink
Progress 2
Browse files Browse the repository at this point in the history
  • Loading branch information
albertzak committed May 16, 2020
1 parent 15472c5 commit 2939bb3
Show file tree
Hide file tree
Showing 13 changed files with 234 additions and 104 deletions.
7 changes: 6 additions & 1 deletion glossary.tex
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
\newacronym{ACID}{ACID}{Atomicity, Consistency, Isolation, Durability}
\newacronym{EAV}{EAV}{Entity-Attribute-Value}
\newacronym{RDBMS}{RDBMS}{Relational Database Management Systems}
\newacronym{EAV}{EAV}{Entity-Attribute-Value}
\newacronym{tx}{$t_x$}{transaction time}
\newacronym{SPA}{SPA}{Single Page Application}
\newacronym{SQL}{SQL}{Structured Query Language}
Expand All @@ -28,3 +27,9 @@
\newacronym{IETF}{IETF}{Internet Engineering Task Force}
\newacronym{API}{API}{Application Programming Interface}
\newacronym{CAP}{CAP}{Consistency, Availability, Partition tolerance}
\newacronym{DSL}{DSL}{Domain-specific language}
\newacronym{UI}{UI}{User Interface}
\newacronym{DRP}{DRP}{Distributed Reactive Programming}
\newacronym{6NF}{6NF}{6\textsuperscript{th} normal form}
\newacronym{ES}{ES}{Event Sourcing}
\newacronym{txe}{txe}{Transaction entity}
83 changes: 77 additions & 6 deletions lit.bib
Original file line number Diff line number Diff line change
Expand Up @@ -379,7 +379,7 @@ @article{mackay2000reconfiguring
}

@article{chen2010developing,
title={Developing EMRs in developing countries},
title={Developing {EMRs} in developing countries},
author={Chen, Weihua and Akay, Metin},
journal={IEEE Transactions on Information Technology in Biomedicine},
volume={15},
Expand Down Expand Up @@ -422,6 +422,14 @@ @misc{ddpspec
year={2012}
}

@misc{datahike,
title={Datahike, A durable datalog implementation adaptable for distribution},
author={K{\"u}hne, Konrad and others},
howpublished={\url{https://github.com/replikativ/datahike}},
note={accessed 2020-05-16},
year={2018}
}

@misc{eve,
title={Eve: Programming designed for humans},
author={Cole, Joshua and Granger, Chris and Montella, Corey and others},
Expand All @@ -438,7 +446,12 @@ @misc{moffat16eve
year={2016}
}


@article{veldhuizen2012leapfrog,
title={Leapfrog triejoin: A simple, worst-case optimal join algorithm},
author={Veldhuizen, Todd L},
journal={arXiv preprint arXiv:1210.0481},
year={2012}
}

@misc{eva,
title={Eva, a distributed database-system implementing an entity-attribute-value data-model that is time-aware, accumulative, and atomically consistent},
Expand Down Expand Up @@ -565,10 +578,10 @@ @misc{parker15posh

@misc{datomicdocs,
title={Datomic Documentation},
howpublished={\url{https://docs.datomic.com/on-prem/index.html}},
howpublished={Cognitect, Inc. \url{https://docs.datomic.com/on-prem/index.html}},
note={accessed 2020-04-10},
year={2019},
author={Parker, Matt and Krivosheev, Denis and others},
author={Hickey, Rich and Halloway, Stuart and others},
}

@misc{krivosheev19reposh,
Expand All @@ -586,10 +599,10 @@ @misc{small16datscript
author={Small, Christopher and others},
}

@misc{small16datscript,
@misc{braid19,
title={Braid-{HTTP}: {Synchronization} for {HTTP}},
howpublished={\url{https://datatracker.ietf.org/doc/html/draft-toomim-httpbis-braid-http}},
year={2016},
year={2019},
author={Toomim, Michael and Little, Greg and Walker, Rafie and Bellomy, Byrn},
}

Expand Down Expand Up @@ -622,3 +635,61 @@ @inproceedings{weilbach2016decoupling
pages={1--6},
year={2016}
}

@inproceedings{salvaneschi2013towards,
title={Towards distributed reactive programming},
author={Salvaneschi, Guido and Drechsler, Joscha and Mezini, Mira},
booktitle={International Conference on Coordination Languages and Models},
pages={226--235},
year={2013},
organization={Springer}
}

@inproceedings{margara2014we,
title={We have a {DREAM}: Distributed reactive programming with consistency guarantees},
author={Margara, Alessandro and Salvaneschi, Guido},
booktitle={Proceedings of the 8th {ACM} International Conference on Distributed Event-Based Systems},
pages={142--153},
year={2014}
}

@inproceedings{elliott1997functional,
title={Functional reactive animation},
author={Elliott, Conal and Hudak, Paul},
booktitle={Proceedings of the second {ACM SIGPLAN} international conference on Functional programming},
pages={263--273},
year={1997}
}

@inproceedings{jastrow2015entity,
title={The entity-attribute-value data model in a multi-tenant shared data environment},
author={Jastrow, Torben and Preuss, Thomas},
booktitle={2015 10th International Conference on P2P, Parallel, Grid, Cloud and Internet Computing (3PGCIC)},
pages={494--497},
year={2015},
organization={IEEE}
}

@inproceedings{huff1994help,
title={{HELP} the next generation: a new client-server architecture},
author={Huff, Stanley M and Haug, Peter J and Stevens, Lane E and Dupont, Robert C and Pryor, T Allan},
booktitle={Proceedings of the Annual Symposium on Computer Application in Medical Care},
pages={271},
year={1994},
organization={American Medical Informatics Association}
}

@inproceedings{kabbedijk2012case,
title={A case study of the variability consequences of the {CQRS} pattern in online business software},
author={Kabbedijk, Jaap and Jansen, Slinger and Brinkkemper, Sjaak},
booktitle={Proceedings of the 17th European Conference on Pattern Languages of Programs},
pages={1--10},
year={2012}
}

@book{okasaki1999purely,
title={Purely functional data structures},
author={Okasaki, Chris},
year={1999},
publisher={Cambridge University Press}
}
10 changes: 7 additions & 3 deletions sections/conclusion.tex
Original file line number Diff line number Diff line change
@@ -1,16 +1,19 @@
\cleardoublepage
\section{Future Work}

\paragraph{Full stack laziness.} A fully lazy distributed data structure would allow transparent access and local caching of all facts for which the client passes access rules set up by the server. Such a design would also allow transparent querying of past facts, possibly aided by hints from the programmer as to where (on client or server) the query should be executed.

\paragraph{Safe concurrent editing.}
A distributed system expects connection loss and simultaneous conflicting edits. It should be possible to define a schema that selects one of many built-in conflict resolution strategies specific to the domain requirements of each attribute. A per-field specifiable tradeoff as dictated by the \gls{cap} theorem of C and A must propagate to the clients and dictate the possible operations on the data item in question given the current network conditions \cite{emerick2014api}. Recently discovered concepts such as the \gls{CRDT} and \gls{OT} appear to provide composable consistency primitives for robust replication \cite{weilbach2015replikativ, weilbach2016decoupling}.
A distributed system expects connection loss and simultaneous conflicting edits. It should be possible to define a schema that selects one of many built-in conflict resolution strategies specific to the domain requirements of each attribute. A per-field specifiable tradeoff between C and A, as dictated by the \gls{CAP} theorem, must propagate to the clients and dictate the possible operations on the data item in question given the current network conditions \cite{emerick2014api}.

The Braid protocol \cite{braid} is an in-progress draft of a proposed \gls{IETF} standard to add history, editing, and subscription semantics to HTTP resources. It aims to standardize the representation and synchronization of arbitrary web application state. Braid can allow simultaneous editing of the same resource by different clients and servers and can guarantee a consistent resulting state via selectable \emph{merge types} implementing CRDTs and OTs.

A \gls{DRP} approach by \cite{margara2014we} focuses strongly on selectable consistency guarantees, while \gls{CRDT} and \gls{OT} are recently discovered concepts which appear to provide composable consistency primitives for robust replication \cite{weilbach2015replikativ, weilbach2016decoupling}.

\paragraph{(Temporal) constraints.}
The Braid protocol \cite{braid19} is an in-progress draft of a proposed \gls{IETF} standard to add history, editing, and subscription semantics to HTTP resources. It aims to standardize the representation and synchronization of arbitrary web application state. Braid can allow simultaneous editing of the same resource by different clients and servers and can guarantee a consistent resulting state via selectable \emph{merge types} implementing various CRDTs and OTs.


\paragraph{(Temporal) logic constraints.}

``use logic to express what is true, use logic to check whether something is true, use logic to find out what is true''~\cite{sicp}

minikanren \cite{byrd2010relational}
Expand All @@ -37,6 +40,7 @@ \section{Future Work}
timely dataflow \cite{murray2013naiad}
The Differential dataflow \cite{mcsherry2013differential}

The 3DF
3df [reactive datalog for datomic, goebel] \cite{gobel2019optimising}


Expand Down
18 changes: 7 additions & 11 deletions sections/design.tex
Original file line number Diff line number Diff line change
Expand Up @@ -9,31 +9,27 @@ \section{Design}\label{sec:design}
\subsection{Conceptual model}



\begin{itemize}
\item
\item A fact is a triple \lisp{[e a v]}

A transition is a fact with an indicator whether it is an assertion or retraction: [+/- e a v tv ]

A transaction is a list of transitions atomically applied at the same logical time. [[+ ...] [- ...] [+...]]

A commit is a transaction with a tx timestamp: [2020 [+ ...] [- ...] [+...]]
\item A transition is a fact with an indicator whether it is an assertion \lisp{[:+ e a v]} or retraction \lisp{[:- e a v]}.

The log is the ordered list of commits.
\item A transaction is a list of transitions epochally applied at the same logical time and a pointer to the transaction, \lisp{tx}, which holds the value of a (newly-generated) transaction entity id: \lisp{[[+ ... tx] [- ... tx] [+ ... tx]]}

The database is fully desribed by the log.
\item The log is a single ordered list of transactions.

\item The database is fully described by the log.

\end{itemize}


\subsection{Query language}

This section describes the ideas behind the query language, which is a greatly simplified language modeled after the relational query language used in Datomic, which is in turn a Lisp variant of the Datalog \cite{abiteboul1988datalog} language expressed using the syntactic forms of Clojure's \gls{edn}.

The choice of language is arbitrary -- any relational language would suffice -- and the core of the database does not depend on any query language capabilities Modeling the language after the one used in Datomic was chosen because because not only has the edn notation become a de-facto standard for other EAV databases like Juxt Crux, EVA, and Datascript, but because the shape of each query clause maps naturally to the representation of a fact in canonical EAV order.
The choice of language is arbitrary -- any relational language would suffice -- and the core of the database does not depend on any query language capabilities. Modeling the language after the one used in Datomic was chosen not only because the edn notation has become a de-facto standard for other EAV databases like Crux, EVA, and Datascript, but also because the shape of each query clause maps naturally to the representation of a fact in canonical EAV order.

See \autoref{lst:example_query} for an query consisting of four query clauses (the \lisp{:where} part) performing an implicit join, and a final projection (\lisp{:find}) to extract the values bound to the \emph{\gls{lvar}} symbols \lisp{?name} and \lisp{?location}. For example, the query clause \lisp{[?p :name ?name]} applied to the fact \lisp{[:person/123 :name "Chu"]} would result in \emph{binding} the lvar \lisp{?p} to the value \lisp{:person/123}, and the lvar \lisp{?name} to the value \lisp{"Chu"}. Other clauses are bound likewise. Note that multiple occurrences of the same lvar prompt \emph{unification} with the same value, creating an implicit \emph{join}. The order of the query clauses has no semantic meaning.
See \autoref{lst:example_query} for a query consisting of four query clauses (the \lisp{:where} part) performing an implicit join, and a final projection (\lisp{:find}) to extract the values bound to the \emph{\gls{lvar}} symbols \lisp{?name} and \lisp{?location}. For example, the query clause \lisp{[?p :name ?name]} applied to the fact \lisp{[:person/123 :name "Hye-mi"]} would result in \emph{binding} the lvar \lisp{?p} to the value \lisp{:person/123}, and the lvar \lisp{?name} to the value \lisp{"Hye-mi"}. Other clauses are bound likewise. Note that multiple occurrences of the same lvar prompt \emph{unification} with the same value, creating an implicit \emph{join}. The order of the query clauses has no semantic meaning.

Performing a query entails applying the \lisp{q} function to a database value and a query. Clients can thus decide whether to leverage the query language via loading a library, or just access the data via the index structures directly.

Expand Down
15 changes: 2 additions & 13 deletions sections/design_goals.tex
Original file line number Diff line number Diff line change
Expand Up @@ -49,22 +49,11 @@ \subsection{Goals}



\subsection{Non-goals}\label{sec:nongoals}
\subsection{Non-goals}\label{sec:nongoals}


\paragraph{}
very hard with meteor-like pubsub: what do do when roles change?
this is to be solved via \gls{CQRS}: server-side callable "actions" that run selected mutations

conflict resolution / concurrent editing [see future work]
Automated conflict resolution allowing safe concurrent editing is not part of the design at this point.

excision/privacy

latency compensation/optimistic updates

\paragraph{Efficiency.}
No attention is paid to the efficiency of compute and memory usage. Tradeoffs are almost always made in favor of clarity of the mapping between conceptual model and implementation of the proof of concept. The only major optimization is the design of the triple index, leastwise it doubles as the simplest possible way to access arbitrary data without the overhead of parsing and executing a query.
No attention is paid to the efficiency of compute and memory usage. Tradeoffs are almost always made in favor of clarity concerning the mapping between conceptual model and implementation of the proof of concept. The only major optimization is the fact that the triple index structure exists; at the very least, it doubles as the simplest possible way to access arbitrary data without the overhead of parsing and executing a query.

Custom indexing strategies, e.g. ways to maintain a phonetic index to query for people's names, do not need to be part of the database design, because the triple indexing scheme is general enough to allow arbitrary access to the data in a manner that is efficient enough without having to declare indices upfront.

Expand Down
2 changes: 1 addition & 1 deletion sections/design_tenets.tex
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ \subsection{Tenets}\label{sec:tenets}


\paragraph{Immutability}
Instead of thinking about the database as a "place" where to fetch data from and write data to,


- pass whole DB as a value through app [Datomic][Re-frame]

Expand Down
Loading

0 comments on commit 2939bb3

Please sign in to comment.