diff --git a/doc/.gitignore b/doc/.gitignore
new file mode 100644
index 00000000..e35d8850
--- /dev/null
+++ b/doc/.gitignore
@@ -0,0 +1 @@
+_build
diff --git a/doc/Makefile b/doc/Makefile
new file mode 100644
index 00000000..9f83d06b
--- /dev/null
+++ b/doc/Makefile
@@ -0,0 +1,153 @@
+# Makefile for Sphinx documentation
+#
+
+# User-tunable settings; override them on the command line (e.g. `make latex PAPER=a4`).
+SPHINXOPTS    =
+SPHINXBUILD   = sphinx-build
+PAPER         =
+BUILDDIR      = _build
+
+# Internal variables. PAPER picks PAPEROPT_a4 or PAPEROPT_letter by name; any other value (including empty) adds nothing.
+PAPEROPT_a4     = -D latex_paper_size=a4
+PAPEROPT_letter = -D latex_paper_size=letter
+ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
+# The i18n builder cannot share the environment and doctrees with the others, so no -d option here.
+I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
+# None of these targets names a file; every rule below must be listed here (texinfo and info included).
+.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man texinfo info changes linkcheck doctest gettext
+
+help: # Print a one-line description of each documentation target defined below.
+	@echo "Please use \`make <target>' where <target> is one of"
+	@echo "  html       to make standalone HTML files"
+	@echo "  dirhtml    to make HTML files named index.html in directories"
+	@echo "  singlehtml to make a single large HTML file"
+	@echo "  pickle     to make pickle files"
+	@echo "  json       to make JSON files"
+	@echo "  htmlhelp   to make HTML files and a HTML help project"
+	@echo "  qthelp     to make HTML files and a qthelp project"
+	@echo "  devhelp    to make HTML files and a Devhelp project"
+	@echo "  epub       to make an epub"
+	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
+	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
+	@echo "  text       to make text files"
+	@echo "  man        to make manual pages"
+	@echo "  texinfo    to make Texinfo files"
+	@echo "  info       to make Texinfo files and run them through makeinfo"
+	@echo "  gettext    to make PO message catalogs"
+	@echo "  changes    to make an overview of all changed/added/deprecated items"
+	@echo "  linkcheck  to check all external links for integrity"
+	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"
+
+clean: # Empty the build directory; abort instead of running "rm -rf /*" when BUILDDIR is blank.
+	-rm -rf $(if $(strip $(BUILDDIR)),$(BUILDDIR),$(error BUILDDIR is empty; refusing to run rm -rf on /*))/*
+
+html: # Standalone HTML pages; the target name doubles as builder name and output subdirectory.
+	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
+	@echo
+	@echo "Build finished. The HTML pages are in $(BUILDDIR)/$@."
+
+dirhtml: # HTML pages named index.html in per-document directories.
+	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
+	@echo
+	@echo "Build finished. The HTML pages are in $(BUILDDIR)/$@."
+
+singlehtml: # The whole documentation as one large HTML page.
+	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
+	@echo
+	@echo "Build finished. The HTML page is in $(BUILDDIR)/$@."
+
+pickle: # Pickle files for further processing, written to the pickle subdirectory of BUILDDIR.
+	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
+	@echo
+	@echo "Build finished; now you can process the pickle files."
+
+json: # JSON files for further processing, written to the json subdirectory of BUILDDIR.
+	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
+	@echo
+	@echo "Build finished; now you can process the JSON files."
+
+htmlhelp: # HTML files plus an HTML Help (.hhp) project.
+	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
+	@echo
+	@echo "Build finished; now you can run HTML Help Workshop with the" \
+	      ".hhp project file in $(BUILDDIR)/$@."
+
+qthelp: # HTML files plus a Qt help project; the hints use Liblognorm, the project name set in conf.py.
+	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
+	@echo
+	@echo "Build finished; now you can run \"qcollectiongenerator\" with the" \
+	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
+	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Liblognorm.qhcp"
+	@echo "To view the help file:"
+	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Liblognorm.qhc"
+
+devhelp: # HTML files plus a Devhelp project; the hints use Liblognorm, the project name set in conf.py.
+	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
+	@echo
+	@echo "Build finished."
+	@echo "To view the help file:"
+	@echo "# mkdir -p $$HOME/.local/share/devhelp/Liblognorm"
+	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/Liblognorm"
+	@echo "# devhelp"
+
+epub: # A single epub file in the epub subdirectory of BUILDDIR.
+	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
+	@echo
+	@echo "Build finished. The epub file is in $(BUILDDIR)/$@."
+
+latex: # LaTeX sources only; PAPER=a4 or PAPER=letter selects the paper size.
+	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
+	@echo
+	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/$@."
+	@echo "Run \`make' in that directory to run these through (pdf)latex" \
+	      "(use \`make latexpdf' here to do that automatically)."
+
+latexpdf: # Build the LaTeX sources, then run make all-pdf in the latex output directory.
+	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
+	@echo "Running LaTeX files through pdflatex..."
+	$(MAKE) -C $(BUILDDIR)/latex all-pdf
+	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
+
+text: # Plain-text rendering of the documentation.
+	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
+	@echo
+	@echo "Build finished. The text files are in $(BUILDDIR)/$@."
+
+man: # Manual pages, written to the man subdirectory of BUILDDIR.
+	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
+	@echo
+	@echo "Build finished. The manual pages are in $(BUILDDIR)/$@."
+
+texinfo: # Texinfo sources only; the info target goes on to run makeinfo.
+	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
+	@echo
+	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/$@."
+	@echo "Run \`make' in that directory to run these through makeinfo" \
+	      "(use \`make info' here to do that automatically)."
+
+info: # Build the Texinfo sources, then run make info in the texinfo output directory; $(MAKE) keeps -j/-n flags.
+	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
+	@echo "Running Texinfo files through makeinfo..."
+	$(MAKE) -C $(BUILDDIR)/texinfo info
+	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
+
+gettext: # PO message catalogs; uses I18NSPHINXOPTS, so no shared doctree directory is passed.
+	$(SPHINXBUILD) -b $@ $(I18NSPHINXOPTS) $(BUILDDIR)/locale
+	@echo
+	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
+
+changes: # Overview of all changed/added/deprecated items.
+	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
+	@echo
+	@echo "The overview file is in $(BUILDDIR)/$@."
+
+linkcheck: # Check all external links; results are also written to output.txt.
+	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
+	@echo
+	@echo "Link check complete; look for any errors in the above output " \
+	      "or in $(BUILDDIR)/$@/output.txt."
+
+doctest: # Run the doctests embedded in the sources; results go to output.txt.
+	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
+	@echo "Testing of doctests in the sources finished, look at the " \
+	      "results in $(BUILDDIR)/$@/output.txt."
diff --git a/doc/conf.py b/doc/conf.py
new file mode 100644
index 00000000..3def987f
--- /dev/null
+++ b/doc/conf.py
@@ -0,0 +1,244 @@
+# -*- coding: utf-8 -*-
+#
+# Liblognorm documentation build configuration file, created by
+# sphinx-quickstart on Mon Dec 16 13:12:44 2013.
+#
+# This file is execfile()d with the current directory set to its containing dir.
+#
+# Note that not all possible configuration values are present in this
+# autogenerated file.
+#
+# All configuration values have a default; values that are commented out
+# serve to show the default.
+
+import sys, os
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#sys.path.insert(0, os.path.abspath('.'))
+
+# -- General configuration -----------------------------------------------------
+
+# If your documentation needs a minimal Sphinx version, state it here.
+#needs_sphinx = '1.0'
+
+# Add any Sphinx extension module names here, as strings. They can be extensions
+# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. None are enabled.
+extensions = []
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# The suffix of source filenames.
+source_suffix = '.rst'
+
+# The encoding of source files.
+#source_encoding = 'utf-8-sig'
+
+# The master toctree document (index.rst, given source_suffix above).
+master_doc = 'index'
+
+# General information about the project; `project` is reused below to build html_short_title.
+project = u'Liblognorm'
+copyright = u'Adiscon'
+
+# The version info for the project you're documenting, acts as replacement for
+# |version| and |release|, also used in various other places throughout the
+# built documents.
+# NOTE(review): both values are hard-coded here; confirm they match the library release.
+# The short X.Y version.
+version = '1.0'
+# The full version, including alpha/beta/rc tags; also used in html_short_title below.
+release = '1.0.0'
+
+# The language for content autogenerated by Sphinx. Set an explicit code:
+# recent Sphinx releases warn about None and fall back to 'en' anyway.
+language = 'en'
+
+# There are two options for replacing |today|: either, you set today to some
+# non-false value, then it is used:
+#today = ''
+# Else, today_fmt is used as the format for a strftime call.
+#today_fmt = '%B %d, %Y'
+
+# List of patterns, relative to source directory, that match files and directories
+# to ignore when looking for source files ('_build' is the Makefile's default BUILDDIR).
+exclude_patterns = ['_build']
+
+# The reST default role (used for this markup: `text`) to use for all documents.
+#default_role = None
+
+# If true, '()' will be appended to :func: etc. cross-reference text.
+#add_function_parentheses = True
+
+# If true, the current module name will be prepended to all description
+# unit titles (such as .. function::).
+#add_module_names = True
+
+# If true, sectionauthor and moduleauthor directives will be shown in the
+# output. They are ignored by default.
+#show_authors = False
+
+# Name of the Pygments style used for syntax highlighting in the built documents.
+pygments_style = 'sphinx'
+
+# A list of ignored prefixes for module index sorting.
+#modindex_common_prefix = []
+
+
+# -- Options for HTML output ---------------------------------------------------
+
+# The theme to use for HTML and HTML Help pages.  See the documentation for
+# a list of builtin themes ('haiku' is one of them, so html_theme_path stays unset).
+html_theme = 'haiku'
+
+# Theme options are theme-specific and customize the look and feel of a theme
+# further.  For a list of options available for each theme, see the
+# documentation.
+#html_theme_options = {}
+
+# Add any paths that contain custom themes here, relative to this directory.
+#html_theme_path = []
+
+# The name for this set of Sphinx documents.  If None, it defaults to
+# "<project> v<release> documentation". Overridden here with a tagline.
+#html_title = None
+html_title = "A fast log normalization library"
+
+# A shorter title for the navigation bar.  Default is the same as html_title.
+# Built from project and release above: "Liblognorm 1.0.0 documentation".
+html_short_title = project + " " + release + " documentation"
+
+# The name of an image file (relative to this directory) to place at the top
+# of the sidebar.
+#html_logo = None
+
+# The name of an image file (within the static path) to use as favicon of the
+# docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
+# pixels large.
+#html_favicon = None
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a same-named file overrides the builtin one. NOTE(review): confirm doc/_static exists in the tree.
+html_static_path = ['_static']
+
+# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
+# using the given strftime format.
+#html_last_updated_fmt = '%b %d, %Y'
+
+# If true, SmartyPants will be used to convert quotes and dashes to
+# typographically correct entities.
+#html_use_smartypants = True
+
+# Custom sidebar templates, maps document names to template names.
+#html_sidebars = {}
+
+# Additional templates that should be rendered to pages, maps page names to
+# template names.
+#html_additional_pages = {}
+
+# If false, no module index is generated.
+#html_domain_indices = True
+
+# If false, no index is generated. NOTE(review): index.rst still links to :ref:`genindex`; confirm that link is not dead.
+html_use_index = False
+
+# If true, the index is split into individual pages for each letter.
+#html_split_index = False
+
+# If true, links to the reST sources are added to the pages.
+#html_show_sourcelink = True
+
+# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
+#html_show_sphinx = True
+
+# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True; turned off here.
+html_show_copyright = False
+
+# If true, an OpenSearch description file will be output, and all pages will
+# contain a <link> tag referring to it.  The value of this option must be the
+# base URL from which the finished HTML is served.
+#html_use_opensearch = ''
+
+# This is the file name suffix for HTML files (e.g. ".xhtml").
+#html_file_suffix = None
+
+# Output file base name for the HTML help builder (the Makefile's htmlhelp target runs that builder).
+htmlhelp_basename = 'Liblognormdoc'
+
+
+# -- Options for LaTeX output --------------------------------------------------
+
+latex_elements = {  # every key below is commented out, so this dict is empty
+# The paper size ('letterpaper' or 'a4paper').
+#'papersize': 'letterpaper',
+
+# The font size ('10pt', '11pt' or '12pt').
+#'pointsize': '10pt',
+
+# Additional stuff for the LaTeX preamble.
+#'preamble': '',
+}
+
+# LaTeX outputs: one tuple per generated file, laid out as
+# (source start file, target name, title, author, documentclass [howto/manual]).
+latex_documents = [(
+    'index', 'Liblognorm.tex', u'Liblognorm Documentation',
+    u'Pavel Levshin', 'manual',
+)]
+
+# The name of an image file (relative to this directory) to place at the top of
+# the title page.
+#latex_logo = None
+
+# For "manual" documents, if this is true, then toplevel headings are parts,
+# not chapters.
+#latex_use_parts = False
+
+# If true, show page references after internal links.
+#latex_show_pagerefs = False
+
+# If true, show URL addresses after external links.
+#latex_show_urls = False
+
+# Documents to append as an appendix to all manuals.
+#latex_appendices = []
+
+# If false, no module index is generated.
+#latex_domain_indices = True
+
+
+# -- Options for manual page output --------------------------------------------
+
+# Manual pages: one tuple per page, laid out as
+# (source start file, name, description, authors, manual section).
+man_pages = [(
+    'index', 'liblognorm', u'Liblognorm Documentation',
+    [u'Pavel Levshin'], 1,
+)]
+
+# If true, show URL addresses after external links.
+#man_show_urls = False
+
+
+# -- Options for Texinfo output ------------------------------------------------
+
+# Texinfo outputs: one tuple per generated file, laid out as
+# (source start file, target name, title, author,
+#  dir menu entry, description, category)
+texinfo_documents = [(
+    'index', 'Liblognorm', u'Liblognorm Documentation',
+    u'Pavel Levshin', 'Liblognorm', 'Fast log normalization library.',
+    'Miscellaneous',
+)]
+
+# Documents to append as an appendix to all manuals.
+#texinfo_appendices = []
+
+# If false, no module index is generated.
+#texinfo_domain_indices = True
+
+# How to display URL addresses: 'footnote', 'no', or 'inline'.
+#texinfo_show_urls = 'footnote'
diff --git a/doc/configuration.rst b/doc/configuration.rst
new file mode 100644
index 00000000..6e4c05f0
--- /dev/null
+++ b/doc/configuration.rst
@@ -0,0 +1,328 @@
+How to configure
+================
+
+To use liblognorm, you need 3 things.
+
+1. An installed and working copy of liblognorm. The installation process 
+   has been discussed in the chapter :doc:`installation`.
+2. Log files.
+3. A rulebase, which is the heart of the liblognorm configuration.
+
+Log files
+---------
+
+A log file is a text file, which typically holds many lines. Each line is
+a log message. These are usually hard to read and therefore hard to analyze.
+This is mostly the case if you have many different devices that all create
+log messages in different formats.
+
+Rulebase
+--------
+
+The rulebase holds all the schemes for your logs. It basically consists of 
+many lines that reflect the structure of your log messages. When the 
+normalization process is started, a parse-tree will be generated from
+the rulebase and put into the memory. This will then be used to parse the 
+log messages.
+
+Each line in rulebase file is evaluated separately.
+
+Commentaries
+------------
+
+To keep your rulebase tidy, you can use commentaries. Start a commentary 
+with "#" like in many other configurations. It should look like this::
+
+    # The following prefix and rules are for firewall logs
+
+Empty lines are just skipped, they can be inserted for readability.
+    
+Rules
+-----
+
+If the line starts with 'rule=', then it contains a rule. This line has
+following format::
+
+    rule=[<tag1>[,<tag2>...]]:<match description>
+
+Everything before a colon is treated as comma-separated list of tags, which
+will be attached to a match. After the colon, match description should be
+given. It consists of string literals and field selectors. String literals
+should match exactly, whereas field selectors may match variable parts
+of a message.
+
+A rule could look like this::
+
+    rule=:%date:date-rfc3164% %host:word% %tag:char-to:\x3a%: no longer listening on %ip:ipv4%#%port:number%
+
+This excerpt is a common rule. A rule always contains several different 
+"parts"/properties and reflects the structure of the message you want to 
+normalize (e.g. Host, IP, Source, Syslogtag...).
+
+Literals
+--------
+
+Literal is just a sequence of characters, which must match exactly. 
+Percent sign characters must be escaped to prevent them from starting a 
+field accidentally. Replace each "%" with "\\x25" or "%%", when it occurs
+in a string literal.
+
+Fields
+------
+
+The structure of a field selector is as follows::
+
+    %<field name>:<field type>[:<additional information>]%
+
+field name -> this name can be chosen freely. It should describe what kind
+of information the field holds, e.g. SRC for a field that contains the
+source IP address of the message. These names should also be chosen
+carefully, since a field name can be used in every rule and therefore
+should stand for the same kind of information in different rules.
+
+If field name is "-", then this field is matched but not saved.
+
+field type -> selects the corresponding parser; the parsers are described below.
+
+Special characters that need to be escaped when used inside a field 
+description are "%" and ":". For example, this will match anything up to
+(but not including) a colon::
+
+    %variable:char-to:\x3a%
+
+Additional information is dependent on the field type; only some field 
+types need additional information.
+    
+Field types
+-----------
+
+number
+######
+
+One or more decimal digits.
+
+::
+
+    %port:number%
+
+word
+####    
+
+One or more characters, up to the next space (\\x20), or
+up to end of line.
+
+::
+
+    %host:word%
+
+char-to
+####### 
+
+One or more characters, up to the next character given in
+extra data. Additional data must contain exactly one character, which
+can be escaped.
+
+::
+
+    %field_name:char-to:,%
+    %field_name:char-to:\x25%
+
+char-sep
+########
+
+Zero or more characters, up to the next character given in extra data, or 
+up to end of line. Additional data must contain exactly one character, 
+which can be escaped.               
+
+::
+
+    %field_name:char-sep:,%
+    %field_name:char-sep:\x25%
+
+rest
+####
+
+Zero or more characters until the end of the line. It should always be at
+the end of the rule.
+
+::
+
+    %field_name:rest%
+
+quoted-string
+#############   
+
+Zero or more characters, surrounded by double quote marks.
+Quote marks are stripped from the match.
+
+::
+
+    %field_name:quoted-string%
+
+date-iso
+########    
+
+Date in ISO format ('YYYY-MM-DD').
+
+::
+
+    %field-name:date-iso%
+
+time-24hr
+#########   
+
+Time of format 'HH:MM:SS', where HH is 00..23.
+
+::
+
+    %time:time-24hr%
+
+time-12hr
+#########   
+
+Time of format 'HH:MM:SS', where HH is 00..12.
+
+::
+
+    %time:time-12hr%
+
+date-rfc3164
+############
+
+Valid date/time in RFC3164 format, i.e.: 'Oct 29 09:47:08'.
+This parser implements several quirks to match malformed
+timestamps from some devices.
+
+::
+
+    %date:date-rfc3164%
+
+date-rfc5424
+############
+
+Valid date/time in RFC5424 format, i.e.:
+'1985-04-12T19:20:50.52-04:00'.
+Slightly different formats are allowed.
+
+::
+
+    %date:date-rfc5424%
+
+ipv4
+####
+
+IPv4 address, in dot-decimal notation (AAA.BBB.CCC.DDD).
+
+::
+
+    %ip-src:ipv4%
+
+iptables
+########    
+
+Name=value pairs, separated by spaces, as in Netfilter log messages.
+Name of the selector is not used; names from the line are 
+used instead. This selector always matches everything till 
+end of the line. Cannot match zero characters.
+
+::
+
+    %-:iptables%
+
+Prefixes
+--------
+
+Several rules can have a common prefix. You can set it once with this 
+syntax::
+
+    prefix=<prefix match description>
+    
+Prefix match description syntax is the same as rule match description. 
+Every following rule will be treated as an addition to this prefix.
+
+Prefix can be reset to default (empty value) by the line::
+
+    prefix=
+
+You can define a prefix for devices that produce the same header in each 
+message. We assume, that you have your rules sorted by device. In such a 
+case you can take the header of the rules and use it with the prefix 
+variable. Here is an example of a rule for IPTables::
+
+    prefix=%date:date-rfc3164% %host:word% %tag:char-to:-\x3a%:
+    rule=:INBOUND%INBOUND:char-to:-\x3a%: IN=%IN:word% PHYSIN=%PHYSIN:word% OUT=%OUT:word% PHYSOUT=%PHYSOUT:word% SRC=%source:ipv4% DST=%destination:ipv4% LEN=%LEN:number% TOS=%TOS:char-to: % PREC=%PREC:word% TTL=%TTL:number% ID=%ID:number% DF PROTO=%PROTO:word% SPT=%SPT:number% DPT=%DPT:number% WINDOW=%WINDOW:number% RES=0x00 ACK SYN URGP=%URGP:number%
+
+Usually, every rule would hold what is defined in the prefix at its 
+beginning. But since we can define the prefix, we can save that work in 
+every line and just make the rules for the log lines. This saves us a lot 
+of work and even saves space.
+
+In a rulebase you can use multiple prefixes obviously. The prefix will be 
+used for the following rules. If then another prefix is set, the first one 
+will be erased, and new one will be used for the following rules.
+
+Rule tags
+---------
+
+Rule tagging capability permits very easy classification of syslog 
+messages and log records in general. So you can not only extract data from 
+your various log source, you can also classify events, for example, as 
+being a "login", a "logout" or a firewall "denied access". This makes it 
+very easy to look at specific subsets of messages and process them in ways 
+specific to the information being conveyed. 
+
+To see how it works, let’s first define what a tag is:
+
+A tag is a simple alphanumeric string that identifies a specific type of 
+object, action, status, etc. For example, we can have object tags for 
+firewalls and servers. For simplicity, let’s call them "firewall" and 
+"server". Then, we can have action tags like "login", "logout" and 
+"connectionOpen". Status tags could include "success" or "fail", among 
+others. Tags form a flat space, there is no inherent relationship between 
+them (but this may be added later on top of the current implementation). 
+Think of tags like the tag cloud in a blogging system. Tags can be defined 
+for any reason and need. A single event can be associated with as many 
+tags as required. 
+
+Assigning tags to messages is simple. A rule contains both the sample of 
+the message (including the extracted fields) as well as the tags. 
+Have a look at this sample::
+
+    rule=:sshd[%pid:number%]: Invalid user %user:word% from %src-ip:ipv4%
+
+Here, we have a rule that shows an invalid ssh login request. The various 
+fields are used to extract information into a well-defined structure. Have 
+you ever wondered why every rule starts with a colon? Now, here is the 
+answer: the colon separates the tag part from the actual sample part. 
+Now, you can create a rule like this::
+
+    rule=ssh,user,login,fail:sshd[%pid:number%]: Invalid user %user:word% from %src-ip:ipv4%
+
+Note the "ssh,user,login,fail" part in front of the colon. These are the 
+four tags the user has decided to assign to this event. What now happens 
+is that the normalizer does not only extract the information from the 
+message if it finds a match, but it also adds the tags as metadata. Once 
+normalization is done, one can not only query the individual fields, but 
+also query if a specific tag is associated with this event. For example, 
+to find all ssh-related events (provided the rules are built that way), 
+you can normalize a large log and select only that subset of the 
+normalized log that contains the tag "ssh".
+
+Log annotations
+---------------
+
+In short, annotations allow adding arbitrary attributes to a parsed
+message, depending on rule tags. Values of these attributes are fixed;
+they cannot be derived from variable fields. The syntax is as follows::
+
+    annotate=<tag>:+<field name>="<field value>"
+
+Field value should always be enclosed in double quote marks.
+
+There can be multiple annotations for the same tag.
+
+Examples
+--------
+
+Look at :doc:`sample rulebase <sample_rulebase>` for configuration 
+examples and matching log lines. 
diff --git a/doc/graph.png b/doc/graph.png
new file mode 100644
index 00000000..93ef7f09
Binary files /dev/null and b/doc/graph.png differ
diff --git a/doc/index.rst b/doc/index.rst
new file mode 100644
index 00000000..ce38bb03
--- /dev/null
+++ b/doc/index.rst
@@ -0,0 +1,27 @@
+.. Liblognorm documentation master file, created by
+   sphinx-quickstart on Mon Dec 16 13:12:44 2013.
+   You can adapt this file completely to your liking, but it should at least
+   contain the root `toctree` directive.
+
+Welcome to Liblognorm documentation!
+============================================
+
+Contents:
+
+.. toctree::
+    :maxdepth: 3
+
+    introduction
+    installation
+    configuration
+    sample_rulebase
+    lognormalizer
+    internals
+    license
+
+
+Indices and tables
+==================
+
+* :ref:`genindex`
+* :ref:`search`
diff --git a/doc/installation.rst b/doc/installation.rst
new file mode 100644
index 00000000..cbf42c0e
--- /dev/null
+++ b/doc/installation.rst
@@ -0,0 +1,76 @@
+How to install
+==============
+
+Here you can find the first steps to install and try liblognorm.
+
+Getting liblognorm
+------------------
+
+There are several ways to install liblognorm. You can install it
+from your distribution, if it is there. You can get binary packages from
+Rsyslog repositories:
+
+- `RedHat Enterprise Linux or CentOS <http://www.rsyslog.com/rhelcentos-rpms/>`_
+- `Ubuntu <http://www.rsyslog.com/ubuntu-repository/>`_
+- `Debian <http://www.rsyslog.com/debian-repository/>`_
+
+Or you can build your own binaries from sources. You can fetch all 
+sources from git (below you can find all commands you need) or you can 
+download it as tarballs at: 
+
+- `libestr <http://libestr.adiscon.com/download/>`_
+- `liblognorm <http://www.liblognorm.com/download/>`_
+
+Please note if you compile it from tarballs then you have to do the same 
+steps which are mentioned below, apart from::
+
+    $ git clone ...
+    $ autoreconf -vfi
+
+Building from git
+-----------------
+
+To build liblognorm from sources, you need to have 
+`json-c <https://github.com/json-c/json-c/wiki>`_ installed.
+
+Open a terminal and switch to the folder where you want to build 
+liblognorm. Below you will find the necessary commands. First, build
+and install prerequisite library called **libestr**::
+
+    $ git clone git://git.adiscon.com/git/libestr.git
+    $ cd libestr
+    $ autoreconf -vfi
+    $ ./configure
+    $ make
+    $ make install
+
+Leave that folder and repeat these steps for liblognorm::
+
+    $ cd ..
+    $ git clone git://git.adiscon.com/git/liblognorm.git
+    $ cd liblognorm
+    $ autoreconf -vfi
+    $ ./configure
+    $ make
+    $ make install
+
+That’s all you have to do.
+
+Testing
+-------
+
+For a first test we need two further things, a test log and the rulebase. 
+Both can be downloaded `here 
+<http://blog.gerhards.net/2010/11/log-normalization-first-results.html>`_.
+
+After downloading these examples you can use liblognorm. Go to 
+liblognorm/src and use the command below::
+
+    $ ./lognormalizer -r messages.sampdb -e json <messages.log
+
+where::
+
+    -r = path to the rulebase
+    -e = output format
+
+Please have a look at :doc:`lognormalizer` for all available options.
diff --git a/doc/internals.rst b/doc/internals.rst
new file mode 100644
index 00000000..f63a6a8d
--- /dev/null
+++ b/doc/internals.rst
@@ -0,0 +1,37 @@
+Liblognorm internals
+====================
+
+Parse-tree
+----------
+
+A parse-tree is generated each time when normalization process is set up.
+
+You could also call it an optimized rulebase. Each message runs through
+this tree consisting of parsers and fields and will be compared to it. The 
+message can either fit into a branch or not. If it fits, it can be 
+normalized. If it does not fit any branch in the tree, then a fitting 
+sample has to be created for this message.
+ 
+The tree is built from branches. These branches consist of 3 things:
+nodes, paths and parsers.
+
+A node is typically a literal part from a message where either a parser 
+follows or there are several subsequent literals which are different, so 
+one of the paths must be selected. After a parser, a node will always 
+follow. Parsers are like variables and thus the core structure of a 
+sample. With these a property field can be filled, which in the end is 
+needed to normalize the message. 
+
+A few notes on optimization of a parse-tree.
+
+A parse-tree is always optimized, whether or not the samples of a similar 
+kind are next to each other or not. Even if you make the order totally 
+random, it should always result in the same parse-tree. Therefore, no 
+optimization efforts have to be made to the tree itself. It reuses 
+equivalent prefixes of messages which are already in the tree. Only if a 
+difference occurs, then a new node must follow. 
+
+One case where rule order can be significant is when a message can match
+two or more different rules. This can occur when the rules differ in
+parsers. If in doubt, use :doc:`lognormalizer <lognormalizer>` tool to 
+debug.
diff --git a/doc/introduction.rst b/doc/introduction.rst
new file mode 100644
index 00000000..860e07d1
--- /dev/null
+++ b/doc/introduction.rst
@@ -0,0 +1,25 @@
+Introduction
+============
+
+Briefly described, liblognorm is a tool to normalize log data.
+
+People who need to take a look at logs often have a common problem. Logs 
+from different machines (from different vendors) usually have different 
+formats. Even if it is the same type of log (e.g. from firewalls), the log 
+entries are so different, that it is pretty hard to read these. This is 
+where liblognorm comes into the game. With this tool you can normalize all 
+your logs. All you need is liblognorm and its dependencies and a sample 
+database that fits the logs you want to normalize.
+
+So, for example, if you have traffic logs from three different firewalls, 
+liblognorm will be able to "normalize" the events into generic ones. Among 
+others, it will extract source and destination ip addresses and ports and 
+make them available via well-defined fields. As the end result, a common log 
+analysis application will be able to work on that common set and so this 
+backend will be independent from the actual firewalls feeding it. Even 
+better, once we have a well-understood interim format, it is also easy to 
+convert that into any other vendor specific format, so that you can use that 
+vendor's analysis tool.
+
+By design, liblognorm is constructed as a library. Thus, it can be used by 
+other tools.
\ No newline at end of file
diff --git a/doc/license.rst b/doc/license.rst
new file mode 100644
index 00000000..9c39f5a3
--- /dev/null
+++ b/doc/license.rst
@@ -0,0 +1,6 @@
+Licensing
+=========
+
+.. literalinclude:: ../COPYING
+    :linenos:
+
diff --git a/doc/lognormalizer.rst b/doc/lognormalizer.rst
new file mode 100644
index 00000000..2285c530
--- /dev/null
+++ b/doc/lognormalizer.rst
@@ -0,0 +1,114 @@
+Lognormalizer
+=============
+
+Lognormalizer is a sample tool which is often used to test and debug 
+rulebases before real use. Nevertheless, it can be used in production as 
+a simple command line interface to liblognorm.
+
+This tool reads log lines from its standard input and prints results 
+to standard output. You need to use redirections if you want to read 
+or write files.
+
+An example of the command::
+
+    $ lognormalizer -r messages.sampdb -e json <messages.log
+
+Command line options
+--------------------
+
+::
+
+    -r <FILENAME>
+
+Specifies name of the file containing the rulebase.
+
+::
+
+    -v
+    
+Increase verbosity level. Can be used several times.
+
+::
+
+    -p
+
+Print only successfully parsed messages.
+
+::
+
+    -t <TAG>
+    
+Print only those messages which have this tag.
+    
+::
+
+    -e <json|xml|csv>   
+
+Output format. By default, output is in Mitre CEE format. With this option, you can change it to JSON, XML or CSV.
+
+::
+
+    -T
+
+Include 'event.tags' attribute when output is in JSON format. This attribute contains list of tags of the matched 
+rule.
+
+::
+
+    -E <DATA>
+
+Encoder-specific data (for example, a separator for CSV).
+
+::
+
+    -d [FILENAME]
+
+Generate DOT file describing parse tree. It is used to plot parse graph 
+with GraphViz.
+
+Creating a graph of the rulebase
+--------------------------------
+
+To get a better overview of a rulebase you can create a graph that shows you 
+the chain of normalization (parse-tree).
+
+At first you have to install an additional package called graphviz. Graphviz 
+is a tool that creates such a graph with the help of a control file (created 
+with the rulebase). `Here <http://www.graphviz.org/>`_ you will find more 
+information about graphviz.
+
+To install it you can use the package manager. For example, on RedHat 
+systems it is yum command::
+
+    $ sudo yum install graphviz
+
+The next step is to create the control file for graphviz. To do so, we
+use the lognormalizer command with the options -d "preferred filename for the
+control file" and -r "rulebase"::
+
+    $ lognormalizer -d control.dot -r messages.rb
+
+Please note that there is no need for an input or output file.
+If you have a look at the control file now you will see that the content is 
+a little bit confusing, but it includes all information, like the nodes, 
+fields and parser, that graphviz needs to create the graph. Of course you 
+can edit that file, but please note that it is a lot of work.
+
+Now we can create the graph by typing::
+
+    $ dot control.dot -Tpng >graph.png
+
+dot + name of control file + option -T -> file format + output file
+
+That is just one example for using graphviz, of course you can do many 
+other great things with it. But I think this "simple" graph could be very 
+helpful for the normalizer.
+
+Below you see a sample of such a graph, but please note that this is
+not a particularly pretty one. Such a graph can grow very quickly as you
+extend your rulebase.
+
+.. figure:: graph.png
+   :width: 90 %
+   :alt: graph sample
+
diff --git a/doc/sample_rulebase.rst b/doc/sample_rulebase.rst
new file mode 100644
index 00000000..053c2106
--- /dev/null
+++ b/doc/sample_rulebase.rst
@@ -0,0 +1,6 @@
+Sample rulebase
+===============
+
+.. literalinclude:: ../rulebases/sample.rulebase
+    :linenos:
+