diff --git a/doc/.gitignore b/doc/.gitignore new file mode 100644 index 00000000..e35d8850 --- /dev/null +++ b/doc/.gitignore @@ -0,0 +1 @@ +_build diff --git a/doc/Makefile b/doc/Makefile new file mode 100644 index 00000000..9f83d06b --- /dev/null +++ b/doc/Makefile @@ -0,0 +1,153 @@ +# Makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +PAPER = +BUILDDIR = _build + +# Internal variables. +PAPEROPT_a4 = -D latex_paper_size=a4 +PAPEROPT_letter = -D latex_paper_size=letter +ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . +# the i18n builder cannot share the environment and doctrees with the others +I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . + +.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man texinfo info changes linkcheck doctest gettext + +help: + @echo "Please use \`make <target>' where <target> is one of" + @echo " html to make standalone HTML files" + @echo " dirhtml to make HTML files named index.html in directories" + @echo " singlehtml to make a single large HTML file" + @echo " pickle to make pickle files" + @echo " json to make JSON files" + @echo " htmlhelp to make HTML files and a HTML help project" + @echo " qthelp to make HTML files and a qthelp project" + @echo " devhelp to make HTML files and a Devhelp project" + @echo " epub to make an epub" + @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" + @echo " latexpdf to make LaTeX files and run them through pdflatex" + @echo " text to make text files" + @echo " man to make manual pages" + @echo " texinfo to make Texinfo files" + @echo " info to make Texinfo files and run them through makeinfo" + @echo " gettext to make PO message catalogs" + @echo " changes to make an overview of all changed/added/deprecated items" + @echo " linkcheck to check all external links for integrity" + @echo " doctest to run all doctests embedded in the 
documentation (if enabled)" + +clean: # refuses to run with an empty BUILDDIR, which would otherwise expand to rm -rf /* + -$(if $(strip $(BUILDDIR)),rm -rf $(BUILDDIR)/*,$(error BUILDDIR is empty; refusing to run rm -rf /*)) + +html: + $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." + +dirhtml: + $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." + +singlehtml: + $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml + @echo + @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." + +pickle: + $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle + @echo + @echo "Build finished; now you can process the pickle files." + +json: + $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json + @echo + @echo "Build finished; now you can process the JSON files." + +htmlhelp: + $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp + @echo + @echo "Build finished; now you can run HTML Help Workshop with the" \ + ".hhp project file in $(BUILDDIR)/htmlhelp." + +qthelp: + $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp + @echo + @echo "Build finished; now you can run "qcollectiongenerator" with the" \ + ".qhcp project file in $(BUILDDIR)/qthelp, like this:" + @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Liblognorm.qhcp" + @echo "To view the help file:" + @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Liblognorm.qhc" + +devhelp: + $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp + @echo + @echo "Build finished." + @echo "To view the help file:" + @echo "# mkdir -p $$HOME/.local/share/devhelp/Liblognorm" + @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/Liblognorm" + @echo "# devhelp" + +epub: + $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub + @echo + @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 
+ +latex: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo + @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." + @echo "Run \`make' in that directory to run these through (pdf)latex" \ + "(use \`make latexpdf' here to do that automatically)." + +latexpdf: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through pdflatex..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +text: + $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text + @echo + @echo "Build finished. The text files are in $(BUILDDIR)/text." + +man: + $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man + @echo + @echo "Build finished. The manual pages are in $(BUILDDIR)/man." + +texinfo: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo + @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." + @echo "Run \`make' in that directory to run these through makeinfo" \ + "(use \`make info' here to do that automatically)." + +info: # recurse through the MAKE variable (as latexpdf does) so -n and the jobserver reach the sub-make + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo "Running Texinfo files through makeinfo..." + $(MAKE) -C $(BUILDDIR)/texinfo info + @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." + +gettext: + $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale + @echo + @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." + +changes: + $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes + @echo + @echo "The overview file is in $(BUILDDIR)/changes." + +linkcheck: + $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck + @echo + @echo "Link check complete; look for any errors in the above output " \ + "or in $(BUILDDIR)/linkcheck/output.txt." 
+ +doctest: + $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest + @echo "Testing of doctests in the sources finished, look at the " \ + "results in $(BUILDDIR)/doctest/output.txt." diff --git a/doc/conf.py b/doc/conf.py new file mode 100644 index 00000000..3def987f --- /dev/null +++ b/doc/conf.py @@ -0,0 +1,244 @@ +# -*- coding: utf-8 -*- +# +# Liblognorm documentation build configuration file, created by +# sphinx-quickstart on Mon Dec 16 13:12:44 2013. +# +# This file is execfile()d with the current directory set to its containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +import sys, os + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +#sys.path.insert(0, os.path.abspath('.')) + +# -- General configuration ----------------------------------------------------- + +# If your documentation needs a minimal Sphinx version, state it here. +#needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be extensions +# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. +extensions = [] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix of source filenames. +source_suffix = '.rst' + +# The encoding of source files. +#source_encoding = 'utf-8-sig' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = u'Liblognorm' +copyright = u'Adiscon' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. 
+# +# The short X.Y version. +version = '1.0' +# The full version, including alpha/beta/rc tags. +release = '1.0.0' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +#today = '' +# Else, today_fmt is used as the format for a strftime call. +#today_fmt = '%B %d, %Y' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +exclude_patterns = ['_build'] + +# The reST default role (used for this markup: `text`) to use for all documents. +#default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +#add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +#add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +#show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# A list of ignored prefixes for module index sorting. +#modindex_common_prefix = [] + + +# -- Options for HTML output --------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +html_theme = 'haiku' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +#html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +#html_theme_path = [] + +# The name for this set of Sphinx documents. If None, it defaults to +# "<project> v<release> documentation". 
+#html_title = None +html_title = "A fast log normalization library" + +# A shorter title for the navigation bar. Default is the same as html_title. +# html_short_title = None +html_short_title = project + " " + release + " documentation" + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +#html_logo = None + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +#html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +#html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +#html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +#html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +#html_additional_pages = {} + +# If false, no module index is generated. +#html_domain_indices = True + +# If false, no index is generated. +html_use_index = False + +# If true, the index is split into individual pages for each letter. +#html_split_index = False + +# If true, links to the reST sources are added to the pages. +#html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +#html_show_sphinx = True + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. +html_show_copyright = False + +# If true, an OpenSearch description file will be output, and all pages will +# contain a <link> tag referring to it. 
The value of this option must be the +# base URL from which the finished HTML is served. +#html_use_opensearch = '' + +# This is the file name suffix for HTML files (e.g. ".xhtml"). +#html_file_suffix = None + +# Output file base name for HTML help builder. +htmlhelp_basename = 'Liblognormdoc' + + +# -- Options for LaTeX output -------------------------------------------------- + +latex_elements = { +# The paper size ('letterpaper' or 'a4paper'). +#'papersize': 'letterpaper', + +# The font size ('10pt', '11pt' or '12pt'). +#'pointsize': '10pt', + +# Additional stuff for the LaTeX preamble. +#'preamble': '', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, author, documentclass [howto/manual]). +latex_documents = [ + ('index', 'Liblognorm.tex', u'Liblognorm Documentation', + u'Pavel Levshin', 'manual'), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +#latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +#latex_use_parts = False + +# If true, show page references after internal links. +#latex_show_pagerefs = False + +# If true, show URL addresses after external links. +#latex_show_urls = False + +# Documents to append as an appendix to all manuals. +#latex_appendices = [] + +# If false, no module index is generated. +#latex_domain_indices = True + + +# -- Options for manual page output -------------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + ('index', 'liblognorm', u'Liblognorm Documentation', + [u'Pavel Levshin'], 1) +] + +# If true, show URL addresses after external links. +#man_show_urls = False + + +# -- Options for Texinfo output ------------------------------------------------ + +# Grouping the document tree into Texinfo files. 
List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + ('index', 'Liblognorm', u'Liblognorm Documentation', + u'Pavel Levshin', 'Liblognorm', 'Fast log normalization library.', + 'Miscellaneous'), +] + +# Documents to append as an appendix to all manuals. +#texinfo_appendices = [] + +# If false, no module index is generated. +#texinfo_domain_indices = True + +# How to display URL addresses: 'footnote', 'no', or 'inline'. +#texinfo_show_urls = 'footnote' diff --git a/doc/configuration.rst b/doc/configuration.rst new file mode 100644 index 00000000..6e4c05f0 --- /dev/null +++ b/doc/configuration.rst @@ -0,0 +1,328 @@ +How to configure +================ + +To use liblognorm, you need 3 things. + +1. An installed and working copy of liblognorm. The installation process + has been discussed in the chapter :doc:`installation`. +2. Log files. +3. A rulebase, which is heart of liblognorm configuration. + +Log files +--------- + +A log file is a text file, which typically holds many lines. Each line is +a log message. These are usually a bit strange to read, thus to analyze. +This mostly happens, if you have a lot of different devices, that are all +creating log messages in a different format. + +Rulebase +-------- + +The rulebase holds all the schemes for your logs. It basically consists of +many lines that reflect the structure of your log messages. When the +normalization process is started, a parse-tree will be generated from +the rulebase and put into the memory. This will then be used to parse the +log messages. + +Each line in rulebase file is evaluated separately. + +Commentaries +------------ + +To keep your rulebase tidy, you can use commentaries. Start a commentary +with "#" like in many other configurations. It should look like this:: + + # The following prefix and rules are for firewall logs + +Empty lines are just skipped, they can be inserted for readability. 
+ +Rules +----- + +If the line starts with 'rule=', then it contains a rule. This line has +the following format:: + + rule=[<tag1>[,<tag2>...]]:<match description> + +Everything before a colon is treated as a comma-separated list of tags, which +will be attached to a match. After the colon, a match description should be +given. It consists of string literals and field selectors. String literals +should match exactly, whereas field selectors may match variable parts +of a message. + +A rule could look like this:: + + rule=:%date:date-rfc3164% %host:word% %tag:char-to:\x3a%: no longer listening on %ip:ipv4%#%port:number% + +This excerpt is a common rule. A rule always contains several different +"parts"/properties and reflects the structure of the message you want to +normalize (e.g. Host, IP, Source, Syslogtag...). + +Literals +-------- + +A literal is just a sequence of characters, which must match exactly. +Percent sign characters must be escaped to prevent them from starting a +field accidentally. Replace each "%" with "\\x25" or "%%", when it occurs +in a string literal. + +Fields +------ + +The structure of a field selector is as follows:: + + %<field name>:<field type>[:<additional information>]% + +field name -> that name can be selected freely. It should be a description +of what kind of information the field is holding, e.g. SRC for a field that +contains the source IP address of the message. These names should also be +chosen carefully, since the field name can be used in every rule and +therefore should fit for the same kind of information in different rules. + +If the field name is "-", then this field is matched but not saved. + +field type -> selects the corresponding parser; the parsers are described below. + +Special characters that need to be escaped when used inside a field +description are "%" and ":". For example, this will match anything up to +(but not including) a colon:: + + %variable:char-to:\x3a% + +Additional information is dependent on the field type; only some field +types need additional information. 
+ +Field types +----------- + +number +###### + +One or more decimal digits. + +:: + + %port:number% + +word +#### + +One or more characters, up to the next space (\\x20), or +up to end of line. + +:: + + %host:word% + +char-to +####### + +One or more characters, up to the next character given in +extra data. Additional data must contain exactly one character, which +can be escaped. + +:: + + %field_name:char-to:,% + %field_name:char-to:\x25% + +char-sep +######## + +Zero or more characters, up to the next character given in extra data, or +up to end of line. Additional data must contain exactly one character, +which can be escaped. + +:: + + %field_name:char-sep:,% + %field_name:char-sep:\x25% + +rest +#### + +Zero or more characters till end of line. Should always be at the end of the +rule. + +:: + + %field_name:rest% + +quoted-string +############# + +Zero or more characters, surrounded by double quote marks. +Quote marks are stripped from the match. + +:: + + %field_name:quoted-string% + +date-iso +######## + +Date in ISO format ('YYYY-MM-DD'). + +:: + + %field-name:date-iso% + +time-24hr +######### + +Time of format 'HH:MM:SS', where HH is 00..23. + +:: + + %time:time-24hr% + +time-12hr +######### + +Time of format 'HH:MM:SS', where HH is 00..12. + +:: + + %time:time-12hr% + +date-rfc3164 +############ + +Valid date/time in RFC3164 format, e.g. 'Oct 29 09:47:08'. +This parser implements several quirks to match malformed +timestamps from some devices. + +:: + + %date:date-rfc3164% + +date-rfc5424 +############ + +Valid date/time in RFC5424 format, e.g. +'1985-04-12T19:20:50.52-04:00'. +Slightly different formats are allowed. + +:: + + %date:date-rfc5424% + +ipv4 +#### + +IPv4 address, in dot-decimal notation (AAA.BBB.CCC.DDD). + +:: + + %ip-src:ipv4% + +iptables +######## + +Name=value pairs, separated by spaces, as in Netfilter log messages. +Name of the selector is not used; names from the line are +used instead. 
This selector always matches everything till +end of the line. Cannot match zero characters. + +:: + + %-:iptables% + +Prefixes +-------- + +Several rules can have a common prefix. You can set it once with this +syntax:: + + prefix=<prefix match description> + +Prefix match description syntax is the same as rule match description. +Every following rule will be treated as an addition to this prefix. + +Prefix can be reset to default (empty value) by the line:: + + prefix= + +You can define a prefix for devices that produce the same header in each +message. We assume that you have your rules sorted by device. In such a +case you can take the header of the rules and use it with the prefix +variable. Here is an example of a rule for IPTables:: + + prefix=%date:date-rfc3164% %host:word% %tag:char-to:-\x3a%: + rule=:INBOUND%INBOUND:char-to:-\x3a%: IN=%IN:word% PHYSIN=%PHYSIN:word% OUT=%OUT:word% PHYSOUT=%PHYSOUT:word% SRC=%source:ipv4% DST=%destination:ipv4% LEN=%LEN:number% TOS=%TOS:char-to: % PREC=%PREC:word% TTL=%TTL:number% ID=%ID:number% DF PROTO=%PROTO:word% SPT=%SPT:number% DPT=%DPT:number% WINDOW=%WINDOW:number% RES=0x00 ACK SYN URGP=%URGP:number% + +Usually, every rule would hold what is defined in the prefix at its +beginning. But since we can define the prefix, we can save that work in +every line and just make the rules for the log lines. This saves us a lot +of work and even saves space. + +Obviously, you can use multiple prefixes in a rulebase. The prefix will be +used for the following rules. If then another prefix is set, the first one +will be erased, and the new one will be used for the following rules. + +Rule tags +--------- + +Rule tagging capability permits very easy classification of syslog +messages and log records in general. So you can not only extract data from +your various log sources, you can also classify events, for example, as +being a "login", a "logout" or a firewall "denied access". 
This makes it +very easy to look at specific subsets of messages and process them in ways +specific to the information being conveyed. + +To see how it works, let’s first define what a tag is: + +A tag is a simple alphanumeric string that identifies a specific type of +object, action, status, etc. For example, we can have object tags for +firewalls and servers. For simplicity, let’s call them "firewall" and +"server". Then, we can have action tags like "login", "logout" and +"connectionOpen". Status tags could include "success" or "fail", among +others. Tags form a flat space, there is no inherent relationship between +them (but this may be added later on top of the current implementation). +Think of tags like the tag cloud in a blogging system. Tags can be defined +for any reason and need. A single event can be associated with as many +tags as required. + +Assigning tags to messages is simple. A rule contains both the sample of +the message (including the extracted fields) as well as the tags. +Have a look at this sample:: + + rule=:sshd[%pid:number%]: Invalid user %user:word% from %src-ip:ipv4% + +Here, we have a rule that shows an invalid ssh login request. The various +fields are used to extract information into a well-defined structure. Have +you ever wondered why every rule starts with a colon? Now, here is the +answer: the colon separates the tag part from the actual sample part. +Now, you can create a rule like this:: + + rule=ssh,user,login,fail:sshd[%pid:number%]: Invalid user %user:word% from %src-ip:ipv4% + +Note the "ssh,user,login,fail" part in front of the colon. These are the +four tags the user has decided to assign to this event. What now happens +is that the normalizer does not only extract the information from the +message if it finds a match, but it also adds the tags as metadata. Once +normalization is done, one can not only query the individual fields, but +also query if a specific tag is associated with this event. 
For example, +to find all ssh-related events (provided the rules are built that way), +you can normalize a large log and select only that subset of the +normalized log that contains the tag "ssh". + +Log annotations +--------------- + +In short, annotations allow you to add arbitrary attributes to a parsed +message, depending on rule tags. Values of these attributes are fixed; +they cannot be derived from variable fields. The syntax is as follows:: + + annotate=<tag>:+<field name>="<field value>" + +Field value should always be enclosed in double quote marks. + +There can be multiple annotations for the same tag. + +Examples +-------- + +Look at :doc:`sample rulebase <sample_rulebase>` for configuration +examples and matching log lines. diff --git a/doc/graph.png b/doc/graph.png new file mode 100644 index 00000000..93ef7f09 Binary files /dev/null and b/doc/graph.png differ diff --git a/doc/index.rst b/doc/index.rst new file mode 100644 index 00000000..ce38bb03 --- /dev/null +++ b/doc/index.rst @@ -0,0 +1,27 @@ +.. Liblognorm documentation master file, created by + sphinx-quickstart on Mon Dec 16 13:12:44 2013. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to the Liblognorm documentation! +============================================ + +Contents: + +.. toctree:: + :maxdepth: 3 + + introduction + installation + configuration + sample_rulebase + lognormalizer + internals + license + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`search` diff --git a/doc/installation.rst b/doc/installation.rst new file mode 100644 index 00000000..cbf42c0e --- /dev/null +++ b/doc/installation.rst @@ -0,0 +1,76 @@ +How to install +============== + +Here you can find the first steps to install and try liblognorm. + +Getting liblognorm +------------------ + +There are several ways to install liblognorm. You can install it +from your distribution, if it is there. 
You can get binary packages from +Rsyslog repositories: + +- `RedHat Enterprise Linux or CentOS `_ +- `Ubuntu `_ +- `Debian `_ + +Or you can build your own binaries from sources. You can fetch all +sources from git (below you can find all commands you need) or you can +download them as tarballs at: + +- `libestr `_ +- `liblognorm `_ + +Please note that if you compile from tarballs, you have to do the same +steps which are mentioned below, apart from:: + + $ git clone ... + $ autoreconf -vfi + +Building from git +----------------- + +To build liblognorm from sources, you need to have +`json-c `_ installed. + +Open a terminal and switch to the folder where you want to build +liblognorm. Below you will find the necessary commands. First, build +and install the prerequisite library called **libestr**:: + + $ git clone git://git.adiscon.com/git/libestr.git + $ cd libestr + $ autoreconf -vfi + $ ./configure + $ make + $ make install + +Leave that folder and repeat these steps for liblognorm:: + + $ cd .. + $ git clone git://git.adiscon.com/git/liblognorm.git + $ cd liblognorm + $ autoreconf -vfi + $ ./configure + $ make + $ make install + +That’s all you have to do. + +Testing +------- + +For a first test we need two further things, a test log and the rulebase. +Both can be downloaded `here +`_. + +After downloading these examples you can use liblognorm. Go to +liblognorm/src and use the command below:: + + $ ./lognormalizer -r messages.sampdb -o json ` tool to +debug. diff --git a/doc/introduction.rst b/doc/introduction.rst new file mode 100644 index 00000000..860e07d1 --- /dev/null +++ b/doc/introduction.rst @@ -0,0 +1,25 @@ +Introduction +============ + +Briefly described, liblognorm is a tool to normalize log data. + +People who need to take a look at logs often have a common problem. Logs +from different machines (from different vendors) usually have different +formats. Even if it is the same type of log (e.g. 
from firewalls), the log +entries are so different, that it is pretty hard to read these. This is +where liblognorm comes into the game. With this tool you can normalize all +your logs. All you need is liblognorm and its dependencies and a sample +database that fits the logs you want to normalize. + +So, for example, if you have traffic logs from three different firewalls, +liblognorm will be able to "normalize" the events into generic ones. Among +others, it will extract source and destination ip addresses and ports and +make them available via well-defined fields. As the end result, a common log +analysis application will be able to work on that common set and so this +backend will be independent from the actual firewalls feeding it. Even +better, once we have a well-understood interim format, it is also easy to +convert that into any other vendor specific format, so that you can use that +vendor's analysis tool. + +By design, liblognorm is constructed as a library. Thus, it can be used by +other tools. \ No newline at end of file diff --git a/doc/license.rst b/doc/license.rst new file mode 100644 index 00000000..9c39f5a3 --- /dev/null +++ b/doc/license.rst @@ -0,0 +1,6 @@ +Licensing +========= + +.. literalinclude:: ../COPYING + :linenos: + diff --git a/doc/lognormalizer.rst b/doc/lognormalizer.rst new file mode 100644 index 00000000..2285c530 --- /dev/null +++ b/doc/lognormalizer.rst @@ -0,0 +1,114 @@ +Lognormalizer +============= + +Lognormalizer is a sample tool which is often used to test and debug +rulebases before real use. Nevertheless, it can be used in production as +a simple command line interface to liblognorm. + +This tool reads log lines from its standard input and prints results +to standard output. You need to use redirections if you want to read +or write files. + +An example of the command:: + + $ lognormalizer -r messages.sampdb -o json + +Specifies name of the file containing the rulebase. + +:: + + -v + +Increase verbosity level. 
Can be used several times. + +:: + + -p + +Print only successfully parsed messages. + +:: + + -t <TAG> + +Print only those messages which have this tag. + +:: + + -e <json|xml|csv> + +Output format. By default, output is in Mitre CEE format. With this option, you can change it to JSON, XML or CSV. + +:: + + -T + +Include the 'event.tags' attribute when output is in JSON format. This attribute contains the list of tags of the matched +rule. + +:: + + -E <DATA> + +Encoder-specific data (for example, a separator for CSV). + +:: + + -d [FILENAME] + +Generate DOT file describing parse tree. It is used to plot parse graph +with GraphViz. + +Creating a graph of the rulebase +-------------------------------- + +To get a better overview of a rulebase you can create a graph that shows you +the chain of normalization (parse-tree). + +First you have to install an additional package called graphviz. Graphviz +is a tool that creates such a graph with the help of a control file (created +with the rulebase). `Here `_ you will find more +information about graphviz. + +To install it you can use the package manager. For example, on RedHat +systems it is the yum command:: + + $ sudo yum install graphviz + +The next step would be creating the control file for graphviz. To do so, we +use the lognormalizer command with the options -d "preferred filename for the +control file" and -r "rulebase":: + + $ lognormalizer -d control.dot -r messages.rb + +Please note that there is no need for an input or output file. +If you have a look at the control file now you will see that the content is +a little bit confusing, but it includes all information, like the nodes, +fields and parser, that graphviz needs to create the graph. Of course you +can edit that file, but please note that it is a lot of work. 
+ +Now we can create the graph by typing:: + + $ dot control.dot -Tpng >graph.png + +dot + name of control file + option -T -> file format + output file + +That is just one example of using graphviz; of course you can do many +other great things with it. But I think this "simple" graph could be very +helpful for the normalizer. + +Below you see a sample of such a graph, but please note that this is +not such a pretty one. Such a graph can grow very quickly as you extend your +rulebase. + +.. figure:: graph.png + :width: 90 % + :alt: graph sample + diff --git a/doc/sample_rulebase.rst b/doc/sample_rulebase.rst new file mode 100644 index 00000000..053c2106 --- /dev/null +++ b/doc/sample_rulebase.rst @@ -0,0 +1,6 @@ +Sample rulebase +=============== + +.. literalinclude:: ../rulebases/sample.rulebase + :linenos: +