diff --git a/bin/mapgtf b/bin/mapgtf
new file mode 100644
index 0000000..7278b6a
--- /dev/null
+++ b/bin/mapgtf
@@ -0,0 +1,286 @@
+#!/usr/bin/env python
+
+# mapgtf - annotate genome maps by gtf feature attributes
+
+from __future__ import print_function
+
+import argparse
+
+# Command line interface. Parsing happens at module level, so `arg` is a
+# module-wide namespace; main() reads it and shifts the 1-based column
+# options to 0-based indices.
+p = argparse.ArgumentParser(formatter_class = argparse.ArgumentDefaultsHelpFormatter)
+# optional filters on the gtf feature and source columns
+p.add_argument('-f', dest = 'feature', default = None,
+               help = 'optionally select feature from gtf file, e.g. exon')
+p.add_argument('-s', dest = 'source', default = None,
+               help = 'optionally select source to subset gtf file, e.g. ensembl')
+# attribute tags (parsed from the gtf attribute column) to add as columns
+p.add_argument('-a', dest = 'attribute', nargs = '+',
+               default = ['gene_id', 'gene_name', 'gene_biotype', 'transcript_id',
+                          'transcript_name', 'transcript_biotype'],
+               help = 'select attribute tags to include as annotation columns')
+# further gtf columns to add; main() rejects names that are not gtf columns
+p.add_argument('-A', dest = 'other', nargs = '+',
+               default = ['feature'],
+               help = 'select additional columns to be added')
+p.add_argument('-k', dest = 'keep', action = 'store_true',
+               help = 'keep non-intersecting lines from file')
+p.add_argument('-o', dest = 'output', default = 'annotated.map',
+               help = 'output file name')
+# column positions in MAPFILE, given 1-based on the command line
+p.add_argument('-c', dest = 'map_chr_col', default = 1, type = int,
+               help = 'map column index [1-based] for chromosome ids')
+p.add_argument('-p', dest = 'map_pos_col', default = 2, type = int,
+               help = 'map column index [1-based] for positions')
+p.add_argument('-e', dest = 'map_end_col', default = None, type = int,
+               help = 'map column index [1-based] for end positions, if MAPFILE contains ranges')
+p.add_argument('-n', dest = 'ncpus', default = 1, type = int,
+               help = 'select number of processes for parallel computing')
+p.add_argument('-v', dest = 'verbose', action = 'store_true',
+               help = 'be more verbose')
+# positional input files
+p.add_argument('gtf', metavar = 'GTFFILE',
+               help = '''annotation gtf file (gff version 2);
+               see http://www.ensembl.org/info/website/upload/gff.html''')
+p.add_argument('map', metavar = 'MAPFILE',
+               help = 'genome map file to convert; VCF files need uncommented header line!')
+# parsed before the remaining imports below are executed
+arg = p.parse_args()
+
+import re
+import os
+import sys
+import pandas
+from joblib import Parallel, delayed
+
+def read_gtf (path, chromosome = None, source = None, feature = None, ids = None):
+    """read gtf file and subset by chromosome, source and feature
+
+    path       -- tab separated gtf file without header line; text after a
+                  '#' is treated as a comment by the parser
+    chromosome -- optional collection of chromosome ids to keep
+    source     -- optional value of the source column to keep
+    feature    -- optional value of the feature column to keep
+    ids        -- optional list of attribute tags; every tag becomes a column
+                  holding the quoted value of that tag ('' if it is missing)
+
+    Returns a DataFrame with the nine gtf columns (strings, except for the
+    integer start/end columns) plus one column per requested attribute tag.
+    Exits via printx() if the file does not have exactly nine columns.
+    """
+
+    def grep_gtf_ids (attributes, pattern):
+        """return the value captured by pattern or '' if there is no match"""
+        try:
+            match = pattern.search(attributes)
+        except TypeError:
+            # missing attribute field (e.g. NaN) instead of a string
+            return ''
+        return match.group(1) if match else ''
+
+    def gtf_add_attribute (gtf, ids = None):
+        """add one column per attribute tag in ids"""
+        if ids is None:
+            return gtf
+        for i in ids:
+            # The tag has to start the field or follow a separator, so that
+            # e.g. 'gene_id' does not match the tail of 'other_gene_id';
+            # the tag itself is escaped to be taken literally.
+            pattern = re.compile(
+                r'(?:^|[;\s])' + re.escape(i) + r'\s+"([^"]*)"')
+            gtf[i] = gtf['attribute'].apply(
+                lambda a, rx = pattern: grep_gtf_ids(a, rx))
+        return gtf
+
+    gtf = pandas.read_table(
+        path, sep = '\t', comment = '#', header = None, dtype = str)
+    if not len(gtf.columns) == 9:
+        printx('error: gtf_file does not have 9 columns', exit = 1)
+    gtf.columns = ['chromosome', 'source', 'feature', 'start', 'end', 'score',
+                   'strand', 'frame', 'attribute']
+    if source is not None:
+        gtf = gtf[gtf['source'].isin([source])]
+    if feature is not None:
+        gtf = gtf[gtf['feature'].isin([feature])]
+    if chromosome is not None:
+        gtf = gtf[gtf['chromosome'].isin(list(chromosome))]
+    # work on an independent copy: the filtered frame may be a view of the
+    # full table and new columns must not be written through it
+    gtf = gtf.copy()
+    if ids is not None:
+        gtf = gtf_add_attribute(gtf, ids)
+    gtf['start'] = gtf['start'].astype(int)
+    gtf['end'] = gtf['end'].astype(int)
+    return gtf
+
+def read_map (path, ppos = None, epos = None):
+    """read input file as map format
+
+    path -- tab separated file with a header line; text after a '#' is
+            treated as a comment by the parser
+    ppos -- optional 0-based index of the position column
+    epos -- optional 0-based index of the end position column
+
+    Returns a DataFrame of strings in which the selected position columns
+    are converted to integers.
+    """
+    m = pandas.read_table(path, sep = '\t', comment = '#', dtype = str)
+    for idx in (ppos, epos):
+        if idx is None:
+            continue
+        # Replace the column by label: assigning through .iloc is done in
+        # place by recent pandas versions and would keep the object dtype.
+        col = m.columns[idx]
+        m[col] = m[col].astype(int)
+    return m
+
+def float2str(x, mis = ''):
+    """return x truncated to an integer as string, or mis if not convertible
+
+    x   -- value accepted by int(), e.g. a float or a numeric string
+    mis -- replacement returned for values int() rejects (None, NaN,
+           infinity, non-numeric text)
+    """
+    try:
+        return str(int(x))
+    except (TypeError, ValueError, OverflowError):
+        # only conversion failures are expected here; anything else is a bug
+        return mis
+
+def flatten_list2d (ls):
+    """concatenate the items of all sub-lists of ls into one new list"""
+    flat = []
+    for sublist in ls:
+        flat.extend(sublist)
+    return flat
+
+def factor (length, groups):
+    """create a factor of length with balanced sized groups
+
+    length -- number of items to label
+    groups -- number of groups, at least 1
+
+    Returns a sorted list of `length` group labels from 0 to groups - 1.
+    Group sizes differ by at most one, so no group stays empty as long as
+    length >= groups. Raises ValueError if groups is smaller than 1.
+    """
+    if groups < 1:
+        raise ValueError('groups must be at least 1')
+    # integer arithmetic only, identical under python 2 and python 3
+    return [ i * groups // length for i in range(length) ]
+
+def mcapply ( d, f, a = None, axis = 1, cpus = 2, simplify = True ):
+    """apply a function to a DataFrame by multiple processes
+
+    d        -- DataFrame to split into chunks
+    f        -- function called as f(chunk, *a) for every chunk
+    a        -- optional list of additional positional arguments for f
+    axis     -- 1 splits d by rows, any other value splits d by columns
+    cpus     -- number of processes; limited to the range from 1 to the
+                number of rows/columns that are split
+    simplify -- concatenate the non-empty results into one object
+
+    Returns d itself if it has no rows or no columns. Otherwise returns the
+    list of results, or with simplify their concatenation (an empty slice
+    of d if every result is empty).
+    """
+    if len(d) < 1:
+        return d
+    if len(d.columns) < 1:
+        return d
+    if a is None:
+        a = []
+    by_rows = axis == 1
+    size = len(d) if by_rows else len(d.columns)
+    cpus = max(1, min(cpus, size))
+    x = factor(size, cpus)
+    # one boolean mask per chunk
+    masks = [ [ label == i for label in x ] for i in range(cpus) ]
+    if by_rows:
+        chunks = [ d[mask] for mask in masks ]
+    else:
+        chunks = [ d.loc[:, mask] for mask in masks ]
+    s = Parallel(n_jobs = cpus)(delayed(f)(chunk, *a) for chunk in chunks)
+    if simplify:
+        s = [ i for i in s if len(i) > 0 ]
+        if len(s) > 0:
+            # row chunks are stacked, column chunks are put side by side
+            s = pandas.concat(s, axis = 1 - axis)
+        else:
+            s = d.iloc[:0]
+    return s
+
+def match_gtf (gtf, c, p = None, e = None):
+    """return bool index for rows matching chromosomes and position in start/end range
+
+    gtf -- DataFrame with 'chromosome', 'start' and 'end' columns
+    c   -- chromosome id to match
+    p   -- optional position; without it only the chromosome is matched
+    e   -- optional end position; p and e then describe a closed range
+
+    With p only, rows whose start/end range contains p are selected. With p
+    and e, every row overlapping the range is selected, including features
+    that lie completely inside the range.
+    """
+    same_chrom = gtf['chromosome'] == c
+    if p is None:
+        return same_chrom
+    if e is None:
+        return same_chrom & (gtf['start'] <= p) & (gtf['end'] >= p)
+    # accept the two range ends in either order
+    lower, upper = (p, e) if p <= e else (e, p)
+    # two closed ranges overlap if each one starts before the other one ends
+    return same_chrom & (gtf['start'] <= upper) & (gtf['end'] >= lower)
+
+def collapse_df (d, sep = ',', unique = True, transpose = False):
+    """collapse a DataFrame by rows on a separator
+
+    d         -- DataFrame to collapse; duplicated rows are dropped first
+    sep       -- separator put between the values of a column
+    unique    -- list every distinct value of a column only once
+    transpose -- return a one-row DataFrame instead of a Series
+
+    Returns one joined string per column. Values keep their order of first
+    appearance, so the same input always gives the same output.
+    """
+    d = d.drop_duplicates()
+    def collapse_col (x, s, u):
+        if u:
+            # unlike set(), this keeps the order of appearance
+            x = x.drop_duplicates()
+        return s.join(str(value) for value in x)
+    d = d.apply(lambda col: collapse_col(col, sep, unique))
+    if transpose:
+        d = pandas.DataFrame(d).transpose()
+    return d
+
+def merge_gtf (m, gtf, gtf_columns = None, chrom_col = 0, pos_col = 1, end_col = None):
+    """append collapsed gtf columns of all matching gtf rows to every map row
+
+    m           -- map DataFrame
+    gtf         -- gtf DataFrame to look the rows up in
+    gtf_columns -- gtf columns to append; m is returned unchanged if None
+    chrom_col   -- label of the chromosome column in m
+    pos_col     -- label of the position column in m
+    end_col     -- optional label of the end position column in m
+    """
+    if gtf_columns is None:
+        return m
+
+    def annotate (row):
+        # without an end column the lookup is done for a single position
+        end = None if end_col is None else row[end_col]
+        hits = match_gtf(gtf, row[chrom_col], row[pos_col], end)
+        return collapse_df(gtf[hits][gtf_columns])
+
+    annotation = m.apply(annotate, axis = 1)
+    return pandas.concat([m, annotation], axis = 1)
+
+def insert_attr (m, c, p, gtf, attr):
+    """add one column per gtf attribute to the map, filled by chromosome/position match
+
+    m    -- map DataFrame, modified in place and returned
+    c    -- label of the chromosome column in m
+    p    -- label of the position column in m
+    gtf  -- gtf DataFrame to look the values up in
+    attr -- gtf columns to insert; distinct values are sorted and joined by ';'
+    """
+    def joined_values (chrom, pos, tag):
+        values = gtf[match_gtf(gtf, chrom, pos)][tag]
+        if len(values) < 1:
+            return ''
+        return ';'.join(sorted(values.unique()))
+
+    for tag in attr:
+        m[tag] = m.apply(
+            lambda row: joined_values(row[c], row[p], tag), axis = 1)
+    return m
+
+def gtf_positions (gtf):
+    """return the set of all positions covered by the start/end ranges of gtf
+
+    gtf -- table with integer 'start' and 'end' columns; both range ends
+           are included
+    """
+    p = set()
+    for s, e in zip(gtf['start'], gtf['end']):
+        # grow the set in place instead of building a new union per row
+        p.update(range(s, e + 1))
+    return p
+
+def printx (msg, end = '\n', exit = None):
+    """print msg followed by end, flush stdout, and optionally exit
+
+    exit -- if not None, passed to sys.exit() after printing
+    """
+    print(msg, end = end)
+    sys.stdout.flush()
+    if exit is None:
+        return
+    sys.exit(exit)
+
+def main ():
+    """annotate the rows of MAPFILE with gtf attributes and write the result
+
+    All settings are read from the module-level `arg` namespace. The 1-based
+    column options are shifted to 0-based indices in place. The annotated
+    table is written tab separated to arg.output, sorted by chromosome and
+    position.
+    """
+
+    ### checks
+    if not os.path.isfile(arg.gtf):
+        printx('missing GTFFILE: ' + arg.gtf, exit = 1)
+    if not os.path.isfile(arg.map):
+        printx('missing MAPFILE: ' + arg.map, exit = 1)
+
+    ### import map
+    arg.map_pos_col -= 1
+    arg.map_chr_col -= 1
+    if arg.map_end_col is not None:
+        arg.map_end_col -= 1
+    if arg.verbose:
+        printx('reading file ... ', end = '')
+    m = read_map(arg.map, arg.map_pos_col, arg.map_end_col)
+    if arg.verbose:
+        printx('retrieved ' + str(len(m)) + ' row(s) with ' +
+              str(len(m.columns)) + ' columns')
+    # from here on columns are addressed by label instead of index
+    chrom_col = m.columns[arg.map_chr_col]
+    pos_col = m.columns[arg.map_pos_col]
+    end_col = arg.map_end_col
+    if end_col is not None:
+        end_col = m.columns[end_col]
+    chrom_map = set(m[chrom_col].unique())
+
+    ### import gtf
+    if arg.verbose:
+        printx('reading gtf ... ', end = '')
+    g = read_gtf(arg.gtf, chrom_map, arg.source, arg.feature, arg.attribute)
+    if arg.verbose:
+        printx('retrieved ' + str(len(g)) + ' row(s)')
+    chrom_gtf = set(g['chromosome'].unique())
+    if not all([ i in g.columns for i in arg.other ]):
+        printx('invalid OTHER columns from GTF file', exit = 1)
+
+    ### get intersect
+    # chromosomes
+    chrom_all = list(chrom_map & chrom_gtf)
+    mnomatch = m[~m[chrom_col].isin(chrom_all)]
+    m = m[m[chrom_col].isin(chrom_all)]
+    if arg.verbose:
+        printx('intersected chromosomes')
+        printx('  common chromosomes: ' + ', '.join(chrom_all))
+        printx('  map rows remaining: ' + str(len(m)))
+
+    ### map
+    res = []
+    if arg.verbose:
+        printx('mapping')
+    for chrom in sorted(chrom_all):
+        if arg.verbose:
+            printx('  processing chromosome ' + chrom, end = ' ... ')
+        g_chrom = g[g['chromosome'] == chrom]
+        res_chrom = m[m[chrom_col] == chrom]
+        applyargs = [g_chrom, arg.other + arg.attribute, chrom_col, pos_col]
+        if end_col is not None:
+            applyargs.append(end_col)
+        res_chrom = mcapply(res_chrom, merge_gtf, applyargs, cpus = arg.ncpus)
+        res.append(res_chrom)
+        if arg.verbose:
+            printx('ok')
+    if len(res) > 0:
+        res = pandas.concat(res)
+    else:
+        # no chromosome in common: m is empty here, keep only its columns
+        res = m.iloc[:0]
+    # remember the column order, it is restored after adding unmatched rows
+    rescols = res.columns
+    if arg.keep:
+        res = pandas.concat([res, mnomatch])
+    res = res[rescols]
+    res = res.sort_values([chrom_col, pos_col], axis = 0)
+
+
+    ### export
+    if arg.verbose:
+        printx('exporting')
+    res.to_csv(arg.output, sep = '\t', index = False)
+
+    ### done
+    printx('done')
+
+if __name__ == '__main__':
+    main()
+
+### EOF
diff --git a/docs/mapgtf.md b/docs/mapgtf.md
new file mode 100644
index 0000000..c1fd277
--- /dev/null
+++ b/docs/mapgtf.md
@@ -0,0 +1,117 @@
+# mapgtf
+
+## About
+A tool to annotate genome maps by gtf feature attributes.
+
+## Usage
+
+Install the `AGEpy` Python package, which provides this tool.
+
+Run `mapgtf --help` from the command line to show the usage:
+
+```
+usage: mapgtf [-h] [-f FEATURE] [-s SOURCE] [-a ATTRIBUTE [ATTRIBUTE ...]]
+              [-A OTHER [OTHER ...]] [-k] [-o OUTPUT] [-c MAP_CHR_COL]
+              [-p MAP_POS_COL] [-e MAP_END_COL] [-n NCPUS] [-v]
+              GTFFILE MAPFILE
+
+positional arguments:
+  GTFFILE               annotation gtf file (gff version 2); see
+                        http://www.ensembl.org/info/website/upload/gff.html
+  MAPFILE               genome map file to convert; VCF files need uncommented
+                        header line!
+
+optional arguments:
+  -h, --help            show this help message and exit
+  -f FEATURE            optionally select feature from gtf file, e.g. exon
+                        (default: None)
+  -s SOURCE             optionally select source to subset gtf file, e.g.
+                        ensembl (default: None)
+  -a ATTRIBUTE [ATTRIBUTE ...]
+                        select attribute tags to include as annotation columns
+                        (default: ['gene_id', 'gene_name', 'gene_biotype',
+                        'transcript_id', 'transcript_name',
+                        'transcript_biotype'])
+  -A OTHER [OTHER ...]  select additional columns to be added (default:
+                        ['feature'])
+  -k                    keep non-intersecting lines from file (default: False)
+  -o OUTPUT             output file name (default: annotated.map)
+  -c MAP_CHR_COL        map column index [1-based] for chromosome ids
+                        (default: 1)
+  -p MAP_POS_COL        map column index [1-based] for positions (default: 2)
+  -e MAP_END_COL        map column index [1-based] for end positions, if
+                        MAPFILE contains ranges (default: None)
+  -n NCPUS              select number of processes for parallel computing
+                        (default: 1)
+  -v                    be more verbose (default: False)
+```
+
+## Input format
+
+`GTFFILE` is a genome annotation file in gtf (gff version 2) format.
+See the specification at:
+http://www.ensembl.org/info/website/upload/gff.html
+
+`MAPFILE` is a **tab separated** text file.
+It must contain a **header line**,
+and at least a chromosome ID and position column.
+In addition, another position column for range ends can be defined.
+
+See the examples:
+
+### Map file with single positions
+
+*File content*
+
+```
+start   chr stats   notes
+1       I   0.12    NA
+10      I   0.44    NA
+5       II  0.12    NA
+12      II  0.01    important
+10      III 0.59    NA
+240     III 0.81    NA
+```
+
+*Command*
+
+```bash
+mapgtf -c 2 -p 1 /path/to/gtf /path/to/map
+```
+
+### Map file with regions (similar to bed file formats)
+
+*File content*
+
+```
+chr start   end stats
+I   1       5   0.12
+I   10      100 0.44
+II  5       9   0.12
+II  12      17  0.01
+III 10      190 0.59
+III 240     500 0.81
+```
+
+*Command*
+
+```bash
+mapgtf -e 3 /path/to/gtf /path/to/map
+```
+
+### VCF (variant calling format) file
+
+*File content*
+
+See the specification at:
+https://samtools.github.io/hts-specs/VCFv4.2.pdf
+
+Since the header line in a VCF file starts with a comment character (`#`), that leading `#` needs to be removed so that the line is read as the header.
+
+*Command*
+
+```bash
+sed 's/^#CHROM/CHROM/' /path/to/vcf > /path/to/vcf.mod
+mapgtf /path/to/gtf /path/to/vcf.mod
+```
+
diff --git a/setup.py b/setup.py
index 168996e..b21b6cf 100644
--- a/setup.py
+++ b/setup.py
@@ -10,8 +10,8 @@
       packages = [ 'AGEpy' ],
       install_requires = [ 'Pandas>=0.15.2', 'numpy>=1.9.2','requests==2.10.0', \
       'suds', 'xlrd', 'biomart', 'rpy2', 'matplotlib', 'pyocclient==0.1', \
-      'xlsxwriter','pybedtools'],
+      'xlsxwriter','pybedtools', 'joblib'],
       dependency_links=["git+https://github.com/mpg-age-bioinformatics/pyocclient.git#egg=pyocclient-0.1"],
       zip_safe = False,
-      scripts=['bin/david','bin/bit','bin/obo2tsv']
+      scripts=['bin/david','bin/bit','bin/obo2tsv','bin/mapgtf']
       )