diff --git a/README.md b/README.md index 9b183d0..e5b59e0 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ The techniques behind the original parser are described in the paper [Simple and #### Required software - * Python 2.7 interpreter + * Python 3 (/!\ recent move from Python 2.7, which was used for all previous releases). * [DyNet library](https://github.com/clab/dynet/tree/master/python) Note: the current version is Dynet 2.0 but Dynet 1.0 was used in both releases 1.0 and 2.0 @@ -84,6 +84,7 @@ You can also specify the gamma scalar using `--elmo_gamma` or set `--elmo_learn_ to learn the value during training. Credits to Johannes Gontrum for this addition. +Credits to Giuseppe Attardi for porting the parser to Python 3. #### Citation diff --git a/barchybrid/src/arc_hybrid.py b/barchybrid/src/arc_hybrid.py index 93dd668..76c73f4 100644 --- a/barchybrid/src/arc_hybrid.py +++ b/barchybrid/src/arc_hybrid.py @@ -5,7 +5,7 @@ import numpy as np from copy import deepcopy from collections import defaultdict -import codecs, json +import json class ArcHybridLSTM: def __init__(self, vocab, options): @@ -65,8 +65,8 @@ def __evaluate(self, stack, buf, train): #feature rep empty = self.feature_extractor.empty - topStack = [ stack.roots[-i-1].lstms if len(stack) > i else [empty] for i in xrange(self.k) ] - topBuffer = [ buf.roots[i].lstms if len(buf) > i else [empty] for i in xrange(1) ] + topStack = [ stack.roots[-i-1].lstms if len(stack) > i else [empty] for i in range(self.k) ] + topBuffer = [ buf.roots[i].lstms if len(buf) > i else [empty] for i in range(1) ] input = dy.concatenate(list(chain(*(topStack + topBuffer)))) output = self.unlabeled_MLP(input) @@ -116,11 +116,11 @@ def __evaluate(self, stack, buf, train): def Save(self, filename): - print 'Saving model to ' + filename + print('Saving model to ' + filename) self.model.save(filename) def Load(self, filename): - print 'Loading model from ' + filename + print('Loading model from ' + filename) self.model.populate(filename) @@ -208,7 +208,7 @@ def Predict(self, treebanks, datasplit, options): reached_max_swap = 0 char_map = {} if options.char_map_file: - char_map_fh = codecs.open(options.char_map_file,encoding='utf-8') + char_map_fh = open(options.char_map_file,encoding='utf-8') char_map = json.loads(char_map_fh.read()) # should probably use a namedtuple in get_vocab to make this prettier _, test_words, test_chars, _, _, _, test_treebanks, test_langs = utils.get_vocab(treebanks,datasplit,char_map) @@ -218,10 +218,10 @@ def Predict(self, treebanks, datasplit, options): test_embeddings = defaultdict(lambda: {}) if options.word_emb_size > 0 and options.ext_word_emb_file: new_test_words = \ - set(test_words) - self.feature_extractor.words.viewkeys() + set(test_words) - self.feature_extractor.words.keys() - print "Number of OOV word types at test time: %i (out of %i)" % ( - len(new_test_words), len(test_words)) + print("Number of OOV word types at test time: %i (out of %i)" % + (len(new_test_words), len(test_words))) if len(new_test_words) > 0: # no point loading embeddings if there are no words to look for @@ -234,15 +234,15 @@ def Predict(self, treebanks, datasplit, options): ) test_embeddings["words"].update(embeddings) if len(test_langs) > 1 and test_embeddings["words"]: - print "External embeddings found for %i words "\ + print("External embeddings found for %i words "\ "(out of %i)" % \ - (len(test_embeddings["words"]), len(new_test_words)) + (len(test_embeddings["words"]), len(new_test_words))) if options.char_emb_size > 0: new_test_chars = \
- set(test_chars) - self.feature_extractor.chars.viewkeys() - print "Number of OOV char types at test time: %i (out of %i)" % ( - len(new_test_chars), len(test_chars)) + set(test_chars) - self.feature_extractor.chars.keys() + print("Number of OOV char types at test time: %i (out of %i)" % + (len(new_test_chars), len(test_chars))) if len(new_test_chars) > 0: for lang in test_langs: @@ -255,9 +255,9 @@ def Predict(self, treebanks, datasplit, options): ) test_embeddings["chars"].update(embeddings) if len(test_langs) > 1 and test_embeddings["chars"]: - print "External embeddings found for %i chars "\ + print("External embeddings found for %i chars "\ "(out of %i)" % \ - (len(test_embeddings["chars"]), len(new_test_chars)) + (len(test_embeddings["chars"]), len(new_test_chars))) data = utils.read_conll_dir(treebanks,datasplit,char_map=char_map) for iSentence, osentence in enumerate(data,1): @@ -286,7 +286,7 @@ def Predict(self, treebanks, datasplit, options): if iSwap == max_swap and not reached_swap_for_i_sentence: reached_max_swap += 1 reached_swap_for_i_sentence = True - print "reached max swap in %d out of %d sentences"%(reached_max_swap, iSentence) + print("reached max swap in %d out of %d sentences"%(reached_max_swap, iSentence)) self.apply_transition(best,stack,buf,hoffset) if best[1] == SWAP: iSwap += 1 @@ -315,7 +315,7 @@ def Train(self, trainData, options): start = time.time() random.shuffle(trainData) # in certain cases the data will already have been shuffled after being read from file or while creating dev data - print "Length of training data: ", len(trainData) + print("Length of training data: ", len(trainData)) errs = [] @@ -328,7 +328,7 @@ def Train(self, trainData, options): ' Errors: %.3f'%((float(eerrors)) / etotal)+\ ' Labeled Errors: %.3f'%(float(lerrors) / etotal)+\ ' Time: %.2gs'%(time.time()-start) - print loss_message + print(loss_message) start = time.time() eerrors = 0 eloss = 0.0 @@ -432,5 +432,5 @@ def Train(self, trainData, options): dy.renew_cg() self.trainer.update() - print "Loss: ", mloss/iSentence - print "Total Training Time: %.2gs"%(time.time()-beg) + print("Loss: ", mloss/iSentence) + print("Total Training Time: %.2gs" % (time.time()-beg)) diff --git a/barchybrid/src/feature_extractor.py b/barchybrid/src/feature_extractor.py index 57b3473..5491e35 100644 --- a/barchybrid/src/feature_extractor.py +++ b/barchybrid/src/feature_extractor.py @@ -4,7 +4,7 @@ import numpy as np import random from collections import defaultdict -import codecs, re, os +import re, os class FeatureExtractor(object): def __init__(self, model, options, vocab, nnvecs=1): @@ -57,7 +57,7 @@ def __init__(self, model, options, vocab, nnvecs=1): options, emb_file=options.ext_word_emb_file, lang=lang, - words=self.words.viewkeys() + words=self.words.keys() ) self.external_embedding["words"].update(embeddings) @@ -82,7 +82,7 @@ def __init__(self, model, options, vocab, nnvecs=1): options, emb_dir=options.ext_emb_dir, lang=lang, - words=self.words.viewkeys() + words=self.words.keys() ) self.external_embedding["words"].update(embeddings) @@ -105,7 +105,7 @@ def __init__(self, model, options, vocab, nnvecs=1): 2 * (options.char_lstm_output_size if options.char_emb_size > 0 else 0) ) - print "Word-level LSTM input size: " + str(self.lstm_input_size) + print("Word-level LSTM input size: " + str(self.lstm_input_size)) self.bilstms = [] if options.no_bilstms > 0: @@ -136,14 +136,15 @@ def Init(self,options): paddingTbankVec = self.treebank_lookup[0] if options.tbank_emb_size > 0 else None 
self.paddingVec = dy.tanh(self.word2lstm.expr() *\ - dy.concatenate(filter(None,[paddingWordVec, + dy.concatenate(list(filter(None,[paddingWordVec, paddingElmoVec, paddingPosVec, paddingCharVec, - paddingTbankVec])) + self.word2lstmbias.expr()) + paddingTbankVec]))) + self.word2lstmbias.expr()) self.empty = self.paddingVec if self.nnvecs == 1 else\ - dy.concatenate([self.paddingVec for _ in xrange(self.nnvecs)]) + dy.concatenate([self.paddingVec for _ in range(self.nnvecs)]) + def getWordEmbeddings(self, sentence, train, options, test_embeddings=defaultdict(lambda:{})): @@ -197,11 +198,11 @@ def getWordEmbeddings(self, sentence, train, options, test_embeddings=defaultdic # TODO root.vecs["elmo"] = dy.zeros(self.elmo.emb_dim) - root.vec = dy.concatenate(filter(None, [root.vecs["word"], + root.vec = dy.concatenate(list(filter(None, [root.vecs["word"], root.vecs["elmo"], root.vecs["pos"], root.vecs["char"], - root.vecs["treebank"]])) + root.vecs["treebank"]]))) for bilstm in self.bilstms: bilstm.set_token_vecs(sentence,train) @@ -224,19 +225,19 @@ def get_char_vector(self,root,train,test_embeddings_chars={}): def init_lookups(self,options): if self.external_embedding["words"]: - print 'Initialising %i word vectors with external embeddings'%len(self.external_embedding["words"]) + print('Initialising %i word vectors with external embeddings'%len(self.external_embedding["words"])) for word in self.external_embedding["words"]: if len(self.external_embedding["words"][word]) != options.word_emb_size: raise Exception("Size of external embedding does not match specified word embedding size of %s"%(options.word_emb_size)) self.word_lookup.init_row(self.words[word],self.external_embedding["words"][word]) elif options.word_emb_size > 0: - print 'No word external embeddings found: all vectors initialised randomly' + print('No word external embeddings found: all vectors initialised randomly') if self.external_embedding["chars"]: - print 'Initialising %i char vectors with external embeddings'%len(self.external_embedding["chars"]) + print('Initialising %i char vectors with external embeddings'%len(self.external_embedding["chars"])) for char in self.external_embedding["chars"]: if len(self.external_embedding["chars"][char]) != options.char_emb_size: raise Exception("Size of external embedding does not match specified char embedding size of %s"%(options.char_emb_size)) self.char_lookup.init_row(self.chars[char],self.external_embedding["chars"][char]) elif options.char_emb_size > 0: - print 'No character external embeddings found: all vectors initialised randomly' + print('No character external embeddings found: all vectors initialised randomly') diff --git a/barchybrid/src/mstlstm.py b/barchybrid/src/mstlstm.py index bc03ad2..0052774 100644 --- a/barchybrid/src/mstlstm.py +++ b/barchybrid/src/mstlstm.py @@ -37,7 +37,7 @@ def __getExpr(self, sentence, i, j, train): def __evaluate(self, sentence, train): - exprs = [ [self.__getExpr(sentence, i, j, train) for j in xrange(len(sentence))] for i in xrange(len(sentence)) ] + exprs = [ [self.__getExpr(sentence, i, j, train) for j in range(len(sentence))] for i in range(len(sentence)) ] scores = np.array([ [output.scalar_value() for output in exprsRow] for exprsRow in exprs ]) return scores, exprs @@ -58,7 +58,7 @@ def Load(self, filename): def Predict(self, treebanks, datasplit, options): char_map = {} if options.char_map_file: - char_map_fh = codecs.open(options.char_map_file,encoding='utf-8') + char_map_fh = open(options.char_map_file,encoding='utf-8') char_map = 
json.loads(char_map_fh.read()) # should probably use a namedtuple in get_vocab to make this prettier _, test_words, test_chars, _, _, _, test_treebanks, test_langs = utils.get_vocab(treebanks,datasplit,char_map) @@ -68,10 +68,10 @@ def Predict(self, treebanks, datasplit, options): test_embeddings = defaultdict(lambda: {}) if options.word_emb_size > 0 and options.ext_word_emb_file: new_test_words = \ - set(test_words) - self.feature_extractor.words.viewkeys() + set(test_words) - self.feature_extractor.words.keys() - print "Number of OOV word types at test time: %i (out of %i)" % ( - len(new_test_words), len(test_words)) + print("Number of OOV word types at test time: %i (out of %i)" % ( + len(new_test_words), len(test_words))) if len(new_test_words) > 0: # no point loading embeddings if there are no words to look for @@ -84,15 +84,16 @@ def Predict(self, treebanks, datasplit, options): ) test_embeddings["words"].update(embeddings) if len(test_langs) > 1 and test_embeddings["words"]: - print "External embeddings found for %i words "\ + print("External embeddings found for %i words "\ "(out of %i)" % \ - (len(test_embeddings["words"]), len(new_test_words)) + (len(test_embeddings["words"]), + len(new_test_words))) if options.char_emb_size > 0: new_test_chars = \ - set(test_chars) - self.feature_extractor.chars.viewkeys() - print "Number of OOV char types at test time: %i (out of %i)" % ( - len(new_test_chars), len(test_chars)) + set(test_chars) - self.feature_extractor.chars.keys() + print("Number of OOV char types at test time: %i (out of %i)" % ( + len(new_test_chars), len(test_chars))) if len(new_test_chars) > 0: for lang in test_langs: @@ -105,9 +106,10 @@ def Predict(self, treebanks, datasplit, options): ) test_embeddings["chars"].update(embeddings) if len(test_langs) > 1 and test_embeddings["chars"]: - print "External embeddings found for %i chars "\ + print("External embeddings found for %i chars "\ "(out of %i)" % \ - (len(test_embeddings["chars"]), len(new_test_chars)) + (len(test_embeddings["chars"]), + len(new_test_chars))) data = utils.read_conll_dir(treebanks,datasplit,char_map=char_map) for iSentence, osentence in enumerate(data,1): @@ -124,7 +126,8 @@ def Predict(self, treebanks, datasplit, options): ## ADD for handling multi-roots problem rootHead = [head for head in heads if head==0] if len(rootHead) != 1: - print "it has multi-root, changing it for heading first root for other roots" + print("it has multi-root, changing it for heading first root\ + for other roots") rootHead = [seq for seq, head in enumerate(heads) if head == 0] for seq in rootHead[1:]:heads[seq] = rootHead[0] ## finish to multi-roots @@ -174,7 +177,7 @@ def Train(self, trainData, options): ' Errors: %.3f'%((float(eerrors)) / etotal)+\ ' Labeled Errors: %.3f'%(float(lerrors) / etotal)+\ ' Time: %.2gs'%(time.time()-start) - print loss_message + print(loss_message) start = time.time() eerrors = 0 eloss = 0.0 @@ -244,5 +247,5 @@ def Train(self, trainData, options): dy.renew_cg() self.trainer.update() - print "Loss: ", mloss/iSentence - print "Total Training Time: %.2gs"%(time.time()-beg) + print("Loss: ", mloss/iSentence) + print("Total Training Time: %.2gs"%(time.time()-beg)) diff --git a/barchybrid/src/options_manager.py b/barchybrid/src/options_manager.py index f76e241..d405e34 100644 --- a/barchybrid/src/options_manager.py +++ b/barchybrid/src/options_manager.py @@ -42,7 +42,7 @@ def __init__(self,options): if not options.outdir: raise Exception("You must specify an output directory via the --outdir option") 
elif not os.path.exists(options.outdir): # create output directory if it doesn't exist - print "Creating output directory " + options.outdir + print("Creating output directory " + options.outdir) os.mkdir(options.outdir) if not options.graph_based and (not options.predict and not @@ -51,7 +51,7 @@ def __init__(self,options): raise Exception("Must include either head, rl or rlmost (For example, if you specified --disable-head and --disable-rlmost, you must specify --userl)") if not options.graph_based and (options.rlFlag and options.rlMostFlag): - print 'Warning: Switching off rlMostFlag to allow rlFlag to take precedence' + print('Warning: Switching off rlMostFlag to allow rlFlag to take precedence') options.rlMostFlag = False if options.word_emb_size == 0 and options.pos_emb_size == 0 and\ @@ -140,10 +140,10 @@ def create_UD_treebank_list(self,options): else: treebank.outdir = options.outdir if not os.path.exists(treebank.outdir): # create language-specific output folder if it doesn't exist - print "Creating language-specific output directory " + treebank.outdir + print("Creating language-specific output directory " + treebank.outdir) os.mkdir(treebank.outdir) else: - print ("Warning: language-specific subdirectory " + treebank.outdir + print("Warning: language-specific subdirectory " + treebank.outdir + " already exists, contents may be overwritten") if not options.predict: @@ -162,7 +162,7 @@ def create_UD_treebank_list(self,options): treebanks.append(treebank) else: - print "Warning: skipping invalid language code " + iso + print("Warning: skipping invalid language code " + iso) return treebanks @@ -179,7 +179,7 @@ def prepareDev(self,treebank,options): dev_file = os.path.join(treebank.outdir,'dev-split' + '.conllu') # location for the new dev file train_file = os.path.join(treebank.outdir,'train-split' + '.conllu') # location for the new train file dev_len = int(0.01*options.dev_percent*tot_sen) - print ("Taking " + str(dev_len) + " of " + str(tot_sen) + print("Taking " + str(dev_len) + " of " + str(tot_sen) + " sentences from training data as new dev data for " + treebank.name) random.shuffle(train_data) dev_data = train_data[:dev_len] @@ -191,21 +191,21 @@ def prepareDev(self,treebank,options): treebank.devfile = dev_file treebank.trainfile = train_file else: # not enough sentences - print ("Warning: not enough sentences in training data to create dev set for " + print("Warning: not enough sentences in training data to create dev set for " + treebank.name + " (minimum required --min-train-size: " + str(options.min_train_sents) + ")") treebank.pred_dev = False else: # option --create-dev not set - print ("Warning: No dev data for " + treebank.name + print("Warning: No dev data for " + treebank.name + ", consider adding option --create-dev to create dev data from training set") treebank.pred_dev = False if options.model_selection and not treebank.pred_dev: - print "Warning: can't do model selection for " + treebank.name + " as prediction on dev data is off" + print("Warning: can't do model selection for " + treebank.name + " as prediction on dev data is off") # if debug options is set, we read in the training, dev and test files as appropriate, cap the number of sentences and store # new files with these smaller data sets def createDebugData(self,treebank,options): ext = '.conllu' if options.conllu else '.conll' - print 'Creating smaller data sets for debugging' + print('Creating smaller data sets for debugging') if not options.predict: train_data = 
list(utils.read_conll(treebank.trainfile,maxSize=options.debug_train_sents,hard_lim=True)) train_file = os.path.join(treebank.outdir,'train-debug' + ext) # location for the new train file diff --git a/barchybrid/src/parser.py b/barchybrid/src/parser.py index 2f5d5e9..e2d60bf 100644 --- a/barchybrid/src/parser.py +++ b/barchybrid/src/parser.py @@ -2,49 +2,49 @@ from options_manager import OptionsManager import pickle, utils, os, time, sys, copy, itertools, re, random from shutil import copyfile -import codecs + def run(experiment,options): if options.graph_based: from mstlstm import MSTParserLSTM as Parser - print 'Working with a graph-based parser' + print('Working with a graph-based parser') else: from arc_hybrid import ArcHybridLSTM as Parser - print 'Working with a transition-based parser' + print('Working with a transition-based parser') if not options.predict: # training paramsfile = os.path.join(experiment.outdir, options.params) if not options.continueTraining: - print 'Preparing vocab' + print('Preparing vocab') vocab = utils.get_vocab(experiment.treebanks,"train") - print 'Finished collecting vocab' + print('Finished collecting vocab') - with open(paramsfile, 'w') as paramsfp: - print 'Saving params to ' + paramsfile + with open(paramsfile, 'wb') as paramsfp: + print('Saving params to ' + paramsfile) pickle.dump((vocab, options), paramsfp) - print 'Initializing the model' + print('Initializing the model') parser = Parser(vocab, options) else: #continue if options.continueParams: paramsfile = options.continueParams with open(paramsfile, 'r') as paramsfp: stored_vocab, stored_options = pickle.load(paramsfp) - print 'Initializing the model:' + print('Initializing the model:') parser = Parser(stored_vocab, stored_options) parser.Load(options.continueModel) dev_best = [options.epochs,-1.0] # best epoch, best score - for epoch in xrange(options.first_epoch, options.epochs+1): - print 'Starting epoch ' + str(epoch) + for epoch in range(options.first_epoch, options.epochs+1): + print('Starting epoch ' + str(epoch)) traindata = list(utils.read_conll_dir(experiment.treebanks, "train", options.max_sentences)) parser.Train(traindata,options) - print 'Finished epoch ' + str(epoch) + print('Finished epoch ' + str(epoch)) model_file = os.path.join(experiment.outdir, options.model + str(epoch)) parser.Save(model_file) @@ -56,7 +56,7 @@ def run(experiment,options): if pred_treebanks: for treebank in pred_treebanks: treebank.outfilename = os.path.join(treebank.outdir, 'dev_epoch_' + str(epoch) + '.conllu') - print "Predicting on dev data for " + treebank.name + print("Predicting on dev data for " + treebank.name) pred = list(parser.Predict(pred_treebanks,"dev",options)) utils.write_conll_multiling(pred,pred_treebanks) @@ -64,28 +64,28 @@ def run(experiment,options): mean_score = 0.0 for treebank in pred_treebanks: score = utils.evaluate(treebank.dev_gold,treebank.outfilename,options.conllu) - print "Dev score %.2f at epoch %i for %s"%(score,epoch,treebank.name) + print("Dev score %.2f at epoch %i for %s"%(score,epoch,treebank.name)) mean_score += score if len(pred_treebanks) > 1: # multiling case mean_score = mean_score/len(pred_treebanks) - print "Mean dev score %.2f at epoch %i"%(mean_score,epoch) + print("Mean dev score %.2f at epoch %i"%(mean_score,epoch)) if options.model_selection: if mean_score > dev_best[1]: dev_best = [epoch,mean_score] # update best dev score - # hack to print the word "mean" if the dev score is an average + # hack to print the word "mean" if the dev score is an average
mean_string = "mean " if len(pred_treebanks) > 1 else "" - print "Best %sdev score %.2f at epoch %i"%(mean_string,dev_best[1],dev_best[0]) + print("Best %sdev score %.2f at epoch %i"%(mean_string,dev_best[1],dev_best[0])) # at the last epoch choose which model to copy to barchybrid.model if epoch == options.epochs: bestmodel_file = os.path.join(experiment.outdir,"barchybrid.model" + str(dev_best[0])) model_file = os.path.join(experiment.outdir,"barchybrid.model") - print "Copying " + bestmodel_file + " to " + model_file + print("Copying " + bestmodel_file + " to " + model_file) copyfile(bestmodel_file,model_file) best_dev_file = os.path.join(experiment.outdir,"best_dev_epoch.txt") with open (best_dev_file, 'w') as fh: - print "Writing best scores to: " + best_dev_file + print("Writing best scores to: " + best_dev_file) if len(experiment.treebanks) == 1: fh.write("Best dev score %s at epoch %i\n"%(dev_best[1],dev_best[0])) else: @@ -94,7 +94,7 @@ def run(experiment,options): else: #if predict - so params = os.path.join(experiment.modeldir,options.params) - print 'Reading params from ' + params + print('Reading params from ' + params) with open(params, 'r') as paramsfp: stored_vocab, stored_opt = pickle.load(paramsfp) @@ -126,11 +126,12 @@ def run(experiment,options): if options.pred_eval: for treebank in experiment.treebanks: - print "Evaluating on " + treebank.name + print("Evaluating on " + treebank.name) score = utils.evaluate(treebank.test_gold,treebank.outfilename,options.conllu) - print "Obtained LAS F1 score of %.2f on %s" %(score,treebank.name) + print("Obtained LAS F1 score of %.2f on %s" %(score,treebank.name)) + + print('Finished predicting') - print 'Finished predicting' if __name__ == '__main__': diff --git a/barchybrid/src/utils.py b/barchybrid/src/utils.py index 7dbcee2..d694116 100644 --- a/barchybrid/src/utils.py +++ b/barchybrid/src/utils.py @@ -4,12 +4,13 @@ from itertools import chain from operator import itemgetter import random -import codecs, json +import json # a global variable so we don't have to keep loading from file repeatedly iso_dict = {} reverse_iso_dict = {} + class ConllEntry: def __init__(self, id, form, lemma, pos, cpos, feats=None, parent_id=None, relation=None, deps=None, misc=None, treebank_id=None, proxy_tbank=None, language=None, char_rep=None): @@ -48,6 +49,7 @@ def __str__(self): self.deps, self.misc] return '\t'.join(['_' if v is None else v for v in values]) + class Treebank(object): def __init__(self,trainfile,devfile,testfile): self.name = 'noname' @@ -59,6 +61,7 @@ def __init__(self,trainfile,devfile,testfile): self.outfilename = None self.proxy_tbank = None + class UDtreebank(Treebank): def __init__(self, treebank_info, options): """ @@ -101,6 +104,7 @@ def __init__(self, treebank_info, options): self.dev_gold = self.test_gold self.outfilename = self.iso_id + '.conllu' + class ParseForest: def __init__(self, sentence): self.roots = list(sentence) @@ -143,6 +147,7 @@ def isProj(sentence): return len(forest.roots) == 1 + def get_vocab(treebanks,datasplit,char_map={}): """ Collect frequencies of words, cpos, pos and deprels + languages. 
@@ -178,21 +183,24 @@ def get_vocab(treebanks,datasplit,char_map={}): # loads the same when predicting with a saved model later on # this is also another reason not to use sets for everything here as they are unordered # which creates problems when loading from file at predict time - return (wordsCount, wordsCount.keys(), charsCount.keys(), posCount.keys(), - cposCount.keys(), relCount.keys(), tbankCount.keys(), langCount.keys()) + return (wordsCount, list(wordsCount.keys()), list(charsCount.keys()), list(posCount.keys()), + list(cposCount.keys()), list(relCount.keys()), list(tbankCount.keys()), list(langCount.keys())) + def load_iso_dict(json_file='./src/utils/ud_iso.json'): - print "Loading ISO dict from %s"%json_file + print("Loading ISO dict from %s"%json_file) global iso_dict - ud_iso_file = codecs.open(json_file,encoding='utf-8') + ud_iso_file = open(json_file,encoding='utf-8') json_str = ud_iso_file.read() iso_dict = json.loads(json_str) + def load_reverse_iso_dict(json_file='./src/utils/ud_iso.json'): global reverse_iso_dict if not iso_dict: load_iso_dict(json_file=json_file) - reverse_iso_dict = {v: k for k, v in iso_dict.iteritems()} + reverse_iso_dict = {v: k for k, v in iso_dict.items()} + def load_lang_iso_dict(json_file='./src/utils/ud_iso.json'): @@ -208,6 +216,7 @@ def load_lang_iso_dict(json_file='./src/utils/ud_iso.json'): return lang_iso_dict + # convert treebank to language by removing everything after underscore def get_lang_from_tbank_name(tbank_name): @@ -221,6 +230,7 @@ def get_lang_from_tbank_name(tbank_name): return lang + def get_lang_from_tbank_id(tbank_id): if not tbank_id: return None @@ -229,6 +239,7 @@ def get_lang_from_tbank_id(tbank_id): load_reverse_iso_dict() return get_lang_from_tbank_name(reverse_iso_dict[tbank_id]) + # gets everything before the underscore in treebank iso e.g. 
"sv_talbanken" -> "sv" # with an exception for the two Norwegian variants where it's useful to consider them # as separate languages @@ -241,10 +252,12 @@ def get_lang_iso(treebank_iso): m = re.match(r'(.*_(nynorsk|bokmaal)?)',treebank_iso) return m.group(1).rstrip('_') + # from a list of treebanks, return those that match a particular language def get_treebanks_from_lang(treebank_ids,lang): return [treebank_id for treebank_id in treebank_ids if get_lang_from_tbank_id(treebank_id) == lang] + def get_all_treebanks(options): if not iso_dict: @@ -255,8 +268,9 @@ def get_all_treebanks(options): return json_treebanks + def read_conll_dir(treebanks,filetype,maxSize=-1,char_map={}): - #print "Max size for each corpus: ", maxSize + #print("Max size for each corpus: ", maxSize) if filetype == "train": return chain(*(read_conll(treebank.trainfile, treebank.iso_id, treebank.proxy_tbank, maxSize, train=True, char_map=char_map) for treebank in treebanks)) elif filetype == "dev": @@ -268,15 +282,14 @@ def read_conll_dir(treebanks,filetype,maxSize=-1,char_map={}): def generate_root_token(treebank_id, proxy_tbank, language): return ConllEntry(0, '*root*', '*root*', 'ROOT-POS', 'ROOT-CPOS', '_', -1, 'rroot', '_', '_',treebank_id=treebank_id, proxy_tbank=proxy_tbank, - language=language - ) + language=language) def read_conll(filename, treebank_id=None, proxy_tbank=None, maxSize=-1, hard_lim=False, vocab_prep=False, drop_nproj=False, train=True, char_map={}): # hard lim means capping the corpus size across the whole training procedure # soft lim means using a sample of the whole corpus at each epoch - fh = codecs.open(filename,'r',encoding='utf-8') - print "Reading " + filename + fh = open(filename,'r',encoding='utf-8') + print("Reading " + filename) if vocab_prep and not hard_lim: maxSize = -1 # when preparing the vocab with a soft limit we need to use the whole corpus ts = time.time() @@ -312,12 +325,12 @@ def read_conll(filename, treebank_id=None, proxy_tbank=None, maxSize=-1, hard_li yield tokens yield_count += 1 if yield_count == maxSize: - print "Capping size of corpus at " + str(yield_count) + " sentences" + print("Capping size of corpus at " + str(yield_count) + " sentences") break; else: yield tokens else: - #print 'Non-projective sentence dropped' + #print('Non-projective sentence dropped') dropped += 1 tokens = [generate_root_token(treebank_id, proxy_tbank, language)] else: @@ -335,52 +348,53 @@ def read_conll(filename, treebank_id=None, proxy_tbank=None, maxSize=-1, hard_li tokens.append(token) if hard_lim and yield_count < maxSize: - print 'Warning: unable to yield ' + str(maxSize) + ' sentences, only ' + str(yield_count) + ' found' + print('Warning: unable to yield ' + str(maxSize) + ' sentences, only ' + str(yield_count) + ' found') # TODO: deal with case where there are still unyielded tokens # e.g. 
when there is no newline at end of file # if len(tokens) > 1: # yield tokens - print sents_read, 'sentences read' + print(sents_read, 'sentences read') if maxSize > 0 and not hard_lim: if len(sents) > maxSize: sents = random.sample(sents,maxSize) - print "Yielding " + str(len(sents)) + " random sentences" + print("Yielding " + str(len(sents)) + " random sentences") for toks in sents: yield toks te = time.time() - print 'Time: %.2gs'%(te-ts) + print('Time: %.2gs'%(te-ts)) + def write_conll(fn, conll_gen): - print "Writing to " + fn + print("Writing to " + fn) sents = 0 - with codecs.open(fn, 'w', encoding='utf-8') as fh: + with open(fn, 'w', encoding='utf-8') as fh: for sentence in conll_gen: sents += 1 for entry in sentence[1:]: - fh.write(unicode(entry) + '\n') - #print str(entry) + fh.write(str(entry) + '\n') fh.write('\n') - print "Wrote " + str(sents) + " sentences" + print("Wrote " + str(sents) + " sentences") + def write_conll_multiling(conll_gen, treebanks): tbank_dict = {treebank.iso_id:treebank for treebank in treebanks} cur_tbank = conll_gen[0][0].treebank_id outfile = tbank_dict[cur_tbank].outfilename - fh = codecs.open(outfile,'w',encoding='utf-8') - print "Writing to " + outfile + fh = open(outfile,'w',encoding='utf-8') + print("Writing to " + outfile) for sentence in conll_gen: if cur_tbank != sentence[0].treebank_id: fh.close() cur_tbank = sentence[0].treebank_id outfile = tbank_dict[cur_tbank].outfilename - fh = codecs.open(outfile,'w',encoding='utf-8') - print "Writing to " + outfile + fh = open(outfile,'w',encoding='utf-8') + print("Writing to " + outfile) for entry in sentence[1:]: - fh.write(unicode(entry) + '\n') + fh.write(str(entry) + '\n') fh.write('\n') @@ -388,18 +402,20 @@ def parse_list_arg(l): """Return a list of line values if it's a file or a list of values if it is a string""" if os.path.isfile(l): - f = codecs.open(l, 'r', encoding='utf-8') + f = open(l, 'r', encoding='utf-8') return [line.strip("\n").split()[0] for line in f] else: return [el for el in l.split(" ")] + numberRegex = re.compile("[0-9]+|[0-9]+\\.[0-9]+|[0-9]+[0-9,]+"); def normalize(word): return 'NUM' if numberRegex.match(word) else word.lower() + def evaluate(gold,test,conllu): scoresfile = test + '.txt' - print "Writing to " + scoresfile + print("Writing to " + scoresfile) if not conllu: #os.system('perl src/utils/eval.pl -g ' + gold + ' -s ' + test + ' > ' + scoresfile + ' &') os.system('perl src/utils/eval.pl -g ' + gold + ' -s ' + test + ' > ' + scoresfile ) @@ -408,6 +424,7 @@ def evaluate(gold,test,conllu): score = get_LAS_score(scoresfile,conllu) return score + def inorder(sentence): queue = [sentence[0]] def inorder_helper(sentence,i): @@ -423,18 +440,21 @@ def inorder_helper(sentence,i): return results return inorder_helper(sentence,queue[0].id) + def set_seeds(options): python_seed = 1 if not options.predict and options.dynet_seed: # seeds shouldn't make any difference when predicting - print "Using default Python seed" + print("Using default Python seed") random.seed(python_seed) + def generate_seed(): return random.randint(0,10**9) # this range seems to work for Dynet and Python's random function + def get_LAS_score(filename, conllu=True): score = None - with codecs.open(filename,'r',encoding='utf-8') as fh: + with open(filename,'r',encoding='utf-8') as fh: if conllu: for line in fh: if re.match(r'^LAS',line): @@ -446,44 +466,50 @@ def get_LAS_score(filename, conllu=True): return score +import lzma + def extract_embeddings_from_file(filename, words=None, max_emb=-1, 
filtered_filename=None): # words should be a set used to filter the embeddings - print "Extracting embeddings from", filename + print("Extracting embeddings from", filename) ts = time.time() line_count = 0 error_count = 0 # e.g. invalid utf-8 in embeddings file - with open(filename,'r') as fh: # byte string + #with open(filename,'r') as fh: # byte string + with lzma.open(filename, mode='rt', encoding='utf-8') as fh: - fh.readline() # ignore first line with embedding stats + next(fh) # ignore first line with embedding stats embeddings = OrderedDict() - for line in fh: + while True: if max_emb < 0 or line_count < max_emb: try: + line = next(fh) # only split on normal space, not e.g. non-break space - eles = line.decode('utf-8').strip().split(" ") + eles = line.strip().split(" ") word = re.sub(u"\xa0"," ",eles[0]) # replace non-break space with regular space if not words or word in words: embeddings[word] = [float(f) for f in eles[1:]] + except StopIteration: + break except UnicodeDecodeError: -# print "Unable to read word at line %i: %s"%(line_count, word) +# print("Unable to read word at line %i: %s"%(line_count, word)) error_count += 1 line_count += 1 if line_count % 100000 == 0: - print "Reading line: " + str(line_count) + print("Reading line: " + str(line_count)) else: break - print "Read %i embeddings"%line_count + print("Read %i embeddings"%line_count) te = time.time() - print 'Time: %.2gs'%(te-ts) -# print "%i utf-8 errors"%error_count + print('Time: %.2gs'%(te-ts)) +# print("%i utf-8 errors"%error_count) if words: - print "%i entries found from vocabulary (out of %i)"%(len(embeddings),len(words)) + print("%i entries found from vocabulary (out of %i)"%(len(embeddings),len(words))) if filtered_filename and embeddings: - print "Writing filtered embeddings to " + filtered_filename + print("Writing filtered embeddings to " + filtered_filename) with open(filtered_filename,'w') as fh_filter: no_embeddings = len(embeddings) embedding_size = len(embeddings.itervalues().next()) @@ -496,6 +522,7 @@ def extract_embeddings_from_file(filename, words=None, max_emb=-1, filtered_file return embeddings + def get_external_embeddings(options, emb_file=None, emb_dir=None, lang=None, words=None, chars=False): @@ -528,11 +555,12 @@ def get_external_embeddings(options, emb_file=None, emb_dir=None, emb_file, words, options.max_ext_emb) external_embedding.update(embeddings) else: - print "Warning: %s does not exist, proceeding without" \ - % emb_file + print("Warning: %s does not exist, proceeding without" \ + % emb_file) return external_embedding + # for the most part, we want to send stored options to the parser when in # --predict mode, however we want to allow some of these to be updated # based on the command line options specified by the user at predict time