diff --git a/Scripts/Ancestry_identifier/Ancestry_identifier.R b/Scripts/Ancestry_identifier/Ancestry_identifier.R
index d596b705..0368eb38 100644
--- a/Scripts/Ancestry_identifier/Ancestry_identifier.R
+++ b/Scripts/Ancestry_identifier/Ancestry_identifier.R
@@ -1,340 +1,340 @@
-#!/usr/bin/Rscript
-# This script was written by Oliver Pain whilst at King's College London University.
-start.time <- Sys.time()
-library("optparse")
-
-option_list = list(
-make_option("--target_plink_chr", action="store", default=NULL, type='character',
-    help="Path to per chromosome target PLINK2 files [required]"),
-make_option("--target_keep", action="store", default=NULL, type='character',
-    help="Path to file listing individuals in the target sample to retain [optional]"),
-make_option("--ref_plink_chr", action="store", default=NULL, type='character',
-    help="Path to per chromosome reference PLINK2 files [required]"),
-make_option("--ref_keep", action="store", default=NULL, type='character',
-    help="Path to file listing individuals in the reference sample to retain [optional]"),
-make_option("--maf", action="store", default=0.05, type='numeric',
-    help="Minor allele frequency threshold [optional]"),
-make_option("--geno", action="store", default=0.02, type='numeric',
-    help="Variant missingness threshold [optional]"),
-make_option("--hwe", action="store", default=1e-6, type='numeric',
-    help="Hardy Weinberg p-value threshold. [optional]"),
-make_option("--n_pcs", action="store", default=6, type='numeric',
-		help="Number of PCs (min=4) [optional]"),
-make_option("--plink2", action="store", default='plink2', type='character',
-		help="Path PLINK software binary [optional]"),
-make_option("--output", action="store", default=NULL, type='character',
-		help="Path for output files [required]"),
-make_option("--pop_data", action="store", default=NULL, type='character',
-    help="Population data for the reference samples [required]"),
-make_option("--model_method", action="store", default='glmnet', type='character',
-    help="Method used for generate prediction model [optional]"),
-make_option("--sd_rule", action="store", default=F, type='logical',
-    help="Logical indicating whether the 3SD rule should be used to define ancestry, or the model-based approach [optional]"),
-make_option("--prob_thresh", action="store", default=0.95, type='numeric',
-    help="Indicates whether probability threshold should be used when defining ancestry [optional]"),
-make_option("--test", action="store", default=NA, type='character',
-    help="Specify test mode [optional]"),
-make_option("--memory", action="store", default=5000, type='numeric',
-		help="Memory limit [optional]")
-)
-
-opt = parse_args(OptionParser(option_list=option_list))
-
-# Load dependencies
-library(GenoUtils)
-source('../functions/misc.R')
-source_all('../functions')
-library(data.table)
-library(caret)
-library(pROC)
-library(verification)
-library(ggplot2)
-library(cowplot)
-
-# Check required inputs
-if(is.null(opt$target_plink_chr)){
-  stop('--target_plink_chr must be specified.\n')
-}
-if(is.null(opt$ref_plink_chr)){
-  stop('--ref_plink_chr must be specified.\n')
-}
-if(is.null(opt$output)){
-  stop('--output must be specified.\n')
-}
-if(is.null(opt$pop_data)){
-  stop('--pop_data must be specified.\n')
-}
-
-# Create output directory
-opt$out_dir<-paste0(dirname(opt$output),'/')
-system(paste0('mkdir -p ',opt$out_dir))
-
-# Create temp directory
-tmp_dir<-tempdir()
-
-# Initiate log file
-log_file <- paste0(opt$output,'.log')
-log_header(log_file = log_file, opt = opt, script = 'Ancestry_identifier.R', start.time = start.time)
-
-
-# If testing, change CHROMS to chr value, and lower ancestry probability threshold
-if(!is.na(opt$test) && opt$test == 'NA'){
-  opt$test<-NA
-}
-if(!is.na(opt$test)){
-  CHROMS <- as.numeric(gsub('chr','',opt$test))
-  opt$prob_thresh <- 0.5
-  log_add(log_file = log_file, message = 'Lowering prob_thresh parameter to 0.5 for testing.')
-}
-
-if(nrow(fread(paste0(opt$ref_plink_chr, CHROMS[1],'.psam'))) < 100){
-  stop('opt$ref_plink_chr must contain at least 100 individuals.')
-}
-
-###########
-# Extract target_keep
-###########
-
-if(!is.null(opt$target_keep)){
-  plink_subset(keep = opt$target_keep, chr = CHROMS, plink2 = opt$plink2, pfile = opt$target_plink_chr, out = paste0(tmp_dir,'/target_subset.chr'))
-  opt$target_plink_chr_subset<-paste0(tmp_dir,'/target_subset')
-} else {
-  opt$target_plink_chr_subset<-opt$target_plink_chr
-}
-
-###########
-# Extract ref_keep
-###########
-
-if(!is.null(opt$ref_keep)){
-  plink_subset(keep = opt$ref_keep, chr = CHROMS, plink2 = opt$plink2, pfile = opt$ref_plink_chr, out = paste0(tmp_dir,'/ref_subset.chr'))
-  opt$ref_plink_chr_subset<-paste0(tmp_dir,'/ref_subset.chr')
-} else {
-  opt$ref_plink_chr_subset<-opt$ref_plink_chr
-}
-
-###########
-# QC target
-###########
-
-# If target sample size is <100, only apply SNP missingness parameter
-psam<-fread(paste0(opt$target_plink_chr_subset, CHROMS[1], '.psam'))
-
-if(nrow(psam) > 100){
-  target_qc_snplist<-plink_qc_snplist(pfile = opt$target_plink_chr_subset, chr = CHROMS, plink2 = opt$plink2, geno = opt$geno, maf = opt$maf, hwe = opt$hwe)
-} else {
-  target_qc_snplist<-plink_qc_snplist(pfile = opt$target_plink_chr_subset, chr = CHROMS, plink2 = opt$plink2, geno = opt$geno)
-  log_add(log_file = log_file, message = 'Target sample size is <100 so only checking genotype missingness.')
-}
-
-###########
-# QC reference
-###########
-
-ref_qc_snplist<-plink_qc_snplist(pfile = opt$ref_plink_chr_subset, chr = CHROMS, plink2 = opt$plink2, geno = opt$geno, maf = opt$maf, hwe = opt$hwe)
-
-###########
-# Harmonise target and reference genetic data
-###########
-
-# read in target pvar file
-targ_pvar<-read_pvar(opt$target_plink_chr_subset, chr = CHROMS)
-
-# read in reference pvar file
-ref_pvar<-read_pvar(opt$ref_plink_chr_subset, chr = CHROMS)
-
-# retain variants surviving QC
-targ_pvar<-targ_pvar[targ_pvar$SNP %in% intersect(target_qc_snplist, ref_qc_snplist), ]
-ref_pvar<-ref_pvar[ref_pvar$SNP %in% intersect(target_qc_snplist, ref_qc_snplist), ]
-
-# insert IUPAC codes
-targ_pvar$IUPAC <- snp_iupac(targ_pvar$A1, targ_pvar$A2)
-ref_pvar$IUPAC <- snp_iupac(ref_pvar$A1, ref_pvar$A2)
-
-# Identify SNPs present in both samples (allowing for strand flips)
-# Identify SNPs that need to be flipped
-target_ref<-merge(targ_pvar, ref_pvar, by='SNP')
-flip <- detect_strand_flip(target_ref$IUPAC.x, target_ref$IUPAC.y)
-
-flip_snplist<-NULL
-if(sum(flip) > 0){
-  flip_snplist<-target_ref$SNP.y[flip]
-  log_add(log_file = log_file, message = paste0(sum(flip), 'variants will be flipped.'))
-}
-
-# Remove variants where IUPAC codes do not match (allowing for strand flips)
-matched <- which((target_ref$IUPAC.x == target_ref$IUPAC.y) | flip)
-target_ref<-target_ref[matched,]
-
-log_add(log_file = log_file, message = paste0(nrow(target_ref),' variants match between target and reference after QC.'))
-
-###########
-# Identify list of LD independent SNPs
-###########
-
-log_add(log_file = log_file, message = 'Identifying LD independent SNPs based on reference data.')
-
-# Subset ref_pvar to contain QC'd variants
-ref_pvar<-ref_pvar[ref_pvar$SNP %in% target_ref$SNP,]
-
-# Remove regions of high LD
-ref_pvar <- remove_regions(dat = ref_pvar, regions = long_ld_coord)
-log_add(log_file = log_file, message = paste0(nrow(ref_pvar),' variants after removal of LD high regions.'))
-
-# Perform LD pruning
-ld_indep <- plink_prune(pfile = opt$ref_plink_chr_subset, plink2 = opt$plink2, extract = ref_pvar$SNP, chr = CHROMS)
-log_add(log_file = log_file, message = paste0(length(ld_indep),' independent variants retained.'))
-
-###########
-# Perform PCA based on reference
-###########
-
-log_add(log_file = log_file, message = 'Performing PCA based on reference.')
-
-snp_weights<-plink_pca(pfile = opt$ref_plink_chr_subset, chr = CHROMS, plink2 = opt$plink2, extract = ld_indep, flip = flip_snplist, n_pc = opt$n_pcs)
-fwrite(snp_weights, paste0(tmp_dir,'/ref.eigenvec.var'), row.names = F, quote=F, sep=' ', na='NA')
-
-###
-# Calculate PCs in the reference sample for scaling the target sample factor scores.
-###
-
-log_add(log_file = log_file, message = 'Computing reference PCs.')
-
-# Calculate PCs in the reference
-ref_pcs<-plink_score(pfile = opt$ref_plink_chr_subset, chr = CHROMS, plink2 = opt$plink2, score = paste0(tmp_dir,'/ref.eigenvec.var'))
-
-# Scale across all individuals
-ref_pcs_centre_scale <- score_mean_sd(scores = ref_pcs)
-
-###
-# Create model predicting reference populations
-###
-
-log_add(log_file = log_file, message = 'Deriving model predicting ref_pop groups.')
-
-# Read in reference pop_data
-pop_data <- read_pop_data(opt$pop_data)
-
-# Scale the reference PCs
-ref_pcs_scaled<-score_scale(score = ref_pcs, ref_scale = ref_pcs_centre_scale)
-ref_pcs_scaled_pop<-merge(ref_pcs_scaled, pop_data, by=c('FID','IID'))
-
-# Build model
-model <- train(y=as.factor(ref_pcs_scaled_pop$POP), x=ref_pcs_scaled_pop[, grepl('PC',names(ref_pcs_scaled_pop)), with=F], method=opt$model_method, metric='logLoss', trControl=trainControl(method="cv", number=5, classProbs= TRUE, savePredictions = 'final', summaryFunction = multiClassSummary))
-
-saveRDS(model$finalModel, paste0(opt$output,'.model.rds'))
-
-#####
-# Calculate PCs in target sample
-#####
-
-log_add(log_file = log_file, message = 'Calculating PCs in the target sample.')
-targ_pcs<-plink_score(pfile = opt$target_plink_chr_subset, chr = CHROMS, plink2 = opt$plink2, score = paste0(tmp_dir,'/ref.eigenvec.var'))
-targ_pcs_scaled<-score_scale(score = targ_pcs, ref_scale = ref_pcs_centre_scale)
-
-###
-# Create plot PC scores of target sample compared to the reference
-###
-
-log_add(log_file = log_file, message = 'Plotting target sample PCs on reference.')
-
-# Combine ref and targ PCs
-targ_pcs_scaled$POP<-'Target'
-ref_pcs_targ_pcs<-rbind(ref_pcs_scaled_pop,targ_pcs_scaled)
-
-PC_1_2<-ggplot(ref_pcs_targ_pcs[ref_pcs_targ_pcs$POP != 'Target',], aes(x=PC1,y=PC2, colour=POP)) +
-  geom_point() +
-	geom_point(data=ref_pcs_targ_pcs[ref_pcs_targ_pcs$POP == 'Target',], aes(x=PC1,y=PC2), colour='black', shape=21) +
-	labs(title = "PCs 1 and 2", colour="") +
-  theme_half_open() +
-  background_grid() +
-  theme(plot.title = element_text(hjust = 0.5))
-
-PC_3_4<-ggplot(ref_pcs_targ_pcs[ref_pcs_targ_pcs$POP != 'Target',], aes(x=PC3,y=PC4, colour=POP)) +
-  geom_point() +
-	geom_point(data=ref_pcs_targ_pcs[ref_pcs_targ_pcs$POP == 'Target',], aes(x=PC3,y=PC4), colour='black', shape=21) +
-	labs(title = "PCs 3 and 4", colour="") +
-  theme_half_open() +
-  background_grid() +
-  theme(plot.title = element_text(hjust = 0.5))
-
-png(paste0(opt$output,'.pc_plot.png'), units='px', res=300, width=4000, height=2000)
-  plot_grid(PC_1_2,PC_3_4)
-dev.off()
-
-###
-# Estimate probability of outcomes in model
-###
-
-log_add(log_file = log_file, message = 'Inferring population membership in target.')
-
-# Read in model
-pop_model_pred<-predict(object = model$finalModel, newx = data.matrix(targ_pcs_scaled[, grepl('PC',names(targ_pcs_scaled)), with=F]), type = "response", s=model$finalModel$lambdaOpt)
-pop_model_pred<-as.data.frame.table(pop_model_pred)
-pop_model_pred<-data.table(	FID=targ_pcs_scaled$FID,
-														IID=targ_pcs_scaled$IID,
-														pop=as.character(pop_model_pred$Var2),
-														prob=round(pop_model_pred$Freq,3))
-
-pop_model_pred<-dcast.data.table(pop_model_pred, formula=FID + IID~pop, value.var = "prob")
-
-fwrite(pop_model_pred, paste0(opt$output,'.model_pred'), sep='\t')
-
-# Create keep files based on the results
-dir.create(paste0(opt$out_dir,'/keep_files/model_based'), recursive = T)
-if(!is.na(opt$prob_thresh)){
-  pop_model_pred$max_prob<-apply(pop_model_pred[,-1:-2], 1, max)
-  pop_model_pred<-pop_model_pred[pop_model_pred$max_prob > opt$prob_thresh,]
-  pop_model_pred$max_prob<-NULL
-}
-
-N_group<-NULL
-for(i in names(pop_model_pred[,-1:-2])){
-	tmp_keep<-pop_model_pred[apply(pop_model_pred[,-1:-2], 1, function(x) x[i] == max(x)),1:2]
-	N_group<-rbind(N_group, data.frame(Group=i, N=nrow(tmp_keep)))
-	fwrite(tmp_keep, paste0(opt$out_dir,'/keep_files/model_based/',i,'.keep'), sep=' ', col.names=F)
-}
-
-N_group<-rbind(N_group, data.frame(Group='Unassigned', N=nrow(targ_pcs_scaled) - nrow(pop_model_pred)))
-
-sink(file = log_file, append = T)
-cat('----------\n')
-cat('N per group based on model:\n')
-print.data.frame(N_group, row.names = FALSE, quote = FALSE, right = FALSE)
-cat('----------\n')
-sink()
-
-if(opt$sd_rule){
-  dir.create(paste0(opt$out_dir,'/keep_files/sd_based'), recursive = T)
-  N_group<-NULL
-  for(pop_i in unique(pop_data$POP)){
-
-    # Calculate scale of PCs within reference population
-    ref_pcs_scaled_i <- score_mean_sd(scores = ref_pcs, keep = pop_data[pop_data$POP == pop_i, c('FID','IID'), with=F])
-
-    # Scale the target PC based on reference mean and SD
-    targ_pcs_scaled_i<-score_scale(score = targ_pcs, ref_scale = ref_pcs_scaled_i)
-
-    # Identify individuals with PCs <3
-    targ_pcs_scaled_i<-targ_pcs_scaled_i[!apply(targ_pcs_scaled_i[,-1:-2], 1, function(x) any(x > 3 | x < -3)),]
-
-    N_group<-rbind(N_group, data.frame(Group=pop_i, N=nrow(targ_pcs_scaled_i)))
-
-    # Save keep file of individuals that fit the population
-    fwrite(targ_pcs_scaled_i[,1:2], paste0(opt$out_dir,'/keep_files/sd_based/',pop_i,'.keep'), col.names=F, sep='\t')
-  }
-
-  sink(file = log_file, append = T)
-  cat('----------\n')
-  cat('N per group based on SD rule:\n')
-  print.data.frame(N_group, row.names = FALSE, quote = FALSE, right = FALSE)
-  cat('----------\n')
-  sink()
-}
-
-end.time <- Sys.time()
-time.taken <- end.time - start.time
-sink(file = paste(opt$output,'.log',sep=''), append = T)
-cat('Analysis finished at',as.character(end.time),'\n')
-cat('Analysis duration was',as.character(round(time.taken,2)),attr(time.taken, 'units'),'\n')
-sink()
+#!/usr/bin/Rscript
+# This script was written by Oliver Pain whilst at King's College London University.
+start.time <- Sys.time()
+library("optparse")
+
+option_list = list(
+make_option("--target_plink_chr", action="store", default=NULL, type='character',
+    help="Path to per chromosome target PLINK2 files [required]"),
+make_option("--target_keep", action="store", default=NULL, type='character',
+    help="Path to file listing individuals in the target sample to retain [optional]"),
+make_option("--ref_plink_chr", action="store", default=NULL, type='character',
+    help="Path to per chromosome reference PLINK2 files [required]"),
+make_option("--ref_keep", action="store", default=NULL, type='character',
+    help="Path to file listing individuals in the reference sample to retain [optional]"),
+make_option("--maf", action="store", default=0.05, type='numeric',
+    help="Minor allele frequency threshold [optional]"),
+make_option("--geno", action="store", default=0.02, type='numeric',
+    help="Variant missingness threshold [optional]"),
+make_option("--hwe", action="store", default=1e-6, type='numeric',
+    help="Hardy Weinberg p-value threshold. [optional]"),
+make_option("--n_pcs", action="store", default=6, type='numeric',
+		help="Number of PCs (min=4) [optional]"),
+make_option("--plink2", action="store", default='plink2', type='character',
+		help="Path PLINK software binary [optional]"),
+make_option("--output", action="store", default=NULL, type='character',
+		help="Path for output files [required]"),
+make_option("--pop_data", action="store", default=NULL, type='character',
+    help="Population data for the reference samples [required]"),
+make_option("--model_method", action="store", default='glmnet', type='character',
+    help="Method used for generate prediction model [optional]"),
+make_option("--sd_rule", action="store", default=F, type='logical',
+    help="Logical indicating whether the 3SD rule should be used to define ancestry, or the model-based approach [optional]"),
+make_option("--prob_thresh", action="store", default=0.95, type='numeric',
+    help="Indicates whether probability threshold should be used when defining ancestry [optional]"),
+make_option("--test", action="store", default=NA, type='character',
+    help="Specify test mode [optional]"),
+make_option("--memory", action="store", default=5000, type='numeric',
+		help="Memory limit [optional]")
+)
+
+opt = parse_args(OptionParser(option_list=option_list))
+
+# Load dependencies
+library(GenoUtils)
+source('../functions/misc.R')
+source_all('../functions')
+library(data.table)
+library(caret)
+library(pROC)
+library(verification)
+library(ggplot2)
+library(cowplot)
+
+# Check required inputs
+if(is.null(opt$target_plink_chr)){
+  stop('--target_plink_chr must be specified.\n')
+}
+if(is.null(opt$ref_plink_chr)){
+  stop('--ref_plink_chr must be specified.\n')
+}
+if(is.null(opt$output)){
+  stop('--output must be specified.\n')
+}
+if(is.null(opt$pop_data)){
+  stop('--pop_data must be specified.\n')
+}
+
+# Create output directory (dir.create avoids building a shell command, so paths with spaces or shell metacharacters are safe)
+opt$out_dir<-paste0(dirname(opt$output),'/')
+dir.create(opt$out_dir, recursive = TRUE, showWarnings = FALSE)
+
+# Create temp directory
+tmp_dir<-tempdir()
+
+# Initiate log file
+log_file <- paste0(opt$output,'.log')
+log_header(log_file = log_file, opt = opt, script = 'Ancestry_identifier.R', start.time = start.time)
+
+
+# If testing, change CHROMS to chr value, and lower ancestry probability threshold
+if(!is.na(opt$test) && opt$test == 'NA'){
+  opt$test<-NA
+}
+if(!is.na(opt$test)){
+  CHROMS <- as.numeric(gsub('chr','',opt$test))
+  opt$prob_thresh <- 0.5
+  log_add(log_file = log_file, message = 'Lowering prob_thresh parameter to 0.5 for testing.')
+}
+
+if(nrow(fread(paste0(opt$ref_plink_chr, CHROMS[1],'.psam'))) < 100){
+  stop('opt$ref_plink_chr must contain at least 100 individuals.')
+}
+
+###########
+# Extract target_keep (subset prefix must match the 'out' prefix incl. '.chr', as files are later read as <prefix><chr>.psam)
+###########
+
+if(!is.null(opt$target_keep)){
+  plink_subset(keep = opt$target_keep, chr = CHROMS, plink2 = opt$plink2, pfile = opt$target_plink_chr, out = paste0(tmp_dir,'/target_subset.chr'))
+  opt$target_plink_chr_subset<-paste0(tmp_dir,'/target_subset.chr')
+} else {
+  opt$target_plink_chr_subset<-opt$target_plink_chr
+}
+
+###########
+# Extract ref_keep
+###########
+
+if(!is.null(opt$ref_keep)){
+  plink_subset(keep = opt$ref_keep, chr = CHROMS, plink2 = opt$plink2, pfile = opt$ref_plink_chr, out = paste0(tmp_dir,'/ref_subset.chr'))
+  opt$ref_plink_chr_subset<-paste0(tmp_dir,'/ref_subset.chr')
+} else {
+  opt$ref_plink_chr_subset<-opt$ref_plink_chr
+}
+
+###########
+# QC target
+###########
+
+# Apply full QC (geno, maf, hwe) when target N >= 100; otherwise only apply the SNP missingness (geno) filter
+psam<-fread(paste0(opt$target_plink_chr_subset, CHROMS[1], '.psam'))
+
+if(nrow(psam) >= 100){
+  target_qc_snplist<-plink_qc_snplist(pfile = opt$target_plink_chr_subset, chr = CHROMS, plink2 = opt$plink2, geno = opt$geno, maf = opt$maf, hwe = opt$hwe)
+} else {
+  target_qc_snplist<-plink_qc_snplist(pfile = opt$target_plink_chr_subset, chr = CHROMS, plink2 = opt$plink2, geno = opt$geno)
+  log_add(log_file = log_file, message = 'Target sample size is <100 so only checking genotype missingness.')
+}
+
+###########
+# QC reference
+###########
+
+ref_qc_snplist<-plink_qc_snplist(pfile = opt$ref_plink_chr_subset, chr = CHROMS, plink2 = opt$plink2, geno = opt$geno, maf = opt$maf, hwe = opt$hwe)
+
+###########
+# Harmonise target and reference genetic data
+###########
+
+# read in target pvar file
+targ_pvar<-read_pvar(opt$target_plink_chr_subset, chr = CHROMS)
+
+# read in reference pvar file
+ref_pvar<-read_pvar(opt$ref_plink_chr_subset, chr = CHROMS)
+
+# retain variants surviving QC
+targ_pvar<-targ_pvar[targ_pvar$SNP %in% intersect(target_qc_snplist, ref_qc_snplist), ]
+ref_pvar<-ref_pvar[ref_pvar$SNP %in% intersect(target_qc_snplist, ref_qc_snplist), ]
+
+# insert IUPAC codes
+targ_pvar$IUPAC <- snp_iupac(targ_pvar$A1, targ_pvar$A2)
+ref_pvar$IUPAC <- snp_iupac(ref_pvar$A1, ref_pvar$A2)
+
+# Merge target and reference variants on SNP ID: the 'SNP' key keeps its name, other shared columns gain .x (target) / .y (reference) suffixes
+# Identify SNPs flagged by detect_strand_flip as needing to be flipped
+target_ref<-merge(targ_pvar, ref_pvar, by='SNP')
+flip <- detect_strand_flip(target_ref$IUPAC.x, target_ref$IUPAC.y)
+
+flip_snplist<-NULL
+if(sum(flip) > 0){
+  flip_snplist<-target_ref$SNP[flip] # merge key column is 'SNP'; there is no 'SNP.y' column after merging by 'SNP'
+  log_add(log_file = log_file, message = paste0(sum(flip), ' variants will be flipped.'))
+}
+
+# Remove variants where IUPAC codes do not match (allowing for strand flips)
+matched <- which((target_ref$IUPAC.x == target_ref$IUPAC.y) | flip)
+target_ref<-target_ref[matched,]
+
+log_add(log_file = log_file, message = paste0(nrow(target_ref),' variants match between target and reference after QC.'))
+
+###########
+# Identify list of LD independent SNPs
+###########
+
+log_add(log_file = log_file, message = 'Identifying LD independent SNPs based on reference data.')
+
+# Subset ref_pvar to contain QC'd variants
+ref_pvar<-ref_pvar[ref_pvar$SNP %in% target_ref$SNP,]
+
+# Remove regions of high LD
+ref_pvar <- remove_regions(dat = ref_pvar, regions = long_ld_coord)
+log_add(log_file = log_file, message = paste0(nrow(ref_pvar),' variants after removal of LD high regions.'))
+
+# Perform LD pruning
+ld_indep <- plink_prune(pfile = opt$ref_plink_chr_subset, plink2 = opt$plink2, extract = ref_pvar$SNP, chr = CHROMS)
+log_add(log_file = log_file, message = paste0(length(ld_indep),' independent variants retained.'))
+
+###########
+# Perform PCA based on reference
+###########
+
+log_add(log_file = log_file, message = 'Performing PCA based on reference.')
+
+snp_weights<-plink_pca(pfile = opt$ref_plink_chr_subset, chr = CHROMS, plink2 = opt$plink2, extract = ld_indep, flip = flip_snplist, n_pc = opt$n_pcs)
+fwrite(snp_weights, paste0(tmp_dir,'/ref.eigenvec.var'), row.names = F, quote=F, sep=' ', na='NA')
+
+###
+# Calculate PCs in the reference sample for scaling the target sample factor scores.
+###
+
+log_add(log_file = log_file, message = 'Computing reference PCs.')
+
+# Calculate PCs in the reference
+ref_pcs<-plink_score(pfile = opt$ref_plink_chr_subset, chr = CHROMS, plink2 = opt$plink2, score = paste0(tmp_dir,'/ref.eigenvec.var'))
+
+# Scale across all individuals
+ref_pcs_centre_scale <- score_mean_sd(scores = ref_pcs)
+
+###
+# Create model predicting reference populations
+###
+
+log_add(log_file = log_file, message = 'Deriving model predicting ref_pop groups.')
+
+# Read in reference pop_data
+pop_data <- read_pop_data(opt$pop_data)
+
+# Scale the reference PCs
+ref_pcs_scaled<-score_scale(score = ref_pcs, ref_scale = ref_pcs_centre_scale)
+ref_pcs_scaled_pop<-merge(ref_pcs_scaled, pop_data, by=c('FID','IID'))
+
+# Build model
+model <- train(y=as.factor(ref_pcs_scaled_pop$POP), x=ref_pcs_scaled_pop[, grepl('PC',names(ref_pcs_scaled_pop)), with=F], method=opt$model_method, metric='logLoss', trControl=trainControl(method="cv", number=5, classProbs= TRUE, savePredictions = 'final', summaryFunction = multiClassSummary))
+
+saveRDS(model$finalModel, paste0(opt$output,'.model.rds'))
+
+#####
+# Calculate PCs in target sample
+#####
+
+log_add(log_file = log_file, message = 'Calculating PCs in the target sample.')
+targ_pcs<-plink_score(pfile = opt$target_plink_chr_subset, chr = CHROMS, plink2 = opt$plink2, score = paste0(tmp_dir,'/ref.eigenvec.var'))
+targ_pcs_scaled<-score_scale(score = targ_pcs, ref_scale = ref_pcs_centre_scale)
+
+###
+# Create plot PC scores of target sample compared to the reference
+###
+
+log_add(log_file = log_file, message = 'Plotting target sample PCs on reference.')
+
+# Combine ref and targ PCs
+targ_pcs_scaled$POP<-'Target'
+ref_pcs_targ_pcs<-rbind(ref_pcs_scaled_pop,targ_pcs_scaled)
+
+PC_1_2<-ggplot(ref_pcs_targ_pcs[ref_pcs_targ_pcs$POP != 'Target',], aes(x=PC1,y=PC2, colour=POP)) +
+  geom_point() +
+	geom_point(data=ref_pcs_targ_pcs[ref_pcs_targ_pcs$POP == 'Target',], aes(x=PC1,y=PC2), colour='black', shape=21) +
+	labs(title = "PCs 1 and 2", colour="") +
+  theme_half_open() +
+  background_grid() +
+  theme(plot.title = element_text(hjust = 0.5))
+
+PC_3_4<-ggplot(ref_pcs_targ_pcs[ref_pcs_targ_pcs$POP != 'Target',], aes(x=PC3,y=PC4, colour=POP)) +
+  geom_point() +
+	geom_point(data=ref_pcs_targ_pcs[ref_pcs_targ_pcs$POP == 'Target',], aes(x=PC3,y=PC4), colour='black', shape=21) +
+	labs(title = "PCs 3 and 4", colour="") +
+  theme_half_open() +
+  background_grid() +
+  theme(plot.title = element_text(hjust = 0.5))
+
+png(paste0(opt$output,'.pc_plot.png'), units='px', res=300, width=4000, height=2000)
+  plot_grid(PC_1_2,PC_3_4)
+dev.off()
+
+###
+# Estimate probability of outcomes in model
+###
+
+log_add(log_file = log_file, message = 'Inferring population membership in target.')
+
+# Predict population membership probabilities in the target using the final model
+pop_model_pred<-predict(object = model$finalModel, newx = data.matrix(targ_pcs_scaled[, grepl('PC',names(targ_pcs_scaled)), with=F]), type = "response", s=model$finalModel$lambdaOpt)
+pop_model_pred<-as.data.frame.table(pop_model_pred)
+pop_model_pred<-data.table(	FID=targ_pcs_scaled$FID,
+														IID=targ_pcs_scaled$IID,
+														pop=as.character(pop_model_pred$Var2),
+														prob=round(pop_model_pred$Freq,3))
+
+pop_model_pred<-dcast.data.table(pop_model_pred, formula=FID + IID~pop, value.var = "prob")
+
+fwrite(pop_model_pred, paste0(opt$output,'.model_pred'), sep='\t')
+
+# Create keep files based on the results
+dir.create(paste0(opt$out_dir,'/keep_files/model_based'), recursive = T)
+if(!is.na(opt$prob_thresh)){
+  pop_model_pred$max_prob<-apply(pop_model_pred[,-1:-2], 1, max)
+  pop_model_pred<-pop_model_pred[pop_model_pred$max_prob > opt$prob_thresh,]
+  pop_model_pred$max_prob<-NULL
+}
+
+N_group<-NULL
+for(i in names(pop_model_pred[,-1:-2])){
+	tmp_keep<-pop_model_pred[apply(pop_model_pred[,-1:-2], 1, function(x) x[i] == max(x)),1:2]
+	N_group<-rbind(N_group, data.frame(Group=i, N=nrow(tmp_keep)))
+	fwrite(tmp_keep, paste0(opt$out_dir,'/keep_files/model_based/',i,'.keep'), sep=' ', col.names=F)
+}
+
+N_group<-rbind(N_group, data.frame(Group='Unassigned', N=nrow(targ_pcs_scaled) - nrow(pop_model_pred)))
+
+sink(file = log_file, append = T)
+cat('----------\n')
+cat('N per group based on model:\n')
+print.data.frame(N_group, row.names = FALSE, quote = FALSE, right = FALSE)
+cat('----------\n')
+sink()
+
+if(opt$sd_rule){
+  dir.create(paste0(opt$out_dir,'/keep_files/sd_based'), recursive = T)
+  N_group<-NULL
+  for(pop_i in unique(pop_data$POP)){
+
+    # Calculate scale of PCs within reference population
+    ref_pcs_scaled_i <- score_mean_sd(scores = ref_pcs, keep = pop_data[pop_data$POP == pop_i, c('FID','IID'), with=F])
+
+    # Scale the target PC based on reference mean and SD
+    targ_pcs_scaled_i<-score_scale(score = targ_pcs, ref_scale = ref_pcs_scaled_i)
+
+    # Retain individuals whose scaled PCs all lie within [-3, 3]
+    targ_pcs_scaled_i<-targ_pcs_scaled_i[!apply(targ_pcs_scaled_i[,-1:-2], 1, function(x) any(x > 3 | x < -3)),]
+
+    N_group<-rbind(N_group, data.frame(Group=pop_i, N=nrow(targ_pcs_scaled_i)))
+
+    # Save keep file of individuals that fit the population
+    fwrite(targ_pcs_scaled_i[,1:2], paste0(opt$out_dir,'/keep_files/sd_based/',pop_i,'.keep'), col.names=F, sep='\t')
+  }
+
+  sink(file = log_file, append = T)
+  cat('----------\n')
+  cat('N per group based on SD rule:\n')
+  print.data.frame(N_group, row.names = FALSE, quote = FALSE, right = FALSE)
+  cat('----------\n')
+  sink()
+}
+
+end.time <- Sys.time() # record completion time for the log footer
+time.taken <- end.time - start.time
+sink(file = log_file, append = T) # log_file is paste0(opt$output,'.log'), as used by the other sink() calls
+cat('Analysis finished at',as.character(end.time),'\n')
+cat('Analysis duration was',as.character(round(time.taken,2)),attr(time.taken, 'units'),'\n')
+sink()
diff --git a/Scripts/format_target/format_target.R b/Scripts/format_target/format_target.R
index 124c5b8c..69055d68 100644
--- a/Scripts/format_target/format_target.R
+++ b/Scripts/format_target/format_target.R
@@ -151,11 +151,9 @@ targ_pvar<-targ_pvar[,c('CHR','BP','SNP','A2','A1'),with=F]
 
 # Label SNP with _dup if the RSID is duplicated, so these variants are removed.
 dup_snp<-duplicated(targ_pvar$SNP)
-log_add(log_file = log_file, message = paste0('Removing ', sum(dup_snp),' duplicate variants.'))
+log_add(log_file = log_file, message = paste0('Removing ', sum(dup_snp),' duplicate variants - May have IUPAC NA.'))
 targ_pvar$SNP[dup_snp]<-paste0(targ_pvar$SNP[dup_snp],'_dup')
 
-log_add(log_file = log_file, message = paste0(sum(!dup_snp)," of ", nrow(ref)," reference variants are in the target."))
-
 # Write out new bim file
 names(targ_pvar)<-c('#CHROM','POS','ID','REF','ALT')
 fwrite(targ_pvar, paste0(tmp_dir,'/subset.pvar'), col.names=T, row.names=F, quote=F, na='NA', sep=' ')
@@ -180,7 +178,7 @@ ref_psam<-fread(paste0(opt$ref,'.psam'))
 names(ref_psam)<-gsub('\\#', '', names(ref_psam))
 ref_psam <- ref_psam[, names(ref_psam) %in% c('FID', 'IID'), with = F]
 if(ncol(ref_psam) == 1){
-  ref_ID_update<-data.frame(ref_psam$`IID`, paste0(ref_psam$`#IID`,'_REF'))
+  ref_ID_update<-data.frame(ref_psam$`IID`, paste0(ref_psam$`IID`,'_REF'))
 } else {
   ref_ID_update<-data.frame(ref_psam$`FID`, ref_psam$`IID`, paste0(ref_psam$`FID`,'_REF'), paste0(ref_psam$`IID`,'_REF'))
 }
@@ -188,10 +186,14 @@ fwrite(ref_ID_update, paste0(tmp_dir,'/ref_ID_update.txt'), sep=' ', col.names=F
 system(paste0(opt$plink2,' --pfile ',opt$ref,' --make-pgen --update-ids ',tmp_dir,'/ref_ID_update.txt --out ',tmp_dir,'/REF --memory 5000 --threads 1'))
 
 # Merge target and reference plink files to insert missing SNPs
-system(paste0(opt$plink2,' --pfile ',tmp_dir,'/subset --pmerge ',tmp_dir,'/REF --make-pgen --memory 5000 --threads 1 --out ',tmp_dir,'/subset'))
+# plink2's --pmerge only handles concatenation for the time being, so it cannot be used here to insert missing reference SNPs.
+# Workaround: convert ref and target to plink1 binaries and merge with plink1 (NOTE(review): assumes opt$plink is defined in this script's options); the result is converted back to plink2 binaries afterwards.
+system(paste0(opt$plink2,' --pfile ',tmp_dir,'/subset --make-bed --memory 5000 --threads 1 --out ',tmp_dir,'/subset'))
+system(paste0(opt$plink2,' --pfile ',tmp_dir,'/REF --make-bed --out ',tmp_dir,'/REF --memory 5000 --threads 1'))
+system(paste0(opt$plink,' --bfile ',tmp_dir,'/subset --bmerge ',tmp_dir,'/REF --make-bed --allow-no-sex --memory 5000 --threads 1 --out ',tmp_dir,'/ref_targ'))
 
 # Extract only target individuals
-system(paste0(opt$plink2,' --pfile ',tmp_dir,'/subset --remove ',tmp_dir,'/REF.psam --make-pgen --memory 5000 --threads 1 --out ',opt$output))
+system(paste0(opt$plink2,' --bfile ',tmp_dir,'/ref_targ --remove ',tmp_dir,'/REF.psam --make-pgen --memory 5000 --threads 1 --out ',opt$output))
 
 end.time <- Sys.time()
 time.taken <- end.time - start.time
diff --git a/Scripts/pipeline_reports/indiv_report_creator.Rmd b/Scripts/pipeline_reports/indiv_report_creator.Rmd
index f780da3b..33f543df 100644
--- a/Scripts/pipeline_reports/indiv_report_creator.Rmd
+++ b/Scripts/pipeline_reports/indiv_report_creator.Rmd
@@ -124,11 +124,11 @@ for(chr in CHROMS){
 # Count the number of variants in the target sample data that match reference variants
 nvar_in_target <-
   sum(sapply(format_target_logs, function(x)
-    as.numeric(gsub(' .*', '', x[grepl('reference variants are in the target.$', x)]))))
+    as.numeric(gsub('.* ','', gsub(' reference variants.', '', x[grepl('^Target contains', x) & grepl('reference variants.$', x)])))))
 
 nvar_in_ref <-
   sum(sapply(format_target_logs, function(x)
-    as.numeric(gsub('.* ','', gsub(' reference variants are in the target.', '', x[grepl('reference variants are in the target.$', x)])))))
+    as.numeric(gsub('.* ','', gsub(' variants', '', x[grepl('^Reference data contains ', x)])))))
 
 if(imp_incl){
   
diff --git a/Scripts/pipeline_reports/samp_report_creator.Rmd b/Scripts/pipeline_reports/samp_report_creator.Rmd
index f95391e5..b986b5fe 100644
--- a/Scripts/pipeline_reports/samp_report_creator.Rmd
+++ b/Scripts/pipeline_reports/samp_report_creator.Rmd
@@ -105,11 +105,11 @@ for(chr in CHROMS){
 # Count the number of variants in the target sample data that match reference variants
 nvar_in_target <-
   sum(sapply(format_target_logs, function(x)
-    as.numeric(gsub(' .*', '', x[grepl('reference variants are in the target.$', x)]))))
+    as.numeric(gsub('.* ','', gsub(' reference variants.', '', x[grepl('^Target contains', x) & grepl('reference variants.$', x)])))))
 
 nvar_in_ref <-
   sum(sapply(format_target_logs, function(x)
-    as.numeric(gsub('.* ','', gsub(' reference variants are in the target.', '', x[grepl('reference variants are in the target.$', x)])))))
+    as.numeric(gsub('.* ','', gsub(' variants', '', x[grepl('^Reference data contains ', x)])))))
 
 cat0("- The target sample contains ", nrow(target_psam), " individuals.\n")
 cat0("- The target genotype data was provided in ", target_list$type[target_list$name == params$name], " format.\n")
diff --git a/Scripts/target_scoring/target_scoring.R b/Scripts/target_scoring/target_scoring.R
index 97ea7a36..e145b240 100644
--- a/Scripts/target_scoring/target_scoring.R
+++ b/Scripts/target_scoring/target_scoring.R
@@ -80,7 +80,16 @@ if(!is.na(opt$test)){
 #####
 
 log_add(log_file = log_file, message = 'Calculating polygenic scores in the target sample.')
-scores<-plink_score(pfile = opt$target_plink_chr, chr = CHROMS, plink2 = opt$plink2, score = opt$ref_score, keep = opt$target_keep, frq = opt$ref_freq_chr, threads = opt$n_cores)
+scores <-
+  plink_score(
+    pfile = opt$target_plink_chr,
+    chr = CHROMS,
+    plink2 = opt$plink2,
+    score = opt$ref_score,
+    keep = opt$target_keep,
+    frq = opt$ref_freq_chr,
+    threads = opt$n_cores
+  )
 
 ###
 # Scale the polygenic scores based on the reference
diff --git a/docs/Images/OpenSNP/genopred-v1-comp-yengo_eur.png b/docs/Images/OpenSNP/genopred-v1-comp-yengo_eur.png
index 29af679a..e616c4b6 100644
Binary files a/docs/Images/OpenSNP/genopred-v1-comp-yengo_eur.png and b/docs/Images/OpenSNP/genopred-v1-comp-yengo_eur.png differ
diff --git a/docs/Images/OpenSNP/genopred-v1-comp_strict-yengo_eur.png b/docs/Images/OpenSNP/genopred-v1-comp_strict-yengo_eur.png
index 05adbc33..4cb55eff 100644
Binary files a/docs/Images/OpenSNP/genopred-v1-comp_strict-yengo_eur.png and b/docs/Images/OpenSNP/genopred-v1-comp_strict-yengo_eur.png differ
diff --git a/docs/Images/OpenSNP/genopred-v1-comp_strict_eur-yengo_eur.png b/docs/Images/OpenSNP/genopred-v1-comp_strict_eur-yengo_eur.png
index 226613ee..00492dd4 100644
Binary files a/docs/Images/OpenSNP/genopred-v1-comp_strict_eur-yengo_eur.png and b/docs/Images/OpenSNP/genopred-v1-comp_strict_eur-yengo_eur.png differ
diff --git a/docs/Images/OpenSNP/genopred-yengo_eas.png b/docs/Images/OpenSNP/genopred-yengo_eas.png
index 62a3041d..ed7f95be 100644
Binary files a/docs/Images/OpenSNP/genopred-yengo_eas.png and b/docs/Images/OpenSNP/genopred-yengo_eas.png differ
diff --git a/docs/Images/OpenSNP/genopred-yengo_eur-external.png b/docs/Images/OpenSNP/genopred-yengo_eur-external.png
index c9ed4d23..802e6290 100644
Binary files a/docs/Images/OpenSNP/genopred-yengo_eur-external.png and b/docs/Images/OpenSNP/genopred-yengo_eur-external.png differ
diff --git a/docs/Images/OpenSNP/genopred-yengo_eur.png b/docs/Images/OpenSNP/genopred-yengo_eur.png
index e9629c5b..d8f5d193 100644
Binary files a/docs/Images/OpenSNP/genopred-yengo_eur.png and b/docs/Images/OpenSNP/genopred-yengo_eur.png differ
diff --git a/docs/Images/OpenSNP/time_cpu_bench_pgs_methods.csv b/docs/Images/OpenSNP/time_cpu_bench_pgs_methods.csv
index e579e199..7879ac56 100644
--- a/docs/Images/OpenSNP/time_cpu_bench_pgs_methods.csv
+++ b/docs/Images/OpenSNP/time_cpu_bench_pgs_methods.csv
@@ -1,8 +1,8 @@
 method,s,h:m:s,max_rss,max_vms,max_uss,max_pss,io_in,io_out,mean_load,cpu_time,file,rule,label
-dbslmm,670.2083,0:11:10,1150.19,518875.48,1127.88,1128.51,0,792.55,7.77,272.6,prep_pgs_dbslmm_i-yengo_eur.txt,prep_pgs_dbslmm_i,DBSLMM
-lassosum,234.1869,0:03:54,4270.06,521977.98,4255.07,4255.75,0,1635.82,43.3,102.93,prep_pgs_lassosum_i-yengo_eur.txt,prep_pgs_lassosum_i,lassosum
-ldpred2,1895.106,0:31:35,20009.57,537189.04,19997.63,19998.87,0,18987.26,23.69,495.81,prep_pgs_ldpred2_i-yengo_eur.txt,prep_pgs_ldpred2_i,LDpred2
-megaprs,3974.4036,1:06:14,12626.76,523095.61,12613.95,12615.65,0,21801.18,293.76,11922.15,prep_pgs_megaprs_i-yengo_eur.txt,prep_pgs_megaprs_i,MegaPRS
-prscs,13389.7369,3:43:09,11977.42,16895.49,7635.95,8028.87,158.48,438.68,938.84,127317.72,prep_pgs_prscs_i-yengo_eur.txt,prep_pgs_prscs_i,PRS-CS
-ptclump,38.5799,0:00:38,391.48,4519.65,378.23,378.57,0,213.75,25.98,12.91,prep_pgs_ptclump_i-yengo_eur.txt,prep_pgs_ptclump_i,pT+clump
-sbayesr,1850.8346,0:30:50,23888.48,45932.89,20929.11,21196.32,0,2811.34,828.8,15532.77,prep_pgs_sbayesr_i-yengo_eur.txt,prep_pgs_sbayesr_i,SBayesR
+dbslmm,584.3754,0:09:44,1250.93,518932.27,1228.86,1235.85,0,832.08,9.55,350.71,prep_pgs_dbslmm_i-yengo_eur.txt,prep_pgs_dbslmm_i,DBSLMM
+lassosum,222.346,0:03:42,4320.55,522076.59,4307.17,4307.96,0,1600.7,45.6,107.35,prep_pgs_lassosum_i-yengo_eur.txt,prep_pgs_lassosum_i,lassosum
+ldpred2,4677.1611,1:17:57,25796.93,537282.12,25580.52,25582.33,0,18703.18,36.04,1752.48,prep_pgs_ldpred2_i-yengo_eur.txt,prep_pgs_ldpred2_i,LDpred2
+megaprs,3931.2688,1:05:31,12622.96,523078.8,12610.11,12610.78,0,21771.48,293.97,11783.88,prep_pgs_megaprs_i-yengo_eur.txt,prep_pgs_megaprs_i,MegaPRS
+prscs,13365.5906,3:42:45,11930.78,16875.75,7588.81,7982.41,0,421.89,942.42,127479.74,prep_pgs_prscs_i-yengo_eur.txt,prep_pgs_prscs_i,PRS-CS
+ptclump,27.0675,0:00:27,399.71,4519.64,385.74,388.07,0,243.38,32.97,12.32,prep_pgs_ptclump_i-yengo_eur.txt,prep_pgs_ptclump_i,pT+clump
+sbayesr,2255.2539,0:37:35,23888.02,45932.63,20929.12,21196.56,0,2877.13,864.48,19629.33,prep_pgs_sbayesr_i-yengo_eur.txt,prep_pgs_sbayesr_i,SBayesR
diff --git a/docs/Images/OpenSNP/time_cpu_bench_pgs_methods.png b/docs/Images/OpenSNP/time_cpu_bench_pgs_methods.png
index e094f76f..e8070372 100644
Binary files a/docs/Images/OpenSNP/time_cpu_bench_pgs_methods.png and b/docs/Images/OpenSNP/time_cpu_bench_pgs_methods.png differ
diff --git a/docs/opensnp_benchmark.Rmd b/docs/opensnp_benchmark.Rmd
index bf7b5289..3bbcc76d 100644
--- a/docs/opensnp_benchmark.Rmd
+++ b/docs/opensnp_benchmark.Rmd
@@ -134,7 +134,7 @@ library(data.table)
 
 # Create config file
 conf <- c(
-  'outdir: /users/k1806347/oliverpainfel/Data/OpenSNP/GenoPred/test4',
+  'outdir: /users/k1806347/oliverpainfel/Data/OpenSNP/GenoPred/test5',
   'config_file: misc/opensnp/config.yaml',
   'gwas_list: misc/opensnp/gwas_list.txt',
   'score_list: misc/opensnp/score_list.txt',
@@ -228,8 +228,8 @@ library(cowplot)
 # Read in configuration specific benchmark files
 bm_files_i <-
   paste0(
-    '/users/k1806347/oliverpainfel/Data/OpenSNP/GenoPred/test4/reference/benchmarks/',
-    list.files('/users/k1806347/oliverpainfel/Data/OpenSNP/GenoPred/test4/reference/benchmarks/')
+    '/users/k1806347/oliverpainfel/Data/OpenSNP/GenoPred/test5/reference/benchmarks/',
+    list.files('/users/k1806347/oliverpainfel/Data/OpenSNP/GenoPred/test5/reference/benchmarks/')
   )
 
 # Read in benchmark files
@@ -315,7 +315,7 @@ source_all('../functions')
 pheno <- fread('/users/k1806347/oliverpainfel/Data/OpenSNP/processed/pheno/height.txt')
 
 # Define pgs_methods used
-pgs_methods <- c('external','ptclump', 'dbslmm', 'prscs', 'sbayesr', 'lassosum', 'ldpred2', 'megaprs')
+pgs_methods <- c('external',read_param(config = 'misc/opensnp/config.yaml', param = 'pgs_methods', return_obj = F))
 
 # Define pgs_methods applied to non-EUR GWAS
 pgs_methods_eur <- c('ptclump','lassosum','megaprs','prscs','dbslmm')
@@ -673,6 +673,7 @@ keep_list <- fread('/users/k1806347/oliverpainfel/Data/OpenSNP/GenoPred/test1/op
 
 # Read in pgs
 gwas_list <- fread('misc/opensnp/gwas_list.txt')
+gwas_list$name<-gsub('_','',gwas_list$name)
 pgs_methods <- c('pt_clump','dbslmm','prscs','sbayesr','lassosum','ldpred2','megaprs')
 pgs_methods_eur <- c('pt_clump','lassosum','megaprs')
 
@@ -731,6 +732,7 @@ write.csv(
   row.names = F
 )
 
+cor<-fread('/scratch/prj/oliverpainfel/Data/OpenSNP/assoc/genopred-v1-yengo-assoc.csv')
 # Restrict to best only
 cor_subset <- NULL
 for(pop_i in unique(cor$pop)){
@@ -907,6 +909,7 @@ write.csv(
   '/scratch/prj/oliverpainfel/Data/OpenSNP/assoc/genopred-v1-strict-yengo-assoc.csv',
   row.names = F
 )
+cor<-fread('/scratch/prj/oliverpainfel/Data/OpenSNP/assoc/genopred-v1-strict-yengo-assoc.csv')
 
 # Restrict to best only
 cor_subset <- NULL
@@ -1005,7 +1008,7 @@ dev.off()
 # Plot the same plot only using the EUR target population in OpenSNP
 ######
 
-tmp <- cor_both[cor_both$gwas == 'yengoeur' & cor_both$pop == 'EUR\n N = 653', ]
+tmp <- cor_both[cor_both$gwas == 'yengoeur' & grepl('EUR', cor_both$pop), ]
 y_lim <- c(min(tmp$r - tmp$se), max(tmp$r + tmp$se))
 
 v1_plot <-
@@ -1028,7 +1031,7 @@ v1_plot <-
     background_grid() +
     theme(axis.text.x = element_text(angle = 45, hjust = 1),
           plot.title = element_text(hjust = 0.5, size=12)) +
-    facet_grid(pop ~ Version) +
+    facet_grid(. ~ Version) +
     panel_border()
 
 v2_plot <-
@@ -1051,7 +1054,7 @@ v2_plot <-
     background_grid() +
     theme(axis.text.x = element_text(angle = 45, hjust = 1),
           plot.title = element_text(hjust = 0.5, size=12)) +
-    facet_grid(pop ~ Version) +
+    facet_grid(. ~ Version) +
     panel_border()
 
 png('/users/k1806347/oliverpainfel/Software/MyGit/GenoPred/docs/Images/OpenSNP/genopred-v1-comp_strict_eur-yengo_eur.png',
@@ -1078,12 +1081,16 @@ dev.off()
 </div>
 </div>
 
+<br>
+
 <div class="centered-container">
 <div class="rounded-image-container">
 ![Using same ancestry classification threshold](Images/OpenSNP/genopred-v1-comp_strict-yengo_eur.png)
 </div>
 </div>
 
+<br>
+
 <div class="centered-container">
 <div class="rounded-image-container" style="width: 60%;">
 ![Showing results in European OpenSNP data only](Images/OpenSNP/genopred-v1-comp_strict_eur-yengo_eur.png)
diff --git a/docs/opensnp_benchmark.html b/docs/opensnp_benchmark.html
index 6e39cab3..b0381025 100644
--- a/docs/opensnp_benchmark.html
+++ b/docs/opensnp_benchmark.html
@@ -13,7 +13,7 @@
 
 <title>OpenSNP Benchmark</title>
 
-<script src="site_libs/header-attrs-2.23/header-attrs.js"></script>
+<script src="site_libs/header-attrs-2.25/header-attrs.js"></script>
 <script src="site_libs/jquery-3.6.0/jquery-3.6.0.min.js"></script>
 <meta name="viewport" content="width=device-width, initial-scale=1" />
 <link href="site_libs/bootstrap-3.3.5/css/cosmo.min.css" rel="stylesheet" />
@@ -35,8 +35,8 @@
 <script src="site_libs/navigation-1.1/tabsets.js"></script>
 <link href="site_libs/highlightjs-9.12.0/default.css" rel="stylesheet" />
 <script src="site_libs/highlightjs-9.12.0/highlight.js"></script>
-<link href="site_libs/font-awesome-6.4.0/css/all.min.css" rel="stylesheet" />
-<link href="site_libs/font-awesome-6.4.0/css/v4-shims.min.css" rel="stylesheet" />
+<link href="site_libs/font-awesome-6.4.2/css/all.min.css" rel="stylesheet" />
+<link href="site_libs/font-awesome-6.4.2/css/v4-shims.min.css" rel="stylesheet" />
 <link rel="stylesheet" href="styles/night-mode.css" id="nightModeStylesheet">
 
 <script>
@@ -531,7 +531,7 @@ <h3>Create gwas_list, target_list and config</h3>
 
 # Create config file
 conf &lt;- c(
-  &#39;outdir: /users/k1806347/oliverpainfel/Data/OpenSNP/GenoPred/test4&#39;,
+  &#39;outdir: /users/k1806347/oliverpainfel/Data/OpenSNP/GenoPred/test5&#39;,
   &#39;config_file: misc/opensnp/config.yaml&#39;,
   &#39;gwas_list: misc/opensnp/gwas_list.txt&#39;,
   &#39;score_list: misc/opensnp/score_list.txt&#39;,
@@ -603,7 +603,8 @@ <h2>Run GenoPred</h2>
 <summary>
 Show code
 </summary>
-<pre class="bash"><code>snakemake --profile slurm --use-conda --configfile=misc/opensnp/config.yaml output_all -n </code></pre>
+<pre class="bash"><code>snakemake --profile slurm --use-conda --configfile=misc/opensnp/config.yaml output_all -n 
+snakemake --profile slurm --use-conda --configfile=misc/opensnp/config.yaml outlier_detection -n </code></pre>
 </details>
 <hr />
 </div>
@@ -620,8 +621,8 @@ <h2>Check time and memory requirements</h2>
 # Read in configuration specific benchmark files
 bm_files_i &lt;-
   paste0(
-    &#39;/users/k1806347/oliverpainfel/Data/OpenSNP/GenoPred/test4/reference/benchmarks/&#39;,
-    list.files(&#39;/users/k1806347/oliverpainfel/Data/OpenSNP/GenoPred/test4/reference/benchmarks/&#39;)
+    &#39;/users/k1806347/oliverpainfel/Data/OpenSNP/GenoPred/test5/reference/benchmarks/&#39;,
+    list.files(&#39;/users/k1806347/oliverpainfel/Data/OpenSNP/GenoPred/test5/reference/benchmarks/&#39;)
   )
 
 # Read in benchmark files
@@ -706,7 +707,7 @@ <h2>Evaluate PGS</h2>
 pheno &lt;- fread(&#39;/users/k1806347/oliverpainfel/Data/OpenSNP/processed/pheno/height.txt&#39;)
 
 # Define pgs_methods used
-pgs_methods &lt;- c(&#39;external&#39;,&#39;ptclump&#39;, &#39;dbslmm&#39;, &#39;prscs&#39;, &#39;sbayesr&#39;, &#39;lassosum&#39;, &#39;ldpred2&#39;, &#39;megaprs&#39;)
+pgs_methods &lt;- c(&#39;external&#39;,read_param(config = &#39;misc/opensnp/config.yaml&#39;, param = &#39;pgs_methods&#39;, return_obj = F))
 
 # Define pgs_methods applied to non-EUR GWAS
 pgs_methods_eur &lt;- c(&#39;ptclump&#39;,&#39;lassosum&#39;,&#39;megaprs&#39;,&#39;prscs&#39;,&#39;dbslmm&#39;)
@@ -1061,6 +1062,7 @@ <h2>Evaluate PGS</h2>
 
 # Read in pgs
 gwas_list &lt;- fread(&#39;misc/opensnp/gwas_list.txt&#39;)
+gwas_list$name&lt;-gsub(&#39;_&#39;,&#39;&#39;,gwas_list$name)
 pgs_methods &lt;- c(&#39;pt_clump&#39;,&#39;dbslmm&#39;,&#39;prscs&#39;,&#39;sbayesr&#39;,&#39;lassosum&#39;,&#39;ldpred2&#39;,&#39;megaprs&#39;)
 pgs_methods_eur &lt;- c(&#39;pt_clump&#39;,&#39;lassosum&#39;,&#39;megaprs&#39;)
 
@@ -1119,6 +1121,7 @@ <h2>Evaluate PGS</h2>
   row.names = F
 )
 
+cor&lt;-fread(&#39;/scratch/prj/oliverpainfel/Data/OpenSNP/assoc/genopred-v1-yengo-assoc.csv&#39;)
 # Restrict to best only
 cor_subset &lt;- NULL
 for(pop_i in unique(cor$pop)){
@@ -1295,6 +1298,7 @@ <h2>Evaluate PGS</h2>
   &#39;/scratch/prj/oliverpainfel/Data/OpenSNP/assoc/genopred-v1-strict-yengo-assoc.csv&#39;,
   row.names = F
 )
+cor&lt;-fread(&#39;/scratch/prj/oliverpainfel/Data/OpenSNP/assoc/genopred-v1-strict-yengo-assoc.csv&#39;)
 
 # Restrict to best only
 cor_subset &lt;- NULL
@@ -1393,7 +1397,7 @@ <h2>Evaluate PGS</h2>
 # Plot the same plot only using the EUR target population in OpenSNP
 ######
 
-tmp &lt;- cor_both[cor_both$gwas == &#39;yengoeur&#39; &amp; cor_both$pop == &#39;EUR\n N = 653&#39;, ]
+tmp &lt;- cor_both[cor_both$gwas == &#39;yengoeur&#39; &amp; grepl(&#39;EUR&#39;, cor_both$pop), ]
 y_lim &lt;- c(min(tmp$r - tmp$se), max(tmp$r + tmp$se))
 
 v1_plot &lt;-
@@ -1416,7 +1420,7 @@ <h2>Evaluate PGS</h2>
     background_grid() +
     theme(axis.text.x = element_text(angle = 45, hjust = 1),
           plot.title = element_text(hjust = 0.5, size=12)) +
-    facet_grid(pop ~ Version) +
+    facet_grid(. ~ Version) +
     panel_border()
 
 v2_plot &lt;-
@@ -1439,7 +1443,7 @@ <h2>Evaluate PGS</h2>
     background_grid() +
     theme(axis.text.x = element_text(angle = 45, hjust = 1),
           plot.title = element_text(hjust = 0.5, size=12)) +
-    facet_grid(pop ~ Version) +
+    facet_grid(. ~ Version) +
     panel_border()
 
 png(&#39;/users/k1806347/oliverpainfel/Software/MyGit/GenoPred/docs/Images/OpenSNP/genopred-v1-comp_strict_eur-yengo_eur.png&#39;,
@@ -1465,6 +1469,7 @@ <h2>Evaluate PGS</h2>
 </div>
 </div>
 </div>
+<p><br></p>
 <div class="centered-container">
 <div class="rounded-image-container">
 <div class="figure">
@@ -1474,6 +1479,7 @@ <h2>Evaluate PGS</h2>
 </div>
 </div>
 </div>
+<p><br></p>
 <div class="centered-container">
 <div class="rounded-image-container" style="width: 60%;">
 <div class="figure">
diff --git a/docs/pipeline_readme.Rmd b/docs/pipeline_readme.Rmd
index b2f2ac36..8d674fdd 100644
--- a/docs/pipeline_readme.Rmd
+++ b/docs/pipeline_readme.Rmd
@@ -974,7 +974,7 @@ Outputs specific to the configuration used are stored in the `outdir` specified
   │       ├── [target_name]-report.html (sample-level report)
   │       └── individual (individual-level reports)
   │    
-  └── reference (nextflow pipeline execution data)
+  └── reference
       ├── gwas_sumstat (processed gwas sumstats)
       │   └── [gwas name] (pgs for each gwas or score file)
       ├── pgs_score_files (score files for polygenic scoring)
diff --git a/docs/pipeline_readme.html b/docs/pipeline_readme.html
index 8bc9fc83..6ea7d2ab 100644
--- a/docs/pipeline_readme.html
+++ b/docs/pipeline_readme.html
@@ -1643,7 +1643,7 @@ <h2>Output structure</h2>
   │       ├── [target_name]-report.html (sample-level report)
   │       └── individual (individual-level reports)
   │    
-  └── reference (nextflow pipeline execution data)
+  └── reference
       ├── gwas_sumstat (processed gwas sumstats)
       │   └── [gwas name] (pgs for each gwas or score file)
       ├── pgs_score_files (score files for polygenic scoring)
diff --git a/functions/plink.R b/functions/plink.R
index 9767266f..6b008d26 100644
--- a/functions/plink.R
+++ b/functions/plink.R
@@ -17,7 +17,7 @@ plink_subset<-function(plink=NULL, plink2=NULL, chr = 1:22, keep = NULL, extract
   if(!is.null(pfile) & is.null(plink2)){
     stop("plink2 must be specified when using pfile.")
   }
-  
+
   # Prepare plink options
   plink_opt<-NULL
   if(!is.null(plink)){
@@ -43,7 +43,7 @@ plink_subset<-function(plink=NULL, plink2=NULL, chr = 1:22, keep = NULL, extract
     extract <- obj_or_file(extract)
     plink_opt<-paste0(plink_opt, paste0('--extract ', extract, ' '))
   }
-  
+
   # Run plink
   for(chr_i in chr){
     cmd <- paste0(plink_opt, '--threads ', threads,' --out ',out,chr_i,' --memory ',memory)
@@ -72,7 +72,7 @@ plink_qc_snplist<-function(bfile=NULL, pfile=NULL, plink=NULL, plink2=NULL, keep
   if(!is.null(pfile) & is.null(plink2)){
     stop("plink2 must be specified when using pfile.")
   }
-  
+
   plink_opt<-NULL
   if(!is.null(plink)){
     plink_opt<-paste0(plink_opt, paste0(plink, ' '))
@@ -97,7 +97,7 @@ plink_qc_snplist<-function(bfile=NULL, pfile=NULL, plink=NULL, plink2=NULL, keep
     keep<-obj_or_file(keep)
     plink_opt <- paste0(plink_opt, paste0('--keep ',keep,' '))
   }
-  
+
   temp_file <- tempfile()
   snplist <- NULL
   for(chr_i in chr){
@@ -109,7 +109,7 @@ plink_qc_snplist<-function(bfile=NULL, pfile=NULL, plink=NULL, plink2=NULL, keep
     }
     snplist<-c(snplist, fread(paste0(temp_file, '.snplist'), header=F)$V1)
   }
-  
+
   return(snplist)
 }
 
@@ -130,9 +130,9 @@ plink_merge<-function(bfile=NULL, pfile=NULL, plink=NULL, plink2=NULL, chr = 1:2
   if(!is.null(pfile) & is.null(plink2)){
     stop("plink2 must be specified when using pfile.")
   }
-  
+
   tmp_dir<-tempdir()
-  
+
   # Create merge list
   if(!is.null(bfile)){
     ref_merge_list<-paste0(bfile, chr)
@@ -140,7 +140,7 @@ plink_merge<-function(bfile=NULL, pfile=NULL, plink=NULL, plink2=NULL, chr = 1:2
     ref_merge_list<-paste0(pfile, chr)
   }
   write.table(ref_merge_list, paste0(tmp_dir,'/ref_mergelist.txt'), row.names=F, col.names=F, quote=F)
-  
+
   # Prepare command
   plink_opt<-NULL
   if(!is.null(plink)){
@@ -179,7 +179,7 @@ plink_merge<-function(bfile=NULL, pfile=NULL, plink=NULL, plink2=NULL, chr = 1:2
     keep<-obj_or_file(keep)
     plink_opt<-paste0(plink_opt, paste0('--keep ', keep, ' '))
   }
-  
+
   cmd<-paste0(plink_opt,'--threads ', threads,' --out ', out,' --memory ', memory)
   exit_status <- system(cmd, intern=FALSE)
   if (exit_status == 2) {
@@ -195,38 +195,38 @@ plink_pca<-function(bfile=NULL, pfile=NULL, chr = 1:22, plink2, extract = NULL,
   if(!is.null(bfile) & !is.null(pfile)){
     stop("Both bfile and pfile cannot be specified.")
   }
-  
+
   tmp_dir<-tempdir()
-  
+
   # Subset data prior to merging
   if(!is.null(bfile)){
     plink_subset(bfile = bfile, chr = chr, plink2 = plink2, keep = keep, extract = extract, memory = memory, out = paste0(tmp_dir,'/ref_subset_chr'), threads=threads)
   } else {
     plink_subset(pfile = pfile, chr = chr, plink2 = plink2, keep = keep, extract = extract, memory = memory, out = paste0(tmp_dir,'/ref_subset_chr'), threads=threads)
   }
-  
+
   # Merge subset reference
   if(!is.null(bfile)){
     plink_merge(bfile = paste0(tmp_dir,'/ref_subset_chr'), chr = chr, plink2 = plink2, keep = keep, extract = extract, flip = flip, memory = memory, out = paste0(tmp_dir,'/ref_merge'), threads=threads)
   } else {
     plink_merge(pfile = paste0(tmp_dir,'/ref_subset_chr'), chr = chr, plink2 = plink2, keep = keep, extract = extract, flip = flip, memory = memory, out = paste0(tmp_dir,'/ref_merge'), threads=threads)
   }
-  
+
   plink_opt<-paste0(plink2, ' ')
   if(!is.null(bfile)){
     plink_opt<-paste0(plink_opt, paste0('--bfile ',tmp_dir,'/ref_merge '))
   } else {
     plink_opt<-paste0(plink_opt, paste0('--pfile ',tmp_dir,'/ref_merge '))
   }
-  
+
   # Calculate SNP weights
   system(paste0(plink_opt,' --threads ', threads,' --pca ',n_pc,' biallelic-var-wts  --out ',tmp_dir,'/ref_merge --memory ', memory))
-  
+
   # Format the SNP-weights
   snp_weights<-fread(paste0(tmp_dir,'/ref_merge.eigenvec.var'))
   snp_weights<-snp_weights[, -1, with=F]
   names(snp_weights)[1:3]<-c('SNP','A1','A2')
-  
+
   return(snp_weights)
 }
 
@@ -247,13 +247,13 @@ plink_prune<-function(bfile=NULL, pfile=NULL, keep = NULL, plink=NULL, plink2=NU
   if(!is.null(pfile) & is.null(plink2)){
     stop("plink2 must be specified when using pfile.")
   }
-  
+
   # Create a temporary file path to store pruning output
   tmp_file<-tempfile()
-  
+
   # Check extract file
   extract<-obj_or_file(extract)
-  
+
   # Prepare plink options
   plink_opt<-NULL
   if(!is.null(plink)){
@@ -274,7 +274,7 @@ plink_prune<-function(bfile=NULL, pfile=NULL, keep = NULL, plink=NULL, plink2=NU
     keep <- obj_or_file(keep)
     plink_opt<-paste0(plink_opt, '--keep ', keep,' ')
   }
-  
+
   # Perfom pruning and read in SNP-list
   ld_indep<-NULL
   for(chr_i in chr){
@@ -308,12 +308,12 @@ plink_clump<-function(bfile=NULL, pfile=NULL, plink=NULL, plink2=NULL, chr = 1:2
   if(!is.null(pfile) & is.null(plink2)){
     stop("plink2 must be specified when using pfile.")
   }
-  
+
   log_add(log_file = log_file, message = 'Performing LD-based clumping.')
   tmp_file <- tempfile()
-  
+
   sumstats <- obj_or_file(sumstats, header=T)
-  
+
   plink_opt<-NULL
   if(!is.null(plink)){
     plink_opt<-paste0(plink_opt, paste0(plink, ' '))
@@ -329,7 +329,7 @@ plink_clump<-function(bfile=NULL, pfile=NULL, plink=NULL, plink2=NULL, chr = 1:2
     keep <- obj_or_file(keep)
     plink_opt<-paste0(plink_opt, '--keep ', keep, ' ')
   }
-  
+
   clumped<-NULL
   for(chr_i in chr){
     cmd<-paste0(plink_opt, '--clump ', sumstats,' --clump-p1 1 --clump-p2 1 --clump-r2 0.1 --clump-kb 250 --out ',tmp_file,'.chr',chr_i,' --threads ', threads,' --memory ', memory)
@@ -344,14 +344,14 @@ plink_clump<-function(bfile=NULL, pfile=NULL, plink=NULL, plink2=NULL, chr = 1:2
         clumped <- c(clumped, fread(paste0(tmp_file,'.chr',chr_i,'.clumps'))$ID)
       }
     }
-    
+
     if(exit_status == 2){
       stop()
     }
   }
-  
+
   log_add(log_file = log_file, message = paste0(length(clumped),' variants remain after clumping.'))
-  
+
   return(clumped)
 }
 
@@ -363,54 +363,53 @@ plink_king<-function(bfile=NULL, pfile=NULL, extract = NULL, chr = 1:22, plink2=
   if(!is.null(bfile) & !is.null(pfile)){
     stop("Both bfile and pfile cannot be specified.")
   }
-  
+
   # Create object indicating tmpdir
   tmp_dir<-tempdir()
-  
+
   # Identify variants not in high LD regions
   if(!is.null(bfile)){
     snp_data <- read_bim(dat = bfile, chr = chr)
   } else {
     snp_data <- read_pvar(dat = pfile, chr = chr)
   }
-  
+
   snp_data <- remove_regions(dat = snp_data, regions = long_ld_coord)
-  
+
   # Identify intersect of snplists to extract
   if(!is.null(extract)){
     extract_snplist<-intersect(snp_data$SNP, extract)
   }
-  
+
   # Merge per chromosome files extracting selected variants
   if(!is.null(bfile)){
     plink_merge(bfile = bfile, chr = chr, plink2 = plink2, extract = extract_snplist, out = paste0(tmp_dir,'/merged'), threads=threads)
   } else {
     plink_merge(pfile = pfile, chr = chr, plink2 = plink2, extract = extract_snplist, out = paste0(tmp_dir,'/merged'), threads=threads)
   }
-  
+
   # Run KING estimator
   if(!is.null(bfile)){
     system(paste0(plink2, ' --bfile ', tmp_dir, '/merged --threads ', threads,' --make-king triangle bin --out ', tmp_dir, '/merged'))
   } else {
     system(paste0(plink2, ' --pfile ', tmp_dir, '/merged --threads ', threads,' --make-king triangle bin --out ', tmp_dir, '/merged'))
   }
-  
+
   # Identify unrelated individuals (remove 2nd degree relatives)
   if(!is.null(bfile)){
     system(paste0(plink2, ' --bfile ', tmp_dir, '/merged --threads ', threads,' --king-cutoff ',tmp_dir, '/merged 0.0884 --out ', tmp_dir, '/merged'))
   } else {
     system(paste0(plink2, ' --pfile ', tmp_dir, '/merged --threads ', threads,' --king-cutoff ',tmp_dir, '/merged 0.0884 --out ', tmp_dir, '/merged'))
   }
-  
+
   # Move the kinship matrix
   system(paste0('mv ', tmp_dir, '/merged.king.bin ', out, '.king.bin'))
   system(paste0('mv ', tmp_dir, '/merged.king.id ', out, '.king.id'))
-  
+
   # Format and save the list of unrelated individuals
   system(paste0('tail -n +2 ', tmp_dir, '/merged.king.cutoff.in.id > ', out, '.unrelated.keep'))
 }
 
-# Calculate scores in target file
 plink_score<-function(bfile=NULL, pfile=NULL, score, keep=NULL, extract=NULL, chr=1:22, frq=NULL, plink2=NULL, threads=1){
   if(is.null(bfile) & is.null(pfile)){
     stop("bfile or pfile must be specified.")
@@ -418,14 +417,14 @@ plink_score<-function(bfile=NULL, pfile=NULL, score, keep=NULL, extract=NULL, ch
   if(!is.null(bfile) & !is.null(pfile)){
     stop("Both bfile and pfile cannot be specified.")
   }
-  
+
   # Create object indicating tmpdir
   tmp_folder<-tempdir()
-  
+
   # Determine the number of scores
   score_small<-fread(score, nrows=5)
   n_scores<-ncol(score_small)-3
-  
+
   # Assemble command and files for keep and extract
   plink_opt<-paste0(plink2, ' ')
   if(!is.null(bfile)){
@@ -442,9 +441,9 @@ plink_score<-function(bfile=NULL, pfile=NULL, score, keep=NULL, extract=NULL, ch
     plink_opt<-paste0(plink_opt, '--extract ', extract, ' ')
   }
   if(!is.null(frq)){
-    plink_opt<-paste0(plink_opt, '--read-freq ', frq,'CHROMOSOME_NUMBER.afreq --score ', score,' header-read ')
+    plink_opt<-paste0(plink_opt, '--read-freq ', frq,'CHROMOSOME_NUMBER.afreq --score ', score,' header-read cols=+scoresums,-scoreavgs ')
   } else {
-    plink_opt<-paste0(plink_opt, '--score ', score,' header-read no-mean-imputation ')
+    plink_opt<-paste0(plink_opt, '--score ', score,' header-read no-mean-imputation cols=+scoresums,-scoreavgs ')
   }
   if(n_scores > 1){
     plink_opt<-paste0(plink_opt, '--score-col-nums 4-',3+n_scores, ' ')
@@ -460,7 +459,7 @@ plink_score<-function(bfile=NULL, pfile=NULL, score, keep=NULL, extract=NULL, ch
       stop()
     }
   }
-  
+
   # Add up the scores across chromosomes
   # Read in score files IDs columns from first non-missing chromosome
   # Insert FID if not present
@@ -479,36 +478,30 @@ plink_score<-function(bfile=NULL, pfile=NULL, score, keep=NULL, extract=NULL, ch
       break
     }
   }
-  
+
   # Read in the scores for each chromosome, adjust for the number of SNPs considered and add up
   scores<-list()
   for(chr_i in chr){
     if(file.exists(paste0(tmp_folder,'/profiles.chr',chr_i,'.sscore'))){
       sscore<-fread(paste0(tmp_folder,'/profiles.chr',chr_i,'.sscore'))
-      scores[[chr_i]]<-sscore[,grepl('_AVG$', names(sscore)),with=F]
-      # This allows for difference plink formats across plink versions
-      if(any(names(sscore) == 'NMISS_ALLELE_CT')){
-        scores[[chr_i]]<-as.matrix(scores[[chr_i]]*sscore$NMISS_ALLELE_CT[1]/2)
-      } else {
-        scores[[chr_i]]<-as.matrix(scores[[chr_i]]*sscore$ALLELE_CT[1]/2)
-      }
+      scores[[chr_i]]<-as.matrix(sscore[,paste0(names(score_small)[-1:-3], '_SUM'),with=F])
     } else {
       cat0('No scores for chromosome ',chr_i,'. Check plink logs file for reason.\n')
     }
   }
-  
+
   # Remove NULL elements from list (these are inserted by R when list objects are numbered)
   scores[sapply(scores, is.null)] <- NULL
-  
+
   # sum scores across chromosomes
   scores<-Reduce(`+`, scores)
-  
+
   # Combine score with IDs
   scores<-data.table(scores_ids,
                      scores)
-  
+
   # Rename columns
   names(scores)[-1:-2]<-names(score_small)[-1:-3]
-  
+
   return(scores)
 }
diff --git a/pipeline/misc/opensnp/config.yaml b/pipeline/misc/opensnp/config.yaml
index af23f447..4ed349aa 100644
--- a/pipeline/misc/opensnp/config.yaml
+++ b/pipeline/misc/opensnp/config.yaml
@@ -1,4 +1,4 @@
-outdir: /users/k1806347/oliverpainfel/Data/OpenSNP/GenoPred/test4
+outdir: /users/k1806347/oliverpainfel/Data/OpenSNP/GenoPred/test5
 config_file: misc/opensnp/config.yaml
 gwas_list: misc/opensnp/gwas_list.txt
 score_list: misc/opensnp/score_list.txt
diff --git a/pipeline/rules/pgs_methods.smk b/pipeline/rules/pgs_methods.smk
index 557e9c60..a0738581 100644
--- a/pipeline/rules/pgs_methods.smk
+++ b/pipeline/rules/pgs_methods.smk
@@ -2,7 +2,8 @@
 rule ref_pca_i:
   input:
     ref_input,
-    rules.install_genoutils.output
+    rules.install_genoutils.output,
+    "resources/last_version.txt"
   output:
     "resources/data/ref/pc_score_files/{population}/ref-{population}-pcs.EUR.scale"
   conda:
@@ -49,7 +50,8 @@ if 'gwas_list' in config:
     input:
       ref_input,
       rules.install_genoutils.output,
-      lambda w: gwas_list_df.loc[gwas_list_df['name'] == "{}".format(w.gwas), 'path'].iloc[0]
+      lambda w: gwas_list_df.loc[gwas_list_df['name'] == "{}".format(w.gwas), 'path'].iloc[0],
+      "resources/last_version.txt"
     output:
       f"{outdir}/reference/gwas_sumstat/{{gwas}}/{{gwas}}-cleaned.gz"
     benchmark:
@@ -449,7 +451,8 @@ check_list_paths(score_list_df)
 # Download PGS score files for PGSC if path is NA
 rule download_pgs_external:
   input:
-    rules.download_pgscatalog_utils.output
+    rules.download_pgscatalog_utils.output,
+    "resources/last_version.txt"
   output:
     touch(f"{outdir}/reference/pgs_score_files/raw_external/{{score}}/{{score}}_hmPOS_GRCh37.txt.gz")
   params:
diff --git a/pipeline/rules/target_qc.smk b/pipeline/rules/target_qc.smk
index 2159f563..0bc17e08 100644
--- a/pipeline/rules/target_qc.smk
+++ b/pipeline/rules/target_qc.smk
@@ -40,7 +40,8 @@ if 'target_list' in config:
     threads: n_cores_impute
     input:
       lambda w: target_list_df.loc[target_list_df['name'] == "{}".format(w.name), 'path'].iloc[0],
-      rules.download_impute2_data.output
+      rules.download_impute2_data.output,
+      "resources/last_version.txt"
     output:
       f"{outdir}/{{name}}/geno/imputed/{{name}}.chr{{chr}}.bed"
     benchmark:
@@ -117,7 +118,8 @@ if 'target_list' in config:
       lambda w: format_target_input(name = w.name),
       lambda w: target_path(name = w.name, chr = w.chr),
       ref_input,
-      rules.install_genoutils.output
+      rules.install_genoutils.output,
+      "resources/last_version.txt"
     output:
       f"{outdir}/{{name}}/geno/{{name}}.ref.chr{{chr}}.pgen"
     benchmark: