forked from aseetharam/common_scripts
-
Notifications
You must be signed in to change notification settings - Fork 40
/
ExtractSCOs.sh
executable file
·73 lines (55 loc) · 1.6 KB
/
ExtractSCOs.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#!/bin/bash
# This is a bash script that extracts all ortholog groups that have a single-copy gene in every species.
# It takes the output generated by "CopyNumberGen.sh" and prints the single-copy orthologs.
# By default, it prints to stdout.
# Arun Seetharam <[email protected]>
# Name this script was invoked as, with any leading directory stripped;
# shown in the usage text.
scriptName=${0##*/}
# Copy number used when -c is not given (integer attribute).
declare -i DEFAULT_COPY=1
# Copy number to filter on; starts at the default and may be replaced by -c.
declare -i copynum=$DEFAULT_COPY
#######################################
# Print the help text for this script.
# Globals:   scriptName (read) - inserted at the start of the synopsis line
# Arguments: none
# Outputs:   writes the help text to stdout, one entry per line
#######################################
printUsage() {
  local -a usage_text=(
    'Synopsis'
    "$scriptName [-h | --help] [-c number] input_file"
    'Description'
    'Reads the count table and prints only lines having one copy gene in every genome.'
    'Output is printed to STDOUT'
    'input_file'
    'Input file should contain count table and ortholog group IDs'
    'This file has to be generated by "CopyNumberGen.sh" script.'
    '-c number'
    'By default, all orthologs groups that have single copy gene are printed, but you can specify'
    'desired number of genes per ortholog group. Using 2 will print all ortholog groups having'
    'two copy genes per genome.'
    '-h, --help'
    'Brings up this help page'
    'Author'
    'Arun Seetharam, Bioinformatics Core, Purdue University.'
  )
  # A fixed '%s\n' format keeps entries starting with '-' from being
  # mistaken for printf options or format directives.
  printf '%s\n' "${usage_text[@]}"
}
# ---------------------------------------------------------------------------
# Main script: parse the command line, then filter the count table.
#
# Options:
#   -c number    copy number every count column must equal (default: copynum)
#   -h, --help   print the usage text and exit successfully
# Operand:
#   input_file   count table; first whitespace-separated field is the
#                ortholog group ID, the remaining fields are the counts
# Output:
#   The first line of input_file is echoed unchanged, followed by every later
#   row whose count columns all equal the requested copy number, with fields
#   joined by tabs.
# Exit status:
#   0 on success or after printing help; 1 on any usage or input error.
# ---------------------------------------------------------------------------

# Called with nothing at all: show the help text and fail.
if [[ $# -lt 1 ]]; then
  printUsage
  exit 1
fi

# Leading ':' selects silent mode so the error messages below are ours.
# getopts only knows short options; '-:' makes it return a long option as
# option '-' with the long name in OPTARG, which is how --help is recognised.
while getopts ':c:h-:' option; do
  case "$option" in
    c)
      if [[ ! "$OPTARG" =~ ^[0-9]+$ ]]; then
        printf '%s: option -c requires a non-negative integer, got "%s"\n' \
          "$scriptName" "$OPTARG" >&2
        exit 1
      fi
      # Force base 10 so a value such as "08" is not parsed as invalid octal.
      copynum=$((10#$OPTARG))
      ;;
    h)
      printUsage
      exit 0
      ;;
    -)
      if [[ "$OPTARG" == "help" ]]; then
        printUsage
        exit 0
      fi
      printf '%s: unknown option --%s\n' "$scriptName" "$OPTARG" >&2
      printUsage >&2
      exit 1
      ;;
    :)
      printf '%s: option -%s requires an argument\n' "$scriptName" "$OPTARG" >&2
      printUsage >&2
      exit 1
      ;;
    \?)
      printf '%s: unknown option -%s\n' "$scriptName" "$OPTARG" >&2
      printUsage >&2
      exit 1
      ;;
  esac
done
# Drop exactly the arguments getopts consumed; do not shift inside the loop,
# because that would put OPTIND out of step with the positional parameters.
shift $((OPTIND - 1))

if [[ $# -lt 1 ]]; then
  printf '%s: missing input_file\n' "$scriptName" >&2
  printUsage >&2
  exit 1
fi

# When several operands are given, the last one is taken as the input file.
FILE="${@: -1}"

if [[ ! -r "$FILE" ]]; then
  printf '%s: cannot read input file: %s\n' "$scriptName" "$FILE" >&2
  exit 1
fi

# A single awk pass replaces the per-line shell pipeline. The comparison is
# made against the count columns only (fields 2..NF), so a number that merely
# appears inside the ortholog group ID cannot select a row. The file is fed
# on stdin so that an unusual file name is never interpreted by awk itself.
awk -v copy="$copynum" '
  BEGIN { OFS = "\t" }

  # First line is the header: pass it through untouched, never filter it.
  NR == 1 { print; next }

  # A usable row has an ID plus at least one count column.
  NF < 2 { next }

  {
    # Compare as strings so that only an exact textual match is accepted.
    for (i = 2; i <= NF; i++) {
      if (($i "") != (copy "")) next
    }
    $1 = $1   # rebuild the record so the fields are joined with tabs
    print
  }
' < "$FILE"