Skip to content

Commit

Permalink
Merge pull request #9 from genomehubs/rohan/basic-endpoint
Browse files Browse the repository at this point in the history
[GSoC 2024] Integrating KinFin Proteome Cluster analyses into Genome Browsing environments
  • Loading branch information
rjchallis authored Aug 27, 2024
2 parents 5a423b8 + 63c444a commit 29109b7
Show file tree
Hide file tree
Showing 40 changed files with 6,709 additions and 2,364 deletions.
6 changes: 0 additions & 6 deletions .eggs/README.txt

This file was deleted.

5 changes: 5 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
CLUSTER_FILE_PATH=/absolute/path/to/Orthogroups.txt
SEQUENCE_IDS_FILE_PATH=/absolute/path/to/SequenceIDs.txt
TAXON_IDX_MAPPING_FILE_PATH=/absolute/path/to/taxon_idx_mapping.json
RESULTS_BASE_DIR=/absolute/path/where/all/results/should/be/stored/
SESSION_INACTIVITY_THRESHOLD=24
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,9 @@ example/test.*
build/
dist/

venv
.test_data
result
.DS_Store
.env
data
2,195 changes: 0 additions & 2,195 deletions build/lib/kinfin/kinfin.py

This file was deleted.

Binary file removed dist/kinfin-0.9-py2.7.egg
Binary file not shown.
70 changes: 70 additions & 0 deletions example/curl_examples.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
### 1. Initialize the Analysis Process

```bash
curl -X POST "http://127.0.0.1:8000/kinfin/init" \
-H "Content-Type: application/json" \
-d '{"config": [{ "taxon": "BGLAB", "label1": "red" },{ "taxon": "CVIRG", "label1": "red" },{ "taxon": "DPOLY", "label1": "red" },{ "taxon": "GAEGI", "label1": "red" },{ "taxon": "LJAPO", "label1": "red" },{ "taxon": "LSAXA", "label1": "red" },{ "taxon": "MANGU", "label1": "red" },{ "taxon": "MAREN", "label1": "red" },{ "taxon": "MGIGA", "label1": "red" },{ "taxon": "MMERC", "label1": "red" },{ "taxon": "MTROS", "label1": "blue" },{ "taxon": "OBIMA", "label1": "blue" },{ "taxon": "OEDUL", "label1": "blue" },{ "taxon": "OSINE", "label1": "blue" },{ "taxon": "OVULG", "label1": "blue" },{ "taxon": "PCANA", "label1": "blue" },{ "taxon": "PMAXI", "label1": "blue" },{ "taxon": "PVULG", "label1": "blue" },{ "taxon": "TGRAN", "label1": "blue" }]}' | jq
```

### 2. Get Run Status

```bash
curl -X GET "http://127.0.0.1:8000/kinfin/status" \
-H "x-session-id: <session_id>" | jq
```

### 3. Get Run Summary

```bash
curl -X GET "http://127.0.0.1:8000/kinfin/run-summary" \
-H "x-session-id: <session_id>" | jq
```

### 4. Get Available Attributes and Taxon Sets

```bash
curl -X GET "http://127.0.0.1:8000/kinfin/available-attributes-taxonsets" \
-H "x-session-id: <session_id>" | jq
```

### 5. Get Counts by Taxon

```bash
curl -X GET "http://127.0.0.1:8000/kinfin/counts-by-taxon" \
-H "x-session-id: <session_id>" | jq
```

### 6. Get Cluster Summary

```bash
curl -X GET "http://127.0.0.1:8000/kinfin/cluster-summary/label1" \
-H "x-session-id: <session_id>" | jq
```

### 7. Get Attribute Summary

```bash
curl -X GET "http://127.0.0.1:8000/kinfin/attribute-summary/label1" \
-H "x-session-id: <session_id>" | jq
```

### 8. Get Cluster Metrics

```bash
curl -X GET "http://127.0.0.1:8000/kinfin/cluster-metrics/label1/red" \
-H "x-session-id: <session_id>" | jq
```

### 9. Get Pairwise Analysis

```bash
curl -X GET "http://127.0.0.1:8000/kinfin/pairwise-analysis/label1" \
-H "x-session-id: <session_id>" | jq
```

### 10. Get Plot

```bash
curl -X GET "http://127.0.0.1:8000/kinfin/plot/<plot_type>" \
-H "x-session-id: <session_id>" -o "<filename>.png"
```
8 changes: 8 additions & 0 deletions example/taxon_idx_mapping.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"A": "0",
"B": "1",
"C": "2",
"D": "3",
"E": "4",
"F": "5"
}
84 changes: 0 additions & 84 deletions install

This file was deleted.

143 changes: 143 additions & 0 deletions install.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
#!/usr/bin/env bash

#######################################
# Print a colour-tagged log line.
# Arguments:
#   $1 - level: INFO, SUCCESS, WARN or ERROR
#   $2 - message; printed verbatim, so backslashes in paths or URLs are
#        shown exactly as given (no escape-sequence expansion)
# Outputs:
#   INFO and SUCCESS are written to stdout.
#   WARN, ERROR and the "Invalid log level" diagnostic are written to stderr.
#######################################
log() {
  # ANSI colour escapes, stored as real ESC bytes so printf can use plain %s.
  local green=$'\033[0;32m'
  local yellow=$'\033[0;33m'
  local red=$'\033[0;31m'
  local no_color=$'\033[0m'

  local level=${1-}
  local message=${2-}

  case "$level" in
    INFO)
      # INFO is deliberately uncoloured: the tag is wrapped in two resets.
      printf '[%sINFO%s] - %s\n' "$no_color" "$no_color" "$message"
      ;;
    SUCCESS)
      printf '[%sSUCCESS%s] - %s\n' "$green" "$no_color" "$message"
      ;;
    WARN)
      printf '[%sWARN%s] - %s\n' "$yellow" "$no_color" "$message" >&2
      ;;
    ERROR)
      printf '[%sERROR%s] - %s\n' "$red" "$no_color" "$message" >&2
      ;;
    *)
      # A bad level is a programming error; report it as a diagnostic.
      printf 'Invalid log level: %s\n' "$level" >&2
      ;;
  esac
}

#######################################
# Verify that every external tool the installer needs can be found.
# Arguments:
#   None
# Outputs:
#   Progress and results via log (one SUCCESS line per tool when all are
#   present, or a single ERROR line listing the missing ones).
# Returns:
#   0 when all tools are available; otherwise exits the script with status 1.
#######################################
check_dependencies() {
  log INFO "Checking dependencies..."

  local dependencies=("wget" "gunzip")
  local missing_dependencies=()
  local dependency

  for dependency in "${dependencies[@]}"; do
    # Rely on the exit status of `command -v` instead of testing its output
    # with -x: the output is not guaranteed to be a filesystem path.
    if ! command -v "$dependency" > /dev/null 2>&1; then
      missing_dependencies+=("$dependency")
    fi
  done

  if (( ${#missing_dependencies[@]} > 0 )); then
    log ERROR "Missing dependencies: ${missing_dependencies[*]}. Please install them."
    exit 1
  fi

  for dependency in "${dependencies[@]}"; do
    log SUCCESS "$dependency is installed."
  done
  log SUCCESS "All dependencies are installed."
  return 0
}

#######################################
# Download one file into "$DIR/data/" with wget.
# Globals:
#   DIR (read) - installation directory; must be set and non-empty
# Arguments:
#   $1 - URL to fetch
#   $2 - file name, used only in log messages
# Outputs:
#   Progress via log; wget's own progress bar (--show-progress).
# Returns:
#   0 on success; exits the script with status 1 if wget fails.
#######################################
download_file() {
  local url=$1
  local filename=$2

  log INFO "Downloading $filename from $url"

  # Call wget by name (check_dependencies has already verified it exists)
  # rather than through an unquoted $(which wget), which breaks when the
  # resolved path contains spaces or `which` itself is unavailable.
  # ${DIR:?} aborts instead of silently downloading into "/data/".
  if wget -np -nd -qN --show-progress "$url" -P "${DIR:?DIR is not set}/data/"; then
    log SUCCESS "Downloaded $filename"
  else
    log ERROR "Failed to download $filename from $url"
    exit 1
  fi
}

#######################################
# Decompress a .gz file to a destination path, keeping the archive.
# Arguments:
#   $1 - path of the gzip-compressed input file
#   $2 - path the decompressed content is written to
# Outputs:
#   Progress via log.
# Returns:
#   0 on success; exits the script with status 1 on failure. On failure no
#   file is left at $2, so a later run does not mistake a truncated file for
#   a completed extraction.
#######################################
extract_gzip() {
  local gz_file=$1
  local dest=$2
  # Stage the output next to the destination so the final rename stays on
  # the same filesystem.
  local partial="${dest}.partial"

  log INFO "Extracting $gz_file..."

  if gunzip -c -- "$gz_file" > "$partial" && mv -f -- "$partial" "$dest"; then
    log SUCCESS "Extracted $gz_file at $dest"
  else
    rm -f -- "$partial"
    log ERROR "Failed to extract $gz_file. Please download kinfin again."
    exit 1
  fi
}



#######################################
# Installer entry point: check tools, prepare the data files and create the
# ./kinfin launcher script.
# Globals:
#   DIR (written) - directory containing this script; read by download_file
# Arguments:
#   None
# Outputs:
#   Progress via log; creates "$DIR/kinfin" and files under "$DIR/data/".
# Returns:
#   Exits with status 1 if a dependency, data file or the launcher cannot be
#   provided.
#######################################
main() {
  # Set working directory. DIR is intentionally global (download_file uses it).
  DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" || {
    log ERROR "Could not determine the installation directory."
    exit 1
  }

  check_dependencies

  log INFO "Checking input data files..."

  local pfam_dest="$DIR/data/Pfam-A.clans.tsv.gz"
  local ipr_dest="$DIR/data/entry.list"
  local go_dest="$DIR/data/interpro2go"
  local nodesdbgz="$DIR/data/nodesdb.gz"
  local nodesdb="$DIR/data/nodesdb.txt"

  # nodesdb ships compressed with the repository; unpack it once.
  if [[ ! -f "$nodesdb" ]]; then
    if [[ -f "$nodesdbgz" ]]; then
      extract_gzip "$nodesdbgz" "$nodesdb"
    else
      log ERROR "$nodesdbgz not found. Please download kinfin again."
      exit 1
    fi
  else
    log SUCCESS "$nodesdb is already present."
  fi

  # The URLs carry an explicit https:// scheme: a scheme-less URL makes wget
  # fall back to plain HTTP, which offers no integrity protection.
  if [[ ! -f "$pfam_dest" ]]; then
    download_file "https://ftp.ebi.ac.uk/pub/databases/Pfam/current_release/Pfam-A.clans.tsv.gz" "Pfam-A.clans.tsv.gz"
  else
    log SUCCESS "Pfam-A.clans.tsv.gz is already present."
  fi

  if [[ ! -f "$ipr_dest" ]]; then
    download_file "https://ftp.ebi.ac.uk/pub/databases/interpro/current_release/entry.list" "entry.list"
  else
    log SUCCESS "entry.list is already present."
  fi

  if [[ ! -f "$go_dest" ]]; then
    download_file "https://ftp.ebi.ac.uk/pub/databases/interpro/current_release/interpro2go" "interpro2go"
  else
    log SUCCESS "interpro2go is already present."
  fi

  log SUCCESS "All required files downloaded."

  # Create executable
  log INFO "Creating executable..."
  local launcher="$DIR/kinfin"
  # The launcher lines are single-quoted on purpose: $DIR and "$@" must be
  # expanded when the launcher runs, not now. Inside the launcher the script
  # path is quoted so installations under a path with spaces still work.
  # shellcheck disable=SC2016
  if ! {
    printf '%s\n' \
      '#!/usr/bin/env bash' \
      'DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"' \
      '"$DIR/src/kinfin.py" "$@"' > "$launcher" \
      && chmod +x "$launcher"
  }; then
    log ERROR "Failed to create executable $launcher"
    exit 1
  fi

  # Done
  log SUCCESS "Kinfin was installed. Please run ./kinfin --help"
}

# Script entry point: run the installer. main takes no arguments.
main
3 changes: 3 additions & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-r requirements.txt
fastapi==0.111.0
pytest==8.2.2
11 changes: 6 additions & 5 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
scipy==1.11.1
matplotlib==2.0.2
scipy==1.13.1
matplotlib==3.9.0
docopt==0.6.2
networkx==1.11
powerlaw==1.4.1
ete3==3.0.0b35
networkx==3.3
powerlaw==1.5
ete3==3.1.3
fastapi==0.111.0
4 changes: 2 additions & 2 deletions scripts/get_protein_ids_from_cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,8 @@ def parse_groups(group_f):


def write_output(output, outprefix):
headers_found = set([k for k, v in headers.iteritems() if v])
clusters_found = set([k for k, v in clusters.iteritems() if v])
headers_found = set([k for k, v in headers.items() if v])
clusters_found = set([k for k, v in clusters.items() if v])
if headers:
print("[+] Found %s of headers ..." % "{:.0%}".format(len(headers_found) / len(headers)))
if clusters:
Expand Down
Loading

0 comments on commit 29109b7

Please sign in to comment.