From 8c97454286663847393e8f1c12fbc438ca6f947d Mon Sep 17 00:00:00 2001 From: Joseph Sirak Date: Thu, 6 Feb 2025 13:38:34 -0800 Subject: [PATCH 1/2] Check in a script that sets up Metaflow e2e on minikube --- scripts/metaflow_minikube | 530 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 530 insertions(+) create mode 100755 scripts/metaflow_minikube diff --git a/scripts/metaflow_minikube b/scripts/metaflow_minikube new file mode 100755 index 0000000..c6f7844 --- /dev/null +++ b/scripts/metaflow_minikube @@ -0,0 +1,530 @@ +#!/bin/bash +set -euo pipefail + +# Usage helper +usage() { + echo "Usage: $0 --install | --cleanup" + exit 1 +} + +# Check that exactly one parameter is provided. +if [ "$#" -ne 1 ]; then + usage +fi + +MODE="$1" + +######################################## +# Helm Repository Setup Function +######################################## +setup_helm_repos() { + echo "Setting up Helm repositories..." + repos=( + "minio=https://charts.min.io/" + "bitnami=https://charts.bitnami.com/bitnami" + "metaflow=https://outerbounds.github.io/metaflow-tools" + "argo=https://argoproj.github.io/argo-helm" + ) + for repo in "${repos[@]}"; do + alias="${repo%%=*}" + url="${repo#*=}" + if helm repo list | awk '{print $1}' | grep -qx "$alias"; then + echo "Helm repo '$alias' already exists." + else + echo "Adding Helm repo '$alias'..." + helm repo add "$alias" "$url" + fi + done + helm repo update +} + +######################################## +# Installation Functions (Installation Mode) +######################################## + +# Step 1: Check required commands and Python pip packages. +check_dependencies() { + echo "Step 1: Checking dependencies..." + missing=false + + # Check required commands + for cmd in minikube kubectl helm ngrok; do + if ! command -v "$cmd" >/dev/null 2>&1; then + echo "✖ ERROR: $cmd is not installed or not in PATH." + missing=true + else + echo "✔ $cmd found." 
+ fi + done + + if [ "$missing" = true ]; then + echo "Please install the missing commands and re-run the script." + exit 1 + fi + + # Check pip packages: metaflow and kubernetes + echo "" + echo "Checking Python pip packages..." + for package in metaflow kubernetes; do + if ! python3 -m pip show "$package" >/dev/null 2>&1; then + echo "✖ $package not found. Installing..." + python3 -m pip install "$package" + else + echo "✔ $package is installed." + fi + done +} + +# Step 2: Start Minikube cluster. +start_minikube() { + echo "Step 2: Starting minikube cluster..." + cluster_status=$(minikube status --format "{{.Host}}" 2>/dev/null || echo "Stopped") + if [ "$cluster_status" == "Running" ]; then + echo "Minikube cluster is already running." + else + echo "Starting minikube with 6 CPUs and 10240 MB memory..." + minikube start --cpus 6 --memory 10240 + fi +} + +# Step 3: Deploy MinIO via Helm. +deploy_minio() { + echo "Step 3: Deploying MinIO..." + helm upgrade --install minio-s3 minio/minio \ + --set resources.requests.memory=512Mi \ + --set replicas=1 --set persistence.enabled=false \ + --set mode=standalone \ + --set rootUser=rootuser,rootPassword=rootpass123 \ + --set buckets[0].name=metaflow-test,buckets[0].policy=none,buckets[0].purge=false +} + +# Step 4: Create Kubernetes secret for MinIO. +create_minio_secret() { + echo "Step 4: Creating MinIO secret..." + kubectl apply -f <(cat < /tmp/minio-portforward.log 2>&1 & + echo "MinIO port-forward running in the background (log: minio-portforward.log)." +} + +# Step 8: Start ngrok tunnel (background) and save the tunnel URL. +start_ngrok() { + echo "Step 8: Starting ngrok tunnel on port 9000..." + nohup ngrok http 9000 > /tmp/ngrok.log 2>&1 & + NGROK_PID=$! + echo "ngrok started with PID $NGROK_PID" + echo "Waiting for ngrok to initialize..." 
+ sleep 8 + NGROK_TUNNEL=$(curl --silent http://localhost:4040/api/tunnels | jq -r '.tunnels[0].public_url') + if [ -z "$NGROK_TUNNEL" ] || [ "$NGROK_TUNNEL" == "null" ]; then + echo "ERROR: Unable to retrieve ngrok tunnel URL. Ensure ngrok is running." + exit 1 + fi + echo "ngrok tunnel is available at: $NGROK_TUNNEL" +} + +# Step 9: Install Metaflow metadata service via Helm. +install_metadata_service() { + echo "Step 9: Installing Metaflow metadata service..." + helm upgrade --install metaflow-service metaflow/metaflow-service \ + --timeout 15m0s \ + --set metadatadb.user=metaflow \ + --set metadatadb.password=metaflow \ + --set metadatadb.database=metaflow \ + --set metadatadb.host=postgresql +} + +# Step 10: Port-forward the metadata service. +portforward_metadata() { + echo "Step 10: Port-forwarding metadata service on port 8080..." + echo "Waiting for metadata service pods to be ready..." + kubectl wait --for=condition=ready pod -l app.kubernetes.io/name=metaflow-service --timeout=300s + + METAFLOW_SVC=$(kubectl get svc -l app.kubernetes.io/name=metaflow-service -o jsonpath='{.items[0].metadata.name}') + if [ -z "$METAFLOW_SVC" ]; then + echo "ERROR: Could not locate metaflow-service." + exit 1 + fi + kubectl port-forward svc/"$METAFLOW_SVC" 8080:8080 > /tmp/metadata-service-portforward.log 2>&1 & + echo "Metadata service port-forward running on port 8080 (log: metadata-service-portforward.log)." +} + +# Step 11: Install Argo Workflows. +install_argo() { + echo "Step 11: Installing Argo Workflows..." + kubectl create ns argo || echo "Namespace 'argo' already exists." + kubectl apply -n argo -f https://github.com/argoproj/argo-workflows/releases/latest/download/quick-start-postgres.yaml + kubectl apply -f <(cat < /tmp/metaflow-ui-backend.log 2>&1 & + echo "Metaflow UI backend port-forward running on port 8083 (log: metaflow-ui-backend.log)." 
+ + METAFLOW_UI_STATIC=$(kubectl get svc -l app.kubernetes.io/name=metaflow-ui-static -o jsonpath='{.items[0].metadata.name}') + if [ -z "$METAFLOW_UI_STATIC" ]; then + echo "ERROR: Metaflow UI static service not found." + exit 1 + fi + kubectl port-forward svc/"$METAFLOW_UI_STATIC" 3000:3000 > /tmp/metaflow-ui-static.log 2>&1 & + echo "Metaflow UI static port-forward running on port 3000 (log: metaflow-ui-static.log)." +} + +# Step 16: Create Metaflow configuration file. +create_metaflow_config() { + echo "Step 16: Creating Metaflow config file..." + if [ -z "${NGROK_TUNNEL:-}" ]; then + echo "Retrieving NGROK_TUNNEL..." + NGROK_TUNNEL=$(curl --silent http://localhost:4040/api/tunnels | jq -r '.tunnels[0].public_url') + fi + if [ -z "$NGROK_TUNNEL" ] || [ "$NGROK_TUNNEL" == "null" ]; then + echo "ERROR: NGROK_TUNNEL could not be determined." + exit 1 + fi + CONFIG_DIR="$HOME/.metaflowconfig" + mkdir -p "$CONFIG_DIR" + cat > "$CONFIG_DIR/config_minikube.json" < Date: Thu, 20 Feb 2025 14:00:18 -0800 Subject: [PATCH 2/2] Remove the need to use ngrok --- scripts/metaflow_minikube | 53 ++++++++++----------------------------- 1 file changed, 13 insertions(+), 40 deletions(-) diff --git a/scripts/metaflow_minikube b/scripts/metaflow_minikube index c6f7844..fa8351b 100755 --- a/scripts/metaflow_minikube +++ b/scripts/metaflow_minikube @@ -20,7 +20,7 @@ MODE="$1" setup_helm_repos() { echo "Setting up Helm repositories..." repos=( - "minio=https://charts.min.io/" + "minio-s3=https://charts.min.io/" "bitnami=https://charts.bitnami.com/bitnami" "metaflow=https://outerbounds.github.io/metaflow-tools" "argo=https://argoproj.github.io/argo-helm" @@ -48,7 +48,7 @@ check_dependencies() { missing=false # Check required commands - for cmd in minikube kubectl helm ngrok; do + for cmd in minikube kubectl helm; do if ! command -v "$cmd" >/dev/null 2>&1; then echo "✖ ERROR: $cmd is not installed or not in PATH." 
missing=true @@ -90,11 +90,12 @@ start_minikube() { # Step 3: Deploy MinIO via Helm. deploy_minio() { echo "Step 3: Deploying MinIO..." - helm upgrade --install minio-s3 minio/minio \ + helm upgrade --install minio-s3 minio-s3/minio \ --set resources.requests.memory=512Mi \ --set replicas=1 --set persistence.enabled=false \ --set mode=standalone \ - --set rootUser=rootuser,rootPassword=rootpass123 \ + --set rootUser=rootuser \ + --set rootPassword=rootpass123 \ --set buckets[0].name=metaflow-test,buckets[0].policy=none,buckets[0].purge=false } @@ -111,6 +112,7 @@ type: Opaque stringData: AWS_ACCESS_KEY_ID: rootuser AWS_SECRET_ACCESS_KEY: rootpass123 + AWS_ENDPOINT_URL_S3: http://minio-s3.default.svc.cluster.local:9000 EOF ) } @@ -127,25 +129,13 @@ install_postgres() { # Step 7: Port-forward MinIO service. portforward_minio() { echo "Step 7: Port-forwarding MinIO on port 9000..." + echo "Waiting for MinIO pod to be ready..." + kubectl wait --for=condition=ready pod -l app=minio --timeout=300s kubectl port-forward svc/minio-s3 9000 > /tmp/minio-portforward.log 2>&1 & echo "MinIO port-forward running in the background (log: minio-portforward.log)." } -# Step 8: Start ngrok tunnel (background) and save the tunnel URL. -start_ngrok() { - echo "Step 8: Starting ngrok tunnel on port 9000..." - nohup ngrok http 9000 > /tmp/ngrok.log 2>&1 & - NGROK_PID=$! - echo "ngrok started with PID $NGROK_PID" - echo "Waiting for ngrok to initialize..." - sleep 8 - NGROK_TUNNEL=$(curl --silent http://localhost:4040/api/tunnels | jq -r '.tunnels[0].public_url') - if [ -z "$NGROK_TUNNEL" ] || [ "$NGROK_TUNNEL" == "null" ]; then - echo "ERROR: Unable to retrieve ngrok tunnel URL. Ensure ngrok is running." - exit 1 - fi - echo "ngrok tunnel is available at: $NGROK_TUNNEL" -} + # Step 9: Install Metaflow metadata service via Helm. install_metadata_service() { @@ -371,14 +361,6 @@ EOF install_metaflow_ui() { echo "Step 14: Installing Metaflow UI..." 
- if [ -z "${NGROK_TUNNEL:-}" ]; then - echo "Retrieving NGROK_TUNNEL..." - NGROK_TUNNEL=$(curl --silent http://localhost:4040/api/tunnels | jq -r '.tunnels[0].public_url') - fi - if [ -z "$NGROK_TUNNEL" ] || [ "$NGROK_TUNNEL" == "null" ]; then - echo "ERROR: Unable to retrieve ngrok tunnel URL. Ensure ngrok is running." - exit 1 - fi helm upgrade --install metaflow-ui metaflow/metaflow-ui \ --timeout 15m0s \ @@ -390,7 +372,7 @@ uiBackend: name: metaflow password: metaflow metaflowDatastoreSysRootS3: s3://metaflow-test - metaflowS3EndpointURL: ${NGROK_TUNNEL} + metaflowS3EndpointURL: http://minio-s3.default.svc.cluster.local:9000 env: - name: AWS_ACCESS_KEY_ID value: rootuser @@ -429,20 +411,11 @@ portforward_metaflow_ui() { # Step 16: Create Metaflow configuration file. create_metaflow_config() { echo "Step 16: Creating Metaflow config file..." - if [ -z "${NGROK_TUNNEL:-}" ]; then - echo "Retrieving NGROK_TUNNEL..." - NGROK_TUNNEL=$(curl --silent http://localhost:4040/api/tunnels | jq -r '.tunnels[0].public_url') - fi - if [ -z "$NGROK_TUNNEL" ] || [ "$NGROK_TUNNEL" == "null" ]; then - echo "ERROR: NGROK_TUNNEL could not be determined." - exit 1 - fi CONFIG_DIR="$HOME/.metaflowconfig" mkdir -p "$CONFIG_DIR" cat > "$CONFIG_DIR/config_minikube.json" <