Skip to content

Commit

Permalink
made new commit
Browse files Browse the repository at this point in the history
  • Loading branch information
peytontolbert committed Mar 17, 2024
1 parent 802636f commit 8c14701
Show file tree
Hide file tree
Showing 4 changed files with 208 additions and 26 deletions.
129 changes: 112 additions & 17 deletions main.tf
Original file line number Diff line number Diff line change
@@ -1,15 +1,63 @@
provider "aws" {
region = "us-east-1"
region = "us-west-2"
}
# ACM certificate for the API hostname, validated through DNS records.
resource "aws_acm_certificate" "cert" {
  domain_name       = "api.swarms.world"
  validation_method = "DNS"

  tags = {
    Name = "your_domain_certificate"
  }

  # When the certificate must be replaced, build the new one before the
  # existing one is destroyed.
  lifecycle {
    create_before_destroy = true
  }
}

terraform {
backend "remote" {
organization = "swarms"

workspaces {
name = "swarms"
# One Route 53 record per domain validation option exposed by the ACM
# certificate, keyed by the domain name the option belongs to.
resource "aws_route53_record" "cert_validation" {
  for_each = {
    for opt in aws_acm_certificate.cert.domain_validation_options : opt.domain_name => {
      name   = opt.resource_record_name
      record = opt.resource_record_value
      type   = opt.resource_record_type
    }
  }

  # Hosted zone that receives the validation records (hard-coded ID).
  zone_id = "Z0629215JQIY0GI18GHF"

  name    = each.value.name
  type    = each.value.type
  ttl     = 60
  records = [each.value.record]
}
# Ties the ACM certificate to the FQDNs of its DNS validation records.
#
# No explicit depends_on is needed: certificate_arn already references
# aws_acm_certificate.cert, and validation_record_fqdns references
# aws_route53_record.cert_validation, so Terraform orders all three
# resources from those expressions alone.
resource "aws_acm_certificate_validation" "cert" {
  certificate_arn         = aws_acm_certificate.cert.arn
  validation_record_fqdns = [for record in aws_route53_record.cert_validation : record.fqdn]
}

# Alias A record that points the API hostname at the application load balancer.
resource "aws_route53_record" "lb_dns" {
  name    = "api.swarms.world"
  type    = "A"
  zone_id = "Z0629215JQIY0GI18GHF" # hosted zone for the record (hard-coded ID)

  # Alias target: DNS name and zone are read from the load balancer resource.
  alias {
    name                   = aws_lb.model_api_lb.dns_name
    zone_id                = aws_lb.model_api_lb.zone_id
    evaluate_target_health = true
  }
}

# CAA record for the API hostname with a single "issue" entry for amazon.com.
resource "aws_route53_record" "yourdomain_caa_amazon" {
  name    = "api.swarms.world"
  type    = "CAA"
  zone_id = "Z0629215JQIY0GI18GHF"
  ttl     = "300"

  records = ["0 issue \"amazon.com\""]
}

# Validation of the ACM certificate against its DNS validation records.
# NOTE(review): this uses the same certificate_arn and validation_record_fqdns
# as aws_acm_certificate_validation.cert defined earlier in this file; confirm
# whether both resources are really needed.
resource "aws_acm_certificate_validation" "model_api_cert" {
  certificate_arn = aws_acm_certificate.cert.arn

  validation_record_fqdns = [
    for rec in aws_route53_record.cert_validation : rec.fqdn
  ]
}

resource "aws_key_pair" "ssh_key" {
Expand All @@ -31,14 +79,14 @@ resource "aws_subnet" "main" {
vpc_id = aws_vpc.main.id
cidr_block = "10.0.1.0/24"
map_public_ip_on_launch = true
availability_zone = "us-east-1a"
availability_zone = "us-west-2a"
}

resource "aws_subnet" "main2" {
vpc_id = aws_vpc.main.id
cidr_block = "10.0.2.0/24"
map_public_ip_on_launch = true
availability_zone = "us-east-1b"
availability_zone = "us-west-2b"
}

resource "aws_route_table" "public" {
Expand Down Expand Up @@ -83,6 +131,34 @@ resource "aws_security_group" "model_api_sg" {
}
}

# Security group referenced by aws_lb.model_api_lb: accepts HTTP and HTTPS
# from any IPv4 address and allows all outbound traffic.
# NOTE(review): the description string below talks about EC2 instances even
# though this group is attached to the load balancer. It is left unchanged
# here; confirm the impact on the existing resource before editing it.
resource "aws_security_group" "model_lb_sg" {
  name        = "model_lb_sgwest"
  description = "Security group for model API EC2 instances"
  vpc_id      = aws_vpc.main.id

  # Plain HTTP from anywhere.
  ingress {
    protocol    = "tcp"
    from_port   = 80
    to_port     = 80
    cidr_blocks = ["0.0.0.0/0"]
  }

  # HTTPS from anywhere.
  ingress {
    protocol    = "tcp"
    from_port   = 443
    to_port     = 443
    cidr_blocks = ["0.0.0.0/0"]
  }

  # Unrestricted egress: protocol "-1" with ports 0-0 covers all traffic.
  egress {
    protocol    = "-1"
    from_port   = 0
    to_port     = 0
    cidr_blocks = ["0.0.0.0/0"]
  }
}

resource "aws_iam_instance_profile" "ssm" {
name = "ssm"
role = aws_iam_role.ssm.name
Expand Down Expand Up @@ -111,18 +187,19 @@ resource "aws_iam_role_policy_attachment" "ssm" {
}

resource "aws_lb" "model_api_lb" {
name = "model-api-lb"
name = "model-api-lbwest"
internal = false
load_balancer_type = "application"
security_groups = [aws_security_group.model_api_sg.id]
security_groups = [aws_security_group.model_lb_sg.id]
subnets = [aws_subnet.main.id, aws_subnet.main2.id]

enable_deletion_protection = true
enable_deletion_protection = false
}


resource "aws_launch_configuration" "model_api_conf" {
name_prefix = "model-api-"
image_id = "ami-048eeb679c8e04a87"
image_id = "ami-034b335fccb9ed729"
instance_type = "p3.2xlarge" # Choose an appropriate instance type
security_groups = [aws_security_group.model_api_sg.id]
iam_instance_profile = aws_iam_instance_profile.ssm.name
Expand Down Expand Up @@ -168,18 +245,36 @@ resource "aws_autoscaling_group" "model_api_asg" {
}

resource "aws_lb_target_group" "model_api_tg" {
name = "model-api-tg"
name = "model-api-tgwest"
port = 8000
protocol = "HTTP"
vpc_id = aws_vpc.main.id

stickiness {
enabled = true
type = "lb_cookie"
cookie_duration = 86400 # Duration in seconds, this example sets it to 1 day
}
}

resource "aws_lb_listener" "model_api_listener" {
resource "aws_lb_listener" "http_redirect" {
load_balancer_arn = aws_lb.model_api_lb.arn
port = 8000
port = 80
protocol = "HTTP"

# Redirect every plain-HTTP request to the HTTPS listener on port 443 rather
# than forwarding it unencrypted to the target group, so the listener behaves
# as its "http_redirect" name says. HTTP_301 marks the redirect as permanent.
default_action {
  type = "redirect"

  redirect {
    port        = "443"
    protocol    = "HTTPS"
    status_code = "HTTP_301"
  }
}
}


resource "aws_lb_listener" "model_api_https_listener" {
load_balancer_arn = aws_lb.model_api_lb.arn
port = 443
protocol = "HTTPS"
ssl_policy = "ELBSecurityPolicy-2016-08" # Default policy, adjust as needed
certificate_arn = aws_acm_certificate.cert.arn

default_action {
type = "forward"
target_group_arn = aws_lb_target_group.model_api_tg.arn
Expand Down
55 changes: 55 additions & 0 deletions scripts/terraform_scripts/swarm_cloud_terraform/hpa.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
// Horizontal pod autoscaler for the CogVLM deployment. Scales between 2 and
// 50 replicas on the per-pod custom metric "inference_requests_per_minute".
resource "kubernetes_horizontal_pod_autoscaler" "cogvlm_hpa" {
  metadata {
    name = "cogvlm-hpa"
  }

  spec {
    min_replicas = 2
    max_replicas = 50 // Adjust based on the maximum expected scale

    // Workload being scaled; the name is read from the deployment resource.
    scale_target_ref {
      api_version = "apps/v1"
      kind        = "Deployment"
      name        = kubernetes_deployment.cogvlm_deployment.metadata[0].name
    }

    // The provider schema names this block "metric" (singular); a "metrics"
    // block is rejected as an unsupported block type.
    metric {
      type = "Pods"

      pods {
        metric {
          name = "inference_requests_per_minute" // Adjusted to per minute
        }

        target {
          type          = "AverageValue"
          average_value = "3" // Target requests per pod per minute
        }
      }
    }
  }
}

// Horizontal pod autoscaler for the Qwen-VL deployment. Scales between 2 and
// 100 replicas on the per-pod custom metric "inference_requests_per_minute".
resource "kubernetes_horizontal_pod_autoscaler" "qwenvl_hpa" {
  metadata {
    name = "qwenvl-hpa"
  }

  spec {
    min_replicas = 2
    max_replicas = 100 // Adjust based on the maximum expected scale

    // Workload being scaled; the name is read from the deployment resource.
    scale_target_ref {
      api_version = "apps/v1"
      kind        = "Deployment"
      name        = kubernetes_deployment.qwenvl_deployment.metadata[0].name
    }

    // The provider schema names this block "metric" (singular); a "metrics"
    // block is rejected as an unsupported block type.
    metric {
      type = "Pods"

      pods {
        metric {
          name = "inference_requests_per_minute" // Adjusted to per minute
        }

        target {
          type          = "AverageValue"
          average_value = "3" // Target requests per pod per minute
        }
      }
    }
  }
}
21 changes: 12 additions & 9 deletions scripts/terraform_scripts/swarm_cloud_terraform/launchec2.tf
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ resource "aws_instance" "k8s_master" {

user_data = base64encode(<<-EOF
#!/bin/bash
# Log the status of Docker and Kubernetes services
echo "Checking the status of Docker service..."
sudo systemctl status docker | sudo tee /var/log/docker-status.log > /dev/null
Expand All @@ -25,18 +24,22 @@ user_data = base64encode(<<-EOF
echo "Ensuring kubelet service is running..."
sudo systemctl enable --now kubelet
# Log Kubernetes cluster status
echo "Logging Kubernetes cluster status..."
sudo kubectl cluster-info | sudo tee /var/log/kubectl-cluster-info.log > /dev/null
# Check for any not running system pods
echo "Checking for any not running system pods..."
sudo kubectl get pods --all-namespaces | grep -v Running | sudo tee /var/log/kubectl-non-running-pods.log > /dev/null
#Initiate the kubernetes network
kubeadm init --pod-network-cidr=10.244.0.0/16
mkdir -p $HOME/.kube
cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
chown $(id -u):$(id -g) $HOME/.kube/config
# Apply network plugin if not already applied (idempotent operation)
echo "Applying Flannel CNI plugin..."
sudo kubectl apply -f https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml
# Wait for Flannel to be fully up
sleep 30
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
helm repo add grafana https://grafana.github.io/helm-charts
helm repo update
helm install prometheus prometheus-community/prometheus --namespace monitoring --create-namespace
helm install grafana grafana/grafana --namespace monitoring --create-namespace
JOIN_COMMAND=$(kubeadm token create --print-join-command)
echo "$JOIN_COMMAND" > /tmp/k8s-join-command.sh
aws s3 cp /tmp/k8s-join-command.sh s3://swarmskube/k8s-join-command.sh
Expand Down
29 changes: 29 additions & 0 deletions scripts/terraform_scripts/swarm_cloud_terraform/securitygroups.tf
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,21 @@ resource "aws_security_group" "k8s_master_sg" {
cidr_blocks = ["0.0.0.0/0"] # Adjust this to a more restricted CIDR block for enhanced security
}

# Prometheus: inbound TCP 9090, restricted to one source address.
ingress {
from_port = 9090
to_port = 9090
protocol = "tcp"
cidr_blocks = ["199.204.135.66/32"] # single allowed /32 source; update here if the permitted address changes
}

# Grafana: inbound TCP 3000, restricted to one source address.
ingress {
from_port = 3000
to_port = 3000
protocol = "tcp"
cidr_blocks = ["199.204.135.66/32"] # single allowed /32 source; update here if the permitted address changes
}
ingress {
from_port = 8080
to_port = 8080
Expand All @@ -57,7 +72,21 @@ resource "aws_security_group" "k8s_worker_sg" {
name = "k8s_worker_sg"
description = "Security group for Kubernetes workers"
vpc_id = aws_vpc.main.id
# Prometheus: inbound TCP 9090, restricted to one source address.
ingress {
from_port = 9090
to_port = 9090
protocol = "tcp"
cidr_blocks = ["199.204.135.66/32"] # single allowed /32 source; update here if the permitted address changes
}

# Grafana: inbound TCP 3000, restricted to one source address.
ingress {
from_port = 3000
to_port = 3000
protocol = "tcp"
cidr_blocks = ["199.204.135.66/32"] # single allowed /32 source; update here if the permitted address changes
}
# Allow all internal traffic for Kubernetes communication
ingress {
from_port = 0
Expand Down

0 comments on commit 8c14701

Please sign in to comment.