Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions common/tasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import logging
from datetime import timedelta

import celery.schedules
from django.conf import settings
from django.db.models import TextChoices

from config.celery import app

class TaskFrequency(TextChoices):
    """How often a periodic task runs; each member keys into SCHEDULES below."""
    five_minutes = "five_minutes"
    daily = "daily"


# Maps each TaskFrequency onto a concrete Celery schedule object.
SCHEDULES = {
    # Interval schedule: fires every five minutes.
    TaskFrequency.five_minutes: celery.schedules.schedule(run_every=timedelta(minutes=5)),
    # Crontab schedule: fires once a day at 07:00 (interpreted in the app's
    # configured timezone — CELERY_TIMEZONE in settings; confirm if this must be UTC).
    TaskFrequency.daily: celery.schedules.crontab(minute=0, hour=7),
}

# Set up scheduled tasks
@app.on_after_finalize.connect
def schedule_tasks(sender, **kwargs):
    """Register every entry of SCHEDULED_TASKS as a Celery beat periodic task.

    Runs once the Celery app is finalized, so SCHEDULED_TASKS (defined later
    in this module) is already populated by the time this fires.
    """
    # In eager mode (no broker configured — see settings) tasks run inline
    # in the web process, so there is nothing to schedule.
    if settings.CELERY_TASK_ALWAYS_EAGER:
        return
    for task_frequency, tasks in SCHEDULED_TASKS.items():
        schedule = SCHEDULES[task_frequency]
        # NOTE(review): this expects each SCHEDULED_TASKS value to be a dict
        # mapping periodic-task name -> task (it calls .items()); a set value
        # would raise AttributeError here — verify the entries match.
        for task_name, task in tasks.items():
            sender.add_periodic_task(schedule, task.s(), name=task_name)


# TODO: Delete me
@app.task
def sample_task():
    """Placeholder periodic task that only logs; remove once real tasks exist."""
    logging.info("Sample task running")


# Tasks to run at each frequency. Each value is a dict mapping a unique
# periodic-task name to the task to schedule — this is the shape
# schedule_tasks() consumes via .items() when calling add_periodic_task().
# (Was a set literal {sample_task}, which has no .items() and would have
# raised AttributeError when the on_after_finalize signal fired.)
SCHEDULED_TASKS = {
    TaskFrequency.five_minutes: {"sample_task": sample_task},
    TaskFrequency.daily: {},
}
28 changes: 28 additions & 0 deletions config/celery.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import os

from celery import Celery
from kombu.utils.json import register_type
from django.db.models import Model
from django.apps import apps

# Allow serialization of Django models in Celery task arguments.
# A model instance is encoded as the pair [app_label.ModelName, pk] and
# re-fetched from the database on the consuming side.
register_type(
    Model,
    "model",
    lambda o: [o._meta.label, o.pk],
    # NOTE: deserialization performs a DB query; Django's .get() raises
    # DoesNotExist if the row was deleted between enqueue and execution.
    lambda o: apps.get_model(o[0]).objects.get(pk=o[1]),
)

# Set the default Django settings module for the 'celery' program.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings')

app = Celery("sample-django-app")

# Using a string here means the worker doesn't have to serialize
# the configuration object to child processes.
# - namespace='CELERY' means all celery-related configuration keys
#   should have a `CELERY_` prefix (e.g. CELERY_BROKER_URL in settings).
app.config_from_object('django.conf:settings', namespace='CELERY')

# Load task modules from all registered Django apps
# (discovers a `tasks` module in each app, e.g. common/tasks.py).
app.autodiscover_tasks()
7 changes: 7 additions & 0 deletions config/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,3 +422,10 @@
]
COMPRESS_ROOT = STORAGES["sass_processor"]["ROOT"]
# END_FEATURE sass_bootstrap


# Celery configuration
# Keep Celery's schedule timezone in sync with Django's.
CELERY_TIMEZONE = TIME_ZONE
CELERY_BROKER_URL = env("CELERY_BROKER_URL", default="")
# If no broker URL configured, run tasks in the web process
# (ALWAYS_EAGER executes tasks synchronously at the .delay()/.s() call site).
CELERY_TASK_ALWAYS_EAGER = not CELERY_BROKER_URL
99 changes: 75 additions & 24 deletions deploy.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,13 @@ def deploy(args):
if args.skip_migration:
logging.info("Skipping database migration")
else:
# Stop worker before running migrations
stop_worker_service(env)
# Run and wait for migrations
run_migrations(args.env)

# Redeploy services
restart_web_service(args.env)
restart_services(args.env)


def setup(envs):
Expand Down Expand Up @@ -185,42 +187,91 @@ def run_migrations(env):
)
raise MigrationTimeOut()


def restart_web_service(env):
# Restart ECS web service to deploy new code
def stop_worker_service(env):
    """Scale the ECS worker service down to zero and wait for its tasks to stop.

    Called before database migrations so no worker is executing tasks against
    an in-flux schema. restart_services() later restores the desired count.

    NOTE(review): the wait loop below has no timeout (unlike run_migrations,
    which raises MigrationTimeOut) — a task stuck in deprovisioning would
    block the deploy indefinitely. Consider adding an upper bound.
    """
    logging.info("Stopping worker service")
    cluster_id = get_terraform_output("cluster_id", env)
    service_name = get_terraform_output("worker_service_name", env)
    ecs_client = boto3.session.Session(profile_name=AWS_PROFILE_NAME, region_name=AWS_REGION).client("ecs")
    # Ask ECS to run zero worker tasks; existing tasks begin draining.
    ecs_client.update_service(
        cluster=cluster_id,
        service=service_name,
        desiredCount=0
    )
    # Desired status may not have updated immediately -- try both RUNNING and STOPPED
    task_arns = ecs_client.list_tasks(cluster=cluster_id, serviceName=service_name)["taskArns"]
    if not task_arns:
        task_arns = ecs_client.list_tasks(
            cluster=cluster_id, serviceName=service_name, desiredStatus="STOPPED"
        )["taskArns"]
    if not task_arns:
        # Service was already scaled to zero (or never started any tasks).
        logging.info("No worker tasks to stop")
        return
    while True:
        # A task is fully stopped once ECS stamps its "stoppedAt" field.
        tasks = ecs_client.describe_tasks(cluster=cluster_id, tasks=task_arns)["tasks"]
        if all(task.get("stoppedAt") for task in tasks):
            break
        logging.info("Waiting for worker service to stop...")
        time.sleep(STATUS_CHECK_INTERVAL)
    logging.info("Worker service stopped")

def restart_services(env):
    """Force new deployments of the web and worker ECS services and wait for both.

    Restarts both web and worker in parallel (order doesn't matter since
    communication is handled via redis). The worker's desired task count is
    restored here because stop_worker_service() set it to 0 before migrations.

    Blocks until every PRIMARY deployment reports COMPLETED, or returns after
    logging an error if any deployment reports FAILED or an unknown state.
    """
    # Restart ECS web service to deploy new code
    logging.info("Redeploying web service...")
    web_service_name = get_terraform_output("web_service_name", env)
    cluster_id = get_terraform_output("cluster_id", env)
    ecs_client = boto3.session.Session(profile_name=AWS_PROFILE_NAME, region_name=AWS_REGION).client("ecs")
    ecs_client.update_service(
        cluster=cluster_id,
        service=web_service_name,
        forceNewDeployment=True,
    )
    # Restart worker service to deploy new code and set the desired task count
    # back to expected state (stop_worker_service scaled it to zero).
    logging.info("Redeploying worker service...")
    worker_service_name = get_terraform_output("worker_service_name", env)
    desired_task_count = get_terraform_output("worker_task_desired_count", env)
    ecs_client.update_service(
        cluster=cluster_id,
        service=worker_service_name,
        forceNewDeployment=True,
        desiredCount=int(desired_task_count)
    )

    while True:
        logging.info("Waiting for deployment to finish...")
        services_response = ecs_client.describe_services(
            cluster=cluster_id, services=[web_service_name, worker_service_name]
        )
        in_progress = False
        failure = False
        for service in services_response["services"]:
            # The PRIMARY deployment is the one created by forceNewDeployment.
            new_deployment = next(
                deployment for deployment in service["deployments"] if deployment["status"] == "PRIMARY"
            )
            deployment_state = new_deployment["rolloutState"]
            service_name = service['serviceName']
            # elif chain: an IN_PROGRESS service must not fall through to the
            # unknown-state branch and be flagged as a failure.
            if deployment_state == "IN_PROGRESS":
                in_progress = True
            elif deployment_state == "COMPLETED":
                logging.info(f"Success! Deployment complete for service {service_name}.")
            elif deployment_state == "FAILED":
                is_worker = service_name == worker_service_name
                logging.error(
                    f"Deployment failed for {service_name}! Reason: {new_deployment['rolloutStateReason']}. "
                    f"Check log stream for more info: {cloudwatch_log_url(env, worker=is_worker)}"
                )
                failure = True
            else:
                logging.warning(f"Unknown deployment state {deployment_state}. Please check the ECS console.")
                failure = True
        if in_progress and not failure:
            time.sleep(STATUS_CHECK_INTERVAL)
            continue
        break


def cloudwatch_log_url(env, worker=False):
    """Return the AWS console URL for an environment's CloudWatch log group.

    Backward-compatible: with worker=False (the default) it links to the web
    service's log group, as the original single-service version did; pass
    worker=True for the worker service's log group.
    """
    log_name_key = "worker_log_group_name" if worker else "web_log_group_name"
    cloudwatch_log_group_name = get_terraform_output(log_name_key, env)
    return f"https://{AWS_REGION}.console.aws.amazon.com/cloudwatch/home?region={AWS_REGION}#logsV2:log-groups/log-group/{cloudwatch_log_group_name}"


Expand Down
10 changes: 6 additions & 4 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -123,13 +123,15 @@ Following that, deploy your code to the environment (see below).


## Creating a new ECS environment
The terraform configuration for ECS deployments will create both a web and worker environment,
with a Redis instance to act as a task broker.

1. Create an ECR repository
2. Build and push an initial docker file to it (ECR provides docker commands for this).
3. Create a bucket for holding terraform config
4. Create an SES identity and from email (if using SES)
5. Create an AWS certificate manager certificate for your domain
6. Create a secrets manager secret containing the config parameters needed by the application (you do not need include "DATABASE_URL", "SECRET_KEY", "AWS_STORAGE_BUCKET_NAME", or "DEFAULT_FROM_EMAIL" as those are managed by terraform in `terraform/modules/ecs_deployment/secrets_manager.tf`)
6. Create a secrets manager secret containing the config parameters needed by the application (you do not need to include "DATABASE_URL", "SECRET_KEY", "AWS_STORAGE_BUCKET_NAME", "DEFAULT_FROM_EMAIL", or "CELERY_BROKER_URL" as those are managed by terraform in `terraform/modules/ecs_deployment/secrets_manager.tf`)
7. Fill in the missing values in `terraform/envs/<ENV_NAME>/main.tf`
8. Run terraform to set up that environment
```
Expand All @@ -138,7 +140,6 @@ terraform init
terraform plan
terraform apply
```

9. Redeploy your code using the steps described below (with the --use-latest option) to run initial migrations
10. Add a DNS entry from your domain name to the created load balancer

Expand All @@ -159,8 +160,9 @@ python deploy.py -env <ENV_NAME>
This script will do the following:
1. Build the docker image using your local code version.
2. Push the docker image to the ECR location for the specified environment
3. Run database migrations
4. Deploy to the running web service
3. Stop the running worker service
4. Run database migrations
5. Deploy to the running web service and restart the worker service

Run `python deploy.py --help` to see available options. You may choose to use an existing ECR image or skip migrations.

Expand Down
2 changes: 2 additions & 0 deletions requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ django-storages
# START_FEATURE docker
gunicorn
whitenoise
celery[redis]
celery-redbeat
# END_FEATURE docker

# START_FEATURE django_ses
Expand Down
34 changes: 26 additions & 8 deletions terraform/envs/production/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ module "ecs_deployment" {
rds_multi_az = true
container_web_cpu = 1024
container_web_memory = 1024
container_count = 2
container_web_count = 2
ssl_policy = "ELBSecurityPolicy-TLS13-1-2-Res-FIPS-2023-04"
}

Expand All @@ -50,9 +50,9 @@ output "cluster_id" {
value = module.ecs_deployment.cluster_id
}

output "cloudwatch_log_group_name" {
description = "The name of the cloudwatch log group for the web service task"
value = module.ecs_deployment.cloudwatch_log_group_name
output "ecr_image_uri" {
description = "ECR URI where the environment's image is stored"
value = module.ecs_deployment.ecr_image_uri
}

output "ecr_repository_name" {
Expand All @@ -66,14 +66,13 @@ output "public_ip" {
}

output "web_service_name" {
description = "The name of the ECS container running the web service"
description = "The name of the ECS web service. This is also the container name."
value = module.ecs_deployment.web_service_name
}

output "web_network_configuration_security_group" {
description = "The security groups used by the ECS web task"
value = tolist(module.ecs_deployment.web_network_configuration_security_groups)[0]

description = "The security group used by the ECS web task"
value = tolist(module.ecs_deployment.web_network_configuration_security_groups)[0]
}

output "web_network_configuration_subnet" {
Expand All @@ -85,3 +84,22 @@ output "web_task_definition_arn" {
description = "The ARN of the ECS web service task definition"
value = module.ecs_deployment.web_task_definition_arn
}

output "web_log_group_name" {
description = "The name of the cloudwatch log group for the web service task"
value = module.ecs_deployment.web_log_group_name
}

output "worker_service_name" {
description = "The name of the ECS worker service. This is also the container name."
value = module.ecs_deployment.worker_service_name
}

output "worker_task_desired_count" {
description = "The intended number of worker tasks"
value = module.ecs_deployment.worker_task_desired_count
}

output "worker_log_group_name" {
  # Fixed copy-paste: this output exposes the worker's log group, not the web's.
  description = "The name of the cloudwatch log group for the worker service task"
  value       = module.ecs_deployment.worker_log_group_name
34 changes: 26 additions & 8 deletions terraform/envs/staging/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ module "ecs_deployment" {
rds_multi_az = false
container_web_cpu = 256
container_web_memory = 1024
container_count = 1
container_web_count = 1
ssl_policy = "ELBSecurityPolicy-TLS13-1-2-Res-FIPS-2023-04"
}

Expand All @@ -50,9 +50,9 @@ output "cluster_id" {
value = module.ecs_deployment.cluster_id
}

output "cloudwatch_log_group_name" {
description = "The name of the cloudwatch log group for the web service task"
value = module.ecs_deployment.cloudwatch_log_group_name
output "ecr_image_uri" {
description = "ECR URI where the environment's image is stored"
value = module.ecs_deployment.ecr_image_uri
}

output "ecr_repository_name" {
Expand All @@ -66,14 +66,13 @@ output "public_ip" {
}

output "web_service_name" {
description = "The name of the ECS container running the web service"
description = "The name of the ECS web service. This is also the container name."
value = module.ecs_deployment.web_service_name
}

output "web_network_configuration_security_group" {
description = "The security groups used by the ECS web task"
value = tolist(module.ecs_deployment.web_network_configuration_security_groups)[0]

description = "The security group used by the ECS web task"
value = tolist(module.ecs_deployment.web_network_configuration_security_groups)[0]
}

output "web_network_configuration_subnet" {
Expand All @@ -85,3 +84,22 @@ output "web_task_definition_arn" {
description = "The ARN of the ECS web service task definition"
value = module.ecs_deployment.web_task_definition_arn
}

output "web_log_group_name" {
description = "The name of the cloudwatch log group for the web service task"
value = module.ecs_deployment.web_log_group_name
}

output "worker_service_name" {
description = "The name of the ECS worker service. This is also the container name."
value = module.ecs_deployment.worker_service_name
}

output "worker_task_desired_count" {
description = "The intended number of worker tasks"
value = module.ecs_deployment.worker_task_desired_count
}

output "worker_log_group_name" {
  # Fixed copy-paste: this output exposes the worker's log group, not the web's.
  description = "The name of the cloudwatch log group for the worker service task"
  value       = module.ecs_deployment.worker_log_group_name
Loading