DevOps Mastery: Building Robust CI/CD Pipelines
Modern software development demands automated, reliable deployment pipelines. Let’s build a comprehensive CI/CD system that handles testing, building, and deployment with confidence and speed.
CI/CD Pipeline Architecture
Pipeline Stages Overview
# .github/workflows/ci-cd.yml
name: CI/CD Pipeline
on:
push:
branches: [ main, develop ]
pull_request:
branches: [ main ]
env:
NODE_VERSION: '18'
DOCKER_REGISTRY: ghcr.io
IMAGE_NAME: ${{ github.repository }}
jobs:
# Stage 1: Code Quality & Testing
test:
runs-on: ubuntu-latest
services:
postgres:
image: postgres:15
env:
POSTGRES_PASSWORD: postgres
POSTGRES_DB: test_db
options: >-
--health-cmd pg_isready
--health-interval 10s
--health-timeout 5s
--health-retries 5
ports:
- 5432:5432
redis:
image: redis:7-alpine
options: >-
--health-cmd "redis-cli ping"
--health-interval 10s
--health-timeout 5s
--health-retries 5
ports:
- 6379:6379
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Setup Node.js
uses: actions/setup-node@v4
with:
node-version: ${{ env.NODE_VERSION }}
cache: 'npm'
- name: Install dependencies
run: npm ci
- name: Run linting
run: npm run lint
- name: Run type checking
run: npm run type-check
- name: Run unit tests
run: npm run test:unit
env:
NODE_ENV: test
- name: Run integration tests
run: npm run test:integration
env:
NODE_ENV: test
DATABASE_URL: postgresql://postgres:postgres@localhost:5432/test_db
REDIS_URL: redis://localhost:6379
- name: Generate test coverage
run: npm run test:coverage
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v3
with:
file: ./coverage/lcov.info
- name: Run security audit
run: npm audit --audit-level high
Advanced GitHub Actions Workflows
Multi-Environment Deployment
# .github/workflows/deploy.yml
name: Deploy to Environments
on:
workflow_run:
workflows: ["CI/CD Pipeline"]
types:
- completed
branches: [main, develop]
jobs:
deploy-staging:
if: github.ref == 'refs/heads/develop' && github.event.workflow_run.conclusion == 'success'
runs-on: ubuntu-latest
environment: staging
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: us-east-1
- name: Login to Amazon ECR
id: login-ecr
uses: aws-actions/amazon-ecr-login@v2
- name: Build and push Docker image
env:
ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }}
ECR_REPOSITORY: myapp-staging
IMAGE_TAG: ${{ github.sha }}
run: |
docker build -t $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG .
docker push $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG
- name: Deploy to ECS
run: |
aws ecs update-service \
--cluster staging-cluster \
--service myapp-service \
--force-new-deployment
- name: Run smoke tests
run: |
sleep 60 # Wait for deployment
npm run test:smoke -- --baseUrl=https://staging.myapp.com
deploy-production:
if: github.ref == 'refs/heads/main' && github.event.workflow_run.conclusion == 'success'
runs-on: ubuntu-latest
environment: production
needs: [deploy-staging]
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Deploy to production
run: |
# Blue-green deployment logic
./scripts/deploy-production.sh
- name: Run production health checks
run: |
./scripts/health-check.sh https://myapp.com
- name: Notify team
uses: 8398a7/action-slack@v3
with:
status: ${{ job.status }}
channel: '#deployments'
webhook_url: ${{ secrets.SLACK_WEBHOOK }}
Matrix Testing Strategy
# .github/workflows/matrix-test.yml
name: Matrix Testing
on:
pull_request:
branches: [ main ]
jobs:
test-matrix:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
node-version: [16, 18, 20]
database: [postgres, mysql]
exclude:
# Exclude specific combinations
- os: windows-latest
database: mysql
steps:
- uses: actions/checkout@v4
- name: Setup Node.js ${{ matrix.node-version }}
uses: actions/setup-node@v4
with:
node-version: ${{ matrix.node-version }}
- name: Setup Database
run: |
if [ "${{ matrix.database }}" == "postgres" ]; then
docker run -d -p 5432:5432 -e POSTGRES_PASSWORD=test postgres:15
else
docker run -d -p 3306:3306 -e MYSQL_ROOT_PASSWORD=test mysql:8
fi
- name: Install dependencies
run: npm ci
- name: Run tests
run: npm test
env:
DATABASE_TYPE: ${{ matrix.database }}
Docker Optimization for CI/CD
Multi-stage Production Dockerfile
# Dockerfile
FROM node:18-alpine AS base
WORKDIR /app
COPY package*.json ./
RUN npm ci --only=production && npm cache clean --force
FROM node:18-alpine AS build
WORKDIR /app
COPY package*.json ./
RUN npm ci
COPY . .
RUN npm run build
RUN npm run test:unit
FROM node:18-alpine AS production
WORKDIR /app
# Create non-root user
RUN addgroup -g 1001 -S nodejs
RUN adduser -S nextjs -u 1001
# Copy built application
COPY --from=base /app/node_modules ./node_modules
COPY --from=build --chown=nextjs:nodejs /app/dist ./dist
COPY --from=build --chown=nextjs:nodejs /app/package*.json ./
# Security hardening
RUN apk add --no-cache dumb-init
RUN rm -rf /var/cache/apk/*
USER nextjs
EXPOSE 3000
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
CMD curl -f http://localhost:3000/health || exit 1
ENTRYPOINT ["dumb-init", "--"]
CMD ["node", "dist/server.js"]
Docker Compose for Development
# docker-compose.yml
version: '3.8'
services:
app:
build:
context: .
target: development
ports:
- "3000:3000"
volumes:
- .:/app
- /app/node_modules
environment:
- NODE_ENV=development
- DATABASE_URL=postgresql://postgres:password@db:5432/myapp
- REDIS_URL=redis://redis:6379
depends_on:
db:
condition: service_healthy
redis:
condition: service_healthy
command: npm run dev
db:
image: postgres:15-alpine
environment:
POSTGRES_DB: myapp
POSTGRES_USER: postgres
POSTGRES_PASSWORD: password
volumes:
- postgres_data:/var/lib/postgresql/data
- ./scripts/init-db.sql:/docker-entrypoint-initdb.d/init.sql
healthcheck:
test: ["CMD-SHELL", "pg_isready -U postgres"]
interval: 10s
timeout: 5s
retries: 5
redis:
image: redis:7-alpine
volumes:
- redis_data:/data
healthcheck:
test: ["CMD", "redis-cli", "ping"]
interval: 10s
timeout: 3s
retries: 5
nginx:
image: nginx:alpine
ports:
- "80:80"
volumes:
- ./nginx.conf:/etc/nginx/nginx.conf
depends_on:
- app
volumes:
postgres_data:
redis_data:
Infrastructure as Code
Terraform Configuration
# infrastructure/main.tf
terraform {
required_version = ">= 1.0"
required_providers {
aws = {
source = "hashicorp/aws"
version = "~> 5.0"
}
}
backend "s3" {
bucket = "myapp-terraform-state"
key = "production/terraform.tfstate"
region = "us-east-1"
}
}
provider "aws" {
region = var.aws_region
}
# VPC Configuration
module "vpc" {
source = "terraform-aws-modules/vpc/aws"
name = "${var.project_name}-vpc"
cidr = "10.0.0.0/16"
azs = ["${var.aws_region}a", "${var.aws_region}b"]
private_subnets = ["10.0.1.0/24", "10.0.2.0/24"]
public_subnets = ["10.0.101.0/24", "10.0.102.0/24"]
enable_nat_gateway = true
enable_vpn_gateway = false
tags = {
Environment = var.environment
Project = var.project_name
}
}
# ECS Cluster
resource "aws_ecs_cluster" "main" {
name = "${var.project_name}-cluster"
setting {
name = "containerInsights"
value = "enabled"
}
tags = {
Environment = var.environment
Project = var.project_name
}
}
# Application Load Balancer
resource "aws_lb" "main" {
name = "${var.project_name}-alb"
internal = false
load_balancer_type = "application"
security_groups = [aws_security_group.alb.id]
subnets = module.vpc.public_subnets
enable_deletion_protection = var.environment == "production"
tags = {
Environment = var.environment
Project = var.project_name
}
}
# RDS Database
resource "aws_db_instance" "main" {
identifier = "${var.project_name}-db"
engine = "postgres"
engine_version = "15.4"
instance_class = var.db_instance_class
allocated_storage = 20
max_allocated_storage = 100
storage_encrypted = true
db_name = var.db_name
username = var.db_username
password = var.db_password
vpc_security_group_ids = [aws_security_group.rds.id]
db_subnet_group_name = aws_db_subnet_group.main.name
backup_retention_period = var.environment == "production" ? 7 : 1
backup_window = "03:00-04:00"
maintenance_window = "sun:04:00-sun:05:00"
skip_final_snapshot = var.environment != "production"
tags = {
Environment = var.environment
Project = var.project_name
}
}
Kubernetes Deployment
# k8s/deployment.yml
apiVersion: apps/v1
kind: Deployment
metadata:
name: myapp
labels:
app: myapp
spec:
replicas: 3
selector:
matchLabels:
app: myapp
template:
metadata:
labels:
app: myapp
spec:
containers:
- name: myapp
image: myapp:latest
ports:
- containerPort: 3000
env:
- name: NODE_ENV
value: "production"
- name: DATABASE_URL
valueFrom:
secretKeyRef:
name: myapp-secrets
key: database-url
- name: REDIS_URL
valueFrom:
secretKeyRef:
name: myapp-secrets
key: redis-url
resources:
requests:
memory: "256Mi"
cpu: "250m"
limits:
memory: "512Mi"
cpu: "500m"
livenessProbe:
httpGet:
path: /health
port: 3000
initialDelaySeconds: 30
periodSeconds: 10
readinessProbe:
httpGet:
path: /ready
port: 3000
initialDelaySeconds: 5
periodSeconds: 5
---
apiVersion: v1
kind: Service
metadata:
name: myapp-service
spec:
selector:
app: myapp
ports:
- protocol: TCP
port: 80
targetPort: 3000
type: ClusterIP
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: myapp-ingress
annotations:
kubernetes.io/ingress.class: nginx
cert-manager.io/cluster-issuer: letsencrypt-prod
spec:
tls:
- hosts:
- myapp.com
secretName: myapp-tls
rules:
- host: myapp.com
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: myapp-service
port:
number: 80
Monitoring and Observability
Application Monitoring
// monitoring/metrics.js
const prometheus = require('prom-client');
// Create metrics
const httpRequestDuration = new prometheus.Histogram({
name: 'http_request_duration_seconds',
help: 'Duration of HTTP requests in seconds',
labelNames: ['method', 'route', 'status_code'],
buckets: [0.1, 0.3, 0.5, 0.7, 1, 3, 5, 7, 10]
});
const httpRequestTotal = new prometheus.Counter({
name: 'http_requests_total',
help: 'Total number of HTTP requests',
labelNames: ['method', 'route', 'status_code']
});
const activeConnections = new prometheus.Gauge({
name: 'active_connections',
help: 'Number of active connections'
});
// Middleware to collect metrics
const metricsMiddleware = (req, res, next) => {
const start = Date.now();
res.on('finish', () => {
const duration = (Date.now() - start) / 1000;
const route = req.route ? req.route.path : req.path;
httpRequestDuration
.labels(req.method, route, res.statusCode)
.observe(duration);
httpRequestTotal
.labels(req.method, route, res.statusCode)
.inc();
});
next();
};
// Health check endpoint
app.get('/health', (req, res) => {
res.json({
status: 'healthy',
timestamp: new Date().toISOString(),
uptime: process.uptime(),
memory: process.memoryUsage(),
version: process.env.npm_package_version
});
});
// Metrics endpoint
app.get('/metrics', (req, res) => {
res.set('Content-Type', prometheus.register.contentType);
res.end(prometheus.register.metrics());
});
module.exports = { metricsMiddleware };
Logging Configuration
// logging/logger.js
const winston = require('winston');
const { ElasticsearchTransport } = require('winston-elasticsearch');
const logger = winston.createLogger({
level: process.env.LOG_LEVEL || 'info',
format: winston.format.combine(
winston.format.timestamp(),
winston.format.errors({ stack: true }),
winston.format.json()
),
defaultMeta: {
service: 'myapp',
version: process.env.npm_package_version,
environment: process.env.NODE_ENV
},
transports: [
new winston.transports.Console({
format: winston.format.combine(
winston.format.colorize(),
winston.format.simple()
)
}),
new winston.transports.File({
filename: 'logs/error.log',
level: 'error'
}),
new winston.transports.File({
filename: 'logs/combined.log'
})
]
});
// Add Elasticsearch transport in production
if (process.env.NODE_ENV === 'production') {
logger.add(new ElasticsearchTransport({
level: 'info',
clientOpts: {
node: process.env.ELASTICSEARCH_URL
},
index: 'myapp-logs'
}));
}
// Request logging middleware
const requestLogger = (req, res, next) => {
const start = Date.now();
res.on('finish', () => {
const duration = Date.now() - start;
logger.info('HTTP Request', {
method: req.method,
url: req.url,
statusCode: res.statusCode,
duration,
userAgent: req.get('User-Agent'),
ip: req.ip,
userId: req.user?.id
});
});
next();
};
module.exports = { logger, requestLogger };
DevOps is about creating reliable, automated systems that enable teams to deliver software quickly and safely. Start with basic CI/CD, then gradually add monitoring, infrastructure as code, and advanced deployment strategies as your needs grow.