Kubernetes — Practical

Kubernetes — Practical patterns

Production Deployment manifest

# Deployment "api": rolling updates without capacity loss, zone spreading,
# startup/readiness/liveness probes, graceful drain, hardened container.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: api
  labels:
    app: api
spec:
  # NOTE: when a HorizontalPodAutoscaler targets this Deployment, every
  # re-apply of this manifest resets the replica count to this value.
  # Consider omitting `replicas` once the HPA owns scaling.
  replicas: 3
  strategy:
    type: RollingUpdate
    rollingUpdate:
      # Add one new pod at a time and never go below the desired count.
      maxSurge: 1
      maxUnavailable: 0
  selector:
    matchLabels:
      app: api
  template:
    metadata:
      labels:
        app: api
      annotations:
        # Annotation values must be strings, hence the quotes.
        prometheus.io/scrape: "true"
        prometheus.io/port: "9090"
    spec:
      serviceAccountName: api
      # Must exceed the preStop sleep below plus the app's own shutdown time.
      terminationGracePeriodSeconds: 30
      affinity:
        podAntiAffinity:
          # Soft preference: avoid co-locating api pods in the same zone.
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              podAffinityTerm:
                labelSelector:
                  matchLabels:
                    app: api
                topologyKey: topology.kubernetes.io/zone
      topologySpreadConstraints:
        # Keep per-zone pod counts within 1 of each other when possible;
        # ScheduleAnyway makes this best-effort rather than blocking.
        - maxSkew: 1
          topologyKey: topology.kubernetes.io/zone
          whenUnsatisfiable: ScheduleAnyway
          labelSelector:
            matchLabels:
              app: api
      containers:
        - name: api
          # Pinned by digest so the running image cannot change under a tag.
          image: ghcr.io/org/api@sha256:abc...
          imagePullPolicy: IfNotPresent
          ports:
            - name: http
              containerPort: 8080
            - name: metrics
              containerPort: 9090
          envFrom:
            - configMapRef:
                name: api-config
            - secretRef:
                name: api-secrets
          resources:
            requests:
              cpu: 100m
              memory: 256Mi
            limits:
              cpu: 500m
              memory: 512Mi
          # Allows up to 30 * 5s = 150s for startup; liveness and readiness
          # checks are held off until this probe succeeds.
          startupProbe:
            httpGet:
              path: /healthz
              port: http
            failureThreshold: 30
            periodSeconds: 5
          # Removes the pod from Service endpoints after 3 failed checks.
          readinessProbe:
            httpGet:
              path: /readyz
              port: http
            periodSeconds: 5
            failureThreshold: 3
          # Restarts the container after 3 consecutive failures (~30s).
          livenessProbe:
            httpGet:
              path: /healthz
              port: http
            periodSeconds: 10
            failureThreshold: 3
          lifecycle:
            preStop:
              # Drain: delay SIGTERM by 10s so endpoint removal can propagate
              # before the process begins shutting down.
              exec:
                command:
                  - "/bin/sh"
                  - "-c"
                  - "sleep 10"
          securityContext:
            runAsUser: 1000
            runAsGroup: 1000
            runAsNonRoot: true
            allowPrivilegeEscalation: false
            readOnlyRootFilesystem: true
            capabilities:
              drop:
                - "ALL"
            # Required by the "restricted" Pod Security Standard, alongside
            # the settings above.
            seccompProfile:
              type: RuntimeDefault

PodDisruptionBudget (keeps a minimum number of pods available during voluntary disruptions)

# Limits voluntary evictions (node drains, upgrades) of the api pods.
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
  name: api
spec:
  # At least two matching pods must stay available during an eviction.
  minAvailable: 2
  selector:
    matchLabels:
      app: api

HPA (CPU + memory)

# Scales the api Deployment between 3 and 30 replicas on CPU and memory.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: api
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: api
  minReplicas: 3
  maxReplicas: 30
  metrics:
    # Utilization is measured relative to the containers' resource requests.
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80
  behavior:
    # Scale down cautiously: 5-minute stabilization window, then remove at
    # most 10% of the pods per minute.
    scaleDown:
      stabilizationWindowSeconds: 300
      policies:
        - type: Percent
          value: 10
          periodSeconds: 60
    # Scale up immediately: may double the pod count every 30 seconds.
    scaleUp:
      stabilizationWindowSeconds: 0
      policies:
        - type: Percent
          value: 100
          periodSeconds: 30

Service + Ingress

# Cluster-internal Service: port 80 forwards to port 8080 on the api pods.
apiVersion: v1
kind: Service
metadata:
  name: api
spec:
  type: ClusterIP
  selector:
    app: api
  ports:
    - port: 80
      targetPort: 8080
---
# Exposes the api Service at api.example.com through the nginx ingress class.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: api
  annotations:
    # Issuer reference for cert-manager; the certificate is stored in the
    # secret named under spec.tls.
    cert-manager.io/cluster-issuer: letsencrypt
    nginx.ingress.kubernetes.io/ssl-redirect: "true"
spec:
  ingressClassName: nginx
  tls:
    - hosts:
        - api.example.com
      secretName: api-tls
  rules:
    - host: api.example.com
      http:
        paths:
          # Prefix match on "/" routes every path to the api Service.
          - path: /
            pathType: Prefix
            backend:
              service:
                name: api
                port:
                  number: 80

NetworkPolicy (default deny + allowlist)

# Default deny: selects every pod in the namespace and lists both policy
# types without any allow rules, so all ingress and egress is blocked
# unless another policy permits it.
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: default-deny
  namespace: app
spec:
  # An empty selector matches all pods in the namespace.
  podSelector: {}
  policyTypes:
    - Ingress
    - Egress
---
# Allowlist for the api pods: inbound from the ingress controller on 8080,
# outbound to Postgres on 5432 and to DNS in kube-system.
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: allow-api
  namespace: app
spec:
  podSelector:
    matchLabels:
      app: api
  policyTypes:
    - Ingress
    - Egress
  ingress:
    # NOTE(review): a podSelector without a namespaceSelector only matches
    # pods in this policy's own namespace ("app"). If the ingress controller
    # runs in a separate namespace, add a namespaceSelector to this same
    # `from` entry and confirm the controller's actual pod labels.
    - from:
        - podSelector:
            matchLabels:
              app: ingress-nginx
      ports:
        - port: 8080
          protocol: TCP
  egress:
    - to:
        - podSelector:
            matchLabels:
              app: postgres
      ports:
        - port: 5432
          protocol: TCP
    # DNS: resolvers fall back to TCP for truncated or large responses, so
    # both protocols must be allowed on port 53.
    - to:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: kube-system
      ports:
        - port: 53
          protocol: UDP
        - port: 53
          protocol: TCP

Job + CronJob

# Nightly cleanup job.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: nightly-cleanup
spec:
  # Cron syntax: minute 0, hour 2, every day.
  schedule: "0 2 * * *"
  # Skip a run if the previous one is still in progress.
  concurrencyPolicy: Forbid
  successfulJobsHistoryLimit: 3
  jobTemplate:
    spec:
      # Retry a failing job up to 3 times before marking it failed.
      backoffLimit: 3
      # Finished Jobs (and their pods) are deleted one hour after completion.
      ttlSecondsAfterFinished: 3600
      template:
        spec:
          restartPolicy: OnFailure
          containers:
            - name: cleaner
              image: ghcr.io/org/jobs:1.0
              command:
                - "python"
                - "-m"
                - "jobs.cleanup"

Init container (DB migrate)

spec:
  # Init containers run to completion before the app containers start,
  # so the migration finishes (or fails the pod) ahead of the api container.
  initContainers:
    - name: migrate
      image: ghcr.io/org/api:1.2.3
      command:
        - "sh"
        - "-c"
        - "npm run migrate"
      envFrom:
        - secretRef:
            name: db
  containers:
    - name: api
      ...

Diagnostics

# Last 50 events across all namespaces, ordered by last-seen timestamp.
kubectl get events -A --sort-by=.lastTimestamp | tail -50
# Full state and recent events for one pod (<p> is a pod-name placeholder).
kubectl describe pod <p>
# Logs from the previous instance of the "api" container (e.g. after a restart).
kubectl logs <p> -c api --previous
# Follow logs from pods labelled app=api, starting 100 lines back per pod;
# --max-log-requests caps the number of concurrent log streams.
kubectl logs -l app=api --tail=100 -f --max-log-requests=10

# Resource usage per pod (all namespaces, sorted by memory) and per node.
kubectl top pods -A --sort-by=memory
kubectl top nodes

# Pod list with extra columns such as node and pod IP.
kubectl get pods -o wide
# Interactive shell inside the pod.
kubectl exec -it <p> -- sh

# Built-in API documentation for a resource field.
kubectl explain ingress.spec.rules
# Only the status section of the pod object.
kubectl get pod <p> -o yaml | yq '.status'

# Watch a rollout until it completes, list its revisions, roll back to revision 3.
kubectl rollout status deploy/api
kubectl rollout history deploy/api
kubectl rollout undo deploy/api --to-revision=3

Common kubectl shortcuts

# Shell aliases for frequent commands.
alias k='kubectl'
# Usage: kn <namespace> — sets the default namespace of the current context.
alias kn='kubectl config set-context --current --namespace'
alias kgp='kubectl get pods'
# Events in the current namespace, ordered by last-seen timestamp.
alias kge='kubectl get events --sort-by=.lastTimestamp'

# `ctx` and `ns` are not built-in subcommands; they need the kubectx/kubens
# kubectl plugins installed.
kubectl ctx        # kubectx — switch contexts
kubectl ns         # kubens — switch namespaces
# Attaches a busybox container to the running pod; --target=api names the
# container whose process namespace the debug container should share.
kubectl debug -it pod/<p> --image=busybox --target=api  # ephemeral debug container

GitOps with Argo CD (sketch)

# Argo CD Application: continuously syncs apps/api from the manifests repo
# into the "app" namespace of the local cluster.
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: api
  namespace: argocd
spec:
  project: default
  source:
    repoURL: https://github.com/org/k8s-manifests
    path: apps/api
    targetRevision: main
  destination:
    # In-cluster API server address, i.e. the cluster Argo CD itself runs in.
    server: https://kubernetes.default.svc
    namespace: app
  # Let a HorizontalPodAutoscaler own the replica count: without this,
  # selfHeal treats every autoscaler change to spec.replicas as drift and
  # reverts it to the value committed in Git.
  ignoreDifferences:
    - group: apps
      kind: Deployment
      jsonPointers:
        - /spec/replicas
  syncPolicy:
    automated:
      # prune: delete resources removed from Git.
      # selfHeal: revert manual changes made in the cluster.
      prune: true
      selfHeal: true
    syncOptions:
      - CreateNamespace=true
      # Also honour ignoreDifferences while applying a sync, not only when
      # computing the diff.
      - RespectIgnoreDifferences=true

Useful tools

kubectx / kubens — fast context/namespace switching.
k9s — TUI for cluster.
stern — multi-pod log tail.
kubectl-tree — show resource hierarchy.
kube-no-trouble (kubent) — find deprecated APIs.
popeye — cluster lint.
velero — backup.
lens — desktop UI.
kubeval / kubeconform / datree — manifest validation and lint (kubeval is no longer maintained; prefer kubeconform).