This document describes Fortify’s performance regression testing framework for tracking and validating benchmark performance over time.
Fortify includes a comprehensive performance testing framework that:
# Run all benchmarks
./scripts/benchmark.sh run
# Generate performance baseline from results
./scripts/benchmark.sh generate-baseline
# Check for regressions
./scripts/benchmark.sh check
# Complete workflow (run + check + compare)
./scripts/benchmark.sh all
# Run benchmarks with standard duration
go test -bench=. -benchmem -benchtime=3s ./...
# Run extended benchmarks
go test -bench=. -benchmem -benchtime=10s -count=3 ./...
# Run specific package benchmarks
go test -bench=. -benchmem ./circuitbreaker
import "github.com/felixgeelhaar/fortify/testing"
// Create performance tracker
tracker := testing.NewPerformanceTracker(".benchmark-results")
// Set custom thresholds
tracker.SetThresholds(testing.RegressionThresholds{
TimeIncrease: 1.10, // 10% slower is acceptable
AllocIncrease: 1.20, // 20% more allocations
BytesIncrease: 1.15, // 15% more memory
})
// Manually add baseline
tracker.AddBaseline(testing.PerformanceBaseline{
Name: "BenchmarkCircuitBreaker",
MaxNsPerOp: 1000,
MaxAllocs: 5,
MaxBytes: 512,
Description: "Circuit breaker baseline",
})
// Generate from benchmark results (with 10% safety factor)
results := []testing.BenchmarkResult{...}
tracker.GenerateBaselineFromResults(results, 1.1)
// Save baselines to file
tracker.SaveBaselines("performance-baselines.json")
// Load baselines from file
tracker.LoadBaselines("performance-baselines.json")
// Check current results against baselines
results := []testing.BenchmarkResult{
{
Name: "BenchmarkCircuitBreaker",
NsPerOp: 950,
AllocsPerOp: 4,
BytesPerOp: 480,
Timestamp: time.Now(),
},
}
report := tracker.CheckRegressions(results)
fmt.Printf("Total checks: %d\n", report.TotalChecks)
fmt.Printf("Passed: %d\n", report.Passed)
fmt.Printf("Failed: %d\n", report.Failed)
// Handle regressions
for _, regression := range report.Regressions {
fmt.Printf("❌ %s: %s increased by %.2f%% (threshold: %.2f%%)\n",
regression.BenchmarkName,
regression.Metric,
regression.Increase,
regression.Threshold)
}
// Save benchmark report
report := testing.BenchmarkReport{
Timestamp: time.Now(),
Results: results,
Metadata: map[string]string{
"commit": "abc123",
"branch": "main",
},
}
tracker.SaveReport(report)
// Load latest report
latest, err := tracker.LoadLatestReport()
baseline := testing.BenchmarkReport{...}
current := testing.BenchmarkReport{...}
changes := testing.CompareReports(baseline, current)
for benchmark, metrics := range changes {
timeChange := metrics["time_change"]
fmt.Printf("%s: %.2f%% time change\n", benchmark, timeChange)
}
type BenchmarkResult struct {
Name string // Benchmark name
NsPerOp float64 // Nanoseconds per operation
AllocsPerOp uint64 // Allocations per operation
BytesPerOp uint64 // Bytes allocated per operation
Timestamp time.Time // When benchmark was run
GitCommit string // Git commit hash
GitBranch string // Git branch name
GoVersion string // Go version
OS string // Operating system
Arch string // Architecture
CPUModel string // CPU model
MemoryTotal uint64 // Total system memory
IterationCount int // Number of iterations
}
type PerformanceBaseline struct {
Name string // Benchmark name
MaxNsPerOp float64 // Maximum acceptable ns/op
MaxAllocs uint64 // Maximum acceptable allocations
MaxBytes uint64 // Maximum acceptable bytes
Description string // Baseline description
}
tracker.SetThresholds(testing.RegressionThresholds{
TimeIncrease: 1.05, // 5% time increase
AllocIncrease: 1.10, // 10% allocation increase
BytesIncrease: 1.08, // 8% memory increase
AbsoluteMaxNsPerOp: 10000, // Hard limit at 10µs
})
Fortify includes automated performance testing in CI/CD:
# .github/workflows/performance.yml
name: Performance Regression Testing
on:
pull_request:
branches: [ main ]
push:
branches: [ main ]
benchstat
# Simulate CI workflow
./scripts/benchmark.sh all
# View results
cat .benchmark-results/latest-raw.txt
Main benchmark automation script.
# Run benchmarks only
./scripts/benchmark.sh run
# Generate baseline from current results
./scripts/benchmark.sh generate-baseline
# Check for regressions
./scripts/benchmark.sh check
# Compare with previous run
./scripts/benchmark.sh compare
# Full workflow
./scripts/benchmark.sh all
# Show help
./scripts/benchmark.sh help
# Customize benchmark duration
export BENCHMARK_TIME=5s
./scripts/benchmark.sh run
# Run multiple times for stability
export BENCHMARK_COUNT=3
./scripts/benchmark.sh run
# View raw benchmark output
cat .benchmark-results/latest-raw.txt
# View parsed JSON results
cat .benchmark-results/latest-parsed.json | jq
# View baselines
cat scripts/performance-baselines.json | jq
Install benchstat for detailed comparison:
go install golang.org/x/perf/cmd/benchstat@latest
Compare two benchmark runs:
benchstat baseline.txt current.txt
Output example:
name old time/op new time/op delta
CircuitBreakerSuccess 850ns ± 2% 920ns ± 3% +8.24%
RetrySuccess 2.10µs ± 1% 2.25µs ± 2% +7.14%
Example benchmark:
func BenchmarkCircuitBreaker(b *testing.B) {
cb := circuitbreaker.New[string](circuitbreaker.Config{
Timeout: 100 * time.Millisecond,
})
b.ResetTimer() // Reset after setup
for i := 0; i < b.N; i++ {
_, _ = cb.Execute(context.Background(), func(ctx context.Context) (string, error) {
return "result", nil
})
}
}
Results are stored in .benchmark-results/ with timestamps:
.benchmark-results/
├── benchmark-20240315-143022.json
├── benchmark-20240315-150432.json
└── latest-raw.txt
Monitor trends over time:
# View all historical results
ls -lt .benchmark-results/*.json
# Compare specific dates
benchstat .benchmark-results/benchmark-20240301-*.txt \
.benchmark-results/benchmark-20240315-*.txt
GitHub Actions automatically:
Problem: Benchmarks show inconsistent results
Solutions:
- Increase benchtime (e.g., -benchtime=10s)
- Run multiple iterations (e.g., -count=5)
Problem: CI reports regressions incorrectly
Solutions:
Problem: Unexpected allocation increases
Solutions:
- Use go test -benchmem for memory profiling
- Write a memory profile with -memprofile=mem.prof
- Analyze it with go tool pprof mem.prof
See testing/example_test.go for complete examples:
- Example_performanceTracking - Basic tracking usage
- Example_performanceBaseline - Generating baselines