JA
Performance Tuning
Java operations v1.0.0
Java Performance Tuning
Overview
This skill covers Java performance optimization techniques including JVM tuning, garbage collection optimization, profiling methodologies, and code-level optimizations. Effective performance tuning requires measurement, analysis, and targeted improvements.
Key Concepts
Performance Optimization Cycle
┌─────────────────────────────────────────────────────────────┐
│ Performance Optimization Cycle │
├─────────────────────────────────────────────────────────────┤
│ │
│ ┌───────────┐ │
│ │ Measure │ ◄──────────────────────┐ │
│ └─────┬─────┘ │ │
│ │ │ │
│ ▼ │ │
│ ┌───────────┐ │ │
│ │ Analyze │ │ │
│ └─────┬─────┘ │ │
│ │ │ │
│ ▼ │ │
│ ┌───────────┐ │ │
│ │ Identify │ │ │
│ │ Bottleneck│ │ │
│ └─────┬─────┘ │ │
│ │ │ │
│ ▼ │ │
│ ┌───────────┐ │ │
│ │ Optimize │ │ │
│ └─────┬─────┘ │ │
│ │ │ │
│ ▼ │ │
│ ┌───────────┐ │ │
│ │ Verify │────────────────────────┘ │
│ └───────────┘ │
│ │
└─────────────────────────────────────────────────────────────┘
Garbage Collectors
| GC Type | Best For | Latency | Throughput |
|---|---|---|---|
| G1GC (default) | General purpose | Medium | High |
| ZGC | Ultra-low latency | Very Low | High |
| Shenandoah | Low latency | Low | High |
| Parallel GC | Throughput-focused | High | Very High |
| Serial GC | Small heaps | High | Medium |
Best Practices
1. Measure Before Optimizing
Never optimize without profiling data to guide decisions.
2. Use JMH for Microbenchmarks
JMH (the Java Microbenchmark Harness) avoids common benchmarking pitfalls such as insufficient JIT warm-up and dead-code elimination.
3. Size Heap Based on Live Data Set
Set the heap to 3-4x the size of the live data set when using G1GC.
4. Avoid Premature Optimization
Focus on algorithmic improvements before micro-optimizations.
5. Monitor in Production
Use lightweight profiling for production insights.
Code Examples
Example 1: JVM Tuning Options
#!/bin/bash
# Production JVM configuration for a typical Spring Boot app.
#
# The options are collected in an array so every group can carry its own
# comment. Placing "#" comment lines between backslash-continued lines does
# not work: the continuation splices the comment onto the command line, so
# `java` would be started without any of the options that follow.
JVM_OPTS=(
  # Memory sizing
  -Xms4g
  -Xmx4g
  -XX:MaxMetaspaceSize=512m

  # G1GC tuning (default in Java 9+)
  -XX:+UseG1GC
  -XX:MaxGCPauseMillis=200
  -XX:G1HeapRegionSize=16m
  -XX:InitiatingHeapOccupancyPercent=45

  # GC logging (JDK 11+ unified logging); quoted so the shell never globs "gc*"
  "-Xlog:gc*:file=/var/log/app/gc.log:time,uptime,level,tags:filecount=5,filesize=100m"

  # Performance
  -XX:+UseStringDeduplication
  -XX:+OptimizeStringConcat

  # Diagnostics
  -XX:+HeapDumpOnOutOfMemoryError
  -XX:HeapDumpPath=/var/log/app/heapdump.hprof
  "-XX:ErrorFile=/var/log/app/hs_err_pid%p.log"

  # Container awareness (important for Docker/K8s)
  # NOTE: MaxRAMPercentage only sizes the heap when -Xmx is not given; with the
  # explicit -Xmx4g above, the 4g limit wins. Drop -Xms/-Xmx to size the heap
  # from the container memory limit instead.
  -XX:+UseContainerSupport
  -XX:MaxRAMPercentage=75.0

  # Enable JFR for production profiling
  -XX:StartFlightRecording=duration=0,maxage=24h,maxsize=1g,name=continuous
)

java "${JVM_OPTS[@]}" -jar app.jar
// ZGC configuration for ultra-low latency (Java 15+)
public class ZGCConfiguration {
    /*
     * JVM args for ZGC:
     *   -XX:+UseZGC
     *   -XX:+ZGenerational        # Java 21+ generational ZGC
     *   -Xmx16g                   # ZGC works best with larger heaps
     *   -XX:SoftMaxHeapSize=14g   # Prefer to stay under this
     *   -XX:+UseLargePages        # Optional: improved performance
     */

    /**
     * Prints, for every garbage collector MXBean registered in this JVM, its
     * name, the number of collections it has performed and the accumulated
     * collection time in milliseconds to standard output.
     */
    public static void printGCInfo() {
        ManagementFactory.getGarbageCollectorMXBeans().forEach(collector -> {
            System.out.println("GC: " + collector.getName());
            System.out.println(" Collections: " + collector.getCollectionCount());
            System.out.println(" Time: " + collector.getCollectionTime() + "ms");
        });
    }
}
Example 2: JMH Benchmarking
/**
 * JMH benchmark comparing several ways of concatenating an array of strings.
 *
 * Configured to report average time per operation in nanoseconds, using two
 * forked JVMs (each started with -Xms2g -Xmx2g), 5 warm-up iterations and
 * 10 measurement iterations. The benchmark is run once per value of the
 * {@code iterations} parameter (10, 100, 1000).
 */
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@State(Scope.Benchmark)
@Fork(value = 2, jvmArgs = {"-Xms2g", "-Xmx2g"})
@Warmup(iterations = 5, time = 1)
@Measurement(iterations = 10, time = 1)
public class StringConcatBenchmark {
// Number of input strings to concatenate; injected by JMH from the @Param values.
@Param({"10", "100", "1000"})
private int iterations;
// Input data, filled in setup() and only read by the benchmark methods.
private String[] strings;
/**
 * Builds the input array: {@code iterations} elements of the form
 * "string0", "string1", ...
 */
@Setup
public void setup() {
strings = new String[iterations];
for (int i = 0; i < iterations; i++) {
strings[i] = "string" + i;
}
}
/**
 * Baseline: repeated {@code +} concatenation inside a loop.
 *
 * @return the concatenation of all input strings (returned so the result is used)
 */
@Benchmark
public String stringConcat() {
String result = "";
for (String s : strings) {
result = result + s; // Creates new String each iteration
}
return result;
}
/**
 * Appends every input string to a default-capacity StringBuilder.
 *
 * @return the concatenation of all input strings
 */
@Benchmark
public String stringBuilder() {
StringBuilder sb = new StringBuilder();
for (String s : strings) {
sb.append(s);
}
return sb.toString();
}
/**
 * Same as stringBuilder(), but with an initial capacity of
 * {@code iterations * 10} characters. The inputs built in setup() are
 * 7 to 9 characters long for the configured parameter values, so this
 * capacity covers the whole result.
 *
 * @return the concatenation of all input strings
 */
@Benchmark
public String stringBuilderSized() {
// Pre-size to avoid resizing
StringBuilder sb = new StringBuilder(iterations * 10);
for (String s : strings) {
sb.append(s);
}
return sb.toString();
}
/**
 * Concatenates via String.join with an empty delimiter.
 *
 * @return the concatenation of all input strings
 */
@Benchmark
public String stringJoin() {
return String.join("", strings);
}
/**
 * Concatenates via a stream over the array and Collectors.joining().
 *
 * @return the concatenation of all input strings
 */
@Benchmark
public String streamCollect() {
return Arrays.stream(strings).collect(Collectors.joining());
}
/**
 * Entry point that forwards the command-line arguments to the JMH runner.
 *
 * @param args arguments passed through unchanged to org.openjdk.jmh.Main
 * @throws Exception whatever the JMH main method throws
 */
public static void main(String[] args) throws Exception {
org.openjdk.jmh.Main.main(args);
}
}
// Results example:
// Benchmark (iterations) Mode Cnt Score Error Units
// StringConcatBenchmark.stringConcat 100 avgt 20 15234.567 ± 123 ns/op
// StringConcatBenchmark.stringBuilder 100 avgt 20 823.456 ± 12 ns/op
// StringConcatBenchmark.stringBuilderSized 100 avgt 20 654.321 ± 8 ns/op
// StringConcatBenchmark.stringJoin 100 avgt 20 712.345 ± 10 ns/op
Example 3: Memory-Efficient Data Structures
/**
 * Examples of memory-conscious data handling: primitive collections, object
 * pooling, a flyweight cache and a compact field layout.
 */
public class MemoryEfficientCollections {

    /** Platform logger used to report pool clean-up failures instead of swallowing them. */
    private static final System.Logger LOGGER =
            System.getLogger(MemoryEfficientCollections.class.getName());

    /**
     * Contrasts a boxed {@code List<Integer>} with an Eclipse Collections
     * primitive {@code IntList} holding the same one million values.
     */
    // Primitive collections to avoid boxing
    public void primitiveCollections() {
        // Standard - boxes every int to Integer
        List<Integer> boxedList = new ArrayList<>();
        for (int i = 0; i < 1_000_000; i++) {
            boxedList.add(i); // Autoboxing creates objects
        }
        // Memory: ~20MB (Integer object overhead)

        // Eclipse Collections - primitive arrays
        IntList primitiveList = IntLists.mutable.empty();
        for (int i = 0; i < 1_000_000; i++) {
            primitiveList.add(i);
        }
        // Memory: ~4MB (just int values)
    }

    // Object pooling for frequently created objects
    private final ObjectPool<ExpensiveObject> pool = new GenericObjectPool<>(
        new BasePooledObjectFactory<>() {
            @Override
            public ExpensiveObject create() {
                return new ExpensiveObject();
            }

            @Override
            public PooledObject<ExpensiveObject> wrap(ExpensiveObject obj) {
                return new DefaultPooledObject<>(obj);
            }

            @Override
            public void passivateObject(PooledObject<ExpensiveObject> p) {
                // Reset the instance each time it goes back into the pool.
                p.getObject().reset();
            }
        }
    );

    /**
     * Borrows an object from the pool, runs {@code doWork()} on it and hands
     * it back.
     *
     * <p>If borrowing or the work itself fails, the borrowed instance (if any)
     * is invalidated rather than returned, so a possibly broken object is
     * never handed to the next borrower. Failures while giving the object back
     * are logged and otherwise ignored (best effort).
     *
     * @throws RuntimeException wrapping any exception thrown while borrowing
     *         the object or doing the work
     */
    public void usePooledObject() {
        ExpensiveObject obj = null;
        try {
            obj = pool.borrowObject();
            obj.doWork();
        } catch (Exception e) {
            if (obj != null) {
                invalidateQuietly(obj);
                obj = null; // already handed back; skip returnObject below
            }
            throw new RuntimeException(e);
        } finally {
            if (obj != null) {
                try {
                    pool.returnObject(obj);
                } catch (Exception returnFailure) {
                    // Best effort: the work already succeeded, so only report it.
                    LOGGER.log(System.Logger.Level.WARNING,
                            "Failed to return object to pool", returnFailure);
                }
            }
        }
    }

    /** Invalidates a borrowed object, logging (not propagating) any failure. */
    private void invalidateQuietly(ExpensiveObject obj) {
        try {
            pool.invalidateObject(obj);
        } catch (Exception invalidateFailure) {
            LOGGER.log(System.Logger.Level.WARNING,
                    "Failed to invalidate pooled object", invalidateFailure);
        }
    }

    // Flyweight pattern for immutable objects
    private static final Map<String, Currency> CURRENCY_CACHE = new ConcurrentHashMap<>();

    /**
     * Returns the cached {@code Currency} for the given code, creating and
     * caching it on first use.
     *
     * @param code key used for the cache lookup and passed to the Currency constructor
     * @return the shared instance for {@code code}
     */
    public Currency getCurrency(String code) {
        return CURRENCY_CACHE.computeIfAbsent(code, Currency::new);
    }

    // Compact data representation
    // NOTE(review): this is a non-static inner class, so each instance also
    // references its enclosing MemoryEfficientCollections; consider making it
    // static if that link is not needed.
    public class CompactUserStats {
        // Instead of Map<String, Object> with boxed values
        // Use bit packing for boolean flags
        private byte flags; // 8 boolean flags in 1 byte

        // Use primitive arrays for metrics
        private final long[] dailyCounts = new long[365]; // 2.9KB vs 23KB with List<Long>

        /**
         * Sets or clears the "active" flag, stored in bit 0 of {@code flags}.
         *
         * @param active new value of the flag
         */
        public void setActive(boolean active) {
            if (active) {
                flags |= 0x01;
            } else {
                flags &= ~0x01;
            }
        }

        /** @return {@code true} when bit 0 of {@code flags} is set */
        public boolean isActive() {
            return (flags & 0x01) != 0;
        }
    }
}
Example 4: Profiling with JFR
/**
 * Examples of using Java Flight Recorder: a programmatic recording, a custom
 * event and live event streaming.
 */
public class JFRProfiling {

    /**
     * Records the workload run by {@code performWork()} using the "profile"
     * JFR configuration and dumps the recording to {@code /tmp/recording.jfr}.
     *
     * @throws Exception if the configuration cannot be loaded, the workload
     *         fails or the recording cannot be written
     */
    // Programmatic JFR recording
    public void recordWithJFR() throws Exception {
        Configuration config = Configuration.getConfiguration("profile");
        try (Recording recording = new Recording(config)) {
            recording.setName("PerformanceAnalysis");
            recording.setMaxAge(Duration.ofMinutes(10));
            recording.setMaxSize(100 * 1024 * 1024); // 100 MB
            recording.start();

            // Run workload
            performWork();

            recording.stop();

            // Save to file
            Path path = Path.of("/tmp/recording.jfr");
            recording.dump(path);
            System.out.println("Recording saved to: " + path);
        }
    }

    /** Custom JFR event describing one processed order; stack traces are disabled. */
    // Custom JFR events
    @Name("com.example.OrderProcessed")
    @Label("Order Processed")
    @Category("Business")
    @StackTrace(false)
    public static class OrderProcessedEvent extends Event {
        @Label("Order ID")
        String orderId;

        @Label("Processing Time")
        @Timespan(Timespan.MILLISECONDS)
        long processingTime;

        @Label("Item Count")
        int itemCount;
    }

    /**
     * Processes the order and commits an {@link OrderProcessedEvent} carrying
     * the order id, the elapsed processing time in milliseconds and the
     * number of items.
     *
     * @param order the order to process
     */
    public void processOrderWithEvent(Order order) {
        OrderProcessedEvent event = new OrderProcessedEvent();
        event.begin();
        // nanoTime is monotonic, so the measured interval is not affected by
        // wall-clock adjustments the way currentTimeMillis would be.
        long startNanos = System.nanoTime();

        // Process order
        doProcess(order);

        event.orderId = order.getId();
        event.processingTime = (System.nanoTime() - startNanos) / 1_000_000L;
        event.itemCount = order.getItems().size();
        event.commit();
    }

    /**
     * Streams selected JFR events for five minutes and prints a line when the
     * combined JVM CPU load exceeds 0.8 or a monitor wait exceeds 100 ms.
     *
     * <p>If the calling thread is interrupted while waiting, the interrupt
     * flag is restored and the stream is closed early.
     */
    // JFR streaming for live analysis (Java 14+)
    public void streamJFREvents() {
        try (var stream = new RecordingStream()) {
            stream.enable("jdk.CPULoad").withPeriod(Duration.ofSeconds(1));
            stream.enable("jdk.GCHeapSummary");
            stream.enable("jdk.JavaMonitorWait").withThreshold(Duration.ofMillis(10));

            stream.onEvent("jdk.CPULoad", event -> {
                float jvmUser = event.getFloat("jvmUser");
                float jvmSystem = event.getFloat("jvmSystem");
                if (jvmUser + jvmSystem > 0.8) {
                    System.out.println("High CPU: " + (jvmUser + jvmSystem));
                }
            });

            stream.onEvent("jdk.JavaMonitorWait", event -> {
                Duration waitTime = event.getDuration("duration");
                if (waitTime.toMillis() > 100) {
                    System.out.println("Lock contention: " + waitTime);
                }
            });

            stream.startAsync();

            // Run for 5 minutes
            try {
                Thread.sleep(Duration.ofMinutes(5).toMillis());
            } catch (InterruptedException interrupted) {
                // Thread.sleep throws a checked exception; keep the method
                // signature unchanged and preserve the interrupt for callers.
                Thread.currentThread().interrupt();
            }
        }
    }
}
Example 5: Common Performance Optimizations
/**
 * Collection of common code-level performance techniques: lazy
 * initialization, batching, object reuse, pre-sizing, pooling, parallel
 * streams and caching.
 */
public class PerformanceOptimizations {

    // Lazy initialization
    private volatile ExpensiveResource resource;

    /**
     * Returns the shared resource, creating it on first use.
     *
     * <p>The field is read into a local first; only when it is still
     * {@code null} is the lock taken and the field re-checked before creating
     * the instance, so the resource is constructed at most once.
     *
     * @return the lazily created resource
     */
    public ExpensiveResource getResource() {
        ExpensiveResource result = resource;
        if (result == null) {
            synchronized (this) {
                result = resource;
                if (result == null) {
                    resource = result = new ExpensiveResource();
                }
            }
        }
        return result;
    }

    /**
     * Persists all records with a single {@code saveAll} call.
     *
     * @param records the records to store
     */
    // Batch operations instead of individual calls
    public void batchInserts(List<Record> records) {
        // BAD: N database calls
        // for (Record record : records) {
        //     repository.save(record);
        // }

        // GOOD: Batch insert
        repository.saveAll(records);
    }

    // Avoid unnecessary object creation
    private static final DateTimeFormatter FORMATTER =
            DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");

    /**
     * Formats the given date-time with the pattern {@code yyyy-MM-dd HH:mm:ss}.
     *
     * @param dateTime value to format
     * @return the formatted text
     */
    public String formatDate(LocalDateTime dateTime) {
        // Reuse formatter instead of creating new one
        return dateTime.format(FORMATTER);
    }

    /**
     * Creates the result list with its capacity set up front.
     *
     * @param expectedSize initial capacity of the returned list
     * @return the (here still empty) result list
     */
    // Use appropriate collection sizes
    public List<String> processItems(int expectedSize) {
        // Pre-size to avoid resizing
        List<String> results = new ArrayList<>(expectedSize);
        // Process items...
        return results;
    }

    // Avoid regex compilation in loops
    private static final Pattern EMAIL_PATTERN =
            Pattern.compile("^[A-Za-z0-9+_.-]+@(.+)$");

    /**
     * Checks the address against the precompiled {@code EMAIL_PATTERN}.
     *
     * @param email candidate address; may be {@code null}
     * @return {@code true} if the whole string matches the pattern,
     *         {@code false} otherwise (including for {@code null})
     */
    public boolean isValidEmail(String email) {
        // Pattern.matcher(null) throws; a validator should answer "invalid" instead.
        return email != null && EMAIL_PATTERN.matcher(email).matches();
    }

    /**
     * Sums the array into a {@code long} accumulator.
     *
     * @param values the numbers to add up
     * @return the total
     */
    // Use primitives over boxed types
    public long sumArray(int[] values) {
        long sum = 0L;
        for (int value : values) {
            sum += value;
        }
        return sum;
    }

    /**
     * Validates an order, evaluating the cheap checks first so that
     * {@code expensiveValidation} only runs when they all pass.
     *
     * @param order the order to check; may be {@code null}
     * @return {@code true} only if the order, its id and its item list are
     *         non-null, the item list is non-empty and the expensive
     *         validation succeeds
     */
    // Short-circuit evaluation
    public boolean isValid(Order order) {
        // Check cheap conditions first
        return order != null
                && order.getId() != null
                && order.getItems() != null // guard like the id, instead of an NPE
                && !order.getItems().isEmpty()
                && expensiveValidation(order); // Only called if previous checks pass
    }

    // Connection/resource pooling
    private final HikariDataSource dataSource;

    /**
     * Creates the connection pool: at most 10 connections, at least 5 idle,
     * 300 000 ms idle timeout and 10 000 ms connection timeout.
     */
    public PerformanceOptimizations() {
        HikariConfig config = new HikariConfig();
        // NOTE(review): no JDBC URL or DataSource is configured in this
        // snippet - presumably supplied elsewhere; confirm before use.
        config.setMaximumPoolSize(10);
        config.setMinimumIdle(5);
        config.setIdleTimeout(300_000);
        config.setConnectionTimeout(10_000);
        this.dataSource = new HikariDataSource(config);
    }

    /**
     * Applies {@code cpuIntensiveOperation} to every input using a parallel stream.
     *
     * @param inputs the items to process
     * @return the results as a list
     */
    // Parallel processing for CPU-bound work
    public List<Result> processInParallel(List<Input> inputs) {
        return inputs.parallelStream()
                .map(this::cpuIntensiveOperation)
                .toList();
    }

    // Caching expensive computations
    private final LoadingCache<String, ComputedValue> cache = Caffeine.newBuilder()
            .maximumSize(10_000)
            .expireAfterWrite(Duration.ofMinutes(10))
            .recordStats()
            .build(this::computeExpensiveValue);

    /**
     * Looks the key up in the cache; the cache is built with
     * {@code computeExpensiveValue} as its loader.
     *
     * @param key cache key
     * @return the value the cache returns for {@code key}
     */
    public ComputedValue getCachedValue(String key) {
        return cache.get(key);
    }
}
Anti-Patterns
❌ Premature Optimization
// WRONG - optimizing without measurement
/**
 * Anti-pattern illustration: copies the input into a char[] as the starting
 * point for a hand-written transformation.
 *
 * NOTE(review): the snippet is deliberately incomplete - no return statement
 * is shown, so it does not compile as written.
 *
 * @param input the text to process
 */
public String process(String input) {
// Copies the string's characters into a new array.
char[] chars = input.toCharArray();
// Complex manual optimization...
}
// RIGHT - profile first, then optimize if needed
/**
 * Returns the upper-case form of the input using the standard library call.
 *
 * NOTE(review): the no-argument toUpperCase() uses the JVM's default locale;
 * consider toUpperCase(Locale.ROOT) if locale-independent output is wanted.
 *
 * @param input the text to convert; must not be null
 * @return the upper-cased text
 */
public String process(String input) {
return input.toUpperCase(); // Simple and often fast enough
}
❌ Ignoring GC Pauses
// WRONG - allocating in tight loops
/**
 * Anti-pattern illustration: for every event a new Date and a new formatted
 * String are created inside the loop; the formatted value is not used.
 *
 * @param events the events to iterate over
 */
public void processEvents(List<Event> events) {
for (Event event : events) {
// Each pass allocates a Date plus the String built by String.format.
String formatted = String.format("Event: %s at %s",
event.getName(), new Date()); // Creates garbage
}
}
// RIGHT - reuse objects
// Single builder instance reused across all iterations and calls.
// NOTE(review): sb is instance state and StringBuilder is unsynchronized, so
// concurrent calls on the same object would interfere - confirm that this is
// only used from one thread.
private final StringBuilder sb = new StringBuilder();
/**
 * Builds the text "Event: <name>" for each event in the shared builder,
 * clearing the builder at the start of every iteration.
 *
 * @param events the events to iterate over
 */
public void processEvents(List<Event> events) {
for (Event event : events) {
// Resets the builder's length so the previous content is discarded.
sb.setLength(0);
sb.append("Event: ").append(event.getName());
// Use sb.toString() only when needed
}
}
Testing Strategies
Performance Testing with JMH
/**
 * Skeleton showing how JMH benchmarks can be executed from a unit test so
 * that performance regressions fail the build.
 */
@State(Scope.Benchmark)
public class PerformanceTest {

    /** Reference implementation to measure against (body left as a placeholder). */
    @Benchmark
    public void baselineMethod() {
        // Establish baseline
    }

    /** Candidate implementation to measure (body left as a placeholder). */
    @Benchmark
    public void optimizedMethod() {
        // Compare against baseline
    }

    /**
     * Runs the benchmarks of this class through the JMH {@code Runner} and
     * asserts that every primary score is below {@code expectedMaxLatency}.
     *
     * NOTE(review): {@code expectedMaxLatency} is not defined in this
     * snippet - presumably a field or constant supplied elsewhere.
     *
     * @throws Exception if the JMH run fails; {@code Runner.run()} throws the
     *         checked {@code RunnerException}, so it has to be declared for
     *         the method to compile
     */
    @Test
    void performanceRegression() throws Exception {
        // Integrate with CI/CD
        Options opt = new OptionsBuilder()
                .include(PerformanceTest.class.getSimpleName())
                .build();

        Collection<RunResult> results = new Runner(opt).run();

        for (RunResult result : results) {
            Result<?> primary = result.getPrimaryResult();
            assertThat(primary.getScore())
                    .isLessThan(expectedMaxLatency);
        }
    }
}