MongoDB Monitoring
Server Status Monitoring
Basic Server Metrics
// Each statement below makes its own db.serverStatus() call and reads one
// section of the returned document.
// Server status overview (the full status document)
db.serverStatus();
// Memory usage (the `mem` section)
db.serverStatus().mem;
// Connection statistics (the `connections` section)
db.serverStatus().connections;
// Operation counters (the `opcounters` section)
db.serverStatus().opcounters;
// Network metrics (the `network` section)
db.serverStatus().network;
// Storage engine stats (WiredTiger)
// NOTE(review): presumably undefined when the storage engine is not WiredTiger — confirm
db.serverStatus().wiredTiger;
Database and Collection Stats
// Database statistics for the current database
db.stats();
db.stats(1024*1024); // Stats in MB (1024*1024 is passed as the scale argument)
// Collection statistics for the `users` collection
db.users.stats();
db.users.stats(1024*1024); // Stats in MB
// Index statistics: runs the $indexStats aggregation stage on `users`
db.users.aggregate([{$indexStats: {}}]);
// List all collections with sizes
// db.getCollectionInfos() drains the whole listCollections cursor, whereas
// reading only `cursor.firstBatch` can miss entries. Views are skipped
// because stats() is not available for them.
db.getCollectionInfos().forEach(function(info) {
  if (info.type === "view") {
    return;
  }
  // getCollection() also works for names that collide with db helper methods
  // (for example a collection called "stats"), where db[name] would not.
  var sizeMb = db.getCollection(info.name).stats().size / 1024 / 1024;
  print(info.name + ": " + sizeMb.toFixed(2) + " MB");
});
Performance Monitoring
Current Operations
// View current operations
db.currentOp();
// Filter long-running operations (running for more than 5 seconds)
db.currentOp({"secs_running": {$gt: 5}});
// Kill long-running operation
db.killOp(123456); // Use opid from currentOp
// Find operations by user
// currentOp reports the authenticated user inside the `effectiveUsers` array
// ({user, db} entries); there is no top-level `user` field to filter on.
db.currentOp({"effectiveUsers.user": "appUser"});
// Find operations by database (namespace starts with "myapp.")
db.currentOp({"ns": /^myapp\./});
Query Profiling
// Enable profiling for slow queries (level 1, threshold 100 ms)
db.setProfilingLevel(1, {slowms: 100});
// View profiler data (5 most recent entries)
db.system.profile.find().sort({ts: -1}).limit(5).pretty();
// Find slowest queries
db.system.profile.find({"millis": {$exists: true}}).sort({"millis": -1}).limit(5);
// Profile data by collection: entry count, average and max duration per namespace
db.system.profile.aggregate([
  {$group: {
    _id: "$ns",
    count: {$sum: 1},
    avgTime: {$avg: "$millis"},
    maxTime: {$max: "$millis"}
  }},
  {$sort: {avgTime: -1}}
]);
// Clear profiler collection
// system.profile cannot be dropped while profiling is enabled, so remember the
// current settings, switch profiling off, drop, then restore the settings.
var profilingBefore = db.getProfilingStatus();
db.setProfilingLevel(0);
db.system.profile.drop();
db.setProfilingLevel(profilingBefore.was, {slowms: profilingBefore.slowms});
Real-time Monitoring
# Passing -u without -p makes the tools prompt for the password, which keeps
# it out of shell history and the process list.
# mongostat - real-time server statistics
mongostat --host localhost:27017 -u admin
# mongotop - collection-level statistics
mongotop --host localhost:27017 -u admin
# Custom mongostat fields
mongostat --host localhost:27017 -u admin -o 'host,time,insert,query,update,delete,getmore,command'
Replica Set Monitoring
Replica Set Status
// Replica set status
rs.status();
// Replica set configuration
rs.conf();
// Check if node is primary
// hello supersedes the deprecated isMaster; inspect `isWritablePrimary` in the result.
db.hello();
// Replication lag information
// printSlaveReplicationInfo is deprecated (mongosh rejects it); this is its replacement.
db.printSecondaryReplicationInfo();
// Replication info (oplog size and time range)
db.getReplicationInfo();
Oplog Monitoring
// Switch to local database
use local;
// Oplog statistics
db.oplog.rs.stats();
// Oplog size and usage
var oplogStats = db.oplog.rs.stats();
print("Oplog size: " + (oplogStats.maxSize / 1024 / 1024 / 1024).toFixed(2) + " GB");
print("Oplog used: " + (oplogStats.size / 1024 / 1024 / 1024).toFixed(2) + " GB");
// Recent oplog entries
db.oplog.rs.find().sort({$natural: -1}).limit(5);
// Check oplog window
var first = db.oplog.rs.find().sort({$natural: 1}).limit(1).next();
var last = db.oplog.rs.find().sort({$natural: -1}).limit(1).next();
var windowHours = (last.ts.getTime() - first.ts.getTime()) / 1000 / 60 / 60;
print("Oplog window: " + windowHours.toFixed(2) + " hours");
Sharding Monitoring
Shard Status
// Sharding status overview
sh.status();
// Balancer status
sh.getBalancerState();
sh.isBalancerRunning();
// The sharding metadata collections (chunks, changelog, actionlog) live in
// the `config` database, so address it explicitly instead of relying on
// whichever database the shell currently has selected.
var configDB = db.getSiblingDB("config");
// Chunk distribution (chunk count per shard, largest first)
configDB.chunks.aggregate([
  {$group: {_id: "$shard", count: {$sum: 1}}},
  {$sort: {count: -1}}
]);
// Migration history (10 most recent moveChunk entries)
configDB.changelog.find({"what": /moveChunk/}).sort({"time": -1}).limit(10);
// Failed migrations (entries that carry an error message)
configDB.actionlog.find({"what": /migrate/, "details.errmsg": {$exists: true}});
mongos Monitoring
// Connected to mongos
db.isMaster();
// Shard connection pool stats
db.runCommand("connPoolStats");
// Config server connection
// getCmdLineOpts has to be issued against the admin database, so use
// adminCommand rather than running it on the currently selected database.
db.adminCommand("getCmdLineOpts").parsed.sharding.configDB;
Custom Monitoring Scripts
Health Check Script
// Comprehensive health check
// Reads db.serverStatus(), rs.status() and db.stats() and returns an object:
//   timestamp   - when the check started
//   server      - uptime, connections, memory and opcounters from serverStatus
//   replica     - set name, member count and primary name (left {} when
//                 rs.status() fails, e.g. on a standalone server)
//   performance - avgObjSize, indexSize and dataSize from db.stats()
//   issues      - human-readable strings, one per detected problem
// Never throws: an unexpected failure is recorded in `issues` instead.
function mongoHealthCheck() {
  const health = {
    timestamp: new Date(),
    server: {},
    replica: {},
    performance: {},
    issues: []
  };
  try {
    // Server health
    const serverStatus = db.serverStatus();
    health.server = {
      uptime: serverStatus.uptime,
      connections: serverStatus.connections,
      memory: serverStatus.mem,
      opcounters: serverStatus.opcounters
    };
    // Connection usage: share of (current + available) slots in use
    const connPct = (serverStatus.connections.current /
      (serverStatus.connections.current + serverStatus.connections.available)) * 100;
    if (connPct > 80) {
      health.issues.push("High connection usage: " + connPct.toFixed(1) + "%");
    }
    // Memory usage
    if (serverStatus.mem.resident > serverStatus.mem.virtual * 0.8) {
      health.issues.push("High memory usage");
    }
    // Replica set health (if applicable)
    try {
      const rsStatus = rs.status();
      const primary = rsStatus.members.find((m) => m.state === 1);
      health.replica = {
        name: rsStatus.set,
        members: rsStatus.members.length,
        primary: primary?.name
      };
      // Lag is how far a secondary trails the primary's last applied write.
      // Comparing against the wall clock (rsStatus.date) reports phantom lag
      // whenever the primary itself has been idle, so the wall clock is used
      // only as a fallback when no primary is visible.
      const referenceDate = primary?.optimeDate ?? rsStatus.date;
      // Check for unhealthy members
      rsStatus.members.forEach(function(member) {
        if (member.health !== 1) {
          health.issues.push("Unhealthy replica member: " + member.name);
        }
        if (member.state === 2) { // Secondary
          const lag = (referenceDate - member.optimeDate) / 1000;
          if (lag > 10) {
            health.issues.push(member.name + " replication lag: " + lag + "s");
          }
        }
      });
    } catch (e) {
      // Deliberately best-effort: a failing rs.status() is treated as
      // "not a replica set" and the replica section stays as it is.
    }
    // Performance checks
    const stats = db.stats();
    health.performance = {
      avgObjSize: stats.avgObjSize,
      indexSize: stats.indexSize,
      dataSize: stats.dataSize
    };
    if (stats.indexSize > stats.dataSize) {
      health.issues.push("Index size larger than data size");
    }
  } catch (error) {
    health.issues.push("Health check error: " + error.message);
  }
  return health;
}
// Execute the health check, then print a header, the issue count and one
// "- ..." line per issue.
var healthResult = mongoHealthCheck();
print(`MongoDB Health Check:`);
print(`Issues found: ${healthResult.issues.length}`);
healthResult.issues.forEach(function(issue) {
  print(`- ${issue}`);
});
Performance Monitoring Script
// Performance metrics collection
// Builds a one-shot snapshot for the current database and returns an object:
//   timestamp   - when collection started
//   server      - uptime, current connection count, memory figures and the
//                 WiredTiger cache section ({} when serverStatus has none)
//   operations  - insert/query/update/delete/command opcounters
//   slowQueries - up to 10 system.profile entries from the last 5 minutes
//                 with millis > 100, slowest first
//   indexUsage  - $indexStats output keyed by collection name
function collectPerformanceMetrics() {
  const metrics = {
    timestamp: new Date(),
    server: {},
    operations: {},
    slowQueries: [],
    indexUsage: {}
  };
  const serverStatus = db.serverStatus();
  // Server metrics
  metrics.server = {
    uptime: serverStatus.uptime,
    connections: serverStatus.connections.current,
    memory: {
      resident: serverStatus.mem.resident,
      virtual: serverStatus.mem.virtual,
      // NOTE(review): `mem.mapped` is presumably absent on WiredTiger-only
      // servers, leaving this undefined — confirm before relying on it.
      mapped: serverStatus.mem.mapped
    },
    cache: serverStatus.wiredTiger?.cache || {}
  };
  // Operation metrics
  const opcounters = serverStatus.opcounters;
  metrics.operations = {
    insert: opcounters.insert,
    query: opcounters.query,
    update: opcounters.update,
    delete: opcounters.delete,
    command: opcounters.command
  };
  // Slow queries (last 5 minutes)
  const fiveMinutesAgo = new Date(Date.now() - 5 * 60 * 1000);
  metrics.slowQueries = db.system.profile.find({
    "ts": {$gte: fiveMinutesAgo},
    "millis": {$gt: 100}
  }).sort({"millis": -1}).limit(10).toArray();
  // Index usage statistics
  // The collection list must come from the *current* database, the same one
  // the $indexStats lookups below run against. Listing via adminCommand would
  // enumerate the admin database instead. getCollectionInfos() also drains
  // the full cursor rather than just its first batch.
  db.getCollectionInfos().forEach(function(info) {
    if (info.name.startsWith("system.")) {
      return;
    }
    try {
      metrics.indexUsage[info.name] =
        db.getCollection(info.name).aggregate([{$indexStats: {}}]).toArray();
    } catch (e) {
      // Best-effort: skip collections that can't be accessed
    }
  });
  return metrics;
}
// Gather the metrics snapshot and print a short summary of it
var perfMetrics = collectPerformanceMetrics();
print(`Performance Metrics:`);
print(`Active connections: ${perfMetrics.server.connections}`);
print(`Memory usage: ${perfMetrics.server.memory.resident} MB`);
print(`Slow queries (last 5 min): ${perfMetrics.slowQueries.length}`);
External Monitoring Tools
MongoDB Ops Manager
// Install Ops Manager monitoring agent
// Configure monitoring in ops manager UI
// Custom metrics via HTTP API
// A single serverStatus() call feeds every field, so the payload describes one
// consistent point in time and costs one round trip instead of three.
var opsManagerStatus = db.serverStatus();
var metricsData = {
  "timestamp": new Date(),
  "hostname": "mongodb-server-1",
  "metrics": {
    "connections": opsManagerStatus.connections.current,
    "opcounters": opsManagerStatus.opcounters,
    "memory": opsManagerStatus.mem
  }
};
// Send to monitoring system (pseudo-code)
// HTTP.post("http://monitoring-api/metrics", metricsData);
Prometheus Integration
# Prometheus scrape configuration (prometheus.yml) for the MongoDB exporter.
# Nesting is significant in YAML: scrape_interval belongs to `global`, and
# static_configs / scrape_interval / metrics_path belong to the 'mongodb' job.
global:
  scrape_interval: 15s
scrape_configs:
  - job_name: 'mongodb'
    static_configs:
      - targets: ['localhost:9216']
    scrape_interval: 15s
    metrics_path: /metrics
# Start MongoDB exporter for Prometheus
# Connects using the given MongoDB URI and listens on :9216, the port the
# 'mongodb' scrape job targets (localhost:9216).
# NOTE(review): the URI puts user:pass on the command line — confirm this is
# acceptable here, or supply the credentials another way.
mongodb_exporter --mongodb.uri="mongodb://user:pass@localhost:27017" \
--web.listen-address=":9216"
Custom Alerting
// Alert conditions
// Evaluates four checks and returns an array of {severity, message} alerts
// (empty when nothing fires):
//   - connection usage above 90% of (current + available)   -> critical
//   - resident memory above 8192 MB                          -> warning
//   - a secondary more than 30 s behind                      -> warning
//   - profiler entries over 5 s within the last minute       -> warning
function checkAlertConditions() {
  const alerts = [];
  const serverStatus = db.serverStatus();
  // High connection usage
  const connUsage = serverStatus.connections.current /
    (serverStatus.connections.current + serverStatus.connections.available);
  if (connUsage > 0.9) {
    alerts.push({
      severity: "critical",
      message: "Connection usage above 90%: " + (connUsage * 100).toFixed(1) + "%"
    });
  }
  // High memory usage
  if (serverStatus.mem.resident > 8192) { // 8GB
    alerts.push({
      severity: "warning",
      message: "Memory usage high: " + serverStatus.mem.resident + " MB"
    });
  }
  // Replication lag
  try {
    const rsStatus = rs.status();
    // Lag is measured against the primary's last applied write. The wall
    // clock (rsStatus.date) would count the primary's own idle time as lag,
    // so it is only the fallback when no primary is visible.
    const primary = rsStatus.members.find((m) => m.state === 1);
    const referenceDate = primary?.optimeDate ?? rsStatus.date;
    rsStatus.members.forEach(function(member) {
      if (member.state === 2) { // Secondary
        const lag = (referenceDate - member.optimeDate) / 1000;
        if (lag > 30) {
          alerts.push({
            severity: "warning",
            message: member.name + " replication lag: " + lag + " seconds"
          });
        }
      }
    });
  } catch (e) {
    // Deliberately best-effort: a failing rs.status() is treated as
    // "not a replica set" and produces no lag alerts.
  }
  // Slow queries
  const slowQueries = db.system.profile.find({
    "ts": {$gte: new Date(Date.now() - 60000)}, // Last minute
    "millis": {$gt: 5000} // > 5 seconds
  }).count();
  if (slowQueries > 0) {
    alerts.push({
      severity: "warning",
      message: slowQueries + " slow queries detected in last minute"
    });
  }
  return alerts;
}
// Evaluate the alert conditions and print each alert as "[SEVERITY] message"
var currentAlerts = checkAlertConditions();
currentAlerts.forEach((alert) => {
  const label = alert.severity.toUpperCase();
  print(`[${label}] ${alert.message}`);
  // Send to alerting system
});
Log Analysis
MongoDB Log Parsing
# Every command below scans /var/log/mongodb/mongod.log.
# NOTE(review): the log path and the exact message wording presumably vary by
# installation and server version — confirm before relying on these patterns.
# Find slow queries in logs (lines containing "slow operation")
grep "slow operation" /var/log/mongodb/mongod.log
# Connection events (case-sensitive match on "connection")
grep "connection" /var/log/mongodb/mongod.log
# Index usage warnings (case-sensitive match on "index")
grep "index" /var/log/mongodb/mongod.log
# Memory warnings (-i: case-insensitive match on "memory")
grep -i "memory" /var/log/mongodb/mongod.log
Log Analysis Script
// Analyze MongoDB logs (pseudo-code for shell script integration)
// Placeholder: returns a fresh, zeroed analysis record on every call. Real
// parsing is expected to live in a shell script that processes the MongoDB
// log files; how it fills the record depends on log format and tooling.
function analyzeMongoLogs() {
  const connectionCounters = { opened: 0, closed: 0, failed: 0 };
  const collectedErrors = [];
  const collectedWarnings = [];
  // Parse log file and populate analysis (not implemented here)
  return {
    slowQueries: 0,
    connections: connectionCounters,
    errors: collectedErrors,
    warnings: collectedWarnings
  };
}
Monitoring Dashboard Queries
Key Metrics for Dashboards
// Take each snapshot once: every dashboard field then describes the same
// point in time, and the server is asked twice instead of eight times.
var dashboardStatus = db.serverStatus();
var dashboardDbStats = db.stats();
// Connection metrics
var connectionMetrics = {
  current: dashboardStatus.connections.current,
  available: dashboardStatus.connections.available,
  totalCreated: dashboardStatus.connections.totalCreated
};
// Operation metrics
var opMetrics = dashboardStatus.opcounters;
// Memory metrics
var memMetrics = dashboardStatus.mem;
// Storage metrics (current database)
var storageMetrics = {
  dataSize: dashboardDbStats.dataSize,
  indexSize: dashboardDbStats.indexSize,
  storageSize: dashboardDbStats.storageSize
};
// Combine for dashboard
var dashboardData = {
  timestamp: new Date(),
  connections: connectionMetrics,
  operations: opMetrics,
  memory: memMetrics,
  storage: storageMetrics
};
// Emit as pretty-printed JSON (2-space indent)
print(JSON.stringify(dashboardData, null, 2));