MongoDB Sharding
Sharded Cluster Setup
Start Config Servers
# Start config server replica set (3 servers recommended)
mongod --configsvr --replSet configReplSet --dbpath /data/configdb --port 27019
# Initialize config server replica set
mongosh --port 27019
rs.initiate({
_id: "configReplSet",
configsvr: true,
members: [
{ _id: 0, host: "config1.example.com:27019" },
{ _id: 1, host: "config2.example.com:27019" },
{ _id: 2, host: "config3.example.com:27019" }
]
});
Start Shard Servers
# Start shard replica sets
# Shard 1
mongod --shardsvr --replSet shard1ReplSet --dbpath /data/shard1 --port 27018
# Shard 2
mongod --shardsvr --replSet shard2ReplSet --dbpath /data/shard2 --port 27018
# Initialize each shard replica set (shown for shard1ReplSet; repeat on one member of every other shard)
mongosh --port 27018
rs.initiate({
_id: "shard1ReplSet",
members: [
{ _id: 0, host: "shard1-a.example.com:27018" },
{ _id: 1, host: "shard1-b.example.com:27018" },
{ _id: 2, host: "shard1-c.example.com:27018" }
]
});
Start mongos Router
# Start mongos router
mongos --configdb configReplSet/config1.example.com:27019,config2.example.com:27019,config3.example.com:27019 --port 27017
Add Shards to Cluster
// Connect to mongos
mongosh --port 27017
// Add shards (each replica set must already be running and initiated; one seed member per set is enough)
sh.addShard("shard1ReplSet/shard1-a.example.com:27018");
sh.addShard("shard2ReplSet/shard2-a.example.com:27018");
sh.addShard("shard3ReplSet/shard3-a.example.com:27018");
// Check shard status
sh.status();
Enable Sharding
Enable Sharding on Database
// Enable sharding for database
sh.enableSharding("myapp");
// Create shard key index first (switch to the database with `use myapp` so that db.users is myapp.users)
db.users.createIndex({ userId: 1 });
// Shard collection
sh.shardCollection("myapp.users", { userId: 1 });
// Compound shard key
db.orders.createIndex({ customerId: 1, orderDate: 1 });
sh.shardCollection("myapp.orders", { customerId: 1, orderDate: 1 });
Shard Key Strategies
// Hashed shard key (even distribution); an alternative to the ranged { userId: 1 } key, not an additional step
db.users.createIndex({ _id: "hashed" });
sh.shardCollection("myapp.users", { _id: "hashed" });
// Range-based shard key
db.events.createIndex({ timestamp: 1, userId: 1 });
sh.shardCollection("myapp.events", { timestamp: 1, userId: 1 });
// Tag-aware sharding
db.products.createIndex({ category: 1, _id: 1 });
sh.shardCollection("myapp.products", { category: 1, _id: 1 });
Shard Management
Check Shard Distribution
// Overall cluster status
sh.status();
// Collection shard distribution
db.users.getShardDistribution();
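// Chunk information (config.chunks; run `use config` first. MongoDB 5.0+ identifies chunks by collection uuid instead of ns)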
db.chunks.find({"ns": "myapp.users"}).pretty();
// Shard sizes
db.stats();
Manual Chunk Operations
// Split chunk at specific value
sh.splitAt("myapp.users", { userId: "user123" });
// Split chunk in half
sh.splitFind("myapp.users", { userId: "user123" });
// Move chunk to specific shard
sh.moveChunk("myapp.users", { userId: "user123" }, "shard2ReplSet");
Balancer Management
// Check balancer status
sh.getBalancerState();
sh.isBalancerRunning();
// Start/stop balancer
sh.startBalancer();
sh.stopBalancer();
// Configure balancer window (config.settings; run `use config` first)
db.settings.update(
{ _id: "balancer" },
{ $set: { activeWindow: { start: "23:00", stop: "06:00" } } },
{ upsert: true }
);
// Disable balancing for a single collection during maintenance, then re-enable it afterwards
sh.disableBalancing("myapp.orders");
sh.enableBalancing("myapp.orders");
Zone Sharding (Tag-based)
Configure Zones
// Add shard tags (sh.addShardTag is the legacy name for sh.addShardToZone)
sh.addShardTag("shard1ReplSet", "US-EAST");
sh.addShardTag("shard2ReplSet", "US-WEST");
sh.addShardTag("shard3ReplSet", "EUROPE");
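// Define tag ranges (range fields must be the shard key or a prefix of it; this assumes myapp.users is sharded on { region: 1, userId: 1 })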
sh.addTagRange(
"myapp.users",
{ region: "us-east", userId: MinKey },
{ region: "us-east", userId: MaxKey },
"US-EAST"
);
sh.addTagRange(
"myapp.users",
{ region: "us-west", userId: MinKey },
{ region: "us-west", userId: MaxKey },
"US-WEST"
);
sh.addTagRange(
"myapp.users",
{ region: "europe", userId: MinKey },
{ region: "europe", userId: MaxKey },
"EUROPE"
);
Remove Zones
// Remove tag range
sh.removeTagRange(
"myapp.users",
{ region: "us-east", userId: MinKey },
{ region: "us-east", userId: MaxKey }
);
// Remove shard tag
sh.removeShardTag("shard1ReplSet", "US-EAST");
Sharding Best Practices
Good Shard Key Characteristics
// High Cardinality - many unique values
{ userId: 1 } // Good if many users
{ status: 1 } // Bad - only few status values
// Even Distribution
{ _id: "hashed" } // Good - hash provides even distribution
{ timestamp: 1 } // Bad - hotspotting on recent data
// Avoid Monotonic Keys
{ timestamp: 1 } // Bad - always increasing
{ customerId: 1, timestamp: 1 } // Better - compound key
// Query Isolation
{ tenantId: 1 } // Good - queries usually tenant-specific
{ category: 1 } // Good - if queries filter by category
Shard Key Patterns
// User-based sharding
sh.shardCollection("myapp.users", { userId: "hashed" });
sh.shardCollection("myapp.user_posts", { userId: 1, postId: 1 });
// Time-based with compound key
sh.shardCollection("myapp.events", { eventType: 1, timestamp: 1 });
// Geographic sharding
sh.shardCollection("myapp.stores", { region: 1, storeId: 1 });
// Multi-tenant sharding
sh.shardCollection("myapp.documents", { tenantId: 1, documentId: 1 });
Monitoring and Maintenance
Performance Monitoring
// Check chunk distribution
db.printShardingStatus();
// Find jumbo chunks (chunks larger than the configured chunk size that cannot be split; query config.chunks)
db.chunks.find({"jumbo": true});
// Check balancer history (config.actionlog)
db.actionlog.find({"what": "balancer.round"}).sort({"time": -1}).limit(5);
// Migration statistics (config.changelog records chunk splits and migrations)
db.changelog.find().sort({"time": -1}).limit(10);
Maintenance Operations
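// Reclaim disk space after large deletions (compact works on a collection's storage, not on chunks; run it on each shard member, not through mongos)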
db.runCommand({"compact": "users"});
// Refresh the router's cached routing table from the config servers
db.adminCommand({"flushRouterConfig": 1});
// Clean up orphaned documents (run directly on a shard's primary, not through mongos)
db.adminCommand({"cleanupOrphaned": "myapp.users"});
Sharding Troubleshooting
Common Issues
// Check for failed migrations
db.actionlog.find({"what": /migrate/, "details.errmsg": {$exists: true}});
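// Find uneven chunk distribution (count chunks per shard from config.chunks)
db.getSiblingDB("config").chunks.aggregate([
{ $group: { _id: "$shard", chunks: { $sum: 1 } } }
]).forEach(function(shard) {
print(shard._id + ": " + shard.chunks + " chunks");
});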
// Check for hot shards
db.serverStatus().sharding;
// Identify problematic queries
db.setProfilingLevel(1, {slowms: 100});
db.system.profile.find({"command.shardVersion": {$exists: false}}).limit(5);
Performance Optimization
// Pre-split collection for better initial distribution
for (var i = 0; i < 1000; i++) {
sh.splitAt("myapp.users", {userId: "user" + i});
}
// Optimize chunk size (config.settings; value is in MB)
db.getSiblingDB("config").settings.updateOne({_id: "chunksize"}, {$set: {value: 32}}, {upsert: true}); // 32MB chunks
// Configure balancer more aggressively
db.settings.update(
{_id: "balancer"},
{$set: {
"_secondaryThrottle": false,
"writeConcern": {w: 1}
}},
{upsert: true}
);
Application Considerations
Query Patterns
// Target single shard (includes shard key)
db.users.find({userId: "user123"}); // Good - hits one shard
// Scatter-gather (no shard key)
db.users.find({email: "user@example.com"}); // Bad - queries all shards
// Range query on shard key
db.orders.find({
customerId: {$gte: "cust100", $lte: "cust200"}
}); // Good - targets specific shards
Write Patterns
// Bulk operations should include shard key
db.users.bulkWrite([
{
updateOne: {
filter: {userId: "user123"}, // Include shard key
update: {$set: {status: "active"}}
}
}
]);
// Aggregation with $match on shard key
db.orders.aggregate([
{$match: {customerId: "cust123"}}, // Target single shard
{$group: {_id: "$status", count: {$sum: 1}}}
]);
Configuration Templates
Production Sharded Cluster
# Config server (mongod.conf)
sharding:
clusterRole: configsvr
replication:
replSetName: configReplSet
net:
port: 27019
bindIp: 0.0.0.0
storage:
dbPath: /data/configdb
# Shard server (mongod.conf)
sharding:
clusterRole: shardsvr
replication:
replSetName: shard1ReplSet
net:
port: 27018
bindIp: 0.0.0.0
storage:
dbPath: /data/shard1
# mongos router (mongos.conf)
sharding:
configDB: configReplSet/config1:27019,config2:27019,config3:27019
net:
port: 27017
bindIp: 0.0.0.0
Connection Strings
// Application connection to mongos
const uri = "mongodb://mongos1:27017,mongos2:27017/myapp?readPreference=secondaryPreferred";
# Python (PyMongo) with sharded cluster
client = MongoClient(
"mongodb://mongos1:27017,mongos2:27017/",
readPreference=ReadPreference.SECONDARY_PREFERRED
)
// Node.js with multiple mongos
const client = new MongoClient(
"mongodb://mongos1:27017,mongos2:27017/myapp",
{
readPreference: 'secondaryPreferred',
maxPoolSize: 100
}
);