Skip to main content

MongoDB Sharding

Sharded Cluster Setup

Start Config Servers

# Start config server replica set (3 servers recommended)
mongod --configsvr --replSet configReplSet --dbpath /data/configdb --port 27019

# Initialize config server replica set
mongosh --port 27019
rs.initiate({
_id: "configReplSet",
configsvr: true,
members: [
{ _id: 0, host: "config1.example.com:27019" },
{ _id: 1, host: "config2.example.com:27019" },
{ _id: 2, host: "config3.example.com:27019" }
]
});

Start Shard Servers

# Start shard replica sets
# Shard 1
mongod --shardsvr --replSet shard1ReplSet --dbpath /data/shard1 --port 27018

# Shard 2
mongod --shardsvr --replSet shard2ReplSet --dbpath /data/shard2 --port 27018

# Initialize shard replica sets
mongosh --port 27018
rs.initiate({
_id: "shard1ReplSet",
members: [
{ _id: 0, host: "shard1-a.example.com:27018" },
{ _id: 1, host: "shard1-b.example.com:27018" },
{ _id: 2, host: "shard1-c.example.com:27018" }
]
});

Start mongos Router

# Start mongos router
mongos --configdb configReplSet/config1.example.com:27019,config2.example.com:27019,config3.example.com:27019 --port 27017

Add Shards to Cluster

# Connect to mongos
mongosh --port 27017

// Add shards
sh.addShard("shard1ReplSet/shard1-a.example.com:27018");
sh.addShard("shard2ReplSet/shard2-a.example.com:27018");
sh.addShard("shard3ReplSet/shard3-a.example.com:27018");

// Check shard status
sh.status();

Enable Sharding

Enable Sharding on Database

// Enable sharding for database
sh.enableSharding("myapp");

// Create shard key index first (switch to the target database beforehand: `use myapp`)
db.users.createIndex({ userId: 1 });

// Shard collection
sh.shardCollection("myapp.users", { userId: 1 });

// Compound shard key
db.orders.createIndex({ customerId: 1, orderDate: 1 });
sh.shardCollection("myapp.orders", { customerId: 1, orderDate: 1 });

Shard Key Strategies

// Hashed shard key (even distribution)
db.users.createIndex({ _id: "hashed" });
sh.shardCollection("myapp.users", { _id: "hashed" });

// Range-based shard key
db.events.createIndex({ timestamp: 1, userId: 1 });
sh.shardCollection("myapp.events", { timestamp: 1, userId: 1 });

// Tag-aware sharding
db.products.createIndex({ category: 1, _id: 1 });
sh.shardCollection("myapp.products", { category: 1, _id: 1 });

Shard Management

Check Shard Distribution

// Overall cluster status
sh.status();

// Collection shard distribution
db.users.getShardDistribution();

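// Chunk information (stored in config.chunks: run `use config` first; MongoDB 5.0+ identifies chunks by collection uuid rather than ns)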
db.chunks.find({"ns": "myapp.users"}).pretty();

// Shard sizes
db.stats();

Manual Chunk Operations

// Split chunk at specific value
sh.splitAt("myapp.users", { userId: "user123" });

// Split chunk in half
sh.splitFind("myapp.users", { userId: "user123" });

// Move chunk to specific shard
sh.moveChunk("myapp.users", { userId: "user123" }, "shard2ReplSet");

Balancer Management

// Check balancer status
sh.getBalancerState();
sh.isBalancerRunning();

// Start/stop balancer
sh.startBalancer();
sh.stopBalancer();

// Configure balancer window (stored in config.settings: run `use config` first)
db.settings.update(
{ _id: "balancer" },
{ $set: { activeWindow: { start: "23:00", stop: "06:00" } } },
{ upsert: true }
);

// Disable, then re-enable, balancing for a single collection (e.g. around maintenance)
sh.disableBalancing("myapp.orders");
sh.enableBalancing("myapp.orders");

Zone Sharding (Tag-based)

Configure Zones

// Add shard tags
sh.addShardTag("shard1ReplSet", "US-EAST");
sh.addShardTag("shard2ReplSet", "US-WEST");
sh.addShardTag("shard3ReplSet", "EUROPE");

// Define tag ranges
sh.addTagRange(
"myapp.users",
{ region: "us-east", userId: MinKey },
{ region: "us-east", userId: MaxKey },
"US-EAST"
);

sh.addTagRange(
"myapp.users",
{ region: "us-west", userId: MinKey },
{ region: "us-west", userId: MaxKey },
"US-WEST"
);

sh.addTagRange(
"myapp.users",
{ region: "europe", userId: MinKey },
{ region: "europe", userId: MaxKey },
"EUROPE"
);

Remove Zones

// Remove tag range
sh.removeTagRange(
"myapp.users",
{ region: "us-east", userId: MinKey },
{ region: "us-east", userId: MaxKey }
);

// Remove shard tag
sh.removeShardTag("shard1ReplSet", "US-EAST");

Sharding Best Practices

Good Shard Key Characteristics

// High Cardinality - many unique values
{ userId: 1 } // Good if many users
{ status: 1 } // Bad - only few status values

// Even Distribution
{ _id: "hashed" } // Good - hash provides even distribution
{ timestamp: 1 } // Bad - hotspotting on recent data

// Avoid Monotonic Keys
{ timestamp: 1 } // Bad - always increasing
{ customerId: 1, timestamp: 1 } // Better - compound key

// Query Isolation
{ tenantId: 1 } // Good - queries usually tenant-specific
{ category: 1 } // Good - if queries filter by category

Shard Key Patterns

// User-based sharding
sh.shardCollection("myapp.users", { userId: "hashed" });
sh.shardCollection("myapp.user_posts", { userId: 1, postId: 1 });

// Time-based with compound key
sh.shardCollection("myapp.events", { eventType: 1, timestamp: 1 });

// Geographic sharding
sh.shardCollection("myapp.stores", { region: 1, storeId: 1 });

// Multi-tenant sharding
sh.shardCollection("myapp.documents", { tenantId: 1, documentId: 1 });

Monitoring and Maintenance

Performance Monitoring

// Check chunk distribution
db.printShardingStatus();

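// Find jumbo chunks (chunks exceeding the configured chunk size that cannot be split; default size is 64MB before MongoDB 6.0, 128MB from 6.0)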
db.chunks.find({"jumbo": true});

// Check balancer history
db.actionlog.find({"what": "balancer.round"}).sort({"time": -1}).limit(5);

// Migration statistics
db.changelog.find().sort({"time": -1}).limit(10);

Maintenance Operations

// Compact a collection's storage after deletions (run directly on each shard's mongod; compact is not supported through mongos)
db.runCommand({"compact": "users"});

// Clear the cached routing table so it is reloaded from the config servers
db.adminCommand({"flushRouterConfig": 1});

// Clean up orphaned documents (run on each shard's primary mongod, not through mongos)
db.adminCommand({"cleanupOrphaned": "myapp.users"});

Sharding Troubleshooting

Common Issues

// Check for failed migrations
db.actionlog.find({"what": /migrate/, "details.errmsg": {$exists: true}});

// Find uneven chunk distribution
sh.status().shards.forEach(function(shard) {
print(shard._id + ": " + shard.chunks + " chunks");
});

// Check for hot shards
db.serverStatus().sharding;

// Identify problematic queries
db.setProfilingLevel(1, {slowms: 100});
db.system.profile.find({"command.shardVersion": {$exists: false}}).limit(5);

Performance Optimization

// Pre-split collection for better initial distribution
for (var i = 0; i < 1000; i++) {
sh.splitAt("myapp.users", {userId: "user" + i});
}

// Optimize chunk size
db.settings.save({_id: "chunksize", value: 32}); // 32MB chunks

// Configure balancer more aggressively
db.settings.update(
{_id: "balancer"},
{$set: {
"_secondaryThrottle": false,
"writeConcern": {w: 1}
}},
{upsert: true}
);

Application Considerations

Query Patterns

// Target single shard (includes shard key)
db.users.find({userId: "user123"}); // Good - hits one shard

// Scatter-gather (no shard key)
db.users.find({email: "user@example.com"}); // Bad - queries all shards

// Range query on shard key
db.orders.find({
customerId: {$gte: "cust100", $lte: "cust200"}
}); // Good - targets specific shards

Write Patterns

// Bulk operations should include shard key
db.users.bulkWrite([
{
updateOne: {
filter: {userId: "user123"}, // Include shard key
update: {$set: {status: "active"}}
}
}
]);

// Aggregation with $match on shard key
db.orders.aggregate([
{$match: {customerId: "cust123"}}, // Target single shard
{$group: {_id: "$status", count: {$sum: 1}}}
]);

Configuration Templates

Production Sharded Cluster

# Config server (mongod.conf)
sharding:
clusterRole: configsvr
replication:
replSetName: configReplSet
net:
port: 27019
bindIp: 0.0.0.0
storage:
dbPath: /data/configdb

# Shard server (mongod.conf)
sharding:
clusterRole: shardsvr
replication:
replSetName: shard1ReplSet
net:
port: 27018
bindIp: 0.0.0.0
storage:
dbPath: /data/shard1

# mongos router (mongos.conf)
sharding:
configDB: configReplSet/config1:27019,config2:27019,config3:27019
net:
port: 27017
bindIp: 0.0.0.0

Connection Strings

// Application connection to mongos
const uri = "mongodb://mongos1:27017,mongos2:27017/myapp?readPreference=secondaryPreferred";

# Python with sharded cluster
client = MongoClient(
"mongodb://mongos1:27017,mongos2:27017/",
readPreference=ReadPreference.SECONDARY_PREFERRED
)

// Node.js with multiple mongos
const client = new MongoClient(
"mongodb://mongos1:27017,mongos2:27017/myapp",
{
readPreference: 'secondaryPreferred',
maxPoolSize: 100
}
);