Skip to main content

MongoDB Schema Design

Design Principles

Document Structure Patterns

// Embedded documents (1:1, 1:few)
{
_id: ObjectId("..."),
name: "John Doe",
address: {
street: "123 Main St",
city: "New York",
zipCode: "10001"
},
preferences: {
theme: "dark",
language: "en",
notifications: true
}
}

// Array of embedded documents (1:many)
{
_id: ObjectId("..."),
title: "Blog Post",
comments: [
{
author: "Alice",
text: "Great post!",
createdAt: ISODate("...")
},
{
author: "Bob",
text: "Thanks for sharing",
createdAt: ISODate("...")
}
]
}

// References (many:many, large arrays)
{
_id: ObjectId("..."),
name: "Product",
categoryIds: [
ObjectId("cat1"),
ObjectId("cat2")
]
}

Common Schema Patterns

One-to-One (Embed)

// User profile
{
_id: ObjectId("..."),
username: "john_doe",
email: "john@example.com",
profile: {
firstName: "John",
lastName: "Doe",
bio: "Software developer",
avatar: "https://...",
socialLinks: {
twitter: "@johndoe",
linkedin: "johndoe"
}
}
}

One-to-Few (Embed Array)

// Blog post with comments (< 100 comments)
{
_id: ObjectId("..."),
title: "Getting Started with MongoDB",
content: "...",
author: "john_doe",
tags: ["mongodb", "database", "nosql"],
comments: [
{
_id: ObjectId("..."),
author: "alice",
text: "Great tutorial!",
createdAt: ISODate("..."),
likes: 5
}
],
metadata: {
views: 1250,
likes: 45,
shares: 12
}
}

One-to-Many (Reference)

// User document
{
_id: ObjectId("user1"),
name: "John Doe",
email: "john@example.com"
}

// Order documents (separate collection)
{
_id: ObjectId("..."),
userId: ObjectId("user1"),
orderNumber: "ORD-001",
items: [...],
total: 99.99,
createdAt: ISODate("...")
}

Many-to-Many (Two-Way References)

// Category document
{
_id: ObjectId("cat1"),
name: "Electronics",
productIds: [
ObjectId("prod1"),
ObjectId("prod2")
]
}

// Product document
{
_id: ObjectId("prod1"),
name: "Laptop",
price: 999.99,
categoryIds: [
ObjectId("cat1"),
ObjectId("cat2")
]
}

Advanced Patterns

Polymorphic Pattern

// Different event types in same collection
{
_id: ObjectId("..."),
type: "user_login",
timestamp: ISODate("..."),
userId: ObjectId("..."),
ipAddress: "192.168.1.1",
userAgent: "Mozilla/5.0..."
}

{
_id: ObjectId("..."),
type: "purchase",
timestamp: ISODate("..."),
userId: ObjectId("..."),
orderId: ObjectId("..."),
amount: 49.99,
items: [...]
}

{
_id: ObjectId("..."),
type: "system_alert",
timestamp: ISODate("..."),
severity: "warning",
service: "payment-processor",
message: "High latency detected"
}

Bucket Pattern (Time Series)

// Instead of one document per measurement
// Group measurements into buckets
{
_id: ObjectId("..."),
deviceId: "sensor_001",
bucketTime: ISODate("2024-01-01T12:00:00Z"),
measurements: {
"00": { temp: 23.5, humidity: 45 },
"01": { temp: 23.7, humidity: 44 },
"02": { temp: 23.9, humidity: 43 },
// ... minute-by-minute data
},
summary: {
count: 60,
avgTemp: 24.1,
minTemp: 22.8,
maxTemp: 25.2
}
}

Attribute Pattern

// Flexible product specifications
{
_id: ObjectId("..."),
name: "Gaming Laptop",
category: "electronics",
attributes: [
{ k: "processor", v: "Intel i7-12700H" },
{ k: "memory", v: "32GB" },
{ k: "storage", v: "1TB SSD" },
{ k: "weight", v: "2.3kg" }
]
}

// Create index for flexible queries
db.products.createIndex({ "attributes.k": 1, "attributes.v": 1 });

// Query any attribute
db.products.find({
attributes: {
$elemMatch: { k: "processor", v: /Intel/ }
}
});

Outlier Pattern

// Handle cases with too many related items
{
_id: ObjectId("..."),
name: "Popular Product",
category: "electronics",
reviewCount: 50000,
averageRating: 4.5,
// Only store recent/featured reviews
reviews: [
{
author: "alice",
rating: 5,
text: "Excellent product!",
featured: true
}
],
// Flag for additional processing
hasOverflowReviews: true
}

// Separate collection for overflow data
{
_id: ObjectId("..."),
productId: ObjectId("..."),
reviews: [
// Batch of older reviews
]
}

Schema Validation

JSON Schema Validation

// Create collection with schema validation
db.createCollection("users", {
validator: {
$jsonSchema: {
bsonType: "object",
required: ["name", "email", "createdAt"],
properties: {
name: {
bsonType: "string",
minLength: 2,
maxLength: 100,
description: "must be a string between 2-100 characters"
},
email: {
bsonType: "string",
pattern: "^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$",
description: "must be a valid email address"
},
age: {
bsonType: "int",
minimum: 0,
maximum: 150,
description: "must be an integer between 0-150"
},
createdAt: {
bsonType: "date",
description: "must be a date"
}
}
}
}
});

// Add validation to existing collection
db.runCommand({
collMod: "products",
validator: {
$jsonSchema: {
bsonType: "object",
required: ["name", "price", "category"],
properties: {
price: {
bsonType: "number",
minimum: 0,
description: "price must be a positive number"
}
}
}
},
validationLevel: "strict", // "strict" or "moderate"
validationAction: "error" // "error" or "warn"
});

Data Modeling Decisions

Embed vs Reference Decision Tree

// EMBED when:
// - Data is accessed together
// - Data doesn't grow unbounded
// - Related data is updated together
// - 1:1 or 1:few relationships

// User with preferences
{
name: "John",
preferences: {
theme: "dark",
language: "en"
}
}

// REFERENCE when:
// - Data grows large or unbounded
// - Data is accessed independently
// - Many-to-many relationships
// - Data is updated by different processes

// Order referencing customer
{
orderId: "12345",
customerId: ObjectId("..."),
items: [...]
}

Duplication for Performance

// Strategic duplication for read performance
{
_id: ObjectId("..."),
title: "Blog Post",
authorId: ObjectId("..."),
// Duplicate author name for display
authorName: "John Doe",
content: "...",
tags: ["mongodb", "database"],
stats: {
views: 1250,
likes: 45,
comments: 12
}
}

Version Pattern

// Handle schema evolution
{
_id: ObjectId("..."),
schemaVersion: "2.1",
name: "Product",
// v2.1 fields
specifications: {
processor: "Intel i7",
memory: "16GB"
},
// Legacy v1.0 field (deprecated)
specs: "Intel i7, 16GB RAM"
}

Collection Design Best Practices

Naming Conventions

// Collections: plural, lowercase
db.users
db.products
db.orderItems

// Fields: camelCase
{
firstName: "John",
lastName: "Doe",
createdAt: ISODate("..."),
isActive: true
}

// Consistent ID references
{
userId: ObjectId("..."), // Reference to users collection
productId: ObjectId("..."), // Reference to products collection
categoryIds: [ObjectId("...")], // Array of references
}

Common Field Patterns

// Standard audit fields
{
_id: ObjectId("..."),
createdAt: ISODate("..."),
updatedAt: ISODate("..."),
createdBy: ObjectId("..."),
updatedBy: ObjectId("..."),
version: 1,
isDeleted: false,
deletedAt: null
}

// Status and state management
{
status: "active", // "active", "inactive", "pending", "archived"
workflow: {
currentStage: "approval",
stages: ["draft", "review", "approval", "published"],
history: [
{
stage: "draft",
timestamp: ISODate("..."),
userId: ObjectId("...")
}
]
}
}