Skip to main content

Data Loading & Processing

Comprehensive guide to loading, parsing, and processing data in D3.js applications.

Data Loading Methods

CSV Data

// Basic CSV loading: every field of every row arrives as a string.
d3.csv('data.csv').then(rows => {
  console.log(rows);
  createVisualization(rows);
});

// With a row conversion function: called once per parsed row, its return
// value replaces the raw all-strings row in the resulting array.
const toTypedRow = row => ({
  date: new Date(row.date),
  value: +row.value, // numeric string -> number
  category: row.category,
  active: row.active === 'true', // only the exact string 'true' maps to true
});

d3.csv('data.csv', toTypedRow).then(rows => {
  createVisualization(rows);
});

// With error handling: a failed request or parse rejects the promise.
d3.csv('data.csv')
  .then(rows => {
    createVisualization(rows);
  })
  .catch(err => {
    console.error('Error loading CSV:', err);
  });

JSON Data

// Load JSON and pass the parsed document straight to the chart.
d3.json('data.json').then(payload => {
  createVisualization(payload);
});

// Load JSON, then flatten each nested item into a chart-ready record.
d3.json('data.json').then(payload => {
  const records = payload.items.map(({ id, metrics, timestamp }) => ({
    id,
    value: metrics.value,
    timestamp: new Date(timestamp),
  }));

  createVisualization(records);
});

TSV (Tab-Separated Values)

// Load a tab-separated file; the row function keeps `name` as-is and coerces
// `value` to a number while each row is parsed.
d3.tsv('data.tsv', ({ name, value }) => ({ name, value: +value })).then(rows => {
  createVisualization(rows);
});

Text Data

// Load a plain-text file made of `name,value` lines and parse it by hand.
d3.text('data.txt').then(function (text) {
  const data = text
    .split(/\r?\n/) // accept both Unix (\n) and Windows (\r\n) line endings
    .filter(line => line.trim() !== '') // a trailing newline would otherwise yield a { name: '', value: NaN } row
    .map(line => {
      const [name, value] = line.split(',');
      return { name, value: +value };
    });
  createVisualization(data);
});

XML Data

// Load an XML document and read the `name` / `value` attributes of every
// <item> element into plain objects (value coerced to a number).
d3.xml('data.xml').then(doc => {
  const itemNodes = doc.querySelectorAll('item');
  const data = Array.from(itemNodes, node => ({
    name: node.getAttribute('name'),
    value: +node.getAttribute('value'),
  }));
  createVisualization(data);
});

Multiple Data Sources

Loading Multiple Files

// Request all three files in parallel; the callback runs once every one of
// them has loaded, and the chain rejects as soon as any single load fails.
Promise.all([
  d3.csv('sales.csv'),
  d3.json('metadata.json'),
  d3.tsv('categories.tsv'),
])
  .then(function ([salesData, metadata, categories]) {
    // Index category names by id once, instead of scanning `categories`
    // again for every sale. The first row for an id wins, matching what
    // Array.prototype.find would have returned.
    const categoryNames = new Map();
    for (const { id, name } of categories) {
      if (!categoryNames.has(id)) {
        categoryNames.set(id, name);
      }
    }

    // Combine data sources; categoryName is undefined for an unknown id.
    const combinedData = salesData.map(sale => ({
      ...sale,
      categoryName: categoryNames.get(sale.categoryId),
      settings: metadata.settings,
    }));

    createVisualization(combinedData);
  })
  .catch(function (error) {
    console.error('Error loading data:', error);
  });

Sequential Loading

/**
 * Loads `config.json`, then the CSV file named by its `dataUrl`, runs the
 * rows through `processData` with the config's `settings`, and renders the
 * result. The second request cannot start until the first has resolved.
 *
 * Any failure along the way is logged rather than rethrown.
 *
 * @returns {Promise<void>}
 */
async function loadData() {
  try {
    const { dataUrl, settings } = await d3.json('config.json');
    const rows = await d3.csv(dataUrl);
    createVisualization(processData(rows, settings));
  } catch (err) {
    console.error('Error loading data:', err);
  }
}

Data Type Conversion

String to Number

// Each alternative gets its own name so the snippet runs as a whole;
// repeating `const value` is a redeclaration SyntaxError.

// Using unary plus operator
const plusValue = +d.value;

// Using Number()
const numberValue = Number(d.value);

// Using parseInt/parseFloat. These read a leading number and ignore the
// rest ('12px' -> 12), whereas + and Number() return NaN for such input.
const intValue = parseInt(d.value, 10);
const floatValue = parseFloat(d.value);

// Safe conversion with fallback: coerce once, then replace NaN with 0.
// Note that '' and null already coerce to 0, not NaN.
const coercedValue = +d.value;
const safeValue = Number.isNaN(coercedValue) ? 0 : coercedValue;

Date Parsing

// Parse ISO date strings with the built-in Date constructor.
// Named `isoDate` so it does not clash with `date` below. A date-only ISO
// string such as '2024-01-15' is interpreted as UTC midnight.
const isoDate = new Date(d.date);

// Parse custom date formats. The parser returns a Date in local time, or
// null when the string does not match the specifier exactly.
const parseDate = d3.timeParse('%Y-%m-%d');
const date = parseDate(d.date);

// Multiple date formats.
// The parsers have distinct, descriptive names so this snippet does not
// redeclare a `parseDate` constant defined by an earlier example.
const parseShortDate = d3.timeParse('%d-%b-%y'); // e.g. 15-Jan-24
const parseIsoDay = d3.timeParse('%Y-%m-%d'); // e.g. 2024-01-15

/**
 * Parses a date written in either supported format.
 *
 * @param {string} dateString - text such as '15-Jan-24' or '2024-01-15'
 * @returns {Date|null} the parsed date, or null when neither format matches
 */
function parseFlexibleDate(dateString) {
  // A d3 parser returns null on mismatch, so `||` falls through to the next.
  return parseShortDate(dateString) || parseIsoDay(dateString);
}

// Common date formats, tried in the order listed.
const formats = [
  '%Y-%m-%d', // 2024-01-15
  '%m/%d/%Y', // 01/15/2024
  '%d-%b-%Y', // 15-Jan-2024
  '%Y-%m-%dT%H:%M:%S', // 2024-01-15T10:30:00
].map(specifier => d3.timeParse(specifier));

/**
 * Runs the string through each parser in `formats` and returns the first
 * successful result.
 *
 * @param {string} dateString - the text to parse
 * @returns {Date|null} the first truthy parse result, or null if none match
 */
function parseAnyDate(dateString) {
  for (let i = 0; i < formats.length; i += 1) {
    const parse = formats[i];
    const candidate = parse(dateString);
    if (candidate) {
      return candidate;
    }
  }
  return null;
}

Boolean Conversion

// String to boolean
const active = d.active === 'true' || d.active === '1';

// Normalize before matching: a missing column (undefined/null) becomes ''
// instead of throwing on .toLowerCase(), and surrounding whitespace or
// mixed case ('Yes ', 'ON') is tolerated.
const enabled = ['true', 'yes', '1', 'on'].includes(
  String(d.enabled ?? '').trim().toLowerCase()
);

// Number to boolean
const hasValue = d.value > 0;

Data Cleaning

Handling Missing Values

/**
 * Drops rows whose `value` is null, undefined or the empty string, then
 * normalizes the remaining rows.
 *
 * For each kept row a shallow copy is returned in which `value` is coerced
 * to a number (0 when it is not numeric) and a falsy `category` is replaced
 * by 'Unknown'. The input rows are not modified.
 *
 * @param {Array<Object>} data - raw rows
 * @returns {Array<Object>} cleaned copies of the rows that had a value
 */
function cleanData(data) {
  const isBlank = raw => raw === null || raw === undefined || raw === '';

  return data.flatMap(row => {
    if (isBlank(row.value)) {
      return [];
    }

    const numeric = +row.value;
    return [
      {
        ...row,
        value: Number.isNaN(numeric) ? 0 : numeric,
        category: row.category || 'Unknown',
      },
    ];
  });
}

Removing Outliers

/**
 * Filters out rows whose `key` field lies outside the 1.5 x IQR fences.
 *
 * The quartiles are computed with d3.quantile over a sorted copy of the
 * field values; `data` itself is neither sorted nor modified.
 *
 * @param {Array<Object>} data - rows to filter
 * @param {string} key - name of the field to test
 * @returns {Array<Object>} rows with lowerFence <= row[key] <= upperFence
 */
function removeOutliers(data, key) {
  const sortedValues = data.map(row => row[key]);
  sortedValues.sort(d3.ascending);

  const firstQuartile = d3.quantile(sortedValues, 0.25);
  const thirdQuartile = d3.quantile(sortedValues, 0.75);
  const spread = thirdQuartile - firstQuartile;

  const lowerFence = firstQuartile - 1.5 * spread;
  const upperFence = thirdQuartile + 1.5 * spread;

  // Written as two positive comparisons so a NaN field fails both and the
  // row is dropped.
  const isWithinFences = row => row[key] >= lowerFence && row[key] <= upperFence;

  return data.filter(isWithinFences);
}

Data Validation

/**
 * Keeps only the rows that are complete and well-typed.
 *
 * A row passes when `id`, `value` and `date` are all present (not null,
 * undefined or ''), `id` is a string, `value` coerces to a number, and
 * `date` is a Date instance holding a valid time.
 *
 * @param {Array<Object>} data - rows to check
 * @returns {Array<Object>} the rows that passed every check
 */
function validateData(data) {
  const requiredFields = ['id', 'value', 'date'];
  const isPresent = field => !(field === null || field === undefined || field === '');

  return data.filter(row => {
    // Required fields first: any missing one rejects the row.
    for (const name of requiredFields) {
      if (!isPresent(row[name])) {
        return false;
      }
    }

    // Then the type checks, one guard per field.
    if (typeof row.id !== 'string') {
      return false;
    }
    if (Number.isNaN(+row.value)) {
      return false;
    }
    return row.date instanceof Date && !Number.isNaN(row.date.getTime());
  });
}

Data Transformation

Aggregation

// Sum of `value` per category. d3.rollup takes the data, a reducer applied
// to each group's rows, and then the key function that forms the groups.
const aggregated = d3.rollup(
  data,
  rows => d3.sum(rows, row => row.value),
  row => row.category
);

// Flatten the Map into an array of { category, total } objects.
const aggregatedArray = [...aggregated].map(([category, total]) => ({
  category,
  total,
}));

// Several statistics per category in a single pass over the groups.
const multiAgg = d3.rollup(
  data,
  rows => {
    const total = d3.sum(rows, row => row.value);
    const average = d3.mean(rows, row => row.value);
    const count = rows.length;
    const max = d3.max(rows, row => row.value);
    return { total, average, count, max };
  },
  row => row.category
);

Nested Grouping

// Group by multiple levels: the result is a Map of Maps,
// region -> category -> number of rows.
const nested = d3.rollup(
  data,
  v => v.length, // Count items
  d => d.region, // First level: region
  d => d.category // Second level: category
);

// Access nested data: count of Electronics in the North region.
// A region or category that never occurs has no Map entry, so guard the
// chained lookup and report 0 instead of calling .get() on undefined.
const electronicsInNorth = nested.get('North')?.get('Electronics') ?? 0;

Hierarchical Data

// Convert flat data to hierarchy.
// d3.hierarchy() expects `children` to be an iterable of child data objects
// that each expose their own `children`. d3.group() returns a Map of
// category -> rows, so each entry is converted into a { name, children }
// node; the original rows then become the leaves of the tree.
const hierarchyData = {
  name: 'root',
  children: Array.from(
    d3.group(data, d => d.category),
    ([name, children]) => ({ name, children })
  ),
};

const root = d3
  .hierarchy(hierarchyData)
  .sum(d => d.value) // Leaf rows supply the values; root and category nodes have no `value` of their own
  .sort((a, b) => b.value - a.value); // Sort by value, largest first

// Tree layout example.
// NOTE(review): `height` and `width` are not defined in this snippet and are
// assumed to come from the surrounding chart setup.
const tree = d3.tree().size([height, width]);
tree(root);

Time Series Processing

/**
 * Parses, filters and chronologically sorts raw time-series rows.
 *
 * Only `date` and `value` are carried over to the output. Rows whose date
 * does not match 'YYYY-MM-DD' or whose value is not numeric are dropped.
 *
 * @param {Array<{date: string, value: *}>} data - raw rows
 * @returns {Array<{date: Date, value: number}>} valid rows, oldest first
 */
function processTimeSeries(data) {
  // Build the parser once per call instead of once per row.
  const parseDay = d3.timeParse('%Y-%m-%d');

  return data
    .map(d => ({
      date: parseDay(d.date), // null when the string does not match
      value: +d.value,
    }))
    .filter(d => d.date && !Number.isNaN(d.value))
    .sort((a, b) => a.date - b.date);
}

/**
 * Produces one point per day in the range, filling the days that have no
 * entry in `data` with `defaultValue`.
 *
 * The days come from d3.timeDay.range(startDate, endDate), which excludes
 * `endDate` itself. Lookups use the exact timestamp, so a data point only
 * matches when its `date` equals the generated day boundary.
 * NOTE(review): this presumably requires data dates at local midnight;
 * confirm against how the dates were parsed.
 *
 * @param {Array<{date: Date, value: *}>} data - existing points
 * @param {Date} startDate - first day of the range
 * @param {Date} endDate - end of the range (exclusive)
 * @param {*} [defaultValue=0] - value used for days without data
 * @returns {Array<{date: Date, value: *}>} one point per day, in order
 */
function fillMissingDates(data, startDate, endDate, defaultValue = 0) {
  const dateRange = d3.timeDay.range(startDate, endDate);
  const dataMap = new Map(data.map(d => [d.date.toISOString(), d.value]));

  return dateRange.map(date => ({
    date,
    // `??` rather than `||`: a real reading of 0 must be kept, and only a
    // missing (undefined/null) value falls back to the default.
    value: dataMap.get(date.toISOString()) ?? defaultValue,
  }));
}

Data Reshaping

Pivot Data

/**
 * Reshapes long-format rows into one wide row per distinct `rowKey` value.
 *
 * Each output row holds the row key plus one property per `colKey` value
 * seen in that group, set to the matching `valueKey` field. When a group
 * contains the same column twice, the later row wins.
 *
 * @param {Array<Object>} data - long-format rows
 * @param {string} rowKey - field that identifies an output row
 * @param {string} colKey - field whose values become property names
 * @param {string} valueKey - field whose values fill those properties
 * @returns {Array<Object>} wide rows, in order of first appearance
 */
function pivotData(data, rowKey, colKey, valueKey) {
  const rowsByKey = d3.group(data, record => record[rowKey]);

  return Array.from(rowsByKey, ([rowValue, records]) => {
    const wideRow = { [rowKey]: rowValue };
    for (const record of records) {
      wideRow[record[colKey]] = record[valueKey];
    }
    return wideRow;
  });
}

// Example usage: long-format sales figures, one row per month and product.
const sales = [
  ['Jan', 'A', 100],
  ['Jan', 'B', 150],
  ['Feb', 'A', 120],
  ['Feb', 'B', 180],
].map(([month, product, amount]) => ({ month, product, sales: amount }));

// One output row per month, one column per product.
const pivoted = pivotData(sales, 'month', 'product', 'sales');
// Result: [
// { month: "Jan", A: 100, B: 150 },
// { month: "Feb", A: 120, B: 180 }
// ]

Melt Data (Unpivot)

/**
 * Unpivots wide rows into long format.
 *
 * Every input row yields one output row per entry in `valueVars`. Each
 * output row copies the `idVars` fields, stores the column name under
 * `varName` and that column's value under `valueName`.
 *
 * @param {Array<Object>} data - wide-format rows
 * @param {string[]} idVars - fields copied unchanged onto every output row
 * @param {string[]} valueVars - columns to unpivot
 * @param {string} [varName='variable'] - output field holding the column name
 * @param {string} [valueName='value'] - output field holding the column value
 * @returns {Array<Object>} long-format rows, grouped by input row
 */
function meltData(
  data,
  idVars,
  valueVars,
  varName = 'variable',
  valueName = 'value'
) {
  return data.flatMap(record =>
    valueVars.map(column => {
      const longRow = {};
      for (const idColumn of idVars) {
        longRow[idColumn] = record[idColumn];
      }
      longRow[varName] = column;
      longRow[valueName] = record[column];
      return longRow;
    })
  );
}

// Example usage: wide-format sales, one row per month and one column per
// product.
const productColumns = ['A', 'B'];
const wide = [
  ['Jan', 100, 150],
  ['Feb', 120, 180],
].map(([month, A, B]) => ({ month, A, B }));

// Each product column becomes its own row, tagged with the product name.
const melted = meltData(wide, ['month'], productColumns, 'product', 'sales');
// Result: [
// { month: "Jan", product: "A", sales: 100 },
// { month: "Jan", product: "B", sales: 150 },
// { month: "Feb", product: "A", sales: 120 },
// { month: "Feb", product: "B", sales: 180 }
// ]

Real-time Data

WebSocket Integration

/**
 * Line chart fed by a WebSocket, keeping a sliding window of recent points.
 *
 * This class does not define `setupChart(container)`; it must be supplied
 * (for example by a subclass) and is expected to create the `xScale`,
 * `yScale`, `line` and `lineGenerator` members that `updateChart` uses.
 */
class RealTimeChart {
  /**
   * @param {*} container - passed through to `setupChart`
   * @param {number} [maxPoints=100] - maximum number of points retained
   * @param {string} [url='ws://localhost:8080'] - WebSocket endpoint
   */
  constructor(container, maxPoints = 100, url = 'ws://localhost:8080') {
    this.data = [];
    this.maxPoints = maxPoints;
    this.url = url;
    this.setupChart(container);
    this.setupWebSocket();
  }

  /** Opens the socket and feeds every well-formed message into the chart. */
  setupWebSocket() {
    this.ws = new WebSocket(this.url);

    this.ws.onmessage = event => {
      let newData;
      try {
        newData = JSON.parse(event.data);
      } catch (error) {
        // One bad frame must not raise an uncaught error from the handler.
        console.error('Ignoring malformed WebSocket message:', error);
        return;
      }

      // JSON such as `null` or `42` parses fine but carries no fields.
      if (newData === null || typeof newData !== 'object') {
        console.error('Ignoring unexpected WebSocket payload:', newData);
        return;
      }

      this.addDataPoint({
        timestamp: new Date(newData.timestamp),
        value: newData.value,
      });
    };

    this.ws.onerror = event => {
      console.error('WebSocket error:', event);
    };
  }

  /** Closes the socket and stops processing messages; safe to call twice. */
  disconnect() {
    if (!this.ws) return;
    this.ws.onmessage = null;
    this.ws.close();
    this.ws = null;
  }

  /**
   * Appends a point, drops the oldest one once the window is full, and
   * redraws.
   * @param {{timestamp: Date, value: number}} point
   */
  addDataPoint(point) {
    this.data.push(point);

    // Keep only recent points
    if (this.data.length > this.maxPoints) {
      this.data.shift();
    }

    this.updateChart();
  }

  /** Refits both scale domains to the current window and redraws the line. */
  updateChart() {
    // Update scales and redraw
    this.xScale.domain(d3.extent(this.data, d => d.timestamp));
    this.yScale.domain(d3.extent(this.data, d => d.value));

    // Update line with transition
    this.line
      .datum(this.data)
      .transition()
      .duration(200)
      .attr('d', this.lineGenerator);
  }
}

Polling Data

/**
 * Re-fetches a JSON resource on a fixed delay and hands each response to
 * `updateVisualization(data)`.
 *
 * This class does not define `updateVisualization`; it must be supplied
 * (for example by a subclass).
 */
class PollingChart {
  /**
   * @param {string} dataUrl - URL requested with d3.json on every poll
   * @param {number} [interval=5000] - delay in ms between the end of one
   *   poll and the start of the next
   */
  constructor(dataUrl, interval = 5000) {
    this.dataUrl = dataUrl;
    this.interval = interval;
    this.isPolling = false;
    this.timeoutId = null;
    // Bumped by stopPolling() so that a request still in flight can tell
    // it belongs to a polling run that has since been stopped.
    this.pollGeneration = 0;
  }

  /** Starts polling; does nothing if polling is already running. */
  startPolling() {
    if (this.isPolling) return; // avoid starting a second, parallel loop
    this.isPolling = true;
    this.poll();
  }

  /** Stops polling, cancels the pending timer and invalidates in-flight requests. */
  stopPolling() {
    this.isPolling = false;
    this.pollGeneration += 1;
    if (this.timeoutId !== null) {
      clearTimeout(this.timeoutId);
      this.timeoutId = null;
    }
  }

  /**
   * Performs one fetch/update cycle and schedules the next. Errors from the
   * fetch or from updateVisualization are logged and do not end the loop.
   * @returns {Promise<void>}
   */
  async poll() {
    if (!this.isPolling) return;

    const generation = this.pollGeneration;

    try {
      const data = await d3.json(this.dataUrl);
      // Polling was stopped (and perhaps restarted) while the request was
      // in flight: discard the stale response and do not re-arm the timer.
      if (this.pollGeneration !== generation) return;
      this.updateVisualization(data);
    } catch (error) {
      console.error('Polling error:', error);
    }

    // Re-check after the await and the callback, either of which may have
    // stopped polling.
    if (this.pollGeneration !== generation) return;

    this.timeoutId = setTimeout(() => this.poll(), this.interval);
  }
}

Performance Optimization

Lazy Loading

/**
 * Loads CSV files in batches: the files within a batch are requested in
 * parallel, and the next batch starts only after the current one finishes.
 *
 * @param {string[]} files - CSV URLs to load
 * @param {number} [batchSize=3] - files requested concurrently; must be a
 *   positive integer
 * @returns {Promise<Array>} the parsed datasets, in the order of `files`;
 *   rejects if any file fails to load or `batchSize` is invalid
 */
async function loadDataLazily(files, batchSize = 3) {
  // A step of 0 or less would never advance the loop below.
  if (!Number.isInteger(batchSize) || batchSize < 1) {
    throw new RangeError(`batchSize must be a positive integer, got ${batchSize}`);
  }

  const results = [];
  for (let start = 0; start < files.length; start += batchSize) {
    const batch = files.slice(start, start + batchSize);
    // Call d3.csv with the URL only. Passing d3.csv directly to map() would
    // also hand it the element index and the array, which it would treat as
    // its `init` and `row` arguments.
    const batchResults = await Promise.all(batch.map(file => d3.csv(file)));
    results.push(...batchResults);
  }
  return results;
}

Data Streaming

/**
 * Downloads a CSV file and converts it to row objects in fixed-size chunks,
 * reporting each chunk as soon as it has been converted.
 *
 * The whole response body is read before parsing starts, so chunking limits
 * the size of each processing step, not the download. Parsing is a plain
 * split on commas: quoted fields containing commas or newlines are not
 * supported. All field values are strings.
 *
 * @param {string} url - location of the CSV file
 * @param {number} [chunkSize=1000] - rows per chunk; must be a positive integer
 * @param {function(Array<Object>): void} [onChunk] - called with each chunk;
 *   when omitted, a global `onChunkLoaded` function is used if one exists
 * @returns {Promise<Array<Object>>} every parsed row; rejects on a network
 *   error, a non-2xx response or an invalid `chunkSize`
 */
async function streamLargeDataset(url, chunkSize = 1000, onChunk) {
  // A step of 0 or less would never advance the chunk loop.
  if (!Number.isInteger(chunkSize) || chunkSize < 1) {
    throw new RangeError(`chunkSize must be a positive integer, got ${chunkSize}`);
  }

  // Prefer the explicit callback; keep supporting the global hook.
  let notify = null;
  if (typeof onChunk === 'function') {
    notify = onChunk;
  } else if (typeof onChunkLoaded === 'function') {
    notify = onChunkLoaded;
  }

  const response = await fetch(url);
  if (!response.ok) {
    // fetch() resolves for HTTP errors too; don't parse an error page as CSV.
    throw new Error(`Failed to load ${url}: HTTP ${response.status}`);
  }

  const text = await response.text();
  // Accept CRLF line endings and skip blank lines (e.g. a trailing newline).
  const lines = text.split(/\r?\n/).filter(line => line !== '');
  if (lines.length === 0) {
    return [];
  }

  const headers = lines[0].split(',');
  const allData = [];

  for (let start = 1; start < lines.length; start += chunkSize) {
    const chunk = lines.slice(start, start + chunkSize).map(line => {
      const values = line.split(',');
      const obj = {};
      headers.forEach((header, index) => {
        obj[header] = values[index];
      });
      return obj;
    });

    // Append row by row: spreading a large chunk into push() can exceed the
    // engine's argument limit.
    for (const row of chunk) {
      allData.push(row);
    }

    // Process chunk immediately if needed
    if (notify) {
      notify(chunk);
    }
  }

  return allData;
}