string_decoder
The string_decoder module provides an API for decoding Buffer objects into strings while preserving encoded multi-byte UTF-8 and UTF-16 characters. It's particularly useful when working with streams.
Import
const { StringDecoder } = require('string_decoder');
API Reference
Class: StringDecoder
new StringDecoder([encoding])
Creates a new StringDecoder instance.
Parameters:
encoding - The character encoding to use (default: 'utf8')
Supported encodings:
'utf8' / 'utf-8', 'utf16le' / 'utf-16le', 'latin1', 'base64', 'ascii', 'hex'
decoder.write(buffer)
Returns a decoded string, ensuring that any incomplete multibyte characters at the end of the Buffer are omitted and stored in an internal buffer for the next call.
Parameters:
buffer - The Buffer to decode
Returns: Decoded string
decoder.end([buffer])
Returns any remaining input stored in the internal buffer as a string. If buffer is provided, it performs one final call to write() before returning.
Parameters:
buffer (optional) - Final Buffer to decode
Returns: Decoded string
Examples
Basic String Decoding
const { StringDecoder } = require('string_decoder');
/**
 * Decodes one complete UTF-8 buffer with a single write() call.
 *
 * @param {object} event - Invocation payload (not used by this example).
 * @returns {Promise<{decoded: string, remaining: string}>} The decoded text and
 *   whatever end() flushed, or '(none)' when end() returned an empty string.
 */
export async function handler(event) {
  const utf8Decoder = new StringDecoder('utf8');

  // The whole message arrives in one buffer, so write() can decode all of it.
  const decoded = utf8Decoder.write(Buffer.from('Hello, World!', 'utf8'));

  // end() returns any input still held in the decoder's internal buffer.
  const leftover = utf8Decoder.end();

  return {
    decoded,
    remaining: leftover === '' ? '(none)' : leftover,
  };
}
Handling Multi-Byte Characters
const { StringDecoder } = require('string_decoder');
/**
 * Shows StringDecoder reassembling a multi-byte character whose bytes arrive
 * in two separate buffers.
 *
 * @param {object} event - Invocation payload (not used by this example).
 * @returns {Promise<{original: string, part1Length: number, part2Length: number,
 *   decoded: string, isCorrect: boolean}>} The source text, the byte length of
 *   each part, the reassembled text, and whether it matches the source.
 */
export async function handler(event) {
  const decoder = new StringDecoder('utf8');

  // UTF-8 string with multi-byte characters
  const text = '你好世界'; // "Hello World" in Chinese
  const buffer = Buffer.from(text, 'utf8');

  // Each of these characters is 3 bytes in UTF-8, so a cut at byte 3 would fall
  // on a character boundary. Cutting at byte 4 leaves the first byte of the
  // second character dangling at the end of part1.
  const splitAt = 4;
  const part1 = buffer.subarray(0, splitAt);
  const part2 = buffer.subarray(splitAt);

  // write() holds the dangling byte back and completes the character once
  // part2 supplies the rest of it.
  const str1 = decoder.write(part1);
  const str2 = decoder.write(part2);
  const remaining = decoder.end();
  const decoded = str1 + str2 + remaining;

  return {
    original: text,
    part1Length: part1.length,
    part2Length: part2.length,
    decoded,
    isCorrect: decoded === text,
  };
}
Stream Processing with StringDecoder
const { StringDecoder } = require('string_decoder');
const { Readable } = require('stream');
/**
 * Cuts a UTF-8 buffer into fixed 3-byte chunks and decodes them in order with
 * one shared StringDecoder.
 *
 * @param {object} event - Invocation payload (not used by this example).
 * @returns {Promise<{original: string, chunks: number, decoded: string,
 *   isCorrect: boolean}>} The source text, the number of chunks produced, the
 *   reassembled text, and whether it matches the source.
 */
export async function handler(event) {
  const decoder = new StringDecoder('utf8');

  // Text mixing ASCII, 3-byte and 4-byte UTF-8 characters.
  const text = 'Hello 世界 🌍';
  const buffer = Buffer.from(text, 'utf8');

  // Build the list of fixed-size chunks; the last one may be shorter.
  const chunkSize = 3;
  const chunkCount = Math.ceil(buffer.length / chunkSize);
  const chunks = Array.from({ length: chunkCount }, (_, index) =>
    buffer.subarray(index * chunkSize, (index + 1) * chunkSize),
  );

  // Feed the chunks through the decoder in order, then flush it.
  let final = '';
  for (const chunk of chunks) {
    final += decoder.write(chunk);
  }
  final += decoder.end();

  return {
    original: text,
    chunks: chunks.length,
    decoded: final,
    isCorrect: final === text,
  };
}
Comparing with Buffer.toString()
const { StringDecoder } = require('string_decoder');
/**
 * Decodes the same two buffer halves twice, once with Buffer.toString() per
 * half and once through a StringDecoder, and reports which result matches the
 * source text.
 *
 * @param {object} event - Invocation payload (not used by this example).
 * @returns {Promise<{original: string, usingBufferToString: string,
 *   usingStringDecoder: string, bufferMethodCorrect: boolean,
 *   decoderMethodCorrect: boolean}>} Both decoded strings and their comparison
 *   against the source.
 */
export async function handler(event) {
  const text = 'Hello 世界!';
  const buffer = Buffer.from(text, 'utf8');

  // 'Hello ' is 6 bytes and '世' occupies bytes 6-8, so a cut at byte 8 falls
  // inside that character.
  const splitAt = 8;
  const pieces = [buffer.subarray(0, splitAt), buffer.subarray(splitAt)];

  // Each half is decoded on its own, with no knowledge of the other half.
  const bufferMethod = pieces.map((piece) => piece.toString('utf8')).join('');

  // The decoder carries the unfinished character over from one write() to the next.
  const decoder = new StringDecoder('utf8');
  const decoderMethod =
    pieces.map((piece) => decoder.write(piece)).join('') + decoder.end();

  return {
    original: text,
    usingBufferToString: bufferMethod,
    usingStringDecoder: decoderMethod,
    bufferMethodCorrect: bufferMethod === text,
    decoderMethodCorrect: decoderMethod === text,
  };
}
Different Encodings
const { StringDecoder } = require('string_decoder');
/**
 * Renders the same bytes through StringDecoder instances configured with
 * different encodings.
 *
 * The encoding passed to StringDecoder names the string form produced from the
 * raw bytes written to it. The decoders are therefore fed the raw bytes of the
 * text; feeding them text that is already base64/hex would encode it twice.
 *
 * @param {object} event - Invocation payload (not used by this example).
 * @returns {Promise<{original: string, utf8: string, base64: string,
 *   hex: string}>} The source text and its utf8, base64 and hex string forms.
 */
export async function handler(event) {
  const text = 'Hello, World!';
  const bytes = Buffer.from(text, 'utf8');

  // Run the raw bytes through a fresh decoder for the given encoding and
  // flush it, so any trailing bytes the decoder held back are included.
  const render = (encoding) => {
    const decoder = new StringDecoder(encoding);
    return decoder.write(bytes) + decoder.end();
  };

  return {
    original: text,
    utf8: render('utf8'),
    base64: render('base64'),
    hex: render('hex'),
  };
}
Processing Streaming Data
const { StringDecoder } = require('string_decoder');
/**
 * Simulates data arriving in 5-byte chunks and records what each write()
 * call was able to decode.
 *
 * @param {object} event - Invocation payload (not used by this example).
 * @returns {Promise<{original: string, chunks: number, decoded: string,
 *   isCorrect: boolean, details: Array<object>}>} The source text, the number
 *   of recorded entries, the reassembled text, whether it matches the source,
 *   and the per-chunk records.
 */
export async function handler(event) {
  const decoder = new StringDecoder('utf8');

  const data = 'Streaming data with émojis 🎉 and unicode ★';
  const buffer = Buffer.from(data, 'utf8');
  const chunkSize = 5;
  const results = [];

  for (let offset = 0; offset < buffer.length; offset += chunkSize) {
    const chunk = buffer.subarray(offset, offset + chunkSize);
    const decoded = decoder.write(chunk);
    // A write() that produced no text is not recorded.
    if (decoded === '') {
      continue;
    }
    results.push({ offset, chunkSize: chunk.length, decoded });
  }

  // Flush whatever the decoder is still holding once the input is exhausted.
  const final = decoder.end();
  if (final !== '') {
    results.push({ offset: 'final', decoded: final });
  }

  let fullDecoded = '';
  for (const { decoded } of results) {
    fullDecoded += decoded;
  }

  return {
    original: data,
    chunks: results.length,
    decoded: fullDecoded,
    isCorrect: fullDecoded === data,
    details: results,
  };
}
Handling Incomplete Sequences
const { StringDecoder } = require('string_decoder');
/**
 * Feeds a 4-byte emoji to the decoder one byte at a time and records the
 * output of every write() call.
 *
 * @param {object} event - Invocation payload (not used by this example).
 * @returns {Promise<{emoji: string, results: Array<{input: string,
 *   output: string}>, final: string, reconstructed: string}>} The emoji, the
 *   per-byte outputs, the result of end(), and all outputs concatenated.
 */
export async function handler(event) {
  const decoder = new StringDecoder('utf8');

  // A single emoji encoded as a 4-byte UTF-8 sequence.
  const emoji = '🎉';
  const buffer = Buffer.from(emoji, 'utf8');
  console.log('Emoji buffer length:', buffer.length); // 4 bytes

  // One label per byte; byte i is written on its own as a 1-byte buffer.
  const labels = ['First byte', 'Second byte', 'Third byte', 'Fourth byte'];
  const results = labels.map((input, index) => ({
    input,
    output: decoder.write(buffer.subarray(index, index + 1)),
  }));

  const final = decoder.end();
  const outputs = results.map((entry) => entry.output);

  return {
    emoji,
    results,
    final,
    reconstructed: outputs.join('') + final,
  };
}
Line-by-Line Processing
const { StringDecoder } = require('string_decoder');
/**
 * Splits chunked UTF-8 input into lines, carrying the unfinished tail of each
 * chunk over to the next one.
 *
 * @param {object} event - Invocation payload (not used by this example).
 * @returns {Promise<{totalLines: number, lines: string[]}>} The collected lines
 *   and their count; a trailing line without a newline is included.
 */
export async function handler(event) {
  const decoder = new StringDecoder('utf8');

  const data = 'Line 1\nLine 2 with émoji 🎨\nLine 3\nIncomplete line';
  const buffer = Buffer.from(data, 'utf8');
  const chunkSize = 10;

  const lines = [];
  let pending = ''; // text seen so far that has not been closed by a newline

  for (let start = 0; start < buffer.length; start += chunkSize) {
    const piece = buffer.subarray(start, start + chunkSize);
    const segments = (pending + decoder.write(piece)).split('\n');

    // Everything before the final segment ended with a newline; the final
    // segment may continue in the next chunk.
    pending = segments.pop();
    lines.push(...segments);
  }

  // Flush the decoder and emit the trailing text, if any, as the last line.
  pending += decoder.end();
  if (pending !== '') {
    lines.push(pending);
  }

  return {
    totalLines: lines.length,
    lines,
  };
}
UTF-16 Decoding
const { StringDecoder } = require('string_decoder');
/**
 * Encodes text as UTF-16LE and decodes it again in two chunks through a
 * StringDecoder configured for 'utf16le'.
 *
 * @param {object} event - Invocation payload (not used by this example).
 * @returns {Promise<{original: string, bufferLength: number, decoded: string,
 *   isCorrect: boolean}>} The source text, the encoded byte length, the
 *   decoded text, and whether it matches the source.
 */
export async function handler(event) {
  const text = 'Hello, UTF-16! 你好';
  const buffer = Buffer.from(text, 'utf16le');
  const decoder = new StringDecoder('utf16le');

  // Two chunks: the first 10 bytes, then the rest.
  const boundary = 10;
  const pieces = [buffer.subarray(0, boundary), buffer.subarray(boundary)];

  let decoded = '';
  for (const piece of pieces) {
    decoded += decoder.write(piece);
  }
  decoded += decoder.end();

  return {
    original: text,
    bufferLength: buffer.length,
    decoded,
    isCorrect: decoded === text,
  };
}
Real-World: HTTP Response Processing
const { StringDecoder } = require('string_decoder');
/**
 * Simulates an HTTP response arriving as variable-sized packets and decodes
 * it with a single StringDecoder.
 *
 * Only non-empty decoder output is recorded, including the output of the final
 * end() call, so `chunks` counts pieces of text that were actually produced.
 *
 * @param {object} event - Invocation payload (not used by this example).
 * @returns {Promise<{chunks: number, response: string, isComplete: boolean}>}
 *   The number of decoded pieces, the reassembled response, and whether it
 *   matches the original response text.
 */
export async function handler(event) {
  // Simulate HTTP response chunks
  const response = 'HTTP/1.1 200 OK\r\nContent-Type: text/plain\r\n\r\nHello, 世界! 🌍';
  const buffer = Buffer.from(response, 'utf8');
  const decoder = new StringDecoder('utf8');
  const chunks = [];

  // Simulate network packets (variable size). The last entry is large enough
  // to drain whatever is left of the buffer.
  const sizes = [15, 20, 10, 25, buffer.length];
  let offset = 0;
  for (const size of sizes) {
    if (offset >= buffer.length) break;
    const end = Math.min(offset + size, buffer.length);
    const decoded = decoder.write(buffer.subarray(offset, end));
    if (decoded) {
      chunks.push(decoded);
    }
    offset = end;
  }

  // end() returns an empty string when nothing was left buffered; recording
  // that would inflate the chunk count, so apply the same guard as above.
  const final = decoder.end();
  if (final) {
    chunks.push(final);
  }

  const fullResponse = chunks.join('');
  return {
    chunks: chunks.length,
    response: fullResponse,
    isComplete: fullResponse === response,
  };
}
When to Use StringDecoder
Use StringDecoder when:
- Processing streaming data with potentially incomplete multi-byte characters
- Building custom stream transformations
- Handling buffers that may split in the middle of multi-byte sequences
- You need proper UTF-8/UTF-16 boundary handling
Use Buffer.toString() when:
- You have complete buffers
- You're not processing streaming data
- Simpler, one-shot conversions
Best Practices
- Always call decoder.end() when finished to get remaining buffered data
- Reuse the same decoder instance for a stream of related data
- Use appropriate encoding for your data source
- StringDecoder is specifically designed for text streams
- For binary data, work directly with Buffers