/**
 * A class that helps with chunking streaming responses
 * from an LLM into whole sentences (or as close as we can get).
 */
export class SentenceChunker {
  /**
   * @param {Object} options
   * @param {number} options.chunkLength - The maximum length of a chunk (default: 128)
   * @param {boolean} options.emitParagraphs - Whether to emit paragraphs as chunks (default: true)
   */
  constructor(options = {}) {
    this.buffer = "";
    this.chunkLength = options.chunkLength || 128;
    this.emitParagraphs = options.emitParagraphs !== false;
    this.callbacks = [];
  }
  /**
   * Emit a chunk of text
   * @param {string} output - The chunk of text to emit
   */
  emit(output) {
    this.callbacks.forEach(cb => cb(output));
  }
  /**
   * Register a callback to be called when a chunk is emitted
   * @param {Function} callback - The callback to call
   */
  onChunk(callback) {
    this.callbacks.push(callback);
  }
  /**
   * Push new data into the chunker
   * @param {string} data - The new data to push
   */
  push(data) {
    // Split on runs of newlines; the capture group keeps the newline
    // separators as their own array entries.
    let paragraphs = data.split(/(\n+)/);
    let numParagraphs = paragraphs.length;
    for (let i = 0; i < numParagraphs; i++) {
      let paragraph = paragraphs[i];
      if (!paragraph) {
        continue;
      }
      // Split after clause/sentence punctuation: ASCII punctuation followed
      // by whitespace, or full-width CJK punctuation.
      let sentences = paragraph.split(/(?<=[;:,.!?]\s+)|(?<=[;:,。!?])/);
      let bufferLength = this.buffer.length;
      for (let sentence of sentences) {
        let sentenceLength = sentence.length;
        if (sentenceLength === 0) {
          continue;
        }
        if (bufferLength + sentenceLength <= this.chunkLength) {
          // The sentence still fits; keep accumulating.
          this.buffer += sentence;
          bufferLength += sentenceLength;
        } else {
          // Adding this sentence would exceed chunkLength, so emit what we
          // have and start a new buffer with the current sentence.
          if (bufferLength > 0) {
            this.emit(this.buffer);
          }
          this.buffer = sentence;
          bufferLength = sentenceLength;
        }
      }
      // Flush the buffer at each paragraph boundary when emitParagraphs is on.
      if (this.emitParagraphs && numParagraphs > 1 && i < numParagraphs - 1) {
        this.emit(this.buffer);
        this.buffer = "";
      }
    }
  }
  /**
   * Flush the buffer, emitting any remaining text
   */
  flush() {
    if (this.buffer.length > 0) {
      this.emit(this.buffer);
      this.buffer = "";
    }
  }
}
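/*
 * Example usage (a minimal sketch; the console handler and the sample text
 * below are illustrative and not part of this module):
 *
 *   const chunker = new SentenceChunker({ chunkLength: 96 });
 *   chunker.onChunk(chunk => console.log("chunk:", chunk));
 *   // Feed streaming deltas as they arrive from the LLM.
 *   chunker.push("Hello there. This is the start of a longer answer, ");
 *   chunker.push("which keeps going.\n\nA new paragraph follows here.");
 *   chunker.flush(); // emit whatever is still buffered
 */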
/**
 * A SentenceChunker that can handle streaming responses that grow over time
 * (e.g. when each update from the LLM contains the entire response generated
 * so far, concatenated onto the previous response, rather than just the new text)
 */
export class GrowingSentenceChunker extends SentenceChunker {
  constructor(options = {}) {
    super(options);
    this.partialSentence = "";
  }
  /**
   * Push new data into the chunker
   * @param {string} data - The full response text received so far
   */
  push(data) {
    // Pass only the portion we have not yet seen on to the base class.
    const newData = data.substring(this.partialSentence.length);
    this.partialSentence += newData;
    super.push(newData);
  }
  /**
   * Flush the buffer, emitting any remaining text
   */
  flush() {
    super.flush();
    this.partialSentence = "";
  }
}
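/*
 * Example usage (a minimal sketch; assumes each update carries the full
 * cumulative response text, and the console handler is illustrative):
 *
 *   const chunker = new GrowingSentenceChunker({ chunkLength: 64 });
 *   chunker.onChunk(chunk => console.log("chunk:", chunk));
 *   chunker.push("Hello");
 *   chunker.push("Hello there. How");
 *   chunker.push("Hello there. How are you today?");
 *   chunker.flush();
 */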