Skip to content

SentenceSplitter

Defined in: packages/core/src/node-parser/sentence-splitter.ts:23

Parse text with a preference for complete sentences.

SentenceSplitter<Options>(nodes, options?): TextNode<Metadata>[]

Defined in: packages/core/src/node-parser/sentence-splitter.ts:23

Parse text with a preference for complete sentences.

Options extends Record<string, unknown>

BaseNode<Metadata>[]

Options

TextNode<Metadata>[]

new SentenceSplitter(params?): SentenceSplitter

Defined in: packages/core/src/node-parser/sentence-splitter.ts:56

Partial<SentenceSplitterParams> & SplitterParams & object

SentenceSplitter

MetadataAwareTextSplitter.constructor

includeMetadata: boolean = true

Defined in: packages/core/src/node-parser/base.ts:17

MetadataAwareTextSplitter.includeMetadata


includePrevNextRel: boolean = true

Defined in: packages/core/src/node-parser/base.ts:18

MetadataAwareTextSplitter.includePrevNextRel


chunkSize: number = 1024

Defined in: packages/core/src/node-parser/sentence-splitter.ts:27

The size of each chunk. If no tokenSizer is set, the size is measured in characters; if a tokenSizer is set, it is measured in tokens.


chunkOverlap: number = 200

Defined in: packages/core/src/node-parser/sentence-splitter.ts:31

The overlap between chunks when splitting. If no tokenSizer is set, the overlap is measured in characters; if a tokenSizer is set, it is measured in tokens.


separator: string = " "

Defined in: packages/core/src/node-parser/sentence-splitter.ts:35

Default separator used for splitting text into words.


paragraphSeparator: string = "\n\n\n"

Defined in: packages/core/src/node-parser/sentence-splitter.ts:39

Separator between paragraphs.


secondaryChunkingRegex: string = "[^,.;。?!]+[,.;。?!]?"

Defined in: packages/core/src/node-parser/sentence-splitter.ts:43

Backup regex for splitting into sentences.


extraAbbreviations: string[] | undefined = []

Defined in: packages/core/src/node-parser/sentence-splitter.ts:48

Extra abbreviations to consider while splitting into sentences. For example, when processing contracts, you may want to treat “LLC.” as an important abbreviation.


id: string

Defined in: packages/core/src/schema/type.ts:22

MetadataAwareTextSplitter.id

protected postProcessParsedNodes(nodes, parentDocMap): TextNode<Metadata>[]

Defined in: packages/core/src/node-parser/base.ts:27

TextNode<Metadata>[]

Map<string, TextNode<Metadata>>

TextNode<Metadata>[]

MetadataAwareTextSplitter.postProcessParsedNodes


getNodesFromDocuments(documents): TextNode<Metadata>[]

Defined in: packages/core/src/node-parser/base.ts:84

TextNode<Metadata>[]

TextNode<Metadata>[]

MetadataAwareTextSplitter.getNodesFromDocuments


splitTexts(texts): string[]

Defined in: packages/core/src/node-parser/base.ts:126

string[]

string[]

MetadataAwareTextSplitter.splitTexts


splitTextsMetadataAware(texts, metadata): string[]

Defined in: packages/core/src/node-parser/base.ts:142

string[]

string[]

string[]

MetadataAwareTextSplitter.splitTextsMetadataAware


protected getMetadataString(node): string

Defined in: packages/core/src/node-parser/base.ts:151

TextNode

string

MetadataAwareTextSplitter.getMetadataString


protected parseNodes(nodes): TextNode<Metadata>[]

Defined in: packages/core/src/node-parser/base.ts:161

TextNode<Metadata>[]

TextNode<Metadata>[]

MetadataAwareTextSplitter.parseNodes


splitTextMetadataAware(text, metadata): string[]

Defined in: packages/core/src/node-parser/sentence-splitter.ts:82

string

string

string[]

MetadataAwareTextSplitter.splitTextMetadataAware


splitText(text): string[]

Defined in: packages/core/src/node-parser/sentence-splitter.ts:97

string

string[]

MetadataAwareTextSplitter.splitText


_splitText(text, chunkSize): string[]

Defined in: packages/core/src/node-parser/sentence-splitter.ts:101

string

number

string[]