Skip to content

Commit 5261a81

Browse files
authored
Merge pull request #388 from ivalkshfoeif/feature/jsonlines-loader
Feature/JSON Lines loader
2 parents 0800a66 + 7646e97 commit 5261a81

File tree

2 files changed

+122
-0
lines changed

2 files changed

+122
-0
lines changed
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
import { INode, INodeData, INodeParams } from '../../../src/Interface'
2+
import { TextSplitter } from 'langchain/text_splitter'
3+
import { JSONLinesLoader } from 'langchain/document_loaders/fs/json'
4+
5+
/**
 * Document-loader node for uploaded JSON Lines (`.jsonl`) files.
 *
 * The constructor declares the node's descriptive fields and its inputs (file
 * upload, optional text splitter, pointer name, optional extra metadata).
 * `init` decodes every uploaded file, runs it through `JSONLinesLoader`, and
 * returns the resulting documents.
 */
class Jsonlines_DocumentLoaders implements INode {
    label: string
    name: string
    description: string
    type: string
    icon: string
    category: string
    baseClasses: string[]
    inputs: INodeParams[]

    constructor() {
        this.label = 'Json Lines File'
        this.name = 'jsonlinesFile'
        this.type = 'Document'
        this.icon = 'jsonlines.svg'
        this.category = 'Document Loaders'
        this.description = `Load data from JSON Lines files`
        this.baseClasses = [this.type]
        this.inputs = [
            {
                label: 'Jsonlines File',
                name: 'jsonlinesFile',
                type: 'file',
                fileType: '.jsonl'
            },
            {
                label: 'Text Splitter',
                name: 'textSplitter',
                type: 'TextSplitter',
                optional: true
            },
            {
                label: 'Pointer Extraction',
                name: 'pointerName',
                type: 'string',
                placeholder: 'Enter pointer name',
                optional: false
            },
            {
                label: 'Metadata',
                name: 'metadata',
                type: 'json',
                optional: true,
                additionalParams: true
            }
        ]
    }

    /**
     * Loads documents from the uploaded JSON Lines file(s).
     *
     * @param nodeData - Node configuration; reads `inputs.jsonlinesFile`,
     *   `inputs.pointerName`, `inputs.textSplitter` and `inputs.metadata`.
     * @returns The documents produced by `JSONLinesLoader` for every file, in
     *   upload order. When `metadata` is supplied, each returned document is a
     *   shallow copy whose `metadata` has the supplied keys merged on top.
     * @throws Error when `jsonlinesFile` or `pointerName` is not a string.
     *   Errors from `JSON.parse` (file list or metadata) and from the loader
     *   propagate unchanged.
     */
    async init(nodeData: INodeData): Promise<any> {
        const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
        const jsonLinesFileBase64: unknown = nodeData.inputs?.jsonlinesFile
        const pointerName: unknown = nodeData.inputs?.pointerName
        const metadata = nodeData.inputs?.metadata

        // Both inputs are required; fail with a message naming the input instead
        // of a "cannot read properties of undefined" TypeError further down.
        if (typeof jsonLinesFileBase64 !== 'string') {
            throw new Error('Jsonlines File is required')
        }
        if (typeof pointerName !== 'string') {
            throw new Error('Pointer Extraction is required')
        }

        // The loader is given a '/'-prefixed pointer. Prepend the slash only when
        // it is missing so that an input already written as "/text" does not
        // become "//text".
        const trimmedPointer = pointerName.trim()
        const pointer = trimmedPointer.startsWith('/') ? trimmedPointer : '/' + trimmedPointer

        // Several uploads arrive as a JSON-encoded array of strings; a single
        // upload arrives as the bare string.
        const files: string[] =
            jsonLinesFileBase64.startsWith('[') && jsonLinesFileBase64.endsWith(']')
                ? JSON.parse(jsonLinesFileBase64)
                : [jsonLinesFileBase64]

        const alldocs = []
        for (const file of files) {
            // The upload string is comma-separated: the last segment is discarded
            // and the one before it is decoded as the base64 payload (presumably
            // `data:<mime>;base64,<payload>,filename:<name>` - confirm against the
            // uploader).
            const splitDataURI = file.split(',')
            splitDataURI.pop()
            const bf = Buffer.from(splitDataURI.pop() || '', 'base64')
            const loader = new JSONLinesLoader(new Blob([bf]), pointer)

            const docs = textSplitter ? await loader.loadAndSplit(textSplitter) : await loader.load()
            alldocs.push(...docs)
        }

        if (metadata) {
            // Metadata may already be an object or may still be a JSON string.
            const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
            return alldocs.map((doc) => ({
                ...doc,
                metadata: {
                    ...doc.metadata,
                    ...parsedMetadata
                }
            }))
        }

        return alldocs
    }
}
105+
106+
// CommonJS export: exposes the node class under the `nodeClass` key.
// NOTE(review): presumably the key the host's node loader looks up - confirm against the loader.
module.exports = { nodeClass: Jsonlines_DocumentLoaders }
Lines changed: 16 additions & 0 deletions
Loading

0 commit comments

Comments
 (0)