Skip to content

Commit 3181166

Browse files
authored
perf: redirect request and err log replace (#768)
perf: dataset openapi
1 parent 379673c commit 3181166

File tree

4 files changed

+210
-5
lines changed

4 files changed

+210
-5
lines changed

docSite/content/docs/development/openapi/dataset.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -342,7 +342,7 @@ data 为集合的 ID。
342342
{{< /tabs >}}
343343

344344

345-
### 创建一个纯文本集合(商业版)
345+
### 创建一个纯文本集合
346346

347347
传入一段文字,创建一个集合,会根据传入的文字进行分割。
348348

@@ -351,7 +351,7 @@ data 为集合的 ID。
351351
{{< markdownify >}}
352352

353353
```bash
354-
curl --location --request POST 'http://localhost:3000/api/proApi/core/dataset/collection/create/text' \
354+
curl --location --request POST 'http://localhost:3000/api/core/dataset/collection/create/text' \
355355
--header 'Authorization: Bearer {{authorization}}' \
356356
--header 'Content-Type: application/json' \
357357
--data-raw '{
@@ -418,7 +418,7 @@ data 为集合的 ID。
418418
{{< /tab >}}
419419
{{< /tabs >}}
420420

421-
### 创建一个链接集合(商业版)
421+
### 创建一个链接集合
422422

423423
传入一个网络链接,创建一个集合,会先去对应网页抓取内容,再抓取的文字进行分割。
424424

@@ -427,7 +427,7 @@ data 为集合的 ID。
427427
{{< markdownify >}}
428428

429429
```bash
430-
curl --location --request POST 'http://localhost:3000/api/proApi/core/dataset/collection/create/link' \
430+
curl --location --request POST 'http://localhost:3000/api/core/dataset/collection/create/link' \
431431
--header 'Authorization: Bearer {{authorization}}' \
432432
--header 'Content-Type: application/json' \
433433
--data-raw '{
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
/*
2+
Create one dataset collection
3+
*/
4+
import type { NextApiRequest, NextApiResponse } from 'next';
5+
import { jsonRes } from '@fastgpt/service/common/response';
6+
import { connectToDatabase } from '@/service/mongo';
7+
import type { LinkCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api.d';
8+
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
9+
import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller';
10+
import {
11+
TrainingModeEnum,
12+
DatasetCollectionTypeEnum
13+
} from '@fastgpt/global/core/dataset/constants';
14+
import { checkDatasetLimit } from '@fastgpt/service/support/permission/limit/dataset';
15+
import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
16+
import { createTrainingBill } from '@fastgpt/service/support/wallet/bill/controller';
17+
import { BillSourceEnum } from '@fastgpt/global/support/wallet/bill/constants';
18+
import { getQAModel, getVectorModel } from '@/service/core/ai/model';
19+
import { reloadCollectionChunks } from '@fastgpt/service/core/dataset/collection/utils';
20+
21+
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
22+
try {
23+
await connectToDatabase();
24+
const {
25+
link,
26+
trainingType = TrainingModeEnum.chunk,
27+
chunkSize = 512,
28+
chunkSplitter,
29+
qaPrompt,
30+
...body
31+
} = req.body as LinkCreateDatasetCollectionParams;
32+
33+
const { teamId, tmbId, dataset } = await authDataset({
34+
req,
35+
authToken: true,
36+
authApiKey: true,
37+
datasetId: body.datasetId,
38+
per: 'w'
39+
});
40+
41+
// 1. check dataset limit
42+
await checkDatasetLimit({
43+
teamId,
44+
freeSize: global.feConfigs?.subscription?.datasetStoreFreeSize,
45+
insertLen: predictDataLimitLength(trainingType, new Array(10))
46+
});
47+
48+
// 2. create collection
49+
const collectionId = await createOneCollection({
50+
...body,
51+
name: link,
52+
teamId,
53+
tmbId,
54+
type: DatasetCollectionTypeEnum.link,
55+
56+
trainingType,
57+
chunkSize,
58+
chunkSplitter,
59+
qaPrompt,
60+
61+
rawLink: link
62+
});
63+
64+
// 3. create bill and start sync
65+
const { billId } = await createTrainingBill({
66+
teamId,
67+
tmbId,
68+
appName: 'core.dataset.collection.Sync Collection',
69+
billSource: BillSourceEnum.training,
70+
vectorModel: getVectorModel(dataset.vectorModel).name,
71+
agentModel: getQAModel(dataset.agentModel).name
72+
});
73+
await reloadCollectionChunks({
74+
collectionId,
75+
tmbId,
76+
billId
77+
});
78+
79+
jsonRes(res, {
80+
data: { collectionId }
81+
});
82+
} catch (err) {
83+
jsonRes(res, {
84+
code: 500,
85+
error: err
86+
});
87+
}
88+
}
Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
/*
2+
Create one dataset collection
3+
*/
4+
import type { NextApiRequest, NextApiResponse } from 'next';
5+
import { jsonRes } from '@fastgpt/service/common/response';
6+
import { connectToDatabase } from '@/service/mongo';
7+
import type { TextCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api.d';
8+
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
9+
import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller';
10+
import {
11+
TrainingModeEnum,
12+
DatasetCollectionTypeEnum
13+
} from '@fastgpt/global/core/dataset/constants';
14+
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
15+
import { checkDatasetLimit } from '@fastgpt/service/support/permission/limit/dataset';
16+
import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
17+
import { pushDataToTrainingQueue } from '@/service/core/dataset/data/controller';
18+
import { hashStr } from '@fastgpt/global/common/string/tools';
19+
import { createTrainingBill } from '@fastgpt/service/support/wallet/bill/controller';
20+
import { BillSourceEnum } from '@fastgpt/global/support/wallet/bill/constants';
21+
import { getQAModel, getVectorModel } from '@/service/core/ai/model';
22+
23+
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
24+
try {
25+
await connectToDatabase();
26+
const {
27+
name,
28+
text,
29+
trainingType = TrainingModeEnum.chunk,
30+
chunkSize = 512,
31+
chunkSplitter,
32+
qaPrompt,
33+
...body
34+
} = req.body as TextCreateDatasetCollectionParams;
35+
36+
const { teamId, tmbId, dataset } = await authDataset({
37+
req,
38+
authToken: true,
39+
authApiKey: true,
40+
datasetId: body.datasetId,
41+
per: 'w'
42+
});
43+
44+
// 1. split text to chunks
45+
const { chunks } = splitText2Chunks({
46+
text,
47+
chunkLen: chunkSize,
48+
overlapRatio: trainingType === TrainingModeEnum.chunk ? 0.2 : 0,
49+
customReg: chunkSplitter ? [chunkSplitter] : []
50+
});
51+
52+
// 2. check dataset limit
53+
await checkDatasetLimit({
54+
teamId,
55+
freeSize: global.feConfigs?.subscription?.datasetStoreFreeSize,
56+
insertLen: predictDataLimitLength(trainingType, chunks)
57+
});
58+
59+
// 3. create collection and training bill
60+
const [collectionId, { billId }] = await Promise.all([
61+
createOneCollection({
62+
...body,
63+
teamId,
64+
tmbId,
65+
type: DatasetCollectionTypeEnum.virtual,
66+
67+
name,
68+
trainingType,
69+
chunkSize,
70+
chunkSplitter,
71+
qaPrompt,
72+
73+
hashRawText: hashStr(text),
74+
rawTextLength: text.length
75+
}),
76+
createTrainingBill({
77+
teamId,
78+
tmbId,
79+
appName: name,
80+
billSource: BillSourceEnum.training,
81+
vectorModel: getVectorModel(dataset.vectorModel)?.name,
82+
agentModel: getQAModel(dataset.agentModel)?.name
83+
})
84+
]);
85+
86+
// 4. push chunks to training queue
87+
const insertResults = await pushDataToTrainingQueue({
88+
teamId,
89+
tmbId,
90+
collectionId,
91+
trainingMode: trainingType,
92+
prompt: qaPrompt,
93+
billId,
94+
data: chunks.map((text, index) => ({
95+
q: text,
96+
chunkIndex: index
97+
}))
98+
});
99+
100+
jsonRes(res, {
101+
data: { collectionId, results: insertResults }
102+
});
103+
} catch (err) {
104+
jsonRes(res, {
105+
code: 500,
106+
error: err
107+
});
108+
}
109+
}
110+
111+
export const config = {
112+
api: {
113+
bodyParser: {
114+
sizeLimit: '10mb'
115+
}
116+
}
117+
};

projects/app/src/web/core/dataset/api.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ export const getDatasetCollectionById = (id: string) =>
7676
export const postDatasetCollection = (data: CreateDatasetCollectionParams) =>
7777
POST<string>(`/core/dataset/collection/create`, data);
7878
export const postCreateDatasetLinkCollection = (data: LinkCreateDatasetCollectionParams) =>
79-
POST<{ collectionId: string }>(`/proApi/core/dataset/collection/create/link`, data);
79+
POST<{ collectionId: string }>(`/core/dataset/collection/create/link`, data);
8080

8181
export const putDatasetCollectionById = (data: UpdateDatasetCollectionParams) =>
8282
POST(`/core/dataset/collection/update`, data);

0 commit comments

Comments
 (0)