Skip to content

Commit 1454207

Browse files
ewilliams-cloudera, jkwatson, baasitsharief, mliu-cloudera, actions-user
authored
Upload files to a chat (#256)
* make the indexing reconciler request summarization immediately after each file is complete * immediately submit docs for summarization on upload * update default chunk overlap to 10 percent in the database * default chunk size to 512 at the service layer * wip lastFile:backend/src/test/java/com/cloudera/cai/rag/sessions/SessionServiceTest.java * WIP lastFile:backend/src/test/java/com/cloudera/cai/rag/projects/ProjectControllerTest.java * wip on refactoring session creation lastFile:backend/src/test/java/com/cloudera/cai/rag/sessions/SessionServiceTest.java * wip lastFile:backend/src/main/java/com/cloudera/cai/rag/sessions/SessionService.java * WIP lastFile:backend/src/main/java/com/cloudera/cai/rag/sessions/SessionService.java * mob next [ci-skip] [ci skip] [skip ci] lastFile:backend/src/main/java/com/cloudera/cai/rag/sessions/SessionService.java * insert new session with data source functioning * wip lastFile:backend/src/main/java/com/cloudera/cai/rag/sessions/SessionService.java * remove mcp.json and example file * wip lastFile:backend/src/main/java/com/cloudera/cai/rag/sessions/SessionService.java * WIP lastFile:backend/src/main/java/com/cloudera/cai/rag/datasources/RagDataSourceRepository.java * wip on session creation lastFile:backend/src/main/java/com/cloudera/cai/rag/sessions/SessionService.java * wip lastFile:backend/src/main/java/com/cloudera/cai/rag/sessions/SessionRepository.java * WIP add session kb lastFile:backend/src/main/java/com/cloudera/cai/rag/sessions/SessionService.java * wip lastFile:backend/src/main/java/com/cloudera/cai/rag/datasources/RagDataSourceRepository.java * remove redundant type * simplify request * add embedding model and fix default project id * add associated data source id to the session type * small refactor to text area * dynamic columns on the uploaded files table * drop databases lastFile:ui/src/pages/RagChatTab/FooterComponents/RagChatQueryInput.tsx * WIP 
lastFile:ui/src/pages/RagChatTab/FooterComponents/RagChatQueryInput.tsx * wip lastFile:ui/src/pages/DataSources/ManageTab/UploadedFilesHeader.tsx * WIP on session doc dialog lastFile:ui/src/pages/RagChatTab/FooterComponents/RagChatQueryInput.tsx * drop databases lastFile:ui/src/pages/RagChatTab/FooterComponents/RagChatQueryInput.tsx * WIP lastFile:ui/src/pages/RagChatTab/FooterComponents/SessionDocuments.tsx * use lastFile:ui/src/pages/RagChatTab/FooterComponents/RagChatQueryInput.tsx * drop databases lastFile:ui/src/pages/RagChatTab/FooterComponents/ChatSessionDocuments.tsx * wip on badge for doc uploading status * WIP lastFile:ui/src/pages/DataSources/ManageTab/UploadedFilesTable.tsx * drop databases lastFile:ui/src/pages/DataSources/ManageTab/UploadedFilesTable.tsx * WIP on doc icon badging lastFile:ui/src/pages/RagChatTab/FooterComponents/ChatSessionDocuments.tsx * WIP lastFile:ui/src/pages/RagChatTab/FooterComponents/ChatSessionDocuments.tsx * get all data source Ids including associated lastFile:ui/src/pages/RagChatTab/FooterComponents/ChatSessionDocuments.tsx * formatting * drop databases lastFile:llm-service/summaries/6/graph_store.json * start dragndroppin lastFile:ui/src/pages/RagChatTab/FooterComponents/RagChatQueryInput.tsx * WIP drag and drop lastFile:ui/src/pages/RagChatTab/FooterComponents/RagChatQueryInput.tsx * remove drop code from input lastFile:ui/src/pages/RagChatTab/FooterComponents/RagChatQueryInput.tsx * drop databases lastFile:ui/src/pages/RagChatTab/RagChat.tsx * get drag and drop working * remove unused imports * invalidate queries to provide status * small ui improvements to chat doc ui * add files to chat with no active session * fix: revert to using chat engine when no tools is selected * Update release version to dev-testing * small UI change to swap textarea with dropzone * refetch suggested questions after file is added in drag and drop * wip on badge for doc uploading status * filter data sources count for metrics based on 
associated session id * remove old code for completedIndexing * modify test for metrics * nits * remove bad directory --------- Co-authored-by: jwatson <[email protected]> Co-authored-by: Baasit Sharief <[email protected]> Co-authored-by: Michael Liu <[email protected]> Co-authored-by: actions-user <[email protected]>
1 parent 405ce6e commit 1454207

File tree

66 files changed

+1626
-579
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

66 files changed

+1626
-579
lines changed

backend/src/main/java/com/cloudera/cai/rag/Types.java

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -99,7 +99,8 @@ public record RagDataSource(
9999
ConnectionType connectionType,
100100
@Nullable Integer documentCount,
101101
@Nullable Long totalDocSize,
102-
boolean availableForDefaultProject) {}
102+
boolean availableForDefaultProject,
103+
Long associatedSessionId) {}
103104

104105
@With
105106
public record QueryConfiguration(
@@ -121,6 +122,7 @@ public record Session(
121122
String updatedById,
122123
Instant lastInteractionTime,
123124
String inferenceModel,
125+
Long associatedDataSourceId,
124126
String rerankModel,
125127
Integer responseChunks,
126128
QueryConfiguration queryConfiguration) {
@@ -137,6 +139,7 @@ public static Session fromCreateRequest(CreateSession input, String username) {
137139
username,
138140
null,
139141
input.inferenceModel(),
142+
null,
140143
input.rerankModel(),
141144
input.responseChunks(),
142145
input.queryConfiguration());
@@ -148,6 +151,7 @@ public record CreateSession(
148151
String name,
149152
@Singular List<Long> dataSourceIds,
150153
String inferenceModel,
154+
String embeddingModel,
151155
String rerankModel,
152156
Integer responseChunks,
153157
QueryConfiguration queryConfiguration,

backend/src/main/java/com/cloudera/cai/rag/datasources/RagDataSourceRepository.java

Lines changed: 59 additions & 54 deletions
Original file line number | Diff line number | Diff line change
@@ -44,6 +44,7 @@
4444
import java.time.Instant;
4545
import java.util.List;
4646
import lombok.extern.slf4j.Slf4j;
47+
import org.jdbi.v3.core.Handle;
4748
import org.jdbi.v3.core.Jdbi;
4849
import org.jdbi.v3.core.mapper.reflect.ConstructorMapper;
4950
import org.jdbi.v3.core.statement.Query;
@@ -59,26 +60,26 @@ public RagDataSourceRepository(Jdbi jdbi) {
5960
}
6061

6162
public Long createRagDataSource(RagDataSource input) {
63+
return jdbi.inTransaction(handle -> createRagDataSource(handle, input));
64+
}
65+
66+
public Long createRagDataSource(Handle handle, RagDataSource input) {
6267
RagDataSource cleanedInputs = cleanInputs(input);
63-
return jdbi.inTransaction(
64-
handle -> {
65-
var sql =
66-
"""
67-
INSERT INTO rag_data_source (name, chunk_size, chunk_overlap_percent, created_by_id, updated_by_id, connection_type, embedding_model, summarization_model)
68-
VALUES (:name, :chunkSize, :chunkOverlapPercent, :createdById, :updatedById, :connectionType, :embeddingModel, :summarizationModel)
69-
""";
70-
Long result;
71-
try (var update = handle.createUpdate(sql)) {
72-
update.bindMethods(cleanedInputs);
73-
result = update.executeAndReturnGeneratedKeys("id").mapTo(Long.class).one();
74-
}
75-
if (Boolean.TRUE.equals(input.availableForDefaultProject())) {
76-
handle.execute(
77-
"INSERT INTO project_data_source (data_source_id, project_id) VALUES (?, 1)",
78-
result);
79-
}
80-
return result;
81-
});
68+
var sql =
69+
"""
70+
INSERT INTO rag_data_source (name, chunk_size, chunk_overlap_percent, created_by_id, updated_by_id, connection_type, embedding_model, summarization_model, ASSOCIATED_SESSION_ID)
71+
VALUES (:name, :chunkSize, :chunkOverlapPercent, :createdById, :updatedById, :connectionType, :embeddingModel, :summarizationModel, :associatedSessionId)
72+
""";
73+
Long result;
74+
try (var update = handle.createUpdate(sql)) {
75+
update.bindMethods(cleanedInputs);
76+
result = update.executeAndReturnGeneratedKeys("id").mapTo(Long.class).one();
77+
}
78+
if (input.availableForDefaultProject()) {
79+
handle.execute(
80+
"INSERT INTO project_data_source (data_source_id, project_id) VALUES (?, 1)", result);
81+
}
82+
return result;
8283
}
8384

8485
private static RagDataSource cleanInputs(RagDataSource input) {
@@ -94,10 +95,10 @@ public void updateRagDataSource(RagDataSource input) {
9495
handle -> {
9596
var sql =
9697
"""
97-
UPDATE rag_data_source
98-
SET name = :name, connection_type = :connectionType, updated_by_id = :updatedById, summarization_model = :summarizationModel, time_updated = :now
99-
WHERE id = :id AND deleted IS NULL
100-
""";
98+
UPDATE rag_data_source
99+
SET name = :name, connection_type = :connectionType, updated_by_id = :updatedById, summarization_model = :summarizationModel, time_updated = :now
100+
WHERE id = :id AND deleted IS NULL
101+
""";
101102
try (var update = handle.createUpdate(sql)) {
102103
update
103104
.bind("name", cleanedInputs.name())
@@ -111,7 +112,7 @@ public void updateRagDataSource(RagDataSource input) {
111112
handle.execute(
112113
"DELETE FROM project_data_source WHERE data_source_id = ? AND project_id = 1",
113114
input.id());
114-
if (Boolean.TRUE.equals(input.availableForDefaultProject())) {
115+
if (input.availableForDefaultProject()) {
115116
handle.execute(
116117
"INSERT INTO project_data_source (data_source_id, project_id) VALUES (?, 1)",
117118
input.id());
@@ -124,18 +125,18 @@ public RagDataSource getRagDataSourceById(Long id) {
124125
handle -> {
125126
var sql =
126127
"""
127-
SELECT rds.*, count(rdsd.ID) as document_count, sum(rdsd.SIZE_IN_BYTES) as total_doc_size,
128-
EXISTS(
129-
SELECT 1 from project_data_source pds
130-
WHERE pds.data_source_id = rds.id
131-
AND pds.project_id = 1
132-
) as available_for_default_project
133-
FROM rag_data_source rds
134-
LEFT JOIN RAG_DATA_SOURCE_DOCUMENT rdsd ON rds.id = rdsd.data_source_id
135-
WHERE rds.deleted IS NULL
136-
AND rds.id = :id
137-
GROUP BY rds.ID
138-
""";
128+
SELECT rds.*, count(rdsd.ID) as document_count, sum(rdsd.SIZE_IN_BYTES) as total_doc_size,
129+
EXISTS(
130+
SELECT 1 from project_data_source pds
131+
WHERE pds.data_source_id = rds.id
132+
AND pds.project_id = 1
133+
) as available_for_default_project
134+
FROM rag_data_source rds
135+
LEFT JOIN RAG_DATA_SOURCE_DOCUMENT rdsd ON rds.id = rdsd.data_source_id
136+
WHERE rds.deleted IS NULL
137+
AND rds.id = :id
138+
GROUP BY rds.ID
139+
""";
139140
handle.registerRowMapper(ConstructorMapper.factory(RagDataSource.class));
140141
try (Query query = handle.createQuery(sql)) {
141142
query.bind("id", id);
@@ -153,17 +154,18 @@ public List<RagDataSource> getRagDataSources() {
153154
handle -> {
154155
var sql =
155156
"""
156-
SELECT rds.*, count(rdsd.ID) as document_count, sum(rdsd.SIZE_IN_BYTES) as total_doc_size,
157-
EXISTS(
158-
SELECT 1 from project_data_source pds
159-
WHERE pds.data_source_id = rds.id
160-
AND pds.project_id = 1
161-
) as available_for_default_project
162-
FROM rag_data_source rds
163-
LEFT JOIN RAG_DATA_SOURCE_DOCUMENT rdsd ON rds.id = rdsd.data_source_id
164-
WHERE rds.deleted IS NULL
165-
GROUP BY rds.ID
166-
""";
157+
SELECT rds.*, count(rdsd.ID) as document_count, sum(rdsd.SIZE_IN_BYTES) as total_doc_size,
158+
EXISTS(
159+
SELECT 1 from project_data_source pds
160+
WHERE pds.data_source_id = rds.id
161+
AND pds.project_id = 1
162+
) as available_for_default_project
163+
FROM rag_data_source rds
164+
LEFT JOIN RAG_DATA_SOURCE_DOCUMENT rdsd ON rds.id = rdsd.data_source_id
165+
WHERE rds.deleted IS NULL
166+
AND rds.ASSOCIATED_SESSION_ID IS NULL
167+
GROUP BY rds.ID
168+
""";
167169
handle.registerRowMapper(ConstructorMapper.factory(RagDataSource.class));
168170
try (Query query = handle.createQuery(sql)) {
169171
return query.mapTo(RagDataSource.class).list();
@@ -172,18 +174,21 @@ public List<RagDataSource> getRagDataSources() {
172174
}
173175

174176
public void deleteDataSource(Long id) {
175-
jdbi.useTransaction(
176-
handle -> {
177-
handle.execute("UPDATE RAG_DATA_SOURCE SET DELETED = ? where ID = ?", true, id);
178-
handle.execute("DELETE FROM PROJECT_DATA_SOURCE WHERE DATA_SOURCE_ID = ?", id);
179-
handle.execute("DELETE FROM CHAT_SESSION_DATA_SOURCE WHERE DATA_SOURCE_ID = ?", id);
180-
});
177+
jdbi.useTransaction(handle -> deleteDataSource(handle, id));
178+
}
179+
180+
public void deleteDataSource(Handle handle, Long id) {
181+
handle.execute("UPDATE RAG_DATA_SOURCE SET DELETED = ? where ID = ?", true, id);
182+
handle.execute("DELETE FROM PROJECT_DATA_SOURCE WHERE DATA_SOURCE_ID = ?", id);
183+
handle.execute("DELETE FROM CHAT_SESSION_DATA_SOURCE WHERE DATA_SOURCE_ID = ?", id);
181184
}
182185

183186
public int getNumberOfDataSources() {
184187
return jdbi.withHandle(
185188
handle -> {
186-
try (var query = handle.createQuery("SELECT count(*) FROM RAG_DATA_SOURCE")) {
189+
try (var query =
190+
handle.createQuery(
191+
"SELECT count(*) FROM RAG_DATA_SOURCE where ASSOCIATED_SESSION_ID IS NULL AND DELETED IS NULL")) {
187192
return query.mapTo(Integer.class).one();
188193
}
189194
});

backend/src/main/java/com/cloudera/cai/rag/datasources/RagDataSourceService.java

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,8 @@
4747

4848
@Component
4949
public class RagDataSourceService {
50+
public static final int DEFAULT_CHUNK_OVERLAP = 10;
51+
public static final int DEFAULT_CHUNK_SIZE = 512;
5052
private final RagDataSourceRepository ragDataSourceRepository;
5153

5254
public RagDataSourceService(RagDataSourceRepository ragDataSourceRepository) {
@@ -55,7 +57,10 @@ public RagDataSourceService(RagDataSourceRepository ragDataSourceRepository) {
5557

5658
public RagDataSource createRagDataSource(RagDataSource input) {
5759
if (input.chunkOverlapPercent() == null) {
58-
input = input.withChunkOverlapPercent(10);
60+
input = input.withChunkOverlapPercent(DEFAULT_CHUNK_OVERLAP);
61+
}
62+
if (input.chunkSize() == null) {
63+
input = input.withChunkSize(DEFAULT_CHUNK_SIZE);
5964
}
6065
var id =
6166
ragDataSourceRepository.createRagDataSource(

backend/src/main/java/com/cloudera/cai/rag/files/RagFileIndexReconciler.java

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -68,6 +68,7 @@ public class RagFileIndexReconciler extends BaseReconciler<RagDocument> {
6868
private final RagBackendClient ragBackendClient;
6969
private final RagDataSourceRepository ragDataSourceRepository;
7070
private final RagFileRepository ragFileRepository;
71+
private final RagFileSummaryReconciler ragFileSummaryReconciler;
7172

7273
@Autowired
7374
public RagFileIndexReconciler(
@@ -77,13 +78,15 @@ public RagFileIndexReconciler(
7778
RagDataSourceRepository ragDataSourceRepository,
7879
@Qualifier("singleWorkerReconcilerConfig") ReconcilerConfig reconcilerConfig,
7980
RagFileRepository ragFileRepository,
80-
OpenTelemetry openTelemetry) {
81+
OpenTelemetry openTelemetry,
82+
RagFileSummaryReconciler ragFileSummaryReconciler) {
8183
super(reconcilerConfig, openTelemetry);
8284
this.bucketName = bucketName;
8385
this.jdbi = jdbi;
8486
this.ragBackendClient = ragBackendClient;
8587
this.ragDataSourceRepository = ragDataSourceRepository;
8688
this.ragFileRepository = ragFileRepository;
89+
this.ragFileSummaryReconciler = ragFileSummaryReconciler;
8790
}
8891

8992
@Override
@@ -120,6 +123,7 @@ public ReconcileResult reconcile(Set<RagDocument> documents) {
120123
IndexConfiguration indexConfiguration = fetchIndexConfiguration(document.dataSourceId());
121124
RagDocument finalDocument = doIndexing(document, indexConfiguration);
122125
updateFinalStatus(finalDocument);
126+
ragFileSummaryReconciler.submit(finalDocument);
123127
}
124128
return new ReconcileResult();
125129
}
@@ -194,6 +198,7 @@ public static RagFileIndexReconciler createNull() {
194198
RagDataSourceRepository.createNull(),
195199
ReconcilerConfig.builder().isTestReconciler(true).build(),
196200
RagFileRepository.createNull(),
197-
OpenTelemetry.noop());
201+
OpenTelemetry.noop(),
202+
RagFileSummaryReconciler.createNull());
198203
}
199204
}

backend/src/main/java/com/cloudera/cai/rag/files/RagFileService.java

Lines changed: 19 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -72,6 +72,7 @@ public class RagFileService {
7272
private final String s3PathPrefix;
7373
private final RagDataSourceRepository ragDataSourceRepository;
7474
private final RagFileDeleteReconciler ragFileDeleteReconciler;
75+
private final RagFileSummaryReconciler ragFileSummaryReconciler;
7576

7677
@Autowired
7778
public RagFileService(
@@ -81,14 +82,16 @@ public RagFileService(
8182
RagFileIndexReconciler ragFileIndexReconciler,
8283
@Qualifier("s3BucketPrefix") String s3PathPrefix,
8384
RagDataSourceRepository ragDataSourceRepository,
84-
RagFileDeleteReconciler ragFileDeleteReconciler) {
85+
RagFileDeleteReconciler ragFileDeleteReconciler,
86+
RagFileSummaryReconciler ragFileSummaryReconciler) {
8587
this.idGenerator = idGenerator;
8688
this.ragFileRepository = ragFileRepository;
8789
this.ragFileUploader = ragFileUploader;
8890
this.ragFileIndexReconciler = ragFileIndexReconciler;
8991
this.s3PathPrefix = s3PathPrefix;
9092
this.ragDataSourceRepository = ragDataSourceRepository;
9193
this.ragFileDeleteReconciler = ragFileDeleteReconciler;
94+
this.ragFileSummaryReconciler = ragFileSummaryReconciler;
9295
}
9396

9497
public List<RagDocumentMetadata> saveRagFile(
@@ -148,6 +151,7 @@ private RagDocumentMetadata processFile(
148151
log.info("Saved document with id: {}", id);
149152

150153
ragFileIndexReconciler.submit(ragDocument.withId(id));
154+
ragFileSummaryReconciler.submit(ragDocument.withId(id));
151155

152156
return new RagDocumentMetadata(
153157
ragDocument.filename(), documentId, ragDocument.extension(), ragDocument.sizeInBytes());
@@ -206,19 +210,6 @@ public void deleteRagFile(Long id, Long dataSourceId) {
206210
ragFileDeleteReconciler.submit(document);
207211
}
208212

209-
// Nullables stuff down here
210-
211-
public static RagFileService createNull(String... dummyIds) {
212-
return new RagFileService(
213-
IdGenerator.createNull(dummyIds),
214-
RagFileRepository.createNull(),
215-
RagFileUploader.createNull(),
216-
RagFileIndexReconciler.createNull(),
217-
"prefix",
218-
RagDataSourceRepository.createNull(),
219-
RagFileDeleteReconciler.createNull());
220-
}
221-
222213
public List<RagDocument> getRagDocuments(Long dataSourceId) {
223214
return ragFileRepository.getRagDocuments(dataSourceId);
224215
}
@@ -259,4 +250,18 @@ public long getSize() {
259250
return size;
260251
}
261252
}
253+
254+
// Nullables stuff down here
255+
256+
public static RagFileService createNull(String... dummyIds) {
257+
return new RagFileService(
258+
IdGenerator.createNull(dummyIds),
259+
RagFileRepository.createNull(),
260+
RagFileUploader.createNull(),
261+
RagFileIndexReconciler.createNull(),
262+
"prefix",
263+
RagDataSourceRepository.createNull(),
264+
RagFileDeleteReconciler.createNull(),
265+
RagFileSummaryReconciler.createNull());
266+
}
262267
}

backend/src/main/java/com/cloudera/cai/rag/sessions/SessionController.java

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -66,8 +66,7 @@ public Session getSession(@PathVariable Long id, HttpServletRequest request) {
6666
@PostMapping(consumes = "application/json", produces = "application/json")
6767
public Session create(@RequestBody CreateSession input, HttpServletRequest request) {
6868
String username = usernameExtractor.extractUsername(request);
69-
Session toCreate = Session.fromCreateRequest(input, username);
70-
return sessionService.create(toCreate, username);
69+
return sessionService.create(input, username);
7170
}
7271

7372
@PostMapping(path = "/{id}", consumes = "application/json", produces = "application/json")
@@ -78,8 +77,9 @@ public Session update(@RequestBody Session input, HttpServletRequest request) {
7877
}
7978

8079
@DeleteMapping(path = "/{id}")
81-
public void delete(@PathVariable Long id) {
82-
sessionService.delete(id);
80+
public void delete(@PathVariable Long id, HttpServletRequest request) {
81+
String username = usernameExtractor.extractUsername(request);
82+
sessionService.delete(id, username);
8383
}
8484

8585
@GetMapping(produces = "application/json")

0 commit comments

Comments
 (0)