@@ -7,37 +7,36 @@ title: Indexer - es8
7
7
weight : 0
8
8
---
9
9
10
- ## ES8 Indexer
10
+ ## ES8 索引器
11
11
12
- An Elasticsearch 8.x indexer implementation for [ Eino] ( https://github.com/cloudwego/eino ) that implements the ` Indexer ` interface. This enables seamless integration with Eino's vector storage and retrieval system for enhanced semantic search capabilities.
12
+ 这是一个面向 [Eino](https://github.com/cloudwego/eino) 的 Elasticsearch 8.x 索引器实现,它实现了 `Indexer` 接口,能够与 Eino 的向量存储和检索系统无缝集成,从而增强语义搜索能力。
13
13
14
- ## Features
14
+ ## 特性
15
15
16
- - Implements ` github.com/cloudwego/eino/components/indexer.Indexer `
17
- - Easy integration with Eino's indexer system
18
- - Configurable Elasticsearch parameters
19
- - Support for vector similarity search
20
- - Bulk indexing operations
21
- - Custom field mapping support
22
- - Flexible document vectorization
16
+ - 实现了 ` github.com/cloudwego/eino/components/indexer.Indexer `
17
+ - 易于与 Eino 的索引系统集成
18
+ - 可配置的 Elasticsearch 参数
19
+ - 支持向量相似度搜索
20
+ - 批量索引操作
21
+ - 支持自定义字段映射
22
+ - 灵活的文档向量化
23
23
24
- ## Installation
24
+ ## 安装
25
25
26
26
``` bash
27
27
go get github.com/cloudwego/eino-ext/components/indexer/es8@latest
28
28
```
29
29
30
- ## Quick Start
30
+ ## 快速开始
31
31
32
- Here's a quick example of how to use the indexer, you could read components/indexer/es8/examples/indexer/add_documents.go for more details:
32
+ 下面是一个使用索引器的快速示例,更多细节可参考 `components/indexer/es8/examples/indexer/add_documents.go`:
33
33
34
34
``` go
35
35
import (
36
36
" github.com/cloudwego/eino/components/embedding"
37
37
" github.com/cloudwego/eino/schema"
38
38
" github.com/elastic/go-elasticsearch/v8"
39
-
40
- " github.com/cloudwego/eino-ext/components/indexer/es8"
39
+ " github.com/cloudwego/eino-ext/components/indexer/es8" // 导入 es8 索引器
41
40
)
42
41
43
42
const (
@@ -51,7 +50,7 @@ const (
51
50
func main () {
52
51
ctx := context.Background ()
53
52
54
- // es supports multiple ways to connect
53
+ // es 支持多种连接方式
55
54
username := os.Getenv (" ES_USERNAME" )
56
55
password := os.Getenv (" ES_PASSWORD" )
57
56
httpCACertPath := os.Getenv (" ES_HTTP_CA_CERT_PATH" )
@@ -61,72 +60,81 @@ func main() {
61
60
log.Fatalf (" read file failed, err=%v " , err)
62
61
}
63
62
64
- client , _ := elasticsearch.NewClient (elasticsearch.Config {
63
+ client , err := elasticsearch.NewClient (elasticsearch.Config {
65
64
Addresses: []string {" https://localhost:9200" },
66
65
Username: username,
67
66
Password: password,
68
67
CACert: cert,
69
68
})
69
+ if err != nil {
70
+ log.Panicf (" connect es8 failed, err=%v " , err)
71
+ }
70
72
71
- // create embedding component
73
+ // 创建 embedding 组件
72
74
emb := createYourEmbedding ()
73
75
74
- // load docs
76
+ // 加载文档
75
77
docs := loadYourDocs ()
76
78
77
- // create es indexer component
78
- indexer , _ := es8.NewIndexer (ctx, &es8.IndexerConfig {
79
+ // 创建 es 索引器组件
80
+ indexer , err := es8.NewIndexer (ctx, &es8.IndexerConfig {
79
81
Client: client,
80
82
Index: indexName,
81
83
BatchSize: 10 ,
82
84
DocumentToFields: func (ctx context.Context , doc *schema.Document ) (field2Value map [string ]es8.FieldValue , err error ) {
83
85
return map [string ]es8.FieldValue {
84
86
fieldContent: {
85
87
Value: doc.Content ,
86
- EmbedKey: fieldContentVector, // vectorize doc content and save vector to field "content_vector"
88
+ EmbedKey: fieldContentVector, // 对文档内容进行向量化并保存向量到 "content_vector" 字段
87
89
},
88
90
fieldExtraLocation: {
89
91
Value: doc.MetaData [docExtraLocation],
90
92
},
91
93
}, nil
92
94
},
93
- Embedding: emb, // replace it with real embedding component
95
+ Embedding: emb, // 替换为真实的 embedding 组件
94
96
})
97
+ if err != nil {
98
+ log.Panicf (" create indexer failed, err=%v " , err)
99
+ }
95
100
96
- ids , _ := indexer.Store (ctx, docs)
101
+ ids , err := indexer.Store (ctx, docs)
102
+ if err != nil {
103
+ log.Panicf (" create docs failed, err=%v " , err)
104
+ }
97
105
98
106
fmt.Println (ids)
99
- // Use with Eino's system
100
- // ... configure and use with Eino
107
+ // 与 Eino 系统一起使用
108
+ // ... 配置并与 Eino 一起使用
101
109
}
102
110
```
103
111
104
- ## Configuration
112
+ ## 配置
105
113
106
- The indexer can be configured using the ` IndexerConfig ` struct:
114
+ 索引器可以通过 ` IndexerConfig ` 结构体进行配置:
107
115
108
116
``` go
109
117
type IndexerConfig struct {
110
- Client *elasticsearch.Client // Required: Elasticsearch client instance
111
- Index string // Required: Index name to store documents
112
- BatchSize int // Optional: Max texts size for embedding (default: 5)
113
-
114
- // Required: Function to map Document fields to Elasticsearch fields
115
- DocumentToFields func (ctx context.Context , doc *schema.Document ) (map [string ]FieldValue, error )
116
-
117
- // Optional: Required only if vectorization is needed
118
- Embedding embedding.Embedder
118
+ Client *elasticsearch.Client // 必填: Elasticsearch 客户端实例
119
+ Index string // 必填:存储文档的索引名称
120
+ BatchSize int // 可选: embedding 的最大文本大小(默认:5)
121
+
122
+ // 必填:将文档字段映射到 Elasticsearch 字段的函数
123
+ DocumentToFields func (ctx context.Context , doc *schema.Document ) (map [string ]FieldValue, error )
124
+
125
+ // 可选:仅当需要向量化时才需要
126
+ Embedding embedding.Embedder
119
127
}
120
128
121
- // FieldValue defines how a field should be stored and vectorized
129
+ // FieldValue 定义了字段应如何存储和向量化
122
130
type FieldValue struct {
123
- Value any // Original value to store
124
- EmbedKey string // If set, Value will be vectorized and saved
125
- Stringify func (val any) (string , error ) // Optional: custom string conversion
131
+ Value any // 要存储的原始值
132
+ EmbedKey string // 如果设置, Value 将被向量化并保存
133
+ Stringify func (val any) (string , error ) // 可选:自定义字符串转换
126
134
}
127
135
```
128
136
129
- ## For More Details
137
+ ## 更多详情
130
138
131
- - [ Eino Documentation ] ( https://github.com/cloudwego/eino )
132
- - [ Elasticsearch Go Client Documentation ] ( https://github.com/elastic/go-elasticsearch )
139
+ - [Eino 文档](https://github.com/cloudwego/eino)
140
+ - [Elasticsearch Go 客户端文档](https://github.com/elastic/go-elasticsearch)
0 commit comments