Skip to content

Commit 48eed4a

Browse files
committed
Add article summary feature with OpenAI integration
- Introduce 'summary' query parameter in /api/content/v1/parser endpoint - Integrate OpenAI API for generating article summaries - Add OpenAIKey field to Server struct and corresponding command-line flag - Update extractArticleEmulateReadability to handle summary requests - Add generateSummary method using OpenAI's GPT-4o model (turns out to be faster than even 4o mini) - Add OpenAIClient interface and mock for testing - Update README.md with new configuration options and API details This feature allows users to request a summary of extracted articles using OpenAI's GPT-4o model. To ensure secure usage, summary generation requires a valid server token. The changes include comprehensive error handling and test coverage for various scenarios, including token validation and server misconfiguration.
1 parent 326a191 commit 48eed4a

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

49 files changed

+6603
-3
lines changed

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
| address | UKEEPER_ADDRESS | all interfaces | web server listening address |
1212
| port | UKEEPER_PORT | `8080` | web server port |
1313
| mongo_uri | MONGO_URI | none | MongoDB connection string, _required_ |
14+
| openai_key | OPENAI_KEY | none | OpenAI API key for summary generation |
1415
| frontend_dir | FRONTEND_DIR | `/srv/web` | directory with frontend files |
1516
| token | TOKEN | none | token for /content/v1/parser endpoint auth |
1617
| mongo-delay | MONGO_DELAY | `0` | mongo initial delay |
@@ -20,7 +21,7 @@
2021

2122
### API
2223

23-
GET /api/content/v1/parser?token=secret&url=http://aa.com/blah - extract content (emulate Readability API parse call)
24+
GET /api/content/v1/parser?token=secret&summary=true&url=http://aa.com/blah - extract content (emulate Readability API parse call); the `summary` parameter is optional and works only when both the OpenAI key and the server token are configured
2425
POST /api/v1/extract {url: http://aa.com/blah} - extract content
2526

2627
## Development

backend/extractor/readability.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ type UReadability struct {
3737

3838
// Response from api calls
3939
type Response struct {
40+
Summary string `json:"summary,omitempty"`
4041
Content string `json:"content"`
4142
Rich string `json:"rich_content"`
4243
Domain string `json:"domain"`

backend/go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ require (
1515
github.com/jessevdk/go-flags v1.6.1
1616
github.com/kennygrant/sanitize v1.2.4
1717
github.com/mauidude/go-readability v0.0.0-20220221173116-a9b3620098b7
18+
github.com/sashabaranov/go-openai v1.28.1
1819
github.com/stretchr/testify v1.9.0
1920
go.mongodb.org/mongo-driver v1.16.1
2021
golang.org/x/net v0.28.0

backend/go.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,8 @@ github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6So
183183
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
184184
github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
185185
github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts=
186+
github.com/sashabaranov/go-openai v1.28.1 h1:aREx6faUTeOZNMDTNGAY8B9vNmmN7qoGvDV0Ke2J1Mc=
187+
github.com/sashabaranov/go-openai v1.28.1/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
186188
github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc=
187189
github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
188190
github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=

backend/main.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ var opts struct {
2626
MongoURI string `short:"m" long:"mongo_uri" env:"MONGO_URI" required:"true" description:"MongoDB connection string"`
2727
MongoDelay time.Duration `long:"mongo-delay" env:"MONGO_DELAY" default:"0" description:"mongo initial delay"`
2828
MongoDB string `long:"mongo-db" env:"MONGO_DB" default:"ureadability" description:"mongo database name"`
29+
OpenAIKey string `long:"openai_key" env:"OPENAI_KEY" description:"OpenAI API key for summary generation"`
2930
Debug bool `long:"dbg" env:"DEBUG" description:"debug mode"`
3031
}
3132

@@ -45,6 +46,7 @@ func main() {
4546
Token: opts.Token,
4647
Credentials: opts.Credentials,
4748
Version: revision,
49+
OpenAIKey: opts.OpenAIKey,
4850
}
4951

5052
ctx, cancel := context.WithCancel(context.Background())

backend/rest/openai_mock.go

Lines changed: 82 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

backend/rest/server.go

Lines changed: 74 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"net/http"
1010
"os"
1111
"path/filepath"
12+
"strconv"
1213
"strings"
1314
"time"
1415

@@ -20,21 +21,29 @@ import (
2021
log "github.com/go-pkgz/lgr"
2122
UM "github.com/go-pkgz/rest"
2223
"github.com/go-pkgz/rest/logger"
24+
"github.com/sashabaranov/go-openai"
2325
"go.mongodb.org/mongo-driver/bson/primitive"
2426

2527
"github.com/ukeeper/ukeeper-redabilty/backend/datastore"
2628
"github.com/ukeeper/ukeeper-redabilty/backend/extractor"
2729
)
2830

31+
// OpenAIClient is the part of the OpenAI client that Server uses to request chat completions.
// A mock implementation is generated into openai_mock.go by the moq directive below.
//go:generate moq -out openai_mock.go . OpenAIClient
32+
type OpenAIClient interface {
33+
// CreateChatCompletion sends a chat completion request and returns the response or an error.
CreateChatCompletion(ctx context.Context, request openai.ChatCompletionRequest) (openai.ChatCompletionResponse, error)
34+
}
35+
2936
// Server is a basic rest server providing access to store and invoking parser
3037
type Server struct {
3138
Readability extractor.UReadability
3239
Version string
3340
// Token, when non-empty, must match the "token" query parameter of the parser endpoint;
// summary generation is rejected when it is empty
Token string
3441
Credentials map[string]string
42+
// OpenAIKey is the API key passed to the OpenAI client; summaries are not generated when it is empty
OpenAIKey string
3543

36-
indexPage *template.Template
37-
rulePage *template.Template
44+
// openAIClient is created by generateSummary on first use if it is nil
openAIClient OpenAIClient
45+
indexPage *template.Template
46+
rulePage *template.Template
3847
}
3948

4049
// JSON is a map alias, just for convenience
@@ -185,12 +194,28 @@ func (s *Server) extractArticle(w http.ResponseWriter, r *http.Request) {
185194
// if token is not set for application, it won't be checked
186195
func (s *Server) extractArticleEmulateReadability(w http.ResponseWriter, r *http.Request) {
187196
token := r.URL.Query().Get("token")
197+
summary, _ := strconv.ParseBool(r.URL.Query().Get("summary"))
198+
188199
if s.Token != "" && token == "" {
189200
render.Status(r, http.StatusExpectationFailed)
190201
render.JSON(w, r, JSON{"error": "no token passed"})
191202
return
192203
}
193204

205+
// Summary generation requires both the OpenAI key and the server token to be configured; reject the request otherwise
206+
if summary {
207+
if s.OpenAIKey == "" {
208+
render.Status(r, http.StatusBadRequest)
209+
render.JSON(w, r, JSON{"error": "OpenAI key is not set"})
210+
return
211+
}
212+
if s.Token == "" {
213+
render.Status(r, http.StatusBadRequest)
214+
render.JSON(w, r, JSON{"error": "summary generation requires token, but token is not set for the server"})
215+
return
216+
}
217+
}
218+
194219
if s.Token != "" && s.Token != token {
195220
render.Status(r, http.StatusUnauthorized)
196221
render.JSON(w, r, JSON{"error": "wrong token passed"})
@@ -211,6 +236,16 @@ func (s *Server) extractArticleEmulateReadability(w http.ResponseWriter, r *http
211236
return
212237
}
213238

239+
if summary {
240+
summaryText, err := s.generateSummary(r.Context(), res.Content)
241+
if err != nil {
242+
render.Status(r, http.StatusInternalServerError)
243+
render.JSON(w, r, JSON{"error": fmt.Sprintf("failed to generate summary: %v", err)})
244+
return
245+
}
246+
res.Summary = summaryText
247+
}
248+
214249
render.JSON(w, r, &res)
215250
}
216251

@@ -250,6 +285,13 @@ func (s *Server) handlePreview(w http.ResponseWriter, r *http.Request) {
250285
continue
251286
}
252287

288+
if s.OpenAIKey != "" {
289+
result.Summary, e = s.generateSummary(r.Context(), result.Content)
290+
if e != nil {
291+
log.Printf("[WARN] failed to generate summary for preview of %s: %v", url, e)
292+
}
293+
}
294+
253295
responses = append(responses, *result)
254296
}
255297

@@ -260,6 +302,7 @@ func (s *Server) handlePreview(w http.ResponseWriter, r *http.Request) {
260302
Excerpt string
261303
Rich template.HTML
262304
Content string
305+
Summary template.HTML
263306
}
264307

265308
var results []result
@@ -270,6 +313,7 @@ func (s *Server) handlePreview(w http.ResponseWriter, r *http.Request) {
270313
//nolint: gosec // this content is escaped by Extractor, so it's safe to use it as is
271314
Rich: template.HTML(r.Rich),
272315
Content: r.Content,
316+
Summary: template.HTML(strings.ReplaceAll(r.Summary, "\n", "<br>")),
273317
})
274318
}
275319

@@ -354,6 +398,34 @@ func (s *Server) authFake(w http.ResponseWriter, r *http.Request) {
354398
render.JSON(w, r, JSON{"pong": t.Format("20060102150405")})
355399
}
356400

401+
func (s *Server) generateSummary(ctx context.Context, content string) (string, error) {
402+
if s.openAIClient == nil {
403+
s.openAIClient = openai.NewClient(s.OpenAIKey)
404+
}
405+
resp, err := s.openAIClient.CreateChatCompletion(
406+
ctx,
407+
openai.ChatCompletionRequest{
408+
Model: openai.GPT4o,
409+
Messages: []openai.ChatCompletionMessage{
410+
{
411+
Role: openai.ChatMessageRoleSystem,
412+
Content: "You are a helpful assistant that summarizes articles. Please summarize the main points in a few sentences as TLDR style (don't add a TLDR label). Then, list up to five detailed bullet points. Provide the response in plain text. Do not add any additional information. Do not add a Summary at the beginning of the response. If detailed bullet points are too similar to the summary, don't include them at all:",
413+
},
414+
{
415+
Role: openai.ChatMessageRoleUser,
416+
Content: content,
417+
},
418+
},
419+
},
420+
)
421+
422+
if err != nil {
423+
return "", err
424+
}
425+
426+
return resp.Choices[0].Message.Content, nil
427+
}
428+
357429
func getBid(id string) primitive.ObjectID {
358430
bid, err := primitive.ObjectIDFromHex(id)
359431
if err != nil {

0 commit comments

Comments
 (0)