From 1224fabc1ddc848fde70dd46cb80caa9e8f822c5 Mon Sep 17 00:00:00 2001 From: AbdulRahimOM Date: Tue, 11 Mar 2025 22:19:18 +0530 Subject: [PATCH 1/4] Done :white_check_mark: * Judicially used go routines for fetching data from external API * Data is stored in map(simple cache) after fetching from external API. But need to provide expiry * Used sync.RWMutex for handling concurrent read and write to this cache * Pprof endpoints for monitoring * Periodic logging of statistics (goroutine counts) (for simple monitoring) * Checking for final person's existence to avoid infinite run * Predefined number of workers for fetching data * Proper closing of channels and goroutines * Context for cancellation, to terminate goroutines and close channels whenever required seperation is found * Simply ignoring the error if the external API call returns an error, logged at debug level * Used env variables for configuration * Net/http used for pprof endpoints, as fiber uses fasthttp --- .gitignore | 2 + Makefile | 10 ++ cmd/main.go | 50 +++++++++ go.mod | 22 ++++ go.sum | 29 +++++ internal/config/config.go | 79 ++++++++++++++ internal/data/cache.go | 53 +++++++++ internal/data/check.go | 28 +++++ internal/data/external_fetch.go | 62 +++++++++++ internal/data/type.go | 28 +++++ internal/status/statistics.go | 37 +++++++ internal/tracer/execute.go | 187 ++++++++++++++++++++++++++++++++ internal/tracer/workers.go | 118 ++++++++++++++++++++ no-secrets.env | 7 ++ 14 files changed, 712 insertions(+) create mode 100644 .gitignore create mode 100644 Makefile create mode 100644 cmd/main.go create mode 100644 go.mod create mode 100644 go.sum create mode 100644 internal/config/config.go create mode 100644 internal/data/cache.go create mode 100644 internal/data/check.go create mode 100644 internal/data/external_fetch.go create mode 100644 internal/data/type.go create mode 100644 internal/status/statistics.go create mode 100644 internal/tracer/execute.go create mode 100644 internal/tracer/workers.go 
create mode 100644 no-secrets.env diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a31c3df --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +.vscode +main \ No newline at end of file diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..5d44b4b --- /dev/null +++ b/Makefile @@ -0,0 +1,10 @@ +.PHONY: build run dev + +build: + go build -o ./cmd/main ./cmd + +run: + go run ./cmd/main.go + +running: + CompileDaemon -build="go build -o ./cmd/main ./cmd" -command=./cmd/main \ No newline at end of file diff --git a/cmd/main.go b/cmd/main.go new file mode 100644 index 0000000..2189eec --- /dev/null +++ b/cmd/main.go @@ -0,0 +1,50 @@ +package main + +import ( + "test/internal/config" + "test/internal/tracer" + + "net/http" + _ "net/http/pprof" + + "github.com/gofiber/fiber/v2" + "github.com/gofiber/fiber/v2/log" +) + +func main() { + setLogLevel() + + app := fiber.New() + + app.Get("/seperation", func(c *fiber.Ctx) error { + from := c.Query("from") + to := c.Query("to") + if from == "" || to == "" { + return c.Status(fiber.StatusBadRequest).JSON(map[string]string{"error": "from and to query params are required"}) + } + + seperation, err := tracer.FindSeperation(from, to) + if err != nil { + return c.Status(fiber.StatusInternalServerError).JSON(map[string]string{"error": err.Error()}) + } + + return c.Status(fiber.StatusOK).JSON(map[string]interface{}{ + "seperation": seperation, + }) + }) + go func() { + log.Fatal(http.ListenAndServe(":"+config.PprofPort, nil)) //for pprof, as fiber doesn't use net/http. 
+ }() + log.Fatal(app.Listen(":" + config.Port)) +} + +func setLogLevel() { + switch config.LogLevel { + case "debug", "DEBUG": + log.SetLevel(log.LevelDebug) + case "info", "INFO": + log.SetLevel(log.LevelInfo) + default: + log.SetLevel(log.LevelInfo) + } +} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..0ba8dd7 --- /dev/null +++ b/go.mod @@ -0,0 +1,22 @@ +module test + +go 1.23.2 + +require ( + github.com/gofiber/fiber/v2 v2.52.6 + github.com/joho/godotenv v1.5.1 +) + +require ( + github.com/andybalholm/brotli v1.1.0 // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/klauspost/compress v1.17.11 // indirect + github.com/mattn/go-colorable v0.1.13 // indirect + github.com/mattn/go-isatty v0.0.20 // indirect + github.com/mattn/go-runewidth v0.0.16 // indirect + github.com/rivo/uniseg v0.2.0 // indirect + github.com/valyala/bytebufferpool v1.0.0 // indirect + github.com/valyala/fasthttp v1.51.0 // indirect + github.com/valyala/tcplisten v1.0.0 // indirect + golang.org/x/sys v0.28.0 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..ed894e6 --- /dev/null +++ b/go.sum @@ -0,0 +1,29 @@ +github.com/andybalholm/brotli v1.1.0 h1:eLKJA0d02Lf0mVpIDgYnqXcUn0GqVmEFny3VuID1U3M= +github.com/andybalholm/brotli v1.1.0/go.mod h1:sms7XGricyQI9K10gOSf56VKKWS4oLer58Q+mhRPtnY= +github.com/gofiber/fiber/v2 v2.52.6 h1:Rfp+ILPiYSvvVuIPvxrBns+HJp8qGLDnLJawAu27XVI= +github.com/gofiber/fiber/v2 v2.52.6/go.mod h1:YEcBbO/FB+5M1IZNBP9FO3J9281zgPAreiI1oqg8nDw= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0= +github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= +github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc= +github.com/klauspost/compress v1.17.11/go.mod 
h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0= +github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= +github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= +github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= +github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= +github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc= +github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= +github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= +github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= +github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw= +github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= +github.com/valyala/fasthttp v1.51.0 h1:8b30A5JlZ6C7AS81RsWjYMQmrZG6feChmgAolCl1SqA= +github.com/valyala/fasthttp v1.51.0/go.mod h1:oI2XroL+lI7vdXyYoQk03bXBThfFl2cVdIA3Xl7cH8g= +github.com/valyala/tcplisten v1.0.0 h1:rBHj/Xf+E1tRGZyWIWwJDiRY0zc1Js+CV5DqwacVSA8= +github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7FwZEA7Ioqkc= +golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA= +golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= diff --git a/internal/config/config.go b/internal/config/config.go new file mode 100644 index 0000000..a3669f0 --- /dev/null +++ b/internal/config/config.go @@ -0,0 +1,79 @@ +package config + +import ( + "fmt" + "strconv" + + "os" + + "github.com/gofiber/fiber/v2/log" + 
"github.com/joho/godotenv" +) + +const envPath = "no-secrets.env" + +var ( + Port string // PORT + PprofPort string // PPROF_PORT + LogLevel string // LOG_LEVEL + PersonDataFetchWorkersCount int // PERSON_DATA_FETCH_WORKERS_COUNT + MovieDataFetchWorkersCount int // MOVIE_DATA_FETCH_WORKERS_COUNT + LogGoroutineCount bool // LOG_GOROUTINE_COUNT +) + +func init() { + LoadEnv() +} + +func LoadEnv() { + fmt.Println("Loading .env file...") + err := godotenv.Load(envPath) + if err != nil { + log.Fatal("Error loading .env file. err", err) + } + + Port = getEnvString("PORT", "3001") + PprofPort = getEnvString("PPROF_PORT", "6060") + LogLevel = getEnvString("LOG_LEVEL", "INFO") + PersonDataFetchWorkersCount = getEnvInt("PERSON_DATA_FETCH_WORKERS_COUNT", 10) + MovieDataFetchWorkersCount = getEnvInt("MOVIE_DATA_FETCH_WORKERS_COUNT", 10) + LogGoroutineCount = getEnvBool("LOG_GOROUTINE_COUNT", false) + fmt.Println("Load .env file completed") +} + +func getEnvString(key, defaultValue string) string { + str := os.Getenv(key) + if str == "" { + log.Info("Environment variable not found. Using default value for ", key) + return defaultValue + } + return str +} + +func getEnvInt(key string, defaultValue int) int { + str := os.Getenv(key) + if str == "" { + log.Info("Environment variable not found. Using default value for ", key) + return defaultValue + } + value, err := strconv.Atoi(str) + if err != nil { + log.Info("Error parsing environment variable. Using default value for ", key) + return defaultValue + } + return value +} + +func getEnvBool(key string, defaultValue bool) bool { + str := os.Getenv(key) + if str == "" { + log.Info("Environment variable not found. Using default value for ", key) + return defaultValue + } + value, err := strconv.ParseBool(str) + if err != nil { + log.Info("Error parsing environment variable. 
Using default value for ", key) + return defaultValue + } + return value +} \ No newline at end of file diff --git a/internal/data/cache.go b/internal/data/cache.go new file mode 100644 index 0000000..ca05d42 --- /dev/null +++ b/internal/data/cache.go @@ -0,0 +1,53 @@ +package data + +import ( + "sync" +) + +type cache struct { + PersonMutex sync.RWMutex + MovieMutex sync.RWMutex + Persons map[string]*Person + Movies map[string]*Movie +} + +var CachedData cache + +func init() { + CachedData.Persons = make(map[string]*Person) + CachedData.Movies = make(map[string]*Movie) +} + +func (c *cache) GetCachedPerson(personURL string) *Person { + c.PersonMutex.RLock() + defer c.PersonMutex.RUnlock() + return c.Persons[personURL] +} + +func (c *cache) GetCachedMovie(movieURL string) *Movie { + c.MovieMutex.RLock() + defer c.MovieMutex.RUnlock() + return c.Movies[movieURL] +} + +func (c *cache) CachePerson(personURL string, person *Person) { + if person == nil { + return + } + c.PersonMutex.Lock() + defer c.PersonMutex.Unlock() + if _, ok := c.Persons[personURL]; ok { + c.Persons[personURL] = person + } +} + +func (c *cache) CacheMovie(movieURL string, movie *Movie) { + if movie == nil { + return + } + c.MovieMutex.Lock() + defer c.MovieMutex.Unlock() + if _, ok := c.Movies[movieURL]; ok { + c.Movies[movieURL] = movie + } +} diff --git a/internal/data/check.go b/internal/data/check.go new file mode 100644 index 0000000..e3fad0a --- /dev/null +++ b/internal/data/check.go @@ -0,0 +1,28 @@ +package data + +import ( + "fmt" + "net/http" + _ "test/internal/status" //to use GetStatistics function +) + +func CheckPersonExistence(personURL string) (bool, error) { + if CachedData.GetCachedPerson(personURL) != nil { + return true, nil + } + + // If not in cache, check if it exists in external API + url := fmt.Sprintf("http://data.moviebuff.com/%s", personURL) + + resp, err := http.Head(url) // Use HEAD instead of GET to check existence + if err != nil { + return false, 
fmt.Errorf("failed to fetch data: %v", err) + } + defer resp.Body.Close() + + if resp.StatusCode == http.StatusOK { + return true, nil + } else { + return false, nil + } +} diff --git a/internal/data/external_fetch.go b/internal/data/external_fetch.go new file mode 100644 index 0000000..1a4c03c --- /dev/null +++ b/internal/data/external_fetch.go @@ -0,0 +1,62 @@ +package data + +import ( + "encoding/json" + "fmt" + "io" + "net/http" +) + +func FetchMovieDataFromExternalAPI(movieURL string) (*Movie, error) { + url := fmt.Sprintf("http://data.moviebuff.com/%s", movieURL) + + resp, err := http.Get(url) + if err != nil { + return nil, fmt.Errorf("failed to fetch data: %v", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("received non-200 response: %d", resp.StatusCode) + } + + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("failed to read response body: %v", err) + } + + var movie Movie + err = json.Unmarshal(body, &movie) + if err != nil { + return nil, fmt.Errorf("failed to parse JSON: %v", err) + } + + return &movie, nil +} + +func FetchPersonDataFromExternalAPI(personURL string) (*Person, error) { + url := fmt.Sprintf("http://data.moviebuff.com/%s", personURL) + + resp, err := http.Get(url) + if err != nil { + return nil, fmt.Errorf("failed to fetch data: %v", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("received non-200 response: %d", resp.StatusCode) + } + + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("failed to read response body: %v", err) + } + + var person Person + err = json.Unmarshal(body, &person) + if err != nil { + return nil, fmt.Errorf("failed to parse JSON: %v", err) + } + + return &person, nil +} diff --git a/internal/data/type.go b/internal/data/type.go new file mode 100644 index 0000000..d218149 --- /dev/null +++ b/internal/data/type.go @@ -0,0 +1,28 @@ +package data + +type 
Person struct { + // Name string `json:"name"` + // URL string `json:"url"` + // Type string `json:"type"` + MovieRoles []MovieRole `json:"movies"` +} + +type MovieRole struct { + // Name string `json:"name"` + URL string `json:"url"` + // Role string `json:"role"` +} + +type Movie struct { + // Name string `json:"name"` + // URL string `json:"url"` + // Type string `json:"type"` + Cast []CastCrew `json:"cast"` + Crew []CastCrew `json:"crew"` +} + +type CastCrew struct { + // Name string `json:"name"` + URL string `json:"url"` + // Role string `json:"role"` +} diff --git a/internal/status/statistics.go b/internal/status/statistics.go new file mode 100644 index 0000000..400ec4e --- /dev/null +++ b/internal/status/statistics.go @@ -0,0 +1,37 @@ +package status + +import ( + "fmt" + "runtime" + "test/internal/config" + "time" +) + +func init() { + if config.LogGoroutineCount { + go func() { + for { + time.Sleep(3 * time.Second) + GetStatistics() + } + }() + } +} + +func GetStatistics() { + + var memStats runtime.MemStats + runtime.ReadMemStats(&memStats) + + fmt.Println("=====================================") + fmt.Println("Number of Goroutines:", runtime.NumGoroutine(), "@", time.Now().Format("15:04:05")) + // fmt.Println("Heap Allocation (bytes):", memStats.HeapAlloc) + // fmt.Println("Stack System (bytes):", memStats.StackSys) + // fmt.Println("Stack In Use (bytes):", memStats.StackInuse) + + // buf := make([]byte, 10*1024) + // n := runtime.Stack(buf, true) + // fmt.Println("result:", string(buf[:n])) + + // fmt.Println("=====================================") +} diff --git a/internal/tracer/execute.go b/internal/tracer/execute.go new file mode 100644 index 0000000..a0e5f62 --- /dev/null +++ b/internal/tracer/execute.go @@ -0,0 +1,187 @@ +package tracer + +import ( + "context" + "fmt" + "sync" + "test/internal/data" +) + +func FindSeperation(p1URL string, targetPerson string) (int, error) { + + if p1URL == targetPerson { + return 0, nil + } + + //check if target 
person exists to avoid infinite search + exists, err := data.CheckPersonExistence(targetPerson) + if err != nil { + return 0, fmt.Errorf("error checking target person existence: %v", err) + } + if !exists { + return 0, fmt.Errorf("target person not found") + } + + var ( + personURLQueue = []string{p1URL} + visitedPersons = make(map[string]bool) + visitedMovies = make(map[string]bool) + ctx = context.Background() + ) + + ctx, cancel := context.WithCancel(ctx) + defer cancel() + + for seperation := 2; len(personURLQueue) > 0; seperation++ { + found, newPersonURLQueue := findTargetOrNextPersonList(ctx, personURLQueue, targetPerson, visitedPersons, visitedMovies) + if found { + return seperation, nil + } + personURLQueue = newPersonURLQueue + } + + return -1, fmt.Errorf("seperation not found") + +} + +func findTargetOrNextPersonList(ctx context.Context, personURLQueue []string, targetPerson string, visitedPersons map[string]bool, visitedMovies map[string]bool) (bool, []string) { + var ( + personChan = make(chan *data.Person, 10) + movieChan = make(chan *data.Movie, 10) + + movieUrlChan = make(chan string, 100) + ) + + go startFetchingPersons(personURLQueue, personChan, ctx) + for _, personURL := range personURLQueue { + visitedPersons[personURL] = true + } + + //receive person data and send movie urls + go func() { + defer close(movieUrlChan) + for { + select { + case personData, ok := <-personChan: + if !ok { + return + } + for _, movieRole := range personData.MovieRoles { + if _, isVisited := visitedMovies[movieRole.URL]; isVisited { + continue + } else { + visitedMovies[movieRole.URL] = true + + select { + case movieUrlChan <- movieRole.URL: + case <-ctx.Done(): + return + } + } + } + case <-ctx.Done(): + return + } + } + }() + + //call startMovieFetching to fetch movie data and send person urls of cast and crew + go startMovieFetching(movieUrlChan, movieChan, ctx) + + newPersonURLQueue := []string{} + + //add unvisited persons to new queue + for movieData := range 
movieChan { + for _, cast := range movieData.Cast { + if cast.URL == targetPerson { + return true, nil + } + if _, isVisited := visitedPersons[cast.URL]; isVisited { + continue + } + newPersonURLQueue = append(newPersonURLQueue, cast.URL) + } + + for _, crew := range movieData.Crew { + if _, isVisited := visitedPersons[crew.URL]; isVisited { + continue + } + if crew.URL == targetPerson { + return true, nil + } + newPersonURLQueue = append(newPersonURLQueue, crew.URL) + } + } + + //reset personURLQueue to newPersonURLQueue + return false, newPersonURLQueue +} + +func startFetchingPersons(personURLs []string, personChan chan *data.Person, ctx context.Context) { + + wg := sync.WaitGroup{} + defer close(personChan) + defer wg.Wait() + for _, personURL := range personURLs { + if person := data.CachedData.GetCachedPerson(personURL); person != nil { + select { + case personChan <- person: + continue + case <-ctx.Done(): + return + } + } + + wg.Add(1) + select { + case <-ctx.Done(): + return + default: + externalFetcher.PersonRequestChan <- personRequest{ + personURL: personURL, + replyChan: personChan, + ctx: ctx, + wg: &wg, + } + } + } +} + +// initiateMovieFetcher +func startMovieFetching(movieURLsChan chan string, movieChan chan *data.Movie, ctx context.Context) { + + wg := sync.WaitGroup{} + defer close(movieChan) + defer wg.Wait() + + for { + select { + case movieURL, ok := <-movieURLsChan: + if !ok { + return + } + if movie := data.CachedData.GetCachedMovie(movieURL); movie != nil { + select { + case movieChan <- movie: + continue + case <-ctx.Done(): + return + } + } + wg.Add(1) + select { + case <-ctx.Done(): + return + default: + externalFetcher.MovieRequestChan <- movieRequest{ + movieURL: movieURL, + replyChan: movieChan, + ctx: ctx, + wg: &wg, + } + } + case <-ctx.Done(): + return + } + } +} diff --git a/internal/tracer/workers.go b/internal/tracer/workers.go new file mode 100644 index 0000000..7fb0263 --- /dev/null +++ b/internal/tracer/workers.go @@ -0,0 
+1,118 @@ +package tracer + +import ( + "context" + "sync" + "test/internal/config" + "test/internal/data" + + "github.com/gofiber/fiber/v2/log" +) + +var ( + fetchPersonWorkersCount = config.PersonDataFetchWorkersCount + fetchMovieWorkersCount = config.MovieDataFetchWorkersCount +) + +type personRequest struct { + personURL string + replyChan chan *data.Person + ctx context.Context + wg *sync.WaitGroup +} + +type movieRequest struct { + movieURL string + replyChan chan *data.Movie + ctx context.Context + wg *sync.WaitGroup +} + +var externalFetcher = struct { + PersonRequestChan chan personRequest + MovieRequestChan chan movieRequest +}{ + PersonRequestChan: make(chan personRequest, 3*fetchPersonWorkersCount), + MovieRequestChan: make(chan movieRequest, 3*fetchMovieWorkersCount), +} + +// initiate workers +func init() { + for range fetchPersonWorkersCount { + go fetchPersonWorker(externalFetcher.PersonRequestChan) + } + for range fetchMovieWorkersCount { + go fetchMovieWorker(externalFetcher.MovieRequestChan) + } +} + +// fetchPersonWorkers +func fetchPersonWorker(personRequestChan chan personRequest) { + for req := range personRequestChan { + fetchAndCachePerson(req.personURL, req.wg, req.replyChan, req.ctx) + } +} + +func fetchAndCachePerson(personURL string, wg *sync.WaitGroup, replyChan chan *data.Person, ctx context.Context) { + defer wg.Done() + + // Again recheck if the person is cached (while the URL was in queue) + if person := data.CachedData.GetCachedPerson(personURL); person != nil { + select { + case replyChan <- person: + return + case <-ctx.Done(): + return + } + } + person, err := data.FetchPersonDataFromExternalAPI(personURL) + if err != nil { + log.Debugf("failed to fetch person data: %v\n", err) + return + } + + data.CachedData.CachePerson(personURL, person) + + select { + case replyChan <- person: + return + case <-ctx.Done(): + return + } +} + +// fetchMovieWorkers +func fetchMovieWorker(requestChan chan movieRequest) { + for req := range 
requestChan { + fetchAndCacheMovie(req.movieURL, req.wg, req.replyChan, req.ctx) + } +} + +func fetchAndCacheMovie(movieURL string, wg *sync.WaitGroup, replyChan chan *data.Movie, ctx context.Context) { + defer wg.Done() + + //Again recheck if the movie is cached (while the URL was in queue) + if movie := data.CachedData.GetCachedMovie(movieURL); movie != nil { + select { + case replyChan <- movie: + return + case <-ctx.Done(): + return + } + } + + movie, err := data.FetchMovieDataFromExternalAPI(movieURL) + if err != nil { + log.Debugf("failed to fetch movie data: %v\n", err) + return + } + + data.CachedData.CacheMovie(movieURL, movie) + + select { + case replyChan <- movie: + return + case <-ctx.Done(): + return + } + } diff --git a/no-secrets.env b/no-secrets.env new file mode 100644 index 0000000..ad54b09 --- /dev/null +++ b/no-secrets.env @@ -0,0 +1,7 @@ +# Public, because no secrets here +PORT=3001 # Or any other port you want to run, default is 3001 +PPROF_PORT=3002 # Port for pprof, default +PERSON_DATA_FETCH_WORKERS_COUNT=100 # Number of workers to fetch person data +MOVIE_DATA_FETCH_WORKERS_COUNT=100 # Number of workers to fetch movie data +LOG_LEVEL=DEBUG # Log level, can be DEBUG, INFO (other levels not used). 
Default is INFO +LOG_GOROUTINE_COUNT="true" # Log number of go-routines periodically \ No newline at end of file From 87f6db4f0fc62c3b1ad57cab7b9d0503b885e64e Mon Sep 17 00:00:00 2001 From: AbdulRahimOM Date: Tue, 11 Mar 2025 22:32:15 +0530 Subject: [PATCH 2/4] Rate limitting :vertical_traffic_light: --- cmd/main.go | 6 ++++++ go.mod | 2 ++ go.sum | 4 ++++ internal/config/config.go | 2 ++ no-secrets.env | 1 + 5 files changed, 15 insertions(+) diff --git a/cmd/main.go b/cmd/main.go index 2189eec..e6a3446 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -3,18 +3,24 @@ package main import ( "test/internal/config" "test/internal/tracer" + "time" "net/http" _ "net/http/pprof" "github.com/gofiber/fiber/v2" "github.com/gofiber/fiber/v2/log" + "github.com/gofiber/fiber/v2/middleware/limiter" ) func main() { setLogLevel() app := fiber.New() + app.Use(limiter.New(limiter.Config{ + Max: config.RateLimit, + Expiration: 1 * time.Minute, + })) app.Get("/seperation", func(c *fiber.Ctx) error { from := c.Query("from") diff --git a/go.mod b/go.mod index 0ba8dd7..ceaa784 100644 --- a/go.mod +++ b/go.mod @@ -14,7 +14,9 @@ require ( github.com/mattn/go-colorable v0.1.13 // indirect github.com/mattn/go-isatty v0.0.20 // indirect github.com/mattn/go-runewidth v0.0.16 // indirect + github.com/philhofer/fwd v1.1.3-0.20240916144458-20a13a1f6b7c // indirect github.com/rivo/uniseg v0.2.0 // indirect + github.com/tinylib/msgp v1.2.5 // indirect github.com/valyala/bytebufferpool v1.0.0 // indirect github.com/valyala/fasthttp v1.51.0 // indirect github.com/valyala/tcplisten v1.0.0 // indirect diff --git a/go.sum b/go.sum index ed894e6..a1ff890 100644 --- a/go.sum +++ b/go.sum @@ -15,8 +15,12 @@ github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWE github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc= github.com/mattn/go-runewidth v0.0.16/go.mod 
h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= +github.com/philhofer/fwd v1.1.3-0.20240916144458-20a13a1f6b7c h1:dAMKvw0MlJT1GshSTtih8C2gDs04w8dReiOGXrGLNoY= +github.com/philhofer/fwd v1.1.3-0.20240916144458-20a13a1f6b7c/go.mod h1:RqIHx9QI14HlwKwm98g9Re5prTQ6LdeRQn+gXJFxsJM= github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= +github.com/tinylib/msgp v1.2.5 h1:WeQg1whrXRFiZusidTQqzETkRpGjFjcIhW6uqWH09po= +github.com/tinylib/msgp v1.2.5/go.mod h1:ykjzy2wzgrlvpDCRc4LA8UXy6D8bzMSuAF3WD57Gok0= github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw= github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= github.com/valyala/fasthttp v1.51.0 h1:8b30A5JlZ6C7AS81RsWjYMQmrZG6feChmgAolCl1SqA= diff --git a/internal/config/config.go b/internal/config/config.go index a3669f0..6d4ecb2 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -15,6 +15,7 @@ const envPath = "no-secrets.env" var ( Port string // PORT PprofPort string // PPROF_PORT + RateLimit int // RATE_LIMIT LogLevel string // LOG_LEVEL PersonDataFetchWorkersCount int // PERSON_DATA_FETCH_WORKERS_COUNT MovieDataFetchWorkersCount int // MOVIE_DATA_FETCH_WORKERS_COUNT @@ -35,6 +36,7 @@ func LoadEnv() { Port = getEnvString("PORT", "3001") PprofPort = getEnvString("PPROF_PORT", "6060") LogLevel = getEnvString("LOG_LEVEL", "INFO") + RateLimit = getEnvInt("RATE_LIMIT", 10) PersonDataFetchWorkersCount = getEnvInt("PERSON_DATA_FETCH_WORKERS_COUNT", 10) MovieDataFetchWorkersCount = getEnvInt("MOVIE_DATA_FETCH_WORKERS_COUNT", 10) LogGoroutineCount = getEnvBool("LOG_GOROUTINE_COUNT", false) diff --git a/no-secrets.env b/no-secrets.env index ad54b09..66db44f 100644 --- a/no-secrets.env +++ b/no-secrets.env @@ -1,6 +1,7 @@ # Public, because no secrets here PORT=3001 # Or any other port you want to run, default is 3001 
PPROF_PORT=3002 # Port for pprof, default +RATE_LIMIT=10 # Number of requests per minute PERSON_DATA_FETCH_WORKERS_COUNT=100 # Number of workers to fetch person data MOVIE_DATA_FETCH_WORKERS_COUNT=100 # Number of workers to fetch movie data LOG_LEVEL=DEBUG # Log level, can be DEBUG, INFO (other levels not used). Default is INFO From 07b33c5840e13e71b7d259bbe290a26744979325 Mon Sep 17 00:00:00 2001 From: AbdulRahimOM Date: Wed, 12 Mar 2025 09:45:38 +0530 Subject: [PATCH 3/4] README :page_with_curl: --- ASSIGNMENT.md | 52 +++++++++++++++++++++ README.md | 124 ++++++++++++++++++++++++++++++++++++------------- no-secrets.env | 14 +++--- 3 files changed, 152 insertions(+), 38 deletions(-) create mode 100644 ASSIGNMENT.md diff --git a/ASSIGNMENT.md b/ASSIGNMENT.md new file mode 100644 index 0000000..6df56a5 --- /dev/null +++ b/ASSIGNMENT.md @@ -0,0 +1,52 @@ +#Degrees of Separation + +With cinema going global these days, every one of the [A-Z]ollywoods are now connected. Use the wealth of data available at [Moviebuff](http://www.moviebuff.com) to see how. + +Write a Go program that behaves the following way: + +``` +$ degrees amitabh-bachchan robert-de-niro + +Degrees of Separation: 3 + +1. Movie: The Great Gatsby +Supporting Actor: Amitabh Bachchan +Actor: Leonardo DiCaprio + +2. Movie: The Wolf of Wall Street +Actor: Leonardo DiCaprio +Director: Martin Scorsese + +3. Movie: Taxi Driver +Director: Martin Scorsese +Actor: Robert De Niro +``` + +Your solution should use the Moviebuff data available to figure out the smallest degree of separation between the two people. +All the inputs should be Moviebuff URLs for their respective people: For Amitabh Bachchan, his page is on http://www.moviebuff.com/amitabh-bachchan and his Moviebuff URL is `amitabh-bachchan`. 
+ +Please do not attempt to scrape the Moviebuff website - All the data is available on an S3 bucket in an easy to parse JSON format here: `https://data.moviebuff.com/{moviebuff_url}` + +To solve the example above, your solution would fetch at least the following: + +http://data.moviebuff.com/amitabh-bachchan + +http://data.moviebuff.com/the-great-gatsby + +http://data.moviebuff.com/leonardo-dicaprio + +http://data.moviebuff.com/the-wolf-of-wall-street + +http://data.moviebuff.com/martin-scorsese + +http://data.moviebuff.com/taxi-driver + +##Notes +* If you receive HTTP errors when trying to fetch the data, that might be the CDN throttling you. Luckily, Go has some very elegant idioms for rate limiting :) +* There may be a discrepancy in some cases where a movie appears on an actor's list but not vice versa. This usually happens when we edit data while exporting it, so feel free to either ignore these mismatches or handle them in some way. + +Write a program in any language you want (If you're here from Gophercon, use Go :D) that does this. Feel free to make your own input and output format / command line tool / GUI / Webservice / whatever you want. Feel free to hold the dataset in whatever structure you want, but try not to use external databases - as far as possible stick to your langauage without bringing in MySQL/Postgres/MongoDB/Redis/Etc. + +To submit a solution, fork this repo and send a Pull Request on Github. + +For any questions or clarifications, raise an issue on this repo and we'll answer your questions as fast as we can. diff --git a/README.md b/README.md index 6df56a5..3c7de5f 100644 --- a/README.md +++ b/README.md @@ -1,52 +1,114 @@ -#Degrees of Separation +# 🎬 Degrees of Separation - Movie Industry Connections -With cinema going global these days, every one of the [A-Z]ollywoods are now connected. Use the wealth of data available at [Moviebuff](http://www.moviebuff.com) to see how. 
+This Go application finds the degrees of separation between two people in the movie industry using data from Moviebuff. It implements an efficient graph traversal algorithm with concurrent data fetching to determine the shortest path between two industry professionals through their movie collaborations. -Write a Go program that behaves the following way: +## ✨ Features -``` -$ degrees amitabh-bachchan robert-de-niro +- 🚀 **Concurrent Data Fetching**: Efficiently fetches data from external APIs using goroutines +- 💾 **In-Memory Caching**: Implements a thread-safe caching mechanism for person and movie data +- 🛡️ **Rate Limiting**: Limits incoming requests per minute using Fiber's limiter middleware +- 📊 **Performance Monitoring**: Includes pprof endpoints for runtime analysis +- ⚡ **Graceful Error Handling**: Robust error handling for API failures and invalid inputs +- ⚙️ **Configuration via Environment Variables**: Flexible configuration through environment variables +- 🔄 **Resource Management**: Proper channel and goroutine lifecycle management + +## 🏗️ Architecture + +### 🌐 Data Fetching +- Uses worker pools for concurrent data fetching from external APIs +- Implements separate workers for person and movie data +- Controlled concurrency with predefined worker counts + +### 📦 Caching +- Thread-safe in-memory cache using maps +- Implements `sync.RWMutex` for concurrent read/write operations +- Caches both person and movie data after fetching -Degrees of Separation: 3 +### 📈 Performance & Monitoring +- Pprof endpoints for runtime profiling and debugging +- Periodic logging of goroutine statistics +- Rate limiting of incoming requests (per-minute window) -1. 
Movie: The Great Gatsby -Supporting Actor: Amitabh Bachchan -Actor: Leonardo DiCaprio +### 🛠️ Error Handling & Resource Management +- Context-based cancellation for cleanup +- Proper channel closing mechanisms +- Graceful error handling for API failures +- Existence validation (of target person) to prevent long unnecessary searches -2. Movie: The Wolf of Wall Street -Actor: Leonardo DiCaprio -Director: Martin Scorsese +## 🔌 API Endpoints + +### GET /separation +Query Parameters: +- `from`: Moviebuff URL of the first person +- `to`: Moviebuff URL of the second person + +Example: +``` +GET /separation?from=amitabh-bachchan&to=robert-de-niro +``` -3. Movie: Taxi Driver -Director: Martin Scorsese -Actor: Robert De Niro +Response: +```json +{ + "separation": 3 +} ``` -Your solution should use the Moviebuff data available to figure out the smallest degree of separation between the two people. -All the inputs should be Moviebuff URLs for their respective people: For Amitabh Bachchan, his page is on http://www.moviebuff.com/amitabh-bachchan and his Moviebuff URL is `amitabh-bachchan`. +## ⚙️ Configuration + +The application can be configured using the following environment variables: -Please do not attempt to scrape the Moviebuff website - All the data is available on an S3 bucket in an easy to parse JSON format here: `https://data.moviebuff.com/{moviebuff_url}` +- `PORT`: Server port (default: 3001) +- `PPROF_PORT`: Port for pprof endpoints +- `LOG_LEVEL`: Logging level (debug/info) +- `RATE_LIMIT`: API rate limit per minute +- `PERSON_DATA_FETCH_WORKERS`: Number of concurrent person data fetchers +- `MOVIE_DATA_FETCH_WORKERS`: Number of concurrent movie data fetchers -To solve the example above, your solution would fetch at least the following: +## 🚀 Running the Application + +1. Set up environment variables (optional) +2.
Run the application: +```bash +go run cmd/main.go +``` -http://data.moviebuff.com/amitabh-bachchan +## 💪 Performance Considerations -http://data.moviebuff.com/the-great-gatsby +1. **Concurrent Data Fetching** + - 🔄 Optimized worker pools for API requests + - 👥 Separate workers for person and movie data -http://data.moviebuff.com/leonardo-dicaprio +2. **Caching** + - 📦 In-memory caching reduces API calls + - 🔒 Thread-safe read/write operations -http://data.moviebuff.com/the-wolf-of-wall-street +3. **Resource Management** + - ⚡ Context-based cancellation + - 🧹 Proper cleanup of resources + - 🛡️ Rate limiting to prevent throttling -http://data.moviebuff.com/martin-scorsese +## 📊 Monitoring + +### 🔍 Pprof Endpoints +Access pprof endpoints at: +``` +http://localhost:{PPROF_PORT}/debug/pprof/ +``` -http://data.moviebuff.com/taxi-driver +Available profiles: +- 🧵 Goroutine +- 💾 Heap +- 🔄 Thread +- 🚫 Block +- 📈 CPU profile -##Notes -* If you receive HTTP errors when trying to fetch the data, that might be the CDN throttling you. Luckily, Go has some very elegant idioms for rate limiting :) -* There may be a discrepancy in some cases where a movie appears on an actor's list but not vice versa. This usually happens when we edit data while exporting it, so feel free to either ignore these mismatches or handle them in some way. +## 🔮 Future Improvements +- ⏰ Add cache expiration mechanism (Relevant, as new movies and persons are added) +- 🔗 Show connection chain along with degree of separation -Write a program in any language you want (If you're here from Gophercon, use Go :D) that does this. Feel free to make your own input and output format / command line tool / GUI / Webservice / whatever you want. Feel free to hold the dataset in whatever structure you want, but try not to use external databases - as far as possible stick to your langauage without bringing in MySQL/Postgres/MongoDB/Redis/Etc.
+## 📚 Dependencies -To submit a solution, fork this repo and send a Pull Request on Github. +- 🚀 [Fiber](https://github.com/gofiber/fiber/v2) - Web framework +- 📦 Standard Go libraries for concurrency and HTTP operations -For any questions or clarifications, raise an issue on this repo and we'll answer your questions as fast as we can. diff --git a/no-secrets.env b/no-secrets.env index 66db44f..841a4e1 100644 --- a/no-secrets.env +++ b/no-secrets.env @@ -1,8 +1,8 @@ # Public, because no secrets here -PORT=3001 # Or any other port you want to run, default is 3001 -PPROF_PORT=3002 # Port for pprof, default -RATE_LIMIT=10 # Number of requests per minute -PERSON_DATA_FETCH_WORKERS_COUNT=100 # Number of workers to fetch person data -MOVIE_DATA_FETCH_WORKERS_COUNT=100 # Number of workers to fetch movie data -LOG_LEVEL=DEBUG # Log level, can be DEBUG, INFO (other levels not used). Default is INFO -LOG_GOROUTINE_COUNT="true" # Log number of go-routines periodically \ No newline at end of file +PORT=3001 # Or any other port you want to run. Default: 3001 +PPROF_PORT=3002 # Port for pprof. Default: 6060 +RATE_LIMIT=10 # Number of requests per minute. Default: 10 +PERSON_DATA_FETCH_WORKERS_COUNT=100 # Number of workers to fetch person data. Default:10 +MOVIE_DATA_FETCH_WORKERS_COUNT=100 # Number of workers to fetch movie data. Default:10 +LOG_LEVEL=DEBUG # Log level, can be DEBUG, INFO (other levels not used). Default: 'INFO' +LOG_GOROUTINE_COUNT="true" # Log number of go-routines periodically. Default: False \ No newline at end of file From bf1bb2cbe041bc5e329c2ea1e1391dcacf2a2f61 Mon Sep 17 00:00:00 2001 From: AbdulRahimOM Date: Wed, 12 Mar 2025 22:41:53 +0530 Subject: [PATCH 4/4] README, Context cancellation improvement.
--- README.md | 19 +++++++++++++++++++ internal/tracer/execute.go | 17 ++++++++--------- no-secrets.env | 4 ++-- 3 files changed, 29 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 3c7de5f..f026848 100644 --- a/README.md +++ b/README.md @@ -65,6 +65,25 @@ The application can be configured using the following environment variables: - `PERSON_DATA_FETCH_WORKERS`: Number of concurrent person data fetchers - `MOVIE_DATA_FETCH_WORKERS`: Number of concurrent movie data fetchers +## 📥 Getting Started + +1. Clone the repository: +```bash +git clone https://github.com/AbdulRahimOM/challenge2015.git +cd challenge2015 +``` + +2. Install dependencies: +```bash +go mod download +``` + +3. Copy the environment file and configure: +```bash +cp no-secrets.env .env +# Edit .env with your preferred settings (or keep it as it is to run in default settings) +``` + ## 🚀 Running the Application 1. Set up environment variables (optional) diff --git a/internal/tracer/execute.go b/internal/tracer/execute.go index a0e5f62..9249f87 100644 --- a/internal/tracer/execute.go +++ b/internal/tracer/execute.go @@ -26,14 +26,10 @@ func FindSeperation(p1URL string, targetPerson string) (int, error) { personURLQueue = []string{p1URL} visitedPersons = make(map[string]bool) visitedMovies = make(map[string]bool) - ctx = context.Background() ) - ctx, cancel := context.WithCancel(ctx) - defer cancel() - for seperation := 2; len(personURLQueue) > 0; seperation++ { - found, newPersonURLQueue := findTargetOrNextPersonList(ctx, personURLQueue, targetPerson, visitedPersons, visitedMovies) + found, newPersonURLQueue := findTargetOrNextPersonList(personURLQueue, targetPerson, visitedPersons, visitedMovies) if found { return seperation, nil } @@ -44,14 +40,17 @@ func FindSeperation(p1URL string, targetPerson string) (int, error) { } -func findTargetOrNextPersonList(ctx context.Context, personURLQueue []string, targetPerson string, visitedPersons map[string]bool, visitedMovies
map[string]bool) (bool, []string) { +func findTargetOrNextPersonList(personURLQueue []string, targetPerson string, visitedPersons map[string]bool, visitedMovies map[string]bool) (bool, []string) { var ( - personChan = make(chan *data.Person, 10) - movieChan = make(chan *data.Movie, 10) - + personChan = make(chan *data.Person, 10) + movieChan = make(chan *data.Movie, 10) movieUrlChan = make(chan string, 100) + ctx = context.Background() ) + ctx, cancel := context.WithCancel(ctx) + defer cancel() + go startFetchingPersons(personURLQueue, personChan, ctx) for _, personURL := range personURLQueue { visitedPersons[personURL] = true diff --git a/no-secrets.env b/no-secrets.env index 841a4e1..c55c2f1 100644 --- a/no-secrets.env +++ b/no-secrets.env @@ -4,5 +4,5 @@ PPROF_PORT=3002 # Port for pprof. Default: 6060 RATE_LIMIT=10 # Number of requests per minute. Default: 10 PERSON_DATA_FETCH_WORKERS_COUNT=100 # Number of workers to fetch person data. Default:10 MOVIE_DATA_FETCH_WORKERS_COUNT=100 # Number of workers to fetch movie data. Default:10 -LOG_LEVEL=DEBUG # Log level, can be DEBUG, INFO (other levels not used). Default: 'INFO' -LOG_GOROUTINE_COUNT="true" # Log number of go-routines periodically. Default: False \ No newline at end of file +LOG_LEVEL=INFO # Log level, can be DEBUG, INFO (other levels not used). Default: 'INFO' +LOG_GOROUTINE_COUNT="false" # Log number of go-routines periodically. Default: False \ No newline at end of file