diff --git a/README.md b/README.md deleted file mode 100644 index 6df56a5..0000000 --- a/README.md +++ /dev/null @@ -1,52 +0,0 @@ -# Degrees of Separation - -With cinema going global these days, every one of the [A-Z]ollywoods is now connected. Use the wealth of data available at [Moviebuff](http://www.moviebuff.com) to see how. - -Write a Go program that behaves the following way: - -``` -$ degrees amitabh-bachchan robert-de-niro - -Degrees of Separation: 3 - -1. Movie: The Great Gatsby -Supporting Actor: Amitabh Bachchan -Actor: Leonardo DiCaprio - -2. Movie: The Wolf of Wall Street -Actor: Leonardo DiCaprio -Director: Martin Scorsese - -3. Movie: Taxi Driver -Director: Martin Scorsese -Actor: Robert De Niro -``` - -Your solution should use the Moviebuff data available to figure out the smallest degree of separation between the two people. -All the inputs should be Moviebuff URLs for their respective people: For Amitabh Bachchan, his page is on http://www.moviebuff.com/amitabh-bachchan and his Moviebuff URL is `amitabh-bachchan`. - -Please do not attempt to scrape the Moviebuff website - All the data is available on an S3 bucket in an easy to parse JSON format here: `https://data.moviebuff.com/{moviebuff_url}` - -To solve the example above, your solution would fetch at least the following: - -http://data.moviebuff.com/amitabh-bachchan - -http://data.moviebuff.com/the-great-gatsby - -http://data.moviebuff.com/leonardo-dicaprio - -http://data.moviebuff.com/the-wolf-of-wall-street - -http://data.moviebuff.com/martin-scorsese - -http://data.moviebuff.com/taxi-driver - -## Notes -* If you receive HTTP errors when trying to fetch the data, that might be the CDN throttling you. Luckily, Go has some very elegant idioms for rate limiting :) -* There may be a discrepancy in some cases where a movie appears on an actor's list but not vice versa. 
This usually happens when we edit data while exporting it, so feel free to either ignore these mismatches or handle them in some way. - -Write a program in any language you want (If you're here from Gophercon, use Go :D) that does this. Feel free to make your own input and output format / command line tool / GUI / Webservice / whatever you want. Feel free to hold the dataset in whatever structure you want, but try not to use external databases - as far as possible stick to your language without bringing in MySQL/Postgres/MongoDB/Redis/Etc. - -To submit a solution, fork this repo and send a Pull Request on Github. - -For any questions or clarifications, raise an issue on this repo and we'll answer your questions as fast as we can. diff --git a/Readme.md b/Readme.md new file mode 100644 index 0000000..5d0bfb5 --- /dev/null +++ b/Readme.md @@ -0,0 +1,21 @@ +# Degrees of Separation CLI + +A Go CLI application that calculates the shortest path between two actors/movie professionals using data from [Moviebuff](https://www.moviebuff.com/). The solution uses concurrent BFS (Breadth-First Search) to efficiently find connections through movies and crew members. 
+ +## Features + +- **Concurrent BFS implementation in Graph** with worker pooling +- **Rate limiting** to handle API throttling (currently 1000 requests per second with a 1000 as burst) +- **Smart retry mechanism** for failed requests (some entries are not accessible) +- **Path reconstruction** showing movie connections +- **Efficient memory management** with sync.Map and compact storage + +## Installation + +```go build -o degrees``` + +```./degrees ``` + +```./degrees -a amitabh-bachchan -b robert-de-niro``` + + diff --git a/cmd/root.go b/cmd/root.go new file mode 100644 index 0000000..c6583fe --- /dev/null +++ b/cmd/root.go @@ -0,0 +1,58 @@ +package cmd + +import ( + "fmt" + "log" + "os" + + "github.com/karthikkalarikal/Qube/models" + "github.com/karthikkalarikal/Qube/pkg/traversal" + "github.com/karthikkalarikal/Qube/pkg/util" + "github.com/spf13/cobra" +) + +var ( + rootCmd = &cobra.Command{ + Use: "separation", + Short: "Connect your favorite celebrities.", + Long: `A cli application that let's you figure out the connection between actors by separation`, + Run: generate, + } + actor1 string + actor2 string + separation uint +) + +func Execute() { + if err := rootCmd.Execute(); err != nil { + fmt.Println(err) + os.Exit(1) + } +} + +func init() { + + rootCmd.PersistentFlags().StringVarP(&actor1, "actor1", "a", "", "name an actor/celeb") + rootCmd.PersistentFlags().StringVarP(&actor2, "actor2", "b", "", "name an actor/celeb") + // rootCmd.PersistentFlags().UintVarP(&separation, "separation", "s", 3, "separation between the actors/celebs") + +} + +func generate(_ *cobra.Command, args []string) { + config := models.Config{ + Actor1: actor1, + Actor2: actor2, + // Separation: separation, + } + + if err := util.Exists(config.Actor1); err != nil { + log.Printf("the name of the actor1: %s is incorrect %v", actor1, err) + os.Exit(1) + } + if err := util.Exists(config.Actor2); err != nil { + log.Printf("the name of the actor2: %s is incorrect %v", actor2, err) + 
os.Exit(1) + } + + traversal.NewNode(config.Actor1, config.Actor2) +} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..90c16ba --- /dev/null +++ b/go.mod @@ -0,0 +1,13 @@ +module github.com/karthikkalarikal/Qube + +go 1.23.5 + +require ( + github.com/spf13/cobra v1.9.1 + golang.org/x/time v0.11.0 +) + +require ( + github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/spf13/pflag v1.0.6 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..748bdbd --- /dev/null +++ b/go.sum @@ -0,0 +1,12 @@ +github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= +github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= +github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/spf13/cobra v1.9.1 h1:CXSaggrXdbHK9CF+8ywj8Amf7PBRmPCOJugH954Nnlo= +github.com/spf13/cobra v1.9.1/go.mod h1:nDyEzZ8ogv936Cinf6g1RU9MRY64Ir93oCnqb9wxYW0= +github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o= +github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +golang.org/x/time v0.11.0 h1:/bpjEDfN9tkoN/ryeYHnv5hcMlc8ncjMcM4XBk5NWV0= +golang.org/x/time v0.11.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/main.go b/main.go new file mode 100644 index 0000000..5712602 --- /dev/null +++ b/main.go @@ -0,0 +1,7 @@ +package main + +import "github.com/karthikkalarikal/Qube/cmd" + +func main() { + cmd.Execute() +} diff --git a/models/model.go b/models/model.go new file mode 100644 index 0000000..8228391 --- /dev/null +++ b/models/model.go @@ -0,0 +1,40 @@ +package models + 
+type Config struct { + Actor1 string + Actor2 string + Separation uint +} + +type MovieConn struct { + Name string `json:"name"` + URL string `json:"url"` + Role string `json:"role"` +} + +type Actor struct { + URL string `json:"url"` + Type string `json:"type"` + Name string `json:"name"` + Movies []MovieConn `json:"movies"` +} + +type Cast struct { + URL string `json:"url"` + Name string `json:"name"` + Role string `json:"role"` +} + +type Crew struct { + Name string `json:"name"` + URL string `json:"url"` + Role string `json:"role"` +} + +type Movie struct { + URL string `json:"url"` + Type string `json:"type"` + Name string `json:"name"` + Cast []Cast `json:"cast"` + Crew []Crew `json:"crew"` +} diff --git a/pkg/traversal/bfs.go b/pkg/traversal/bfs.go new file mode 100644 index 0000000..82d6852 --- /dev/null +++ b/pkg/traversal/bfs.go @@ -0,0 +1,188 @@ +package traversal + +import ( + "context" + "fmt" + "log" + "strings" + "sync" + + "github.com/karthikkalarikal/Qube/models" + "github.com/karthikkalarikal/Qube/pkg/util" +) + +type Node struct { + Entity string + Type string + From *Node + Link string +} + +func NewNode(start, target string) { + node := bfs(start, target) + printPath(node) +} + +func bfs(start, end string) *Node { + var visited sync.Map + var once sync.Once + var wg sync.WaitGroup + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + type safeQueue struct { + mu sync.Mutex + cond *sync.Cond + items []Node + } + + queue := &safeQueue{items: make([]Node, 0)} + queue.cond = sync.NewCond(&queue.mu) + result := make(chan *Node, 1) + + queue.mu.Lock() + queue.items = append(queue.items, Node{Entity: start, Type: "person"}) + queue.cond.Signal() + queue.mu.Unlock() + + worker := func() { + defer wg.Done() + for { + select { + case <-ctx.Done(): + return + default: + queue.mu.Lock() + // Wait for work or cancellation + for len(queue.items) == 0 { + if ctx.Err() != nil { + queue.mu.Unlock() + return + } + queue.cond.Wait() + 
} + // Dequeue node + current := queue.items[0] + queue.items = queue.items[1:] + queue.mu.Unlock() + + // Skip if already processed + if _, loaded := visited.LoadOrStore(current.Entity, true); loaded { + continue + } + + // Early termination check + if current.Entity == end { + once.Do(func() { + result <- ¤t + cancel() + }) + return + } + + // Process node + if current.Type == "person" { + var person models.Actor + if err := util.GetByURL(current.Entity, &person); err != nil { + if checkErrorForbidden(err) { + visited.Delete(current.Entity) + } + log.Printf("Retryable error on %s: %v", current.Entity, err) + continue + } + + for _, credit := range person.Movies { + child := Node{ + Entity: credit.URL, + Type: "movie", + From: ¤t, + Link: "Movie: " + credit.Name + " (" + credit.Role + ")", + } + queue.mu.Lock() + queue.items = append(queue.items, child) + queue.cond.Signal() + queue.mu.Unlock() + } + } else { + var movie models.Movie + if err := util.GetByURL(current.Entity, &movie); err != nil { + if checkErrorForbidden(err) { + visited.Delete(current.Entity) + } + log.Printf("Retryable error on %s: %v", current.Entity, err) //since some of the links are not accessible, the retry logic is specific. 
+ continue + } + + for _, cast := range movie.Cast { + child := Node{ + Entity: cast.URL, + Type: "person", + From: ¤t, + Link: "Cast: " + cast.Name + " (" + cast.Role + ")", + } + queue.mu.Lock() + queue.items = append(queue.items, child) + queue.cond.Signal() + queue.mu.Unlock() + } + for _, crew := range movie.Crew { + child := Node{ + Entity: crew.URL, + Type: "person", + From: ¤t, + Link: "Crew: " + crew.Name + " (" + crew.Role + ")", + } + queue.mu.Lock() + queue.items = append(queue.items, child) + queue.cond.Signal() + queue.mu.Unlock() + } + } + } + } + } + + // Start workers + numWorkers := 30 + wg.Add(numWorkers) + for i := 0; i < numWorkers; i++ { + go worker() + } + + go func() { + wg.Wait() + once.Do(func() { close(result) }) + }() + + select { + case res := <-result: + return res + case <-ctx.Done(): + return nil + } +} + +func printPath(node *Node) { + if node == nil { + fmt.Println("No path found.") + return + } + + var path []*Node + for node != nil { + path = append([]*Node{node}, path...) + node = node.From + } + for i, n := range path { + if i == 0 { + fmt.Printf("%d. Start: %s\n", i+1, n.Entity) + } else { + fmt.Printf("%d. 
%s\n", i+1, n.Link) + } + } +} + +func checkErrorForbidden(err error) bool { + return strings.Contains(err.Error(), "access denied for URL:") +} diff --git a/pkg/util/util.go b/pkg/util/util.go new file mode 100644 index 0000000..efb437b --- /dev/null +++ b/pkg/util/util.go @@ -0,0 +1,33 @@ +package util + +import ( + "context" + "encoding/json" + "fmt" + "net/http" + "time" + + "golang.org/x/time/rate" +) + +var limiter = rate.NewLimiter(rate.Every(1*time.Millisecond), 1000) + +// fetch resources +func GetByURL(url string, target any) error { + + err := limiter.Wait(context.Background()) + if err != nil { + return err + } + resp, err := http.Get("http://data.moviebuff.com/" + url) + if err != nil { + return err + } + defer resp.Body.Close() + if resp.StatusCode == http.StatusForbidden { + + return fmt.Errorf("access denied for URL: %s", url) + + } + return json.NewDecoder(resp.Body).Decode(target) +} diff --git a/pkg/util/validate.go b/pkg/util/validate.go new file mode 100644 index 0000000..8c5a1c5 --- /dev/null +++ b/pkg/util/validate.go @@ -0,0 +1,25 @@ +package util + +import ( + "context" + "fmt" + "net/http" +) + +func Exists(url string) error { + + err := limiter.Wait(context.Background()) + if err != nil { + return err + } + resp, err := http.Get("http://data.moviebuff.com/" + url) + if err != nil { + return err + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + + return fmt.Errorf("unexpected status: %s", resp.Status) + } + return nil +}