diff --git a/Notes.md b/Notes.md
new file mode 100644
index 0000000..d3bffe9
--- /dev/null
+++ b/Notes.md
@@ -0,0 +1,15 @@
+# Key Considerations in Solving the Assessment
+
+1. **Algorithm**
+   The solution uses the Breadth-First Search (BFS) algorithm to find the shortest degree of separation between two artists efficiently.
+
+2. **Rate Limiting**
+   A custom HTTP client with an adjustable rate limiter, shared by all requests, is implemented to handle `http.StatusTooManyRequests` responses effectively.
+
+3. **Optimizations**
+   - **Concurrency**: The solution leverages goroutines and channels for concurrent requests, significantly reducing search times.
+   - **Caching**: A concurrency-safe `sync.Map` cache stores results from previously fetched requests, minimizing redundant API calls.
+   - **Note**: A substantial number of requests returned `403 Forbidden`, which are also cached to avoid repeated requests to those URLs.
+
+4. **Best Practices**
+   The `FetchEntityDetails()` function is designed as a generic utility, serving both "Person" and "Movie" requests for streamlined and reusable code.
diff --git a/api.go b/api.go
new file mode 100644
index 0000000..cd537f2
--- /dev/null
+++ b/api.go
@@ -0,0 +1,115 @@
+package main
+
+import (
+	"encoding/json"
+	"fmt"
+	"log"
+	"net/http"
+	"os"
+	"time"
+
+	"golang.org/x/time/rate"
+)
+
+// API_ENDPOINT is the base URL that every entity URL is appended to.
+// This should ideally be stored as an environment variable.
+const API_ENDPOINT = "http://data.moviebuff.com/"
+
+const (
+	// requestsPerSecond is both the sustained rate and the burst size of the
+	// shared rate limiter. Reduce it in case of http.StatusTooManyRequests.
+	requestsPerSecond = 10000
+
+	// requestTimeout bounds one HTTP exchange so that a stalled connection
+	// cannot hang the search forever.
+	requestTimeout = 30 * time.Second
+)
+
+// apiClient is the one client used for every request. A rate limiter only
+// throttles anything when all requests draw tokens from the same instance.
+var apiClient = NewClient(rate.NewLimiter(rate.Limit(requestsPerSecond), requestsPerSecond))
+
+// NewClient returns an HTTP client whose requests are throttled by rl.
+func NewClient(rl *rate.Limiter) *HTTPClient {
+	return &HTTPClient{
+		client:      &http.Client{Timeout: requestTimeout},
+		RateLimiter: rl,
+	}
+}
+
+// Do is a wrapper over client.Do() for rate limiting: it waits for a token from
+// the limiter (or for the request context to end) before sending req.
+func (c *HTTPClient) Do(req *http.Request) (*http.Response, error) {
+	if err := c.RateLimiter.Wait(req.Context()); err != nil {
+		return nil, err
+	}
+	return c.client.Do(req)
+}
+
+// FetchEntityDetails is a generic function that fetches the Person or Movie
+// stored under url (relative to API_ENDPOINT) and decodes it into a T.
+//
+// A non-200 answer is reported as an error whose message starts with the
+// numeric status code (for example "403: error occurred"); callers rely on that
+// prefix to recognise forbidden URLs. On http.StatusTooManyRequests the program
+// is terminated so that the rate limit can be reduced before trying again.
+func FetchEntityDetails[T Entity](url string) (*T, error) {
+	req, err := http.NewRequest(http.MethodGet, API_ENDPOINT+url, nil)
+	if err != nil {
+		return nil, err
+	}
+
+	res, err := apiClient.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("fetching %q: %w", url, err)
+	}
+	// Deferred right after the error check so the body is released on every
+	// return path, including non-200 answers and decode failures.
+	defer res.Body.Close()
+
+	switch res.StatusCode {
+	case http.StatusOK:
+		// Success: the body is decoded below.
+	case http.StatusTooManyRequests:
+		// In case of DoS prevention from the CDN, reduce the rate limit and
+		// try again. Checked before the generic failure case, which would
+		// otherwise swallow it.
+		log.Println("Reduce Rate Limit and Try Again!")
+		os.Exit(1)
+	default:
+		return nil, fmt.Errorf("%d: error occurred", res.StatusCode)
+	}
+
+	var entity T
+	if err := json.NewDecoder(res.Body).Decode(&entity); err != nil {
+		return nil, fmt.Errorf("decoding %q: %w", url, err)
+	}
+	return &entity, nil
+}
+
+// GetNames fetches the names of the parent person, the person and the movie.
+// When an entity cannot be fetched the error is logged and its URL is returned
+// in place of the name, so a failed lookup never dereferences a nil result.
+func GetNames(parentURL string, personURL string, movieURL string) (string, string, string) {
+	parentName, personName, movieName := parentURL, personURL, movieURL
+
+	if details, err := FetchEntityDetails[Person](parentURL); err != nil {
+		log.Println(err)
+	} else {
+		parentName = details.Name
+	}
+
+	if details, err := FetchEntityDetails[Person](personURL); err != nil {
+		log.Println(err)
+	} else {
+		personName = details.Name
+	}
+
+	if details, err := FetchEntityDetails[Movie](movieURL); err != nil {
+		log.Println(err)
+	} else {
+		movieName = details.Name
+	}
+
+	return parentName, personName, movieName
+}
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..e117141
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,5 @@
+module github.com/xhermitx/challenge2015
+
+go 1.22.4
+
+require golang.org/x/time v0.7.0
diff --git a/go.sum b/go.sum
new file mode 100644
index 0000000..60aa8f9
--- /dev/null
+++ b/go.sum
@@ -0,0 +1,2 @@
+golang.org/x/time v0.7.0 h1:ntUhktv3OPE6TgYxXWv9vKvUSJyIFJlyohwbkEwPrKQ=
+golang.org/x/time v0.7.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
diff --git a/main.go b/main.go
new file mode 100644
index 0000000..9c19d42
--- /dev/null
+++ b/main.go
@@ -0,0 +1,38 @@
+package main
+
+import (
+	"fmt"
+	"log"
+	"os"
+)
+
+// main reads the two artist URLs from the command line and prints the
+// shortest chain of movies connecting them.
+//
+// Usage:
+//
+//	Compile the program using "go build -o main.exe ."
+//	Following is an example usage:
+//
+//	[For Windows]
+//	./main.exe amitabh-bachchan robert-de-niro
+//
+//	[For Others]
+//	Follow OS specific extensions in place of ".exe"
+func main() {
+	// Command Line Arguments: exactly two artist URLs are required.
+	if len(os.Args) != 3 {
+		fmt.Fprintln(os.Stderr, "usage: main <artist-a-url> <artist-b-url>")
+		os.Exit(2)
+	}
+	artistA, artistB := os.Args[1], os.Args[2]
+
+	// Without the source artist there is nothing to search from, so a failed
+	// fetch is fatal instead of being logged and then dereferenced.
+	personA, err := FetchEntityDetails[Person](artistA)
+	if err != nil {
+		log.Fatalln(err)
+	}
+
+	Separation(*personA, artistB)
+}
diff --git a/search.go b/search.go
new file mode 100644
index 0000000..c75b96a
--- /dev/null
+++ b/search.go
@@ -0,0 +1,225 @@
+package main
+
+import (
+	"fmt"
+	"log"
+	"strings"
+	"sync"
+)
+
+var (
+	// semaphores and concurrency safe Cache
+	sm    = NewSyncManager()
+	Cache = sync.Map{}
+
+	queue = []QueueData{} // A Queue to track artist details for BFS traversal
+
+	// Parent Map to keep track of the Path. It doubles as the visited set: a
+	// person has an entry once it was expanded or recorded as the target.
+	parent = make(map[string]Path)
+
+	// URL of the target once it has been reached; empty while searching.
+	currentPerson = ""
+)
+
+// forbidden is cached in place of an entity whose URL answered 403 Forbidden,
+// so that the same URL is never requested again.
+type forbidden struct{}
+
+// isForbidden reports whether err is the "403: ..." status error returned by
+// FetchEntityDetails.
+func isForbidden(err error) bool {
+	return err != nil && strings.HasPrefix(err.Error(), "403")
+}
+
+// cachedEntity returns the Person or Movie stored under url, fetching it and
+// updating the cache on a miss. ok is false when the entity is unavailable:
+// the fetch failed, or the URL is known to be forbidden.
+func cachedEntity[T Entity](url string) (entity T, ok bool) {
+	if cached, hit := Cache.Load(url); hit {
+		// The assertion fails for the forbidden marker, which is exactly the
+		// "unavailable" answer wanted here.
+		entity, ok = cached.(T)
+		return entity, ok
+	}
+
+	fetched, err := FetchEntityDetails[T](url)
+	if err != nil {
+		if isForbidden(err) {
+			Cache.Store(url, forbidden{})
+		} else {
+			log.Printf("skipping %q: %v", url, err)
+		}
+		return entity, false
+	}
+	Cache.Store(url, *fetched)
+	return *fetched, true
+}
+
+// Separation runs a breadth-first search from artistA and prints the degrees
+// of separation to artistB together with the connecting chain of movies, or a
+// notice when no connection exists.
+func Separation(artistA Person, artistB string) {
+	// The printer waits for the distance on sm.dos; done is closed once the
+	// whole path has been written.
+	done := make(chan struct{})
+	go func() {
+		defer close(done)
+		printResult(artistA.URL)
+	}()
+
+	// report publishes the final distance and waits for the printer. No
+	// handler goroutine is running at that point, so the printer can read
+	// parent and currentPerson without locking.
+	report := func(distance int) {
+		sm.dos <- distance
+		<-done
+	}
+
+	// The source was already fetched by the caller; do not request it again.
+	Cache.Store(artistA.URL, artistA)
+
+	queue = append(queue, QueueData{
+		URL:      artistA.URL,
+		Distance: 0,
+	})
+
+	for len(queue) > 0 {
+		// Pop a node from Queue
+		current := queue[0]
+		queue = queue[1:]
+
+		// Expand every person once. The queue is ordered by distance, so the
+		// first time a person is popped it was reached over a shortest path.
+		if _, seen := parent[current.URL]; seen {
+			continue
+		}
+		parent[current.URL] = Path{
+			ParentURL:  current.ParentURL,
+			Movie:      current.Movie,
+			Role:       current.Role,
+			ParentRole: current.ParentRole,
+		}
+
+		// Only the source itself can match here: every other person is
+		// checked by handleMovieData at the moment it is enqueued.
+		if current.URL == artistB {
+			currentPerson = current.URL
+			report(current.Distance)
+			return
+		}
+
+		person, ok := cachedEntity[Person](current.URL)
+		if !ok {
+			continue
+		}
+
+		// Iterate through the MovieList to find related Persons
+		for _, movie := range person.Movies {
+			sm.wg.Add(1)
+			go handleMovieData(movie, current, artistB)
+		}
+		sm.wg.Wait()
+
+		// Set by a handler, under sm.mu, when artistB was among the credits.
+		if currentPerson != "" {
+			report(current.Distance + 1)
+			return
+		}
+	}
+
+	fmt.Printf("No connection found between %s and %s\n", artistA.URL, artistB)
+}
+
+// handleMovieData handles the movie data: it finds the artists linked to
+// current through movie m, pushes them on the queue and records the final hop
+// of the path when artistB is among them.
+func handleMovieData(m Details, current QueueData, artistB string) {
+	defer sm.wg.Done()
+
+	movie, ok := cachedEntity[Movie](m.URL)
+	if !ok {
+		return
+	}
+
+	sm.mu.Lock()
+	defer sm.mu.Unlock()
+
+	// Cast and crew are walked one after the other instead of being joined
+	// with append: appending to the cached Cast slice could write into spare
+	// capacity shared with other goroutines handling the same movie.
+	for _, credits := range [][]Details{movie.Cast, movie.Crew} {
+		for _, a := range credits {
+			// Already expanded, or already recorded as the target.
+			if _, seen := parent[a.URL]; seen {
+				continue
+			}
+
+			// Push artists on the queue
+			queue = append(queue, QueueData{
+				// Artist Details pushed on the queue
+				URL:   a.URL,
+				Movie: m.URL,
+				Role:  a.Role,
+
+				// Parent details pushed on the queue
+				ParentURL:  current.URL,
+				ParentRole: m.Role,
+
+				// Increment the distance
+				Distance: current.Distance + 1,
+			})
+
+			if a.URL == artistB {
+				// If found, record the final hop; Separation publishes the
+				// distance once every handler of this person has finished.
+				// ParentRole is the parent's role in this movie (m.Role),
+				// matching what is stored for every other hop.
+				parent[a.URL] = Path{
+					ParentURL:  current.URL,
+					Movie:      m.URL,
+					Role:       a.Role,
+					ParentRole: m.Role,
+				}
+				currentPerson = a.URL
+			}
+		}
+	}
+}
+
+// printResult waits for the distance on sm.dos and prints the results in the
+// specified format: the distance, then one numbered entry per hop, ordered
+// from the source artist to the target.
+func printResult(sourceArtistURL string) {
+	degrees := <-sm.dos
+	fmt.Println("Distance of Separation: ", degrees)
+
+	// hop is one step of the path: person was reached through via.
+	type hop struct {
+		person string
+		via    Path
+	}
+
+	// Walk the parent map from the target back to the source. The walk is
+	// bounded by degrees, so a zero distance prints no hop and an incomplete
+	// chain cannot loop forever.
+	var hops []hop
+	for person := currentPerson; person != sourceArtistURL && len(hops) < degrees; {
+		via, ok := parent[person]
+		if !ok {
+			break
+		}
+		hops = append(hops, hop{person: person, via: via})
+		person = via.ParentURL
+	}
+
+	// The hops were collected target first; print them source first.
+	for i := len(hops) - 1; i >= 0; i-- {
+		h := hops[i]
+		// Fetch the names of parent, person and movie
+		parentName, personName, movieName := GetNames(h.via.ParentURL, h.person, h.via.Movie)
+		fmt.Printf("\n%d. Movie: %s", degrees-i, movieName)
+		fmt.Printf("\n%s: %s", h.via.ParentRole, parentName)
+		fmt.Printf("\n%s: %s\n", h.via.Role, personName)
+	}
+}
diff --git a/types.go b/types.go
new file mode 100644
index 0000000..9fa3f25
--- /dev/null
+++ b/types.go
@@ -0,0 +1,80 @@
+package main
+
+import (
+	"net/http"
+	"sync"
+
+	"golang.org/x/time/rate"
+)
+
+// Meta holds the fields shared by every API entity.
+type Meta struct {
+	URL  string `json:"url"`
+	Name string `json:"name"`
+}
+
+// Details is an entry of a credit list: an entity plus the role played.
+type Details struct {
+	Meta
+	Role string `json:"role"`
+}
+
+// Person is the API document of an artist.
+type Person struct {
+	Meta
+	Type   string    `json:"type"`
+	Movies []Details `json:"movies"`
+}
+
+// Movie is the API document of a movie.
+type Movie struct {
+	Meta
+	Type string    `json:"type"`
+	Cast []Details `json:"cast"`
+	Crew []Details `json:"crew"`
+}
+
+// Entity constrains FetchEntityDetails to the two document kinds.
+type Entity interface {
+	Person | Movie
+}
+
+// QueueData is one pending BFS node together with how it was reached.
+type QueueData struct {
+	URL        string
+	Movie      string
+	Role       string
+	ParentURL  string
+	ParentRole string
+	Distance   int
+}
+
+// HTTPClient is an http.Client whose requests are throttled by RateLimiter.
+type HTTPClient struct {
+	client      *http.Client
+	RateLimiter *rate.Limiter
+}
+
+// SyncManager bundles the synchronisation primitives used by the search.
+type SyncManager struct {
+	wg  *sync.WaitGroup
+	mu  *sync.RWMutex
+	dos chan int
+}
+
+// NewSyncManager returns a SyncManager with an unbuffered dos channel.
+func NewSyncManager() SyncManager {
+	return SyncManager{
+		wg:  &sync.WaitGroup{},
+		mu:  &sync.RWMutex{},
+		dos: make(chan int), // degrees of separation
+	}
+}
+
+// Path records how a person was reached during the search.
+type Path struct {
+	ParentURL  string // Parent URL
+	Movie      string
+	Role       string // Role of the Main Actor
+	ParentRole string // Role of the parent actor
+}