diff --git a/README.md b/README.md index 6df56a5..8848ebb 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ #Degrees of Separation -With cinema going global these days, every one of the [A-Z]ollywoods are now connected. Use the wealth of data available at [Moviebuff](http://www.moviebuff.com) to see how. +With cinema going global these days, every one of the [A-Z]ollywoods are now connected. Use the wealth of data available at [Moviebuff](http://www.moviebuff.com) to see how. Write a Go program that behaves the following way: @@ -22,7 +22,7 @@ Director: Martin Scorsese Actor: Robert De Niro ``` -Your solution should use the Moviebuff data available to figure out the smallest degree of separation between the two people. +Your solution should use the Moviebuff data available to figure out the smallest degree of separation between the two people. All the inputs should be Moviebuff URLs for their respective people: For Amitabh Bachchan, his page is on http://www.moviebuff.com/amitabh-bachchan and his Moviebuff URL is `amitabh-bachchan`. Please do not attempt to scrape the Moviebuff website - All the data is available on an S3 bucket in an easy to parse JSON format here: `https://data.moviebuff.com/{moviebuff_url}` @@ -42,11 +42,25 @@ http://data.moviebuff.com/martin-scorsese http://data.moviebuff.com/taxi-driver ##Notes -* If you receive HTTP errors when trying to fetch the data, that might be the CDN throttling you. Luckily, Go has some very elegant idioms for rate limiting :) -* There may be a discrepancy in some cases where a movie appears on an actor's list but not vice versa. This usually happens when we edit data while exporting it, so feel free to either ignore these mismatches or handle them in some way. + +- If you receive HTTP errors when trying to fetch the data, that might be the CDN throttling you. 
/*
Non-Go patch content that shares this physical line with the api.go
declarations below. It is reproduced unchanged so that replacing the line
loses nothing; the line breaks are a best-effort reconstruction.

Luckily, Go has some very elegant idioms for rate limiting :)
+- There may be a discrepancy in some cases where a movie appears on an actor's list but not vice versa. This usually happens when we edit data while exporting it, so feel free to either ignore these mismatches or handle them in some way.

 Write a program in any language you want (If you're here from Gophercon, use Go :D) that does this. Feel free to make your own input and output format / command line tool / GUI / Webservice / whatever you want. Feel free to hold the dataset in whatever structure you want, but try not to use external databases - as far as possible stick to your langauage without bringing in MySQL/Postgres/MongoDB/Redis/Etc.

 To submit a solution, fork this repo and send a Pull Request on Github.

 For any questions or clarifications, raise an issue on this repo and we'll answer your questions as fast as we can.
+
+# How to use CLI tool to find degrees of separation
+
+1. Clone this repo.
+2. Navigate to the project directory.
+3. Install dependencies using the below command:
+   ```
+   go mod download
+   ```
+4. Run the tool using the below command:
+   ```
+   go run .
+   ```
diff --git a/api.go b/api.go
new file mode 100644
index 0000000..a796cb7
--- /dev/null
+++ b/api.go
@@ -0,0 +1,93 @@
+package main
+
+import (
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+)
+
*/

type (
	// CommonData holds the three fields that both person and movie documents
	// carry: the Moviebuff URL, the document type and the display name.
	CommonData struct {
		URL  string `json:"url"`
		Type string `json:"type"`
		Name string `json:"name"`
	}

	// Participation is one credit. Depending on the document it appears in,
	// it is either
	//  1. a movie in which a person has played a role, or
	//  2. a person who played a role in a movie.
	Participation struct {
		URL  string `json:"url"`
		Name string `json:"name"`
		Role string `json:"role"`
	}

	// Person is a person document: the common fields plus the list of movies
	// decoded from the "movies" array.
	Person struct {
		CommonData
		Movies []Participation `json:"movies"`
	}
)

// Movie is the data that represents a movie.
+type Movie struct { + CommonData + Cast []Participation `json:"cast"` + Crew []Participation `json:"crew"` +} + +// baseURL is the base URL for the Moviebuff API. +const baseURL = "https://data.moviebuff.com" + +// FetchData is a helper function that fetches data from any Moviebuff URL. +func FetchData(moviebuffURL string) ([]byte, error) { + res, err := http.Get(fmt.Sprintf("%s/%s", baseURL, moviebuffURL)) + if err != nil { + return nil, err + } + + data, err := io.ReadAll(res.Body) + if err != nil { + return nil, err + } + + switch res.StatusCode { + case 200: + return data, nil + + default: + return nil, fmt.Errorf("invalid response. Status code: %d. Data: %s", res.StatusCode, string(data)) + } +} + +// FetchMovie is a helper function that fetches movie data from a Moviebuff URL. +func FetchMovie(movieURL string) (Movie, error) { + data, err := FetchData(movieURL) + if err != nil { + return Movie{}, err + } + + var movie Movie + err = json.Unmarshal(data, &movie) + if err != nil { + return Movie{}, err + } + + return movie, nil +} + +// FetchPerson is a helper function that fetches person data from a Moviebuff URL. 
+func FetchPerson(personURL string) (Person, error) { + data, err := FetchData(personURL) + if err != nil { + return Person{}, err + } + + var person Person + err = json.Unmarshal(data, &person) + if err != nil { + return Person{}, err + } + + return person, nil +} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..d12d0fc --- /dev/null +++ b/go.mod @@ -0,0 +1,13 @@ +module github.com/milanvthakor/qube-cinema-task + +go 1.23.1 + +require github.com/briandowns/spinner v1.23.1 + +require ( + github.com/fatih/color v1.7.0 // indirect + github.com/mattn/go-colorable v0.1.2 // indirect + github.com/mattn/go-isatty v0.0.8 // indirect + golang.org/x/sys v0.0.0-20220412211240-33da011f77ad // indirect + golang.org/x/term v0.1.0 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..9cad878 --- /dev/null +++ b/go.sum @@ -0,0 +1,13 @@ +github.com/briandowns/spinner v1.23.1 h1:t5fDPmScwUjozhDj4FA46p5acZWIPXYE30qW2Ptu650= +github.com/briandowns/spinner v1.23.1/go.mod h1:LaZeM4wm2Ywy6vO571mvhQNRcWfRUnXOs0RcKV0wYKM= +github.com/fatih/color v1.7.0 h1:DkWD4oS2D8LGGgTQ6IvwJJXSL5Vp2ffcQg58nFV38Ys= +github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= +github.com/mattn/go-colorable v0.1.2 h1:/bC9yWikZXAL9uJdulbSfyVNIR3n3trXl+v8+1sx8mU= +github.com/mattn/go-colorable v0.1.2/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE= +github.com/mattn/go-isatty v0.0.8 h1:HLtExJ+uU2HOZ+wI0Tt5DtUDrx8yhUqDcp7fYERX4CE= +github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= +golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20220412211240-33da011f77ad h1:ntjMns5wyP/fN65tdBD4g8J5w8n015+iIIs9rtjXkY0= +golang.org/x/sys v0.0.0-20220412211240-33da011f77ad/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/term v0.1.0 h1:g6Z6vPFA9dYBAF7DWcH6sCcOntplXsDKcliusYijMlw= +golang.org/x/term v0.1.0/go.mod 
h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= diff --git a/main.go b/main.go new file mode 100644 index 0000000..1d46d4e --- /dev/null +++ b/main.go @@ -0,0 +1,31 @@ +package main + +import ( + "fmt" + "os" + "time" + + "github.com/briandowns/spinner" +) + +func main() { + if len(os.Args) < 3 { + fmt.Println("Error: Invalid number of arguments.") + fmt.Println("Usage: go run . ") + return + } + + moviebuffURL1 := os.Args[1] + moviebuffURL2 := os.Args[2] + if moviebuffURL1 == moviebuffURL2 { + fmt.Println("Error: Moviebuff URLs must be different.") + return + } + + fmt.Println("Searching Moviebuff...") + s := spinner.New(spinner.CharSets[9], 100*time.Millisecond) + s.Start() + defer s.Stop() + + SeperationDegrees(moviebuffURL1, moviebuffURL2) +} diff --git a/utils.go b/utils.go new file mode 100644 index 0000000..5e7b4cf --- /dev/null +++ b/utils.go @@ -0,0 +1,30 @@ +package main + +import ( + "fmt" +) + +// Collaboration represents a collaboration between two people in a movie. +type Collaboration struct { + Movie string + Person1 string + Person1Role string + Person2 string + Person2Role string +} + +// PrintCollaborations prints a list of collaborations. +func PrintCollaborations(collabs []Collaboration, degree int64) { + if len(collabs) == 0 { + fmt.Println("\nNo collaborations found.") + return + } + + fmt.Println("\nDegree of Seperation: ", degree) + for i, connection := range collabs { + fmt.Printf("%d. Movie: %s\n", i+1, connection.Movie) + fmt.Printf("%s: %s\n", connection.Person1Role, connection.Person1) + fmt.Printf("%s: %s\n", connection.Person2Role, connection.Person2) + fmt.Println() + } +} diff --git a/visited-sources.go b/visited-sources.go new file mode 100644 index 0000000..b4739a3 --- /dev/null +++ b/visited-sources.go @@ -0,0 +1,33 @@ +package main + +import "sync" + +// VisitedSources is a set of visited sources with a mutex lock for concurrent access. 
+// This set is used to avoid repeatedly fetching data from the same source and processing it multiple times. +type VisitedSources struct { + URLs map[string]struct{} + mutex *sync.RWMutex +} + +// NewVisitedSources creates a new VisitedSources. +func NewVisitedSources() *VisitedSources { + return &VisitedSources{ + URLs: make(map[string]struct{}), + mutex: &sync.RWMutex{}, + } +} + +// Add adds a URL to the VisitedSources. +func (vs *VisitedSources) Add(url string) { + vs.mutex.Lock() + defer vs.mutex.Unlock() + vs.URLs[url] = struct{}{} +} + +// Has checks if a URL is in the VisitedSources. +func (vs *VisitedSources) Has(url string) bool { + vs.mutex.RLock() + defer vs.mutex.RUnlock() + _, ok := vs.URLs[url] + return ok +} diff --git a/worker.go b/worker.go new file mode 100644 index 0000000..fc2fd9c --- /dev/null +++ b/worker.go @@ -0,0 +1,192 @@ +package main + +import ( + "context" + "sync" +) + +// GraphSource represents a node in the graph to search for connections. +type GraphSource struct { + // PersonURL is the URL of the person in the Moviebuff API. + PersonURL string + // Degree is the number of connections between the source person and current person. + Degree int64 + // Connections are the details of the connections between the source person and current person. + Connections []Collaboration + // IsDest indicates if the current person is the destination person. + IsDest bool +} + +// Reader represents the data that is read by the reader goroutine. +type Reader struct { + // Queue is the next level of people to be explored. + Queue []GraphSource + // FoundDest is true if the destination is found. + FoundDest bool + // DestSource is the details of the destination, it is only set if the "FoundDest" is true. + DestSource GraphSource +} + +// GraphTraversalWorker traverses the nodes of the level provided in the "jobs" channel and writes the nodes of the next level to the "results" channel. 
+func GraphTraversalWorker(ctx context.Context, destPerson string, vs *VisitedSources, jobs <-chan GraphSource, results chan<- GraphSource) { + for job := range jobs { + // Fetch the person data. + personData, err := FetchPerson(job.PersonURL) + if err != nil { + continue + } + + // Iterate over the movies of the person. + for _, movie := range personData.Movies { + // Skip if the movie has already been visited. + if vs.Has(movie.URL) { + continue + } + + vs.Add(movie.URL) + // Fetch the movie data. + movieData, err := FetchMovie(movie.URL) + if err != nil { + continue + } + + // movieMembers is the list of cast and crew of the movie. + movieMembers := append(movieData.Cast, movieData.Crew...) + // Iterate over the members of the movie. + for _, member := range movieMembers { + // Skip if the member has already been visited. + if vs.Has(member.URL) { + continue + } + + // Store the details of the collaboration between the source person and the member. + collab := append(job.Connections, Collaboration{ + Movie: movieData.Name, + Person1: personData.Name, + Person1Role: movie.Role, + Person2: member.Name, + Person2Role: member.Role, + }) + + gs := GraphSource{ + PersonURL: member.URL, + Degree: job.Degree + 1, + Connections: collab, + } + + // Check if the destination person has been found. + if member.URL == destPerson { + gs.IsDest = true + } + + // Add the member to the visited list and write the node to the results channel. + vs.Add(member.URL) + select { + case results <- gs: + case <-ctx.Done(): + return + } + + if gs.IsDest { + return + } + } + } + } +} + +// SeperationDegrees calculates the minimum degrees of seperation i.e. the number of connections between two people. +// This is classical graph theory problem where we need to find the shortest path between two people. +// Each node in the graph represents a person and each edge represents a connection between two people which is a movie. 
+// However, the tricky part is we only know source and destination people, the rest of the people are hidden. The rest of +// the people will be explored at runtime. +// The BFS algorithm the most efficient in this case as we need to find the shortest path and the rest of the nodes are hidden. +// So, as we do in BFS, we will traverse the graph level by level and the moment we find the destination, we will return the +// number of connections. +func SeperationDegrees(srcPerson string, destPerson string) { + // vs will store both visited people and movies. + vs := NewVisitedSources() + queue := []GraphSource{} + + // Add the source person to the queue. + vs.Add(srcPerson) + queue = append(queue, GraphSource{ + PersonURL: srcPerson, + Degree: 0, + Connections: []Collaboration{}, + }) + + // Traverse the graph level by level. + for len(queue) > 0 { + // ctx will be used to stop processing further when the destination is found. + ctx, ctxCancel := context.WithCancel(context.Background()) + + // jobs is the queue of people to be explored. + jobs := make(chan GraphSource, len(queue)) + // results is the next level of people to be explored. + results := make(chan GraphSource) + + // rwGroup is used to wait for the reader (reading from "results" channel) and writer (writing to "results" channel) to finish. + rwGroup := &sync.WaitGroup{} + rwGroupChn := make(chan Reader, 1) + + // As the we don't know how many people are in the next level, the size of the + // "results" channel is set to 1. That means we need to haver reader always reading from the channel. + // Hence, we need to add a goroutine to read from the channel. 
+ rwGroup.Add(1) + go func() { + defer rwGroup.Done() + + newQueue := []GraphSource{} + for result := range results { + if result.IsDest { + rwGroupChn <- Reader{FoundDest: true, DestSource: result} + ctxCancel() + return + } + + newQueue = append(newQueue, result) + } + + rwGroupChn <- Reader{Queue: newQueue} + }() + + // Add a goroutine to process the people in the queue. + rwGroup.Add(1) + go func() { + defer rwGroup.Done() + + // As the number of people to be explored could be huge, we need to process them in batches. + wg := &sync.WaitGroup{} + for i := 0; i < 100; i++ { + wg.Add(1) + go func() { + defer wg.Done() + GraphTraversalWorker(ctx, destPerson, vs, jobs, results) + }() + } + + // Add the people in the queue to the jobs channel. + for _, gs := range queue { + jobs <- gs + } + close(jobs) + + // Wait for the workers to finish. Then close the results channel so that the reader can finish. + wg.Wait() + close(results) + }() + + rwGroup.Wait() + close(rwGroupChn) + + res := <-rwGroupChn + if res.FoundDest { + PrintCollaborations(res.DestSource.Connections, res.DestSource.Degree) + return + } + queue = res.Queue + } + + PrintCollaborations(nil, 0) +}