Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 57 additions & 26 deletions Youtube-trend-analysis/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@

bright_data_api_key = os.getenv("BRIGHT_DATA_API_KEY")

os.makedirs("transcripts", exist_ok=True)

@st.cache_resource
def load_llm():

Expand Down Expand Up @@ -124,6 +126,45 @@ def start_analysis():

channel_scrapped_output = get_output(bright_data_api_key, status['snapshot_id'], format="json")

status_container.info("Processing transcripts...")
st.session_state.all_files = []
error_files = []

for i in tqdm(range(len(channel_scrapped_output[0]))):
youtube_video_id = channel_scrapped_output[0][i]["shortcode"]

os.makedirs("transcripts", exist_ok=True)

file = f"transcripts/{youtube_video_id}.txt"
st.session_state.all_files.append(file)

with open(file, "w", encoding="utf-8") as f:
transcript = channel_scrapped_output[0][i].get(
"formatted_transcript", []
)
if isinstance(transcript, list):
for entry in transcript:
text = entry.get("text", "")
start = entry.get("start_time", 0.0)
end = entry.get("end_time", 0.0)
line = f"({start:.2f}-{end:.2f}): {text}\n"
f.write(line)
else:
f.write(str(transcript))
error_files.append(i)
del st.session_state.all_files[-1]
Comment on lines +141 to +155
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Add error handling for file operations.

The file-writing operations should be wrapped in error handling so that failures such as permission errors or encoding problems are handled gracefully instead of crashing the app.

Wrap the file operations in a try-except block:

-                with open(file, "w", encoding="utf-8") as f:
-                    transcript = channel_scrapped_output[0][i].get(
-                        "formatted_transcript", []
-                    )
-                    if isinstance(transcript, list):
-                        for entry in transcript:
-                            text = entry.get("text", "")
-                            start = entry.get("start_time", 0.0)
-                            end = entry.get("end_time", 0.0)
-                            line = f"({start:.2f}-{end:.2f}): {text}\n"
-                            f.write(line)
-                    else:
-                        f.write(str(transcript))
-                        error_files.append(i)
-                        del st.session_state.all_files[-1]
+                try:
+                    with open(file, "w", encoding="utf-8") as f:
+                        transcript = channel_scrapped_output[0][i].get(
+                            "formatted_transcript", []
+                        )
+                        if isinstance(transcript, list):
+                            for entry in transcript:
+                                text = entry.get("text", "")
+                                start = entry.get("start_time", 0.0)
+                                end = entry.get("end_time", 0.0)
+                                line = f"({start:.2f}-{end:.2f}): {text}\n"
+                                f.write(line)
+                        else:
+                            f.write(str(transcript))
+                            error_files.append(i)
+                            del st.session_state.all_files[-1]
+                except (IOError, OSError) as e:
+                    st.error(f"Failed to write transcript for video {youtube_video_id}: {str(e)}")
+                    error_files.append(i)
+                    if file in st.session_state.all_files:
+                        st.session_state.all_files.remove(file)
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
with open(file, "w", encoding="utf-8") as f:
transcript = channel_scrapped_output[0][i].get(
"formatted_transcript", []
)
if isinstance(transcript, list):
for entry in transcript:
text = entry.get("text", "")
start = entry.get("start_time", 0.0)
end = entry.get("end_time", 0.0)
line = f"({start:.2f}-{end:.2f}): {text}\n"
f.write(line)
else:
f.write(str(transcript))
error_files.append(i)
del st.session_state.all_files[-1]
try:
with open(file, "w", encoding="utf-8") as f:
transcript = channel_scrapped_output[0][i].get(
"formatted_transcript", []
)
if isinstance(transcript, list):
for entry in transcript:
text = entry.get("text", "")
start = entry.get("start_time", 0.0)
end = entry.get("end_time", 0.0)
line = f"({start:.2f}-{end:.2f}): {text}\n"
f.write(line)
else:
f.write(str(transcript))
error_files.append(i)
del st.session_state.all_files[-1]
except (IOError, OSError) as e:
st.error(f"Failed to write transcript for video {youtube_video_id}: {str(e)}")
error_files.append(i)
if file in st.session_state.all_files:
st.session_state.all_files.remove(file)
🤖 Prompt for AI Agents
In Youtube-trend-analysis/app.py around lines 141 to 155, the file writing
operations lack error handling, which can cause the program to crash on issues
like permission errors or encoding problems. Wrap the entire file open and write
block in a try-except statement to catch exceptions such as IOError or OSError.
In the except block, log or handle the error appropriately so the program
continues running instead of terminating abruptly.


if error_files:
for idx in error_files:
youtube_video_id = channel_scrapped_output[0][idx]["shortcode"]
file = f"transcripts/{youtube_video_id}.txt"
if os.path.exists(file):
os.remove(file)
print(f"Removed file: {file}")
else:
print(f"File not found: {file}")

st.session_state.channel_scrapped_output = channel_scrapped_output

st.markdown("## YouTube Videos Extracted")
# Create a container for the carousel
Expand All @@ -147,44 +188,34 @@ def start_analysis():

# Check if we still have videos to display
if video_idx < num_videos:
with cols[col_idx]:
st.video(channel_scrapped_output[0][video_idx]['url'])
if video_idx in error_files:
continue

status_container.info("Processing transcripts...")
st.session_state.all_files = []
# Calculate transcripts
for i in tqdm(range(len(channel_scrapped_output[0]))):


# save transcript to file
youtube_video_id = channel_scrapped_output[0][i]['shortcode']

file = "transcripts/" + youtube_video_id + ".txt"
st.session_state.all_files.append(file)

with open(file, "w") as f:
for j in range(len(channel_scrapped_output[0][i]['formatted_transcript'])):
text = channel_scrapped_output[0][i]['formatted_transcript'][j]['text']
start_time = channel_scrapped_output[0][i]['formatted_transcript'][j]['start_time']
end_time = channel_scrapped_output[0][i]['formatted_transcript'][j]['end_time']
f.write(f"({start_time:.2f}-{end_time:.2f}): {text}\n")

f.close()
with cols[col_idx]:
st.video(channel_scrapped_output[0][video_idx]["url"])

st.session_state.channel_scrapped_output = channel_scrapped_output
status_container.success("Scraping complete! We shall now analyze the videos and report trends...")
status_container.success("Scraping complete! Analyzing trends...")

else:
status_container.error(f"Scraping failed with status: {status}")

if status['status'] == "ready":

file_contents = []
for file in st.session_state.all_files:
with open(file, "r", encoding="utf-8") as f:
content = f.read()
file_contents.append(content)

merge_content = "\n\n".join(file_contents)

status_container = st.empty()
with st.spinner('The agent is analyzing the videos... This may take a moment.'):
# create crew
st.session_state.crew = create_agents_and_tasks()
st.session_state.response = st.session_state.crew.kickoff(inputs={"file_paths": ", ".join(st.session_state.all_files)})

st.session_state.response = st.session_state.crew.kickoff(
inputs={"file_contents": merge_content}
)


# ===========================
Expand Down
4 changes: 2 additions & 2 deletions Youtube-trend-analysis/brightdata_scrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,9 +105,9 @@ def get_output(api_key, snapshot_id, format="json"):
f"https://api.brightdata.com/datasets/v3/snapshot/{snapshot_id}?format={format}"
]

result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, encoding='utf-8')

if result.returncode == 0:
if result.returncode == 0 and result.stdout:
json_lines = result.stdout.strip().split("\n")
print(json_lines)
json_objects = [json.loads(line) for line in json_lines]
Expand Down
4 changes: 2 additions & 2 deletions Youtube-trend-analysis/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ agents:
- name: analysis_agent
role: "YouTube Transcript Analyzer"
goal: >
Analyze the transcripts of several videos located in {file_paths}.
Analyze the transcripts of several videos using {file_contents}.
Break down the analysis into structured sections, including:
1. Key topics discussed.
2. Emerging trends or patterns across multiple transcripts.
Expand Down Expand Up @@ -34,7 +34,7 @@ agents:
tasks:
- name: analysis_task
description: >
Conduct a fine-grained analysis of the transcripts of several videos located in {file_paths}.
Conduct a fine-grained analysis of the transcripts of several videos using {file_contents}.
Break the analysis into the following sections:
1. Key topics and themes discussed in the videos.
2. Emerging trends or patterns across multiple videos.
Expand Down