diff --git a/ui.py b/ui.py
index 89e21ee..8853b8e 100644
--- a/ui.py
+++ b/ui.py
@@ -6,166 +6,138 @@
from llm_utils import BufferedStreamingHandler
from llm import get_llm, refine_query, filter_results, generate_summary
-
# Cache expensive backend calls
-@st.cache_data(ttl=200, show_spinner=False)
+@st.cache_data(ttl=600, show_spinner=False)
def cached_search_results(refined_query: str, threads: int):
- return get_search_results(refined_query.replace(" ", "+"), max_workers=threads)
-
+ return get_search_results(refined_query, max_workers=threads)
-@st.cache_data(ttl=200, show_spinner=False)
+@st.cache_data(ttl=600, show_spinner=False)
def cached_scrape_multiple(filtered: list, threads: int):
return scrape_multiple(filtered, max_workers=threads)
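+# Cached wrappers are keyed on their argument values: a repeated (query, threads)
+# pair within the 10-minute TTL is served from cache instead of re-querying.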
-
-# Streamlit page configuration
st.set_page_config(
page_title="Robin: AI-Powered Dark Web OSINT Tool",
    page_icon="🕵️‍♂️",
initial_sidebar_state="expanded",
)
-# Custom CSS for styling
-st.markdown(
-    """
-    """,
-    unsafe_allow_html=True,
-)
-
# Sidebar
st.sidebar.title("Robin")
-st.sidebar.text("AI-Powered Dark Web OSINT Tool")
-st.sidebar.markdown(
- """Made by [Apurv Singh Gautam](https://www.linkedin.com/in/apurvsinghgautam/)"""
-)
+st.sidebar.info("AI-Powered Dark Web OSINT Tool")
+st.sidebar.markdown("Made by [Apurv Singh Gautam](https://www.linkedin.com/in/apurvsinghgautam/)")
st.sidebar.subheader("Settings")
model = st.sidebar.selectbox(
"Select LLM Model",
["gpt-5.1", "gpt-5-mini", "gpt-5-nano", "gpt-4.1", "claude-sonnet-4-5", "claude-sonnet-4-0", "llama3.1", "llama3.2", "gemma3", "deepseek-r1", "gemini-2.5-flash", "gemini-2.5-flash-lite", "gemini-2.5-pro"],
key="model_select",
)
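+# get_llm() (imported from llm) resolves the selected model name to its provider client.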
-threads = st.sidebar.slider("Scraping Threads", 1, 16, 4, key="thread_slider")
+threads = st.sidebar.slider("Scraping Threads", 1, 16, 5, key="thread_slider")
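+# The slider value is passed straight through as max_workers to the search and scrape helpers.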
-
-# Main UI - logo and input
+# Main UI
_, logo_col, _ = st.columns(3)
with logo_col:
- st.image(".github/assets/robin_logo.png", width=200)
-
-# Display text box and button
-with st.form("search_form", clear_on_submit=True):
- col_input, col_button = st.columns([10, 1])
- query = col_input.text_input(
- "Enter Dark Web Search Query",
- placeholder="Enter Dark Web Search Query",
- label_visibility="collapsed",
- key="query_input",
- )
- run_button = col_button.form_submit_button("Run")
-
-# Display a status message
+    # Text placeholder shown in place of the logo image
+    st.markdown("### 🕵️‍♂️ Robin OSINT")
+
+with st.form("search_form", clear_on_submit=False):
+ col_input, col_button = st.columns([8, 2])
+ query = col_input.text_input("Enter Dark Web Search Query", placeholder="e.g. ransomware leak sites")
+ run_button = col_button.form_submit_button("Run Investigation")
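+    # Inputs inside st.form don't trigger reruns on each keystroke; the script reruns once on submit.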
+
status_slot = st.empty()
-# Pre-allocate three placeholders-one per card
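+# Pre-allocate one st.empty() slot per metric card so each stage can fill its card in place.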
cols = st.columns(3)
p1, p2, p3 = [col.empty() for col in cols]
-# Summary placeholders
-summary_container_placeholder = st.empty()
+summary_container = st.empty()
-
-# Process the query
if run_button and query:
- # clear old state
+ # Clear previous state
for k in ["refined", "results", "filtered", "scraped", "streamed_summary"]:
- st.session_state.pop(k, None)
-
- # Stage 1 - Load LLM
- with status_slot.container():
- with st.spinner("🔄 Loading LLM..."):
- llm = get_llm(model)
-
- # Stage 2 - Refine query
- with status_slot.container():
- with st.spinner("🔄 Refining query..."):
- st.session_state.refined = refine_query(llm, query)
-        p1.container(border=True).markdown(
-            f"<h5>Refined Query</h5><p>{st.session_state.refined}</p>",
-            unsafe_allow_html=True,
-        )
-
- # Stage 3 - Search dark web
- with status_slot.container():
- with st.spinner("🔍 Searching dark web..."):
- st.session_state.results = cached_search_results(
- st.session_state.refined, threads
- )
-        p2.container(border=True).markdown(
-            f"<h5>Search Results</h5><p>{len(st.session_state.results)}</p>",
-            unsafe_allow_html=True,
-        )
-
- # Stage 4 - Filter results
- with status_slot.container():
- with st.spinner("🗂️ Filtering results..."):
- st.session_state.filtered = filter_results(
- llm, st.session_state.refined, st.session_state.results
- )
-        p3.container(border=True).markdown(
-            f"<h5>Filtered Results</h5><p>{len(st.session_state.filtered)}</p>",
-            unsafe_allow_html=True,
-        )
-
- # Stage 5 - Scrape content
- with status_slot.container():
- with st.spinner("📜 Scraping content..."):
- st.session_state.scraped = cached_scrape_multiple(
- st.session_state.filtered, threads
- )
-
- # Stage 6 - Summarize
- # 6a) Prepare session state for streaming text
- st.session_state.streamed_summary = ""
-
- # 6c) UI callback for each chunk
- def ui_emit(chunk: str):
- st.session_state.streamed_summary += chunk
- summary_slot.markdown(st.session_state.streamed_summary)
-
- with summary_container_placeholder.container(): # border=True, height=450):
- hdr_col, btn_col = st.columns([4, 1], vertical_alignment="center")
- with hdr_col:
- st.subheader(":red[Investigation Summary]", anchor=None, divider="gray")
- summary_slot = st.empty()
-
- # 6d) Inject your two callbacks and invoke exactly as before
- with status_slot.container():
- with st.spinner("✍️ Generating summary..."):
- stream_handler = BufferedStreamingHandler(ui_callback=ui_emit)
- llm.callbacks = [stream_handler]
- _ = generate_summary(llm, query, st.session_state.scraped)
-
- with btn_col:
+ if k in st.session_state:
+ del st.session_state[k]
+
+ try:
+ # 1. Load LLM
+ with status_slot.container():
+ with st.spinner("🔄 Loading LLM..."):
+ llm = get_llm(model)
+
+ # 2. Refine Query
+ with status_slot.container():
+ with st.spinner("🔄 Refining query..."):
+ st.session_state.refined = refine_query(llm, query)
+        p1.container(border=True).markdown(
+            f"<h5>Refined Query</h5><p>{st.session_state.refined}</p>",
+            unsafe_allow_html=True
+        )
+
+ # 3. Search
+ with status_slot.container():
+ with st.spinner("🔍 Searching dark web (this may take time)..."):
+ st.session_state.results = cached_search_results(st.session_state.refined, threads)
+
+ result_count = len(st.session_state.results)
+        p2.container(border=True).markdown(
+            f"<h5>Found Links</h5><p>{result_count}</p>",
+            unsafe_allow_html=True
+        )
+
+ if result_count == 0:
+ st.error("No results found. The search engines might be unreachable via Tor right now.")
+ st.stop()
+
+ # 4. Filter
+ with status_slot.container():
+ with st.spinner("🗂️ Filtering relevance..."):
+ st.session_state.filtered = filter_results(llm, st.session_state.refined, st.session_state.results)
+
+ filtered_count = len(st.session_state.filtered)
+        p3.container(border=True).markdown(
+            f"<h5>Relevant Links</h5><p>{filtered_count}</p>",
+            unsafe_allow_html=True
+        )
+
+ # 5. Scrape
+ with status_slot.container():
+ with st.spinner(f"📜 Scraping {filtered_count} sites..."):
+ st.session_state.scraped = cached_scrape_multiple(st.session_state.filtered, threads)
+
+ if not st.session_state.scraped:
+ st.error("Scraping failed. All selected sites were unreachable.")
+ st.stop()
+
+ # 6. Summarize
+ st.session_state.streamed_summary = ""
+
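+            # Append each streamed chunk and re-render the accumulated markdown,
+            # giving a live typewriter effect as tokens arrive.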
+ def ui_emit(chunk: str):
+ st.session_state.streamed_summary += chunk
+ summary_slot.markdown(st.session_state.streamed_summary)
+
+ with summary_container.container():
+ st.subheader("Investigation Summary", divider="red")
+ summary_slot = st.empty()
+
+ with status_slot.container():
+ with st.spinner("✍️ Analyzing intelligence..."):
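+                    # Attach the buffered handler so the LLM streams each chunk
+                    # through ui_emit while the summary is generated.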
+ stream_handler = BufferedStreamingHandler(ui_callback=ui_emit)
+ llm.callbacks = [stream_handler]
+ _ = generate_summary(llm, query, st.session_state.scraped)
+
+ # Download Button
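+        # Embed the summary as a base64 data URI so no file is written to disk.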
now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
- fname = f"summary_{now}.md"
b64 = base64.b64encode(st.session_state.streamed_summary.encode()).decode()
-        href = f'<a href="data:text/markdown;base64,{b64}" download="{fname}">Download Summary</a>'
+        href = f'<a href="data:text/markdown;base64,{b64}" download="summary_{now}.md">Download Summary</a>'
st.markdown(href, unsafe_allow_html=True)
- status_slot.success("✔️ Pipeline completed successfully!")
+
+ status_slot.success("✔️ Investigation Complete")
+
+ except Exception as e:
+ st.error(f"An error occurred: {e}")