diff --git a/ui.py b/ui.py index 89e21ee..8853b8e 100644 --- a/ui.py +++ b/ui.py @@ -6,166 +6,138 @@ from llm_utils import BufferedStreamingHandler from llm import get_llm, refine_query, filter_results, generate_summary - # Cache expensive backend calls -@st.cache_data(ttl=200, show_spinner=False) +@st.cache_data(ttl=600, show_spinner=False) def cached_search_results(refined_query: str, threads: int): - return get_search_results(refined_query.replace(" ", "+"), max_workers=threads) - + return get_search_results(refined_query, max_workers=threads) -@st.cache_data(ttl=200, show_spinner=False) +@st.cache_data(ttl=600, show_spinner=False) def cached_scrape_multiple(filtered: list, threads: int): return scrape_multiple(filtered, max_workers=threads) - -# Streamlit page configuration st.set_page_config( page_title="Robin: AI-Powered Dark Web OSINT Tool", page_icon="🕵️‍♂️", initial_sidebar_state="expanded", ) -# Custom CSS for styling st.markdown( - """ - """, unsafe_allow_html=True, ) - # Sidebar st.sidebar.title("Robin") -st.sidebar.text("AI-Powered Dark Web OSINT Tool") -st.sidebar.markdown( - """Made by [Apurv Singh Gautam](https://www.linkedin.com/in/apurvsinghgautam/)""" -) +st.sidebar.info("AI-Powered Dark Web OSINT Tool") +st.sidebar.markdown("Made by [Apurv Singh Gautam](https://www.linkedin.com/in/apurvsinghgautam/)") st.sidebar.subheader("Settings") model = st.sidebar.selectbox( "Select LLM Model", ["gpt-5.1", "gpt-5-mini", "gpt-5-nano", "gpt-4.1", "claude-sonnet-4-5", "claude-sonnet-4-0", "llama3.1", "llama3.2", "gemma3", "deepseek-r1", "gemini-2.5-flash", "gemini-2.5-flash-lite", "gemini-2.5-pro"], key="model_select", ) -threads = st.sidebar.slider("Scraping Threads", 1, 16, 4, key="thread_slider") +threads = st.sidebar.slider("Scraping Threads", 1, 16, 5, key="thread_slider") - -# Main UI - logo and input +# Main UI _, logo_col, _ = st.columns(3) with logo_col: - st.image(".github/assets/robin_logo.png", width=200) - -# Display text box and button -with st.form("search_form", clear_on_submit=True): - col_input, col_button = st.columns([10, 1]) - query = col_input.text_input( - "Enter Dark Web Search Query", - placeholder="Enter Dark Web Search Query", - label_visibility="collapsed", - key="query_input", - ) - run_button = col_button.form_submit_button("Run") - -# Display a status message + # Placeholder for logo if exists, otherwise text + st.markdown("### 🕵️‍♂️ Robin OSINT") + +with st.form("search_form", clear_on_submit=False): + col_input, col_button = st.columns([8, 2]) + query = col_input.text_input("Enter Dark Web Search Query", placeholder="e.g. ransomware leak sites") + run_button = col_button.form_submit_button("Run Investigation") + status_slot = st.empty() -# Pre-allocate three placeholders-one per card cols = st.columns(3) p1, p2, p3 = [col.empty() for col in cols] -# Summary placeholders -summary_container_placeholder = st.empty() +summary_container = st.empty() - -# Process the query if run_button and query: - # clear old state + # Clear previous state for k in ["refined", "results", "filtered", "scraped", "streamed_summary"]: - st.session_state.pop(k, None) - - # Stage 1 - Load LLM - with status_slot.container(): - with st.spinner("🔄 Loading LLM..."): - llm = get_llm(model) - - # Stage 2 - Refine query - with status_slot.container(): - with st.spinner("🔄 Refining query..."): - st.session_state.refined = refine_query(llm, query) - p1.container(border=True).markdown( - f"

Refined Query

{st.session_state.refined}

", - unsafe_allow_html=True, - ) - - # Stage 3 - Search dark web - with status_slot.container(): - with st.spinner("🔍 Searching dark web..."): - st.session_state.results = cached_search_results( - st.session_state.refined, threads - ) - p2.container(border=True).markdown( - f"

Search Results

{len(st.session_state.results)}

", - unsafe_allow_html=True, - ) - - # Stage 4 - Filter results - with status_slot.container(): - with st.spinner("🗂️ Filtering results..."): - st.session_state.filtered = filter_results( - llm, st.session_state.refined, st.session_state.results - ) - p3.container(border=True).markdown( - f"

Filtered Results

{len(st.session_state.filtered)}

", - unsafe_allow_html=True, - ) - - # Stage 5 - Scrape content - with status_slot.container(): - with st.spinner("📜 Scraping content..."): - st.session_state.scraped = cached_scrape_multiple( - st.session_state.filtered, threads - ) - - # Stage 6 - Summarize - # 6a) Prepare session state for streaming text - st.session_state.streamed_summary = "" - - # 6c) UI callback for each chunk - def ui_emit(chunk: str): - st.session_state.streamed_summary += chunk - summary_slot.markdown(st.session_state.streamed_summary) - - with summary_container_placeholder.container(): # border=True, height=450): - hdr_col, btn_col = st.columns([4, 1], vertical_alignment="center") - with hdr_col: - st.subheader(":red[Investigation Summary]", anchor=None, divider="gray") - summary_slot = st.empty() - - # 6d) Inject your two callbacks and invoke exactly as before - with status_slot.container(): - with st.spinner("✍️ Generating summary..."): - stream_handler = BufferedStreamingHandler(ui_callback=ui_emit) - llm.callbacks = [stream_handler] - _ = generate_summary(llm, query, st.session_state.scraped) - - with btn_col: + if k in st.session_state: + del st.session_state[k] + + try: + # 1. Load LLM + with status_slot.container(): + with st.spinner("🔄 Loading LLM..."): + llm = get_llm(model) + + # 2. Refine Query + with status_slot.container(): + with st.spinner("🔄 Refining query..."): + st.session_state.refined = refine_query(llm, query) + p1.container(border=True).markdown( + f"

Refined Query

{st.session_state.refined}

", + unsafe_allow_html=True + ) + + # 3. Search + with status_slot.container(): + with st.spinner("🔍 Searching dark web (this may take time)..."): + st.session_state.results = cached_search_results(st.session_state.refined, threads) + + result_count = len(st.session_state.results) + p2.container(border=True).markdown( + f"

Found Links

{result_count}

", + unsafe_allow_html=True + ) + + if result_count == 0: + st.error("No results found. The search engines might be unreachable via Tor right now.") + st.stop() + + # 4. Filter + with status_slot.container(): + with st.spinner("🗂️ Filtering relevance..."): + st.session_state.filtered = filter_results(llm, st.session_state.refined, st.session_state.results) + + filtered_count = len(st.session_state.filtered) + p3.container(border=True).markdown( + f"

Relevant Links

{filtered_count}

", + unsafe_allow_html=True + ) + + # 5. Scrape + with status_slot.container(): + with st.spinner(f"📜 Scraping {filtered_count} sites..."): + st.session_state.scraped = cached_scrape_multiple(st.session_state.filtered, threads) + + if not st.session_state.scraped: + st.error("Scraping failed. All selected sites were unreachable.") + st.stop() + + # 6. Summarize + st.session_state.streamed_summary = "" + + def ui_emit(chunk: str): + st.session_state.streamed_summary += chunk + summary_slot.markdown(st.session_state.streamed_summary) + + with summary_container.container(): + st.subheader("Investigation Summary", divider="red") + summary_slot = st.empty() + + with status_slot.container(): + with st.spinner("✍️ Analyzing intelligence..."): + stream_handler = BufferedStreamingHandler(ui_callback=ui_emit) + llm.callbacks = [stream_handler] + _ = generate_summary(llm, query, st.session_state.scraped) + + # Download Button now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") - fname = f"summary_{now}.md" b64 = base64.b64encode(st.session_state.streamed_summary.encode()).decode() - href = f'
📥 Download
' + href = f'
📥 Download Report
' st.markdown(href, unsafe_allow_html=True) - status_slot.success("✔️ Pipeline completed successfully!") + + status_slot.success("✔️ Investigation Complete") + + except Exception as e: + st.error(f"An error occurred: {e}")