Skip to content

Commit bedc10e

Browse files
content-botmcvic1rjitssapir
authored and committed
Url scan raw search (demisto#40291) (demisto#40657)
* Added new search method (raw) * add release notes and documentation on command change * Update context to match format for command, and updated command documentation * lint fixes * Lint fixes * Fix release version * Update Packs/UrlScan/ReleaseNotes/1_2_20.md --------- Co-authored-by: Ryan McVicar <[email protected]> Co-authored-by: Sapir Malka <[email protected]>
1 parent ac820ed commit bedc10e

File tree

5 files changed

+132
-112
lines changed

5 files changed

+132
-112
lines changed

Packs/UrlScan/Integrations/UrlScan/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@
113113
<h5>Command Example</h5>
114114
<p><code>!urlscan-search searchParameter=8.8.8.8</code></p>
115115
<p><code>!urlscan-search searchType=advanced searchParameter="filename:logo.png AND date:>now-24h"</code></p>
116+
<p><code>!urlscan-search searchType=raw searchParameter="q=meta%3Asearchhit.search.04eb755f-468d-4421-ab86-210a01ee1bdd&datasource=hostnames&search_after="</code></p>
116117
<h3 id="h_872696191351541575062805">2. (Deprecated) Submit a URL directly to urlscan.io</h3>
117118
<hr>
118119
<p>Submits a URL to urlscan.io.</p>

Packs/UrlScan/Integrations/UrlScan/UrlScan.py

Lines changed: 122 additions & 111 deletions
Original file line numberDiff line numberDiff line change
@@ -668,6 +668,8 @@ def urlscan_search_only(client: Client, url: str, command_results: list, executi
668668
def urlscan_search(client, search_type, query, size=None):
669669
if search_type == "advanced":
670670
r, _, _ = http_request(client, "GET", "search/?q=" + query)
671+
elif search_type == "raw":
672+
r, _, _ = http_request(client, "GET", f"search/?{query}")
671673
else:
672674
url_suffix = "search/?q=" + search_type + ':"' + query + '"' + (f"&size={size}" if size else "")
673675
r, _, _ = http_request(client, "GET", url_suffix)
@@ -699,118 +701,127 @@ def urlscan_search_command(client):
699701
search_type = "hash"
700702
else:
701703
search_type = "page.url"
704+
if search_type == "raw":
705+
r = urlscan_search(client, search_type, raw_query)
706+
results = CommandResults(
707+
outputs_prefix="URLScan.Search.Results",
708+
raw_response=r,
709+
outputs=r["results"],
710+
readable_output=f'{r["total"]} results found for {raw_query}',
711+
)
712+
return_results(results)
713+
else:
714+
# Making the query string safe for Elastic Search
715+
query = quote(raw_query, safe="")
716+
717+
r = urlscan_search(client, search_type, query)
718+
719+
if r["total"] == 0:
720+
demisto.results(f"No results found for {raw_query}")
721+
return
722+
if r["total"] > 0:
723+
demisto.results("{} results found for {}".format(r["total"], raw_query))
724+
725+
# Opening empty string for url comparison
726+
last_url = ""
727+
hr_md = []
728+
cont_array = []
729+
ip_array = []
730+
dom_array = []
731+
url_array = []
732+
733+
for res in r["results"][:LIMIT]:
734+
ec = makehash()
735+
cont = makehash()
736+
url_cont = makehash()
737+
ip_cont = makehash()
738+
dom_cont = makehash()
739+
file_context = makehash()
740+
res_dict = res
741+
res_tasks = res_dict["task"]
742+
res_page = res_dict["page"]
743+
744+
if last_url == res_tasks["url"]:
745+
continue
702746

703-
# Making the query string safe for Elastic Search
704-
query = quote(raw_query, safe="")
705-
706-
r = urlscan_search(client, search_type, query)
707-
708-
if r["total"] == 0:
709-
demisto.results(f"No results found for {raw_query}")
710-
return
711-
if r["total"] > 0:
712-
demisto.results("{} results found for {}".format(r["total"], raw_query))
713-
714-
# Opening empty string for url comparison
715-
last_url = ""
716-
hr_md = []
717-
cont_array = []
718-
ip_array = []
719-
dom_array = []
720-
url_array = []
721-
722-
for res in r["results"][:LIMIT]:
723-
ec = makehash()
724-
cont = makehash()
725-
url_cont = makehash()
726-
ip_cont = makehash()
727-
dom_cont = makehash()
728-
file_context = makehash()
729-
res_dict = res
730-
res_tasks = res_dict["task"]
731-
res_page = res_dict["page"]
732-
733-
if last_url == res_tasks["url"]:
734-
continue
735-
736-
human_readable = makehash()
737-
738-
if "url" in res_tasks:
739-
url = res_tasks["url"]
740-
human_readable["URL"] = url
741-
cont["URL"] = url
742-
url_cont["Data"] = url
743-
if "domain" in res_page:
744-
domain = res_page["domain"]
745-
human_readable["Domain"] = domain
746-
cont["Domain"] = domain
747-
dom_cont["Name"] = domain
748-
if "asn" in res_page:
749-
asn = res_page["asn"]
750-
cont["ASN"] = asn
751-
ip_cont["ASN"] = asn
752-
human_readable["ASN"] = asn
753-
if "ip" in res_page:
754-
ip = res_page["ip"]
755-
cont["IP"] = ip
756-
ip_cont["Address"] = ip
757-
human_readable["IP"] = ip
758-
if "_id" in res_dict:
759-
scanID = res_dict["_id"]
760-
cont["ScanID"] = scanID
761-
human_readable["Scan ID"] = scanID
762-
if "time" in res_tasks:
763-
scanDate = res_tasks["time"]
764-
cont["ScanDate"] = scanDate
765-
human_readable["Scan Date"] = scanDate
766-
if "files" in res_dict:
767-
HUMAN_READBALE_HEADERS = ["URL", "Domain", "IP", "ASN", "Scan ID", "Scan Date", "File"]
768-
files = res_dict["files"][0]
769-
sha256 = files.get("sha256")
770-
filename = files.get("filename")
771-
filesize = files.get("filesize")
772-
filetype = files.get("mimeType")
773-
url = res_tasks["url"]
774-
if sha256:
775-
human_readable["File"]["Hash"] = sha256
776-
cont["Hash"] = sha256
777-
file_context["SHA256"] = sha256
778-
if filename:
779-
human_readable["File"]["Name"] = filename
780-
cont["FileName"] = filename
781-
file_context["File"]["Name"] = filename
782-
if filesize:
783-
human_readable["File"]["Size"] = filesize
784-
cont["FileSize"] = filesize
785-
file_context["Size"] = filesize
786-
if filetype:
787-
human_readable["File"]["Type"] = filetype
788-
cont["FileType"] = filetype
789-
file_context["File"]["Type"] = filetype
790-
file_context["File"]["Hostname"] = url
791-
792-
ec[outputPaths["file"]] = file_context
793-
hr_md.append(human_readable)
794-
cont_array.append(cont)
795-
ip_array.append(ip_cont)
796-
url_array.append(url_cont)
797-
dom_array.append(dom_cont)
798-
799-
# Storing last url in memory for comparison on next loop
800-
last_url = url
801-
802-
ec = {"URLScan(val.URL && val.URL == obj.URL)": cont_array, "URL": url_array, "IP": ip_array, "Domain": dom_array}
803-
demisto.results(
804-
{
805-
"Type": entryTypes["note"],
806-
"ContentsFormat": formats["markdown"],
807-
"Contents": r,
808-
"HumanReadable": tableToMarkdown(
809-
f"URLScan.io query results for {raw_query}", hr_md, HUMAN_READBALE_HEADERS, removeNull=True
810-
),
811-
"EntryContext": ec,
812-
}
813-
)
747+
human_readable = makehash()
748+
749+
if "url" in res_tasks:
750+
url = res_tasks["url"]
751+
human_readable["URL"] = url
752+
cont["URL"] = url
753+
url_cont["Data"] = url
754+
if "domain" in res_page:
755+
domain = res_page["domain"]
756+
human_readable["Domain"] = domain
757+
cont["Domain"] = domain
758+
dom_cont["Name"] = domain
759+
if "asn" in res_page:
760+
asn = res_page["asn"]
761+
cont["ASN"] = asn
762+
ip_cont["ASN"] = asn
763+
human_readable["ASN"] = asn
764+
if "ip" in res_page:
765+
ip = res_page["ip"]
766+
cont["IP"] = ip
767+
ip_cont["Address"] = ip
768+
human_readable["IP"] = ip
769+
if "_id" in res_dict:
770+
scanID = res_dict["_id"]
771+
cont["ScanID"] = scanID
772+
human_readable["Scan ID"] = scanID
773+
if "time" in res_tasks:
774+
scanDate = res_tasks["time"]
775+
cont["ScanDate"] = scanDate
776+
human_readable["Scan Date"] = scanDate
777+
if "files" in res_dict:
778+
HUMAN_READBALE_HEADERS = ["URL", "Domain", "IP", "ASN", "Scan ID", "Scan Date", "File"]
779+
files = res_dict["files"][0]
780+
sha256 = files.get("sha256")
781+
filename = files.get("filename")
782+
filesize = files.get("filesize")
783+
filetype = files.get("mimeType")
784+
url = res_tasks["url"]
785+
if sha256:
786+
human_readable["File"]["Hash"] = sha256
787+
cont["Hash"] = sha256
788+
file_context["SHA256"] = sha256
789+
if filename:
790+
human_readable["File"]["Name"] = filename
791+
cont["FileName"] = filename
792+
file_context["File"]["Name"] = filename
793+
if filesize:
794+
human_readable["File"]["Size"] = filesize
795+
cont["FileSize"] = filesize
796+
file_context["Size"] = filesize
797+
if filetype:
798+
human_readable["File"]["Type"] = filetype
799+
cont["FileType"] = filetype
800+
file_context["File"]["Type"] = filetype
801+
file_context["File"]["Hostname"] = url
802+
803+
ec[outputPaths["file"]] = file_context
804+
hr_md.append(human_readable)
805+
cont_array.append(cont)
806+
ip_array.append(ip_cont)
807+
url_array.append(url_cont)
808+
dom_array.append(dom_cont)
809+
810+
# Storing last url in memory for comparison on next loop
811+
last_url = url
812+
813+
ec = {"URLScan(val.URL && val.URL == obj.URL)": cont_array, "URL": url_array, "IP": ip_array, "Domain": dom_array}
814+
demisto.results(
815+
{
816+
"Type": entryTypes["note"],
817+
"ContentsFormat": formats["markdown"],
818+
"Contents": r,
819+
"HumanReadable": tableToMarkdown(
820+
f"URLScan.io query results for {raw_query}", hr_md, HUMAN_READBALE_HEADERS, removeNull=True
821+
),
822+
"EntryContext": ec,
823+
}
824+
)
814825

815826

816827
def format_http_transaction_list(client):

Packs/UrlScan/Integrations/UrlScan/UrlScan.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,7 @@ script:
130130
description: The search type. When advanced, allows to query multiple search parameters.
131131
predefined:
132132
- advanced
133+
- raw
133134
name: searchType
134135
- defaultValue: '20'
135136
description: The maximum number of results to return. Default is 20.
@@ -167,6 +168,8 @@ script:
167168
- contextPath: URLScan.FileType
168169
description: File type of the file scanned.
169170
type: string
171+
- contextPath: URLScan.Search.Results
172+
description: The unformatted search results from a raw search.
170173
polling: true
171174
- arguments:
172175
- description: The URL to scan.
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
2+
#### Integrations
3+
4+
##### urlscan.io
5+
Added support for the raw *searchType* in the ***urlscan-search*** command, allowing to specify a full formatted query string for the search.

Packs/UrlScan/pack_metadata.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"name": "URLScan.io",
33
"description": "urlscan.io Web Threat Intelligence",
44
"support": "partner",
5-
"currentVersion": "1.2.19",
5+
"currentVersion": "1.2.20",
66
"author": "urlscan GmbH",
77
"url": "https://urlscan.io",
88
"email": "[email protected]",

0 commit comments

Comments
 (0)