Skip to content

Commit bedc10e

Browse files
content-botmcvic1rjitssapir
authored and committed
Url scan raw search (demisto#40291) (demisto#40657)
* Added new search method (raw) * add release notes and documentation on command change * Update context to match format for command, and updated command documentation * lint fixes * Lint fixes * Fix release version * Update Packs/UrlScan/ReleaseNotes/1_2_20.md --------- Co-authored-by: Ryan McVicar <[email protected]> Co-authored-by: Sapir Malka <[email protected]>
1 parent ac820ed commit bedc10e

File tree

5 files changed

+132
-112
lines changed

5 files changed

+132
-112
lines changed

Packs/UrlScan/Integrations/UrlScan/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@
113113
<h5>Command Example</h5>
114114
<p><code>!urlscan-search searchParameter=8.8.8.8</code></p>
115115
<p><code>!urlscan-search searchType=advanced searchParameter="filename:logo.png AND date:>now-24h"</code></p>
116+
<p><code>!urlscan-search searchType=raw searchParameter="q=meta%3Asearchhit.search.04eb755f-468d-4421-ab86-210a01ee1bdd&datasource=hostnames&search_after="</code></p>
116117
<h3 id="h_872696191351541575062805">2. (Deprecated) Submit a URL directly to urlscan.io</h3>
117118
<hr>
118119
<p>Submits a URL to urlscan.io.</p>

Packs/UrlScan/Integrations/UrlScan/UrlScan.py

Lines changed: 122 additions & 111 deletions
Original file line numberDiff line numberDiff line change
@@ -668,6 +668,8 @@ def urlscan_search_only(client: Client, url: str, command_results: list, executi
668668
def urlscan_search(client, search_type, query, size=None):
669669
if search_type == "advanced":
670670
r, _, _ = http_request(client, "GET", "search/?q=" + query)
671+
elif search_type == "raw":
672+
r, _, _ = http_request(client, "GET", f"search/?{query}")
671673
else:
672674
url_suffix = "search/?q=" + search_type + ':"' + query + '"' + (f"&size={size}" if size else "")
673675
r, _, _ = http_request(client, "GET", url_suffix)
@@ -699,118 +701,127 @@ def urlscan_search_command(client):
699701
search_type = "hash"
700702
else:
701703
search_type = "page.url"
704+
if search_type == "raw":
705+
r = urlscan_search(client, search_type, raw_query)
706+
results = CommandResults(
707+
outputs_prefix="URLScan.Search.Results",
708+
raw_response=r,
709+
outputs=r["results"],
710+
readable_output=f'{r["total"]} results found for {raw_query}',
711+
)
712+
return_results(results)
713+
else:
714+
# Making the query string safe for Elastic Search
715+
query = quote(raw_query, safe="")
716+
717+
r = urlscan_search(client, search_type, query)
718+
719+
if r["total"] == 0:
720+
demisto.results(f"No results found for {raw_query}")
721+
return
722+
if r["total"] > 0:
723+
demisto.results("{} results found for {}".format(r["total"], raw_query))
724+
725+
# Opening empty string for url comparison
726+
last_url = ""
727+
hr_md = []
728+
cont_array = []
729+
ip_array = []
730+
dom_array = []
731+
url_array = []
732+
733+
for res in r["results"][:LIMIT]:
734+
ec = makehash()
735+
cont = makehash()
736+
url_cont = makehash()
737+
ip_cont = makehash()
738+
dom_cont = makehash()
739+
file_context = makehash()
740+
res_dict = res
741+
res_tasks = res_dict["task"]
742+
res_page = res_dict["page"]
743+
744+
if last_url == res_tasks["url"]:
745+
continue
702746

703-
# Making the query string safe for Elastic Search
704-
query = quote(raw_query, safe="")
705-
706-
r = urlscan_search(client, search_type, query)
707-
708-
if r["total"] == 0:
709-
demisto.results(f"No results found for {raw_query}")
710-
return
711-
if r["total"] > 0:
712-
demisto.results("{} results found for {}".format(r["total"], raw_query))
713-
714-
# Opening empty string for url comparison
715-
last_url = ""
716-
hr_md = []
717-
cont_array = []
718-
ip_array = []
719-
dom_array = []
720-
url_array = []
721-
722-
for res in r["results"][:LIMIT]:
723-
ec = makehash()
724-
cont = makehash()
725-
url_cont = makehash()
726-
ip_cont = makehash()
727-
dom_cont = makehash()
728-
file_context = makehash()
729-
res_dict = res
730-
res_tasks = res_dict["task"]
731-
res_page = res_dict["page"]
732-
733-
if last_url == res_tasks["url"]:
734-
continue
735-
736-
human_readable = makehash()
737-
738-
if "url" in res_tasks:
739-
url = res_tasks["url"]
740-
human_readable["URL"] = url
741-
cont["URL"] = url
742-
url_cont["Data"] = url
743-
if "domain" in res_page:
744-
domain = res_page["domain"]
745-
human_readable["Domain"] = domain
746-
cont["Domain"] = domain
747-
dom_cont["Name"] = domain
748-
if "asn" in res_page:
749-
asn = res_page["asn"]
750-
cont["ASN"] = asn
751-
ip_cont["ASN"] = asn
752-
human_readable["ASN"] = asn
753-
if "ip" in res_page:
754-
ip = res_page["ip"]
755-
cont["IP"] = ip
756-
ip_cont["Address"] = ip
757-
human_readable["IP"] = ip
758-
if "_id" in res_dict:
759-
scanID = res_dict["_id"]
760-
cont["ScanID"] = scanID
761-
human_readable["Scan ID"] = scanID
762-
if "time" in res_tasks:
763-
scanDate = res_tasks["time"]
764-
cont["ScanDate"] = scanDate
765-
human_readable["Scan Date"] = scanDate
766-
if "files" in res_dict:
767-
HUMAN_READBALE_HEADERS = ["URL", "Domain", "IP", "ASN", "Scan ID", "Scan Date", "File"]
768-
files = res_dict["files"][0]
769-
sha256 = files.get("sha256")
770-
filename = files.get("filename")
771-
filesize = files.get("filesize")
772-
filetype = files.get("mimeType")
773-
url = res_tasks["url"]
774-
if sha256:
775-
human_readable["File"]["Hash"] = sha256
776-
cont["Hash"] = sha256
777-
file_context["SHA256"] = sha256
778-
if filename:
779-
human_readable["File"]["Name"] = filename
780-
cont["FileName"] = filename
781-
file_context["File"]["Name"] = filename
782-
if filesize:
783-
human_readable["File"]["Size"] = filesize
784-
cont["FileSize"] = filesize
785-
file_context["Size"] = filesize
786-
if filetype:
787-
human_readable["File"]["Type"] = filetype
788-
cont["FileType"] = filetype
789-
file_context["File"]["Type"] = filetype
790-
file_context["File"]["Hostname"] = url
791-
792-
ec[outputPaths["file"]] = file_context
793-
hr_md.append(human_readable)
794-
cont_array.append(cont)
795-
ip_array.append(ip_cont)
796-
url_array.append(url_cont)
797-
dom_array.append(dom_cont)
798-
799-
# Storing last url in memory for comparison on next loop
800-
last_url = url
801-
802-
ec = {"URLScan(val.URL && val.URL == obj.URL)": cont_array, "URL": url_array, "IP": ip_array, "Domain": dom_array}
803-
demisto.results(
804-
{
805-
"Type": entryTypes["note"],
806-
"ContentsFormat": formats["markdown"],
807-
"Contents": r,
808-
"HumanReadable": tableToMarkdown(
809-
f"URLScan.io query results for {raw_query}", hr_md, HUMAN_READBALE_HEADERS, removeNull=True
810-
),
811-
"EntryContext": ec,
812-
}
813-
)
747+
human_readable = makehash()
748+
749+
if "url" in res_tasks:
750+
url = res_tasks["url"]
751+
human_readable["URL"] = url
752+
cont["URL"] = url
753+
url_cont["Data"] = url
754+
if "domain" in res_page:
755+
domain = res_page["domain"]
756+
human_readable["Domain"] = domain
757+
cont["Domain"] = domain
758+
dom_cont["Name"] = domain
759+
if "asn" in res_page:
760+
asn = res_page["asn"]
761+
cont["ASN"] = asn
762+
ip_cont["ASN"] = asn
763+
human_readable["ASN"] = asn
764+
if "ip" in res_page:
765+
ip = res_page["ip"]
766+
cont["IP"] = ip
767+
ip_cont["Address"] = ip
768+
human_readable["IP"] = ip
769+
if "_id" in res_dict:
770+
scanID = res_dict["_id"]
771+
cont["ScanID"] = scanID
772+
human_readable["Scan ID"] = scanID
773+
if "time" in res_tasks:
774+
scanDate = res_tasks["time"]
775+
cont["ScanDate"] = scanDate
776+
human_readable["Scan Date"] = scanDate
777+
if "files" in res_dict:
778+
HUMAN_READBALE_HEADERS = ["URL", "Domain", "IP", "ASN", "Scan ID", "Scan Date", "File"]
779+
files = res_dict["files"][0]
780+
sha256 = files.get("sha256")
781+
filename = files.get("filename")
782+
filesize = files.get("filesize")
783+
filetype = files.get("mimeType")
784+
url = res_tasks["url"]
785+
if sha256:
786+
human_readable["File"]["Hash"] = sha256
787+
cont["Hash"] = sha256
788+
file_context["SHA256"] = sha256
789+
if filename:
790+
human_readable["File"]["Name"] = filename
791+
cont["FileName"] = filename
792+
file_context["File"]["Name"] = filename
793+
if filesize:
794+
human_readable["File"]["Size"] = filesize
795+
cont["FileSize"] = filesize
796+
file_context["Size"] = filesize
797+
if filetype:
798+
human_readable["File"]["Type"] = filetype
799+
cont["FileType"] = filetype
800+
file_context["File"]["Type"] = filetype
801+
file_context["File"]["Hostname"] = url
802+
803+
ec[outputPaths["file"]] = file_context
804+
hr_md.append(human_readable)
805+
cont_array.append(cont)
806+
ip_array.append(ip_cont)
807+
url_array.append(url_cont)
808+
dom_array.append(dom_cont)
809+
810+
# Storing last url in memory for comparison on next loop
811+
last_url = url
812+
813+
ec = {"URLScan(val.URL && val.URL == obj.URL)": cont_array, "URL": url_array, "IP": ip_array, "Domain": dom_array}
814+
demisto.results(
815+
{
816+
"Type": entryTypes["note"],
817+
"ContentsFormat": formats["markdown"],
818+
"Contents": r,
819+
"HumanReadable": tableToMarkdown(
820+
f"URLScan.io query results for {raw_query}", hr_md, HUMAN_READBALE_HEADERS, removeNull=True
821+
),
822+
"EntryContext": ec,
823+
}
824+
)
814825

815826

816827
def format_http_transaction_list(client):

Packs/UrlScan/Integrations/UrlScan/UrlScan.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,7 @@ script:
130130
description: The search type. When advanced, allows to query multiple search parameters.
131131
predefined:
132132
- advanced
133+
- raw
133134
name: searchType
134135
- defaultValue: '20'
135136
description: The maximum number of results to return. Default is 20.
@@ -167,6 +168,8 @@ script:
167168
- contextPath: URLScan.FileType
168169
description: File type of the file scanned.
169170
type: string
171+
- contextPath: URLScan.Search.Results
172+
description: The unformatted search results from a raw search.
170173
polling: true
171174
- arguments:
172175
- description: The URL to scan.
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
2+
#### Integrations
3+
4+
##### urlscan.io
5+
Added support for the raw *searchType* in the ***urlscan-search*** command, allowing to specify a full formatted query string for the search.

Packs/UrlScan/pack_metadata.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"name": "URLScan.io",
33
"description": "urlscan.io Web Threat Intelligence",
44
"support": "partner",
5-
"currentVersion": "1.2.19",
5+
"currentVersion": "1.2.20",
66
"author": "urlscan GmbH",
77
"url": "https://urlscan.io",
88
"email": "[email protected]",

0 commit comments

Comments
 (0)