iocp.py (20 changes: 12 additions & 8 deletions)
@@ -35,12 +35,16 @@
#
###################################################################################################

#from __future__ import unicode_literals
Review comment: Remnants of a change that was not kept. This line no longer serves any purpose and should not be merged.

import os
import sys
import fnmatch
import argparse
import re
from StringIO import StringIO
try:
from io import StringIO
except:
from StringIO import StringIO
try:
import configparser as ConfigParser
except ImportError:
@@ -190,7 +194,7 @@ def parse_pdf_pypdf2(self, f, fpath):
def parse_pdf_pdfminer(self, f, fpath):
try:
laparams = LAParams()
laparams.all_texts = True
laparams.all_texts = True
rsrcmgr = PDFResourceManager()
pagenos = set()

@@ -223,7 +227,7 @@ def parse_pdf(self, f, fpath):
except AttributeError:
e = 'Selected PDF parser library is not supported: %s' % (self.library)
raise NotImplementedError(e)

self.parser_func(f, fpath)

def parse_txt(self, f, fpath):
@@ -244,19 +248,19 @@ def parse_html(self, f, fpath):
try:
if self.dedup:
self.dedup_store = set()

data = f.read()
soup = BeautifulSoup(data)
html = soup.findAll(text=True)

text = u''
text = ''
for elem in html:
if elem.parent.name in ['style', 'script', '[document]', 'head', 'title']:
continue
elif re.match('<!--.*-->', unicode(elem)):
elif re.match('<!--.*-->', elem):
continue
else:
text += unicode(elem)
text += elem

self.handler.print_header(fpath)
self.parse_page(fpath, text, 1)
@@ -304,7 +308,7 @@ def parse(self, path):
argparser.add_argument('-i', dest='INPUT_FORMAT', default='pdf', help='Input format (pdf/txt/html)')
argparser.add_argument('-o', dest='OUTPUT_FORMAT', default='csv', help='Output format (csv/json/yara/netflow)')
argparser.add_argument('-d', dest='DEDUP', action='store_true', default=False, help='Deduplicate matches')
argparser.add_argument('-l', dest='LIB', default='pdfminer', help='PDF parsing library (pypdf2/pdfminer)')
argparser.add_argument('-l', dest='LIB', default='pypdf2', help='PDF parsing library (pypdf2/pdfminer)')
args = argparser.parse_args()

parser = IOC_Parser(args.INI, args.INPUT_FORMAT, args.DEDUP, args.LIB, args.OUTPUT_FORMAT)
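
The try/except import shuffle above is the usual way to keep a single source file importable on both Python 2 and Python 3. A minimal standalone sketch of the same pattern, nothing project-specific assumed; note that except ImportError is the safer spelling than the bare except used in the hunk:

    try:
        from io import StringIO            # Python 3 name (io exists on Python 2.6+ as well,
    except ImportError:                    # so this branch normally wins on both interpreters)
        from StringIO import StringIO      # fallback for very old Python 2

    try:
        import configparser as ConfigParser    # Python 3 module name
    except ImportError:
        import ConfigParser                     # Python 2 module name

    buf = StringIO()
    buf.write(u'portable across interpreters\n')   # io.StringIO on Python 2 requires unicode text
    print(buf.getvalue())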
output.py (8 changes: 4 additions & 4 deletions)
@@ -70,7 +70,7 @@ def print_match(self, fpath, page, name, match):
self.cnt[name] += 1
else:
self.cnt[name] = 1

string_id = "$%s%d" % (name, self.cnt[name])
self.sids.append(string_id)
string_value = match.replace('\\', '\\\\')
@@ -92,15 +92,15 @@ def print_footer(self, fpath):
print("\tcondition:")
print("\t\t" + cond)
print("}")

class OutputHandler_netflow(OutputHandler):
def __init__(self):
print "host 255.255.255.255"
print("host 255.255.255.255")

def print_match(self, fpath, page, name, match):
data = {
'type' : name,
'match': match
}
if data["type"] == "IP":
print " or host %s " % data["match"]
print(" or host %s " % data["match"])
whitelist.py (4 changes: 2 additions & 2 deletions)
@@ -4,9 +4,9 @@

class WhiteList(dict):
def __init__(self, basedir):
searchdir = os.path.join(basedir, "whitelists/whitelist_*.ini")
searchdir = os.path.join(basedir, "whitelists/whitelist_*.ini")
fpaths = glob.glob(searchdir)
for fpath in fpaths:
t = os.path.splitext(os.path.split(fpath)[1])[0].split('_',1)[1]
patterns = [line.strip() for line in open(fpath)]
self[t] = [re.compile(p) for p in patterns]
self[t] = [re.compile(p) for p in patterns]
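
whitelist.py only loses trailing whitespace in this change. For context, WhiteList maps an indicator type, taken from the whitelist_<type>.ini filename, to a list of compiled regexes. A hedged sketch of how such a mapping might be consulted; the helper below is an assumption for illustration, not code from the repository:

    import re

    # Hypothetical helper: a value counts as whitelisted if any pattern for its
    # indicator type matches. "whitelist" is any dict shaped like WhiteList.
    def is_whitelisted(whitelist, ind_type, value):
        return any(p.search(value) for p in whitelist.get(ind_type, []))

    # In-memory stand-in for whitelists/whitelist_IP.ini:
    wl = {"IP": [re.compile(r"^10\."), re.compile(r"^192\.168\.")]}
    print(is_whitelisted(wl, "IP", "192.168.1.5"))   # True
    print(is_whitelisted(wl, "IP", "203.0.113.9"))   # False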