From 45c9aec614f01ab8b52c583f597429c2de7859fb Mon Sep 17 00:00:00 2001 From: mkind Date: Sun, 17 Dec 2017 11:05:14 +0100 Subject: [PATCH 1/6] This commit removes analyzers and switches to elasticsearch. Now, every packet is indexed. Also unnecessary code like slow parsers are removed. --- pcapscanner/analyzers/__init__.py | 0 pcapscanner/analyzers/conversations.py | 66 ----- pcapscanner/analyzers/hosts.py | 41 ---- pcapscanner/main.py | 32 +-- pcapscanner/pcap.py | 320 +++---------------------- requirements.txt | 1 + 6 files changed, 44 insertions(+), 416 deletions(-) delete mode 100644 pcapscanner/analyzers/__init__.py delete mode 100644 pcapscanner/analyzers/conversations.py delete mode 100644 pcapscanner/analyzers/hosts.py diff --git a/pcapscanner/analyzers/__init__.py b/pcapscanner/analyzers/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/pcapscanner/analyzers/conversations.py b/pcapscanner/analyzers/conversations.py deleted file mode 100644 index d7e5cae..0000000 --- a/pcapscanner/analyzers/conversations.py +++ /dev/null @@ -1,66 +0,0 @@ -from multiprocessing import Manager -import csv -import os - -CSVFN = "conversations.csv" - -manager = Manager() - - -def __add_protocol(storage, pkt): - protocol = str(pkt.protocol) - - if protocol in storage.keys(): - storage[protocol] += 1 - else: - storage[protocol] = 1 - - -def __add_port(storage, pkt): - port = str(pkt.port_dst) - - if port not in storage.keys(): - storage[port] = manager.dict() - __add_protocol(storage[port], pkt) - - -def __add_dst_addr(storage, pkt): - dst_addr = str(pkt.ip_dst) - - if dst_addr not in storage.keys(): - storage[dst_addr] = manager.dict() - __add_port(storage[dst_addr], pkt) - - -def init(): - setattr(analyze, 'storage', manager.dict()) - - -def log(outputdir): - fn = os.path.join(outputdir, CSVFN) - with open(fn, 'w') as f: - w = csv.writer(f) - - for src_addr, conversation in analyze.storage.items(): - for dst_addr, ports in conversation.items(): - for port, protocols in ports.items(): - for protocol, counter in protocols.items(): - w.writerow( - [src_addr, dst_addr, port, protocol, counter] - ) - - -def analyze(pkt): - """ Count conversations between hosts. """ - - conversations = analyze.storage - try: - src_addr = str(pkt.ip_src) - - if src_addr not in conversations.keys(): - conversations[src_addr] = manager.dict() - __add_dst_addr(conversations[src_addr], pkt) - - except AttributeError as e: - # ignore packets that aren't TCP/UDP or IPv4 - pass diff --git a/pcapscanner/analyzers/hosts.py b/pcapscanner/analyzers/hosts.py deleted file mode 100644 index e70b3e6..0000000 --- a/pcapscanner/analyzers/hosts.py +++ /dev/null @@ -1,41 +0,0 @@ -from multiprocessing import Manager -import csv -import os - -CSVFN = "hostcounter.csv" - -manager = Manager() - - -def init(): - setattr(analyze, 'storage', manager.dict()) - - -def log(outputdir): - fn = os.path.join(outputdir, CSVFN) - with open(fn, 'w') as f: - w = csv.writer(f) - w.writerows(analyze.storage.items()) - - -def analyze(pkt): - """ Count the occurences of all host either as src or dest. 
""" - - hosts = analyze.storage - try: - src_addr = str(pkt.ip_src) - dst_addr = str(pkt.ip_dst) - - if src_addr in hosts.keys(): - hosts[src_addr] += 1 - else: - hosts[src_addr] = 1 - - if dst_addr in hosts.keys(): - hosts[dst_addr] += 1 - else: - hosts[dst_addr] = 1 - - except AttributeError as e: - # ignore packets that aren't TCP/UDP or IPv4 - pass diff --git a/pcapscanner/main.py b/pcapscanner/main.py index 6cf9570..4786069 100755 --- a/pcapscanner/main.py +++ b/pcapscanner/main.py @@ -13,16 +13,10 @@ import time from multiprocessing import Pool -from analyzers import hosts, conversations import pcap NUM_THREADS = 4 -ANALYZERS = [ - hosts, - conversations -] - ASCII_LOGO = """ @@@@@@@ @@@@@@@ @@@@@@ @@@@@@@ @@@@@@ @@@@@@@ @@@@@@ @@@ @@@ @@ -38,9 +32,10 @@ """ + class Main: - def __init__(self, outputdir, inputdir, parser): + def __init__(self, outputdir, inputdir): # log files self.outputdir = outputdir @@ -59,12 +54,6 @@ def __init__(self, outputdir, inputdir, parser): ) self.inputdir = inputdir - # initialize all analyzers - for a in ANALYZERS: - a.init() - - self.parser = parser - def _log_errors(self): if not self.ignoredFiles: return @@ -75,12 +64,9 @@ def _log_errors(self): print("ignored {} files".format(len(self.ignoredFiles))) - def _log_results(self): - for a in ANALYZERS: - a.log(self.outputdir) def start(self): - pcapfiles = pcap.walk(self.inputdir) + pcapfiles = pcap.walk(self.inputdir)[:3] print( "Collected list of {} files in {}". format(len(pcapfiles), self.inputdir) @@ -98,7 +84,7 @@ def start(self): # asynchronously pool.apply_async( pcap.process_pcap, - (fn, [a.analyze for a in ANALYZERS], progressbar_position, self.parser) + (fn, progressbar_position) ) # close pool @@ -108,7 +94,6 @@ def start(self): pool.join() self._log_errors() - self._log_results() # return number of pcap files return len(pcapfiles) @@ -128,20 +113,13 @@ def start(self): default='.', help='path to the output directory' ) - parser.add_argument( - '-p', '--parser', - nargs='?', - default=pcap.Parser.DPKT.name, - choices=[p.name for p in pcap.Parser] - ) args = parser.parse_args() print(ASCII_LOGO) scanner = Main( outputdir=args.outputdir, - inputdir=args.inputdir, - parser=args.parser + inputdir=args.inputdir ) # measure time startTime = time.time() diff --git a/pcapscanner/pcap.py b/pcapscanner/pcap.py index 3a232ba..592d210 100644 --- a/pcapscanner/pcap.py +++ b/pcapscanner/pcap.py @@ -3,109 +3,10 @@ import sys import gzip import dpkt -from enum import Enum -from dpkt.compat import compat_ord -import pyshark import socket - -from pypacker import ppcap -from pypacker.layer12 import ethernet -from pypacker.layer3 import ip -from pypacker.layer4 import tcp - -import functools from tqdm import tqdm from datetime import datetime as dt -from collections import namedtuple - -""" -This is the destination format of parsed pcap packages -to decouple PCAP parser data structures from analysers code -""" -ParsedPackage = namedtuple('ParsedPackage', [ - 'protocol', - 'ip_src', - 'ip_dst', - 'port_src', - 'port_dst', - 'mac_src', - 'mac_dst', - 'pcap_file', - 'timestamp' -]) - -class Parser(Enum): - DPKT = 'dpkt' - PYPACKER = 'pypacker' - SCAPY = 'scapy' - PYSHARK = 'pyshark' - - -def sort_by_date(a, b): - """ - Custom sort function to compare them by their timestamp in filename - """ - - regex = '[a-zA-Z0-9\-](2017[0-9-]*)-.*pcap' - aBase = str(os.path.basename(a)) - bBase = str(os.path.basename(b)) - aDateStr = None - bDateStr = None - - # parse first filename - try: - aDateStr = re.search(regex, 
aBase).group(1) - except AttributeError: - print('Ignore a', aBase) - - # parse second filename - try: - bDateStr = re.search(regex, bBase).group(1) - except AttributeError: - print('Ignore b', bBase) - - # in case we have no valid timestamp return 0 - if aDateStr is None or bDateStr is None: - print( - "sort_by_date: Was not able to extract timestamp comparing {} to {}". - format(aBase, bBase) - ) - return 0 - - # return nagative value, zero or positive value - aDate = dt.strptime(aDateStr, "%Y%m%d-%H%M%S") - bDate = dt.strptime(bDateStr, "%Y%m%d-%H%M%S") - - # compare and sort from oldest to new - if aDate < bDate: - return -1 - - elif aDate == bDate: - try: - # in case date is equal there is a integer before the - # timestamp to sort - regex = '[a-zA-Z\-][0-9]\-([0-9]).*' - numA = int(re.search(regex, aBase).group(1)) - numB = int(re.search(regex, bBase).group(1)) - - # also VPN 1 and 2 are present - regex = '[a-zA-Z\-]([0-9])\-[0-9].*' - vpnA = int(re.search(regex, aBase).group(1)) - vpnB = int(re.search(regex, bBase).group(1)) - - except AttributeError: - numA = 0 - numB = 0 - - if numA < numB: - return -1 - elif numA == numB: - # should never be the case - return 0 - else: - return 1 - else: - return 1 +from elasticsearch import Elasticsearch def walk(directory): @@ -117,23 +18,32 @@ def walk(directory): if re.match(regex, os.path.basename(f)) ] - # sort them by timestamp in filename - return sorted( - pcapFilesUnordered, key=functools.cmp_to_key(sort_by_date) - ) + return pcapFilesUnordered def parser_dpkt(pcapfile, progressbar_position): """ - Parsing the RawIP encapsulated PCAPs using dpkt. Expects an unpacked file ref. + Parsing the RawIP encapsulated PCAPs using dpkt. Expects an + unpacked file ref. https://pypi.python.org/pypi/dpkt """ - out=[] + try: pcap = dpkt.pcap.Reader(pcapfile) - - print("SUCCESS ", pcapfile.name) - for ts,buf in tqdm( + es = Elasticsearch() + es.indices.create(index='packet', ignore=400, body={ + "packet": { + "properties": { + "ip_src": {"type": "ip"}, + "ip_dst": {"type": "ip"}, + "timestamp": {"type": "date"}, + "port_src": {"type": "integer"}, + "port_dst": {"type": "integer"} + } + } + }) + + for ts, buf in tqdm( pcap, position=progressbar_position, unit=" packages", @@ -143,147 +53,33 @@ def parser_dpkt(pcapfile, progressbar_position): ip = dpkt.ip.IP(buf) tcp = ip.data - # fetch the infos we need - # we use socket to convert inet IPv4 IP to human readable IP - # socket.inet_ntop(socket.AF_INET, inet) - #FIXME: get MAC adress - parsedPkg = ParsedPackage( - protocol=ip.p, - ip_src=socket.inet_ntop(socket.AF_INET, ip.src), - port_src=tcp.sport, - ip_dst=socket.inet_ntop(socket.AF_INET, ip.dst), - port_dst=tcp.dport, - mac_src='unknown', - mac_dst='unknown', - pcap_file=os.path.abspath(pcapfile.name), - timestamp=str(dt.utcfromtimestamp(ts)) - ) - out.append(parsedPkg) + es.index(index="packets", body={ + "protocol": ip.p, + "ip_src": socket.inet_ntop(socket.AF_INET, ip.src), + "port_src": tcp.sport, + "ip_dst": socket.inet_ntop(socket.AF_INET, ip.dst), + "port_dst": tcp.dport, + "mac_src": "unknown", + "mac_dst": "unknown", + "pcap_file": os.path.abspath(pcapfile.name), + "timestamp": dt.utcfromtimestamp(ts) + }, doc_type='packet') + except AttributeError: # ignore packets that aren't TCP/UDP or IPv4 pass - except ValueError: - print( - "ValueError happend as packages where parsed. We expect RawIP " - "encapsulated PCAPs, maybe now we have a Ethernet encapsulated " - "one. 
Abort.") - raise + except KeyboardInterrupt: raise - except: - e = sys.exc_info() - print("FAILED ", e, str(os.path.abspath(pcapfile.name))) finally: pcapfile.close() - return out -def parser_pyshark(pcapfile, progressbar_position): - """ - Uses tshark CLI in a bash subprocess, parses stdout. Slow but works well with - pcap.gz and pcap files. - https://github.com/KimiNewt/pyshark - """ - out = [] - cap = pyshark.FileCapture(os.path.abspath(pcapfile.name), only_summaries=False) - - # read array (to resolve futures) and return only the information - # we need to decouple data structures from analysers code - for pkt in tqdm( - cap, - position=progressbar_position, - unit=" packages", - desc=os.path.basename(pcapfile.name) - ): - - try: - # fetch the infos we need - parsedPkg = ParsedPackage( - protocol=pkt.transport_layer, - ip_src=pkt.ip.src, - port_src=pkt[pkt.transport_layer].srcport, - ip_dst=pkt.ip.dst, - port_dst=pkt[pkt.transport_layer].dstport, - mac_src="IMPLEMENT", - mac_dst="IMPLEMENT", - pcap_file=os.path.abspath(pcapfile.name), - timestamp=pkt.frame_info.get_field('time') - ) - out.append(parsedPkg) - except AttributeError: - # ignore packets that aren't TCP/UDP or IPv4 - continue - return out - - -def parser_pypacker(pcapfile, progressbar_position): - """ - Does not work! - Very fast, reads only .pcap (no .gz). Problem is it reads PCAPs with LinkType - Ethernet, but our dumps are RawIP. We can iterate and print the raw package - details, but parsing the packages does not work out of the box (because of RawIP). - https://github.com/mike01/pypacker - - for encapsulation RawIP or Ethernet see here: - https://osqa-ask.wireshark.org/questions/49568/why-cant-this-wireshark-produced-1-packet-pcap-file-not-be-processed-using-winpcap-or-dpkt - """ - out = [] - cap = ppcap.Reader(filename=os.path.abspath(pcapfile.name)) - - # read array (to resolve futures) and return only the information - # we need (to reduce memory needed) - for ts,buf in tqdm( - cap, - position=progressbar_position, - unit=" packages", - desc=os.path.basename(pcapfile.name) - ): - - try: - eth = ethernet.Ethernet(buf) - print("timestamp {}: {}", ts, eth) -# for d in eth: -# print(" datum ",d) - # FIXME: this works well for PCAPs with LinkType "Ethernet" , - # but not "RawIP" like our dumps. - if eth[tcp.TCP] is not None: - print( - "{ts}: {src}:{port_src} -> {dst}:{port_dst}". - format( - ts=ts, - src=eth[ip.IP].src_s, - port_src=eth[tcp.TCP].sport, - dst=eth[ip.IP].dst_s, - port_dst=eth[tcp.TCP].dport - ) - ) - - except AttributeError: - # ignore packets that aren't TCP/UDP or IPv4 - continue - cap.close() - return out - - -def parser_scapy(pcapfile, progressbar_position): - """ - Unfinished, never tested - https://phaethon.github.io/scapy/ - """ - out = [] - with PcapReader(pcapfile.name) as pcap_reader: - for pkt in pcap_reader: - #do something with the packet - pass - return out - - -def process_pcap(pcapfilename, analysers, progressbar_position, parser): +def process_pcap(pcapfilename, progressbar_position): """ Scan the given file object for hosts data, collect statistics for each. 
Using pypacker as parser """ - print("processing {} with {}".format(pcapfilename, parser)) f = open(pcapfilename, 'rb') try: @@ -293,57 +89,17 @@ def process_pcap(pcapfilename, analysers, progressbar_position, parser): g = gzip.open(f, 'rb') # test if this is really GZIP, raises exception if not g.peek(1) - # if it is a gzipped files pass the unpacked file reference to the parser + # if it is a gzipped files pass the unpacked file + # reference to the parser f = g except: - #TODO: remove! just for debug - #print("THIS IS NOT A GZIP FILE: ",pcapfilename) + # TODO: remove! just for debug + # print("THIS IS NOT A GZIP FILE: ",pcapfilename) pass - if parser == Parser.PYSHARK.name: - # Pyshark CLI is slow but works (single thread ~1.200pkg/s, - # with 8 threads ~4.500pkg/s) - parsed_packets = parser_pyshark(f, progressbar_position) - - elif parser == Parser.DPKT.name: - # DPKT works for pcap and pcap.gz and is fast (single thread ~50.000pkg/s, - # with 8 threads ~240.000pkg/s) - parsed_packets = parser_dpkt(f, progressbar_position) - - elif parser == Parser.PYPACKER.name: - # TODO implement parser - parsed_packets = parser_pypacker(f, progressbar_position) - - elif parser == Parser.SCAPY.name: - # TODO implement parser - parsed_packets = parser_scapy(f, progressbar_position) - - else: - print("illegal parser") - return - - #TODO: remove! just for debug - print( - "FETCHED {amount} PACKAGES FROM PCAP {dir}.\n Example: {pkt} ". - format( - amount=len(parsed_packets), - dir=os.path.basename(pcapfilename), - pkt=parsed_packets[0] - ) - ) - - # process the stats we need - for p in tqdm(parsed_packets, - position=progressbar_position, - ascii=True, - unit=" packages", - ): - for analyser in analysers: - analyser(p) - + parser_dpkt(f, progressbar_position) except KeyboardInterrupt: - print("Bye") sys.exit() finally: if g is not None: diff --git a/requirements.txt b/requirements.txt index 0e8f915..ef17ab6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,3 +7,4 @@ dpkt flake8 pip wheel +elasticsearch From eb7e059ac24639439783730754789a2b0ca8f64a Mon Sep 17 00:00:00 2001 From: mkind Date: Sun, 17 Dec 2017 12:12:56 +0100 Subject: [PATCH 2/6] using bulk API request to improve performance --- pcapscanner/pcap.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/pcapscanner/pcap.py b/pcapscanner/pcap.py index 592d210..c22e574 100644 --- a/pcapscanner/pcap.py +++ b/pcapscanner/pcap.py @@ -7,6 +7,7 @@ from tqdm import tqdm from datetime import datetime as dt from elasticsearch import Elasticsearch +from elasticsearch import helpers def walk(directory): @@ -43,6 +44,7 @@ def parser_dpkt(pcapfile, progressbar_position): } }) + bulk_data = [] for ts, buf in tqdm( pcap, position=progressbar_position, @@ -53,7 +55,7 @@ def parser_dpkt(pcapfile, progressbar_position): ip = dpkt.ip.IP(buf) tcp = ip.data - es.index(index="packets", body={ + bulk_data.append({ "protocol": ip.p, "ip_src": socket.inet_ntop(socket.AF_INET, ip.src), "port_src": tcp.sport, @@ -62,13 +64,19 @@ def parser_dpkt(pcapfile, progressbar_position): "mac_src": "unknown", "mac_dst": "unknown", "pcap_file": os.path.abspath(pcapfile.name), - "timestamp": dt.utcfromtimestamp(ts) - }, doc_type='packet') + "timestamp": dt.utcfromtimestamp(ts), + }) + if len(bulk_data) == 1000: + helpers.bulk(es, index="packets", actions=bulk_data, doc_type='packet') + bulk_data = [] except AttributeError: # ignore packets that aren't TCP/UDP or IPv4 pass + if bulk_data: + helpers.bulk(es, index="packets", actions=bulk_data, 
doc_type='packet')
+
     except KeyboardInterrupt:
         raise
     finally:

From 5b600a9b4fd3ac6ede3caeb7b4724e7a3997bf48 Mon Sep 17 00:00:00 2001
From: mkind
Date: Sun, 17 Dec 2017 12:15:59 +0100
Subject: [PATCH 3/6] Remove debug limitation

---
 pcapscanner/main.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pcapscanner/main.py b/pcapscanner/main.py
index 4786069..e70eb13 100755
--- a/pcapscanner/main.py
+++ b/pcapscanner/main.py
@@ -66,7 +66,7 @@ def _log_errors(self):
 
 
     def start(self):
-        pcapfiles = pcap.walk(self.inputdir)[:3]
+        pcapfiles = pcap.walk(self.inputdir)
         print(
             "Collected list of {} files in {}".
             format(len(pcapfiles), self.inputdir)

From 36a7710c431680b7d9add984c398b501a4fdcf15 Mon Sep 17 00:00:00 2001
From: mkind
Date: Sun, 17 Dec 2017 12:40:25 +0100
Subject: [PATCH 4/6] Also support non-TCP/UDP protocols and get the proper
 MAC address

---
 pcapscanner/pcap.py | 43 +++++++++++++++++++++++--------------------
 1 file changed, 23 insertions(+), 20 deletions(-)

diff --git a/pcapscanner/pcap.py b/pcapscanner/pcap.py
index c22e574..21d1bcb 100644
--- a/pcapscanner/pcap.py
+++ b/pcapscanner/pcap.py
@@ -51,28 +51,31 @@ def parser_dpkt(pcapfile, progressbar_position):
         unit=" packages",
         desc=os.path.basename(pcapfile.name)
     ):
-        try:
-            ip = dpkt.ip.IP(buf)
+
+        eth = dpkt.ethernet.Ethernet(buf)
+        ip = dpkt.ip.IP(buf)
+
+        data = {
+            "protocol": ip.get_proto(ip.p).__name__,
+            "ip_src": socket.inet_ntop(socket.AF_INET, ip.src),
+            "ip_dst": socket.inet_ntop(socket.AF_INET, ip.dst),
+            "mac_src": ':'.join(['%02x' % dpkt.compat_ord(x) for x in eth.src]),
+            "mac_dst": ':'.join(['%02x' % dpkt.compat_ord(x) for x in eth.dst]),
+            "pcap_file": os.path.abspath(pcapfile.name),
+            "timestamp": dt.utcfromtimestamp(ts),
+        }
+
+        if ip.get_proto(ip.p) == dpkt.tcp.TCP:
             tcp = ip.data
+            data["port_dst"] = tcp.dport
+            data["port_src"] = tcp.sport
+
+        bulk_data.append(data)
+
+        if len(bulk_data) == 1000:
+            helpers.bulk(es, index="packets", actions=bulk_data, doc_type='packet')
+            bulk_data = []
 
-            bulk_data.append({
-                "protocol": ip.p,
-                "ip_src": socket.inet_ntop(socket.AF_INET, ip.src),
-                "port_src": tcp.sport,
-                "ip_dst": socket.inet_ntop(socket.AF_INET, ip.dst),
-                "port_dst": tcp.dport,
-                "mac_src": "unknown",
-                "mac_dst": "unknown",
-                "pcap_file": os.path.abspath(pcapfile.name),
-                "timestamp": dt.utcfromtimestamp(ts),
-            })
-            if len(bulk_data) == 1000:
-                helpers.bulk(es, index="packets", actions=bulk_data, doc_type='packet')
-                bulk_data = []
-
-        except AttributeError:
-            # ignore packets that aren't TCP/UDP or IPv4
-            pass
 
     if bulk_data:
         helpers.bulk(es, index="packets", actions=bulk_data, doc_type='packet')

From 1fb557e75d63319c3a7dc7d5dc224c85f69ce70c Mon Sep 17 00:00:00 2001
From: mkind
Date: Sun, 17 Dec 2017 12:59:12 +0100
Subject: [PATCH 5/6] Storing the human-friendly protocol string doesn't work
 in Elasticsearch, so we store the id instead

---
 pcapscanner/pcap.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pcapscanner/pcap.py b/pcapscanner/pcap.py
index 21d1bcb..af70a26 100644
--- a/pcapscanner/pcap.py
+++ b/pcapscanner/pcap.py
@@ -56,7 +56,8 @@ def parser_dpkt(pcapfile, progressbar_position):
         ip = dpkt.ip.IP(buf)
 
         data = {
-            "protocol": ip.get_proto(ip.p).__name__,
+            "protocol": ip.p,  # TODO ip.get_proto(ip.p).__name__ would be human readable,
+                               # but es only shows an empty field
             "ip_src": socket.inet_ntop(socket.AF_INET, ip.src),
             "ip_dst": socket.inet_ntop(socket.AF_INET, ip.dst),
            "mac_src": ':'.join(['%02x' % dpkt.compat_ord(x) for x in eth.src]),
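The patches above leave every parsed packet in the Elasticsearch index "packets" (doc type 'packet'), with the fields built in parser_dpkt(): protocol, ip_src, ip_dst, port_src, port_dst, mac_src, mac_dst, pcap_file and timestamp. As a minimal query sketch (not part of the patches; it assumes the default endpoint http://localhost:9200, for example the compose setup added in the next patch, and uses an illustrative port filter), the indexed data could be read back from Python like this:

    from elasticsearch import Elasticsearch

    # Assumes an Elasticsearch instance on the default endpoint
    # http://localhost:9200.
    es = Elasticsearch()

    # Total number of indexed packets.
    total = es.count(index="packets")["count"]

    # Ten most recent packets sent to destination port 443
    # (port and result size are illustrative values only).
    resp = es.search(index="packets", body={
        "query": {"term": {"port_dst": 443}},
        "sort": [{"timestamp": {"order": "desc"}}],
        "size": 10,
    })

    print("indexed packets:", total)
    for hit in resp["hits"]["hits"]:
        src = hit["_source"]
        print(src["timestamp"], src["ip_src"], "->", src["ip_dst"])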
From 819311efb3cbb1734b34f08a175db5e07a45e4c6 Mon Sep 17 00:00:00 2001
From: Sebastian Lutter
Date: Tue, 19 Dec 2017 10:34:52 +0100
Subject: [PATCH 6/6] Add docker-compose setup with Kibana and Elasticsearch
 to the project.

---
 docker/ESKibanaManager.sh     | 99 +++++++++++++++++++++++++++++++++++
 docker/docker-compose-dev.yml | 38 ++++++++++++++
 2 files changed, 137 insertions(+)
 create mode 100755 docker/ESKibanaManager.sh
 create mode 100644 docker/docker-compose-dev.yml

diff --git a/docker/ESKibanaManager.sh b/docker/ESKibanaManager.sh
new file mode 100755
index 0000000..9f38ede
--- /dev/null
+++ b/docker/ESKibanaManager.sh
@@ -0,0 +1,99 @@
+#!/bin/bash
+
+# working dir is the directory the script is located in
+WD="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+cd "$WD"
+WEB_ENV="kibana elasticsearch"
+
+CONF="-p pcapscan -f docker-compose.yml "
+
+function exitIfErr() {
+    if [ $1 -ne 0 ]; then
+        echo
+        echo "$2"
+        echo "Abort execution."
+        echo
+        exit 1
+    fi
+}
+which docker &> /dev/null
+exitIfErr $? "Docker is not installed on this system. Please install the docker.io package or Docker CE."
+function installComposerIfNotAvailable() {
+    # is docker installed?
+    which docker &> /dev/null
+    exitIfErr $? "Cannot find docker executable."
+    # check if docker-compose is installed
+    which docker-compose &> /dev/null
+    if [ $? -eq 0 ]; then
+        echo "docker-compose is available from PATH: $( which docker-compose )."
+        return
+    elif [ -f "/usr/local/bin/docker-compose" ]; then
+        echo "docker-compose is available in /usr/local/bin/docker-compose"
+        return
+    fi
+    # download it and put it into /usr/local/
+    echo sudo curl -L "https://github.com/docker/compose/releases/download/1.18.0/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose
+    sudo curl -L "https://github.com/docker/compose/releases/download/1.18.0/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose
+    exitIfErr $? "Failed to download and install docker-compose."
+    # make executable
+    sudo chmod +x /usr/local/bin/docker-compose
+    echo "Successfully downloaded and installed docker-compose from source."
+}
+function getAllComposeServiceNames() {
+    docker-compose $CONF config --services | head -c -1
+}
+function getAllComposeServiceNamesOneliner() {
+    getAllComposeServiceNames | tr '\n' '|'
+}
+# make sure docker-compose is available
+installComposerIfNotAvailable
+
+function usage() {
+    SERVICES="[$(getAllComposeServiceNamesOneliner)]"
+    echo "Usage:"
+    echo "$0 [start|stop|restart|list|shell|log|clear] "
+    echo
+    echo "$0 shell $SERVICES"
+    echo "$0 log $SERVICES"
+    exit 1
+}
+
+
+CONF="-f docker-compose-dev.yml -p pcapscan"
+
+if [ "$1" == "start" ]; then
+    docker-compose $CONF up -d
+    echo
+    echo "Endpoints:"
+    echo "elasticsearch: http://localhost:9200"
+    echo "kibana: http://localhost:5601"
+    echo
+elif [ "$1" == "stop" ]; then
+    docker-compose $CONF down
+elif [ "$1" == "list" ]; then
+    docker-compose $CONF ps
+elif [ "$1" == "restart" ]; then
+    ./$0 stop
+    ./$0 start
+elif [ "$1" == "log" ]; then
+    docker-compose $CONF logs -f "$2"
+elif [ "$1" == "shell" ]; then
+    docker-compose $CONF exec "$2" bash
+elif [ "$1" == "clear" ]; then
+    echo
+    echo "Clear elasticsearch index?"
+    echo
+    read -r -p "Are you sure? [y/N] " response
+    echo
+    if [[ ! $response =~ ^(yes|y)$ ]]; then
+        echo "Got $response from user. Abort processing."
+        exit 1
+    else
+        echo "Ok, I'll do it."
+    fi
+    docker volume rm pcapscan_stats-elasticsearch
+else
+    echo "Unknown parameter $1."
+    echo
+    usage
+fi
diff --git a/docker/docker-compose-dev.yml b/docker/docker-compose-dev.yml
new file mode 100644
index 0000000..0716387
--- /dev/null
+++ b/docker/docker-compose-dev.yml
@@ -0,0 +1,38 @@
+version: "2"
+services:
+
+# kibana + elasticsearch for global statistics
+  kibana:
+#    image: docker.elastic.co/kibana/kibana:5.3.0
+    image: kibana:5.3.0
+    links:
+      - elasticsearch
+    ports:
+      - "5601:5601"
+
+# kibana database backend elasticsearch
+  elasticsearch:
+    image: elasticsearch:5.3.0
+    ports:
+      - "9200:9200"
+#    image: docker.elastic.co/elasticsearch/elasticsearch:5.3.0
+#    environment:
+#      - bootstrap.memory_lock=true
+#      - "ES_JAVA_OPTS=-Xms512m -Xmx512m"
+#      - "node.max_local_storage_nodes=4"
+#    ulimits:
+#      memlock:
+#        soft: -1
+#        hard: -1
+#      nofile:
+#        soft: 65536
+#        hard: 65536
+#    mem_limit: 1g
+#    cap_add:
+#      - IPC_LOCK
+    volumes:
+      - stats-elasticsearch:/usr/share/elasticsearch/data
+
+# definition of volumes (global)
+volumes:
+  stats-elasticsearch: