Begin parsing wifi packets for airtime report.

Also try to improve PHY report accuracy by detecting which network each
packet is associated with, and only counting packets from the network
seen most often in the capture.

Change-Id: I2861b1111332b3fcc0a4a90ed0cfc1f429f7e26f
diff --git a/wifitables/ifstats/__init__.py b/wifitables/ifstats/__init__.py
new file mode 100644
index 0000000..cedb8cb
--- /dev/null
+++ b/wifitables/ifstats/__init__.py
@@ -0,0 +1,9 @@
+import fabric
+import skids
+
+fabric = fabric
+skids = skids
+
+InterfaceStats = fabric.InterfaceStats
+Parse = fabric.Parse
+Restore = fabric.Restore
diff --git a/wifitables/ifstats.py b/wifitables/ifstats/fabric.py
similarity index 96%
rename from wifitables/ifstats.py
rename to wifitables/ifstats/fabric.py
index a148f72..06608c7 100644
--- a/wifitables/ifstats.py
+++ b/wifitables/ifstats/fabric.py
@@ -184,9 +184,10 @@
     # the scan sometimes includes comment lines. assume that anything that has
     # a valid channel isn't a comment line.
     if chre.match(channel):
-      result += [[ssid, bssid, int(rssi), channel, ht, cc, security]]
+      result += [dict(zip(header,
+                          [ssid, bssid, int(rssi), channel, ht, cc, security]))]
 
-  return [header] + result
+  return result
 
 
 def Parse(system, cache):
@@ -213,18 +214,18 @@
       'Linux': ['ipaddr', 'iwlink', 'iwscan'],
   }
 
+  read = set()
   for system, names in system_results.items():
-    read = 0
     for name in names:
       try:
         with open(os.path.join(report_dir, name)) as infile:
           cache[name] = infile.read()
-          read += 1
+          read.add(system)
       except IOError:
         cache[name] = ''
 
-    if read:
-      return system, cache
+  if read:
+    return read, cache
 
   raise IOError('Could not open report in {}'.format(report_dir))
 
diff --git a/wifitables/ifstats_skids.py b/wifitables/ifstats/skids.py
similarity index 100%
rename from wifitables/ifstats_skids.py
rename to wifitables/ifstats/skids.py
diff --git a/wifitables/ifstats_skids_test.py b/wifitables/ifstats_test.py
similarity index 91%
rename from wifitables/ifstats_skids_test.py
rename to wifitables/ifstats_test.py
index 4e4e287..11ca13a 100755
--- a/wifitables/ifstats_skids_test.py
+++ b/wifitables/ifstats_test.py
@@ -1,7 +1,7 @@
 #!/usr/bin/python2.7
 """Tests for ifstats_skids."""
 
-import ifstats_skids
+import ifstats
 from wvtest import wvtest
 
 
@@ -9,7 +9,7 @@
 def ParseSavedStatusWireless():
   with open('testdata/skids/status_wireless.html') as status_wireless:
     text = status_wireless.read()
-    res = ifstats_skids.ParseStatusWireless(text)
+    res = ifstats.skids.ParseStatusWireless(text)
     wvtest.WVPASSEQ(res, {
         'Wireless Band': '802.11ac',
         'AP Mac Address (BSSID)': '00:26:86:F0:22:C9',
diff --git a/wifitables/report.py b/wifitables/report.py
index b5770c6..2d4a1a5 100755
--- a/wifitables/report.py
+++ b/wifitables/report.py
@@ -86,6 +86,64 @@
   return counter.most_common()[0][0], phy / alltimes
 
 
+def ParsePcap(stream, known_ssids):
+  """ParsePcap computes PHY performance for a packet capture in `stream`.
+
+  Args:
+    stream: an open `file` object pointing to a file with pcap data.
+    known_ssids: a dict mapping known BSSIDs to SSIDs.
+
+  Returns:
+    a dict containing PHY performance information. keys are descriptive strings,
+    values are strings or numbers in Mbit/s.
+  """
+
+  rates = collections.defaultdict(list)
+  times_seen = collections.Counter()
+  start_secs = None
+
+  for opt, frame in wifipacket.Packetize(stream):
+    if start_secs is None:
+      start_secs = opt.pcap_secs
+
+    # Learn SSIDs from the capture so BSSIDs our scan missed are still known.
+    ssid = wifipacket.IdentifySSID(opt, frame)
+    if ssid is not None:
+      known_ssids[opt.ta] = ssid
+
+    for sta, direction in [('ra', 'up'), ('ta', 'down'), ('xa', 'across')]:
+      bssid = opt.get(sta)
+      ssid = known_ssids.get(bssid)
+      if ssid:
+        rates[ssid].append((opt.pcap_secs - start_secs,
+                            direction,
+                            opt.rate,
+                            len(frame)))
+        times_seen[ssid] += 1
+        break
+
+  if not times_seen:
+    return {}
+
+  modal_ssid, _ = times_seen.most_common(1)[0]
+  summary = {}
+  for _, direction, rate, size in rates[modal_ssid]:
+    size_weighted_rate = rate * float(size)
+    if direction not in summary:
+      summary[direction] = [size_weighted_rate, size]
+    else:
+      summary[direction][0] += size_weighted_rate
+      summary[direction][1] += size
+
+  line = {'PHY ssid': modal_ssid}
+  for direction, accum in summary.items():
+    size_weighted_rate, size = accum
+    line['PHY {}'.format(direction)] = ((size_weighted_rate / size) if size
+                                        else 0.0)
+
+  return line
+
+
 def Channel(text_channel):
   """Given a text channel spec like 149,+1 return the central freq and width."""
   LoadChannels()
@@ -122,11 +180,12 @@
   line['Steps'] = int(steps)
 
   system, cache = ifstats.Restore(report_dir)
-  result = ifstats.Parse(system, cache)
 
-  if not result.get('link'):
-    pass
-  elif system == 'Darwin':
+  # known_ssids is a map from BSSID(string) => SSID(string)
+  known_ssids = {}
+
+  if 'Darwin' in system:
+    result = ifstats.Parse('Darwin', cache)
     airport = result.get('link')
     channel, width = Channel(airport['channel'])
     shared = 0
@@ -134,13 +193,15 @@
 
     scan = result.get('scan')
     if len(scan) > 1:
-      for row in scan[1:]:
-        oc, ow = Channel(row[3])
+      for row in scan:
+        oc, ow = Channel(row['CHANNEL'])
         if channel == oc and width == ow:
           shared += 1
         if Overlap(channel, width, oc, ow):
           overlap += 1
 
+        known_ssids[row['BSSID']] = row['SSID']
+
     line.update({
         'Channel': channel,
         'Width': width,
@@ -149,7 +210,9 @@
         'Shared': shared,
         'Interfering': overlap - shared
     })
-  elif system == 'Linux':
+
+  if 'Linux' in system:
+    result = ifstats.Parse('Linux', cache)
     iwlink = result.get('link')
     signal = int(iwlink.get('signal', '0 dBm').split()[0])
     channel = int(iwlink.get('freq', '0'))
@@ -165,14 +228,24 @@
         'RSSI': signal,
     })
 
+  # TODO(willangley): integrate skid statistics with the rest of the benchmark
+  #   framework system detection.
+  try:
+    with open(os.path.join(report_dir, 'status_wireless')) as status_wireless:
+      result = ifstats.skids.ParseStatusWireless(status_wireless.read())
+      width = result['Bandwidth'].split()[0]
+      line.update({
+          'Channel': result['Channel'],
+          'Width': width,
+          'RSSI': result['RSSI'],
+      })
+  except IOError:
+    pass
+
   try:
     ppath = os.path.join(report_dir, 'testnetwork.pcap')
     with open(ppath) as stream:
-      rates = [float(opt.rate) for opt, _ in wifipacket.Packetize(stream)]
-
-      # TODO(willangley): come up with a meaningful modal MCS for mixed
-      #   802.11n/802.11ac captures like we have here.
-      line['PHY'] = sum(rates)/max(len(rates), 1)
+      line.update(ParsePcap(stream, known_ssids))
   except IOError:
     try:
       mpath = os.path.join(report_dir, 'mcs')
@@ -262,10 +335,11 @@
   """Write a network testing report in .tsv format to stdout."""
   # include every field we can write in the header row
   header = ['Series', 'Time', 'Steps', 'Channel', 'Width', 'RSSI', 'Noise',
-            'Shared', 'Interfering', 'MCS', 'PHY', 'TCP BW up',
-            'TCP BW up units', 'UDP BW up', 'UDP BW up units', 'TCP BW down',
-            'TCP BW down units', 'UDP BW down', 'UDP BW down units',
-            'isostream']
+            'Shared', 'Interfering',
+            'MCS', 'PHY', 'PHY ssid', 'PHY up', 'PHY down', 'PHY across',
+            'TCP BW up', 'TCP BW up units', 'UDP BW up', 'UDP BW up units',
+            'TCP BW down', 'TCP BW down units', 'UDP BW down',
+            'UDP BW down units', 'isostream']
 
   writer = csv.DictWriter(sys.stdout, header, dialect=csv.excel_tab)
   writer.writeheader()
diff --git a/wifitables/report_test.py b/wifitables/report_test.py
index a7d5a87..e4b6e98 100755
--- a/wifitables/report_test.py
+++ b/wifitables/report_test.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python2.7 -S
+#!/usr/bin/python2.7
 """Tests for report."""
 
 import ifstats
@@ -39,15 +39,16 @@
 
   print 'Restoring report'
   system, cache = ifstats.Restore(rpt)
-  wvtest.WVPASSEQ(system, 'Linux')
+  wvtest.WVPASSEQ(len(system), 1)
+  wvtest.WVPASS('Linux' in system)
 
   print 'Checking IP address'
-  addrmap = ifstats.ParseIpAddr(cache['ipaddr'])
+  addrmap = ifstats.fabric.ParseIpAddr(cache['ipaddr'])
   wvtest.WVPASSEQ(addrmap.get('lo'), '127.0.0.1/32')
   wvtest.WVPASSEQ(addrmap.get('wcli0'), '192.168.1.222/24')
 
   print 'Checking for link information'
-  data = ifstats.ParseIwLink(cache['iwlink'])
+  data = ifstats.fabric.ParseIwLink(cache['iwlink'])
   wvtest.WVPASSEQ(data.get('SSID'), 'GSAFNS1441P0208_TestWifi')
   wvtest.WVPASSEQ(data.get('BSSID'), 'f4:f5:e8:80:f3:d0')
 
diff --git a/wifitables/sample.py b/wifitables/sample.py
index 6bebbdd..ddcef21 100755
--- a/wifitables/sample.py
+++ b/wifitables/sample.py
@@ -17,7 +17,7 @@
 from fabric import utils
 
 import ifstats
-import ifstats_skids
+import ifstats.skids as ifstats_skids
 import iperf
 import isostream
 import options
diff --git a/wifitables/wifipacket.py b/wifitables/wifipacket.py
index 68f8596..b50d0fb 100644
--- a/wifitables/wifipacket.py
+++ b/wifitables/wifipacket.py
@@ -16,6 +16,7 @@
 """Functions for decoding wifi pcap files."""
 
 from __future__ import print_function
+import binascii
 import bz2
 import csv
 import gzip
@@ -384,6 +385,45 @@
     yield opt, frame
 
 
+def CheckFrame(frame):
+  """CheckFrame returns True if the frame checksum is good, False otherwise."""
+  crc = struct.pack('<I', binascii.crc32(frame[:-4]) & 0xffffffff)
+  return crc == frame[-4:]
+
+
+def IdentifySSID(opt, frame):
+  """Given an opt and frame from `Packetize`, report any SSIDs found.
+
+  These are currently only expected to be in ProbeResponse frames.
+
+  Args:
+    opt: dict of options computed by Packetize
+    frame: an 802.11 frame as a str
+  Returns:
+    SSID found as a string, or None.
+  """
+  # only look at ProbeResponse frames
+  if opt.type != 0x05:
+    return None
+
+  # ignore invalid frames
+  if not CheckFrame(frame):
+    return None
+
+  # timestamp, bi, capabilities = struct.unpack('<QHH', frame[24:36])
+  rawtagged = frame[36:-4]
+  tagged = {}
+  start = 0
+  while start < len(rawtagged):
+    tnum, tlen = struct.unpack('<BB', rawtagged[start:start+2])
+    start += 2
+    tagged[tnum] = rawtagged[start:start+tlen]
+    start += tlen
+
+  ssid = tagged.get(0)
+  return ssid
+
+
 def Example(p):
   if 0:
     basetime = 0