Use d3js visualization instead of afterquery.

For that:
  * cache json representation of the packets of a given file
  * cache pairs (TA;RA)

Next step(s):
  * apply filter (show) to pairs
  * apply aliases to addresses

Additional info:
  $ gjslint --additional_extensions html d3viz.html
  1 files checked, no errors found.

Change-Id: I62484854d74ee88efb6f4fae5eec7f19a7a78427
diff --git a/app.py b/app.py
index f7fa4d9..59573ad 100644
--- a/app.py
+++ b/app.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python
 # Copyright 2014 Google Inc. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -13,14 +12,17 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+"""Handlers for uploading, filtering, and visualization."""
+
 import collections
 import errno
 import json
 import sys
+import time
 import traceback
 import urllib
-import webapp2
 import tornado.template
+import webapp2
 import wifipacket
 from google.appengine.api import memcache
 from google.appengine.api import users
@@ -28,27 +30,18 @@
 from google.appengine.ext import ndb
 from google.appengine.ext.webapp import blobstore_handlers
 
-
 BROADCAST = 'ff:ff:ff:ff:ff:ff'
 DEFAULT_FIELDS = ['seq', 'rate']
 AVAIL_FIELDS = ['seq', 'mcs', 'spatialstreams', 'bw', 'rate', 'retry',
                 'type', 'typestr', 'dbm_antsignal', 'dbm_antnoise',
                 'bad']
 
+IS_DEBUG = False
+SAMPLE_SIZE = 2
+
 loader = tornado.template.Loader('.')
 
 
-def _Esc(s):
-  """Like tornado.escape.url_escape, but only escapes &, #, and %."""
-  out = []
-  for c in s:
-    if c in ['&', '#', '%']:
-      out.append('%%%02X' % ord(c))
-    else:
-      out.append(c)
-  return ''.join(out)
-
-
 def AllowedEmails():
   try:
     return open('email-allow.txt').read().split()
@@ -61,6 +54,8 @@
 
 
 def GoogleLoginRequired(func):
+  """Enforcing @google.com login."""
+
   def Handler(self, *args, **kwargs):
     user = users.get_current_user()
     if not user:
@@ -75,6 +70,8 @@
 
 
 class PcapData(ndb.Model):
+  """Info about pcap file and its visualization settings/cache."""
+
   create_time = ndb.DateTimeProperty(auto_now_add=True)
   create_user_email = ndb.StringProperty()
   filename = ndb.StringProperty()
@@ -82,12 +79,19 @@
   show_fields = ndb.StringProperty(repeated=True)
   aliases = ndb.PickleProperty()
 
+  # Cached JSON representations for various useful subsets of data
+  # to be passed to the JS side for visualization.
+  # All captured packets
+  js_packets = ndb.JsonProperty(compressed=True)
+  # All pairs of (transmitter, receiver)
+  js_streams = ndb.JsonProperty(compressed=True)
+
   @staticmethod
-  def _GetDefault():
+  def GetDefault():
     return PcapData.get_or_insert(str('*'), show_hosts=[], aliases={})
 
   @staticmethod
-  def _GetOrInsertFromBlob(blob_info):
+  def GetOrInsertFromBlob(blob_info):
     u = users.get_current_user()
     if u:
       email = u.email()
@@ -100,6 +104,7 @@
 
 
 class _BaseHandler(webapp2.RequestHandler):
+
   def render(self, template, **kwargs):
     d = dict()
     d.update(kwargs)
@@ -107,6 +112,7 @@
 
 
 class MainHandler(_BaseHandler):
+
   @GoogleLoginRequired
   def get(self):
     upload_url = blobstore.create_upload_url('/upload')
@@ -117,6 +123,7 @@
 
 
 class UploadHandler(blobstore_handlers.BlobstoreUploadHandler):
+
   def post(self):
     upload_files = self.get_uploads()
     sys.stderr.write('upload: %r\n' % upload_files)
@@ -131,17 +138,21 @@
 
 
 class DownloadHandler(blobstore_handlers.BlobstoreDownloadHandler):
+
   def get(self, blobres):
     blob_info = blobstore.BlobInfo.get(str(urllib.unquote(blobres)))
     self.send_blob(blob_info)
 
 
 def _Boxes(blob_info):
+  """Re-/store from/to memcache number of packets per mac address."""
+
   boxes = memcache.get(str(blob_info.key()), namespace='boxes')
   if not boxes:
     reader = blob_info.open()
     boxes = collections.defaultdict(lambda: 0)
-    for p, frame in wifipacket.Packetize(reader):
+    # TODO(katepek): use cache here instead if available
+    for p, unused_frame in wifipacket.Packetize(reader):
       if 'flags' in p and p.flags & wifipacket.Flags.BAD_FCS: continue
       if 'ta' in p and 'ra' in p:
         boxes[p.ta] += 1
@@ -152,11 +163,12 @@
 
 
 class ViewHandler(_BaseHandler):
+
   @GoogleLoginRequired
   def get(self, blobres):
     blob_info = blobstore.BlobInfo.get(str(urllib.unquote(blobres)))
-    capdefault = PcapData._GetDefault()
-    pcapdata = PcapData._GetOrInsertFromBlob(blob_info)
+    capdefault = PcapData.GetDefault()
+    pcapdata = PcapData.GetOrInsertFromBlob(blob_info)
 
     boxes = _Boxes(blob_info)
     cutoff = max(boxes.itervalues()) * 0.01
@@ -189,17 +201,18 @@
 
 
 class SaveHandler(_BaseHandler):
+
   @GoogleLoginRequired
   def post(self, blobres):
     blob_info = blobstore.BlobInfo.get(str(urllib.unquote(blobres)))
-    capdefault = PcapData._GetDefault()
+    capdefault = PcapData.GetDefault()
     u = users.get_current_user()
     if u:
       email = u.email()
     else:
       email = 'anonymous'
-    sys.stderr.write('stupid user:%r email:%r\n' % (u, u.email()))
-    pcapdata = PcapData._GetOrInsertFromBlob(blob_info)
+    sys.stderr.write('stupid user:%r email:%r\n' % (u, email))
+    pcapdata = PcapData.GetOrInsertFromBlob(blob_info)
     boxes = _Boxes(blob_info)
     pcapdata.show_hosts = []
     for b in boxes.keys():
@@ -217,62 +230,63 @@
       if self.request.get('key-%s' % k):
         pcapdata.show_fields.append(k)
 
+    _MaybeCache('on' == self.request.get('update-cache'),
+                blob_info, pcapdata)
+
     capdefault.put()
     pcapdata.put()
-    url = ('%s?hosts=%s&keys=%s'
-           % (self.request.url.replace('/save/', '/json/'),
-              _Esc(','.join(pcapdata.show_hosts)),
-              _Esc(','.join(pcapdata.show_fields))))
-    self.redirect('//afterquery.appspot.com/?url=%s&chart=traces' % _Esc(url))
+
+    self.render('d3viz.html')
+
+
+def _MaybeCache(update_cache, blob_info, pcapdata):
+  """Update cache when asked to do so. Cache when no cache found."""
+
+  if update_cache:
+    pcapdata.js_packets = None
+    pcapdata.js_streams = None
+
+  if pcapdata.js_packets is not None:
+    print "We just used cache, didn't we"
+    return
+
+  reader = blob_info.open()
+  begin = time.time()
+
+  j = []
+  pairs = set()
+  for i, (p, unused_frame) in enumerate(wifipacket.Packetize(reader)):
+    if IS_DEBUG and i > SAMPLE_SIZE:
+      print 'Done', i
+      break
+    j.append(p)
+    pairs.add((p.get('ta', 'no_ta'), p.get('ra', 'no_ra')))
+
+  pairs_dict = [{'ta': t[0], 'ra': t[1]} for t in pairs]
+
+  pcapdata.js_packets = json.dumps(j)
+  pcapdata.js_streams = json.dumps(pairs_dict)
+
+  end = time.time()
+  print 'Spent on caching', (end - begin), 'sec'
 
 
 class JsonHandler(_BaseHandler):
+
   @GoogleLoginRequired
   def get(self, blobres):
-    # TODO(apenwarr): allow http-level caching
     blob_info = blobstore.BlobInfo.get(str(urllib.unquote(blobres)))
-    pcapdata = PcapData._GetOrInsertFromBlob(blob_info)
-    aliases = pcapdata.aliases
-    show_hosts = self.request.get('hosts').split(',')
-    reader = blob_info.open()
-    out = collections.defaultdict(list)
-    keys = self.request.get('keys', 'seq,rate').split(',')
-    timebase = 0
-    for i, (p, frame) in enumerate(wifipacket.Packetize(reader)):
-      if not timebase: timebase = p.pcap_secs
-      ta = p.get('ta')
-      ra = p.get('ra')
-      if ta not in show_hosts and aliases.get(ta) not in show_hosts:
-        ta = ra = '~other'  # '~' causes it to sort last in the list
-      elif ta in aliases:
-        ta = aliases[ta]
-      if ra not in show_hosts and aliases.get(ra) not in show_hosts:
-        ta = ra = '~other'  # '~' causes it to sort last in the list
-      elif ra in aliases:
-        ra = aliases[ra]
-      out[(ta, ra)].append(('%.6f' % (p.pcap_secs - timebase),
-                            tuple(p.get(i) for i in keys)))
-    sessions = list(sorted(out.keys(), key=lambda k: k))
-    headers = ['secs']
-    data = []
-    extra = []
-    for sesskey in sessions:
-      ta, ra = sesskey
-      for k in keys:
-        if ta == '~other' and ra == '~other':
-          headers.append('other (%s)' % (k,))
-        else:
-          headers.append('%s to %s (%s)' % (ta, ra, k))
-      for secs, values in out[sesskey]:
-        data.append([secs] + extra + list(values))
-      extra += [None] * len(keys)
-    j = json.dumps([headers] + data)
-    if self.request.get('jsonp'):
-      j = '%s(%s)' % (self.request.get('jsonp'), j)
-    self.response.write(j)
+    pcapdata = PcapData.GetOrInsertFromBlob(blob_info)
+
+    self.response.headers['Content-Type'] = 'application/json'
+    js_bundle = {
+        'js_packets': pcapdata.js_packets,
+        'js_streams': pcapdata.js_streams,
+    }
+    self.response.out.write(json.dumps(js_bundle))
 
 
-def Handle500(req, resp, exc):
+def Handle500(unused_req, resp, exc):
   resp.clear()
   resp.headers['Content-type'] = 'text/plain'
   resp.write(traceback.format_exc(exc))
diff --git a/d3viz.html b/d3viz.html
new file mode 100644
index 0000000..9a66991
--- /dev/null
+++ b/d3viz.html
@@ -0,0 +1,161 @@
+{% extends 'index.html' %}
+
+{% block body %}
+
+    <script type='text/javascript'
+      src='//ajax.googleapis.com/ajax/libs/jquery/1.10.2/jquery.min.js'></script>
+    <script type='text/javascript' src='http://d3js.org/d3.v3.min.js'></script>
+    <script type='text/javascript'>
+      // TODO(katepek): Move into a separate JS file
+
+      var debug = true;
+      function log(o) {
+        if (debug) {
+          console.log(o);
+        }
+      }
+
+      // TODO(katepek): Find better values; currently for 13" screen.
+      var width = 1350;
+      var height = 650;
+      var padding = 20;
+
+      // TODO(katepek): Pull in some standard library for this
+      var pair_colours =
+        ['black', 'red', 'blue', 'green', 'magenta',
+         'gray', 'hotpink', 'chocolate', 'deepskyblue', 'gold'];
+
+      var dataset; // all packets
+      var streams; // pairs of (transmitter, receiver)
+
+      var pcapSecsScale; // x
+      var seqScale;      // y
+
+      try {
+        $.getJSON('/json/' + get_key(), function(json) {
+            begin = new Date().getTime();
+
+            init_data(JSON.stringify(json));
+            init_scales();
+            visualize();
+
+            end = new Date().getTime();
+            log('Spent on visualization ' + ((end - begin) / 1000) + ' sec.');
+        });
+      } catch (error) {
+        console.log(error);
+      }
+
+      function get_key() {
+        parts = window.location.pathname.split('/');
+        return parts[parts.length - 1];
+      }
+
+      function init_data(json_string) {
+        // TODO(katepek): Should sanitize here? E.g., discard bad packets?
+        // Packets w/o seq?
+        js_objects = JSON.parse(json_string);
+        dataset = JSON.parse(js_objects['js_packets']);
+        streams = JSON.parse(js_objects['js_streams']);
+      }
+
+      function raw_seq(d) {
+        return Number(d['seq']);
+      }
+
+      function raw_pcapSecs(d) {
+        return parseFloat(d['pcap_secs']);
+      }
+
+      function init_scales() {
+        // Prepare scale for X axis
+        pcapSecsScale = d3.scale
+          .linear()
+          .domain([d3.min(dataset, raw_pcapSecs),
+            d3.max(dataset, raw_pcapSecs)])
+          .range([2 * padding, width - 2 * padding]);
+
+        // Prepare scale for Y axis
+        seqScale = d3.scale
+          .linear()
+          .domain([d3.min(dataset, raw_seq),
+            d3.max(dataset, raw_seq)])
+          .range([height - padding, padding]);
+      }
+
+      function seq(d) {
+        var seq = seqScale(Number(d['seq']));
+        return seq;
+      }
+
+      function pcapSecs(d) {
+        var pcapSecs = pcapSecsScale(parseFloat(d['pcap_secs']));
+        return pcapSecs;
+      }
+
+      function visualize() {
+        var svg = d3
+          .select('body')
+          .append('svg')
+          .attr('width', width)
+          .attr('height', height)
+          .style('border', '1px solid black');
+
+        // TODO(katepek): Show a summary somewhere as a legend
+        // which pair corresponds to which colour
+        for (i = 0; i < streams.length; i++) {
+          log('pcap_vs_seq' + i);
+          log(get_colour(i));
+          svg
+            .selectAll('pcap_vs_seq' + i)
+            .data(dataset)
+            .enter()
+            .append('circle')
+            .filter(function(d) {
+              return d['ta'] == streams[i]['ta'] &&
+                     d['ra'] == streams[i]['ra'];
+            })
+            .attr('cx', pcapSecs)
+            .attr('cy', seq)
+            .attr('r', 1)
+            .attr('fill', get_colour(i))
+            .append('title')
+            .text(
+               function(d) {
+                 return d['typestr'] +
+                   ': pcapSecs=' + d['pcap_secs'] +
+                   '; seq=' + d['seq'] +
+                   '\n(ta=' + d['ta'] + ',' + 'ra=' + d['ra'] + ')';
+               }
+            );
+        }
+
+        // TODO(katepek): Axes seem to show range, not the domain
+        var pcapSecsAxis = d3.svg.axis()
+          .scale(pcapSecsScale)
+          .orient('bottom')
+          .ticks(5);
+        var seqAxis = d3.svg.axis()
+          .scale(seqScale)
+          .orient('right')
+          .ticks(5);
+
+        svg.append('g')
+          .attr('class', 'axis')
+          .attr('transform', 'translate(0,' + (height - padding) + ')')
+          .call(pcapSecsAxis);
+        svg.append('g')
+          .attr('class', 'axis')
+          .attr('transform', 'translate(' + (width - 2 * padding) + ',0)')
+          .call(seqAxis);
+      }
+
+      function get_colour(i) {
+        if (i < pair_colours.length)
+          return pair_colours[i];
+        return pair_colours[i % pair_colours.length];
+      }
+
+    </script>
+
+{% end %}
diff --git a/view.html b/view.html
index 987cd59..54aeded 100644
--- a/view.html
+++ b/view.html
@@ -41,6 +41,10 @@
       <td></td>
     </tr>
   </table>
-  <input type='submit' value='Save and View' style='margin: 1em' />
+  <br>
+  <input type='checkbox' name='update-cache' id='update-cache'>
+  <label for='update-cache'>Update cache</label>
+  <br>
+  <input type='submit' value='Save and View' />
   </form>
 {% end %}
diff --git a/wifipacket.py b/wifipacket.py
index 37e6521..68f8596 100755
--- a/wifipacket.py
+++ b/wifipacket.py
@@ -73,28 +73,28 @@
 
 
 RADIOTAP_FIELDS = [
-    ('mac_usecs', 'Q'),
-    ('flags', 'B'),
-    ('rate', 'B'),
-    ('channel', 'HH'),
-    ('fhss', 'BB'),
-    ('dbm_antsignal', 'b'),
-    ('dbm_antnoise', 'b'),
-    ('lock_quality', 'H'),
-    ('tx_attenuation', 'H'),
-    ('db_tx_attenuation', 'B'),
-    ('dbm_tx_power', 'b'),
-    ('antenna', 'B'),
-    ('db_antsignal', 'B'),
-    ('db_antnoise', 'B'),
-    ('rx_flags', 'H'),
-    ('tx_flags', 'H'),
-    ('rts_retries', 'B'),
-    ('data_retries', 'B'),
-    ('channelplus', 'II'),
-    ('ht', 'BBB'),
-    ('ampdu_status', 'IHBB'),
-    ('vht', 'HBB4sBBH'),
+    ('mac_usecs', 'Q'),          # microseconds = timestamp received
+    ('flags', 'B'),              # bit field (matches the enum above)
+    ('rate', 'B'),               # Mb/s (=speed of the packet) can slow down
+    ('channel', 'HH'),           # the channel number on which it was received
+    ('fhss', 'BB'),              # ???
+    ('dbm_antsignal', 'b'),      # power level of the received signal
+    ('dbm_antnoise', 'b'),       # power level of the background noise
+    ('lock_quality', 'H'),       # nobody really uses it for anything (NRUIFA)
+    ('tx_attenuation', 'H'),     # NRUIFA
+    ('db_tx_attenuation', 'B'),  # NRUIFA
+    ('dbm_tx_power', 'b'),       # NRUIFA
+    ('antenna', 'B'),            # which antenna
+    ('db_antsignal', 'B'),       # uncalibrated dbm_*
+    ('db_antnoise', 'B'),        # uncalibrated dbm_*
+    ('rx_flags', 'H'),           # ???
+    ('tx_flags', 'H'),           # ???
+    ('rts_retries', 'B'),        # ???
+    ('data_retries', 'B'),       # ???
+    ('channelplus', 'II'),       # ???
+    ('ht', 'BBB'),               # like 'rate' only more (=high throughput)
+    ('ampdu_status', 'IHBB'),    # ??? MBU
+    ('vht', 'HBB4sBBH'),         # like 'ht' only more (=very high throughput)
 ]
 
 
@@ -246,12 +246,15 @@
 
   last_ta = None
   last_ra = None
+
   while 1:
     opt = Struct({})
 
     # pcap packet header
     pcaphdr = stream.read(16)
+
     if len(pcaphdr) < 16: break  # EOF
+
     (ts_sec, ts_usec,
      incl_len, orig_len) = struct.unpack(byteorder + 'IIII', pcaphdr)
     if incl_len > orig_len: