Use d3js visualization instead of afterquery.
For that:
* cache JSON representation of the packets of a given file
* cache pairs (TA, RA)
Next step(s):
* apply filter (show) to pairs
* apply aliases to addresses
Additional info:
$ gjslint --additional_extensions html d3viz.html
1 files checked, no errors found.
Change-Id: I62484854d74ee88efb6f4fae5eec7f19a7a78427
diff --git a/app.py b/app.py
index f7fa4d9..59573ad 100644
--- a/app.py
+++ b/app.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python
# Copyright 2014 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -13,14 +12,17 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+"""Handlers for uploading, filtering, and visualization."""
+
import collections
import errno
import json
import sys
+import time
import traceback
import urllib
-import webapp2
import tornado.template
+import webapp2
import wifipacket
from google.appengine.api import memcache
from google.appengine.api import users
@@ -28,27 +30,18 @@
from google.appengine.ext import ndb
from google.appengine.ext.webapp import blobstore_handlers
-
BROADCAST = 'ff:ff:ff:ff:ff:ff'
DEFAULT_FIELDS = ['seq', 'rate']
AVAIL_FIELDS = ['seq', 'mcs', 'spatialstreams', 'bw', 'rate', 'retry',
'type', 'typestr', 'dbm_antsignal', 'dbm_antnoise',
'bad']
+IS_DEBUG = False
+SAMPLE_SIZE = 2
+
loader = tornado.template.Loader('.')
-def _Esc(s):
- """Like tornado.escape.url_escape, but only escapes &, #, and %."""
- out = []
- for c in s:
- if c in ['&', '#', '%']:
- out.append('%%%02X' % ord(c))
- else:
- out.append(c)
- return ''.join(out)
-
-
def AllowedEmails():
try:
return open('email-allow.txt').read().split()
@@ -61,6 +54,8 @@
def GoogleLoginRequired(func):
+ """Enforcing @google.com login."""
+
def Handler(self, *args, **kwargs):
user = users.get_current_user()
if not user:
@@ -75,6 +70,8 @@
class PcapData(ndb.Model):
+ """Info about pcap file and its visualization settings/cache."""
+
create_time = ndb.DateTimeProperty(auto_now_add=True)
create_user_email = ndb.StringProperty()
filename = ndb.StringProperty()
@@ -82,12 +79,19 @@
show_fields = ndb.StringProperty(repeated=True)
aliases = ndb.PickleProperty()
+ # Cached JSON representations for various useful subsets of data
+ # to be passed to the JS side for visualization.
+ # All captured packets
+ js_packets = ndb.JsonProperty(compressed=True)
+ # All pairs of (transmitter, receiver)
+ js_streams = ndb.JsonProperty(compressed=True)
+
@staticmethod
- def _GetDefault():
+ def GetDefault():
return PcapData.get_or_insert(str('*'), show_hosts=[], aliases={})
@staticmethod
- def _GetOrInsertFromBlob(blob_info):
+ def GetOrInsertFromBlob(blob_info):
u = users.get_current_user()
if u:
email = u.email()
@@ -100,6 +104,7 @@
class _BaseHandler(webapp2.RequestHandler):
+
def render(self, template, **kwargs):
d = dict()
d.update(kwargs)
@@ -107,6 +112,7 @@
class MainHandler(_BaseHandler):
+
@GoogleLoginRequired
def get(self):
upload_url = blobstore.create_upload_url('/upload')
@@ -117,6 +123,7 @@
class UploadHandler(blobstore_handlers.BlobstoreUploadHandler):
+
def post(self):
upload_files = self.get_uploads()
sys.stderr.write('upload: %r\n' % upload_files)
@@ -131,17 +138,21 @@
class DownloadHandler(blobstore_handlers.BlobstoreDownloadHandler):
+
def get(self, blobres):
blob_info = blobstore.BlobInfo.get(str(urllib.unquote(blobres)))
self.send_blob(blob_info)
def _Boxes(blob_info):
+  """Return per-MAC packet counts, from memcache or recomputed and stored."""
+
boxes = memcache.get(str(blob_info.key()), namespace='boxes')
if not boxes:
reader = blob_info.open()
boxes = collections.defaultdict(lambda: 0)
- for p, frame in wifipacket.Packetize(reader):
+ # TODO(katepek): use cache here instead if available
+ for p, unused_frame in wifipacket.Packetize(reader):
if 'flags' in p and p.flags & wifipacket.Flags.BAD_FCS: continue
if 'ta' in p and 'ra' in p:
boxes[p.ta] += 1
@@ -152,11 +163,12 @@
class ViewHandler(_BaseHandler):
+
@GoogleLoginRequired
def get(self, blobres):
blob_info = blobstore.BlobInfo.get(str(urllib.unquote(blobres)))
- capdefault = PcapData._GetDefault()
- pcapdata = PcapData._GetOrInsertFromBlob(blob_info)
+ capdefault = PcapData.GetDefault()
+ pcapdata = PcapData.GetOrInsertFromBlob(blob_info)
boxes = _Boxes(blob_info)
cutoff = max(boxes.itervalues()) * 0.01
@@ -189,17 +201,18 @@
class SaveHandler(_BaseHandler):
+
@GoogleLoginRequired
def post(self, blobres):
blob_info = blobstore.BlobInfo.get(str(urllib.unquote(blobres)))
- capdefault = PcapData._GetDefault()
+ capdefault = PcapData.GetDefault()
u = users.get_current_user()
if u:
email = u.email()
else:
email = 'anonymous'
- sys.stderr.write('stupid user:%r email:%r\n' % (u, u.email()))
- pcapdata = PcapData._GetOrInsertFromBlob(blob_info)
+ sys.stderr.write('stupid user:%r email:%r\n' % (u, email))
+ pcapdata = PcapData.GetOrInsertFromBlob(blob_info)
boxes = _Boxes(blob_info)
pcapdata.show_hosts = []
for b in boxes.keys():
@@ -217,62 +230,63 @@
if self.request.get('key-%s' % k):
pcapdata.show_fields.append(k)
+ _MaybeCache('on' == self.request.get('update-cache'),
+ blob_info, pcapdata)
+
capdefault.put()
pcapdata.put()
- url = ('%s?hosts=%s&keys=%s'
- % (self.request.url.replace('/save/', '/json/'),
- _Esc(','.join(pcapdata.show_hosts)),
- _Esc(','.join(pcapdata.show_fields))))
- self.redirect('//afterquery.appspot.com/?url=%s&chart=traces' % _Esc(url))
+
+ self.render('d3viz.html')
+
+
+def _MaybeCache(update_cache, blob_info, pcapdata):
+ """Update cache when asked to do so. Cache when no cache found."""
+
+ if update_cache:
+ pcapdata.js_packets = None
+ pcapdata.js_streams = None
+
+ if pcapdata.js_packets is not None:
+ print "We just used cache, didn't we"
+ return
+
+ reader = blob_info.open()
+ begin = time.time()
+
+ j = []
+ pairs = set()
+ for i, (p, unused_frame) in enumerate(wifipacket.Packetize(reader)):
+ if IS_DEBUG and i > SAMPLE_SIZE:
+ print 'Done', i
+ break
+ j.append(p)
+ pairs.add((p.get('ta', 'no_ta'), p.get('ra', 'no_ra')))
+
+ pairs_dict = [{'ta': t[0], 'ra': t[1]} for t in pairs]
+
+ pcapdata.js_packets = json.dumps(j)
+ pcapdata.js_streams = json.dumps(pairs_dict)
+
+ end = time.time()
+ print 'Spent on caching', (end - begin), 'sec'
class JsonHandler(_BaseHandler):
+
@GoogleLoginRequired
def get(self, blobres):
- # TODO(apenwarr): allow http-level caching
blob_info = blobstore.BlobInfo.get(str(urllib.unquote(blobres)))
- pcapdata = PcapData._GetOrInsertFromBlob(blob_info)
- aliases = pcapdata.aliases
- show_hosts = self.request.get('hosts').split(',')
- reader = blob_info.open()
- out = collections.defaultdict(list)
- keys = self.request.get('keys', 'seq,rate').split(',')
- timebase = 0
- for i, (p, frame) in enumerate(wifipacket.Packetize(reader)):
- if not timebase: timebase = p.pcap_secs
- ta = p.get('ta')
- ra = p.get('ra')
- if ta not in show_hosts and aliases.get(ta) not in show_hosts:
- ta = ra = '~other' # '~' causes it to sort last in the list
- elif ta in aliases:
- ta = aliases[ta]
- if ra not in show_hosts and aliases.get(ra) not in show_hosts:
- ta = ra = '~other' # '~' causes it to sort last in the list
- elif ra in aliases:
- ra = aliases[ra]
- out[(ta, ra)].append(('%.6f' % (p.pcap_secs - timebase),
- tuple(p.get(i) for i in keys)))
- sessions = list(sorted(out.keys(), key=lambda k: k))
- headers = ['secs']
- data = []
- extra = []
- for sesskey in sessions:
- ta, ra = sesskey
- for k in keys:
- if ta == '~other' and ra == '~other':
- headers.append('other (%s)' % (k,))
- else:
- headers.append('%s to %s (%s)' % (ta, ra, k))
- for secs, values in out[sesskey]:
- data.append([secs] + extra + list(values))
- extra += [None] * len(keys)
- j = json.dumps([headers] + data)
- if self.request.get('jsonp'):
- j = '%s(%s)' % (self.request.get('jsonp'), j)
- self.response.write(j)
+ pcapdata = PcapData.GetOrInsertFromBlob(blob_info)
+
+ self.response.headers['Content-Type'] = 'application/json'
+ js_bundle = {
+ 'js_packets': pcapdata.js_packets,
+ 'js_streams': pcapdata.js_streams,
+ }
+ self.response.out.write(json.dumps(js_bundle))
-def Handle500(req, resp, exc):
+def Handle500(unused_req, resp, exc):
resp.clear()
resp.headers['Content-type'] = 'text/plain'
resp.write(traceback.format_exc(exc))
diff --git a/d3viz.html b/d3viz.html
new file mode 100644
index 0000000..9a66991
--- /dev/null
+++ b/d3viz.html
@@ -0,0 +1,161 @@
+{% extends 'index.html' %}
+
+{% block body %}
+
+ <script type='text/javascript'
+ src='//ajax.googleapis.com/ajax/libs/jquery/1.10.2/jquery.min.js'></script>
+ <script type='text/javascript' src='http://d3js.org/d3.v3.min.js'></script>
+ <script type='text/javascript'>
+ // TODO(katepek): Move into a separate JS file
+
+ var debug = true;
+ function log(o) {
+ if (debug) {
+ console.log(o);
+ }
+ }
+
+ // TODO(katepek): Find better values; currently for 13" screen.
+ var width = 1350;
+ var height = 650;
+ var padding = 20;
+
+ // TODO(katepek): Pull in some standard library for this
+ var pair_colours =
+ ['black', 'red', 'blue', 'green', 'magenta',
+ 'gray', 'hotpink', 'chocolate', 'deepskyblue', 'gold'];
+
+ var dataset; // all packets
+ var streams; // pairs of (transmitter, receiver)
+
+ var pcapSecsScale; // x
+ var seqScale; // y
+
+ try {
+ $.getJSON('/json/' + get_key(), function(json) {
+ begin = new Date().getTime();
+
+ init_data(JSON.stringify(json));
+ init_scales();
+ visualize();
+
+ end = new Date().getTime();
+ log('Spent on visualization ' + ((end - begin) / 1000) + ' sec.');
+ });
+ } catch (error) {
+ console.log(error);
+ }
+
+ function get_key() {
+ parts = window.location.pathname.split('/');
+ return parts[parts.length - 1];
+ }
+
+ function init_data(json_string) {
+ // TODO(katepek): Should sanitize here? E.g., discard bad packets?
+ // Packets w/o seq?
+ js_objects = JSON.parse(json_string);
+ dataset = JSON.parse(js_objects['js_packets']);
+ streams = JSON.parse(js_objects['js_streams']);
+ }
+
+ function raw_seq(d) {
+ return Number(d['seq']);
+ }
+
+ function raw_pcapSecs(d) {
+ return parseFloat(d['pcap_secs']);
+ }
+
+ function init_scales() {
+ // Prepare scale for X axis
+ pcapSecsScale = d3.scale
+ .linear()
+ .domain([d3.min(dataset, raw_pcapSecs),
+ d3.max(dataset, raw_pcapSecs)])
+ .range([2 * padding, width - 2 * padding]);
+
+    // Prepare scale for Y axis
+ seqScale = d3.scale
+ .linear()
+ .domain([d3.min(dataset, raw_seq),
+ d3.max(dataset, raw_seq)])
+ .range([height - padding, padding]);
+ }
+
+ function seq(d) {
+ var seq = seqScale(Number(d['seq']));
+ return seq;
+ }
+
+ function pcapSecs(d) {
+ var pcapSecs = pcapSecsScale(parseFloat(d['pcap_secs']));
+ return pcapSecs;
+ }
+
+ function visualize() {
+ var svg = d3
+ .select('body')
+ .append('svg')
+ .attr('width', width)
+ .attr('height', height)
+ .style('border', '1px solid black');
+
+ // TODO(katepek): Show a summary somewhere as a legend
+ // which pair corresponds to which colour
+ for (i = 0; i < streams.length; i++) {
+ log('pcap_vs_seq' + i);
+ log(get_colour(i));
+ svg
+ .selectAll('pcap_vs_seq' + i)
+ .data(dataset)
+ .enter()
+ .append('circle')
+ .filter(function(d) {
+ return d['ta'] == streams[i]['ta'] &&
+ d['ra'] == streams[i]['ra'];
+ })
+ .attr('cx', pcapSecs)
+ .attr('cy', seq)
+ .attr('r', 1)
+ .attr('fill', get_colour(i))
+ .append('title')
+ .text(
+ function(d) {
+ return d['typestr'] +
+ ': pcapSecs=' + d['pcap_secs'] +
+ '; seq=' + d['seq'] +
+ '\n(ta=' + d['ta'] + ',' + 'ra=' + d['ra'] + ')';
+ }
+ );
+ }
+
+ // TODO(katepek): Axes seem to show range, not the domain
+ var pcapSecsAxis = d3.svg.axis()
+ .scale(pcapSecsScale)
+ .orient('bottom')
+ .ticks(5);
+ var seqAxis = d3.svg.axis()
+ .scale(seqScale)
+ .orient('right')
+ .ticks(5);
+
+ svg.append('g')
+ .attr('class', 'axis')
+ .attr('transform', 'translate(0,' + (height - padding) + ')')
+ .call(pcapSecsAxis);
+ svg.append('g')
+ .attr('class', 'axis')
+ .attr('transform', 'translate(' + (width - 2 * padding) + ',0)')
+ .call(seqAxis);
+ }
+
+ function get_colour(i) {
+ if (i < pair_colours.length)
+ return pair_colours[i];
+ return pair_colours[i % pair_colours.length];
+ }
+
+ </script>
+
+{% end %}
diff --git a/view.html b/view.html
index 987cd59..54aeded 100644
--- a/view.html
+++ b/view.html
@@ -41,6 +41,10 @@
<td></td>
</tr>
</table>
- <input type='submit' value='Save and View' style='margin: 1em' />
+ <br>
+ <input type='checkbox' name='update-cache' id='update-cache'>
+ <label for='update-cache'>Update cache</label>
+ </input><br>
+ <input type='submit' value='Save and View' />
</form>
{% end %}
diff --git a/wifipacket.py b/wifipacket.py
index 37e6521..68f8596 100755
--- a/wifipacket.py
+++ b/wifipacket.py
@@ -73,28 +73,28 @@
RADIOTAP_FIELDS = [
- ('mac_usecs', 'Q'),
- ('flags', 'B'),
- ('rate', 'B'),
- ('channel', 'HH'),
- ('fhss', 'BB'),
- ('dbm_antsignal', 'b'),
- ('dbm_antnoise', 'b'),
- ('lock_quality', 'H'),
- ('tx_attenuation', 'H'),
- ('db_tx_attenuation', 'B'),
- ('dbm_tx_power', 'b'),
- ('antenna', 'B'),
- ('db_antsignal', 'B'),
- ('db_antnoise', 'B'),
- ('rx_flags', 'H'),
- ('tx_flags', 'H'),
- ('rts_retries', 'B'),
- ('data_retries', 'B'),
- ('channelplus', 'II'),
- ('ht', 'BBB'),
- ('ampdu_status', 'IHBB'),
- ('vht', 'HBB4sBBH'),
+ ('mac_usecs', 'Q'), # microseconds = timestamp received
+ ('flags', 'B'), # bit field (matches the enum above)
+ ('rate', 'B'), # Mb/s (=speed of the packet) can slow down
+ ('channel', 'HH'), # the channel number on which it was received
+ ('fhss', 'BB'), # ???
+ ('dbm_antsignal', 'b'), # power level of the received signal
+ ('dbm_antnoise', 'b'), # power level of the background noise
+ ('lock_quality', 'H'), # nobody really uses it for anything (NRUIFA)
+ ('tx_attenuation', 'H'), # NRUIFA
+ ('db_tx_attenuation', 'B'), # NRUIFA
+ ('dbm_tx_power', 'b'), # NRUIFA
+ ('antenna', 'B'), # which antenna
+ ('db_antsignal', 'B'), # uncalibrated dbm_*
+    ('db_antnoise', 'B'),  # uncalibrated dbm_*
+ ('rx_flags', 'H'), # ???
+ ('tx_flags', 'H'), # ???
+ ('rts_retries', 'B'), # ???
+ ('data_retries', 'B'), # ???
+ ('channelplus', 'II'), # ???
+ ('ht', 'BBB'), # like 'rate' only more (=high transmit rate)
+ ('ampdu_status', 'IHBB'), # ??? MBU
+ ('vht', 'HBB4sBBH'), # like 'ht' only more (=higher transmit rate)
]
@@ -246,12 +246,15 @@
last_ta = None
last_ra = None
+
while 1:
opt = Struct({})
# pcap packet header
pcaphdr = stream.read(16)
+
if len(pcaphdr) < 16: break # EOF
+
(ts_sec, ts_usec,
incl_len, orig_len) = struct.unpack(byteorder + 'IIII', pcaphdr)
if incl_len > orig_len: