authorizer: fix bugs and expire cache

Since the server wipes out existing users when their authorization
expires, periodically rerun checks for cached users and revoke their
authorization if the checks fail.

Also, make sure we only have one checker live for users at a time.
Resolves the issue where a user would be added to firewall rules
multiple times.

Change-Id: I05685e93198a0bc21fe6e2ac52e0cb6db90f2691
diff --git a/bouncer/Makefile b/bouncer/Makefile
index bda3d81..db66253 100644
--- a/bouncer/Makefile
+++ b/bouncer/Makefile
@@ -1,6 +1,8 @@
 default: all
 
+INSTALL?=install
 BINDIR=$(DESTDIR)/bin
+LIBDIR=$(DESTDIR)/usr/bouncer
 GPYLINT=$(shell \
     if which gpylint >/dev/null; then \
       echo gpylint; \
@@ -8,26 +10,21 @@
       echo 'echo "(gpylint-missing)" >&2'; \
     fi \
 )
+NOINSTALL=options.py
 
-TARGETS=authorizer hash_mac_addr http_bouncer
-
-HOST_TARGETS=$(addprefix host-,$(TARGETS))
-
-all: $(TARGETS) $(HOST_TARGETS)
+all:
 
 install:
-	mkdir -p $(BINDIR)
-	cp $(TARGETS) $(BINDIR)
+	mkdir -p $(LIBDIR) $(BINDIR)
+	$(INSTALL) -m 0644 $(filter-out $(NOINSTALL) $(TARGETS), $(wildcard *.py)) $(LIBDIR)/
+	for t in authorizer hash_mac_addr http_bouncer; do \
+		$(INSTALL) -m 0755 $$t.py $(LIBDIR)/; \
+		ln -fs /usr/bouncer/$$t.py $(BINDIR)/$$t; \
+	done
 
 install-libs:
 	@echo "No libs to install."
 
-%: %.py
-	ln -s $< $@
-
-host-%: %.py
-	ln -s $< $@
-
 TESTS = $(wildcard test-*.sh) $(wildcard test-*.py) $(wildcard *_test.py)
 runtests: all $(TESTS)
 	set -e; \
diff --git a/bouncer/authorizer.py b/bouncer/authorizer.py
index f355917..0c5e0b1 100755
--- a/bouncer/authorizer.py
+++ b/bouncer/authorizer.py
@@ -43,6 +43,7 @@
 
 MAX_TRIES = 300
 
+in_progress_users = {}
 known_users = {}
 
 
@@ -55,13 +56,22 @@
   return x | y
 
 
+def is_valid_acceptance(response_obj):
+  accepted_time = response_obj.get('accepted')
+  return accepted_time + (opt.max_age * 86400) > time.time()
+
+
+def allow_mac_rule(mac_addr):
+  # iptables, unlike other Linux utilities, capitalizes MAC addresses
+  return ('-m', 'mac', '--mac-source', mac_addr.upper(), '-j', 'ACCEPT')
+
+
 class Checker(object):
   """Manage checking and polling for Terms of Service acceptance."""
 
-  def __init__(self, mac_addr, hashed_mac_addr, url):
+  def __init__(self, mac_addr, url):
     self.mac_addr = mac_addr
-    self.hashed_mac_addr = hashed_mac_addr
-    self.url = url % {'mac': hashed_mac_addr}
+    self.url = url % {'mac': hash_mac_addr.hash_mac_addr(self.mac_addr)}
     self.tries = 0
     self.callback = None
 
@@ -69,41 +79,43 @@
     """Check if a remote service knows about a device with a supplied MAC."""
     logging.info('Checking TOS for %s', self.mac_addr)
     http_client = tornado.httpclient.HTTPClient()
-    response = http_client.fetch(self.url, ca_certs=opt.ca_certs)
-    response_obj = tornado.escape.json_decode(response.body)
-    accepted_time = response_obj.get('accepted')
     self.tries += 1
 
-    accepted = False
-    if accepted_time:
-      if accepted_time + (opt.max_age * 86400) > time.time():
-        accepted = True
-        if self.callback: self.callback.stop()
-        logging.info('TOS accepted for %s', self.mac_addr)
+    try:
+      response = http_client.fetch(self.url, ca_certs=opt.ca_certs)
+      response_obj = tornado.escape.json_decode(response.body)
+      valid = is_valid_acceptance(response_obj)
+    except tornado.httpclient.HTTPError as e:
+      logging.warning('Error checking authorization: %r', e)
+      valid = False
 
-        known_users[self.mac_addr] = response_obj
-        result = ip46tables('-A', opt.filter_chain, '-m', 'mac',
-                            '--mac-source', self.mac_addr, '-j', 'ACCEPT')
-        result |= ip46tables('-t', 'nat', '-A', opt.nat_chain, '-m', 'mac',
-                             '--mac-source', self.mac_addr, '-j', 'ACCEPT')
-        if result:
-          logging.error('Could not update firewall for device %s',
-                        self.mac_addr)
-      else:
-        logging.info('TOS accepted too long ago for %s: %r',
-                     self.mac_addr, accepted_time)
+    if valid:
+      logging.info('TOS accepted for %s', self.mac_addr)
 
-    elif self.callback and self.tries > MAX_TRIES:
-      if not accepted:
-        logging.info('TOS not accepted for %s before timeout.',
-                     self.mac_addr)
-      self.callback.stop()
+      known_users[self.mac_addr] = response_obj
+      result = ip46tables('-A', opt.filter_chain,
+                          *allow_mac_rule(self.mac_addr))
+      result |= ip46tables('-t', 'nat', '-A', opt.nat_chain,
+                           *allow_mac_rule(self.mac_addr))
+      if result:
+        logging.error('Could not update firewall for device %s',
+                      self.mac_addr)
 
-    return response, accepted
+    if valid or self.tries > MAX_TRIES:
+      if self.callback:
+        self.callback.stop()
+      if self.mac_addr in in_progress_users:
+        del in_progress_users[self.mac_addr]
+    else:
+      in_progress_users[self.mac_addr] = self
+      self.poll()
+
+    return response
 
   def poll(self):
-    self.callback = tornado.ioloop.PeriodicCallback(self.check, 1000)
-    self.callback.start()
+    if not self.callback:
+      self.callback = tornado.ioloop.PeriodicCallback(self.check, 1000)
+      self.callback.start()
 
 
 def accept(connection, unused_address):
@@ -112,27 +124,46 @@
 
   maybe_mac_addr = cf.readline().strip()
   try:
-    mac_addr, hashed_mac_addr = hash_mac_addr.hash_mac_addr(maybe_mac_addr)
+    mac_addr = hash_mac_addr.normalize_mac_addr(maybe_mac_addr)
   except ValueError:
     logging.warning('can only check authorization for a MAC address.')
     cf.write('{}')
     return
 
   if mac_addr in known_users:
-    logging.info('TOS accepted (cached) for %s', mac_addr)
     cached_response = known_users[mac_addr]
-    cached_response['cached'] = True
-    cf.write(tornado.escape.json_encode(cached_response))
-    return
+    if is_valid_acceptance(cached_response):
+      logging.info('TOS accepted (cached) for %s', mac_addr)
+      cached_response['cached'] = True
+      cf.write(tornado.escape.json_encode(cached_response))
+      return
 
-  checker = Checker(mac_addr, hashed_mac_addr, opt.url)
-  response, accepted = checker.check()
-  if not accepted:
-    checker.poll()
+  if mac_addr in in_progress_users:
+    checker = in_progress_users[mac_addr]
+  else:
+    checker = Checker(mac_addr, opt.url)
 
+  response = checker.check()
   cf.write(response.body)
 
 
+def expire_cache():
+  """Remove users whose authorization has expired from the cache."""
+  expired_users = set(mac_addr for mac_addr, cached_response
+                      in known_users.items()
+                      if not is_valid_acceptance(cached_response))
+
+  for mac_addr in expired_users:
+    logging.info('Removing expired user %s', mac_addr)
+    del known_users[mac_addr]
+
+    result = ip46tables('-D', opt.filter_chain, *allow_mac_rule(mac_addr))
+    result |= ip46tables('-t', 'nat', '-D', opt.nat_chain,
+                         *allow_mac_rule(mac_addr))
+    if result:
+      logging.warning('Error removing expired user %s !', mac_addr)
+
+
 if __name__ == '__main__':
   o = options.Options(optspec)
   opt, flags, extra = o.parse(sys.argv[1:])
@@ -157,3 +188,6 @@
   logging.info('Started authorizer.')
   ioloop.start()
 
+  expirer = tornado.ioloop.PeriodicCallback(expire_cache, 60 * 60 * 1000)
+  expirer.start()
+
diff --git a/bouncer/hash_mac_addr.py b/bouncer/hash_mac_addr.py
index 961bf10..23b45d1 100755
--- a/bouncer/hash_mac_addr.py
+++ b/bouncer/hash_mac_addr.py
@@ -15,13 +15,16 @@
 """
 
 
-def hash_mac_addr(maybe_mac_addr):
+def normalize_mac_addr(maybe_mac_addr):
   if re.match('([0-9A-Fa-f]{2}:){5}[0-9A-Fa-f]{2}$', maybe_mac_addr):
-    mac_addr = maybe_mac_addr.lower()
+    return maybe_mac_addr.lower()
   else:
     raise ValueError('%r not a MAC address' % maybe_mac_addr)
 
-  return mac_addr, hashlib.sha1(mac_addr).hexdigest()
+
+def hash_mac_addr(maybe_mac_addr):
+  mac_addr = normalize_mac_addr(maybe_mac_addr)
+  return hashlib.sha1(mac_addr).hexdigest()
 
 
 if __name__ == '__main__':
@@ -32,7 +35,7 @@
     o.usage()
 
   try:
-    _, hashed_mac_addr = hash_mac_addr(str(opt.addr))
+    hashed_mac_addr = hash_mac_addr(str(opt.addr))
     print hashed_mac_addr
   except ValueError as e:
     print >>sys.stderr, 'error:', e.message
diff --git a/bouncer/http_bouncer.py b/bouncer/http_bouncer.py
index 380da4b..5fe0a53 100755
--- a/bouncer/http_bouncer.py
+++ b/bouncer/http_bouncer.py
@@ -68,8 +68,7 @@
     else:
       if self.substitute_mac:
         mac = mac_for_ip(self.request.remote_ip)
-        _, hashed_mac = hash_mac_addr.hash_mac_addr(mac)
-        self.redirect(opt.url % {'mac': hashed_mac})
+        self.redirect(opt.url % {'mac': hash_mac_addr.hash_mac_addr(mac)})
 
         if opt.unix_path:
           try:
diff --git a/bouncer/test-hash_mac_addr.sh b/bouncer/test-hash_mac_addr.sh
index 3c84424..db6f10b 100755
--- a/bouncer/test-hash_mac_addr.sh
+++ b/bouncer/test-hash_mac_addr.sh
@@ -4,7 +4,7 @@
 
 WVSTART "hash_mac_addr test"
 
-HASH_MAC_ADDR=./host-hash_mac_addr
+HASH_MAC_ADDR=./hash_mac_addr.py
 
 WVFAIL $HASH_MAC_ADDR
 WVFAIL $HASH_MAC_ADDR -a nonsense
diff --git a/bouncer/test-http_bouncer.sh b/bouncer/test-http_bouncer.sh
index dedd742..b5bd72a 100755
--- a/bouncer/test-http_bouncer.sh
+++ b/bouncer/test-http_bouncer.sh
@@ -5,7 +5,7 @@
 
 . ./wvtest/wvtest.sh
 
-HTTP_BOUNCER=./host-http_bouncer
+HTTP_BOUNCER=./http_bouncer.py
 PORT="1337"
 URL="http://example.com"