conman:  Don't delete routes while the link is up.

We might be wrong about a connection check failing.  Rather than
deleting routes when this happens, just "deprioritize" them by
increasing their metrics by 50.

50 is chosen because it will make a wired route come after a normal
priority wireless route, but not after temporary (metric 99)
connection_check routes.

Where appropriate, tests that previously checked for no route now check
for no route below metric 50.  However, when links are actually down,
tests still check for no route at all.

Change-Id: I43e4ee79f1216a9673a39eb283cde00250f41298
diff --git a/conman/connection_manager_test.py b/conman/connection_manager_test.py
index 5bd5f13..271cac7 100755
--- a/conman/connection_manager_test.py
+++ b/conman/connection_manager_test.py
@@ -637,7 +637,7 @@
   wvtest.WVPASS(c.bridge.current_routes())
   wvtest.WVPASS(os.path.exists(acs_autoprov_filepath))
   for wifi in c.wifi:
-    wvtest.WVFAIL(wifi.current_routes())
+    wvtest.WVFAIL(wifi.current_routes_normal_testonly())
   wvtest.WVFAIL(c.has_status_files([status.P.CONNECTED_TO_WLAN,
                                     status.P.HAVE_CONFIG]))
 
@@ -646,7 +646,7 @@
   c.run_once()
   wvtest.WVFAIL(c.acs())
   wvtest.WVFAIL(c.internet())
-  wvtest.WVFAIL(c.bridge.current_routes())
+  wvtest.WVFAIL(c.bridge.current_routes_normal_testonly())
   wvtest.WVFAIL(os.path.exists(acs_autoprov_filepath))
   wvtest.WVFAIL(c.has_status_files([status.P.CAN_REACH_ACS,
                                     status.P.CAN_REACH_INTERNET]))
@@ -677,7 +677,7 @@
   c.run_until_interface_update()
   wvtest.WVFAIL(c.acs())
   wvtest.WVFAIL(c.internet())
-  wvtest.WVFAIL(c.bridge.current_routes())
+  wvtest.WVFAIL(c.bridge.current_routes_normal_testonly())
 
   # Now c connects to a restricted network.
   c.bridge.set_connection_check_result('restricted')
@@ -692,6 +692,8 @@
   c.run_once()
   wvtest.WVFAIL(c.acs())
   wvtest.WVFAIL(c.internet())
+  # We have no links, so we should have no routes (not even low priority ones),
+  # and /tmp/hosts should only contain a line for localhost.
   wvtest.WVFAIL(c.bridge.current_routes())
   check_tmp_hosts('127.0.0.1 localhost')
 
@@ -778,7 +780,7 @@
   c.run_once()
   wvtest.WVPASS(c.client_up(band))
   wvtest.WVPASS(c.wifi_for_band(band).current_routes())
-  wvtest.WVFAIL(c.bridge.current_routes())
+  wvtest.WVFAIL(c.bridge.current_routes_normal_testonly())
   c.run_until_interface_update()
   check_tmp_hosts('192.168.1.100 %s\n127.0.0.1 localhost' % hostname)
 
@@ -803,7 +805,7 @@
   c.run_once()
   wvtest.WVFAIL(c.access_point_up(band))
   wvtest.WVFAIL(c.client_up(band))
-  wvtest.WVFAIL(c.wifi_for_band(band).current_routes())
+  wvtest.WVFAIL(c.wifi_for_band(band).current_routes_normal_testonly())
   wvtest.WVPASS(c.bridge.current_routes())
   wvtest.WVFAIL(c.has_status_files([status.P.HAVE_CONFIG]))
 
@@ -812,7 +814,7 @@
   c.run_once()
   wvtest.WVPASS(c.access_point_up(band))
   wvtest.WVFAIL(c.client_up(band))
-  wvtest.WVFAIL(c.wifi_for_band(band).current_routes())
+  wvtest.WVFAIL(c.wifi_for_band(band).current_routes_normal_testonly())
   wvtest.WVPASS(c.bridge.current_routes())
 
   # Now delete the config and bring down the bridge and make sure we reprovision
@@ -823,7 +825,9 @@
   c.run_until_interface_update()
   wvtest.WVFAIL(c.acs())
   wvtest.WVFAIL(c.internet())
-  check_tmp_hosts('127.0.0.1 localhost')
+  # We still have a link and might be wrong about the connection_check, so
+  # /tmp/hosts should still contain a line for this hostname.
+  check_tmp_hosts('192.168.1.101 %s\n127.0.0.1 localhost' % hostname)
   # s3 is not what the cycler would suggest trying next.
   wvtest.WVPASSNE('s3', c.wifi_for_band(band).cycler.peek())
   # Run only once, so that only one BSS can be tried.  It should be the s3 one,
@@ -1000,8 +1004,8 @@
   wvtest.WVPASS(c.bridge.current_routes())
   wvtest.WVFAIL(c.client_up('2.4'))
   wvtest.WVFAIL(c.client_up('5'))
-  wvtest.WVFAIL(c.wifi_for_band('2.4').current_routes())
-  wvtest.WVFAIL(c.wifi_for_band('5').current_routes())
+  wvtest.WVFAIL(c.wifi_for_band('2.4').current_routes_normal_testonly())
+  wvtest.WVFAIL(c.wifi_for_band('5').current_routes_normal_testonly())
 
   # Disable the 2.4 GHz AP, make sure the 5 GHz AP stays up.  2.4 GHz should
   # join the WLAN.
@@ -1012,7 +1016,7 @@
   wvtest.WVPASS(c.client_up('2.4'))
   wvtest.WVPASS(c.bridge.current_routes())
   wvtest.WVPASS(c.wifi_for_band('2.4').current_routes())
-  wvtest.WVFAIL(c.wifi_for_band('5').current_routes())
+  wvtest.WVFAIL(c.wifi_for_band('5').current_routes_normal_testonly())
 
   # Delete the 2.4 GHz WLAN configuration; it should leave the WLAN but nothing
   # else should change.
@@ -1022,8 +1026,8 @@
   wvtest.WVPASS(c.access_point_up('5'))
   wvtest.WVFAIL(c.client_up('2.4'))
   wvtest.WVPASS(c.bridge.current_routes())
-  wvtest.WVFAIL(c.wifi_for_band('2.4').current_routes())
-  wvtest.WVFAIL(c.wifi_for_band('5').current_routes())
+  wvtest.WVFAIL(c.wifi_for_band('2.4').current_routes_normal_testonly())
+  wvtest.WVFAIL(c.wifi_for_band('5').current_routes_normal_testonly())
 
   # Disable the wired connection and remove the WLAN configurations.  Both
   # radios should scan.  Wait for 5 GHz to scan, then enable scan results for
@@ -1032,18 +1036,18 @@
   c.set_ethernet(False)
   c.run_once()
   wvtest.WVFAIL(c.acs())
-  wvtest.WVFAIL(c.bridge.current_routes())
-  wvtest.WVFAIL(c.wifi_for_band('2.4').current_routes())
-  wvtest.WVFAIL(c.wifi_for_band('5').current_routes())
+  wvtest.WVFAIL(c.bridge.current_routes_normal_testonly())
+  wvtest.WVFAIL(c.wifi_for_band('2.4').current_routes_normal_testonly())
+  wvtest.WVFAIL(c.wifi_for_band('5').current_routes_normal_testonly())
 
   # The 5 GHz scan has no results.
   c.run_until_scan('5')
   c.run_once()
   c.run_until_interface_update()
   wvtest.WVFAIL(c.acs())
-  wvtest.WVFAIL(c.bridge.current_routes())
-  wvtest.WVFAIL(c.wifi_for_band('2.4').current_routes())
-  wvtest.WVFAIL(c.wifi_for_band('5').current_routes())
+  wvtest.WVFAIL(c.bridge.current_routes_normal_testonly())
+  wvtest.WVFAIL(c.wifi_for_band('2.4').current_routes_normal_testonly())
+  wvtest.WVFAIL(c.wifi_for_band('5').current_routes_normal_testonly())
 
   # The next 2.4 GHz scan will have results.
   c.interface_with_scan_results = c.wifi_for_band('2.4').name
@@ -1053,9 +1057,9 @@
     c.run_once()
   c.run_until_interface_update()
   wvtest.WVPASS(c.acs())
-  wvtest.WVFAIL(c.bridge.current_routes())
+  wvtest.WVFAIL(c.bridge.current_routes_normal_testonly())
   wvtest.WVPASS(c.wifi_for_band('2.4').current_routes())
-  wvtest.WVFAIL(c.wifi_for_band('5').current_routes())
+  wvtest.WVFAIL(c.wifi_for_band('5').current_routes_normal_testonly())
   c.run_once()
   wvtest.WVPASSEQ(c.log_upload_count, 1)
 
@@ -1104,8 +1108,8 @@
   wvtest.WVFAIL(c.access_point_up('2.4'))
   wvtest.WVPASS(c.access_point_up('5'))
   wvtest.WVPASS(c.bridge.current_routes())
-  wvtest.WVFAIL(c.wifi_for_band('2.4').current_routes())
-  wvtest.WVFAIL(c.wifi_for_band('5').current_routes())
+  wvtest.WVFAIL(c.wifi_for_band('2.4').current_routes_normal_testonly())
+  wvtest.WVFAIL(c.wifi_for_band('5').current_routes_normal_testonly())
 
   # Disable the 2.4 GHz AP; nothing should change.  The 2.4 GHz client should
   # not be up because the same radio is being used to run a 5 GHz AP.
@@ -1115,8 +1119,8 @@
   wvtest.WVPASS(c.access_point_up('5'))
   wvtest.WVFAIL(c.client_up('2.4'))
   wvtest.WVPASS(c.bridge.current_routes())
-  wvtest.WVFAIL(c.wifi_for_band('2.4').current_routes())
-  wvtest.WVFAIL(c.wifi_for_band('5').current_routes())
+  wvtest.WVFAIL(c.wifi_for_band('2.4').current_routes_normal_testonly())
+  wvtest.WVFAIL(c.wifi_for_band('5').current_routes_normal_testonly())
 
   # Delete the 2.4 GHz WLAN configuration; nothing should change.
   c.delete_wlan_config('2.4')
@@ -1125,8 +1129,8 @@
   wvtest.WVPASS(c.access_point_up('5'))
   wvtest.WVFAIL(c.client_up('2.4'))
   wvtest.WVPASS(c.bridge.current_routes())
-  wvtest.WVFAIL(c.wifi_for_band('2.4').current_routes())
-  wvtest.WVFAIL(c.wifi_for_band('5').current_routes())
+  wvtest.WVFAIL(c.wifi_for_band('2.4').current_routes_normal_testonly())
+  wvtest.WVFAIL(c.wifi_for_band('5').current_routes_normal_testonly())
 
   # Disable the wired connection and remove the WLAN configurations.  There
   # should be a single scan that leads to ACS access.  (It doesn't matter which
@@ -1136,9 +1140,9 @@
   c.set_ethernet(False)
   c.run_once()
   wvtest.WVFAIL(c.acs())
-  wvtest.WVFAIL(c.bridge.current_routes())
-  wvtest.WVFAIL(c.wifi_for_band('2.4').current_routes())
-  wvtest.WVFAIL(c.wifi_for_band('5').current_routes())
+  wvtest.WVFAIL(c.bridge.current_routes_normal_testonly())
+  wvtest.WVFAIL(c.wifi_for_band('2.4').current_routes_normal_testonly())
+  wvtest.WVFAIL(c.wifi_for_band('5').current_routes_normal_testonly())
 
   # The scan will have results that will lead to ACS access.
   c.interface_with_scan_results = c.wifi_for_band('2.4').name
@@ -1147,7 +1151,7 @@
     c.run_once()
   c.run_until_interface_update()
   wvtest.WVPASS(c.acs())
-  wvtest.WVFAIL(c.bridge.current_routes())
+  wvtest.WVFAIL(c.bridge.current_routes_normal_testonly())
   wvtest.WVPASS(c.wifi_for_band('2.4').current_routes())
   wvtest.WVPASS(c.wifi_for_band('5').current_routes())
   c.run_once()
diff --git a/conman/interface.py b/conman/interface.py
index fd0df6f..887f487 100755
--- a/conman/interface.py
+++ b/conman/interface.py
@@ -7,6 +7,10 @@
 import re
 import subprocess
 
+# This has to be called before another module calls it with a higher log level.
+# pylint: disable=g-import-not-at-top
+logging.basicConfig(level=logging.DEBUG)
+
 import experiment
 import wpactrl
 
@@ -15,6 +19,8 @@
 METRIC_24GHZ = 22
 METRIC_TEMPORARY_CONNECTION_CHECK = 99
 
+RFC2385_MULTICAST_ROUTE = '239.0.0.0/8'
+
 experiment.register('WifiSimulateWireless')
 CWMP_PATH = '/tmp/cwmp'
 MAX_ACS_FAILURE_S = 60
@@ -30,7 +36,7 @@
   IP_ROUTE = ['ip', 'route']
   IP_ADDR_SHOW = ['ip', 'addr', 'show', 'dev']
 
-  def __init__(self, name, metric):
+  def __init__(self, name, base_metric):
     self.name = name
 
     # Currently connected links for this interface, e.g. ethernet.
@@ -42,11 +48,16 @@
 
     self._subnet = None
     self._gateway_ip = None
-    self.metric = metric
+    self.base_metric = base_metric
+    self.metric_offset = 0
 
     # Until this is set True, the routing table will not be touched.
     self._initialized = False
 
+  @property
+  def metric(self):
+    return str(int(self.base_metric) + self.metric_offset)
+
   def _connection_check(self, check_acs):
     """Check this interface's connection status.
 
@@ -77,8 +88,11 @@
     # routes.
     added_temporary_route = False
     if 'default' not in self.current_routes():
-      logging.debug('Adding temporary connection check route for dev %s',
+      logging.debug('Adding temporary connection check routes for dev %s',
                     self.name)
+      self._ip_route('add', self._gateway_ip,
+                     'dev', self.name,
+                     'metric', str(METRIC_TEMPORARY_CONNECTION_CHECK))
       self._ip_route('add', 'default',
                      'via', self._gateway_ip,
                      'dev', self.name,
@@ -98,11 +112,14 @@
 
     # Delete the temporary route.
     if added_temporary_route:
-      logging.debug('Deleting temporary connection check route for dev %s',
+      logging.debug('Deleting temporary connection check routes for dev %s',
                     self.name)
       self._ip_route('del', 'default',
                      'dev', self.name,
                      'metric', str(METRIC_TEMPORARY_CONNECTION_CHECK))
+      self._ip_route('del', self._gateway_ip,
+                     'dev', self.name,
+                     'metric', str(METRIC_TEMPORARY_CONNECTION_CHECK))
 
     return result
 
@@ -138,49 +155,54 @@
     # exists but is different, delete it before adding an updated one.
     current = self.current_routes()
     default = current.get('default', {})
-    subnet = current.get('subnet', {})
     if ((default.get('via', None), default.get('metric', None)) !=
         (self._gateway_ip, str(self.metric))):
       logging.debug('Adding default route for dev %s', self.name)
-      self.delete_route(default=True)
+      self.delete_route('default')
       self._ip_route('add', 'default',
                      'via', self._gateway_ip,
                      'dev', self.name,
                      'metric', str(self.metric))
 
+    subnet = current.get('subnet', {})
     if (self._subnet and
         (subnet.get('via', None), subnet.get('metric', None)) !=
         (self._gateway_ip, str(self.metric))):
       logging.debug('Adding subnet route for dev %s', self.name)
-      self.delete_route(subnet=True)
+      self.delete_route('subnet')
       self._ip_route('add', self._subnet,
                      'dev', self.name,
                      'metric', str(self.metric))
 
-  def delete_route(self, default=False, subnet=False):
+    # RFC2365 multicast route.
+    if current.get('multicast', {}).get('metric', None) != str(self.metric):
+      logging.debug('Adding multicast route for dev %s', self.name)
+      self.delete_route('multicast')
+      self._ip_route('add', RFC2385_MULTICAST_ROUTE,
+                     'dev', self.name,
+                     'metric', str(self.metric))
+
+  def delete_route(self, *args):
     """Delete default and/or subnet routes for this interface.
 
     Args:
-      default:  Whether to delete default routes.  Must be true if subnet isn't.
-      subnet:  Whether to delete subnet routes.  Must be true if default isn't.
+      *args:  Which routes to delete.  Must be at least one of 'default',
+          'subnet', 'multicast'.
 
     Raises:
       ValueError:  If neither default nor subnet is True.
     """
-    route_types = []
-    if default:
-      route_types.append(('default', 'default'))
-    if subnet:
-      route_types.append(('subnet', self._subnet))
-
-    if not route_types:
+    args = set(args)
+    args &= set(('default', 'subnet', 'multicast'))
+    if not args:
       raise ValueError(
-          'Must specify at least one of default or subnet to delete.')
+          'Must specify at least one of default, subnet, multicast to delete.')
 
-    for route_type, key in route_types:
+    for route_type in args:
       while route_type in self.current_routes():
         logging.debug('Deleting %s route for dev %s', route_type, self.name)
-        self._ip_route('del', key, 'dev', self.name)
+        self._ip_route('del', self.current_routes()[route_type]['route'],
+                       'dev', self.name)
 
   def current_routes(self):
     """Read the current routes for this interface.
@@ -193,13 +215,20 @@
     result = {}
     for line in self._ip_route().splitlines():
       if 'dev %s' % self.name in line:
-        route_type = 'default' if line.startswith('default') else 'subnet'
+        if line.startswith('default'):
+          route_type = 'default'
+        elif re.search(r'\/\d{1,2}$', line.split()[0]):
+          route_type = 'subnet'
+        else:
+          continue
         route = {}
-        key = None
+        key = 'route'
         for token in line.split():
           if token in ['via', 'metric']:
             key = token
           elif key:
+            if key == 'route' and token == RFC2385_MULTICAST_ROUTE:
+              route_type = 'multicast'
             route[key] = token
             key = None
         if route:
@@ -305,9 +334,38 @@
     maybe_had_access = maybe_had_acs != False or maybe_had_internet != False
     has_access = has_acs or has_internet
     if not had_access and has_access:
-      self.add_routes()
+      self.prioritize_routes()
     elif maybe_had_access and not has_access:
-      self.delete_route(default=True, subnet=True)
+      # If we still have a link, just deprioritize the routes, in case we're
+      # wrong about the connection check.  If there's no actual link, then
+      # really delete the route.
+      if self.links:
+        self.deprioritize_routes()
+      else:
+        self.delete_route('default', 'subnet', 'multicast')
+
+  def prioritize_routes(self):
+    """When connection check succeeds, route priority (metric) should be normal.
+
+    This is the inverse of deprioritize_routes.
+    """
+    if not self._initialized:
+      return
+    logging.info('%s routes have normal priority', self.name)
+    self.metric_offset = 0
+    self.add_routes()
+
+  def deprioritize_routes(self):
+    """When connection check fails, deprioritize routes by increasing metric.
+
+    This is conservative alternative to deleting routes, in case we are mistaken
+    about route not providing a useful connection.
+    """
+    if not self._initialized:
+      return
+    logging.info('%s routes have low priority', self.name)
+    self.metric_offset = 50
+    self.add_routes()
 
   def initialize(self):
     """Tell the interface it has its initial state.
@@ -354,11 +412,17 @@
       self._moca_stations.remove(node_id)
       self.moca = bool(self._moca_stations)
 
-  def add_routes(self):
+  def prioritize_routes(self):
     """We only want ACS autoprovisioning when we're using a wired route."""
-    super(Bridge, self).add_routes()
+    super(Bridge, self).prioritize_routes()
     open(self._acs_autoprovisioning_filepath, 'w')
 
+  def deprioritize_routes(self, *args, **kwargs):
+    """We only want ACS autoprovisioning when we're using a wired route."""
+    if os.path.exists(self._acs_autoprovisioning_filepath):
+      os.unlink(self._acs_autoprovisioning_filepath)
+    super(Bridge, self).deprioritize_routes(*args, **kwargs)
+
   def delete_route(self, *args, **kwargs):
     """We only want ACS autoprovisioning when we're using a wired route."""
     if os.path.exists(self._acs_autoprovisioning_filepath):
diff --git a/conman/interface_test.py b/conman/interface_test.py
index 866d1db..e8ab4ca 100755
--- a/conman/interface_test.py
+++ b/conman/interface_test.py
@@ -49,6 +49,8 @@
     if not args:
       return '\n'.join(self.routing_table.values() +
                        ['1.2.3.4/24 dev fake0 proto kernel scope link',
+                        # Non-subnet route, e.g. to NFS host.
+                        '1.2.3.1 dev %s proto kernel scope link' % self.name,
                         'default via 1.2.3.4 dev fake0',
                         'random junk'])
 
@@ -68,7 +70,7 @@
       elif key[2] is None:
         # pylint: disable=g-builtin-op
         for k in self.routing_table.keys():
-          if k[0] == key[0]:
+          if k[:-1] == key[:-1]:
             logging.debug('Deleting route for %r (generalized from %s)', k, key)
             del self.routing_table[k]
             break
@@ -79,6 +81,10 @@
 
     return ''
 
+  def current_routes_normal_testonly(self):
+    result = self.current_routes()
+    return {k: v for k, v in result.iteritems() if int(v.get('metric', 0)) < 50}
+
 
 class Bridge(FakeInterfaceMixin, interface.Bridge):
   pass
@@ -333,6 +339,7 @@
     wvtest.WVFAIL(b.acs())
     wvtest.WVFAIL(b.internet())
     wvtest.WVFAIL(b.current_routes())
+    wvtest.WVFAIL(b.current_routes_normal_testonly())
     wvtest.WVFAIL(os.path.exists(autoprov_filepath))
 
     b.add_moca_station(0)
@@ -343,48 +350,53 @@
     # Everything should fail because the interface is not initialized.
     wvtest.WVFAIL(b.acs())
     wvtest.WVFAIL(b.internet())
-    wvtest.WVFAIL(b.current_routes())
+    wvtest.WVFAIL(b.current_routes_normal_testonly())
     wvtest.WVFAIL(os.path.exists(autoprov_filepath))
     b.initialize()
     wvtest.WVPASS(b.acs())
     wvtest.WVPASS(b.internet())
     current_routes = b.current_routes()
-    wvtest.WVPASS(current_routes)
+    wvtest.WVPASSEQ(len(current_routes), 3)
     wvtest.WVPASS('default' in current_routes)
     wvtest.WVPASS('subnet' in current_routes)
+    wvtest.WVPASS('multicast' in current_routes)
     wvtest.WVPASS(os.path.exists(autoprov_filepath))
 
     b.add_moca_station(1)
     wvtest.WVPASS(b.acs())
     wvtest.WVPASS(b.internet())
-    wvtest.WVPASS(b.current_routes())
+    wvtest.WVPASSEQ(len(b.current_routes()), 3)
     wvtest.WVPASS(os.path.exists(autoprov_filepath))
 
     b.remove_moca_station(0)
     b.remove_moca_station(1)
     wvtest.WVFAIL(b.acs())
     wvtest.WVFAIL(b.internet())
+    # We have no links, so should have no routes.
     wvtest.WVFAIL(b.current_routes())
     wvtest.WVFAIL(os.path.exists(autoprov_filepath))
 
     b.add_moca_station(2)
     wvtest.WVPASS(b.acs())
     wvtest.WVPASS(b.internet())
-    wvtest.WVPASS(b.current_routes())
+    wvtest.WVPASSEQ(len(b.current_routes()), 3)
     wvtest.WVPASS(os.path.exists(autoprov_filepath))
 
     b.set_connection_check_result('fail')
     b.update_routes()
     wvtest.WVFAIL(b.acs())
     wvtest.WVFAIL(b.internet())
-    wvtest.WVFAIL(b.current_routes())
+    # We have links but the connection check failed, so we should only have a
+    # low priority route, i.e. metric at least 50.
+    wvtest.WVPASSEQ(len(b.current_routes()), 3)
+    wvtest.WVFAIL(b.current_routes_normal_testonly())
     wvtest.WVFAIL(os.path.exists(autoprov_filepath))
 
     b.set_connection_check_result('restricted')
     b.update_routes()
     wvtest.WVPASS(b.acs())
     wvtest.WVFAIL(b.internet())
-    wvtest.WVPASS(b.current_routes())
+    wvtest.WVPASSEQ(len(b.current_routes()), 3)
     wvtest.WVPASS(os.path.exists(autoprov_filepath))
 
     wvtest.WVFAIL(b.get_ip_address())