Revert "Revert "/bin/wifi:  Lock /bin/wifi to prevent simultaneous calls.""

This reverts commit 6ccdd2cd8c62651ed31f2ed8e11ec107b1a0ded3.

This has been substantially rewritten, since fcntl.lockf failed about
5% of the time for no apprent reason.  The main change is that this
now uses subprocess calls to lockfile-create.  This required rewriting
the timing, the tests, etc.

Change-Id: Ic1b4152dd13a9fca72a19900b610ef82cfd546b3
diff --git a/wifi/utils.py b/wifi/utils.py
index e665654..7a90065 100644
--- a/wifi/utils.py
+++ b/wifi/utils.py
@@ -5,8 +5,10 @@
 from __future__ import print_function
 
 import collections
+import math
 import os
 import re
+import signal
 import subprocess
 import sys
 import time
@@ -300,3 +302,56 @@
     raise BinWifiException('PSK is not of a valid length: %d', len(psk))
 
   return psk
+
+
+def _lockfile_create_retries(timeout_sec):
+  """Invert the lockfile-create --retry option.
+
+  The --retry option specifies how many times to retry.  Each retry takes an
+  additional five seconds, starting at 0, so --retry 1 takes 5 seconds,
+  --retry 2 takes 15 (5 + 10), and so on.  So:
+
+    timeout_sec = 5 * (retries * (retries + 1)) / 2 =>
+    2.5retries^2 + 2.5retries + -timeout_sec = 0 =>
+    retries = (-2.5 +/- sqrt(2.5^2 - 4*2.5*-timeout_sec)) / (2*2.5)
+    retries = (-2.5 +/- sqrt(6.25 + 10*timeout_sec)) / 5
+
+  We want to ceil this to make sure we have more than enough time, and we can
+  even also add 1 to timeout_sec in case we'd otherwise get a whole number and
+  don't want it to be close.  We can also reduce the +/- to a + because we
+  don't care about negative solutions.
+
+  (Alternatively, we could remove the signal.alarm and
+  expose /bin/wifi callers to this logic by letting them specify the retry
+  count directly, but that would be even worse than this.)
+
+  Args:
+    timeout_sec: The number of seconds the timeout must exceed.
+
+  Returns:
+    A value for lockfile-create --retry.
+  """
+  return math.ceil((-2.5 + math.sqrt(6.25 + 10.0 * (timeout_sec + 1))) / 5.0)
+
+
+def lock(lockfile, timeout_sec):
+  """Attempt to lock lockfile for up to timeout_sec.
+
+  Args:
+    lockfile:  The file to lock.
+    timeout_sec:  How long to try before giving up.
+
+  Raises:
+    BinWifiException:  If the timeout is exceeded.
+  """
+  def time_out(*_):
+    raise BinWifiException('Failed to obtain lock %s after %d seconds',
+                           lockfile, timeout_sec)
+
+  retries = _lockfile_create_retries(timeout_sec)
+
+  signal.signal(signal.SIGALRM, time_out)
+  signal.alarm(timeout_sec)
+  subprocess.call(['lockfile-create', '--use-pid', '--retry', str(retries),
+                   lockfile])
+  signal.alarm(0)
diff --git a/wifi/utils_test.py b/wifi/utils_test.py
index be91859..0224605 100755
--- a/wifi/utils_test.py
+++ b/wifi/utils_test.py
@@ -3,7 +3,11 @@
 """Tests for utils.py."""
 
 import collections
+import multiprocessing
 import os
+import shutil
+import sys
+import tempfile
 
 import utils
 from wvtest import wvtest
@@ -123,5 +127,84 @@
   wvtest.WVPASSEQ('g' * 63, utils.validate_and_sanitize_psk('g' * 63))
 
 
+@wvtest.wvtest
+def lock_test():
+  """Test utils.lock and utils._lockfile_create_retries."""
+  wvtest.WVPASSEQ(utils._lockfile_create_retries(0), 1)
+  wvtest.WVPASSEQ(utils._lockfile_create_retries(4), 1)
+  wvtest.WVPASSEQ(utils._lockfile_create_retries(5), 2)
+  wvtest.WVPASSEQ(utils._lockfile_create_retries(14), 2)
+  wvtest.WVPASSEQ(utils._lockfile_create_retries(15), 3)
+  wvtest.WVPASSEQ(utils._lockfile_create_retries(60), 5)
+
+  try:
+    temp_dir = tempfile.mkdtemp()
+    lockfile = os.path.join(temp_dir, 'lock_and_unlock_test')
+
+    def lock_until_qi_nonempty(qi, qo, timeout_sec):
+      try:
+        utils.lock(lockfile, timeout_sec)
+      except utils.BinWifiException:
+        qo.put('timed out')
+        return
+      qo.put('acquired')
+      wvtest.WVPASSEQ(qi.get(), 'release')
+      qo.put('released')
+      sys.exit(0)
+
+    # Use multiprocessing because we're using lockfile-create with --use-pid, so
+    # we need separate PIDs.
+    q1i = multiprocessing.Queue()
+    q1o = multiprocessing.Queue()
+    # The timeout here is 5 because occasionally it takes more than one second
+    # to acquire the lock, causing the test to hang.  Five seconds is enough to
+    # prevent this.
+    p1 = multiprocessing.Process(target=lock_until_qi_nonempty,
+                                 args=(q1i, q1o, 1))
+
+    q2i = multiprocessing.Queue()
+    q2o = multiprocessing.Queue()
+    p2 = multiprocessing.Process(target=lock_until_qi_nonempty,
+                                 args=(q2i, q2o, 10))
+
+    p1.start()
+    wvtest.WVPASSEQ(q1o.get(), 'acquired')
+
+    p2.start()
+    wvtest.WVPASS(q2o.empty())
+
+    q1i.put('release')
+    wvtest.WVPASSEQ(q1o.get(), 'released')
+    p1.join()
+    wvtest.WVPASSEQ(q2o.get(), 'acquired')
+
+    q2i.put('release')
+    wvtest.WVPASSEQ(q2o.get(), 'released')
+    p2.join()
+
+    # Now test that the timeout works.
+    q3i = multiprocessing.Queue()
+    q3o = multiprocessing.Queue()
+    p3 = multiprocessing.Process(target=lock_until_qi_nonempty,
+                                 args=(q3i, q3o, 1))
+
+    q4i = multiprocessing.Queue()
+    q4o = multiprocessing.Queue()
+    p4 = multiprocessing.Process(target=lock_until_qi_nonempty,
+                                 args=(q4i, q4o, 1))
+
+    p3.start()
+    wvtest.WVPASSEQ(q3o.get(), 'acquired')
+    p4.start()
+    wvtest.WVPASSEQ(q4o.get(), 'timed out')
+    p4.join()
+
+    q3i.put('release')
+    p3.join()
+
+  finally:
+    shutil.rmtree(temp_dir)
+
+
 if __name__ == '__main__':
   wvtest.wvtest_main()
diff --git a/wifi/wifi.py b/wifi/wifi.py
index c4fde30..fb38bcd 100755
--- a/wifi/wifi.py
+++ b/wifi/wifi.py
@@ -49,9 +49,11 @@
 Y,yottasecond-timeouts            Don't rotate any keys: PTK, GTK, or GMK
 P,persist                         For set commands, persist options so we can restore them with 'wifi restore'.  For stop commands, remove persisted options.
 S,interface-suffix=               Interface suffix []
+lock-timeout=                     How long, in seconds, to wait for another /bin/wifi process to finish before giving up. [60]
 """
 
 _FINGERPRINTS_DIRECTORY = '/tmp/wifi/fingerprints'
+_LOCKFILE = '/tmp/wifi/wifi'
 
 
 # pylint: disable=protected-access
@@ -981,6 +983,8 @@
   opt, _, extra = parser.parse(argv)
   stringify_options(opt)
 
+  utils.lock(_LOCKFILE, int(opt.lock_timeout))
+
   if not extra:
     parser.fatal('Must specify a command (see usage for details).')
     return 1
@@ -1003,6 +1007,7 @@
     }[extra[0]]
   except KeyError:
     parser.fatal('Unrecognized command %s' % extra[0])
+
   success = function(opt)
 
   if success: