Merge changes I2970d4ca,I6313d283

* changes:
  Script to periodically sort hits log & fetch top hosts with unit tests.
  Script to update host hits log with unit tests.
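
For reference, both scripts share the hits log format: a JSON dictionary
mapping each host to [hit count, most recent hit time]. A minimal sketch
with hypothetical sample data (the real file lives at /tmp/hits_log.json):

  {"www.example.com": [3, "1499113200"], "www.example.org": [1, "1499112000"]}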
diff --git a/cache_warming/fetch_popular.py b/cache_warming/fetch_popular.py
new file mode 100644
index 0000000..fe0f2b4
--- /dev/null
+++ b/cache_warming/fetch_popular.py
@@ -0,0 +1,76 @@
+#!/usr/bin/python
+"""Pre-fetches top requested hosts.
+
+Sorts the dictionary stored in hits_log.json by number of hits
+and sends DNS queries for the top TOP_N hosts.
+"""
+
+import argparse
+import json
+import dns.exception
+import dns.resolver
+
+TOP_N = 50
+HITS_LOG_JSON_PATH = '/tmp/hits_log.json'
+
+
+def sort_hits_log(path):
+  """Sorts hosts in hits log by number of hits.
+
+  Args:
+    path: Path of JSON representation of dictionary mapping host
+          to tuple of hit count and most recent hit time.
+
+  Returns:
+    A list of hosts sorted by number of hits in descending order.
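+
+  Example:
+    Given {'a.com': [2, '100'], 'b.com': [5, '200']} (hypothetical data),
+    returns ['b.com', 'a.com'].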
+  """
+  try:
+    log_json = open(path, 'r')
+  except IOError:
+    print 'unable to open ' + path
+    raise
+  else:
+    log = json.load(log_json)
+    log_json.close()
+    return sorted(log, key=log.get, reverse=True)
+
+
+def prefetch(hosts, port, server):
+  """Pre-fetches list of hosts.
+
+  Args:
+    hosts: List of hosts to be fetched sorted by number of hits
+           in descending order.
+    port: Port to which to send queries (default is 53).
+    server: List of alternate nameservers to query (default is None).
+  """
+  my_resolver = dns.resolver.Resolver()
+  my_resolver.port = port
+  if server is not None:
+    my_resolver.nameservers = server
+
+  if len(hosts) > TOP_N:
+    hosts = hosts[:TOP_N]
+  for host in hosts:
+    try:
+      my_resolver.query(host)
+    except dns.exception.DNSException:
+      pass  # Skip hosts that fail to resolve; keep warming the rest.
+
+
+if __name__ == '__main__':
+  parser = argparse.ArgumentParser()
+  parser.add_argument('-p', '--port', nargs='?', default=53, type=int,
+                      help='port to which to send queries (default is 53).')
+  parser.add_argument('-s', '--server', nargs='*', type=str,
+                      help='alternate nameservers to query (default is None).')
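+  # Example invocation (hypothetical values):
+  #   python fetch_popular.py --port 5353 --server 8.8.8.8 8.8.4.4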
+  args = parser.parse_args()
+
+  sorted_log = sort_hits_log(HITS_LOG_JSON_PATH)
+  prefetch(sorted_log, args.port, args.server)
diff --git a/cache_warming/fetch_popular_test.py b/cache_warming/fetch_popular_test.py
new file mode 100644
index 0000000..ea9da8d
--- /dev/null
+++ b/cache_warming/fetch_popular_test.py
@@ -0,0 +1,39 @@
+#!/usr/bin/python
+"""Tests for fetch_popular.py."""
+
+import os
+import fetch_popular
+from wvtest import wvtest
+
+
+@wvtest.wvtest
+def testSortHitsLog_empty():
+  try:
+    file_name = 'test_log.json'
+    with open(file_name, 'w') as f:
+      f.write('{}')
+
+    expected = []
+    actual = fetch_popular.sort_hits_log(file_name)
+    wvtest.WVPASSEQ(actual, expected)
+  finally:
+    os.remove(file_name)
+
+
+@wvtest.wvtest
+def testSortHitsLog_nonEmpty():
+  try:
+    file_name = 'test_log.json'
+    with open(file_name, 'w') as f:
+      f.write('{"www.google.com": [2, "123456789"], "www.yahoo.com":'
+      	       ' [1,"987654321"], "www.espn.com": [3, "135792468"]}')
+
+    expected = ['www.espn.com', 'www.google.com', 'www.yahoo.com']
+    actual = fetch_popular.sort_hits_log(file_name)
+    wvtest.WVPASSEQ(actual, expected)
+  finally:
+    os.remove(file_name)
+
+
+if __name__ == '__main__':
+  wvtest.wvtest_main()
diff --git a/cache_warming/log_hits.py b/cache_warming/log_hits.py
new file mode 100644
index 0000000..5fbc2e8
--- /dev/null
+++ b/cache_warming/log_hits.py
@@ -0,0 +1,136 @@
+#!/usr/bin/python
+"""Updates most recent hit time and hit count for hosts in hits log.
+
+Reads queries from dns_query_log.txt and updates the hits log
+dictionary with the hit count and most recent hit time for each host.
+Saves the hits log dictionary as hits_log.json for future runs.
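+
+Each line of dns_query_log.txt is expected to look like
+'1499113200 www.example.com' (hypothetical sample entry).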
+"""
+
+import json
+import os.path
+
+DNS_QUERY_LOG_PATH = '/tmp/dns_query_log.txt'
+HITS_LOG_JSON_PATH = '/tmp/hits_log.json'
+
+
+def process_line(log, ln):
+  """Processes a line of DNS query log and updates hits log.
+
+  Parses line and updates most recent hit time and hit count
+  for host in hits log.
+
+  Args:
+    log: Dictionary mapping host to tuple of hit count and most
+         recent hit time.
+    ln: String representing a line of the DNS query log in the
+        format '[Unix time] [host name]'.
+
+  Returns:
+    An updated dictionary mapping host to tuple of hit count and
+    most recent hit time.
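+
+  Example:
+    process_line({}, '1499113200 www.example.com\n') (hypothetical input)
+    returns {'www.example.com': (1, '1499113200')}.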
+  """
+  time, _, host = ln.strip().partition(' ')
+  if host in log:
+    log[host] = (log[host][0] + 1, time)
+  else:
+    log[host] = (1, time)
+  return log
+
+
+def read_dns_query_log(path):
+  """Reads a DNS query log.
+
+  Processes each line of file, updating a hits log.
+
+  Args:
+    path: Path of DNS query log to be read.
+
+  Returns:
+    An updated dictionary mapping host to tuple of hit count and
+    most recent hit time.
+  """
+  try:
+    dns_query_log = open(path, 'r')
+  except IOError:
+    print 'unable to open ' + path
+    raise
+  else:
+    log = {}
+    for line in dns_query_log:
+      log = process_line(log, line)
+    dns_query_log.close()
+    return log
+
+
+def clear_dns_query_log(path):
+  """Clears a DNS query log.
+
+  Opens file for write without writing anything.
+
+  Args:
+    path: Path of DNS query log to be cleared.
+  """
+  try:
+    open(path, 'w').close()
+  except IOError:
+    print 'unable to open ' + path
+
+
+def merge_logs(log, hist):
+  """Merges two hit logs.
+
+  Merges smaller hit log to larger hit log. Uses most recent hit
+  time and sums hit count from each log for each host.
+
+  Args:
+    log: Dictionary mapping host to tuple of hit count and
+         most recent hit time.
+    hist: Similar dictionary representing previous query history.
+
+  Returns:
+    An updated dictionary mapping host to tuple of hit count and
+    most recent hit time.
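+
+  Example:
+    merge_logs({'a.com': (1, '200')}, {'a.com': (2, '100')}) returns
+    {'a.com': (3, '200')} (hypothetical values): hit counts are summed
+    and the hit time from log is kept.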
+  """
+  hist_larger = len(hist) > len(log)
+  big_log, small_log = (hist, log) if hist_larger else (log, hist)
+  for k, v in small_log.iteritems():
+    if k in big_log:
+      time = log[k][1]
+      big_log[k] = (big_log[k][0] + v[0], time)
+    else:
+      big_log[k] = (v[0], v[1])
+  return big_log
+
+
+if __name__ == '__main__':
+  hit_log = read_dns_query_log(DNS_QUERY_LOG_PATH)
+  clear_dns_query_log(DNS_QUERY_LOG_PATH)
+  if os.path.isfile(HITS_LOG_JSON_PATH):
+    hist_json = open(HITS_LOG_JSON_PATH, 'r')
+    hit_log_hist = json.load(hist_json)
+    hist_json.close()
+
+    hist_json = open(HITS_LOG_JSON_PATH, 'w')
+    json.dump(merge_logs(hit_log, hit_log_hist), hist_json)
+    hist_json.close()
+  else:
+    try:
+      hist_json = open(HITS_LOG_JSON_PATH, 'w')
+    except IOError:
+      print 'unable to open ' + HITS_LOG_JSON_PATH
+      raise
+    else:
+      json.dump(hit_log, hist_json)
+      hist_json.close()
diff --git a/cache_warming/log_hits_test.py b/cache_warming/log_hits_test.py
new file mode 100644
index 0000000..26944ce
--- /dev/null
+++ b/cache_warming/log_hits_test.py
@@ -0,0 +1,169 @@
+#!/usr/bin/python
+"""Tests for log_hits.py."""
+
+import os
+import log_hits
+from wvtest import wvtest
+
+
+@wvtest.wvtest
+def testProcessLine_firstHit():
+  line = '123456789 www.yahoo.com\n'
+  expected = {'www.yahoo.com': (1, '123456789')}
+  actual = log_hits.process_line({}, line)
+  wvtest.WVPASSEQ(actual, expected)
+
+
+@wvtest.wvtest
+def testProcessLine_updateHitCount():
+  line = '123456789 www.yahoo.com\n'
+  log = {'www.yahoo.com': (1, '123456789')}
+  expected = 2
+  actual = log_hits.process_line(log, line)['www.yahoo.com'][0]
+  wvtest.WVPASSEQ(actual, expected)
+
+
+@wvtest.wvtest
+def testProcessLine_updateRecentHitTime():
+  line = '123456789 www.yahoo.com\n'
+  log = {'www.yahoo.com': (1, '987654321')}
+  expected = '123456789'
+  actual = log_hits.process_line(log, line)['www.yahoo.com'][1]
+  wvtest.WVPASSEQ(actual, expected)
+
+
+@wvtest.wvtest
+def testMergeLogs_emptyLog():
+  hist = {'www.yahoo.com': (1, '123456789')}
+  expected = hist
+  actual = log_hits.merge_logs({}, hist)
+  wvtest.WVPASSEQ(actual, expected)
+
+
+@wvtest.wvtest
+def testMergeLogs_emptyHist():
+  log = {'www.yahoo.com': (1, '123456789')}
+  expected = log
+  actual = log_hits.merge_logs(log, {})
+  wvtest.WVPASSEQ(actual, expected)
+
+
+@wvtest.wvtest
+def testMergeLogs_bothEmpty():
+  expected = {}
+  actual = log_hits.merge_logs({}, {})
+  wvtest.WVPASSEQ(actual, expected)
+
+
+@wvtest.wvtest
+def testMergeLogs_noOverlap():
+  log = {'www.yahoo.com': (1, '123456789')}
+  hist = {'www.google.com': (1, '123456789')}
+  expected = {
+      'www.yahoo.com': (1, '123456789'),
+      'www.google.com': (1, '123456789')
+  }
+  actual = log_hits.merge_logs(log, hist)
+  wvtest.WVPASSEQ(actual, expected)
+
+
+@wvtest.wvtest
+def testMergeLogs_updateHitCount():
+  log = {'www.yahoo.com': (1, '987654321')}
+  hist = {'www.yahoo.com': (1, '123456789')}
+  expected = 2
+  actual = log_hits.merge_logs(log, hist)['www.yahoo.com'][0]
+  wvtest.WVPASSEQ(actual, expected)
+
+
+@wvtest.wvtest
+def testMergeLogs_updateRecentHitTime():
+  log = {'www.yahoo.com': (1, '987654321')}
+  hist = {'www.yahoo.com': (1, '123456789')}
+  expected = '987654321'
+  actual = log_hits.merge_logs(log, hist)['www.yahoo.com'][1]
+  wvtest.WVPASSEQ(actual, expected)
+
+
+@wvtest.wvtest
+def testMergeLogs_histLargerNoOverlap():
+  log = {'www.yahoo.com': (1, '123456789')}
+  hist = {
+      'www.google.com': (1, '123456789'),
+      'www.espn.com': (1, '123456789')
+  }
+  expected = {
+      'www.yahoo.com': (1, '123456789'),
+      'www.google.com': (1, '123456789'),
+      'www.espn.com': (1, '123456789')
+  }
+  actual = log_hits.merge_logs(log, hist)
+  wvtest.WVPASSEQ(actual, expected)
+
+
+@wvtest.wvtest
+def testMergeLogs_histLargerUpdateHitCount():
+  log = {'www.yahoo.com': (1, '987654321')}
+  hist = {
+      'www.yahoo.com': (1, '123456789'),
+      'www.google.com': (1, '123456789')
+  }
+  expected = 2
+  actual = log_hits.merge_logs(log, hist)['www.yahoo.com'][0]
+  wvtest.WVPASSEQ(actual, expected)
+
+
+@wvtest.wvtest
+def testMergeLogs_histLargerUpdateRecentHitTime():
+  log = {'www.yahoo.com': (1, '987654321')}
+  hist = {
+      'www.yahoo.com': (1, '123456789'),
+      'www.google.com': (1, '123456789')
+  }
+  expected = '987654321'
+  actual = log_hits.merge_logs(log, hist)['www.yahoo.com'][1]
+  wvtest.WVPASSEQ(actual, expected)
+
+
+@wvtest.wvtest
+def testReadDNSQueryLog_empty():
+  file_name = 'test_log.txt'
+  open(file_name, 'w').close()
+  expected = {}
+  actual = log_hits.read_dns_query_log(file_name)
+  wvtest.WVPASSEQ(actual, expected)
+  os.remove(file_name)
+
+
+@wvtest.wvtest
+def testReadDNSQueryLog_nonEmpty():
+  file_name = 'test_log.txt'
+  f = open(file_name, 'w')
+  f.write('123456789 www.yahoo.com\n987654321 www.google.com\n'
+          '135792468 www.yahoo.com\n')
+  f.close()
+  expected = {
+      'www.yahoo.com': (2, '135792468'),
+      'www.google.com': (1, '987654321')
+  }
+  actual = log_hits.read_dns_query_log(file_name)
+  wvtest.WVPASSEQ(actual, expected)
+  os.remove(file_name)
+
+
+@wvtest.wvtest
+def testClearDNSQueryLog():
+  file_name = 'test_log.txt'
+  f = open(file_name, 'w')
+  f.write('testing clear_dns_query_log()\n')
+  f.close()
+
+  log_hits.clear_dns_query_log(file_name)
+  expected = 0
+  actual = os.stat(file_name).st_size
+  wvtest.WVPASSEQ(actual, expected)
+  os.remove(file_name)
+
+
+if __name__ == '__main__':
+  wvtest.wvtest_main()
diff --git a/cache_warming/warm_cache b/cache_warming/warm_cache
new file mode 100644
index 0000000..fa334d0
--- /dev/null
+++ b/cache_warming/warm_cache
@@ -0,0 +1,9 @@
+#!/bin/sh
+
+# Periodically processes logged DNS queries and prefetches
+# the most requested hosts, warming the cache.
+
+while sleep 60; do
+  python log_hits.py
+  python fetch_popular.py
+done
diff --git a/cache_warming/wvtest b/cache_warming/wvtest
new file mode 120000
index 0000000..75927a5
--- /dev/null
+++ b/cache_warming/wvtest
@@ -0,0 +1 @@
+../cmds/wvtest
\ No newline at end of file