Script to update host hits log with unit tests.
Change-Id: I6313d283538b8aa8513967f64a32a32662dc4d99
diff --git a/cache_warming/log_hits.py b/cache_warming/log_hits.py
new file mode 100644
index 0000000..5fbc2e8
--- /dev/null
+++ b/cache_warming/log_hits.py
@@ -0,0 +1,124 @@
+#!/usr/bin/python
+"""Updates most recent hit time and hit count for hosts in hits log.
+
+Reads queries from dns_query_log.txt and updates hosts in hits log
+dictionary with most recent hit time and hit count for each host.
+Saves hits log dictionary as hits_log.json for future modification.
+"""
+
+import json
+import os.path
+
+DNS_QUERY_LOG_PATH = '/tmp/dns_query_log.txt'
+HITS_LOG_JSON_PATH = '/tmp/hits_log.json'
+
+
+def process_line(log, ln):
+    """Processes a line of DNS query log and updates hits log.
+
+    Parses line and updates most recent hit time and hit count
+    for host in hits log. Lines without a host name are ignored.
+
+    Args:
+        log: Dictionary mapping host to tuple of hit count and most
+            recent hit time. It is updated in place.
+        ln: String representing a line of DNS query log of the
+            format '[Unix time] [host name]'; the trailing newline
+            is optional.
+
+    Returns:
+        The same dictionary, mapping host to tuple of hit count and
+        most recent hit time.
+    """
+    # rstrip, not ln[:-1]: the last line may lack a trailing newline.
+    time, _, host = ln.rstrip('\r\n').partition(' ')
+    if host:
+        log[host] = (log[host][0] + 1 if host in log else 1, time)
+    return log
+
+
+def read_dns_query_log(path):
+    """Reads a DNS query log.
+
+    Processes each line of file, updating a hits log.
+
+    Args:
+        path: Path of DNS query log to be read.
+
+    Returns:
+        A dictionary mapping host to tuple of hit count and most
+        recent hit time; empty if the file cannot be opened.
+    """
+    log = {}
+    try:
+        dns_query_log = open(path, 'r')
+    except IOError:
+        print 'unable to open ' + path
+        return log  # still a dict, so callers can merge or dump it
+    with dns_query_log:
+        for line in dns_query_log:
+            log = process_line(log, line)
+    return log
+
+
+def clear_dns_query_log(path):
+    """Empties a DNS query log.
+
+    Truncates the file by opening it for writing and closing it again.
+
+    Args:
+        path: Path of DNS query log to be emptied.
+    """
+    try:
+        truncated = open(path, 'w')
+        truncated.close()
+    except IOError:
+        print 'unable to open ' + path
+
+
+def merge_logs(log, hist):
+    """Combines a new hit log with the previous hit history.
+
+    The smaller dictionary is folded into the larger one in place. A
+    host found in both gets the summed hit count and log's hit time.
+
+    Args:
+        log: Dictionary mapping host to tuple of hit count and
+            most recent hit time.
+        hist: Similar dictionary representing previous query history.
+
+    Returns:
+        The dictionary merged into (hist if it is larger, else log),
+        mapping host to tuple of hit count and most recent hit time.
+    """
+    target, source = (hist, log) if len(hist) > len(log) else (log, hist)
+    for host, hit in source.iteritems():
+        if host in target:
+            latest = log[host][1]
+            entry = (target[host][0] + hit[0], latest)
+        else:
+            entry = (hit[0], hit[1])
+        target[host] = entry
+    return target
+
+
+if __name__ == '__main__':
+    # Collect the hits logged since the last run, then empty the
+    # query log so that they are not counted again next time.
+    hit_log = read_dns_query_log(DNS_QUERY_LOG_PATH)
+    clear_dns_query_log(DNS_QUERY_LOG_PATH)
+
+    # Fold in the history saved by earlier runs, if there is any.
+    if os.path.isfile(HITS_LOG_JSON_PATH):
+        with open(HITS_LOG_JSON_PATH, 'r') as hist_json:
+            hit_log = merge_logs(hit_log, json.load(hist_json))
+
+    # One guarded write path for both cases; 'with' closes the file
+    # even if json.dump() raises.
+    try:
+        hist_json = open(HITS_LOG_JSON_PATH, 'w')
+    except IOError:
+        print 'unable to open ' + HITS_LOG_JSON_PATH
+        raise
+    with hist_json:
+        json.dump(hit_log, hist_json)
diff --git a/cache_warming/log_hits_test.py b/cache_warming/log_hits_test.py
new file mode 100644
index 0000000..26944ce
--- /dev/null
+++ b/cache_warming/log_hits_test.py
@@ -0,0 +1,169 @@
+#!/usr/bin/python
+"""Tests for log_hits.py."""
+
+import os
+import log_hits
+from wvtest import wvtest
+
+
+@wvtest.wvtest
+def testProcessLine_firstHit():
+    """A host seen for the first time is logged with one hit."""
+    expected = {'www.yahoo.com': (1, '123456789')}
+    actual = log_hits.process_line({}, '123456789 www.yahoo.com\n')
+    wvtest.WVPASSEQ(actual, expected)
+
+
+@wvtest.wvtest
+def testProcessLine_updateHitCount():
+    """A repeated host has its hit count incremented."""
+    seen = {'www.yahoo.com': (1, '123456789')}
+    updated = log_hits.process_line(seen, '123456789 www.yahoo.com\n')
+    actual, expected = updated['www.yahoo.com'][0], 2
+    wvtest.WVPASSEQ(actual, expected)
+
+
+@wvtest.wvtest
+def testProcessLine_updateRecentHitTime():
+    """A repeated host takes the hit time of the processed line."""
+    seen = {'www.yahoo.com': (1, '987654321')}
+    updated = log_hits.process_line(seen, '123456789 www.yahoo.com\n')
+    actual, expected = updated['www.yahoo.com'][1], '123456789'
+    wvtest.WVPASSEQ(actual, expected)
+
+
+@wvtest.wvtest
+def testMergeLogs_emptyLog():
+    """Merging an empty log returns the history unchanged."""
+    expected = {'www.yahoo.com': (1, '123456789')}
+    actual = log_hits.merge_logs({}, expected)
+    wvtest.WVPASSEQ(actual, expected)
+
+
+@wvtest.wvtest
+def testMergeLogs_emptyHist():
+    """Merging into an empty history returns the log unchanged."""
+    expected = {'www.yahoo.com': (1, '123456789')}
+    actual = log_hits.merge_logs(expected, {})
+    wvtest.WVPASSEQ(actual, expected)
+
+
+@wvtest.wvtest
+def testMergeLogs_bothEmpty():
+    """Merging two empty logs gives an empty log."""
+    actual, expected = log_hits.merge_logs({}, {}), {}
+    wvtest.WVPASSEQ(actual, expected)
+
+
+@wvtest.wvtest
+def testMergeLogs_noOverlap():
+    """Hosts found in only one of the two logs are all kept."""
+    expected = {
+        'www.google.com': (1, '123456789'),
+        'www.yahoo.com': (1, '123456789'),
+    }
+    actual = log_hits.merge_logs({'www.yahoo.com': (1, '123456789')},
+                                 {'www.google.com': (1, '123456789')})
+    wvtest.WVPASSEQ(actual, expected)
+
+
+@wvtest.wvtest
+def testMergeLogs_updateHitCount():
+    """Hit counts of a host present in both logs are summed."""
+    merged = log_hits.merge_logs({'www.yahoo.com': (1, '987654321')},
+                                 {'www.yahoo.com': (1, '123456789')})
+    actual, expected = merged['www.yahoo.com'][0], 2
+    wvtest.WVPASSEQ(actual, expected)
+
+
+@wvtest.wvtest
+def testMergeLogs_updateRecentHitTime():
+    """A host present in both logs keeps the hit time from the log."""
+    merged = log_hits.merge_logs({'www.yahoo.com': (1, '987654321')},
+                                 {'www.yahoo.com': (1, '123456789')})
+    actual, expected = merged['www.yahoo.com'][1], '987654321'
+    wvtest.WVPASSEQ(actual, expected)
+
+
+@wvtest.wvtest
+def testMergeLogs_histLargerNoOverlap():
+    """A larger history absorbs a host found only in the log."""
+    old_hits = {
+        'www.espn.com': (1, '123456789'),
+        'www.google.com': (1, '123456789'),
+    }
+    expected = {
+        'www.espn.com': (1, '123456789'),
+        'www.google.com': (1, '123456789'),
+        'www.yahoo.com': (1, '123456789'),
+    }
+    actual = log_hits.merge_logs({'www.yahoo.com': (1, '123456789')}, old_hits)
+    wvtest.WVPASSEQ(actual, expected)
+
+
+@wvtest.wvtest
+def testMergeLogs_histLargerUpdateHitCount():
+    """Hit counts are summed when the history is the larger log."""
+    old_hits = {
+        'www.google.com': (1, '123456789'),
+        'www.yahoo.com': (1, '123456789'),
+    }
+    merged = log_hits.merge_logs({'www.yahoo.com': (1, '987654321')}, old_hits)
+    actual, expected = merged['www.yahoo.com'][0], 2
+    wvtest.WVPASSEQ(actual, expected)
+
+
+@wvtest.wvtest
+def testMergeLogs_histLargerUpdateRecentHitTime():
+    """The log's hit time wins when the history is the larger log."""
+    old_hits = {
+        'www.google.com': (1, '123456789'),
+        'www.yahoo.com': (1, '123456789'),
+    }
+    merged = log_hits.merge_logs({'www.yahoo.com': (1, '987654321')}, old_hits)
+    actual, expected = merged['www.yahoo.com'][1], '987654321'
+    wvtest.WVPASSEQ(actual, expected)
+
+
+@wvtest.wvtest
+def testReadDNSQueryLog_empty():
+    """An empty query log is read as an empty hits log."""
+    log_path = 'test_log.txt'
+    open(log_path, 'w').close()
+    actual, expected = log_hits.read_dns_query_log(log_path), {}
+    wvtest.WVPASSEQ(actual, expected)
+    os.remove(log_path)
+
+
+@wvtest.wvtest
+def testReadDNSQueryLog_nonEmpty():
+    """Hits per host are counted and the last hit time is kept."""
+    log_path = 'test_log.txt'
+    with open(log_path, 'w') as query_log:
+        query_log.write('123456789 www.yahoo.com\n987654321 www.google.com\n'
+                        '135792468 www.yahoo.com\n')
+    expected = {
+        'www.google.com': (1, '987654321'),
+        'www.yahoo.com': (2, '135792468'),
+    }
+    actual = log_hits.read_dns_query_log(log_path)
+    wvtest.WVPASSEQ(actual, expected)
+    os.remove(log_path)
+
+
+@wvtest.wvtest
+def testClearDNSQueryLog():
+    """Clearing a non-empty query log leaves a zero-byte file."""
+    log_path = 'test_log.txt'
+    with open(log_path, 'w') as query_log:
+        query_log.write('testing clear_dns_query_log()\n')
+
+    log_hits.clear_dns_query_log(log_path)
+    expected = 0
+    actual = os.stat(log_path).st_size
+    wvtest.WVPASSEQ(actual, expected)
+    os.remove(log_path)
+
+
+if __name__ == '__main__':
+ wvtest.wvtest_main()  # NOTE(review): presumably runs the decorated tests above; confirm
diff --git a/cache_warming/wvtest b/cache_warming/wvtest
new file mode 120000
index 0000000..75927a5
--- /dev/null
+++ b/cache_warming/wvtest
@@ -0,0 +1 @@
+../cmds/wvtest
\ No newline at end of file