Script to periodically sort hits log & fetch top hosts with unit tests.
Change-Id: I2970d4ca19c2b01b779420c9b7be190e04f87c3b
diff --git a/cache_warming/fetch_popular.py b/cache_warming/fetch_popular.py
new file mode 100644
index 0000000..fe0f2b4
--- /dev/null
+++ b/cache_warming/fetch_popular.py
@@ -0,0 +1,65 @@
+#!/usr/bin/python
+"""Pre-fetches top requested hosts.
+
+Sorts the dictionary stored in hits_log.json by number of hits
+and sends DNS requests to a predetermined number of the top hosts.
+"""
+
+import argparse
+import json
+import dns.resolver
+
+TOP_N = 50  # Maximum number of hosts that prefetch() queries.
+HITS_LOG_JSON_PATH = '/tmp/hits_log.json'  # Log read when run as a script.
+
+
+def sort_hits_log(path):
+  """Sorts hosts in hits log by number of hits.
+
+  Args:
+    path: Path of a JSON dictionary mapping each host to its hit record;
+      records are compared whole, so the hit count must come first.
+
+  Returns:
+    A list of hosts sorted by hit record in descending order.
+  """
+  try:
+    log_json = open(path, 'r')
+  except IOError:
+    print('unable to open ' + path)
+    raise  # Callers still see the original IOError.
+  with log_json:  # Close the handle even if parsing fails.
+    log = json.load(log_json)
+  return sorted(log, key=log.get, reverse=True)
+
+
+def prefetch(hosts, port, server):
+  """Sends a DNS query for each of the first TOP_N hosts.
+
+  Args:
+    hosts: Hosts to fetch, sorted by number of hits in descending order.
+    port: Port to which to send queries.
+    server: List of alternate nameservers to query; None or an empty
+      list keeps the resolver's own nameservers.
+  """
+  my_resolver = dns.resolver.Resolver()
+  my_resolver.port = port
+  if server:
+    my_resolver.nameservers = server
+  for host in hosts[:TOP_N]:
+    try:
+      my_resolver.query(host)
+    except dns.exception.DNSException:  # Keep warming the other hosts.
+      print('unable to resolve ' + host)
+
+
+if __name__ == '__main__':
+  parser = argparse.ArgumentParser()
+  # const=53: a bare '-p' falls back to the default port instead of None.
+  parser.add_argument('-p', '--port', nargs='?', default=53, const=53,
+                      type=int,
+                      help='port to which to send queries (default is 53).')
+  parser.add_argument('-s', '--server', nargs='*', type=str,
+                      help='alternate nameservers to query (default is None).')
+  args = parser.parse_args()
+  prefetch(sort_hits_log(HITS_LOG_JSON_PATH), args.port, args.server)
diff --git a/cache_warming/fetch_popular_test.py b/cache_warming/fetch_popular_test.py
new file mode 100644
index 0000000..ea9da8d
--- /dev/null
+++ b/cache_warming/fetch_popular_test.py
@@ -0,0 +1,39 @@
+#!/usr/bin/python
+"""Tests for fetch_popular.py."""
+
+import os
+import fetch_popular
+from wvtest import wvtest
+
+
+@wvtest.wvtest
+def testSortHitsLog_empty():
+  """An empty JSON dictionary sorts to an empty list of hosts."""
+  log_path = 'test_log.json'
+  try:
+    with open(log_path, 'w') as log_file:
+      log_file.write('{}')
+    hosts = fetch_popular.sort_hits_log(log_path)
+    wvtest.WVPASSEQ(hosts, [])
+  finally:
+    # Always delete the scratch file, even when the check fails.
+    os.remove(log_path)
+
+
+@wvtest.wvtest
+def testSortHitsLog_nonEmpty():
+  """Hosts come back ordered from most hits to fewest."""
+  log_path = 'test_log.json'
+  contents = ('{"www.google.com": [2, "123456789"], "www.yahoo.com":'
+              ' [1,"987654321"], "www.espn.com": [3, "135792468"]}')
+  try:
+    with open(log_path, 'w') as log_file:
+      log_file.write(contents)
+    hosts = fetch_popular.sort_hits_log(log_path)
+    wvtest.WVPASSEQ(hosts, ['www.espn.com', 'www.google.com', 'www.yahoo.com'])
+  finally:
+    os.remove(log_path)
+
+
+if __name__ == '__main__':
+  wvtest.wvtest_main()  # Hand off to wvtest's entry point when run as a script.
diff --git a/cache_warming/warm_cache b/cache_warming/warm_cache
new file mode 100644
index 0000000..fa334d0
--- /dev/null
+++ b/cache_warming/warm_cache
@@ -0,0 +1,9 @@
+#!/bin/sh
+
+# Every 60 seconds, runs log_hits.py and then fetch_popular.py, so the
+# most requested hosts are re-queried and the DNS cache stays warm.
+
+while sleep 60; do
+ python log_hits.py
+ python fetch_popular.py
+done