| #!/usr/bin/python |
| # Copyright 2011 Google Inc. All Rights Reserved. |
| # |
| """A command-line tool for uploading to diag.cpe.gfsvc.com.""" |
| |
| __author__ = 'apenwarr@google.com (Avery Pennarun)' |
| |
| |
| import hmac |
| import os.path |
| import random |
| import re |
| import signal |
| import StringIO |
| import struct |
| import sys |
| import time |
| import urllib |
| import zlib |
| import options |
| import pycurl |
| |
# Usage text and option table handed to options.Options() in main().
# The lines after the '--' separator each declare one option; a trailing
# [...] looks like the option's default value (see --server) -- confirm
# against the options module.
optspec = """
upload-logs [options...] <filenames...>
--
s,server= The server URL [https://diag.cpe.gfsvc.com]
k,key= Add a key/value pair (format "-k key=value")
stdin= Also upload stdin, with the given virtual filename
consensus_key= Use a hard-coded consensus_key
"""
| |
# Initial retry time for the exponential backoff retry loop.
# This will create a backoff retry, with times centered at:
#   30, 60, 120, 240, 480, 480
# Both values are in seconds (they end up in time.sleep()); each actual
# sleep is jittered by +/- 50% around the value listed above.
RETRY_INIT_DELAY = 30
RETRY_MAX_DELAY = 480
| |
| |
# MAC Address anonymization
# Matches six two-digit hex groups separated by '-', ':' or '_'.
MACADDR_RE = re.compile(r'([0-9a-fA-F]{2}(?:[-:_][0-9a-fA-F]{2}){5})')
# Alphabets used by Trigraph(): SOFT supplies the middle letter, HARD the
# first and last one.  26 * 7 * 26 combinations cover every 12-bit value.
SOFT = 'AEIOUY' 'V'
HARD = 'BCDFGHJKLMNPQRSTVWXYZ' 'AEIOU'
# HMAC key used by AnonymizeMAC().  Kept in a one-element list so that
# main() can fill it in place at startup.
CONSENSUS_KEY = ['']
# File main() reads the key from when --consensus_key is not given.
CONSENSUS_KEY_FILE = '/tmp/waveguide/consensus_key'
| |
| |
class HttpError(Exception):
    """Raised by HttpDo() when a request does not end with HTTP 200.

    Attributes:
      status: the numeric HTTP status code that caused the error.
    """

    def __init__(self, status):
        # The status has to be stored first: the exception message is
        # produced by __str__(), which reads it.
        self.status = status
        message = str(self)
        super(HttpError, self).__init__(message)

    def __str__(self):
        return 'http status: %d' % (self.status)
| |
| |
def HttpDo(method, url, post_data=None, content_type=None):
    """Make an HTTP(S) request using pycurl and return the response body.

    The request is attempted over IPv6 first and, if that does not end in
    HTTP 200, once more over IPv4.  Redirects are not followed.

    Args:
      method: 'GET' or 'POST'.
      url: an http:// or https:// URL.
      post_data: the request body; must not be None for a POST.
      content_type: optional value for the Content-Type request header.

    Returns:
      The response body of the first attempt that finished with HTTP 200.

    Raises:
      HttpError: with status 302 as soon as any attempt is answered with a
        302 (callers treat that as a successful POST), otherwise with the
        status of the last attempt once both address families failed.
        pycurl-level failures are reported as status 500.
      AssertionError: on an unsupported scheme or method, or a POST without
        post_data.
    """
    proto, _ = urllib.splittype(url)
    assert proto.lower() in ('http', 'https')
    assert method in ('GET', 'POST')
    # The log upload server does not take kindly to Expect: 100-continue
    # so remove that.
    headers = ['User-Agent: upload-logs', 'Expect:']
    if content_type:
        headers.append('Content-Type: %s' % content_type)
    # You'd think that internally Curl would do this... but no it doesn't.
    for resolver in [pycurl.IPRESOLVE_V6, pycurl.IPRESOLVE_V4]:
        # Use a fresh response buffer for every attempt.  With a shared
        # buffer, anything received during a failed IPv6 attempt would be
        # prepended to the body returned by a successful IPv4 attempt.
        outdata = StringIO.StringIO()
        curl = pycurl.Curl()
        try:
            curl.setopt(pycurl.IPRESOLVE, resolver)
            curl.setopt(pycurl.CONNECTTIMEOUT, 5)
            curl.setopt(pycurl.WRITEFUNCTION, outdata.write)
            curl.setopt(pycurl.FOLLOWLOCATION, 0)
            curl.setopt(pycurl.SSL_VERIFYPEER, 1)
            curl.setopt(pycurl.SSL_VERIFYHOST, 2)
            # Present the device key/certificate when they are installed.
            if os.path.exists('/etc/ssl/private/device.key'):
                curl.setopt(pycurl.SSLKEY, '/etc/ssl/private/device.key')
            if os.path.exists('/etc/ssl/certs/device.pem'):
                curl.setopt(pycurl.SSLCERT, '/etc/ssl/certs/device.pem')
            curl.setopt(pycurl.URL, url)
            curl.setopt(pycurl.HTTPHEADER, headers)
            if method == 'GET':
                curl.setopt(pycurl.HTTPGET, True)
            else:
                assert post_data is not None
                request_buffer = StringIO.StringIO(post_data)

                def Ioctl(cmd):
                    # Rewind the body when curl asks to restart the read.
                    if cmd == pycurl.IOCMD_RESTARTREAD:
                        request_buffer.seek(0)
                curl.setopt(pycurl.POST, True)
                curl.setopt(pycurl.IOCTLFUNCTION, Ioctl)
                curl.setopt(pycurl.READFUNCTION, request_buffer.read)
                curl.setopt(pycurl.POSTFIELDSIZE, len(post_data))
            try:
                curl.perform()
                http_status = curl.getinfo(pycurl.HTTP_CODE)
            except pycurl.error as exc:
                print('pycurl exception: %s' % (exc,))
                http_status = 500
        finally:
            # Release the handle even when setup or an assertion failed.
            curl.close()
        if http_status == 200:
            return outdata.getvalue()
        # 302 is success for posts; it is reported to the caller through
        # the exception rather than by falling back to the other protocol.
        if http_status == 302:
            raise HttpError(http_status)
        if resolver == pycurl.IPRESOLVE_V6:
            print('IPv6 failed with http error: %d' % http_status)
        else:
            print('IPv4 failed with http error: %d' % http_status)

    # If neither of V6 or V4 worked, raise an exception.
    raise HttpError(http_status)
| |
| |
def Trigraph(num):
    """Encode a value from 0..4095 as a three-letter syllable.

    The value is treated as a mixed-radix number: the first and last letters
    are drawn from HARD, the middle letter from SOFT.

    Args:
      num: integer in the range 0..4095.

    Returns:
      A three-character string HARD[x] + SOFT[y] + HARD[z].
    """
    soft_count = len(SOFT)
    hard_count = len(HARD)
    # The two alphabets must be able to represent every 12-bit value.
    assert hard_count * soft_count * hard_count >= 4096
    # Peel the digits off least-significant first: last, middle, first.
    remainder, last = divmod(num, hard_count)
    first, middle = divmod(remainder, soft_count)
    return HARD[first] + SOFT[middle] + HARD[last]
| |
| |
def WordFromBinary(s):
    """Encode a binary blob into a string of pronounceable syllables.

    The input is consumed three bytes at a time.  Each group is read as a
    big-endian integer and split into two 12-bit values, and each value is
    rendered by Trigraph(), so every group (including a shorter final one,
    which is zero-padded on the left) yields six letters.

    Args:
      s: the binary string to encode.

    Returns:
      The concatenated syllables; an empty string for empty input.
    """
    values = []
    remaining = s
    while remaining:
        chunk, remaining = remaining[:3], remaining[3:]
        # Left-pad to the four bytes needed to unpack an unsigned 32-bit int.
        (bits,) = struct.unpack('!I', chunk.rjust(4, '\0'))
        values.append((bits >> 12) & 0xfff)
        values.append(bits & 0xfff)
    return ''.join([Trigraph(value) for value in values])
| |
| |
| # Note(apenwarr): There are a few ways to do this. I elected to go with |
| # short human-usable strings (allowing for the small possibility of |
| # collisions) since the log messages will probably be "mostly" used by |
| # humans. |
| # |
| # An alternative would be to use "format preserving encryption" (basically |
| # a secure 1:1 mapping of unencrypted to anonymized, in the same number of |
| # bits) and then produce longer "words" with no possibility of collision. |
| # But with our current WordFromBinary() implementation, that would be |
| # 12 characters long, which is kind of inconvenient and long. |
| # |
def AnonymizeMAC(macmatch):
    """Replace one matched MAC address with a short keyed pseudonym.

    Used as the replacement callback for MACADDR_RE.sub().

    Args:
      macmatch: regex match object whose group(0) is a 17-character MAC
        address with ':', '_' or '-' between the six hex pairs.

    Returns:
      The first six characters of the WordFromBinary() encoding of the HMAC
      (keyed with CONSENSUS_KEY[0]) of the six raw address bytes.
    """
    mac_text = macmatch.group(0)
    assert len(mac_text) == 17
    # Convert the textual hex pairs into the raw address bytes, so the
    # separator and the letter case used in the log do not affect the result.
    hex_pairs = re.split('[:_-]', mac_text)
    raw_mac = ''.join(chr(int(pair, 16)) for pair in hex_pairs)
    digest = hmac.new(CONSENSUS_KEY[0], raw_mac).digest()
    return WordFromBinary(digest)[:6]
| |
| |
def UploadFile(url, filename, fileobj, keys):
    """Anonymize, compress and upload one file to the server.

    The upload is a two-step exchange: a GET on <url>/upload/<filename>
    returns the URL to post to, then the zlib-compressed contents are POSTed
    there as a multipart/form-data attachment.  MAC addresses found in the
    contents are replaced through AnonymizeMAC() before compression.

    Args:
      url: base URL of the server.
      filename: name reported to the server; leading slashes are dropped.
      fileobj: object whose read() returns the complete contents to upload.
      keys: key/value pairs passed to urllib.urlencode() and appended to the
        first request as a query string; may be empty.

    Raises:
      HttpError: if the initial GET does not succeed.
      Exception: if the POST is answered with 200 instead of the expected 302.
    """
    relative_name = filename.lstrip('/')
    start_url = os.path.join(url, 'upload', relative_name)
    if keys:
        start_url = start_url + '?' + urllib.urlencode(keys)
    upload_url = HttpDo('GET', start_url)

    boundary = 'foo-splitter-%f' % time.time()
    content_type = 'multipart/form-data; boundary=%s' % boundary

    # Scrub MAC addresses before anything leaves the device, then compress.
    scrubbed = MACADDR_RE.sub(AnonymizeMAC, fileobj.read())
    fields = {
        'splitter': boundary,
        'filename': relative_name,
        'data': zlib.compress(scrubbed),
    }
    template = ('--%(splitter)s\r\n'
                'Content-Disposition: form-data; name="file";'
                ' filename="%(filename)s"\r\n'
                '\r\n'
                '%(data)s'
                '\r\n'
                '--%(splitter)s--\r\n'
                '\r\n')
    attachment = template % fields

    # Retry the upload forever until it succeeds.
    # Each iteration increases the delay, which should give the server
    # more time to digest whatever data it has already received.
    attempt = 0
    while True:
        try:
            HttpDo('POST', upload_url, attachment, content_type)
        except HttpError as e:
            status = e.status
        else:
            # http code 200 case.
            raise Exception('expected http response code 302')

        # This is the success case.
        if status == 302:
            return

        # If the server is overloaded, retry after some random delay.
        print('upload-logs failed: %s' % status)

        # The base interval doubles after every failure, capped at
        # RETRY_MAX_DELAY seconds, and each sleep is randomized by
        # +/- 50% of that interval.
        base_delay = min(RETRY_MAX_DELAY, RETRY_INIT_DELAY * 2**attempt)
        jitter = random.uniform(-base_delay*0.5, base_delay*0.5)
        time.sleep(base_delay + jitter)
        attempt = min(attempt+1, 10)
| |
| |
def main():
    """Parse the command line and upload stdin and/or the named files.

    Every "-k key=value" flag is collected and sent along with each upload.
    The MAC anonymization key is taken from --consensus_key if given,
    otherwise from CONSENSUS_KEY_FILE, otherwise a per-run throwaway value
    is used.
    """
    # set an alarm, in case our HTTP client (or anything else) hangs
    # for any reason.  The alarm is armed once and no SIGALRM handler is
    # installed here, so it bounds the whole run, retries included.
    signal.alarm(60)
    # Sending USR1 should now interrupt time.sleep()
    signal.signal(signal.SIGUSR1, lambda signum, frame: 0)

    o = options.Options(optspec)
    (opt, flags, extra) = o.parse(sys.argv[1:])  # pylint: disable-msg=W0612
    if not extra and not opt.stdin:
        o.fatal('at least one filename and/or --stdin expected')
    keys = []
    for k, v in flags:
        if k in ('-k', '--key'):
            if '=' not in str(v):
                # A value without '=' would produce a 1-tuple that only
                # fails later, obscurely, inside urllib.urlencode().
                # NOTE(review): o.fatal() presumably exits -- confirm.
                o.fatal('expected "-k key=value", got %r' % (v,))
            keys.append(tuple(v.split('=', 1)))

    try:
        with open(CONSENSUS_KEY_FILE) as f:
            consensus_key = f.read()
    except (IOError, OSError):
        # https://xkcd.com/221/
        # No shared key available: fall back to a value that differs per
        # run, so pseudonyms from this run will not match other runs.
        consensus_key = str(time.time())
    CONSENSUS_KEY[0] = opt.consensus_key or consensus_key

    if opt.stdin:
        UploadFile(opt.server, opt.stdin, sys.stdin, keys)
    for filename in extra:
        # Close each file as soon as its upload is done (or has failed).
        with open(filename) as fileobj:
            UploadFile(opt.server, filename, fileobj, keys)
| |
| |
# Run the uploader only when executed as a script, not when imported.
if __name__ == '__main__':
    main()