blob: f35cef8cde8967a13d422ab116cb68395ed7b20a [file] [log] [blame]
#!/usr/bin/python
# Copyright 2011 Google Inc. All Rights Reserved.
#
"""A command-line tool for uploading to diag.cpe.gfsvc.com."""
__author__ = 'apenwarr@google.com (Avery Pennarun)'
import hmac
import os.path
import random
import re
import signal
import StringIO
import struct
import sys
import time
import urllib
import zlib
import options
import pycurl
# Command-line option specification; main() hands this string to
# options.Options() to build the parser.
# NOTE(review): the layout (usage line, '--' separator, then one option per
# line with an optional [default] in brackets) looks like the bup-style
# options parser -- confirm against the options module.
optspec = """
upload-logs [options...] <filenames...>
--
s,server= The server URL [https://diag.cpe.gfsvc.com]
k,key= Add a key/value pair (format "-k key=value")
stdin= Also upload stdin, with the given virtual filename
consensus_key= Use a hard-coded consensus_key
"""
# Initial retry time for the exponential backoff retry loop.
# This will create a backoff retry, with times centered at:
# 30, 60, 120, 240, 480, 480
# Both values are in seconds (they end up in time.sleep() in UploadFile).
RETRY_INIT_DELAY = 30
RETRY_MAX_DELAY = 480
# MAC Address anonymization
# Matches six two-digit hex octets separated by '-', ':' or '_'.
MACADDR_RE = re.compile(r'([0-9a-fA-F]{2}(?:[-:_][0-9a-fA-F]{2}){5})')
# Alphabets used by Trigraph(): SOFT supplies the middle character and HARD
# the first and last one.  Trigraph() asserts that
# len(HARD) * len(SOFT) * len(HARD) >= 4096.
SOFT = 'AEIOUY' 'V'
HARD = 'BCDFGHJKLMNPQRSTVWXYZ' 'AEIOU'
# One-element list holding the HMAC key read by AnonymizeMAC(); main() stores
# the real key in slot 0.
CONSENSUS_KEY = ['']
# File main() tries to read the key from.
CONSENSUS_KEY_FILE = '/tmp/waveguide/consensus_key'
class HttpError(Exception):
    """Error that records the numeric HTTP status of a request.

    Attributes:
      status: the HTTP status code passed to the constructor.

    The exception message, and str() of the instance, is
    'http status: <status>'.
    """

    def __init__(self, status):
        # Store the status first: __str__ reads it to build the message.
        self.status = status
        message = str(self)
        Exception.__init__(self, message)

    def __str__(self):
        return 'http status: %d' % self.status
def HttpDo(method, url, post_data=None, content_type=None):
    """Make an HTTP(S) request using pycurl and return the response body.

    The request is attempted over IPv6 first and then over IPv4.  If the
    device key and certificate files exist they are passed to curl as the
    TLS client credentials.  Redirects are not followed.

    Args:
      method: 'GET' or 'POST'.
      url: an http:// or https:// URL.
      post_data: the request body; must not be None when method is 'POST'.
      content_type: optional value for the Content-Type request header.

    Returns:
      The response body, when an attempt ends with HTTP status 200.

    Raises:
      HttpError: immediately on status 302 (callers treat that as success
        for POSTs), or with the status of the last attempt when neither
        IPv6 nor IPv4 produced a 200.  A pycurl-level failure is reported
        as status 500.
    """
    proto, _ = urllib.splittype(url)
    assert proto.lower() in ('http', 'https')
    assert method in ('GET', 'POST')
    # The log upload server does not take kindly to Expect: 100-continue
    # so remove that.
    headers = ['User-Agent: upload-logs', 'Expect:']
    if content_type:
        headers.append('Content-Type: %s' % content_type)
    # You'd think that internally Curl would do this... but no it doesn't.
    for resolver in [pycurl.IPRESOLVE_V6, pycurl.IPRESOLVE_V4]:
        # Fresh response buffer per attempt: otherwise a body received
        # during a failed IPv6 attempt would be prepended to the body
        # returned by a successful IPv4 attempt.
        outdata = StringIO.StringIO()
        curl = pycurl.Curl()
        try:
            curl.setopt(pycurl.IPRESOLVE, resolver)
            curl.setopt(pycurl.CONNECTTIMEOUT, 5)
            curl.setopt(pycurl.WRITEFUNCTION, outdata.write)
            curl.setopt(pycurl.FOLLOWLOCATION, 0)
            curl.setopt(pycurl.SSL_VERIFYPEER, 1)
            curl.setopt(pycurl.SSL_VERIFYHOST, 2)
            if os.path.exists('/etc/ssl/private/device.key'):
                curl.setopt(pycurl.SSLKEY, '/etc/ssl/private/device.key')
            if os.path.exists('/etc/ssl/certs/device.pem'):
                curl.setopt(pycurl.SSLCERT, '/etc/ssl/certs/device.pem')
            curl.setopt(pycurl.URL, url)
            curl.setopt(pycurl.HTTPHEADER, headers)
            if method == 'GET':
                curl.setopt(pycurl.HTTPGET, True)
            else:
                assert post_data is not None
                request_buffer = StringIO.StringIO(post_data)

                def Ioctl(cmd):
                    # Rewind the request body when curl asks to restart
                    # reading it.
                    if cmd == pycurl.IOCMD_RESTARTREAD:
                        request_buffer.seek(0)

                curl.setopt(pycurl.POST, True)
                curl.setopt(pycurl.IOCTLFUNCTION, Ioctl)
                curl.setopt(pycurl.READFUNCTION, request_buffer.read)
                curl.setopt(pycurl.POSTFIELDSIZE, len(post_data))
            try:
                curl.perform()
                http_status = curl.getinfo(pycurl.HTTP_CODE)
            except pycurl.error as exc:
                print('pycurl exception: %s' % (exc,))
                http_status = 500
        finally:
            # Release the handle even if something unexpected was raised.
            curl.close()
        if http_status == 200:
            return outdata.getvalue()
        # 302 is success for posts.
        if http_status == 302:
            raise HttpError(http_status)
        if resolver == pycurl.IPRESOLVE_V6:
            print('IPv6 failed with http error: %d' % http_status)
        else:
            print('IPv4 failed with http error: %d' % http_status)
    # If neither of V6 or V4 worked, raise an exception.
    raise HttpError(http_status)
def Trigraph(num):
    """Given a value from 0..4095, encode it as a cons+vowel+cons sequence.

    Args:
      num: integer in the range 0..4095.

    Returns:
      A three-character string: one character from HARD, one from SOFT and
      one from HARD.
    """
    ns = len(SOFT)
    nh = len(HARD)
    # The three positions together must be able to represent 12 bits.
    assert nh * ns * nh >= 4096
    # Split num into mixed-radix digits (c1, c2, c3) with bases (nh, ns, nh).
    # divmod() always floors for integers, whereas '/' becomes true division
    # (yielding floats that cannot index a string) under Python 3 or
    # 'from __future__ import division'.
    rest, c3 = divmod(num, nh)
    c1, c2 = divmod(rest, ns)
    return HARD[c1] + SOFT[c2] + HARD[c3]
def WordFromBinary(s):
    """Turn a binary string into a run of pronounceable trigraphs.

    The input is consumed three bytes at a time.  Each group is left-padded
    with zero bytes to four bytes, read as a big-endian 32-bit integer, and
    its low 24 bits are split into two 12-bit values; every value is then
    rendered with Trigraph().

    Args:
      s: the binary string to encode.

    Returns:
      The concatenated trigraphs, or '' for empty input.
    """
    values = []
    for offset in range(0, len(s), 3):
        chunk = s[offset:offset + 3]
        # Left-pad to the four bytes that the '!I' format requires.
        padded = '\0' * (4 - len(chunk)) + chunk
        (bits,) = struct.unpack('!I', padded)
        values.append((bits >> 12) & 0xfff)
        values.append(bits & 0xfff)
    return ''.join([Trigraph(value) for value in values])
# Note(apenwarr): There are a few ways to do this. I elected to go with
# short human-usable strings (allowing for the small possibility of
# collisions) since the log messages will probably be "mostly" used by
# humans.
#
# An alternative would be to use "format preserving encryption" (basically
# a secure 1:1 mapping of unencrypted to anonymized, in the same number of
# bits) and then produce longer "words" with no possibility of collision.
# But with our current WordFromBinary() implementation, that would be
# 12 characters long, which is kind of inconvenient and long.
#
def AnonymizeMAC(macmatch):
    """Replace a matched MAC address with a short keyed pseudonym.

    Args:
      macmatch: a regex match object whose full match is a 17-character
        MAC address with octets separated by ':', '_' or '-'.

    Returns:
      The first six characters of the WordFromBinary() encoding of the HMAC
      digest of the raw address bytes, keyed with CONSENSUS_KEY[0].
    """
    mac_text = macmatch.group(0)
    assert len(mac_text) == 17
    # Convert the textual octets into the raw byte string.
    octets = re.split('[:_-]', mac_text)
    raw_mac = ''.join(chr(int(octet, 16)) for octet in octets)
    digest = hmac.new(CONSENSUS_KEY[0], raw_mac).digest()
    word = WordFromBinary(digest)
    return word[:6]
def UploadFile(url, filename, fileobj, keys):
    """Actually upload the given file to the server.

    Performs a GET on <url>/upload/<filename> and uses the response body as
    the upload URL.  The file content is read, has MAC addresses replaced
    via AnonymizeMAC, is zlib-compressed, and is then POSTed to the upload
    URL as multipart/form-data.  The POST is retried with exponential
    backoff until the server answers with status 302.

    Args:
      url: base URL of the server.
      filename: name to upload under; leading '/' characters are dropped.
      fileobj: object whose read() returns the content to upload.
      keys: sequence of (key, value) pairs appended as the query string of
        the initial GET; may be empty.

    Raises:
      HttpError: if the initial GET does not return status 200.
      Exception: if the POST returns 200 instead of the expected 302.
    """
    filename = filename.lstrip('/')
    # Join with '/' explicitly: os.path.join() is meant for filesystem paths
    # and would use the platform's separator.
    # NOTE(review): filename is inserted into the URL without URL-quoting;
    # confirm whether callers can pass names with spaces or '?'.
    start_url = '/'.join([url.rstrip('/'), 'upload', filename])
    if keys:
        start_url += '?' + urllib.urlencode(keys)
    upload_url = HttpDo('GET', start_url)
    splitter = 'foo-splitter-%f' % time.time()
    content_type = 'multipart/form-data; boundary=%s' % splitter
    filtered = MACADDR_RE.sub(AnonymizeMAC, fileobj.read())
    basecontent = zlib.compress(filtered)
    attachment = ('--%(splitter)s\r\n'
                  'Content-Disposition: form-data; name="file";'
                  ' filename="%(filename)s"\r\n'
                  '\r\n'
                  '%(data)s'
                  '\r\n'
                  '--%(splitter)s--\r\n'
                  '\r\n'
                  % dict(splitter=splitter,
                         filename=filename,
                         data=basecontent))
    # Retry upload forever until success.
    # Each iteration increases the delay, which should give the server
    # more time to digest whatever data it has already received.
    attempt = 0
    while True:
        try:
            HttpDo('POST', upload_url, attachment, content_type)
        except HttpError as e:
            # This is the success case.
            if e.status == 302:
                break
            # If the server is overloaded, retry after some random delay.
            print('upload-logs failed: %s' % e.status)
            # Retry interval is capped at RETRY_MAX_DELAY seconds, with a
            # random offset of +/- 50% of the retry interval.
            delay = min(RETRY_MAX_DELAY, RETRY_INIT_DELAY * 2**attempt)
            rand_offset = random.uniform(-delay*0.5, delay*0.5)
            time.sleep(delay + rand_offset)
            # Cap the exponent so 2**attempt cannot grow without bound.
            attempt = min(attempt+1, 10)
        else:
            # http code 200 case.
            raise Exception('expected http response code 302')
def main():
    """Parse the command line and upload stdin and/or the named files.

    Installs a 60-second alarm, parses the options described by optspec,
    collects the -k/--key pairs, selects the key used for MAC anonymization
    and then calls UploadFile() for stdin (if --stdin was given) and for
    each filename argument.
    """
    # set an alarm, in case our HTTP client (or anything else) hangs
    # for any reason
    signal.alarm(60)
    # Sending USR1 should now interrupt time.sleep()
    signal.signal(signal.SIGUSR1, lambda signum, frame: 0)
    o = options.Options(optspec)
    (opt, flags, extra) = o.parse(sys.argv[1:])  # pylint: disable-msg=W0612
    if not extra and not opt.stdin:
        o.fatal('at least one filename and/or --stdin expected')
    keys = []
    for k, v in flags:
        if k in ('-k', '--key'):
            # A value without '=' would produce a 1-tuple, which later
            # fails inside urllib.urlencode(); report it up front instead.
            if '=' not in v:
                o.fatal('-k expects an argument of the form key=value')
            keys.append(tuple(v.split('=', 1)))
    try:
        with open(CONSENSUS_KEY_FILE) as f:
            consensus_key = f.read()
    except (IOError, OSError):
        # https://xkcd.com/221/
        # Key file unreadable: fall back to a key derived from the current
        # time.
        consensus_key = str(time.time())
    # An explicit --consensus_key takes precedence over the file / fallback.
    CONSENSUS_KEY[0] = opt.consensus_key or consensus_key
    if opt.stdin:
        UploadFile(opt.server, opt.stdin, sys.stdin, keys)
    for filename in extra:
        # Close each file as soon as its upload has finished.
        with open(filename) as fileobj:
            UploadFile(opt.server, filename, fileobj, keys)


if __name__ == '__main__':
    main()