# Copyright 2011 Google Inc. All Rights Reserved.
"""A command-line tool for uploading to"""
__author__ = ' (Avery Pennarun)'
import hmac
import os.path
import random
import re
import signal
import StringIO
import struct
import sys
import time
import urllib
import zlib
import options
import pycurl
optspec = """
upload-logs [options...] <filenames...>
s,server= The server URL []
k,key= Add a key/value pair (format "-k key=value")
stdin= Also upload stdin, with the given virtual filename
consensus_key= Use a hard-coded consensus_key
# Initial retry time for the exponential backoff retry loop.
# This will create a backoff retry, with times centered at:
# 30, 60, 120, 240, 480, 480
# NOTE(review): no constant definition follows the comment above in this copy
# of the file, while UploadFile() reads a `delay` value that is never
# assigned -- confirm where the retry interval is defined.
# MAC Address anonymization
# Matches six two-digit hex groups separated by '-', ':' or '_' (for example
# 00:11:22:aa:bb:cc). UploadFile() passes this pattern's matches to
# AnonymizeMAC() through MACADDR_RE.sub().
MACADDR_RE = re.compile(r'([0-9a-fA-F]{2}(?:[-:_][0-9a-fA-F]{2}){5})')
# Path of the file that main() opens to obtain the consensus key.
CONSENSUS_KEY_FILE = '/tmp/waveguide/consensus_key'
class HttpError(Exception):
  """Error raised for an HTTP response that is not a plain success.

  Attributes:
    status: the numeric HTTP status code of the response.
  """

  def __init__(self, status):
    # The status has to be stored before the message is built, because
    # __str__ reads it.
    self.status = status
    message = str(self)
    super(HttpError, self).__init__(message)

  def __str__(self):
    text = 'http status: %d' % self.status
    return text
# Performs a GET or POST request to `url` with pycurl, trying IPv6 name
# resolution first and IPv4 second. As far as the visible code shows, the
# response body is returned on HTTP 200 and HttpError is raised on 302 or once
# both resolvers have been tried.
# NOTE(review): several statements of this function appear to be missing from
# this copy: no `try:` precedes the `except` below, nothing separates the GET
# setup from the POST setup, and curl.perform() is never called. Restore the
# original lines from version control before relying on this code.
def HttpDo(method, url, post_data=None, content_type=None):
"""Make an HTTPS request using pycurl and return the result."""
# Only http/https URLs and the GET/POST methods are accepted.
proto, _ = urllib.splittype(url)
assert proto.lower() in ('http', 'https')
assert method in ('GET', 'POST')
# Buffer that receives the response body through WRITEFUNCTION.
outdata = StringIO.StringIO()
# The log upload server does not take kindly to Expect: 100-continue
# so remove that.
headers = ['User-Agent: upload-logs', 'Expect:']
if content_type:
headers.append('Content-Type: %s' % content_type)
# You'd think that internally Curl would do this... but no it doesn't.
for resolver in [pycurl.IPRESOLVE_V6, pycurl.IPRESOLVE_V4]:
curl = pycurl.Curl()
curl.setopt(pycurl.IPRESOLVE, resolver)
curl.setopt(pycurl.CONNECTTIMEOUT, 5)
curl.setopt(pycurl.WRITEFUNCTION, outdata.write)
# Redirects are not followed, so a 302 status is seen by the code below.
curl.setopt(pycurl.FOLLOWLOCATION, 0)
curl.setopt(pycurl.SSL_VERIFYPEER, 1)
curl.setopt(pycurl.SSL_VERIFYHOST, 2)
# The device key and certificate are only configured when the files exist.
if os.path.exists('/etc/ssl/private/device.key'):
curl.setopt(pycurl.SSLKEY, '/etc/ssl/private/device.key')
if os.path.exists('/etc/ssl/certs/device.pem'):
curl.setopt(pycurl.SSLCERT, '/etc/ssl/certs/device.pem')
curl.setopt(pycurl.URL, url)
curl.setopt(pycurl.HTTPHEADER, headers)
if method == 'GET':
curl.setopt(pycurl.HTTPGET, True)
# NOTE(review): the statements below configure a POST; presumably an `else:`
# belonged here -- confirm.
assert post_data is not None
request_buffer = StringIO.StringIO(post_data)
# NOTE(review): request_buffer is never read or rewound in the visible code;
# the body of Ioctl and a READFUNCTION setup look incomplete -- confirm.
def Ioctl(cmd):
if cmd == pycurl.IOCMD_RESTARTREAD:
curl.setopt(pycurl.POST, True)
curl.setopt(pycurl.IOCTLFUNCTION, Ioctl)
curl.setopt(pycurl.POSTFIELDSIZE, len(post_data))
http_status = curl.getinfo(pycurl.HTTP_CODE)
except pycurl.error as exc:
print 'pycurl exception: %s' % (exc,)
# A pycurl-level failure is reported as status 500.
http_status = 500
if http_status == 200:
return outdata.getvalue()
# 302 is success for posts.
if http_status == 302:
raise HttpError(http_status)
# NOTE(review): as written both messages would be printed; presumably an
# `else:` separated them -- confirm.
if resolver == pycurl.IPRESOLVE_V6:
print 'IPv6 failed with http error: %d' % http_status
print 'IPv4 failed with http error: %d' % http_status
# If neither of V6 or V4 worked, raise an exception.
raise HttpError(http_status)
def Trigraph(num):
  """Given a value from 0..4095, encode it as a cons+vowel+cons sequence.

  The value is treated as a three-digit mixed-radix number: the outer two
  digits index the module-level HARD sequence and the middle digit indexes
  the module-level SOFT sequence.

  Args:
    num: integer to encode, expected to be in the range 0..4095.

  Returns:
    The concatenation HARD[c1] + SOFT[c2] + HARD[c3] for the digits of num.

  Raises:
    AssertionError: if HARD and SOFT are too short to cover 4096 values.
  """
  ns = len(SOFT)
  nh = len(HARD)
  # The two alphabets must be able to represent every 12-bit value.
  assert nh * ns * nh >= 4096
  # divmod() always floors, so the digits stay integers (and remain valid
  # indexes) even when true division is in effect, i.e. on Python 3 or under
  # `from __future__ import division`.
  rest, c3 = divmod(num, nh)
  c1, c2 = divmod(rest, ns)
  return HARD[c1] + SOFT[c2] + HARD[c3]
def WordFromBinary(s):
  """Turn a binary string into a run of pronounceable syllables.

  The input is consumed three bytes at a time. Each group (the last one may
  be shorter) is left-padded with NUL bytes to four bytes and unpacked as a
  network-order unsigned integer, which is split into two 12-bit values. Each
  12-bit value is rendered by Trigraph() and the results are concatenated.

  Args:
    s: the binary string to encode.

  Returns:
    The concatenated Trigraph() output; an empty string for empty input.
  """
  values = []
  for start in range(0, len(s), 3):
    chunk = s[start:start + 3].rjust(4, '\0')
    (word,) = struct.unpack('!I', chunk)
    values.append((word >> 12) & 0xfff)
    values.append(word & 0xfff)
  return ''.join(Trigraph(value) for value in values)
# Note(apenwarr): There are a few ways to do this. I elected to go with
# short human-usable strings (allowing for the small possibility of
# collisions) since the log messages will probably be "mostly" used by
# humans.
# An alternative would be to use "format preserving encryption" (basically
# a secure 1:1 mapping of unencrypted to anonymized, in the same number of
# bits) and then produce longer "words" with no possibility of collision.
# But with our current WordFromBinary() implementation, that would be
# 12 characters long, which is kind of inconvenient and long.
# Replacement callback handed to MACADDR_RE.sub() by UploadFile(): it receives
# the regex match for one MAC address and returns the text to substitute.
# NOTE(review): the `macstr =` assignment and the `return` expression below are
# truncated in this copy (missing right-hand side, unbalanced parentheses).
# Presumably the matched text is hashed with a key before being passed to
# WordFromBinary() -- recover the original lines from version control.
def AnonymizeMAC(macmatch):
"""Anonymize a binary MAC address using the CONSENSUS_KEY."""
macstr =
# A textual MAC address is 17 characters: six 2-digit groups plus five
# separators.
assert len(macstr) == 17
# Convert the six hex groups into a 6-character binary string.
macbin = ''.join([chr(int(i, 16)) for i in re.split('[:_-]', macstr)])
return WordFromBinary([0], macbin).digest())[:6]
# Uploads one file in two steps: a GET to <url>/upload/<filename> (plus the
# optional key/value query string) returns the URL to post to, then the
# content is POSTed there as multipart/form-data, with retries on failure.
# NOTE(review): parts of this function are missing from this copy: the
# MACADDR_RE.sub() call and the `attachment` expression are truncated, no
# `try:` precedes the `except`, and `delay` is never assigned. `fileobj` is
# not referenced in the visible lines. Restore the original from version
# control before relying on this code.
def UploadFile(url, filename, fileobj, keys):
"""Actually upload the given file to the server."""
# Leading slashes are removed; os.path.join() would otherwise discard `url`
# when given an absolute second component.
while filename.startswith('/'):
filename = filename[1:]
start_url = os.path.join(url, 'upload', filename)
if keys:
start_url += '?' + urllib.urlencode(keys)
# The body of the GET response is used as the URL for the POST below.
upload_url = HttpDo('GET', start_url)
# The multipart boundary embeds the current time.
splitter = 'foo-splitter-%f' % time.time()
content_type = 'multipart/form-data; boundary=%s' % splitter
# NOTE(review): the next line is truncated (second argument and closing
# parenthesis missing).
filtered = MACADDR_RE.sub(AnonymizeMAC,
# The MAC-filtered content is zlib-compressed before upload.
basecontent = zlib.compress(filtered)
# NOTE(review): the multipart body expression below is truncated after
# `splitter=splitter,`.
attachment = ('--%(splitter)s\r\n'
'Content-Disposition: form-data; name="file";'
' filename="%(filename)s"\r\n'
% dict(splitter=splitter,
# Retry upload forever until success.
# Each iteration increase the delay which should give the server
# more time to digest whatever data is has already received.
i = 0
while True:
HttpDo('POST', upload_url, attachment, content_type)
except HttpError, e:
# This is the success case.
if e.status == 302:
# If the server is overloaded, retry after some random delay.
print 'upload-logs failed: %s' % e.status
# Retry interval is maximum of 5 minutes, with a random delay
# of +/- 50% of the retry interval.
rand_offset = random.uniform(-delay*0.5, delay*0.5)
time.sleep(delay + rand_offset)
# The retry counter is capped at 10.
i = min(i+1, 10)
# http code 200 case.
raise Exception('expected http response code 302')
# Entry point: parses the command line, determines the consensus key, then
# uploads stdin (when --stdin is given) and every named file via UploadFile().
# NOTE(review): parts of this function are missing from this copy: no alarm
# call follows the first comment, no `try:` precedes the `except`, and the
# `consensus_key =` assignment is truncated. CONSENSUS_KEY itself is not
# defined in the visible part of the file.
def main():
# set an alarm, in case our HTTP client (or anything else) hangs
# for any reason
# Sending USR1 should now interrupt time.sleep()
# The installed handler does nothing except return 0.
signal.signal(signal.SIGUSR1, lambda signum, frame: 0)
o = options.Options(optspec)
(opt, flags, extra) = o.parse(sys.argv[1:]) # pylint: disable-msg=W0612
if not extra and not opt.stdin:
o.fatal('at least one filename and/or --stdin expected')
# Collect every -k/--key flag as a tuple, splitting on the first '=' only.
keys = []
for k, v in flags:
if k in ('-k', '--key'):
keys.append(tuple(v.split('=', 1)))
with open(CONSENSUS_KEY_FILE) as f:
consensus_key =
except (IOError, OSError) as e:
# When the key file cannot be read, the current time is used as the key.
consensus_key = str(time.time())
# A --consensus_key given on the command line takes precedence.
CONSENSUS_KEY[0] = opt.consensus_key or consensus_key
if opt.stdin:
UploadFile(opt.server, opt.stdin, sys.stdin, keys)
# NOTE(review): the file objects opened below are never explicitly closed.
for filename in extra:
UploadFile(opt.server, filename, open(filename), keys)
if __name__ == '__main__':