| #!/usr/bin/env python |
| # |
| # Copyright 2009 Facebook |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); you may |
| # not use this file except in compliance with the License. You may obtain |
| # a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
| # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the |
| # License for the specific language governing permissions and limitations |
| # under the License. |
| |
| """HTTP utility code shared by clients and servers.""" |
| |
| from __future__ import absolute_import, division, with_statement |
| |
| import logging |
| import urllib |
| import re |
| |
| from tornado.util import b, ObjectDict |
| |
| |
| class HTTPHeaders(dict): |
| """A dictionary that maintains Http-Header-Case for all keys. |
| |
| Supports multiple values per key via a pair of new methods, |
| add() and get_list(). The regular dictionary interface returns a single |
| value per key, with multiple values joined by a comma. |
| |
| >>> h = HTTPHeaders({"content-type": "text/html"}) |
| >>> h.keys() |
| ['Content-Type'] |
| >>> h["Content-Type"] |
| 'text/html' |
| |
| >>> h.add("Set-Cookie", "A=B") |
| >>> h.add("Set-Cookie", "C=D") |
| >>> h["set-cookie"] |
| 'A=B,C=D' |
| >>> h.get_list("set-cookie") |
| ['A=B', 'C=D'] |
| |
| >>> for (k,v) in sorted(h.get_all()): |
| ... print '%s: %s' % (k,v) |
| ... |
| Content-Type: text/html |
| Set-Cookie: A=B |
| Set-Cookie: C=D |
| """ |
| def __init__(self, *args, **kwargs): |
| # Don't pass args or kwargs to dict.__init__, as it will bypass |
| # our __setitem__ |
| dict.__init__(self) |
| self._as_list = {} |
| self._last_key = None |
| if (len(args) == 1 and len(kwargs) == 0 and |
| isinstance(args[0], HTTPHeaders)): |
| # Copy constructor |
| for k, v in args[0].get_all(): |
| self.add(k, v) |
| else: |
| # Dict-style initialization |
| self.update(*args, **kwargs) |
| |
| # new public methods |
| |
| def add(self, name, value): |
| """Adds a new value for the given key.""" |
| norm_name = HTTPHeaders._normalize_name(name) |
| self._last_key = norm_name |
| if norm_name in self: |
| # bypass our override of __setitem__ since it modifies _as_list |
| dict.__setitem__(self, norm_name, self[norm_name] + ',' + value) |
| self._as_list[norm_name].append(value) |
| else: |
| self[norm_name] = value |
| |
| def get_list(self, name): |
| """Returns all values for the given header as a list.""" |
| norm_name = HTTPHeaders._normalize_name(name) |
| return self._as_list.get(norm_name, []) |
| |
| def get_all(self): |
| """Returns an iterable of all (name, value) pairs. |
| |
| If a header has multiple values, multiple pairs will be |
| returned with the same name. |
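
        For example (a small illustration mirroring the class docstring):

        >>> h = HTTPHeaders()
        >>> h.add("Set-Cookie", "A=B")
        >>> h.add("Set-Cookie", "C=D")
        >>> sorted(h.get_all())
        [('Set-Cookie', 'A=B'), ('Set-Cookie', 'C=D')]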
| """ |
        for name, values in self._as_list.iteritems():
            for value in values:
| yield (name, value) |
| |
| def parse_line(self, line): |
| """Updates the dictionary with a single header line. |
| |
| >>> h = HTTPHeaders() |
| >>> h.parse_line("Content-Type: text/html") |
| >>> h.get('content-type') |
| 'text/html' |
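
        Continuation lines (lines beginning with whitespace) are folded into
        the value of the most recently parsed header; X-Custom below is just
        an illustrative header name:

        >>> h.parse_line("X-Custom: one")
        >>> h.parse_line("  two")
        >>> h.get("x-custom")
        'one two'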
| """ |
| if line[0].isspace(): |
| # continuation of a multi-line header |
| new_part = ' ' + line.lstrip() |
| self._as_list[self._last_key][-1] += new_part |
| dict.__setitem__(self, self._last_key, |
| self[self._last_key] + new_part) |
| else: |
| name, value = line.split(":", 1) |
| self.add(name, value.strip()) |
| |
| @classmethod |
| def parse(cls, headers): |
| """Returns a dictionary from HTTP header text. |
| |
| >>> h = HTTPHeaders.parse("Content-Type: text/html\\r\\nContent-Length: 42\\r\\n") |
| >>> sorted(h.iteritems()) |
| [('Content-Length', '42'), ('Content-Type', 'text/html')] |
| """ |
| h = cls() |
| for line in headers.splitlines(): |
| if line: |
| h.parse_line(line) |
| return h |
| |
| # dict implementation overrides |
| |
| def __setitem__(self, name, value): |
| norm_name = HTTPHeaders._normalize_name(name) |
| dict.__setitem__(self, norm_name, value) |
| self._as_list[norm_name] = [value] |
| |
| def __getitem__(self, name): |
| return dict.__getitem__(self, HTTPHeaders._normalize_name(name)) |
| |
| def __delitem__(self, name): |
| norm_name = HTTPHeaders._normalize_name(name) |
| dict.__delitem__(self, norm_name) |
| del self._as_list[norm_name] |
| |
| def __contains__(self, name): |
| norm_name = HTTPHeaders._normalize_name(name) |
| return dict.__contains__(self, norm_name) |
| |
| def get(self, name, default=None): |
| return dict.get(self, HTTPHeaders._normalize_name(name), default) |
| |
| def update(self, *args, **kwargs): |
| # dict.update bypasses our __setitem__ |
| for k, v in dict(*args, **kwargs).iteritems(): |
| self[k] = v |
| |
| def copy(self): |
| # default implementation returns dict(self), not the subclass |
| return HTTPHeaders(self) |
| |
| _NORMALIZED_HEADER_RE = re.compile(r'^[A-Z0-9][a-z0-9]*(-[A-Z0-9][a-z0-9]*)*$') |
| _normalized_headers = {} |
| |
| @staticmethod |
| def _normalize_name(name): |
| """Converts a name to Http-Header-Case. |
| |
| >>> HTTPHeaders._normalize_name("coNtent-TYPE") |
| 'Content-Type' |
| """ |
| try: |
| return HTTPHeaders._normalized_headers[name] |
| except KeyError: |
| if HTTPHeaders._NORMALIZED_HEADER_RE.match(name): |
| normalized = name |
| else: |
| normalized = "-".join([w.capitalize() for w in name.split("-")]) |
| HTTPHeaders._normalized_headers[name] = normalized |
| return normalized |
| |
| |
| def url_concat(url, args): |
| """Concatenate url and argument dictionary regardless of whether |
| url has existing query parameters. |
| |
| >>> url_concat("http://example.com/foo?a=b", dict(c="d")) |
| 'http://example.com/foo?a=b&c=d' |
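
    If the url has no existing query string, a '?' is appended before the
    encoded arguments instead:

    >>> url_concat("http://example.com/foo", dict(c="d"))
    'http://example.com/foo?c=d'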
| """ |
| if not args: |
| return url |
| if url[-1] not in ('?', '&'): |
| url += '&' if ('?' in url) else '?' |
| return url + urllib.urlencode(args) |
| |
| |
| class HTTPFile(ObjectDict): |
| """Represents an HTTP file. For backwards compatibility, its instance |
| attributes are also accessible as dictionary keys. |
| |
    :ivar filename: The filename reported by the client in Content-Disposition.
    :ivar body: The file contents, as a byte string.
| :ivar content_type: The content_type comes from the provided HTTP header |
| and should not be trusted outright given that it can be easily forged. |
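
    A small illustrative example (the field values below are made up):

    >>> f = HTTPFile(filename="ab.txt", body="Foo", content_type="text/plain")
    >>> f.filename
    'ab.txt'
    >>> f["content_type"]
    'text/plain'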
| """ |
| pass |
| |
| |
| def parse_multipart_form_data(boundary, data, arguments, files): |
| """Parses a multipart/form-data body. |
| |
| The boundary and data parameters are both byte strings. |
| The dictionaries given in the arguments and files parameters |
| will be updated with the contents of the body. |
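
    A rough usage sketch; the boundary and body below are illustrative,
    not taken from a real request:

    >>> args, files = {}, {}
    >>> body = b('--1234\\r\\nContent-Disposition: form-data; name="files"; filename="ab.txt"\\r\\n\\r\\nFoo\\r\\n--1234--\\r\\n')
    >>> parse_multipart_form_data(b("1234"), body, args, files)
    >>> files["files"][0]["filename"]
    'ab.txt'
    >>> files["files"][0]["body"]
    'Foo'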
| """ |
| # The standard allows for the boundary to be quoted in the header, |
    # although it's rare (it happens at least for Google App Engine's
    # XMPP service). I think we're also supposed to handle backslash-escapes
| # here but I'll save that until we see a client that uses them |
| # in the wild. |
| if boundary.startswith(b('"')) and boundary.endswith(b('"')): |
| boundary = boundary[1:-1] |
| final_boundary_index = data.rfind(b("--") + boundary + b("--")) |
| if final_boundary_index == -1: |
| logging.warning("Invalid multipart/form-data: no final boundary") |
| return |
| parts = data[:final_boundary_index].split(b("--") + boundary + b("\r\n")) |
| for part in parts: |
| if not part: |
| continue |
| eoh = part.find(b("\r\n\r\n")) |
| if eoh == -1: |
| logging.warning("multipart/form-data missing headers") |
| continue |
| headers = HTTPHeaders.parse(part[:eoh].decode("utf-8")) |
| disp_header = headers.get("Content-Disposition", "") |
| disposition, disp_params = _parse_header(disp_header) |
| if disposition != "form-data" or not part.endswith(b("\r\n")): |
| logging.warning("Invalid multipart/form-data") |
| continue |
| value = part[eoh + 4:-2] |
| if not disp_params.get("name"): |
| logging.warning("multipart/form-data value missing name") |
| continue |
| name = disp_params["name"] |
| if disp_params.get("filename"): |
| ctype = headers.get("Content-Type", "application/unknown") |
| files.setdefault(name, []).append(HTTPFile( |
| filename=disp_params["filename"], body=value, |
| content_type=ctype)) |
| else: |
| arguments.setdefault(name, []).append(value) |
| |
| |
# _parseparam and _parse_header are copied and modified from Python 2.7's cgi.py.
| # The original 2.7 version of this code did not correctly support some |
| # combinations of semicolons and double quotes. |
| def _parseparam(s): |
| while s[:1] == ';': |
| s = s[1:] |
| end = s.find(';') |
| while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2: |
| end = s.find(';', end + 1) |
| if end < 0: |
| end = len(s) |
| f = s[:end] |
| yield f.strip() |
| s = s[end:] |
| |
| |
| def _parse_header(line): |
| """Parse a Content-type like header. |
| |
| Return the main content-type and a dictionary of options. |
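
    For example, parsing a Content-Disposition value (the value below is
    illustrative):

    >>> key, params = _parse_header('form-data; name="files"; filename="ab.txt"')
    >>> key
    'form-data'
    >>> sorted(params.items())
    [('filename', 'ab.txt'), ('name', 'files')]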
| |
| """ |
| parts = _parseparam(';' + line) |
| key = parts.next() |
| pdict = {} |
| for p in parts: |
| i = p.find('=') |
| if i >= 0: |
| name = p[:i].strip().lower() |
| value = p[i + 1:].strip() |
| if len(value) >= 2 and value[0] == value[-1] == '"': |
| value = value[1:-1] |
| value = value.replace('\\\\', '\\').replace('\\"', '"') |
| pdict[name] = value |
| return key, pdict |
| |
| |
| def doctests(): |
| import doctest |
| return doctest.DocTestSuite() |