| #!/usr/bin/env python |
| # |
| # Copyright 2009 Facebook |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); you may |
| # not use this file except in compliance with the License. You may obtain |
| # a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
| # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the |
| # License for the specific language governing permissions and limitations |
| # under the License. |
| |
| """HTTP utility code shared by clients and servers.""" |
| |
| import logging |
| import urllib |
| import re |
| |
| from tornado.util import b |
| |
class HTTPHeaders(dict):
    """A dictionary that maintains Http-Header-Case for all keys.

    Supports multiple values per key via a pair of new methods,
    add() and get_list(). The regular dictionary interface returns a single
    value per key, with multiple values joined by a comma.

    >>> h = HTTPHeaders({"content-type": "text/html"})
    >>> sorted(h.keys())
    ['Content-Type']
    >>> h["Content-Type"]
    'text/html'

    >>> h.add("Set-Cookie", "A=B")
    >>> h.add("Set-Cookie", "C=D")
    >>> h["set-cookie"]
    'A=B,C=D'
    >>> h.get_list("set-cookie")
    ['A=B', 'C=D']

    >>> for (k, v) in sorted(h.get_all()):
    ...    print('%s: %s' % (k, v))
    ...
    Content-Type: text/html
    Set-Cookie: A=B
    Set-Cookie: C=D
    """
    def __init__(self, *args, **kwargs):
        # Don't pass args or kwargs to dict.__init__, as it will bypass
        # our __setitem__
        dict.__init__(self)
        # Maps normalized name -> list of values, preserving insertion order
        # within each name; kept in sync with the dict's joined values.
        self._as_list = {}
        self.update(*args, **kwargs)

    # new public methods

    def add(self, name, value):
        """Adds a new value for the given key.

        If the key already exists, the dict-visible value becomes the
        comma-joined concatenation of all values added so far.
        """
        norm_name = HTTPHeaders._normalize_name(name)
        if norm_name in self:
            # bypass our override of __setitem__ since it modifies _as_list
            dict.__setitem__(self, norm_name, self[norm_name] + ',' + value)
            self._as_list[norm_name].append(value)
        else:
            self[norm_name] = value

    def get_list(self, name):
        """Returns all values for the given header as a list.

        Returns an empty list if the header is not present.
        """
        norm_name = HTTPHeaders._normalize_name(name)
        return self._as_list.get(norm_name, [])

    def get_all(self):
        """Returns an iterable of all (name, value) pairs.

        If a header has multiple values, multiple pairs will be
        returned with the same name.
        """
        # items() instead of the Python 2-only iteritems() so this works on
        # both Python 2 and 3; also avoid shadowing the builtin ``list``.
        for name, values in self._as_list.items():
            for value in values:
                yield (name, value)

    def parse_line(self, line):
        """Updates the dictionary with a single header line.

        >>> h = HTTPHeaders()
        >>> h.parse_line("Content-Type: text/html")
        >>> h.get('content-type')
        'text/html'
        """
        name, value = line.split(":", 1)
        self.add(name, value.strip())

    @classmethod
    def parse(cls, headers):
        """Returns a dictionary from HTTP header text.

        >>> h = HTTPHeaders.parse("Content-Type: text/html\\r\\nContent-Length: 42\\r\\n")
        >>> sorted(h.items())
        [('Content-Length', '42'), ('Content-Type', 'text/html')]
        """
        h = cls()
        for line in headers.splitlines():
            if line:
                h.parse_line(line)
        return h

    # dict implementation overrides

    def __setitem__(self, name, value):
        # Setting a header replaces any previously-added multi-values.
        norm_name = HTTPHeaders._normalize_name(name)
        dict.__setitem__(self, norm_name, value)
        self._as_list[norm_name] = [value]

    def __getitem__(self, name):
        return dict.__getitem__(self, HTTPHeaders._normalize_name(name))

    def __delitem__(self, name):
        norm_name = HTTPHeaders._normalize_name(name)
        dict.__delitem__(self, norm_name)
        del self._as_list[norm_name]

    def get(self, name, default=None):
        return dict.get(self, HTTPHeaders._normalize_name(name), default)

    def update(self, *args, **kwargs):
        # dict.update bypasses our __setitem__; items() (not the Python
        # 2-only iteritems()) keeps this working on Python 2 and 3.
        for k, v in dict(*args, **kwargs).items():
            self[k] = v

    # Matches names already in canonical Http-Header-Case, so they can skip
    # re-capitalization below.
    _NORMALIZED_HEADER_RE = re.compile(r'^[A-Z0-9][a-z0-9]*(-[A-Z0-9][a-z0-9]*)*$')
    # Cache of name -> normalized name, shared across all instances.
    _normalized_headers = {}

    @staticmethod
    def _normalize_name(name):
        """Converts a name to Http-Header-Case.

        >>> HTTPHeaders._normalize_name("coNtent-TYPE")
        'Content-Type'
        """
        try:
            return HTTPHeaders._normalized_headers[name]
        except KeyError:
            if HTTPHeaders._NORMALIZED_HEADER_RE.match(name):
                normalized = name
            else:
                normalized = "-".join([w.capitalize() for w in name.split("-")])
            HTTPHeaders._normalized_headers[name] = normalized
            return normalized
| |
| |
| def url_concat(url, args): |
| """Concatenate url and argument dictionary regardless of whether |
| url has existing query parameters. |
| |
| >>> url_concat("http://example.com/foo?a=b", dict(c="d")) |
| 'http://example.com/foo?a=b&c=d' |
| """ |
| if not args: return url |
| if url[-1] not in ('?', '&'): |
| url += '&' if ('?' in url) else '?' |
| return url + urllib.urlencode(args) |
| |
def parse_multipart_form_data(boundary, data, arguments, files):
    """Parses a multipart/form-data body.

    The boundary and data parameters are both byte strings.
    The dictionaries given in the arguments and files parameters
    will be updated with the contents of the body: non-file fields go into
    ``arguments`` as name -> list of byte-string values; file fields go into
    ``files`` as name -> list of dicts with ``filename``, ``body`` and
    ``content_type`` keys. Malformed parts are logged and skipped.
    """
    # The standard allows for the boundary to be quoted in the header,
    # although it's rare (it happens at least for google app engine
    # xmpp). I think we're also supposed to handle backslash-escapes
    # here but I'll save that until we see a client that uses them
    # in the wild.
    #
    # Native bytes literals (b"...", Python 2.6+ and 3) replace the
    # tornado.util.b() compatibility helper used previously.
    if boundary.startswith(b'"') and boundary.endswith(b'"'):
        boundary = boundary[1:-1]
    # The body ends with "--boundary--" plus an optional trailing CRLF;
    # footer_length covers the leading "--", trailing "--", and CRLF if any.
    if data.endswith(b"\r\n"):
        footer_length = len(boundary) + 6
    else:
        footer_length = len(boundary) + 4
    parts = data[:-footer_length].split(b"--" + boundary + b"\r\n")
    for part in parts:
        if not part:
            continue
        eoh = part.find(b"\r\n\r\n")
        if eoh == -1:
            logging.warning("multipart/form-data missing headers")
            continue
        headers = HTTPHeaders.parse(part[:eoh].decode("utf-8"))
        name_header = headers.get("Content-Disposition", "")
        if not name_header.startswith("form-data;") or \
           not part.endswith(b"\r\n"):
            logging.warning("Invalid multipart/form-data")
            continue
        # Skip the blank line after the headers (4 bytes for "\r\n\r\n")
        # and drop the CRLF that terminates the part.
        value = part[eoh + 4:-2]
        name_values = {}
        # name_header[10:] skips the "form-data;" prefix checked above.
        for name_part in name_header[10:].split(";"):
            name, name_value = name_part.strip().split("=", 1)
            name_values[name] = name_value.strip('"')
        if not name_values.get("name"):
            logging.warning("multipart/form-data value missing name")
            continue
        name = name_values["name"]
        if name_values.get("filename"):
            ctype = headers.get("Content-Type", "application/unknown")
            files.setdefault(name, []).append(dict(
                filename=name_values["filename"], body=value,
                content_type=ctype))
        else:
            arguments.setdefault(name, []).append(value)
| |
| |
def doctests():
    """Return a unittest suite that runs this module's doctest examples."""
    import doctest
    suite = doctest.DocTestSuite()
    return suite
| |
if __name__ == "__main__":
    # Running this module directly executes the doctest examples embedded
    # in the docstrings above.
    import doctest
    doctest.testmod()