-# -*- test-case-name: twisted.web.test.test_http -*-
-# Copyright (c) Twisted Matrix Laboratories.
-# See LICENSE for details.
-HyperText Transfer Protocol implementation.
-This is the basic server-side protocol implementation used by the Twisted
-Web server. It can parse HTTP 1.0 requests and supports many HTTP 1.1
-features as well. Additionally, some functionality implemented here is
-also useful for HTTP clients (such as the chunked encoding parser).
-# system imports
-from cStringIO import StringIO
-import tempfile
-import base64, binascii
-import cgi
-import socket
-import math
-import time
-import calendar
-import warnings
-import os
-from urlparse import urlparse as _urlparse
-from zope.interface import implements
-# twisted imports
-from twisted.internet import interfaces, reactor, protocol, address
-from twisted.internet.defer import Deferred
-from twisted.protocols import policies, basic
-from twisted.python import log
-from urllib import unquote
-from twisted.web.http_headers import _DictHeaders, Headers
-protocol_version = "HTTP/1.1"
-_CONTINUE = 100
-OK = 200
-CREATED = 201
-FOUND = 302
-SEE_OTHER = 303
-USE_PROXY = 305
-NOT_FOUND = 404
-GONE = 410
- # 100
- _CONTINUE: "Continue",
- SWITCHING: "Switching Protocols",
- # 200
- OK: "OK",
- CREATED: "Created",
- ACCEPTED: "Accepted",
- NON_AUTHORITATIVE_INFORMATION: "Non-Authoritative Information",
- NO_CONTENT: "No Content",
- RESET_CONTENT: "Reset Content.",
- PARTIAL_CONTENT: "Partial Content",
- MULTI_STATUS: "Multi-Status",
- # 300
- MULTIPLE_CHOICE: "Multiple Choices",
- MOVED_PERMANENTLY: "Moved Permanently",
- FOUND: "Found",
- SEE_OTHER: "See Other",
- NOT_MODIFIED: "Not Modified",
- USE_PROXY: "Use Proxy",
- # 306 not defined??
- TEMPORARY_REDIRECT: "Temporary Redirect",
- # 400
- BAD_REQUEST: "Bad Request",
- UNAUTHORIZED: "Unauthorized",
- PAYMENT_REQUIRED: "Payment Required",
- FORBIDDEN: "Forbidden",
- NOT_FOUND: "Not Found",
- NOT_ALLOWED: "Method Not Allowed",
- NOT_ACCEPTABLE: "Not Acceptable",
- PROXY_AUTH_REQUIRED: "Proxy Authentication Required",
- REQUEST_TIMEOUT: "Request Time-out",
- CONFLICT: "Conflict",
- GONE: "Gone",
- LENGTH_REQUIRED: "Length Required",
- PRECONDITION_FAILED: "Precondition Failed",
- REQUEST_ENTITY_TOO_LARGE: "Request Entity Too Large",
- REQUEST_URI_TOO_LONG: "Request-URI Too Long",
- UNSUPPORTED_MEDIA_TYPE: "Unsupported Media Type",
- REQUESTED_RANGE_NOT_SATISFIABLE: "Requested Range not satisfiable",
- EXPECTATION_FAILED: "Expectation Failed",
- # 500
- INTERNAL_SERVER_ERROR: "Internal Server Error",
- NOT_IMPLEMENTED: "Not Implemented",
- BAD_GATEWAY: "Bad Gateway",
- SERVICE_UNAVAILABLE: "Service Unavailable",
- GATEWAY_TIMEOUT: "Gateway Time-out",
- HTTP_VERSION_NOT_SUPPORTED: "HTTP Version not supported",
- INSUFFICIENT_STORAGE_SPACE: "Insufficient Storage Space",
- NOT_EXTENDED: "Not Extended"
- }
-CACHED = """Magic constant returned by http.Request methods to set cache
-validation headers when the request is conditional and the value fails
-the condition."""
-# backwards compatibility
-responses = RESPONSES
-# datetime parsing and formatting
# Lookup tables for HTTP date handling.  monthname is padded with None at
# index 0 so that it can be indexed directly by a 1-based month number.
weekdayname = 'Mon Tue Wed Thu Fri Sat Sun'.split()
monthname = [None] + 'Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec'.split()

# Lower-cased copies used for case-insensitive matching; the None
# placeholder is carried over unchanged.
weekdayname_lower = [name.lower() for name in weekdayname]
monthname_lower = [name and name.lower() for name in monthname]
def urlparse(url):
    """
    Parse an URL into six components.

    This is similar to L{urlparse.urlparse}, but rejects C{unicode} input
    and always produces C{str} output.

    @type url: C{str}
    @param url: The URL to split.

    @raise TypeError: The given url was a C{unicode} string instead of a
        C{str}.

    @rtype: six-tuple of str
    @return: The scheme, net location, path, params, query string, and fragment
        of the URL.
    """
    if isinstance(url, unicode):
        raise TypeError("url must be str, not unicode")
    scheme, netloc, path, params, query, fragment = _urlparse(url)
    # The scheme is used as the indicator that the parser handed back unicode
    # components.  When it did, every component is converted -- including
    # params -- so that the whole result honours the "always str" promise.
    if isinstance(scheme, unicode):
        scheme = scheme.encode('ascii')
        netloc = netloc.encode('ascii')
        path = path.encode('ascii')
        params = params.encode('ascii')
        query = query.encode('ascii')
        fragment = fragment.encode('ascii')
    return scheme, netloc, path, params, query, fragment
def parse_qs(qs, keep_blank_values=0, strict_parsing=0, unquote=unquote):
    """
    Split a query string into a mapping of names to lists of values, in the
    manner of C{cgi.parse_qs} but with a pluggable unquoting function.

    Pairs are separated by C{&} or C{;}.  C{+} is turned into a space before
    C{unquote} is applied to the name and then to the value.

    @param qs: The query string to parse.
    @param keep_blank_values: If true, pairs with an empty value are kept;
        otherwise they are dropped.
    @param strict_parsing: If true, a field without C{=} raises
        C{ValueError}; otherwise such fields are skipped.
    @param unquote: Callable used to decode each name and value.

    @return: A C{dict} mapping each name to the list of its values, in the
        order they appeared.
    """
    result = {}
    for ampersandPart in qs.split("&"):
        for pair in ampersandPart.split(";"):
            try:
                name, value = pair.split("=", 1)
            except ValueError:
                # No "=" in this field at all.
                if strict_parsing:
                    raise
                continue
            if not (value or keep_blank_values):
                continue
            name = unquote(name.replace("+", " "))
            value = unquote(value.replace("+", " "))
            result.setdefault(name, []).append(value)
    return result
def datetimeToString(msSinceEpoch=None):
    """
    Convert seconds since epoch to HTTP datetime string.

    @param msSinceEpoch: Despite the name, a number of I{seconds} since the
        epoch (it is handed straight to C{time.gmtime}).  If C{None}, the
        current time is used.

    @rtype: C{str}
    @return: The time formatted like C{'Sun, 06 Nov 1994 08:49:37 GMT'}.
    """
    if msSinceEpoch is None:
        msSinceEpoch = time.time()
    # Only the first seven fields of the time tuple are needed.
    year, month, day, hh, mm, ss, wd = time.gmtime(msSinceEpoch)[:7]
    return "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
        weekdayname[wd],
        day, monthname[month], year,
        hh, mm, ss)
def datetimeToLogString(msSinceEpoch=None):
    """
    Convert seconds since epoch to log datetime string.

    @param msSinceEpoch: Despite the name, a number of I{seconds} since the
        epoch (it is handed straight to C{time.gmtime}).  If C{None}, the
        current time is used.

    @rtype: C{str}
    @return: The time formatted like C{'[06/Nov/1994:08:49:37 +0000]'}.
    """
    if msSinceEpoch is None:
        msSinceEpoch = time.time()
    # The weekday and later fields of the time tuple are not used here.
    year, month, day, hh, mm, ss = time.gmtime(msSinceEpoch)[:6]
    return "[%02d/%3s/%4d:%02d:%02d:%02d +0000]" % (
        day, monthname[month], year,
        hh, mm, ss)
def timegm(year, month, day, hour, minute, second):
    """
    Turn a broken-down GMT time into a count of seconds since the epoch.

    @param year: Four digit year; must not be earlier than 1970.
    @param month: Month number, 1 through 12.
    @param day: Day of the month, starting at 1.
    @param hour: Hour of the day.
    @param minute: Minute of the hour.
    @param second: Second of the minute.

    @raise ValueError: If C{year} is before 1970.

    @return: Seconds since 1970-01-01 00:00:00 GMT.
    """
    EPOCH = 1970
    if year < EPOCH:
        raise ValueError("Years prior to %d not supported" % (EPOCH,))
    assert 1 <= month <= 12

    # Whole years elapsed, counting one extra day per leap year in between.
    dayCount = 365 * (year - EPOCH) + calendar.leapdays(EPOCH, year)
    # Whole months elapsed in the current year (non-leap lengths).
    dayCount += sum(calendar.mdays[m] for m in range(1, month))
    # February 29th of the current year, if it is already behind us.
    if month > 2 and calendar.isleap(year):
        dayCount += 1
    dayCount += day - 1

    return ((dayCount * 24 + hour) * 60 + minute) * 60 + second
def stringToDatetime(dateString):
    """
    Convert an HTTP date string (one of three formats) to seconds since epoch.

    The accepted layouts are::

        Sun, 06 Nov 1994 08:49:37 GMT    (trailing "GMT" optional)
        Sunday, 06-Nov-94 08:49:37 GMT   (trailing "GMT" optional)
        Sun Nov  6 08:49:37 1994         (ANSI C asctime())

    The leading weekday may be omitted.

    @type dateString: C{str}
    @param dateString: The date to parse.

    @raise ValueError: If the string is empty or does not match any of the
        supported layouts.

    @rtype: C{int}
    @return: Seconds since the epoch.
    """
    parts = dateString.split()
    if not parts:
        # Empty or all-whitespace input has no fields to look at.  Report it
        # like any other unparseable value rather than failing on parts[0].
        raise ValueError("Unknown datetime format %r" % (dateString,))

    if not parts[0][0:3].lower() in weekdayname_lower:
        # Weekday is stupid. Might have been omitted.
        try:
            return stringToDatetime("Sun, " + dateString)
        except ValueError:
            # Guess not.
            pass

    partlen = len(parts)
    if (partlen == 5 or partlen == 6) and parts[1].isdigit():
        # 1st date format: Sun, 06 Nov 1994 08:49:37 GMT
        # (Note: "GMT" is literal, not a variable timezone)
        # (also handles without "GMT")
        # This is the normal format
        day = parts[1]
        month = parts[2]
        year = parts[3]
        timeOfDay = parts[4]
    elif (partlen == 3 or partlen == 4) and parts[1].find('-') != -1:
        # 2nd date format: Sunday, 06-Nov-94 08:49:37 GMT
        # (Note: "GMT" is literal, not a variable timezone)
        # (also handles without "GMT")
        # Two digit year, yucko.
        day, month, year = parts[1].split('-')
        timeOfDay = parts[2]
        year = int(year)
        if year < 69:
            year = year + 2000
        elif year < 100:
            year = year + 1900
    elif partlen == 5:
        # 3rd date format: Sun Nov 6 08:49:37 1994
        # ANSI C asctime() format.
        day = parts[2]
        month = parts[1]
        year = parts[4]
        timeOfDay = parts[3]
    else:
        raise ValueError("Unknown datetime format %r" % (dateString,))

    day = int(day)
    # list.index raises ValueError for an unknown month name.
    month = int(monthname_lower.index(month.lower()))
    year = int(year)
    hour, minute, second = map(int, timeOfDay.split(':'))
    return int(timegm(year, month, day, hour, minute, second))
def toChunk(data):
    """
    Wrap a string as one chunk of a chunked transfer encoding.

    @param data: The payload of the chunk.

    @returns: a three-tuple of strings: the hexadecimal length line, the
        payload itself, and the terminating CRLF.
    """
    sizeLine = "%x\r\n" % len(data)
    return (sizeLine, data, "\r\n")
def fromChunk(data):
    """
    Convert chunk to string.

    @param data: A string starting with a complete chunk: a hexadecimal
        length, CRLF, that many bytes of payload, and a closing CRLF.

    @raise ValueError: If the length line is missing, is not valid
        hexadecimal, is negative, or the payload is not followed by CRLF.

    @returns: tuple (result, remaining), where C{result} is the chunk payload
        and C{remaining} is whatever followed the chunk.
    """
    # Unpacking fails with ValueError when there is no CRLF at all.
    prefix, rest = data.split('\r\n', 1)
    length = int(prefix, 16)
    if length < 0:
        raise ValueError("Chunk length must be >= 0, not %d" % (length,))
    if rest[length:length + 2] != '\r\n':
        raise ValueError("chunk must end with CRLF")
    return rest[:length], rest[length + 2:]
def parseContentRange(header):
    """
    Parse a content-range header into (start, end, realLength).

    realLength might be None if real length is not known ('*').

    @param header: The header value, e.g. C{'bytes 0-499/1234'}.

    @raise ValueError: If the range unit is not C{bytes} or the value is
        otherwise malformed.

    @return: A three-tuple C{(start, end, realLength)} of integers, with
        C{realLength} being C{None} when the header gives C{'*'}.
    """
    kind, other = header.strip().split()
    if kind.lower() != "bytes":
        # Name the offending unit in the message.
        raise ValueError("a range of type %r is not supported" % (kind,))
    startend, realLength = other.split("/")
    start, end = map(int, startend.split("-"))
    if realLength == "*":
        realLength = None
    else:
        realLength = int(realLength)
    return (start, end, realLength)
class StringTransport:
    """
    A minimal in-memory transport backed by a C{StringIO}.

    C{writeSequence} is provided here; every other attribute access is
    forwarded to the underlying C{StringIO} object.
    """

    def __init__(self):
        self.s = StringIO()

    def writeSequence(self, seq):
        """
        Write all strings in C{seq}, concatenated, to the buffer.
        """
        joined = ''.join(seq)
        self.s.write(joined)

    def __getattr__(self, attr):
        # Go through __dict__ directly so that a lookup of 's' itself on an
        # uninitialized instance fails instead of recursing.
        wrapped = self.__dict__['s']
        return getattr(wrapped, attr)
class HTTPClient(basic.LineReceiver):
    """
    A client for HTTP 1.0.

    Notes:
    You probably want to send a 'Host' header with the name of the site you're
    connecting to, in order to not break name based virtual hosting.

    @ivar length: The value of the most recent I{Content-Length} header,
        reduced as body bytes are received; C{None} if no such header has
        been seen.
    @type length: C{int}

    @ivar firstLine: Are we waiting for the first (status) line?
    @type firstLine: C{bool}

    @ivar __buffer: Accumulates the body once the headers are complete.
    @type __buffer: A C{StringIO} object.

    @ivar _header: The header currently being assembled, possibly spanning
        several folded lines.
    @type _header: C{str}
    """
    length = None
    firstLine = True
    __buffer = None
    _header = ""

    def sendCommand(self, command, path):
        """
        Write the request line for C{command} and C{path}.
        """
        requestLine = '%s %s HTTP/1.0\r\n' % (command, path)
        self.transport.write(requestLine)

    def sendHeader(self, name, value):
        """
        Write a single header line.
        """
        headerLine = '%s: %s\r\n' % (name, value)
        self.transport.write(headerLine)

    def endHeaders(self):
        """
        Write the blank line that terminates the header section.
        """
        self.transport.write('\r\n')

    def extractHeader(self, header):
        """
        Split a complete header into name and value, pass them to
        L{handleHeader}, and remember the value of I{Content-Length}.

        @param header: a complete header of the form 'field-name: value'.
        @type header: C{str}
        """
        name, rawValue = header.split(':', 1)
        value = rawValue.lstrip()
        self.handleHeader(name, value)
        if name.lower() == 'content-length':
            self.length = int(value)

    def lineReceived(self, line):
        """
        Handle one line of the status line / header section.

        @param line: One received line.  The body is handled in
            L{rawDataReceived}.
        @type line: C{str}
        """
        if self.firstLine:
            # Status line: version, status code and an optional message.
            self.firstLine = False
            fields = line.split(None, 2)
            version, status = fields[0], fields[1]
            if len(fields) > 2:
                message = fields[2]
            else:
                # sometimes there is no message
                message = ""
            self.handleStatus(version, status, message)
            return

        if not line:
            # Blank line: flush any pending header and switch to the body.
            if self._header != "":
                self.extractHeader(self._header)
            self.__buffer = StringIO()
            self.handleEndHeaders()
            self.setRawMode()
            return

        if line[:1] in ('\t', ' '):
            # Continuation of a folded header.  The leading whitespace of
            # the continuing line is kept as-is.
            self._header = self._header + line
        else:
            # A new header begins; process the previous one, if any.
            if self._header:
                self.extractHeader(self._header)
            self._header = line

    def connectionLost(self, reason):
        self.handleResponseEnd()

    def handleResponseEnd(self):
        """
        The response has been completely received.

        This may be invoked more than once; the buffered body is delivered
        to C{handleResponse} only while a buffer is present.
        """
        buffered = self.__buffer
        if buffered is None:
            return
        body = buffered.getvalue()
        self.__buffer = None
        self.handleResponse(body)

    def handleResponsePart(self, data):
        """
        Append a piece of the body to the buffer.
        """
        self.__buffer.write(data)

    def connectionMade(self):
        pass

    def handleStatus(self, version, status, message):
        """
        Called when the status-line is received.

        @param version: e.g. 'HTTP/1.0'
        @param status: e.g. '200'
        @type status: C{str}
        @param message: e.g. 'OK'
        """

    def handleHeader(self, key, val):
        """
        Called every time a header is received.
        """

    def handleEndHeaders(self):
        """
        Called when all headers have been received.
        """

    def rawDataReceived(self, data):
        """
        Handle body bytes, honouring C{length} when it is known.
        """
        remaining = self.length
        if remaining is None:
            body, rest = data, ''
        else:
            # Anything beyond the declared length is not part of this body.
            body, rest = data[:remaining], data[remaining:]
            self.length = remaining - len(body)
        self.handleResponsePart(body)
        if self.length == 0:
            self.handleResponseEnd()
            self.setLineMode(rest)
-# response codes that must have empty bodies
-NO_BODY_CODES = (204, 304)
-class Request:
- """
- A HTTP request.
- Subclasses should override the process() method to determine how
- the request will be processed.
- @ivar method: The HTTP method that was used.
- @ivar uri: The full URI that was requested (includes arguments).
- @ivar path: The path only (arguments not included).
- @ivar args: All of the arguments, including URL and POST arguments.
- @type args: A mapping of strings (the argument names) to lists of values.
- i.e., ?foo=bar&foo=baz&quux=spam results in
- {'foo': ['bar', 'baz'], 'quux': ['spam']}.
- @type requestHeaders: L{http_headers.Headers}
- @ivar requestHeaders: All received HTTP request headers.
- @ivar received_headers: Backwards-compatibility access to
- C{requestHeaders}. Use C{requestHeaders} instead. C{received_headers}
- behaves mostly like a C{dict} and does not provide access to all header
- values.
- @type responseHeaders: L{http_headers.Headers}
- @ivar responseHeaders: All HTTP response headers to be sent.
- @ivar headers: Backwards-compatibility access to C{responseHeaders}. Use
- C{responseHeaders} instead. C{headers} behaves mostly like a C{dict}
- and does not provide access to all header values nor does it allow
- multiple values for one header to be set.
- @ivar notifications: A C{list} of L{Deferred}s which are waiting for
- notification that the response to this request has been finished
- (successfully or with an error). Don't use this attribute directly,
- instead use the L{Request.notifyFinish} method.
- @ivar _disconnected: A flag which is C{False} until the connection over
- which this request was received is closed and which is C{True} after
- that.
- @type _disconnected: C{bool}
- """
- implements(interfaces.IConsumer)
- producer = None
- finished = 0
- code = OK
- code_message = RESPONSES[OK]
- method = "(no method yet)"
- clientproto = "(no clientproto yet)"
- uri = "(no uri yet)"
- startedWriting = 0
- chunked = 0
- sentLength = 0 # content-length of response, or total bytes sent via chunking
- etag = None
- lastModified = None
- args = None
- path = None
- content = None
- _forceSSL = 0
- _disconnected = False
- def __init__(self, channel, queued):
- """
- @param channel: the channel we're connected to.
- @param queued: are we in the request queue, or can we start writing to
- the transport?
- """
- self.notifications = []
- self.channel = channel
- self.queued = queued
- self.requestHeaders = Headers()
- self.received_cookies = {}
- self.responseHeaders = Headers()
- self.cookies = [] # outgoing cookies
- if queued:
- self.transport = StringTransport()
- else:
- self.transport = self.channel.transport
- def __setattr__(self, name, value):
- """
- Support assignment of C{dict} instances to C{received_headers} for
- backwards-compatibility.
- """
- if name == 'received_headers':
- # A property would be nice, but Request is classic.
- self.requestHeaders = headers = Headers()
- for k, v in value.iteritems():
- headers.setRawHeaders(k, [v])
- elif name == 'requestHeaders':
- self.__dict__[name] = value
- self.__dict__['received_headers'] = _DictHeaders(value)
- elif name == 'headers':
- self.responseHeaders = headers = Headers()
- for k, v in value.iteritems():
- headers.setRawHeaders(k, [v])
- elif name == 'responseHeaders':
- self.__dict__[name] = value
- self.__dict__['headers'] = _DictHeaders(value)
- else:
- self.__dict__[name] = value
- def _cleanup(self):
- """
- Called when have finished responding and are no longer queued.
- """
- if self.producer:
- log.err(RuntimeError("Producer was not unregistered for %s" % self.uri))
- self.unregisterProducer()
- self.channel.requestDone(self)
- del self.channel
- try:
- self.content.close()
- except OSError:
- # win32 suckiness, no idea why it does this
- pass
- del self.content
- for d in self.notifications:
- d.callback(None)
- self.notifications = []
- # methods for channel - end users should not use these
- def noLongerQueued(self):
- """
- Notify the object that it is no longer queued.
- We start writing whatever data we have to the transport, etc.
- This method is not intended for users.
- """
- if not self.queued:
- raise RuntimeError, "noLongerQueued() got called unnecessarily."
- self.queued = 0
- # set transport to real one and send any buffer data
- data = self.transport.getvalue()
- self.transport = self.channel.transport
- if data:
- self.transport.write(data)
- # if we have producer, register it with transport
- if (self.producer is not None) and not self.finished:
- self.transport.registerProducer(self.producer, self.streamingProducer)
- # if we're finished, clean up
- if self.finished:
- self._cleanup()
- def gotLength(self, length):
- """
- Called when HTTP channel got length of content in this request.
- This method is not intended for users.
- @param length: The length of the request body, as indicated by the
- request headers. C{None} if the request headers do not indicate a
- length.
- """
- if length is not None and length < 100000:
- self.content = StringIO()
- else:
- self.content = tempfile.TemporaryFile()
- def parseCookies(self):
- """
- Parse cookie headers.
- This method is not intended for users.
- """
- cookieheaders = self.requestHeaders.getRawHeaders("cookie")
- if cookieheaders is None:
- return
- for cookietxt in cookieheaders:
- if cookietxt:
- for cook in cookietxt.split(';'):
- cook = cook.lstrip()
- try:
- k, v = cook.split('=', 1)
- self.received_cookies[k] = v
- except ValueError:
- pass
- def handleContentChunk(self, data):
- """
- Write a chunk of data.
- This method is not intended for users.
- """
- self.content.write(data)
- def requestReceived(self, command, path, version):
- """
- Called by channel when all data has been received.
- This method is not intended for users.
- @type command: C{str}
- @param command: The HTTP verb of this request. This has the case
- supplied by the client (eg, it maybe "get" rather than "GET").
- @type path: C{str}
- @param path: The URI of this request.
- @type version: C{str}
- @param version: The HTTP version of this request.
- """
- self.content.seek(0,0)
- self.args = {}
- self.stack = []
- self.method, self.uri = command, path
- self.clientproto = version
- x = self.uri.split('?', 1)
- if len(x) == 1:
- self.path = self.uri
- else:
- self.path, argstring = x
- self.args = parse_qs(argstring, 1)
- # cache the client and server information, we'll need this later to be
- # serialized and sent with the request so CGIs will work remotely
- self.client = self.channel.transport.getPeer()
- self.host = self.channel.transport.getHost()
- # Argument processing
- args = self.args
- ctype = self.requestHeaders.getRawHeaders('content-type')
- if ctype is not None:
- ctype = ctype[0]
- if self.method == "POST" and ctype:
- mfd = 'multipart/form-data'
- key, pdict = cgi.parse_header(ctype)
- if key == 'application/x-www-form-urlencoded':
- args.update(parse_qs(self.content.read(), 1))
- elif key == mfd:
- try:
- args.update(cgi.parse_multipart(self.content, pdict))
- except KeyError, e:
- if e.args[0] == 'content-disposition':
- # Parse_multipart can't cope with missing
- # content-dispostion headers in multipart/form-data
- # parts, so we catch the exception and tell the client
- # it was a bad request.
- self.channel.transport.write(
- "HTTP/1.1 400 Bad Request\r\n\r\n")
- self.channel.transport.loseConnection()
- return
- raise
- self.content.seek(0, 0)
- self.process()
- def __repr__(self):
- return '<%s %s %s>'% (self.method, self.uri, self.clientproto)
- def process(self):
- """
- Override in subclasses.
- This method is not intended for users.
- """
- pass
- # consumer interface
- def registerProducer(self, producer, streaming):
- """
- Register a producer.
- """
- if self.producer:
- raise ValueError, "registering producer %s before previous one (%s) was unregistered" % (producer, self.producer)
- self.streamingProducer = streaming
- self.producer = producer
- if self.queued:
- if streaming:
- producer.pauseProducing()
- else:
- self.transport.registerProducer(producer, streaming)
- def unregisterProducer(self):
- """
- Unregister the producer.
- """
- if not self.queued:
- self.transport.unregisterProducer()
- self.producer = None
- # private http response methods
- def _sendError(self, code, resp=''):
- self.transport.write('%s %s %s\r\n\r\n' % (self.clientproto, code, resp))
- # The following is the public interface that people should be
- # writing to.
- def getHeader(self, key):
- """
- Get an HTTP request header.
- @type key: C{str}
- @param key: The name of the header to get the value of.
- @rtype: C{str} or C{NoneType}
- @return: The value of the specified header, or C{None} if that header
- was not present in the request.
- """
- value = self.requestHeaders.getRawHeaders(key)
- if value is not None:
- return value[-1]
- def getCookie(self, key):
- """
- Get a cookie that was sent from the network.
- """
- return self.received_cookies.get(key)
- def notifyFinish(self):
- """
- Notify when the response to this request has finished.
- @rtype: L{Deferred}
- @return: A L{Deferred} which will be triggered when the request is
- finished -- with a C{None} value if the request finishes
- successfully or with an error if the request is interrupted by an
- error (for example, the client closing the connection prematurely).
- """
- self.notifications.append(Deferred())
- return self.notifications[-1]
- def finish(self):
- """
- Indicate that all response data has been written to this L{Request}.
- """
- if self._disconnected:
- raise RuntimeError(
- "Request.finish called on a request after its connection was lost; "
- "use Request.notifyFinish to keep track of this.")
- if self.finished:
- warnings.warn("Warning! request.finish called twice.", stacklevel=2)
- return
- if not self.startedWriting:
- # write headers
- self.write('')
- if self.chunked:
- # write last chunk and closing CRLF
- self.transport.write("0\r\n\r\n")
- # log request
- if hasattr(self.channel, "factory"):
- self.channel.factory.log(self)
- self.finished = 1
- if not self.queued:
- self._cleanup()
- def write(self, data):
- """
- Write some data as a result of an HTTP request. The first
- time this is called, it writes out response data.
- @type data: C{str}
- @param data: Some bytes to be sent as part of the response body.
- """
- if self.finished:
- raise RuntimeError('Request.write called on a request after '
- 'Request.finish was called.')
- if not self.startedWriting:
- self.startedWriting = 1
- version = self.clientproto
- l = []
- l.append('%s %s %s\r\n' % (version, self.code,
- self.code_message))
- # if we don't have a content length, we send data in
- # chunked mode, so that we can support pipelining in
- # persistent connections.
- if ((version == "HTTP/1.1") and
- (self.responseHeaders.getRawHeaders('content-length') is None) and
- self.method != "HEAD" and self.code not in NO_BODY_CODES):
- l.append("%s: %s\r\n" % ('Transfer-Encoding', 'chunked'))
- self.chunked = 1
- if self.lastModified is not None:
- if self.responseHeaders.hasHeader('last-modified'):
- log.msg("Warning: last-modified specified both in"
- " header list and lastModified attribute.")
- else:
- self.responseHeaders.setRawHeaders(
- 'last-modified',
- [datetimeToString(self.lastModified)])
- if self.etag is not None:
- self.responseHeaders.setRawHeaders('ETag', [self.etag])
- for name, values in self.responseHeaders.getAllRawHeaders():
- for value in values:
- l.append("%s: %s\r\n" % (name, value))
- for cookie in self.cookies:
- l.append('%s: %s\r\n' % ("Set-Cookie", cookie))
- l.append("\r\n")
- self.transport.writeSequence(l)
- # if this is a "HEAD" request, we shouldn't return any data
- if self.method == "HEAD":
- self.write = lambda data: None
- return
- # for certain result codes, we should never return any data
- if self.code in NO_BODY_CODES:
- self.write = lambda data: None
- return
- self.sentLength = self.sentLength + len(data)
- if data:
- if self.chunked:
- self.transport.writeSequence(toChunk(data))
- else:
- self.transport.write(data)
- def addCookie(self, k, v, expires=None, domain=None, path=None, max_age=None, comment=None, secure=None):
- """
- Set an outgoing HTTP cookie.
- In general, you should consider using sessions instead of cookies, see
- L{twisted.web.server.Request.getSession} and the
- L{twisted.web.server.Session} class for details.
- """
- cookie = '%s=%s' % (k, v)
- if expires is not None:
- cookie = cookie +"; Expires=%s" % expires
- if domain is not None:
- cookie = cookie +"; Domain=%s" % domain
- if path is not None:
- cookie = cookie +"; Path=%s" % path
- if max_age is not None:
- cookie = cookie +"; Max-Age=%s" % max_age
- if comment is not None:
- cookie = cookie +"; Comment=%s" % comment
- if secure:
- cookie = cookie +"; Secure"
- self.cookies.append(cookie)
- def setResponseCode(self, code, message=None):
- """
- Set the HTTP response code.
- """
- if not isinstance(code, (int, long)):
- raise TypeError("HTTP response code must be int or long")
- self.code = code
- if message:
- self.code_message = message
- else:
- self.code_message = RESPONSES.get(code, "Unknown Status")
- def setHeader(self, name, value):
- """
- Set an HTTP response header. Overrides any previously set values for
- this header.
- @type name: C{str}
- @param name: The name of the header for which to set the value.
- @type value: C{str}
- @param value: The value to set for the named header.
- """
- self.responseHeaders.setRawHeaders(name, [value])
- def redirect(self, url):
- """
- Utility function that does a redirect.
- The request should have finish() called after this.
- """
- self.setResponseCode(FOUND)
- self.setHeader("location", url)
- def setLastModified(self, when):
- """
- Set the C{Last-Modified} time for the response to this request.
- If I am called more than once, I ignore attempts to set
- Last-Modified earlier, only replacing the Last-Modified time
- if it is to a later value.
- If I am a conditional request, I may modify my response code
- to L{NOT_MODIFIED} if appropriate for the time given.
- @param when: The last time the resource being returned was
- modified, in seconds since the epoch.
- @type when: number
- @return: If I am a C{If-Modified-Since} conditional request and
- the time given is not newer than the condition, I return
- L{http.CACHED<CACHED>} to indicate that you should write no
- body. Otherwise, I return a false value.
- """
- # time.time() may be a float, but the HTTP-date strings are
- # only good for whole seconds.
- when = long(math.ceil(when))
- if (not self.lastModified) or (self.lastModified < when):
- self.lastModified = when
- modifiedSince = self.getHeader('if-modified-since')
- if modifiedSince:
- firstPart = modifiedSince.split(';', 1)[0]
- try:
- modifiedSince = stringToDatetime(firstPart)
- except ValueError:
- return None
- if modifiedSince >= when:
- self.setResponseCode(NOT_MODIFIED)
- return CACHED
- return None
- def setETag(self, etag):
- """
- Set an C{entity tag} for the outgoing response.
- That's \"entity tag\" as in the HTTP/1.1 C{ETag} header, \"used
- for comparing two or more entities from the same requested
- resource.\"
- If I am a conditional request, I may modify my response code
- to L{NOT_MODIFIED} or L{PRECONDITION_FAILED}, if appropriate
- for the tag given.
- @param etag: The entity tag for the resource being returned.
- @type etag: string
- @return: If I am a C{If-None-Match} conditional request and
- the tag matches one in the request, I return
- L{http.CACHED<CACHED>} to indicate that you should write
- no body. Otherwise, I return a false value.
- """
- if etag:
- self.etag = etag
- tags = self.getHeader("if-none-match")
- if tags:
- tags = tags.split()
- if (etag in tags) or ('*' in tags):
- self.setResponseCode(((self.method in ("HEAD", "GET"))
- return CACHED
- return None
- def getAllHeaders(self):
- """
- Return dictionary mapping the names of all received headers to the last
- value received for each.
- Since this method does not return all header information,
- C{self.requestHeaders.getAllRawHeaders()} may be preferred.
- """
- headers = {}
- for k, v in self.requestHeaders.getAllRawHeaders():
- headers[k.lower()] = v[-1]
- return headers
- def getRequestHostname(self):
- """
- Get the hostname that the user passed in to the request.
- This will either use the Host: header (if it is available) or the
- host we are listening on if the header is unavailable.
- @returns: the requested hostname
- @rtype: C{str}
- """
- # XXX This method probably has no unit tests. I changed it a ton and
- # nothing failed.
- host = self.getHeader('host')
- if host:
- return host.split(':', 1)[0]
- return self.getHost().host
- def getHost(self):
- """
- Get my originally requesting transport's host.
- Don't rely on the 'transport' attribute, since Request objects may be
- copied remotely. For information on this method's return value, see
- twisted.internet.tcp.Port.
- """
- return self.host
- def setHost(self, host, port, ssl=0):
- """
- Change the host and port the request thinks it's using.
- This method is useful for working with reverse HTTP proxies (e.g.
- both Squid and Apache's mod_proxy can do this), when the address
- the HTTP client is using is different than the one we're listening on.
- For example, Apache may be listening on https://www.example.com, and then
- forwarding requests to http://localhost:8080, but we don't want HTML produced
- by Twisted to say 'http://localhost:8080', they should say 'https://www.example.com',
- so we do::
- request.setHost('www.example.com', 443, ssl=1)
- @type host: C{str}
- @param host: The value to which to change the host header.
- @type ssl: C{bool}
- @param ssl: A flag which, if C{True}, indicates that the request is
- considered secure (if C{True}, L{isSecure} will return C{True}).
- """
- self._forceSSL = ssl # set first so isSecure will work
- if self.isSecure():
- default = 443
- else:
- default = 80
- if port == default:
- hostHeader = host
- else:
- hostHeader = '%s:%d' % (host, port)
- self.requestHeaders.setRawHeaders("host", [hostHeader])
- self.host = address.IPv4Address("TCP", host, port)
- def getClientIP(self):
- """
- Return the IP address of the client who submitted this request.
- @returns: the client IP address
- @rtype: C{str}
- """
- if isinstance(self.client, address.IPv4Address):
- return self.client.host
- else:
- return None
- def isSecure(self):
- """
- Return True if this request is using a secure transport.
- Normally this method returns True if this request's HTTPChannel
- instance is using a transport that implements ISSLTransport.
- This will also return True if setHost() has been called
- with ssl=True.
- @returns: True if this request is secure
- @rtype: C{bool}
- """
- if self._forceSSL:
- return True
- transport = getattr(getattr(self, 'channel', None), 'transport', None)
- if interfaces.ISSLTransport(transport, None) is not None:
- return True
- return False
- def _authorize(self):
- # Authorization, (mostly) per the RFC
- try:
- authh = self.getHeader("Authorization")
- if not authh:
- self.user = self.password = ''
- return
- bas, upw = authh.split()
- if bas.lower() != "basic":
- raise ValueError
- upw = base64.decodestring(upw)
- self.user, self.password = upw.split(':', 1)
- except (binascii.Error, ValueError):
- self.user = self.password = ""
- except:
- log.err()
- self.user = self.password = ""
def getUser(self):
    """
    Return the HTTP user sent with this request, if any.

    The credentials are parsed on first access by calling C{_authorize}
    and are read from C{self.user} afterwards.  If no user was supplied,
    return the empty string.

    @returns: the HTTP user, if any
    @rtype: C{str}
    """
    try:
        return self.user
    except AttributeError:
        # Credentials have not been parsed yet; do so below.
        pass
    self._authorize()
    return self.user
def getPassword(self):
    """
    Return the HTTP password sent with this request, if any.

    The credentials are parsed on first access by calling C{_authorize}
    and are read from C{self.password} afterwards.  If no password was
    supplied, return the empty string.

    @returns: the HTTP password, if any
    @rtype: C{str}
    """
    try:
        return self.password
    except AttributeError:
        # Credentials have not been parsed yet; do so below.
        pass
    self._authorize()
    return self.password
def getClient(self):
    """
    Return a host name for the client which submitted this request.

    A reverse lookup of the client address is performed with
    C{socket.gethostbyaddr}.  The first name containing a dot (the primary
    name is considered before the aliases) is returned; if none contains a
    dot, the primary name is returned.

    @returns: C{None} if the client address type is not C{'TCP'}; the
        client's address unchanged if the lookup fails; otherwise a host
        name as described above.
    """
    if self.client.type != 'TCP':
        return None
    host = self.client.host
    try:
        primary, aliases, _addresses = socket.gethostbyaddr(host)
    except socket.error:
        # No reverse record (or resolver failure): report the raw address.
        return host
    # Prefer something that looks fully qualified.
    for candidate in [primary] + aliases:
        if '.' in candidate:
            return candidate
    return primary
def connectionLost(self, reason):
    """
    There is no longer a connection for this request to respond over.
    Release what is no longer useful: mark the request as disconnected,
    drop the channel reference, close the content object if there is one,
    and errback every pending notification with C{reason}.

    @param reason: The object passed to the C{errback} method of each
        entry in C{self.notifications}.
    """
    self._disconnected = True
    self.channel = None

    body = self.content
    if body is not None:
        body.close()

    for waiting in self.notifications:
        waiting.errback(reason)
    # Each notification has fired; start over with an empty list.
    self.notifications = []
class _DataLoss(Exception):
    """
    L{_DataLoss} indicates that not all of a message body was received.  This
    is only one of several possible exceptions which may indicate that data
    was lost.  Because of this, it should not be checked for specifically;
    any unexpected exception should be treated as having caused data loss.
    """
class PotentialDataLoss(Exception):
    """
    L{PotentialDataLoss} may be raised by a transfer encoding decoder's
    C{noMoreData} method to indicate that it cannot be determined whether the
    entire response body has been delivered.

    This only occurs when making requests to HTTP servers which set neither
    I{Content-Length} nor a I{Transfer-Encoding} in the response.  In that
    case the end of the response is indicated by the connection being
    closed, an event which may also be due to a transient network problem or
    other error.
    """
-class _IdentityTransferDecoder(object):
- """
- Protocol for accumulating bytes up to a specified length. This handles the
- case where no I{Transfer-Encoding} is specified.
- @ivar contentLength: Counter keeping track of how many more bytes there are
- to receive.
- @ivar dataCallback: A one-argument callable which will be invoked each
- time application data is received.
- @ivar finishCallback: A one-argument callable which will be invoked when
- the terminal chunk is received. It will be invoked with all bytes
- which were delivered to this protocol which came after the terminal
- chunk.
- """
- def __init__(self, contentLength, dataCallback, finishCallback):
- self.contentLength = contentLength
- self.dataCallback = dataCallback
- self.finishCallback = finishCallback
- def dataReceived(self, data):
- """
- Interpret the next chunk of bytes received. Either deliver them to the
- data callback or invoke the finish callback if enough bytes have been
- received.
- @raise RuntimeError: If the finish callback has already been invoked
- during a previous call to this methood.
- """
- if self.dataCallback is None:
- raise RuntimeError(
- "_IdentityTransferDecoder cannot decode data after finishing")
- if self.contentLength is None:
- self.dataCallback(data)
- elif len(data) < self.contentLength:
- self.contentLength -= len(data)
- self.dataCallback(data)
- else:
- # Make the state consistent before invoking any code belonging to
- # anyone else in case noMoreData ends up being called beneath this
- # stack frame.
- contentLength = self.contentLength
- dataCallback = self.dataCallback
- finishCallback = self.finishCallback
- self.dataCallback = self.finishCallback = None
- self.contentLength = 0
- dataCallback(data[:contentLength])
- finishCallback(data[contentLength:])
- def noMoreData(self):
- """
- All data which will be delivered to this decoder has been. Check to
- make sure as much data as was expected has been received.
- @raise PotentialDataLoss: If the content length is unknown.
- @raise _DataLoss: If the content length is known and fewer than that
- many bytes have been delivered.
- @return: C{None}
- """
- finishCallback = self.finishCallback
- self.dataCallback = self.finishCallback = None
- if self.contentLength is None:
- finishCallback('')
- raise PotentialDataLoss()
- elif self.contentLength != 0:
- raise _DataLoss()
-class _ChunkedTransferDecoder(object):
- """
- Protocol for decoding I{chunked} Transfer-Encoding, as defined by RFC 2616,
- section 3.6.1. This protocol can interpret the contents of a request or
- response body which uses the I{chunked} Transfer-Encoding. It cannot
- interpret any of the rest of the HTTP protocol.
- It may make sense for _ChunkedTransferDecoder to be an actual IProtocol
- implementation. Currently, the only user of this class will only ever
- call dataReceived on it. However, it might be an improvement if the
- user could connect this to a transport and deliver connection lost
- notification. This way, `dataCallback` becomes `self.transport.write`
- and perhaps `finishCallback` becomes `self.transport.loseConnection()`
- (although I'm not sure where the extra data goes in that case). This
- could also allow this object to indicate to the receiver of data that
- the stream was not completely received, an error case which should be
- noticed. -exarkun
- @ivar dataCallback: A one-argument callable which will be invoked each
- time application data is received.
- @ivar finishCallback: A one-argument callable which will be invoked when
- the terminal chunk is received. It will be invoked with all bytes
- which were delivered to this protocol which came after the terminal
- chunk.
- @ivar length: Counter keeping track of how many more bytes in a chunk there
- are to receive.
- @ivar state: One of C{'CHUNK_LENGTH'}, C{'CRLF'}, C{'TRAILER'},
- C{'BODY'}, or C{'FINISHED'}. For C{'CHUNK_LENGTH'}, data for the
- chunk length line is currently being read. For C{'CRLF'}, the CR LF
- pair which follows each chunk is being read. For C{'TRAILER'}, the CR
- LF pair which follows the terminal 0-length chunk is currently being
- read. For C{'BODY'}, the contents of a chunk are being read. For
- C{'FINISHED'}, the last chunk has been completely read and no more
- input is valid.
- """
- state = 'CHUNK_LENGTH'
- def __init__(self, dataCallback, finishCallback):
- self.dataCallback = dataCallback
- self.finishCallback = finishCallback
- self._buffer = ''
- def _dataReceived_CHUNK_LENGTH(self, data):
- if '\r\n' in data:
- line, rest = data.split('\r\n', 1)
- parts = line.split(';')
- self.length = int(parts[0], 16)
- if self.length == 0:
- self.state = 'TRAILER'
- else:
- self.state = 'BODY'
- return rest
- else:
- self._buffer = data
- return ''
- def _dataReceived_CRLF(self, data):
- if data.startswith('\r\n'):
- self.state = 'CHUNK_LENGTH'
- return data[2:]
- else:
- self._buffer = data
- return ''
- def _dataReceived_TRAILER(self, data):
- if data.startswith('\r\n'):
- data = data[2:]
- self.state = 'FINISHED'
- self.finishCallback(data)
- else:
- self._buffer = data
- return ''
- def _dataReceived_BODY(self, data):
- if len(data) >= self.length:
- chunk, data = data[:self.length], data[self.length:]
- self.dataCallback(chunk)
- self.state = 'CRLF'
- return data
- elif len(data) < self.length:
- self.length -= len(data)
- self.dataCallback(data)
- return ''
- def _dataReceived_FINISHED(self, data):
- raise RuntimeError(
- "_ChunkedTransferDecoder.dataReceived called after last "
- "chunk was processed")
- def dataReceived(self, data):
- """
- Interpret data from a request or response body which uses the
- I{chunked} Transfer-Encoding.
- """
- data = self._buffer + data
- self._buffer = ''
- while data:
- data = getattr(self, '_dataReceived_%s' % (self.state,))(data)
- def noMoreData(self):
- """
- Verify that all data has been received. If it has not been, raise
- L{_DataLoss}.
- """
- if self.state != 'FINISHED':
- raise _DataLoss(
- "Chunked decoder in %r state, still expecting more data to "
- "get to 'FINISHED' state." % (self.state,))
class HTTPChannel(basic.LineReceiver, policies.TimeoutMixin):
    """
    A receiver for HTTP requests.

    Request lines and headers are read in line mode.  When a request has a
    body, the channel switches to raw mode and feeds the body through a
    transfer decoder until the decoder reports the end of the body.

    @ivar maxHeaders: The maximum number of headers allowed per request.

    @ivar length: C{0} when the current request has no body, the value of
        its I{Content-Length} header when one was received, or C{None} when
        the body uses the I{chunked} Transfer-Encoding.

    @ivar persistent: A flag which, if true, indicates that the connection
        may be used for another request after the current one.

    @ivar requestFactory: A callable invoked with this channel and the
        current length of the request queue to create each request object.

    @ivar requests: The queue of requests received on this channel which
        have not been completed yet.

    @ivar _transferDecoder: C{None}, or the L{_IdentityTransferDecoder} or
        L{_ChunkedTransferDecoder} instance which decodes the body of the
        request currently being received.
    """

    maxHeaders = 500 # max number of headers allowed per request

    length = 0
    persistent = 1
    __header = ''
    __first_line = 1
    __content = None

    # set in instances or subclasses
    requestFactory = Request

    _savedTimeOut = None
    _receivedHeaderCount = 0

    def __init__(self):
        # the request queue
        self.requests = []
        self._transferDecoder = None

    def connectionMade(self):
        """
        Start the idle timeout as soon as the connection is established.
        """
        self.setTimeout(self.timeOut)

    def _respondToBadRequestAndDisconnect(self):
        """
        Write a minimal I{400 Bad Request} response and close the
        connection.
        """
        self.transport.write("HTTP/1.1 400 Bad Request\r\n\r\n")
        self.transport.loseConnection()

    def lineReceived(self, line):
        """
        Handle one line of the request line / header section.

        @type line: C{str}
        @param line: The line received, excluding the line delimiter.
        """
        self.resetTimeout()

        if self.__first_line:
            # if this connection is not persistent, drop any data which
            # the client (illegally) sent after the last request.
            if not self.persistent:
                self.dataReceived = self.lineReceived = lambda *args: None
                return

            # IE sends an extraneous empty line (\r\n) after a POST request;
            # eat up such a line, but only ONCE
            if not line and self.__first_line == 1:
                self.__first_line = 2
                return

            # create a new Request object
            request = self.requestFactory(self, len(self.requests))
            self.requests.append(request)

            self.__first_line = 0
            parts = line.split()
            if len(parts) != 3:
                self._respondToBadRequestAndDisconnect()
                return
            command, path, version = parts
            self._command = command
            self._path = path
            self._version = version
        elif line == '':
            # End of headers.  If the last header is invalid a 400 has
            # already been sent, so the request must not be processed.
            if self.__header:
                if self.headerReceived(self.__header) is False:
                    return
            self.__header = ''
            self.allHeadersReceived()
            if self.length == 0:
                self.allContentReceived()
            else:
                self.setRawMode()
        elif line[0] in ' \t':
            # Continuation of a multi-line header.
            self.__header = self.__header+'\n'+line
        else:
            # A new header starts; the previous one (if any) is complete.
            if self.__header:
                if self.headerReceived(self.__header) is False:
                    return
            self.__header = line

    def _finishRequestBody(self, data):
        """
        Called by the transfer decoder when the body is complete.

        @param data: Any bytes received after the end of the body; they are
            handed back to the line parser.
        """
        self.allContentReceived()
        self.setLineMode(data)

    def headerReceived(self, line):
        """
        Do pre-processing (for content-length) and store this header away.
        Enforce the per-request header limit.

        A line without a C{':'} separator, a I{Content-Length} which is not
        a non-negative integer, or exceeding L{maxHeaders} results in a
        I{400 Bad Request} response and the connection being closed.

        @type line: C{str}
        @param line: A line from the header section of a request, excluding
            the line delimiter.

        @rtype: C{bool}
        @return: C{False} if the header was rejected and a 400 response was
            sent, C{True} otherwise.
        """
        try:
            header, data = line.split(':', 1)
        except ValueError:
            # No ':' in the line, so this is not a header at all.
            self._respondToBadRequestAndDisconnect()
            return False
        header = header.lower()
        data = data.strip()
        if header == 'content-length':
            try:
                length = int(data)
            except ValueError:
                self._respondToBadRequestAndDisconnect()
                return False
            if length < 0:
                # A negative length would confuse the identity decoder.
                self._respondToBadRequestAndDisconnect()
                return False
            self.length = length
            self._transferDecoder = _IdentityTransferDecoder(
                self.length, self.requests[-1].handleContentChunk, self._finishRequestBody)
        elif header == 'transfer-encoding' and data.lower() == 'chunked':
            self.length = None
            self._transferDecoder = _ChunkedTransferDecoder(
                self.requests[-1].handleContentChunk, self._finishRequestBody)

        # Repeated headers accumulate their values under a single name.
        reqHeaders = self.requests[-1].requestHeaders
        values = reqHeaders.getRawHeaders(header)
        if values is not None:
            values.append(data)
        else:
            reqHeaders.setRawHeaders(header, [data])

        self._receivedHeaderCount += 1
        if self._receivedHeaderCount > self.maxHeaders:
            self._respondToBadRequestAndDisconnect()
            return False
        return True

    def allContentReceived(self):
        """
        The request (including its body, if any) has been received
        completely: reset the per-request parsing state and hand the request
        line over to the most recently queued request object.
        """
        command = self._command
        path = self._path
        version = self._version

        # reset ALL state variables, so we don't interfere with next request
        self.length = 0
        self._receivedHeaderCount = 0
        self.__first_line = 1
        self._transferDecoder = None
        del self._command, self._path, self._version

        # Disable the idle timeout, in case this request takes a long
        # time to finish generating output.
        if self.timeOut:
            self._savedTimeOut = self.setTimeout(None)

        req = self.requests[-1]
        req.requestReceived(command, path, version)

    def rawDataReceived(self, data):
        """
        Pass a portion of the request body on to the transfer decoder.
        """
        self.resetTimeout()
        self._transferDecoder.dataReceived(data)

    def allHeadersReceived(self):
        """
        The header section is complete: let the request parse its cookies,
        decide whether the connection may persist, tell the request the
        expected body length and answer I{Expect: 100-continue}.
        """
        req = self.requests[-1]
        req.parseCookies()
        self.persistent = self.checkPersistence(req, self._version)
        req.gotLength(self.length)
        # Handle 'Expect: 100-continue' with automated 100 response code,
        # a simplistic implementation of RFC 2616 8.2.3:
        expectContinue = req.requestHeaders.getRawHeaders('expect')
        if (expectContinue and expectContinue[0].lower() == '100-continue' and
            self._version == 'HTTP/1.1'):
            req.transport.write("HTTP/1.1 100 Continue\r\n\r\n")

    def checkPersistence(self, request, version):
        """
        Check if the channel should close or not.

        @param request: The request most recently received over this channel
            against which checks will be made to determine if this
            connection can remain open after a matching response is
            returned.

        @type version: C{str}
        @param version: The version of the request.

        @rtype: C{bool}
        @return: A flag which, if C{True}, indicates that this connection
            may remain open to receive another request; if C{False}, the
            connection must be closed in order to indicate the completion of
            the response to C{request}.
        """
        connection = request.requestHeaders.getRawHeaders('connection')
        if connection:
            # Connection tokens form a comma separated list; whitespace is
            # accepted as a separator as well, as it always has been here.
            tokens = [token.lower() for token
                      in connection[0].replace(',', ' ').split()]
        else:
            tokens = []

        # HTTP 1.0 persistent connection support is currently disabled,
        # since we need a way to disable pipelining. HTTP 1.0 can't do
        # pipelining since we can't know in advance if we'll have a
        # content-length header, if we don't have the header we need to
        # close the connection. In HTTP 1.1 this is not an issue since we
        # use chunked encoding if content-length is not available.
        if version == "HTTP/1.1":
            if 'close' in tokens:
                request.responseHeaders.setRawHeaders('connection', ['close'])
                return False
            else:
                return True
        else:
            return False

    def requestDone(self, request):
        """
        Called by first request in queue when it is done.

        @raise TypeError: If C{request} is not the first request in the
            queue.
        """
        if request != self.requests[0]: raise TypeError
        del self.requests[0]

        if self.persistent:
            # notify next request it can start writing
            if self.requests:
                self.requests[0].noLongerQueued()
            else:
                # Nothing left to do: restore the idle timeout which was
                # disabled while the request was being processed.
                if self._savedTimeOut:
                    self.setTimeout(self._savedTimeOut)
        else:
            self.transport.loseConnection()

    def timeoutConnection(self):
        """
        Log the peer which is being timed out, then let the mixin handle
        the timeout.
        """
        log.msg("Timing out client: %s" % str(self.transport.getPeer()))
        policies.TimeoutMixin.timeoutConnection(self)

    def connectionLost(self, reason):
        """
        Cancel the timeout and notify every queued request that the
        connection is gone.
        """
        self.setTimeout(None)
        for request in self.requests:
            request.connectionLost(reason)
class HTTPFactory(protocol.ServerFactory):
    """
    Factory for HTTP server.

    @ivar logPath: The absolute path of the request log file, or C{None} to
        log to C{log.logfile}.

    @ivar timeOut: The timeout value copied onto every protocol built by
        this factory.

    @ivar _logDateTime: A cached datetime string for log messages, updated by
        C{_logDateTimeCall}.
    @type _logDateTime: L{str}

    @ivar _logDateTimeCall: A delayed call for the next update to the cached
        log datetime string.
    @type _logDateTimeCall: L{IDelayedCall} provided
    """

    protocol = HTTPChannel

    logPath = None

    timeOut = 60 * 60 * 12

    def __init__(self, logPath=None, timeout=60*60*12):
        """
        @param logPath: The path of the request log file, made absolute
            here, or C{None}.

        @param timeout: The value stored as C{timeOut}.
        """
        self.logPath = logPath
        if logPath is not None:
            self.logPath = os.path.abspath(logPath)
        self.timeOut = timeout

        # The cached log datetime and the delayed call which refreshes it.
        self._logDateTime = None
        self._logDateTimeCall = None

    def _updateLogDateTime(self):
        """
        Refresh the cached log datetime string and schedule the next refresh
        one second from now, so it is not recalculated for every request.
        """
        self._logDateTime = datetimeToLogString()
        self._logDateTimeCall = reactor.callLater(1, self._updateLogDateTime)

    def buildProtocol(self, addr):
        """
        Build a protocol as the base factory does and copy this factory's
        timeout onto it.
        """
        channel = protocol.ServerFactory.buildProtocol(self, addr)
        # TimeoutMixin expects timeOut on the protocol instance itself.
        channel.timeOut = self.timeOut
        return channel

    def startFactory(self):
        """
        Set up request logging if necessary.
        """
        if self._logDateTimeCall is None:
            self._updateLogDateTime()

        if not self.logPath:
            self.logFile = log.logfile
        else:
            self.logFile = self._openLogFile(self.logPath)

    def stopFactory(self):
        """
        Close the log file, unless it is C{log.logfile}, and cancel the
        pending refresh of the cached log datetime.
        """
        if hasattr(self, "logFile"):
            if self.logFile != log.logfile:
                self.logFile.close()
            del self.logFile

        pending = self._logDateTimeCall
        if pending is not None and pending.active():
            pending.cancel()
            self._logDateTimeCall = None

    def _openLogFile(self, path):
        """
        Open C{path} for appending, line buffered.

        Override in subclasses, e.g. to use twisted.python.logfile.
        """
        return open(path, "a", 1)

    def _escape(self, s):
        """
        Return a string like python repr, but without the surrounding
        quotes and always escaped as if those quotes were C{""}.
        """
        quoted = repr(s)
        inner = quoted[1:-1]
        if quoted[0] != "'":
            return inner
        # repr chose single quotes, so double quotes were left bare and
        # single quotes may have been escaped; swap that around.
        return inner.replace('"', '\\"').replace("\\'", "'")

    def log(self, request):
        """
        Log a request's result to the logfile, by default in combined log
        format.  Nothing is written if no log file is set up.

        @param request: The request to describe in the log line.
        """
        if not hasattr(self, "logFile"):
            return
        clientIP = request.getClientIP()
        timestamp = self._logDateTime
        requestLine = '%s %s %s' % (self._escape(request.method),
                                    self._escape(request.uri),
                                    self._escape(request.clientproto))
        # The remote user is almost never important, so that field is
        # always written as "-".
        line = '%s - - %s "%s" %d %s "%s" "%s"\n' % (
            clientIP,
            timestamp,
            requestLine,
            request.code,
            request.sentLength or "-",
            self._escape(request.getHeader("referer") or "-"),
            self._escape(request.getHeader("user-agent") or "-"))
        self.logFile.write(line)