Initial Commit
This commit is contained in:
88
lib/MultipartPostHandler.py
Normal file
88
lib/MultipartPostHandler.py
Normal file
@@ -0,0 +1,88 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
####
|
||||
# 06/2010 Nic Wolfe <nic@wolfeden.ca>
|
||||
# 02/2006 Will Holcomb <wholcomb@gmail.com>
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
|
||||
import urllib
|
||||
import urllib2
|
||||
import mimetools, mimetypes
|
||||
import os, sys
|
||||
|
||||
# Controls how sequences are uncoded. If true, elements may be given multiple values by
|
||||
# assigning a sequence.
|
||||
doseq = 1
|
||||
|
||||
class MultipartPostHandler(urllib2.BaseHandler):
|
||||
handler_order = urllib2.HTTPHandler.handler_order - 10 # needs to run first
|
||||
|
||||
def http_request(self, request):
|
||||
data = request.get_data()
|
||||
if data is not None and type(data) != str:
|
||||
v_files = []
|
||||
v_vars = []
|
||||
try:
|
||||
for(key, value) in data.items():
|
||||
if type(value) in (file, list, tuple):
|
||||
v_files.append((key, value))
|
||||
else:
|
||||
v_vars.append((key, value))
|
||||
except TypeError:
|
||||
systype, value, traceback = sys.exc_info()
|
||||
raise TypeError, "not a valid non-string sequence or mapping object", traceback
|
||||
|
||||
if len(v_files) == 0:
|
||||
data = urllib.urlencode(v_vars, doseq)
|
||||
else:
|
||||
boundary, data = MultipartPostHandler.multipart_encode(v_vars, v_files)
|
||||
contenttype = 'multipart/form-data; boundary=%s' % boundary
|
||||
if(request.has_header('Content-Type')
|
||||
and request.get_header('Content-Type').find('multipart/form-data') != 0):
|
||||
print "Replacing %s with %s" % (request.get_header('content-type'), 'multipart/form-data')
|
||||
request.add_unredirected_header('Content-Type', contenttype)
|
||||
|
||||
request.add_data(data)
|
||||
return request
|
||||
|
||||
@staticmethod
|
||||
def multipart_encode(vars, files, boundary = None, buffer = None):
|
||||
if boundary is None:
|
||||
boundary = mimetools.choose_boundary()
|
||||
if buffer is None:
|
||||
buffer = ''
|
||||
for(key, value) in vars:
|
||||
buffer += '--%s\r\n' % boundary
|
||||
buffer += 'Content-Disposition: form-data; name="%s"' % key
|
||||
buffer += '\r\n\r\n' + value + '\r\n'
|
||||
for(key, fd) in files:
|
||||
|
||||
# allow them to pass in a file or a tuple with name & data
|
||||
if type(fd) == file:
|
||||
name_in = fd.name
|
||||
fd.seek(0)
|
||||
data_in = fd.read()
|
||||
elif type(fd) in (tuple, list):
|
||||
name_in, data_in = fd
|
||||
|
||||
filename = os.path.basename(name_in)
|
||||
contenttype = mimetypes.guess_type(filename)[0] or 'application/octet-stream'
|
||||
buffer += '--%s\r\n' % boundary
|
||||
buffer += 'Content-Disposition: form-data; name="%s"; filename="%s"\r\n' % (key, filename)
|
||||
buffer += 'Content-Type: %s\r\n' % contenttype
|
||||
# buffer += 'Content-Length: %s\r\n' % file_size
|
||||
buffer += '\r\n' + data_in + '\r\n'
|
||||
buffer += '--%s--\r\n\r\n' % boundary
|
||||
return boundary, buffer
|
||||
|
||||
https_request = http_request
|
||||
5
lib/apscheduler/__init__.py
Normal file
5
lib/apscheduler/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
||||
version_info = (3, 0, 1)
|
||||
version = '3.0.1'
|
||||
release = '3.0.1'
|
||||
|
||||
__version__ = release # PEP 396
|
||||
73
lib/apscheduler/events.py
Normal file
73
lib/apscheduler/events.py
Normal file
@@ -0,0 +1,73 @@
|
||||
__all__ = ('EVENT_SCHEDULER_START', 'EVENT_SCHEDULER_SHUTDOWN', 'EVENT_EXECUTOR_ADDED', 'EVENT_EXECUTOR_REMOVED',
|
||||
'EVENT_JOBSTORE_ADDED', 'EVENT_JOBSTORE_REMOVED', 'EVENT_ALL_JOBS_REMOVED', 'EVENT_JOB_ADDED',
|
||||
'EVENT_JOB_REMOVED', 'EVENT_JOB_MODIFIED', 'EVENT_JOB_EXECUTED', 'EVENT_JOB_ERROR', 'EVENT_JOB_MISSED',
|
||||
'SchedulerEvent', 'JobEvent', 'JobExecutionEvent')
|
||||
|
||||
|
||||
EVENT_SCHEDULER_START = 1
|
||||
EVENT_SCHEDULER_SHUTDOWN = 2
|
||||
EVENT_EXECUTOR_ADDED = 4
|
||||
EVENT_EXECUTOR_REMOVED = 8
|
||||
EVENT_JOBSTORE_ADDED = 16
|
||||
EVENT_JOBSTORE_REMOVED = 32
|
||||
EVENT_ALL_JOBS_REMOVED = 64
|
||||
EVENT_JOB_ADDED = 128
|
||||
EVENT_JOB_REMOVED = 256
|
||||
EVENT_JOB_MODIFIED = 512
|
||||
EVENT_JOB_EXECUTED = 1024
|
||||
EVENT_JOB_ERROR = 2048
|
||||
EVENT_JOB_MISSED = 4096
|
||||
EVENT_ALL = (EVENT_SCHEDULER_START | EVENT_SCHEDULER_SHUTDOWN | EVENT_JOBSTORE_ADDED | EVENT_JOBSTORE_REMOVED |
|
||||
EVENT_JOB_ADDED | EVENT_JOB_REMOVED | EVENT_JOB_MODIFIED | EVENT_JOB_EXECUTED |
|
||||
EVENT_JOB_ERROR | EVENT_JOB_MISSED)
|
||||
|
||||
|
||||
class SchedulerEvent(object):
|
||||
"""
|
||||
An event that concerns the scheduler itself.
|
||||
|
||||
:ivar code: the type code of this event
|
||||
:ivar alias: alias of the job store or executor that was added or removed (if applicable)
|
||||
"""
|
||||
|
||||
def __init__(self, code, alias=None):
|
||||
super(SchedulerEvent, self).__init__()
|
||||
self.code = code
|
||||
self.alias = alias
|
||||
|
||||
def __repr__(self):
|
||||
return '<%s (code=%d)>' % (self.__class__.__name__, self.code)
|
||||
|
||||
|
||||
class JobEvent(SchedulerEvent):
|
||||
"""
|
||||
An event that concerns a job.
|
||||
|
||||
:ivar code: the type code of this event
|
||||
:ivar job_id: identifier of the job in question
|
||||
:ivar jobstore: alias of the job store containing the job in question
|
||||
"""
|
||||
|
||||
def __init__(self, code, job_id, jobstore):
|
||||
super(JobEvent, self).__init__(code)
|
||||
self.code = code
|
||||
self.job_id = job_id
|
||||
self.jobstore = jobstore
|
||||
|
||||
|
||||
class JobExecutionEvent(JobEvent):
|
||||
"""
|
||||
An event that concerns the execution of individual jobs.
|
||||
|
||||
:ivar scheduled_run_time: the time when the job was scheduled to be run
|
||||
:ivar retval: the return value of the successfully executed job
|
||||
:ivar exception: the exception raised by the job
|
||||
:ivar traceback: a formatted traceback for the exception
|
||||
"""
|
||||
|
||||
def __init__(self, code, job_id, jobstore, scheduled_run_time, retval=None, exception=None, traceback=None):
|
||||
super(JobExecutionEvent, self).__init__(code, job_id, jobstore)
|
||||
self.scheduled_run_time = scheduled_run_time
|
||||
self.retval = retval
|
||||
self.exception = exception
|
||||
self.traceback = traceback
|
||||
0
lib/apscheduler/executors/__init__.py
Normal file
0
lib/apscheduler/executors/__init__.py
Normal file
28
lib/apscheduler/executors/asyncio.py
Normal file
28
lib/apscheduler/executors/asyncio.py
Normal file
@@ -0,0 +1,28 @@
|
||||
from __future__ import absolute_import
|
||||
import sys
|
||||
|
||||
from apscheduler.executors.base import BaseExecutor, run_job
|
||||
|
||||
|
||||
class AsyncIOExecutor(BaseExecutor):
|
||||
"""
|
||||
Runs jobs in the default executor of the event loop.
|
||||
|
||||
Plugin alias: ``asyncio``
|
||||
"""
|
||||
|
||||
def start(self, scheduler, alias):
|
||||
super(AsyncIOExecutor, self).start(scheduler, alias)
|
||||
self._eventloop = scheduler._eventloop
|
||||
|
||||
def _do_submit_job(self, job, run_times):
|
||||
def callback(f):
|
||||
try:
|
||||
events = f.result()
|
||||
except:
|
||||
self._run_job_error(job.id, *sys.exc_info()[1:])
|
||||
else:
|
||||
self._run_job_success(job.id, events)
|
||||
|
||||
f = self._eventloop.run_in_executor(None, run_job, job, job._jobstore_alias, run_times, self._logger.name)
|
||||
f.add_done_callback(callback)
|
||||
119
lib/apscheduler/executors/base.py
Normal file
119
lib/apscheduler/executors/base.py
Normal file
@@ -0,0 +1,119 @@
|
||||
from abc import ABCMeta, abstractmethod
|
||||
from collections import defaultdict
|
||||
from datetime import datetime, timedelta
|
||||
from traceback import format_tb
|
||||
import logging
|
||||
import sys
|
||||
|
||||
from pytz import utc
|
||||
import six
|
||||
|
||||
from apscheduler.events import JobExecutionEvent, EVENT_JOB_MISSED, EVENT_JOB_ERROR, EVENT_JOB_EXECUTED
|
||||
|
||||
|
||||
class MaxInstancesReachedError(Exception):
|
||||
def __init__(self, job):
|
||||
super(MaxInstancesReachedError, self).__init__(
|
||||
'Job "%s" has already reached its maximum number of instances (%d)' % (job.id, job.max_instances))
|
||||
|
||||
|
||||
class BaseExecutor(six.with_metaclass(ABCMeta, object)):
|
||||
"""Abstract base class that defines the interface that every executor must implement."""
|
||||
|
||||
_scheduler = None
|
||||
_lock = None
|
||||
_logger = logging.getLogger('apscheduler.executors')
|
||||
|
||||
def __init__(self):
|
||||
super(BaseExecutor, self).__init__()
|
||||
self._instances = defaultdict(lambda: 0)
|
||||
|
||||
def start(self, scheduler, alias):
|
||||
"""
|
||||
Called by the scheduler when the scheduler is being started or when the executor is being added to an already
|
||||
running scheduler.
|
||||
|
||||
:param apscheduler.schedulers.base.BaseScheduler scheduler: the scheduler that is starting this executor
|
||||
:param str|unicode alias: alias of this executor as it was assigned to the scheduler
|
||||
"""
|
||||
|
||||
self._scheduler = scheduler
|
||||
self._lock = scheduler._create_lock()
|
||||
self._logger = logging.getLogger('apscheduler.executors.%s' % alias)
|
||||
|
||||
def shutdown(self, wait=True):
|
||||
"""
|
||||
Shuts down this executor.
|
||||
|
||||
:param bool wait: ``True`` to wait until all submitted jobs have been executed
|
||||
"""
|
||||
|
||||
def submit_job(self, job, run_times):
|
||||
"""
|
||||
Submits job for execution.
|
||||
|
||||
:param Job job: job to execute
|
||||
:param list[datetime] run_times: list of datetimes specifying when the job should have been run
|
||||
:raises MaxInstancesReachedError: if the maximum number of allowed instances for this job has been reached
|
||||
"""
|
||||
|
||||
assert self._lock is not None, 'This executor has not been started yet'
|
||||
with self._lock:
|
||||
if self._instances[job.id] >= job.max_instances:
|
||||
raise MaxInstancesReachedError(job)
|
||||
|
||||
self._do_submit_job(job, run_times)
|
||||
self._instances[job.id] += 1
|
||||
|
||||
@abstractmethod
|
||||
def _do_submit_job(self, job, run_times):
|
||||
"""Performs the actual task of scheduling `run_job` to be called."""
|
||||
|
||||
def _run_job_success(self, job_id, events):
|
||||
"""Called by the executor with the list of generated events when `run_job` has been successfully called."""
|
||||
|
||||
with self._lock:
|
||||
self._instances[job_id] -= 1
|
||||
|
||||
for event in events:
|
||||
self._scheduler._dispatch_event(event)
|
||||
|
||||
def _run_job_error(self, job_id, exc, traceback=None):
|
||||
"""Called by the executor with the exception if there is an error calling `run_job`."""
|
||||
|
||||
with self._lock:
|
||||
self._instances[job_id] -= 1
|
||||
|
||||
exc_info = (exc.__class__, exc, traceback)
|
||||
self._logger.error('Error running job %s', job_id, exc_info=exc_info)
|
||||
|
||||
|
||||
def run_job(job, jobstore_alias, run_times, logger_name):
|
||||
"""Called by executors to run the job. Returns a list of scheduler events to be dispatched by the scheduler."""
|
||||
|
||||
events = []
|
||||
logger = logging.getLogger(logger_name)
|
||||
for run_time in run_times:
|
||||
# See if the job missed its run time window, and handle possible misfires accordingly
|
||||
if job.misfire_grace_time is not None:
|
||||
difference = datetime.now(utc) - run_time
|
||||
grace_time = timedelta(seconds=job.misfire_grace_time)
|
||||
if difference > grace_time:
|
||||
events.append(JobExecutionEvent(EVENT_JOB_MISSED, job.id, jobstore_alias, run_time))
|
||||
logger.warning('Run time of job "%s" was missed by %s', job, difference)
|
||||
continue
|
||||
|
||||
logger.info('Running job "%s" (scheduled at %s)', job, run_time)
|
||||
try:
|
||||
retval = job.func(*job.args, **job.kwargs)
|
||||
except:
|
||||
exc, tb = sys.exc_info()[1:]
|
||||
formatted_tb = ''.join(format_tb(tb))
|
||||
events.append(JobExecutionEvent(EVENT_JOB_ERROR, job.id, jobstore_alias, run_time, exception=exc,
|
||||
traceback=formatted_tb))
|
||||
logger.exception('Job "%s" raised an exception', job)
|
||||
else:
|
||||
events.append(JobExecutionEvent(EVENT_JOB_EXECUTED, job.id, jobstore_alias, run_time, retval=retval))
|
||||
logger.info('Job "%s" executed successfully', job)
|
||||
|
||||
return events
|
||||
19
lib/apscheduler/executors/debug.py
Normal file
19
lib/apscheduler/executors/debug.py
Normal file
@@ -0,0 +1,19 @@
|
||||
import sys
|
||||
|
||||
from apscheduler.executors.base import BaseExecutor, run_job
|
||||
|
||||
|
||||
class DebugExecutor(BaseExecutor):
|
||||
"""
|
||||
A special executor that executes the target callable directly instead of deferring it to a thread or process.
|
||||
|
||||
Plugin alias: ``debug``
|
||||
"""
|
||||
|
||||
def _do_submit_job(self, job, run_times):
|
||||
try:
|
||||
events = run_job(job, job._jobstore_alias, run_times, self._logger.name)
|
||||
except:
|
||||
self._run_job_error(job.id, *sys.exc_info()[1:])
|
||||
else:
|
||||
self._run_job_success(job.id, events)
|
||||
29
lib/apscheduler/executors/gevent.py
Normal file
29
lib/apscheduler/executors/gevent.py
Normal file
@@ -0,0 +1,29 @@
|
||||
from __future__ import absolute_import
|
||||
import sys
|
||||
|
||||
from apscheduler.executors.base import BaseExecutor, run_job
|
||||
|
||||
|
||||
try:
|
||||
import gevent
|
||||
except ImportError: # pragma: nocover
|
||||
raise ImportError('GeventExecutor requires gevent installed')
|
||||
|
||||
|
||||
class GeventExecutor(BaseExecutor):
|
||||
"""
|
||||
Runs jobs as greenlets.
|
||||
|
||||
Plugin alias: ``gevent``
|
||||
"""
|
||||
|
||||
def _do_submit_job(self, job, run_times):
|
||||
def callback(greenlet):
|
||||
try:
|
||||
events = greenlet.get()
|
||||
except:
|
||||
self._run_job_error(job.id, *sys.exc_info()[1:])
|
||||
else:
|
||||
self._run_job_success(job.id, events)
|
||||
|
||||
gevent.spawn(run_job, job, job._jobstore_alias, run_times, self._logger.name).link(callback)
|
||||
54
lib/apscheduler/executors/pool.py
Normal file
54
lib/apscheduler/executors/pool.py
Normal file
@@ -0,0 +1,54 @@
|
||||
from abc import abstractmethod
|
||||
import concurrent.futures
|
||||
|
||||
from apscheduler.executors.base import BaseExecutor, run_job
|
||||
|
||||
|
||||
class BasePoolExecutor(BaseExecutor):
|
||||
@abstractmethod
|
||||
def __init__(self, pool):
|
||||
super(BasePoolExecutor, self).__init__()
|
||||
self._pool = pool
|
||||
|
||||
def _do_submit_job(self, job, run_times):
|
||||
def callback(f):
|
||||
exc, tb = (f.exception_info() if hasattr(f, 'exception_info') else
|
||||
(f.exception(), getattr(f.exception(), '__traceback__', None)))
|
||||
if exc:
|
||||
self._run_job_error(job.id, exc, tb)
|
||||
else:
|
||||
self._run_job_success(job.id, f.result())
|
||||
|
||||
f = self._pool.submit(run_job, job, job._jobstore_alias, run_times, self._logger.name)
|
||||
f.add_done_callback(callback)
|
||||
|
||||
def shutdown(self, wait=True):
|
||||
self._pool.shutdown(wait)
|
||||
|
||||
|
||||
class ThreadPoolExecutor(BasePoolExecutor):
|
||||
"""
|
||||
An executor that runs jobs in a concurrent.futures thread pool.
|
||||
|
||||
Plugin alias: ``threadpool``
|
||||
|
||||
:param max_workers: the maximum number of spawned threads.
|
||||
"""
|
||||
|
||||
def __init__(self, max_workers=10):
|
||||
pool = concurrent.futures.ThreadPoolExecutor(int(max_workers))
|
||||
super(ThreadPoolExecutor, self).__init__(pool)
|
||||
|
||||
|
||||
class ProcessPoolExecutor(BasePoolExecutor):
|
||||
"""
|
||||
An executor that runs jobs in a concurrent.futures process pool.
|
||||
|
||||
Plugin alias: ``processpool``
|
||||
|
||||
:param max_workers: the maximum number of spawned processes.
|
||||
"""
|
||||
|
||||
def __init__(self, max_workers=10):
|
||||
pool = concurrent.futures.ProcessPoolExecutor(int(max_workers))
|
||||
super(ProcessPoolExecutor, self).__init__(pool)
|
||||
25
lib/apscheduler/executors/twisted.py
Normal file
25
lib/apscheduler/executors/twisted.py
Normal file
@@ -0,0 +1,25 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
from apscheduler.executors.base import BaseExecutor, run_job
|
||||
|
||||
|
||||
class TwistedExecutor(BaseExecutor):
|
||||
"""
|
||||
Runs jobs in the reactor's thread pool.
|
||||
|
||||
Plugin alias: ``twisted``
|
||||
"""
|
||||
|
||||
def start(self, scheduler, alias):
|
||||
super(TwistedExecutor, self).start(scheduler, alias)
|
||||
self._reactor = scheduler._reactor
|
||||
|
||||
def _do_submit_job(self, job, run_times):
|
||||
def callback(success, result):
|
||||
if success:
|
||||
self._run_job_success(job.id, result)
|
||||
else:
|
||||
self._run_job_error(job.id, result.value, result.tb)
|
||||
|
||||
self._reactor.getThreadPool().callInThreadWithCallback(callback, run_job, job, job._jobstore_alias, run_times,
|
||||
self._logger.name)
|
||||
252
lib/apscheduler/job.py
Normal file
252
lib/apscheduler/job.py
Normal file
@@ -0,0 +1,252 @@
|
||||
from collections import Iterable, Mapping
|
||||
from uuid import uuid4
|
||||
|
||||
import six
|
||||
|
||||
from apscheduler.triggers.base import BaseTrigger
|
||||
from apscheduler.util import ref_to_obj, obj_to_ref, datetime_repr, repr_escape, get_callable_name, check_callable_args, \
|
||||
convert_to_datetime
|
||||
|
||||
|
||||
class Job(object):
|
||||
"""
|
||||
Contains the options given when scheduling callables and its current schedule and other state.
|
||||
This class should never be instantiated by the user.
|
||||
|
||||
:var str id: the unique identifier of this job
|
||||
:var str name: the description of this job
|
||||
:var func: the callable to execute
|
||||
:var tuple|list args: positional arguments to the callable
|
||||
:var dict kwargs: keyword arguments to the callable
|
||||
:var bool coalesce: whether to only run the job once when several run times are due
|
||||
:var trigger: the trigger object that controls the schedule of this job
|
||||
:var str executor: the name of the executor that will run this job
|
||||
:var int misfire_grace_time: the time (in seconds) how much this job's execution is allowed to be late
|
||||
:var int max_instances: the maximum number of concurrently executing instances allowed for this job
|
||||
:var datetime.datetime next_run_time: the next scheduled run time of this job
|
||||
"""
|
||||
|
||||
__slots__ = ('_scheduler', '_jobstore_alias', 'id', 'trigger', 'executor', 'func', 'func_ref', 'args', 'kwargs',
|
||||
'name', 'misfire_grace_time', 'coalesce', 'max_instances', 'next_run_time')
|
||||
|
||||
def __init__(self, scheduler, id=None, **kwargs):
|
||||
super(Job, self).__init__()
|
||||
self._scheduler = scheduler
|
||||
self._jobstore_alias = None
|
||||
self._modify(id=id or uuid4().hex, **kwargs)
|
||||
|
||||
def modify(self, **changes):
|
||||
"""
|
||||
Makes the given changes to this job and saves it in the associated job store.
|
||||
Accepted keyword arguments are the same as the variables on this class.
|
||||
|
||||
.. seealso:: :meth:`~apscheduler.schedulers.base.BaseScheduler.modify_job`
|
||||
"""
|
||||
|
||||
self._scheduler.modify_job(self.id, self._jobstore_alias, **changes)
|
||||
|
||||
def reschedule(self, trigger, **trigger_args):
|
||||
"""
|
||||
Shortcut for switching the trigger on this job.
|
||||
|
||||
.. seealso:: :meth:`~apscheduler.schedulers.base.BaseScheduler.reschedule_job`
|
||||
"""
|
||||
|
||||
self._scheduler.reschedule_job(self.id, self._jobstore_alias, trigger, **trigger_args)
|
||||
|
||||
def pause(self):
|
||||
"""
|
||||
Temporarily suspend the execution of this job.
|
||||
|
||||
.. seealso:: :meth:`~apscheduler.schedulers.base.BaseScheduler.pause_job`
|
||||
"""
|
||||
|
||||
self._scheduler.pause_job(self.id, self._jobstore_alias)
|
||||
|
||||
def resume(self):
|
||||
"""
|
||||
Resume the schedule of this job if previously paused.
|
||||
|
||||
.. seealso:: :meth:`~apscheduler.schedulers.base.BaseScheduler.resume_job`
|
||||
"""
|
||||
|
||||
self._scheduler.resume_job(self.id, self._jobstore_alias)
|
||||
|
||||
def remove(self):
|
||||
"""
|
||||
Unschedules this job and removes it from its associated job store.
|
||||
|
||||
.. seealso:: :meth:`~apscheduler.schedulers.base.BaseScheduler.remove_job`
|
||||
"""
|
||||
|
||||
self._scheduler.remove_job(self.id, self._jobstore_alias)
|
||||
|
||||
@property
|
||||
def pending(self):
|
||||
"""Returns ``True`` if the referenced job is still waiting to be added to its designated job store."""
|
||||
|
||||
return self._jobstore_alias is None
|
||||
|
||||
#
|
||||
# Private API
|
||||
#
|
||||
|
||||
def _get_run_times(self, now):
|
||||
"""
|
||||
Computes the scheduled run times between ``next_run_time`` and ``now`` (inclusive).
|
||||
|
||||
:type now: datetime.datetime
|
||||
:rtype: list[datetime.datetime]
|
||||
"""
|
||||
|
||||
run_times = []
|
||||
next_run_time = self.next_run_time
|
||||
while next_run_time and next_run_time <= now:
|
||||
run_times.append(next_run_time)
|
||||
next_run_time = self.trigger.get_next_fire_time(next_run_time, now)
|
||||
|
||||
return run_times
|
||||
|
||||
def _modify(self, **changes):
|
||||
"""Validates the changes to the Job and makes the modifications if and only if all of them validate."""
|
||||
|
||||
approved = {}
|
||||
|
||||
if 'id' in changes:
|
||||
value = changes.pop('id')
|
||||
if not isinstance(value, six.string_types):
|
||||
raise TypeError("id must be a nonempty string")
|
||||
if hasattr(self, 'id'):
|
||||
raise ValueError('The job ID may not be changed')
|
||||
approved['id'] = value
|
||||
|
||||
if 'func' in changes or 'args' in changes or 'kwargs' in changes:
|
||||
func = changes.pop('func') if 'func' in changes else self.func
|
||||
args = changes.pop('args') if 'args' in changes else self.args
|
||||
kwargs = changes.pop('kwargs') if 'kwargs' in changes else self.kwargs
|
||||
|
||||
if isinstance(func, str):
|
||||
func_ref = func
|
||||
func = ref_to_obj(func)
|
||||
elif callable(func):
|
||||
try:
|
||||
func_ref = obj_to_ref(func)
|
||||
except ValueError:
|
||||
# If this happens, this Job won't be serializable
|
||||
func_ref = None
|
||||
else:
|
||||
raise TypeError('func must be a callable or a textual reference to one')
|
||||
|
||||
if not hasattr(self, 'name') and changes.get('name', None) is None:
|
||||
changes['name'] = get_callable_name(func)
|
||||
|
||||
if isinstance(args, six.string_types) or not isinstance(args, Iterable):
|
||||
raise TypeError('args must be a non-string iterable')
|
||||
if isinstance(kwargs, six.string_types) or not isinstance(kwargs, Mapping):
|
||||
raise TypeError('kwargs must be a dict-like object')
|
||||
|
||||
check_callable_args(func, args, kwargs)
|
||||
|
||||
approved['func'] = func
|
||||
approved['func_ref'] = func_ref
|
||||
approved['args'] = args
|
||||
approved['kwargs'] = kwargs
|
||||
|
||||
if 'name' in changes:
|
||||
value = changes.pop('name')
|
||||
if not value or not isinstance(value, six.string_types):
|
||||
raise TypeError("name must be a nonempty string")
|
||||
approved['name'] = value
|
||||
|
||||
if 'misfire_grace_time' in changes:
|
||||
value = changes.pop('misfire_grace_time')
|
||||
if value is not None and (not isinstance(value, six.integer_types) or value <= 0):
|
||||
raise TypeError('misfire_grace_time must be either None or a positive integer')
|
||||
approved['misfire_grace_time'] = value
|
||||
|
||||
if 'coalesce' in changes:
|
||||
value = bool(changes.pop('coalesce'))
|
||||
approved['coalesce'] = value
|
||||
|
||||
if 'max_instances' in changes:
|
||||
value = changes.pop('max_instances')
|
||||
if not isinstance(value, six.integer_types) or value <= 0:
|
||||
raise TypeError('max_instances must be a positive integer')
|
||||
approved['max_instances'] = value
|
||||
|
||||
if 'trigger' in changes:
|
||||
trigger = changes.pop('trigger')
|
||||
if not isinstance(trigger, BaseTrigger):
|
||||
raise TypeError('Expected a trigger instance, got %s instead' % trigger.__class__.__name__)
|
||||
|
||||
approved['trigger'] = trigger
|
||||
|
||||
if 'executor' in changes:
|
||||
value = changes.pop('executor')
|
||||
if not isinstance(value, six.string_types):
|
||||
raise TypeError('executor must be a string')
|
||||
approved['executor'] = value
|
||||
|
||||
if 'next_run_time' in changes:
|
||||
value = changes.pop('next_run_time')
|
||||
approved['next_run_time'] = convert_to_datetime(value, self._scheduler.timezone, 'next_run_time')
|
||||
|
||||
if changes:
|
||||
raise AttributeError('The following are not modifiable attributes of Job: %s' % ', '.join(changes))
|
||||
|
||||
for key, value in six.iteritems(approved):
|
||||
setattr(self, key, value)
|
||||
|
||||
def __getstate__(self):
|
||||
# Don't allow this Job to be serialized if the function reference could not be determined
|
||||
if not self.func_ref:
|
||||
raise ValueError('This Job cannot be serialized since the reference to its callable (%r) could not be '
|
||||
'determined. Consider giving a textual reference (module:function name) instead.' %
|
||||
(self.func,))
|
||||
|
||||
return {
|
||||
'version': 1,
|
||||
'id': self.id,
|
||||
'func': self.func_ref,
|
||||
'trigger': self.trigger,
|
||||
'executor': self.executor,
|
||||
'args': self.args,
|
||||
'kwargs': self.kwargs,
|
||||
'name': self.name,
|
||||
'misfire_grace_time': self.misfire_grace_time,
|
||||
'coalesce': self.coalesce,
|
||||
'max_instances': self.max_instances,
|
||||
'next_run_time': self.next_run_time
|
||||
}
|
||||
|
||||
def __setstate__(self, state):
|
||||
if state.get('version', 1) > 1:
|
||||
raise ValueError('Job has version %s, but only version 1 can be handled' % state['version'])
|
||||
|
||||
self.id = state['id']
|
||||
self.func_ref = state['func']
|
||||
self.func = ref_to_obj(self.func_ref)
|
||||
self.trigger = state['trigger']
|
||||
self.executor = state['executor']
|
||||
self.args = state['args']
|
||||
self.kwargs = state['kwargs']
|
||||
self.name = state['name']
|
||||
self.misfire_grace_time = state['misfire_grace_time']
|
||||
self.coalesce = state['coalesce']
|
||||
self.max_instances = state['max_instances']
|
||||
self.next_run_time = state['next_run_time']
|
||||
|
||||
def __eq__(self, other):
|
||||
if isinstance(other, Job):
|
||||
return self.id == other.id
|
||||
return NotImplemented
|
||||
|
||||
def __repr__(self):
|
||||
return '<Job (id=%s name=%s)>' % (repr_escape(self.id), repr_escape(self.name))
|
||||
|
||||
def __str__(self):
|
||||
return '%s (trigger: %s, next run at: %s)' % (repr_escape(self.name), repr_escape(str(self.trigger)),
|
||||
datetime_repr(self.next_run_time))
|
||||
|
||||
def __unicode__(self):
|
||||
return six.u('%s (trigger: %s, next run at: %s)') % (self.name, self.trigger, datetime_repr(self.next_run_time))
|
||||
0
lib/apscheduler/jobstores/__init__.py
Normal file
0
lib/apscheduler/jobstores/__init__.py
Normal file
127
lib/apscheduler/jobstores/base.py
Normal file
127
lib/apscheduler/jobstores/base.py
Normal file
@@ -0,0 +1,127 @@
|
||||
from abc import ABCMeta, abstractmethod
|
||||
import logging
|
||||
|
||||
import six
|
||||
|
||||
|
||||
class JobLookupError(KeyError):
|
||||
"""Raised when the job store cannot find a job for update or removal."""
|
||||
|
||||
def __init__(self, job_id):
|
||||
super(JobLookupError, self).__init__(six.u('No job by the id of %s was found') % job_id)
|
||||
|
||||
|
||||
class ConflictingIdError(KeyError):
|
||||
"""Raised when the uniqueness of job IDs is being violated."""
|
||||
|
||||
def __init__(self, job_id):
|
||||
super(ConflictingIdError, self).__init__(six.u('Job identifier (%s) conflicts with an existing job') % job_id)
|
||||
|
||||
|
||||
class TransientJobError(ValueError):
|
||||
"""Raised when an attempt to add transient (with no func_ref) job to a persistent job store is detected."""
|
||||
|
||||
def __init__(self, job_id):
|
||||
super(TransientJobError, self).__init__(
|
||||
six.u('Job (%s) cannot be added to this job store because a reference to the callable could not be '
|
||||
'determined.') % job_id)
|
||||
|
||||
|
||||
class BaseJobStore(six.with_metaclass(ABCMeta)):
|
||||
"""Abstract base class that defines the interface that every job store must implement."""
|
||||
|
||||
_scheduler = None
|
||||
_alias = None
|
||||
_logger = logging.getLogger('apscheduler.jobstores')
|
||||
|
||||
def start(self, scheduler, alias):
|
||||
"""
|
||||
Called by the scheduler when the scheduler is being started or when the job store is being added to an already
|
||||
running scheduler.
|
||||
|
||||
:param apscheduler.schedulers.base.BaseScheduler scheduler: the scheduler that is starting this job store
|
||||
:param str|unicode alias: alias of this job store as it was assigned to the scheduler
|
||||
"""
|
||||
|
||||
self._scheduler = scheduler
|
||||
self._alias = alias
|
||||
self._logger = logging.getLogger('apscheduler.jobstores.%s' % alias)
|
||||
|
||||
def shutdown(self):
|
||||
"""Frees any resources still bound to this job store."""
|
||||
|
||||
@abstractmethod
|
||||
def lookup_job(self, job_id):
|
||||
"""
|
||||
Returns a specific job, or ``None`` if it isn't found..
|
||||
|
||||
The job store is responsible for setting the ``scheduler`` and ``jobstore`` attributes of the returned job to
|
||||
point to the scheduler and itself, respectively.
|
||||
|
||||
:param str|unicode job_id: identifier of the job
|
||||
:rtype: Job
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def get_due_jobs(self, now):
|
||||
"""
|
||||
Returns the list of jobs that have ``next_run_time`` earlier or equal to ``now``.
|
||||
The returned jobs must be sorted by next run time (ascending).
|
||||
|
||||
:param datetime.datetime now: the current (timezone aware) datetime
|
||||
:rtype: list[Job]
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def get_next_run_time(self):
|
||||
"""
|
||||
Returns the earliest run time of all the jobs stored in this job store, or ``None`` if there are no active jobs.
|
||||
|
||||
:rtype: datetime.datetime
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def get_all_jobs(self):
|
||||
"""
|
||||
Returns a list of all jobs in this job store. The returned jobs should be sorted by next run time (ascending).
|
||||
Paused jobs (next_run_time is None) should be sorted last.
|
||||
|
||||
The job store is responsible for setting the ``scheduler`` and ``jobstore`` attributes of the returned jobs to
|
||||
point to the scheduler and itself, respectively.
|
||||
|
||||
:rtype: list[Job]
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def add_job(self, job):
|
||||
"""
|
||||
Adds the given job to this store.
|
||||
|
||||
:param Job job: the job to add
|
||||
:raises ConflictingIdError: if there is another job in this store with the same ID
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def update_job(self, job):
|
||||
"""
|
||||
Replaces the job in the store with the given newer version.
|
||||
|
||||
:param Job job: the job to update
|
||||
:raises JobLookupError: if the job does not exist
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def remove_job(self, job_id):
|
||||
"""
|
||||
Removes the given job from this store.
|
||||
|
||||
:param str|unicode job_id: identifier of the job
|
||||
:raises JobLookupError: if the job does not exist
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def remove_all_jobs(self):
|
||||
"""Removes all jobs from this store."""
|
||||
|
||||
def __repr__(self):
|
||||
return '<%s>' % self.__class__.__name__
|
||||
107
lib/apscheduler/jobstores/memory.py
Normal file
107
lib/apscheduler/jobstores/memory.py
Normal file
@@ -0,0 +1,107 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
from apscheduler.jobstores.base import BaseJobStore, JobLookupError, ConflictingIdError
|
||||
from apscheduler.util import datetime_to_utc_timestamp
|
||||
|
||||
|
||||
class MemoryJobStore(BaseJobStore):
|
||||
"""
|
||||
Stores jobs in an array in RAM. Provides no persistence support.
|
||||
|
||||
Plugin alias: ``memory``
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
super(MemoryJobStore, self).__init__()
|
||||
self._jobs = [] # list of (job, timestamp), sorted by next_run_time and job id (ascending)
|
||||
self._jobs_index = {} # id -> (job, timestamp) lookup table
|
||||
|
||||
def lookup_job(self, job_id):
|
||||
return self._jobs_index.get(job_id, (None, None))[0]
|
||||
|
||||
def get_due_jobs(self, now):
|
||||
now_timestamp = datetime_to_utc_timestamp(now)
|
||||
pending = []
|
||||
for job, timestamp in self._jobs:
|
||||
if timestamp is None or timestamp > now_timestamp:
|
||||
break
|
||||
pending.append(job)
|
||||
|
||||
return pending
|
||||
|
||||
def get_next_run_time(self):
|
||||
return self._jobs[0][0].next_run_time if self._jobs else None
|
||||
|
||||
def get_all_jobs(self):
|
||||
return [j[0] for j in self._jobs]
|
||||
|
||||
def add_job(self, job):
|
||||
if job.id in self._jobs_index:
|
||||
raise ConflictingIdError(job.id)
|
||||
|
||||
timestamp = datetime_to_utc_timestamp(job.next_run_time)
|
||||
index = self._get_job_index(timestamp, job.id)
|
||||
self._jobs.insert(index, (job, timestamp))
|
||||
self._jobs_index[job.id] = (job, timestamp)
|
||||
|
||||
def update_job(self, job):
|
||||
old_job, old_timestamp = self._jobs_index.get(job.id, (None, None))
|
||||
if old_job is None:
|
||||
raise JobLookupError(job.id)
|
||||
|
||||
# If the next run time has not changed, simply replace the job in its present index.
|
||||
# Otherwise, reinsert the job to the list to preserve the ordering.
|
||||
old_index = self._get_job_index(old_timestamp, old_job.id)
|
||||
new_timestamp = datetime_to_utc_timestamp(job.next_run_time)
|
||||
if old_timestamp == new_timestamp:
|
||||
self._jobs[old_index] = (job, new_timestamp)
|
||||
else:
|
||||
del self._jobs[old_index]
|
||||
new_index = self._get_job_index(new_timestamp, job.id)
|
||||
self._jobs.insert(new_index, (job, new_timestamp))
|
||||
|
||||
self._jobs_index[old_job.id] = (job, new_timestamp)
|
||||
|
||||
def remove_job(self, job_id):
|
||||
job, timestamp = self._jobs_index.get(job_id, (None, None))
|
||||
if job is None:
|
||||
raise JobLookupError(job_id)
|
||||
|
||||
index = self._get_job_index(timestamp, job_id)
|
||||
del self._jobs[index]
|
||||
del self._jobs_index[job.id]
|
||||
|
||||
def remove_all_jobs(self):
|
||||
self._jobs = []
|
||||
self._jobs_index = {}
|
||||
|
||||
def shutdown(self):
|
||||
self.remove_all_jobs()
|
||||
|
||||
def _get_job_index(self, timestamp, job_id):
|
||||
"""
|
||||
Returns the index of the given job, or if it's not found, the index where the job should be inserted based on
|
||||
the given timestamp.
|
||||
|
||||
:type timestamp: int
|
||||
:type job_id: str
|
||||
"""
|
||||
|
||||
lo, hi = 0, len(self._jobs)
|
||||
timestamp = float('inf') if timestamp is None else timestamp
|
||||
while lo < hi:
|
||||
mid = (lo + hi) // 2
|
||||
mid_job, mid_timestamp = self._jobs[mid]
|
||||
mid_timestamp = float('inf') if mid_timestamp is None else mid_timestamp
|
||||
if mid_timestamp > timestamp:
|
||||
hi = mid
|
||||
elif mid_timestamp < timestamp:
|
||||
lo = mid + 1
|
||||
elif mid_job.id > job_id:
|
||||
hi = mid
|
||||
elif mid_job.id < job_id:
|
||||
lo = mid + 1
|
||||
else:
|
||||
return mid
|
||||
|
||||
return lo
|
||||
124
lib/apscheduler/jobstores/mongodb.py
Normal file
124
lib/apscheduler/jobstores/mongodb.py
Normal file
@@ -0,0 +1,124 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
from apscheduler.jobstores.base import BaseJobStore, JobLookupError, ConflictingIdError
|
||||
from apscheduler.util import maybe_ref, datetime_to_utc_timestamp, utc_timestamp_to_datetime
|
||||
from apscheduler.job import Job
|
||||
|
||||
try:
|
||||
import cPickle as pickle
|
||||
except ImportError: # pragma: nocover
|
||||
import pickle
|
||||
|
||||
try:
|
||||
from bson.binary import Binary
|
||||
from pymongo.errors import DuplicateKeyError
|
||||
from pymongo import MongoClient, ASCENDING
|
||||
except ImportError: # pragma: nocover
|
||||
raise ImportError('MongoDBJobStore requires PyMongo installed')
|
||||
|
||||
|
||||
class MongoDBJobStore(BaseJobStore):
|
||||
"""
|
||||
Stores jobs in a MongoDB database. Any leftover keyword arguments are directly passed to pymongo's `MongoClient
|
||||
<http://api.mongodb.org/python/current/api/pymongo/mongo_client.html#pymongo.mongo_client.MongoClient>`_.
|
||||
|
||||
Plugin alias: ``mongodb``
|
||||
|
||||
:param str database: database to store jobs in
|
||||
:param str collection: collection to store jobs in
|
||||
:param client: a :class:`~pymongo.mongo_client.MongoClient` instance to use instead of providing connection
|
||||
arguments
|
||||
:param int pickle_protocol: pickle protocol level to use (for serialization), defaults to the highest available
|
||||
"""
|
||||
|
||||
def __init__(self, database='apscheduler', collection='jobs', client=None,
|
||||
pickle_protocol=pickle.HIGHEST_PROTOCOL, **connect_args):
|
||||
super(MongoDBJobStore, self).__init__()
|
||||
self.pickle_protocol = pickle_protocol
|
||||
|
||||
if not database:
|
||||
raise ValueError('The "database" parameter must not be empty')
|
||||
if not collection:
|
||||
raise ValueError('The "collection" parameter must not be empty')
|
||||
|
||||
if client:
|
||||
self.connection = maybe_ref(client)
|
||||
else:
|
||||
connect_args.setdefault('w', 1)
|
||||
self.connection = MongoClient(**connect_args)
|
||||
|
||||
self.collection = self.connection[database][collection]
|
||||
self.collection.ensure_index('next_run_time', sparse=True)
|
||||
|
||||
def lookup_job(self, job_id):
|
||||
document = self.collection.find_one(job_id, ['job_state'])
|
||||
return self._reconstitute_job(document['job_state']) if document else None
|
||||
|
||||
def get_due_jobs(self, now):
|
||||
timestamp = datetime_to_utc_timestamp(now)
|
||||
return self._get_jobs({'next_run_time': {'$lte': timestamp}})
|
||||
|
||||
def get_next_run_time(self):
|
||||
document = self.collection.find_one({'next_run_time': {'$ne': None}}, fields=['next_run_time'],
|
||||
sort=[('next_run_time', ASCENDING)])
|
||||
return utc_timestamp_to_datetime(document['next_run_time']) if document else None
|
||||
|
||||
def get_all_jobs(self):
|
||||
return self._get_jobs({})
|
||||
|
||||
def add_job(self, job):
|
||||
try:
|
||||
self.collection.insert({
|
||||
'_id': job.id,
|
||||
'next_run_time': datetime_to_utc_timestamp(job.next_run_time),
|
||||
'job_state': Binary(pickle.dumps(job.__getstate__(), self.pickle_protocol))
|
||||
})
|
||||
except DuplicateKeyError:
|
||||
raise ConflictingIdError(job.id)
|
||||
|
||||
def update_job(self, job):
|
||||
changes = {
|
||||
'next_run_time': datetime_to_utc_timestamp(job.next_run_time),
|
||||
'job_state': Binary(pickle.dumps(job.__getstate__(), self.pickle_protocol))
|
||||
}
|
||||
result = self.collection.update({'_id': job.id}, {'$set': changes})
|
||||
if result and result['n'] == 0:
|
||||
raise JobLookupError(id)
|
||||
|
||||
def remove_job(self, job_id):
|
||||
result = self.collection.remove(job_id)
|
||||
if result and result['n'] == 0:
|
||||
raise JobLookupError(job_id)
|
||||
|
||||
def remove_all_jobs(self):
|
||||
self.collection.remove()
|
||||
|
||||
def shutdown(self):
|
||||
self.connection.disconnect()
|
||||
|
||||
def _reconstitute_job(self, job_state):
|
||||
job_state = pickle.loads(job_state)
|
||||
job = Job.__new__(Job)
|
||||
job.__setstate__(job_state)
|
||||
job._scheduler = self._scheduler
|
||||
job._jobstore_alias = self._alias
|
||||
return job
|
||||
|
||||
def _get_jobs(self, conditions):
|
||||
jobs = []
|
||||
failed_job_ids = []
|
||||
for document in self.collection.find(conditions, ['_id', 'job_state'], sort=[('next_run_time', ASCENDING)]):
|
||||
try:
|
||||
jobs.append(self._reconstitute_job(document['job_state']))
|
||||
except:
|
||||
self._logger.exception('Unable to restore job "%s" -- removing it', document['_id'])
|
||||
failed_job_ids.append(document['_id'])
|
||||
|
||||
# Remove all the jobs we failed to restore
|
||||
if failed_job_ids:
|
||||
self.collection.remove({'_id': {'$in': failed_job_ids}})
|
||||
|
||||
return jobs
|
||||
|
||||
def __repr__(self):
|
||||
return '<%s (client=%s)>' % (self.__class__.__name__, self.connection)
|
||||
138
lib/apscheduler/jobstores/redis.py
Normal file
138
lib/apscheduler/jobstores/redis.py
Normal file
@@ -0,0 +1,138 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
import six
|
||||
|
||||
from apscheduler.jobstores.base import BaseJobStore, JobLookupError, ConflictingIdError
|
||||
from apscheduler.util import datetime_to_utc_timestamp, utc_timestamp_to_datetime
|
||||
from apscheduler.job import Job
|
||||
|
||||
try:
|
||||
import cPickle as pickle
|
||||
except ImportError: # pragma: nocover
|
||||
import pickle
|
||||
|
||||
try:
|
||||
from redis import StrictRedis
|
||||
except ImportError: # pragma: nocover
|
||||
raise ImportError('RedisJobStore requires redis installed')
|
||||
|
||||
|
||||
class RedisJobStore(BaseJobStore):
|
||||
"""
|
||||
Stores jobs in a Redis database. Any leftover keyword arguments are directly passed to redis's StrictRedis.
|
||||
|
||||
Plugin alias: ``redis``
|
||||
|
||||
:param int db: the database number to store jobs in
|
||||
:param str jobs_key: key to store jobs in
|
||||
:param str run_times_key: key to store the jobs' run times in
|
||||
:param int pickle_protocol: pickle protocol level to use (for serialization), defaults to the highest available
|
||||
"""
|
||||
|
||||
def __init__(self, db=0, jobs_key='apscheduler.jobs', run_times_key='apscheduler.run_times',
|
||||
pickle_protocol=pickle.HIGHEST_PROTOCOL, **connect_args):
|
||||
super(RedisJobStore, self).__init__()
|
||||
|
||||
if db is None:
|
||||
raise ValueError('The "db" parameter must not be empty')
|
||||
if not jobs_key:
|
||||
raise ValueError('The "jobs_key" parameter must not be empty')
|
||||
if not run_times_key:
|
||||
raise ValueError('The "run_times_key" parameter must not be empty')
|
||||
|
||||
self.pickle_protocol = pickle_protocol
|
||||
self.jobs_key = jobs_key
|
||||
self.run_times_key = run_times_key
|
||||
self.redis = StrictRedis(db=int(db), **connect_args)
|
||||
|
||||
def lookup_job(self, job_id):
|
||||
job_state = self.redis.hget(self.jobs_key, job_id)
|
||||
return self._reconstitute_job(job_state) if job_state else None
|
||||
|
||||
def get_due_jobs(self, now):
|
||||
timestamp = datetime_to_utc_timestamp(now)
|
||||
job_ids = self.redis.zrangebyscore(self.run_times_key, 0, timestamp)
|
||||
if job_ids:
|
||||
job_states = self.redis.hmget(self.jobs_key, *job_ids)
|
||||
return self._reconstitute_jobs(six.moves.zip(job_ids, job_states))
|
||||
return []
|
||||
|
||||
def get_next_run_time(self):
|
||||
next_run_time = self.redis.zrange(self.run_times_key, 0, 0, withscores=True)
|
||||
if next_run_time:
|
||||
return utc_timestamp_to_datetime(next_run_time[0][1])
|
||||
|
||||
def get_all_jobs(self):
|
||||
job_states = self.redis.hgetall(self.jobs_key)
|
||||
jobs = self._reconstitute_jobs(six.iteritems(job_states))
|
||||
return sorted(jobs, key=lambda job: job.next_run_time)
|
||||
|
||||
def add_job(self, job):
|
||||
if self.redis.hexists(self.jobs_key, job.id):
|
||||
raise ConflictingIdError(job.id)
|
||||
|
||||
with self.redis.pipeline() as pipe:
|
||||
pipe.multi()
|
||||
pipe.hset(self.jobs_key, job.id, pickle.dumps(job.__getstate__(), self.pickle_protocol))
|
||||
pipe.zadd(self.run_times_key, datetime_to_utc_timestamp(job.next_run_time), job.id)
|
||||
pipe.execute()
|
||||
|
||||
def update_job(self, job):
|
||||
if not self.redis.hexists(self.jobs_key, job.id):
|
||||
raise JobLookupError(job.id)
|
||||
|
||||
with self.redis.pipeline() as pipe:
|
||||
pipe.hset(self.jobs_key, job.id, pickle.dumps(job.__getstate__(), self.pickle_protocol))
|
||||
if job.next_run_time:
|
||||
pipe.zadd(self.run_times_key, datetime_to_utc_timestamp(job.next_run_time), job.id)
|
||||
else:
|
||||
pipe.zrem(self.run_times_key, job.id)
|
||||
pipe.execute()
|
||||
|
||||
def remove_job(self, job_id):
|
||||
if not self.redis.hexists(self.jobs_key, job_id):
|
||||
raise JobLookupError(job_id)
|
||||
|
||||
with self.redis.pipeline() as pipe:
|
||||
pipe.hdel(self.jobs_key, job_id)
|
||||
pipe.zrem(self.run_times_key, job_id)
|
||||
pipe.execute()
|
||||
|
||||
def remove_all_jobs(self):
|
||||
with self.redis.pipeline() as pipe:
|
||||
pipe.delete(self.jobs_key)
|
||||
pipe.delete(self.run_times_key)
|
||||
pipe.execute()
|
||||
|
||||
def shutdown(self):
|
||||
self.redis.connection_pool.disconnect()
|
||||
|
||||
def _reconstitute_job(self, job_state):
|
||||
job_state = pickle.loads(job_state)
|
||||
job = Job.__new__(Job)
|
||||
job.__setstate__(job_state)
|
||||
job._scheduler = self._scheduler
|
||||
job._jobstore_alias = self._alias
|
||||
return job
|
||||
|
||||
def _reconstitute_jobs(self, job_states):
|
||||
jobs = []
|
||||
failed_job_ids = []
|
||||
for job_id, job_state in job_states:
|
||||
try:
|
||||
jobs.append(self._reconstitute_job(job_state))
|
||||
except:
|
||||
self._logger.exception('Unable to restore job "%s" -- removing it', job_id)
|
||||
failed_job_ids.append(job_id)
|
||||
|
||||
# Remove all the jobs we failed to restore
|
||||
if failed_job_ids:
|
||||
with self.redis.pipeline() as pipe:
|
||||
pipe.hdel(self.jobs_key, *failed_job_ids)
|
||||
pipe.zrem(self.run_times_key, *failed_job_ids)
|
||||
pipe.execute()
|
||||
|
||||
return jobs
|
||||
|
||||
def __repr__(self):
|
||||
return '<%s>' % self.__class__.__name__
|
||||
137
lib/apscheduler/jobstores/sqlalchemy.py
Normal file
137
lib/apscheduler/jobstores/sqlalchemy.py
Normal file
@@ -0,0 +1,137 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
from apscheduler.jobstores.base import BaseJobStore, JobLookupError, ConflictingIdError
|
||||
from apscheduler.util import maybe_ref, datetime_to_utc_timestamp, utc_timestamp_to_datetime
|
||||
from apscheduler.job import Job
|
||||
|
||||
try:
|
||||
import cPickle as pickle
|
||||
except ImportError: # pragma: nocover
|
||||
import pickle
|
||||
|
||||
try:
|
||||
from sqlalchemy import create_engine, Table, Column, MetaData, Unicode, Float, LargeBinary, select
|
||||
from sqlalchemy.exc import IntegrityError
|
||||
except ImportError: # pragma: nocover
|
||||
raise ImportError('SQLAlchemyJobStore requires SQLAlchemy installed')
|
||||
|
||||
|
||||
class SQLAlchemyJobStore(BaseJobStore):
|
||||
"""
|
||||
Stores jobs in a database table using SQLAlchemy. The table will be created if it doesn't exist in the database.
|
||||
|
||||
Plugin alias: ``sqlalchemy``
|
||||
|
||||
:param str url: connection string (see `SQLAlchemy documentation
|
||||
<http://docs.sqlalchemy.org/en/latest/core/engines.html?highlight=create_engine#database-urls>`_
|
||||
on this)
|
||||
:param engine: an SQLAlchemy Engine to use instead of creating a new one based on ``url``
|
||||
:param str tablename: name of the table to store jobs in
|
||||
:param metadata: a :class:`~sqlalchemy.MetaData` instance to use instead of creating a new one
|
||||
:param int pickle_protocol: pickle protocol level to use (for serialization), defaults to the highest available
|
||||
"""
|
||||
|
||||
def __init__(self, url=None, engine=None, tablename='apscheduler_jobs', metadata=None,
|
||||
pickle_protocol=pickle.HIGHEST_PROTOCOL):
|
||||
super(SQLAlchemyJobStore, self).__init__()
|
||||
self.pickle_protocol = pickle_protocol
|
||||
metadata = maybe_ref(metadata) or MetaData()
|
||||
|
||||
if engine:
|
||||
self.engine = maybe_ref(engine)
|
||||
elif url:
|
||||
self.engine = create_engine(url)
|
||||
else:
|
||||
raise ValueError('Need either "engine" or "url" defined')
|
||||
|
||||
# 191 = max key length in MySQL for InnoDB/utf8mb4 tables, 25 = precision that translates to an 8-byte float
|
||||
self.jobs_t = Table(
|
||||
tablename, metadata,
|
||||
Column('id', Unicode(191, _warn_on_bytestring=False), primary_key=True),
|
||||
Column('next_run_time', Float(25), index=True),
|
||||
Column('job_state', LargeBinary, nullable=False)
|
||||
)
|
||||
|
||||
self.jobs_t.create(self.engine, True)
|
||||
|
||||
def lookup_job(self, job_id):
|
||||
selectable = select([self.jobs_t.c.job_state]).where(self.jobs_t.c.id == job_id)
|
||||
job_state = self.engine.execute(selectable).scalar()
|
||||
return self._reconstitute_job(job_state) if job_state else None
|
||||
|
||||
def get_due_jobs(self, now):
|
||||
timestamp = datetime_to_utc_timestamp(now)
|
||||
return self._get_jobs(self.jobs_t.c.next_run_time <= timestamp)
|
||||
|
||||
def get_next_run_time(self):
|
||||
selectable = select([self.jobs_t.c.next_run_time]).where(self.jobs_t.c.next_run_time != None).\
|
||||
order_by(self.jobs_t.c.next_run_time).limit(1)
|
||||
next_run_time = self.engine.execute(selectable).scalar()
|
||||
return utc_timestamp_to_datetime(next_run_time)
|
||||
|
||||
def get_all_jobs(self):
|
||||
return self._get_jobs()
|
||||
|
||||
def add_job(self, job):
|
||||
insert = self.jobs_t.insert().values(**{
|
||||
'id': job.id,
|
||||
'next_run_time': datetime_to_utc_timestamp(job.next_run_time),
|
||||
'job_state': pickle.dumps(job.__getstate__(), self.pickle_protocol)
|
||||
})
|
||||
try:
|
||||
self.engine.execute(insert)
|
||||
except IntegrityError:
|
||||
raise ConflictingIdError(job.id)
|
||||
|
||||
def update_job(self, job):
|
||||
update = self.jobs_t.update().values(**{
|
||||
'next_run_time': datetime_to_utc_timestamp(job.next_run_time),
|
||||
'job_state': pickle.dumps(job.__getstate__(), self.pickle_protocol)
|
||||
}).where(self.jobs_t.c.id == job.id)
|
||||
result = self.engine.execute(update)
|
||||
if result.rowcount == 0:
|
||||
raise JobLookupError(id)
|
||||
|
||||
def remove_job(self, job_id):
|
||||
delete = self.jobs_t.delete().where(self.jobs_t.c.id == job_id)
|
||||
result = self.engine.execute(delete)
|
||||
if result.rowcount == 0:
|
||||
raise JobLookupError(job_id)
|
||||
|
||||
def remove_all_jobs(self):
|
||||
delete = self.jobs_t.delete()
|
||||
self.engine.execute(delete)
|
||||
|
||||
def shutdown(self):
|
||||
self.engine.dispose()
|
||||
|
||||
def _reconstitute_job(self, job_state):
|
||||
job_state = pickle.loads(job_state)
|
||||
job_state['jobstore'] = self
|
||||
job = Job.__new__(Job)
|
||||
job.__setstate__(job_state)
|
||||
job._scheduler = self._scheduler
|
||||
job._jobstore_alias = self._alias
|
||||
return job
|
||||
|
||||
def _get_jobs(self, *conditions):
|
||||
jobs = []
|
||||
selectable = select([self.jobs_t.c.id, self.jobs_t.c.job_state]).order_by(self.jobs_t.c.next_run_time)
|
||||
selectable = selectable.where(*conditions) if conditions else selectable
|
||||
failed_job_ids = set()
|
||||
for row in self.engine.execute(selectable):
|
||||
try:
|
||||
jobs.append(self._reconstitute_job(row.job_state))
|
||||
except:
|
||||
self._logger.exception('Unable to restore job "%s" -- removing it', row.id)
|
||||
failed_job_ids.add(row.id)
|
||||
|
||||
# Remove all the jobs we failed to restore
|
||||
if failed_job_ids:
|
||||
delete = self.jobs_t.delete().where(self.jobs_t.c.id.in_(failed_job_ids))
|
||||
self.engine.execute(delete)
|
||||
|
||||
return jobs
|
||||
|
||||
def __repr__(self):
|
||||
return '<%s (url=%s)>' % (self.__class__.__name__, self.engine.url)
|
||||
12
lib/apscheduler/schedulers/__init__.py
Normal file
12
lib/apscheduler/schedulers/__init__.py
Normal file
@@ -0,0 +1,12 @@
|
||||
class SchedulerAlreadyRunningError(Exception):
|
||||
"""Raised when attempting to start or configure the scheduler when it's already running."""
|
||||
|
||||
def __str__(self):
|
||||
return 'Scheduler is already running'
|
||||
|
||||
|
||||
class SchedulerNotRunningError(Exception):
|
||||
"""Raised when attempting to shutdown the scheduler when it's not running."""
|
||||
|
||||
def __str__(self):
|
||||
return 'Scheduler is not running'
|
||||
68
lib/apscheduler/schedulers/asyncio.py
Normal file
68
lib/apscheduler/schedulers/asyncio.py
Normal file
@@ -0,0 +1,68 @@
|
||||
from __future__ import absolute_import
|
||||
from functools import wraps
|
||||
|
||||
from apscheduler.schedulers.base import BaseScheduler
|
||||
from apscheduler.util import maybe_ref
|
||||
|
||||
try:
|
||||
import asyncio
|
||||
except ImportError: # pragma: nocover
|
||||
try:
|
||||
import trollius as asyncio
|
||||
except ImportError:
|
||||
raise ImportError('AsyncIOScheduler requires either Python 3.4 or the asyncio package installed')
|
||||
|
||||
|
||||
def run_in_event_loop(func):
|
||||
@wraps(func)
|
||||
def wrapper(self, *args, **kwargs):
|
||||
self._eventloop.call_soon_threadsafe(func, self, *args, **kwargs)
|
||||
return wrapper
|
||||
|
||||
|
||||
class AsyncIOScheduler(BaseScheduler):
|
||||
"""
|
||||
A scheduler that runs on an asyncio (:pep:`3156`) event loop.
|
||||
|
||||
Extra options:
|
||||
|
||||
============== =============================================================
|
||||
``event_loop`` AsyncIO event loop to use (defaults to the global event loop)
|
||||
============== =============================================================
|
||||
"""
|
||||
|
||||
_eventloop = None
|
||||
_timeout = None
|
||||
|
||||
def start(self):
|
||||
super(AsyncIOScheduler, self).start()
|
||||
self.wakeup()
|
||||
|
||||
@run_in_event_loop
|
||||
def shutdown(self, wait=True):
|
||||
super(AsyncIOScheduler, self).shutdown(wait)
|
||||
self._stop_timer()
|
||||
|
||||
def _configure(self, config):
|
||||
self._eventloop = maybe_ref(config.pop('event_loop', None)) or asyncio.get_event_loop()
|
||||
super(AsyncIOScheduler, self)._configure(config)
|
||||
|
||||
def _start_timer(self, wait_seconds):
|
||||
self._stop_timer()
|
||||
if wait_seconds is not None:
|
||||
self._timeout = self._eventloop.call_later(wait_seconds, self.wakeup)
|
||||
|
||||
def _stop_timer(self):
|
||||
if self._timeout:
|
||||
self._timeout.cancel()
|
||||
del self._timeout
|
||||
|
||||
@run_in_event_loop
|
||||
def wakeup(self):
|
||||
self._stop_timer()
|
||||
wait_seconds = self._process_jobs()
|
||||
self._start_timer(wait_seconds)
|
||||
|
||||
def _create_default_executor(self):
|
||||
from apscheduler.executors.asyncio import AsyncIOExecutor
|
||||
return AsyncIOExecutor()
|
||||
39
lib/apscheduler/schedulers/background.py
Normal file
39
lib/apscheduler/schedulers/background.py
Normal file
@@ -0,0 +1,39 @@
|
||||
from __future__ import absolute_import
|
||||
from threading import Thread, Event
|
||||
|
||||
from apscheduler.schedulers.base import BaseScheduler
|
||||
from apscheduler.schedulers.blocking import BlockingScheduler
|
||||
from apscheduler.util import asbool
|
||||
|
||||
|
||||
class BackgroundScheduler(BlockingScheduler):
|
||||
"""
|
||||
A scheduler that runs in the background using a separate thread
|
||||
(:meth:`~apscheduler.schedulers.base.BaseScheduler.start` will return immediately).
|
||||
|
||||
Extra options:
|
||||
|
||||
========== ============================================================================================
|
||||
``daemon`` Set the ``daemon`` option in the background thread (defaults to ``True``,
|
||||
see `the documentation <https://docs.python.org/3.4/library/threading.html#thread-objects>`_
|
||||
for further details)
|
||||
========== ============================================================================================
|
||||
"""
|
||||
|
||||
_thread = None
|
||||
|
||||
def _configure(self, config):
|
||||
self._daemon = asbool(config.pop('daemon', True))
|
||||
super(BackgroundScheduler, self)._configure(config)
|
||||
|
||||
def start(self):
|
||||
BaseScheduler.start(self)
|
||||
self._event = Event()
|
||||
self._thread = Thread(target=self._main_loop, name='APScheduler')
|
||||
self._thread.daemon = self._daemon
|
||||
self._thread.start()
|
||||
|
||||
def shutdown(self, wait=True):
|
||||
super(BackgroundScheduler, self).shutdown(wait)
|
||||
self._thread.join()
|
||||
del self._thread
|
||||
845
lib/apscheduler/schedulers/base.py
Normal file
845
lib/apscheduler/schedulers/base.py
Normal file
@@ -0,0 +1,845 @@
|
||||
from __future__ import print_function
|
||||
from abc import ABCMeta, abstractmethod
|
||||
from collections import MutableMapping
|
||||
from threading import RLock
|
||||
from datetime import datetime
|
||||
from logging import getLogger
|
||||
import sys
|
||||
|
||||
from pkg_resources import iter_entry_points
|
||||
from tzlocal import get_localzone
|
||||
import six
|
||||
|
||||
from apscheduler.schedulers import SchedulerAlreadyRunningError, SchedulerNotRunningError
|
||||
from apscheduler.executors.base import MaxInstancesReachedError, BaseExecutor
|
||||
from apscheduler.executors.pool import ThreadPoolExecutor
|
||||
from apscheduler.jobstores.base import ConflictingIdError, JobLookupError, BaseJobStore
|
||||
from apscheduler.jobstores.memory import MemoryJobStore
|
||||
from apscheduler.job import Job
|
||||
from apscheduler.triggers.base import BaseTrigger
|
||||
from apscheduler.util import asbool, asint, astimezone, maybe_ref, timedelta_seconds, undefined
|
||||
from apscheduler.events import (
|
||||
SchedulerEvent, JobEvent, EVENT_SCHEDULER_START, EVENT_SCHEDULER_SHUTDOWN, EVENT_JOBSTORE_ADDED,
|
||||
EVENT_JOBSTORE_REMOVED, EVENT_ALL, EVENT_JOB_MODIFIED, EVENT_JOB_REMOVED, EVENT_JOB_ADDED, EVENT_EXECUTOR_ADDED,
|
||||
EVENT_EXECUTOR_REMOVED, EVENT_ALL_JOBS_REMOVED)
|
||||
|
||||
|
||||
class BaseScheduler(six.with_metaclass(ABCMeta)):
|
||||
"""
|
||||
Abstract base class for all schedulers. Takes the following keyword arguments:
|
||||
|
||||
:param str|logging.Logger logger: logger to use for the scheduler's logging (defaults to apscheduler.scheduler)
|
||||
:param str|datetime.tzinfo timezone: the default time zone (defaults to the local timezone)
|
||||
:param dict job_defaults: default values for newly added jobs
|
||||
:param dict jobstores: a dictionary of job store alias -> job store instance or configuration dict
|
||||
:param dict executors: a dictionary of executor alias -> executor instance or configuration dict
|
||||
|
||||
.. seealso:: :ref:`scheduler-config`
|
||||
"""
|
||||
|
||||
_trigger_plugins = dict((ep.name, ep) for ep in iter_entry_points('apscheduler.triggers'))
|
||||
_trigger_classes = {}
|
||||
_executor_plugins = dict((ep.name, ep) for ep in iter_entry_points('apscheduler.executors'))
|
||||
_executor_classes = {}
|
||||
_jobstore_plugins = dict((ep.name, ep) for ep in iter_entry_points('apscheduler.jobstores'))
|
||||
_jobstore_classes = {}
|
||||
_stopped = True
|
||||
|
||||
#
|
||||
# Public API
|
||||
#
|
||||
|
||||
def __init__(self, gconfig={}, **options):
|
||||
super(BaseScheduler, self).__init__()
|
||||
self._executors = {}
|
||||
self._executors_lock = self._create_lock()
|
||||
self._jobstores = {}
|
||||
self._jobstores_lock = self._create_lock()
|
||||
self._listeners = []
|
||||
self._listeners_lock = self._create_lock()
|
||||
self._pending_jobs = []
|
||||
self.configure(gconfig, **options)
|
||||
|
||||
def configure(self, gconfig={}, prefix='apscheduler.', **options):
|
||||
"""
|
||||
Reconfigures the scheduler with the given options. Can only be done when the scheduler isn't running.
|
||||
|
||||
:param dict gconfig: a "global" configuration dictionary whose values can be overridden by keyword arguments to
|
||||
this method
|
||||
:param str|unicode prefix: pick only those keys from ``gconfig`` that are prefixed with this string
|
||||
(pass an empty string or ``None`` to use all keys)
|
||||
:raises SchedulerAlreadyRunningError: if the scheduler is already running
|
||||
"""
|
||||
|
||||
if self.running:
|
||||
raise SchedulerAlreadyRunningError
|
||||
|
||||
# If a non-empty prefix was given, strip it from the keys in the global configuration dict
|
||||
if prefix:
|
||||
prefixlen = len(prefix)
|
||||
gconfig = dict((key[prefixlen:], value) for key, value in six.iteritems(gconfig) if key.startswith(prefix))
|
||||
|
||||
# Create a structure from the dotted options (e.g. "a.b.c = d" -> {'a': {'b': {'c': 'd'}}})
|
||||
config = {}
|
||||
for key, value in six.iteritems(gconfig):
|
||||
parts = key.split('.')
|
||||
parent = config
|
||||
key = parts.pop(0)
|
||||
while parts:
|
||||
parent = parent.setdefault(key, {})
|
||||
key = parts.pop(0)
|
||||
parent[key] = value
|
||||
|
||||
# Override any options with explicit keyword arguments
|
||||
config.update(options)
|
||||
self._configure(config)
|
||||
|
||||
@abstractmethod
|
||||
def start(self):
|
||||
"""
|
||||
Starts the scheduler. The details of this process depend on the implementation.
|
||||
|
||||
:raises SchedulerAlreadyRunningError: if the scheduler is already running
|
||||
"""
|
||||
|
||||
if self.running:
|
||||
raise SchedulerAlreadyRunningError
|
||||
|
||||
with self._executors_lock:
|
||||
# Create a default executor if nothing else is configured
|
||||
if 'default' not in self._executors:
|
||||
self.add_executor(self._create_default_executor(), 'default')
|
||||
|
||||
# Start all the executors
|
||||
for alias, executor in six.iteritems(self._executors):
|
||||
executor.start(self, alias)
|
||||
|
||||
with self._jobstores_lock:
|
||||
# Create a default job store if nothing else is configured
|
||||
if 'default' not in self._jobstores:
|
||||
self.add_jobstore(self._create_default_jobstore(), 'default')
|
||||
|
||||
# Start all the job stores
|
||||
for alias, store in six.iteritems(self._jobstores):
|
||||
store.start(self, alias)
|
||||
|
||||
# Schedule all pending jobs
|
||||
for job, jobstore_alias, replace_existing in self._pending_jobs:
|
||||
self._real_add_job(job, jobstore_alias, replace_existing, False)
|
||||
del self._pending_jobs[:]
|
||||
|
||||
self._stopped = False
|
||||
self._logger.info('Scheduler started')
|
||||
|
||||
# Notify listeners that the scheduler has been started
|
||||
self._dispatch_event(SchedulerEvent(EVENT_SCHEDULER_START))
|
||||
|
||||
@abstractmethod
|
||||
def shutdown(self, wait=True):
|
||||
"""
|
||||
Shuts down the scheduler. Does not interrupt any currently running jobs.
|
||||
|
||||
:param bool wait: ``True`` to wait until all currently executing jobs have finished
|
||||
:raises SchedulerNotRunningError: if the scheduler has not been started yet
|
||||
"""
|
||||
|
||||
if not self.running:
|
||||
raise SchedulerNotRunningError
|
||||
|
||||
self._stopped = True
|
||||
|
||||
# Shut down all executors
|
||||
for executor in six.itervalues(self._executors):
|
||||
executor.shutdown(wait)
|
||||
|
||||
# Shut down all job stores
|
||||
for jobstore in six.itervalues(self._jobstores):
|
||||
jobstore.shutdown()
|
||||
|
||||
self._logger.info('Scheduler has been shut down')
|
||||
self._dispatch_event(SchedulerEvent(EVENT_SCHEDULER_SHUTDOWN))
|
||||
|
||||
@property
|
||||
def running(self):
|
||||
return not self._stopped
|
||||
|
||||
def add_executor(self, executor, alias='default', **executor_opts):
|
||||
"""
|
||||
Adds an executor to this scheduler. Any extra keyword arguments will be passed to the executor plugin's
|
||||
constructor, assuming that the first argument is the name of an executor plugin.
|
||||
|
||||
:param str|unicode|apscheduler.executors.base.BaseExecutor executor: either an executor instance or the name of
|
||||
an executor plugin
|
||||
:param str|unicode alias: alias for the scheduler
|
||||
:raises ValueError: if there is already an executor by the given alias
|
||||
"""
|
||||
|
||||
with self._executors_lock:
|
||||
if alias in self._executors:
|
||||
raise ValueError('This scheduler already has an executor by the alias of "%s"' % alias)
|
||||
|
||||
if isinstance(executor, BaseExecutor):
|
||||
self._executors[alias] = executor
|
||||
elif isinstance(executor, six.string_types):
|
||||
self._executors[alias] = executor = self._create_plugin_instance('executor', executor, executor_opts)
|
||||
else:
|
||||
raise TypeError('Expected an executor instance or a string, got %s instead' %
|
||||
executor.__class__.__name__)
|
||||
|
||||
# Start the executor right away if the scheduler is running
|
||||
if self.running:
|
||||
executor.start(self)
|
||||
|
||||
self._dispatch_event(SchedulerEvent(EVENT_EXECUTOR_ADDED, alias))
|
||||
|
||||
def remove_executor(self, alias, shutdown=True):
|
||||
"""
|
||||
Removes the executor by the given alias from this scheduler.
|
||||
|
||||
:param str|unicode alias: alias of the executor
|
||||
:param bool shutdown: ``True`` to shut down the executor after removing it
|
||||
"""
|
||||
|
||||
with self._jobstores_lock:
|
||||
executor = self._lookup_executor(alias)
|
||||
del self._executors[alias]
|
||||
|
||||
if shutdown:
|
||||
executor.shutdown()
|
||||
|
||||
self._dispatch_event(SchedulerEvent(EVENT_EXECUTOR_REMOVED, alias))
|
||||
|
||||
def add_jobstore(self, jobstore, alias='default', **jobstore_opts):
|
||||
"""
|
||||
Adds a job store to this scheduler. Any extra keyword arguments will be passed to the job store plugin's
|
||||
constructor, assuming that the first argument is the name of a job store plugin.
|
||||
|
||||
:param str|unicode|apscheduler.jobstores.base.BaseJobStore jobstore: job store to be added
|
||||
:param str|unicode alias: alias for the job store
|
||||
:raises ValueError: if there is already a job store by the given alias
|
||||
"""
|
||||
|
||||
with self._jobstores_lock:
|
||||
if alias in self._jobstores:
|
||||
raise ValueError('This scheduler already has a job store by the alias of "%s"' % alias)
|
||||
|
||||
if isinstance(jobstore, BaseJobStore):
|
||||
self._jobstores[alias] = jobstore
|
||||
elif isinstance(jobstore, six.string_types):
|
||||
self._jobstores[alias] = jobstore = self._create_plugin_instance('jobstore', jobstore, jobstore_opts)
|
||||
else:
|
||||
raise TypeError('Expected a job store instance or a string, got %s instead' %
|
||||
jobstore.__class__.__name__)
|
||||
|
||||
# Start the job store right away if the scheduler is running
|
||||
if self.running:
|
||||
jobstore.start(self, alias)
|
||||
|
||||
# Notify listeners that a new job store has been added
|
||||
self._dispatch_event(SchedulerEvent(EVENT_JOBSTORE_ADDED, alias))
|
||||
|
||||
# Notify the scheduler so it can scan the new job store for jobs
|
||||
if self.running:
|
||||
self.wakeup()
|
||||
|
||||
def remove_jobstore(self, alias, shutdown=True):
|
||||
"""
|
||||
Removes the job store by the given alias from this scheduler.
|
||||
|
||||
:param str|unicode alias: alias of the job store
|
||||
:param bool shutdown: ``True`` to shut down the job store after removing it
|
||||
"""
|
||||
|
||||
with self._jobstores_lock:
|
||||
jobstore = self._lookup_jobstore(alias)
|
||||
del self._jobstores[alias]
|
||||
|
||||
if shutdown:
|
||||
jobstore.shutdown()
|
||||
|
||||
self._dispatch_event(SchedulerEvent(EVENT_JOBSTORE_REMOVED, alias))
|
||||
|
||||
def add_listener(self, callback, mask=EVENT_ALL):
|
||||
"""
|
||||
add_listener(callback, mask=EVENT_ALL)
|
||||
|
||||
Adds a listener for scheduler events. When a matching event occurs, ``callback`` is executed with the event
|
||||
object as its sole argument. If the ``mask`` parameter is not provided, the callback will receive events of all
|
||||
types.
|
||||
|
||||
:param callback: any callable that takes one argument
|
||||
:param int mask: bitmask that indicates which events should be listened to
|
||||
|
||||
.. seealso:: :mod:`apscheduler.events`
|
||||
.. seealso:: :ref:`scheduler-events`
|
||||
"""
|
||||
|
||||
with self._listeners_lock:
|
||||
self._listeners.append((callback, mask))
|
||||
|
||||
def remove_listener(self, callback):
|
||||
"""Removes a previously added event listener."""
|
||||
|
||||
with self._listeners_lock:
|
||||
for i, (cb, _) in enumerate(self._listeners):
|
||||
if callback == cb:
|
||||
del self._listeners[i]
|
||||
|
||||
def add_job(self, func, trigger=None, args=None, kwargs=None, id=None, name=None, misfire_grace_time=undefined,
|
||||
coalesce=undefined, max_instances=undefined, next_run_time=undefined, jobstore='default',
|
||||
executor='default', replace_existing=False, **trigger_args):
|
||||
"""
|
||||
add_job(func, trigger=None, args=None, kwargs=None, id=None, name=None, misfire_grace_time=undefined, \
|
||||
coalesce=undefined, max_instances=undefined, next_run_time=undefined, jobstore='default', \
|
||||
executor='default', replace_existing=False, **trigger_args)
|
||||
|
||||
Adds the given job to the job list and wakes up the scheduler if it's already running.
|
||||
|
||||
Any option that defaults to ``undefined`` will be replaced with the corresponding default value when the job is
|
||||
scheduled (which happens when the scheduler is started, or immediately if the scheduler is already running).
|
||||
|
||||
The ``func`` argument can be given either as a callable object or a textual reference in the
|
||||
``package.module:some.object`` format, where the first half (separated by ``:``) is an importable module and the
|
||||
second half is a reference to the callable object, relative to the module.
|
||||
|
||||
The ``trigger`` argument can either be:
|
||||
#. the alias name of the trigger (e.g. ``date``, ``interval`` or ``cron``), in which case any extra keyword
|
||||
arguments to this method are passed on to the trigger's constructor
|
||||
#. an instance of a trigger class
|
||||
|
||||
:param func: callable (or a textual reference to one) to run at the given time
|
||||
:param str|apscheduler.triggers.base.BaseTrigger trigger: trigger that determines when ``func`` is called
|
||||
:param list|tuple args: list of positional arguments to call func with
|
||||
:param dict kwargs: dict of keyword arguments to call func with
|
||||
:param str|unicode id: explicit identifier for the job (for modifying it later)
|
||||
:param str|unicode name: textual description of the job
|
||||
:param int misfire_grace_time: seconds after the designated run time that the job is still allowed to be run
|
||||
:param bool coalesce: run once instead of many times if the scheduler determines that the job should be run more
|
||||
than once in succession
|
||||
:param int max_instances: maximum number of concurrently running instances allowed for this job
|
||||
:param datetime next_run_time: when to first run the job, regardless of the trigger (pass ``None`` to add the
|
||||
job as paused)
|
||||
:param str|unicode jobstore: alias of the job store to store the job in
|
||||
:param str|unicode executor: alias of the executor to run the job with
|
||||
:param bool replace_existing: ``True`` to replace an existing job with the same ``id`` (but retain the
|
||||
number of runs from the existing one)
|
||||
:rtype: Job
|
||||
"""
|
||||
|
||||
job_kwargs = {
|
||||
'trigger': self._create_trigger(trigger, trigger_args),
|
||||
'executor': executor,
|
||||
'func': func,
|
||||
'args': tuple(args) if args is not None else (),
|
||||
'kwargs': dict(kwargs) if kwargs is not None else {},
|
||||
'id': id,
|
||||
'name': name,
|
||||
'misfire_grace_time': misfire_grace_time,
|
||||
'coalesce': coalesce,
|
||||
'max_instances': max_instances,
|
||||
'next_run_time': next_run_time
|
||||
}
|
||||
job_kwargs = dict((key, value) for key, value in six.iteritems(job_kwargs) if value is not undefined)
|
||||
job = Job(self, **job_kwargs)
|
||||
|
||||
# Don't really add jobs to job stores before the scheduler is up and running
|
||||
with self._jobstores_lock:
|
||||
if not self.running:
|
||||
self._pending_jobs.append((job, jobstore, replace_existing))
|
||||
self._logger.info('Adding job tentatively -- it will be properly scheduled when the scheduler starts')
|
||||
else:
|
||||
self._real_add_job(job, jobstore, replace_existing, True)
|
||||
|
||||
return job
|
||||
|
||||
def scheduled_job(self, trigger, args=None, kwargs=None, id=None, name=None, misfire_grace_time=undefined,
|
||||
coalesce=undefined, max_instances=undefined, next_run_time=undefined, jobstore='default',
|
||||
executor='default', **trigger_args):
|
||||
"""
|
||||
scheduled_job(trigger, args=None, kwargs=None, id=None, name=None, misfire_grace_time=undefined, \
|
||||
coalesce=undefined, max_instances=undefined, next_run_time=undefined, jobstore='default', \
|
||||
executor='default',**trigger_args)
|
||||
|
||||
A decorator version of :meth:`add_job`, except that ``replace_existing`` is always ``True``.
|
||||
|
||||
.. important:: The ``id`` argument must be given if scheduling a job in a persistent job store. The scheduler
|
||||
cannot, however, enforce this requirement.
|
||||
"""
|
||||
|
||||
def inner(func):
|
||||
self.add_job(func, trigger, args, kwargs, id, name, misfire_grace_time, coalesce, max_instances,
|
||||
next_run_time, jobstore, executor, True, **trigger_args)
|
||||
return func
|
||||
return inner
|
||||
|
||||
def modify_job(self, job_id, jobstore=None, **changes):
|
||||
"""
|
||||
Modifies the properties of a single job. Modifications are passed to this method as extra keyword arguments.
|
||||
|
||||
:param str|unicode job_id: the identifier of the job
|
||||
:param str|unicode jobstore: alias of the job store that contains the job
|
||||
"""
|
||||
with self._jobstores_lock:
|
||||
job, jobstore = self._lookup_job(job_id, jobstore)
|
||||
job._modify(**changes)
|
||||
if jobstore:
|
||||
self._lookup_jobstore(jobstore).update_job(job)
|
||||
|
||||
self._dispatch_event(JobEvent(EVENT_JOB_MODIFIED, job_id, jobstore))
|
||||
|
||||
# Wake up the scheduler since the job's next run time may have been changed
|
||||
self.wakeup()
|
||||
|
||||
def reschedule_job(self, job_id, jobstore=None, trigger=None, **trigger_args):
|
||||
"""
|
||||
Constructs a new trigger for a job and updates its next run time.
|
||||
Extra keyword arguments are passed directly to the trigger's constructor.
|
||||
|
||||
:param str|unicode job_id: the identifier of the job
|
||||
:param str|unicode jobstore: alias of the job store that contains the job
|
||||
:param trigger: alias of the trigger type or a trigger instance
|
||||
"""
|
||||
|
||||
trigger = self._create_trigger(trigger, trigger_args)
|
||||
now = datetime.now(self.timezone)
|
||||
next_run_time = trigger.get_next_fire_time(None, now)
|
||||
self.modify_job(job_id, jobstore, trigger=trigger, next_run_time=next_run_time)
|
||||
|
||||
def pause_job(self, job_id, jobstore=None):
|
||||
"""
|
||||
Causes the given job not to be executed until it is explicitly resumed.
|
||||
|
||||
:param str|unicode job_id: the identifier of the job
|
||||
:param str|unicode jobstore: alias of the job store that contains the job
|
||||
"""
|
||||
|
||||
self.modify_job(job_id, jobstore, next_run_time=None)
|
||||
|
||||
def resume_job(self, job_id, jobstore=None):
|
||||
"""
|
||||
Resumes the schedule of the given job, or removes the job if its schedule is finished.
|
||||
|
||||
:param str|unicode job_id: the identifier of the job
|
||||
:param str|unicode jobstore: alias of the job store that contains the job
|
||||
"""
|
||||
|
||||
with self._jobstores_lock:
|
||||
job, jobstore = self._lookup_job(job_id, jobstore)
|
||||
now = datetime.now(self.timezone)
|
||||
next_run_time = job.trigger.get_next_fire_time(None, now)
|
||||
if next_run_time:
|
||||
self.modify_job(job_id, jobstore, next_run_time=next_run_time)
|
||||
else:
|
||||
self.remove_job(job.id, jobstore)
|
||||
|
||||
def get_jobs(self, jobstore=None, pending=None):
|
||||
"""
|
||||
Returns a list of pending jobs (if the scheduler hasn't been started yet) and scheduled jobs, either from a
|
||||
specific job store or from all of them.
|
||||
|
||||
:param str|unicode jobstore: alias of the job store
|
||||
:param bool pending: ``False`` to leave out pending jobs (jobs that are waiting for the scheduler start to be
|
||||
added to their respective job stores), ``True`` to only include pending jobs, anything else
|
||||
to return both
|
||||
:rtype: list[Job]
|
||||
"""
|
||||
|
||||
with self._jobstores_lock:
|
||||
jobs = []
|
||||
|
||||
if pending is not False:
|
||||
for job, alias, replace_existing in self._pending_jobs:
|
||||
if jobstore is None or alias == jobstore:
|
||||
jobs.append(job)
|
||||
|
||||
if pending is not True:
|
||||
for alias, store in six.iteritems(self._jobstores):
|
||||
if jobstore is None or alias == jobstore:
|
||||
jobs.extend(store.get_all_jobs())
|
||||
|
||||
return jobs
|
||||
|
||||
def get_job(self, job_id, jobstore=None):
|
||||
"""
|
||||
Returns the Job that matches the given ``job_id``.
|
||||
|
||||
:param str|unicode job_id: the identifier of the job
|
||||
:param str|unicode jobstore: alias of the job store that most likely contains the job
|
||||
:return: the Job by the given ID, or ``None`` if it wasn't found
|
||||
:rtype: Job
|
||||
"""
|
||||
|
||||
with self._jobstores_lock:
|
||||
try:
|
||||
return self._lookup_job(job_id, jobstore)[0]
|
||||
except JobLookupError:
|
||||
return
|
||||
|
||||
def remove_job(self, job_id, jobstore=None):
|
||||
"""
|
||||
Removes a job, preventing it from being run any more.
|
||||
|
||||
:param str|unicode job_id: the identifier of the job
|
||||
:param str|unicode jobstore: alias of the job store that contains the job
|
||||
:raises JobLookupError: if the job was not found
|
||||
"""
|
||||
|
||||
with self._jobstores_lock:
|
||||
# Check if the job is among the pending jobs
|
||||
for i, (job, jobstore_alias, replace_existing) in enumerate(self._pending_jobs):
|
||||
if job.id == job_id:
|
||||
del self._pending_jobs[i]
|
||||
jobstore = jobstore_alias
|
||||
break
|
||||
else:
|
||||
# Otherwise, try to remove it from each store until it succeeds or we run out of stores to check
|
||||
for alias, store in six.iteritems(self._jobstores):
|
||||
if jobstore in (None, alias):
|
||||
try:
|
||||
store.remove_job(job_id)
|
||||
except JobLookupError:
|
||||
continue
|
||||
|
||||
jobstore = alias
|
||||
break
|
||||
|
||||
if jobstore is None:
|
||||
raise JobLookupError(job_id)
|
||||
|
||||
# Notify listeners that a job has been removed
|
||||
event = JobEvent(EVENT_JOB_REMOVED, job_id, jobstore)
|
||||
self._dispatch_event(event)
|
||||
|
||||
self._logger.info('Removed job %s', job_id)
|
||||
|
||||
def remove_all_jobs(self, jobstore=None):
|
||||
"""
|
||||
Removes all jobs from the specified job store, or all job stores if none is given.
|
||||
|
||||
:param str|unicode jobstore: alias of the job store
|
||||
"""
|
||||
|
||||
with self._jobstores_lock:
|
||||
if jobstore:
|
||||
self._pending_jobs = [pending for pending in self._pending_jobs if pending[1] != jobstore]
|
||||
else:
|
||||
self._pending_jobs = []
|
||||
|
||||
for alias, store in six.iteritems(self._jobstores):
|
||||
if jobstore in (None, alias):
|
||||
store.remove_all_jobs()
|
||||
|
||||
self._dispatch_event(SchedulerEvent(EVENT_ALL_JOBS_REMOVED, jobstore))
|
||||
|
||||
def print_jobs(self, jobstore=None, out=None):
|
||||
"""
|
||||
print_jobs(jobstore=None, out=sys.stdout)
|
||||
|
||||
Prints out a textual listing of all jobs currently scheduled on either all job stores or just a specific one.
|
||||
|
||||
:param str|unicode jobstore: alias of the job store, ``None`` to list jobs from all stores
|
||||
:param file out: a file-like object to print to (defaults to **sys.stdout** if nothing is given)
|
||||
"""
|
||||
|
||||
out = out or sys.stdout
|
||||
with self._jobstores_lock:
|
||||
if self._pending_jobs:
|
||||
print(six.u('Pending jobs:'), file=out)
|
||||
for job, jobstore_alias, replace_existing in self._pending_jobs:
|
||||
if jobstore in (None, jobstore_alias):
|
||||
print(six.u(' %s') % job, file=out)
|
||||
|
||||
for alias, store in six.iteritems(self._jobstores):
|
||||
if jobstore in (None, alias):
|
||||
print(six.u('Jobstore %s:') % alias, file=out)
|
||||
jobs = store.get_all_jobs()
|
||||
if jobs:
|
||||
for job in jobs:
|
||||
print(six.u(' %s') % job, file=out)
|
||||
else:
|
||||
print(six.u(' No scheduled jobs'), file=out)
|
||||
|
||||
@abstractmethod
|
||||
def wakeup(self):
|
||||
"""
|
||||
Notifies the scheduler that there may be jobs due for execution.
|
||||
Triggers :meth:`_process_jobs` to be run in an implementation specific manner.
|
||||
"""
|
||||
|
||||
#
|
||||
# Private API
|
||||
#
|
||||
|
||||
def _configure(self, config):
|
||||
# Set general options
|
||||
self._logger = maybe_ref(config.pop('logger', None)) or getLogger('apscheduler.scheduler')
|
||||
self.timezone = astimezone(config.pop('timezone', None)) or get_localzone()
|
||||
|
||||
# Set the job defaults
|
||||
job_defaults = config.get('job_defaults', {})
|
||||
self._job_defaults = {
|
||||
'misfire_grace_time': asint(job_defaults.get('misfire_grace_time', 1)),
|
||||
'coalesce': asbool(job_defaults.get('coalesce', True)),
|
||||
'max_instances': asint(job_defaults.get('max_instances', 1))
|
||||
}
|
||||
|
||||
# Configure executors
|
||||
self._executors.clear()
|
||||
for alias, value in six.iteritems(config.get('executors', {})):
|
||||
if isinstance(value, BaseExecutor):
|
||||
self.add_executor(value, alias)
|
||||
elif isinstance(value, MutableMapping):
|
||||
executor_class = value.pop('class', None)
|
||||
plugin = value.pop('type', None)
|
||||
if plugin:
|
||||
executor = self._create_plugin_instance('executor', plugin, value)
|
||||
elif executor_class:
|
||||
cls = maybe_ref(executor_class)
|
||||
executor = cls(**value)
|
||||
else:
|
||||
raise ValueError('Cannot create executor "%s" -- either "type" or "class" must be defined' % alias)
|
||||
|
||||
self.add_executor(executor, alias)
|
||||
else:
|
||||
raise TypeError("Expected executor instance or dict for executors['%s'], got %s instead" % (
|
||||
alias, value.__class__.__name__))
|
||||
|
||||
# Configure job stores
|
||||
self._jobstores.clear()
|
||||
for alias, value in six.iteritems(config.get('jobstores', {})):
|
||||
if isinstance(value, BaseJobStore):
|
||||
self.add_jobstore(value, alias)
|
||||
elif isinstance(value, MutableMapping):
|
||||
jobstore_class = value.pop('class', None)
|
||||
plugin = value.pop('type', None)
|
||||
if plugin:
|
||||
jobstore = self._create_plugin_instance('jobstore', plugin, value)
|
||||
elif jobstore_class:
|
||||
cls = maybe_ref(jobstore_class)
|
||||
jobstore = cls(**value)
|
||||
else:
|
||||
raise ValueError('Cannot create job store "%s" -- either "type" or "class" must be defined' % alias)
|
||||
|
||||
self.add_jobstore(jobstore, alias)
|
||||
else:
|
||||
raise TypeError("Expected job store instance or dict for jobstores['%s'], got %s instead" % (
|
||||
alias, value.__class__.__name__))
|
||||
|
||||
def _create_default_executor(self):
|
||||
"""Creates a default executor store, specific to the particular scheduler type."""
|
||||
|
||||
return ThreadPoolExecutor()
|
||||
|
||||
def _create_default_jobstore(self):
|
||||
"""Creates a default job store, specific to the particular scheduler type."""
|
||||
|
||||
return MemoryJobStore()
|
||||
|
||||
def _lookup_executor(self, alias):
|
||||
"""
|
||||
Returns the executor instance by the given name from the list of executors that were added to this scheduler.
|
||||
|
||||
:type alias: str
|
||||
:raises KeyError: if no executor by the given alias is not found
|
||||
"""
|
||||
|
||||
try:
|
||||
return self._executors[alias]
|
||||
except KeyError:
|
||||
raise KeyError('No such executor: %s' % alias)
|
||||
|
||||
def _lookup_jobstore(self, alias):
|
||||
"""
|
||||
Returns the job store instance by the given name from the list of job stores that were added to this scheduler.
|
||||
|
||||
:type alias: str
|
||||
:raises KeyError: if no job store by the given alias is not found
|
||||
"""
|
||||
|
||||
try:
|
||||
return self._jobstores[alias]
|
||||
except KeyError:
|
||||
raise KeyError('No such job store: %s' % alias)
|
||||
|
||||
def _lookup_job(self, job_id, jobstore_alias):
|
||||
"""
|
||||
Finds a job by its ID.
|
||||
|
||||
:type job_id: str
|
||||
:param str jobstore_alias: alias of a job store to look in
|
||||
:return tuple[Job, str]: a tuple of job, jobstore alias (jobstore alias is None in case of a pending job)
|
||||
:raises JobLookupError: if no job by the given ID is found.
|
||||
"""
|
||||
|
||||
# Check if the job is among the pending jobs
|
||||
for job, alias, replace_existing in self._pending_jobs:
|
||||
if job.id == job_id:
|
||||
return job, None
|
||||
|
||||
# Look in all job stores
|
||||
for alias, store in six.iteritems(self._jobstores):
|
||||
if jobstore_alias in (None, alias):
|
||||
job = store.lookup_job(job_id)
|
||||
if job is not None:
|
||||
return job, alias
|
||||
|
||||
raise JobLookupError(job_id)
|
||||
|
||||
def _dispatch_event(self, event):
|
||||
"""
|
||||
Dispatches the given event to interested listeners.
|
||||
|
||||
:param SchedulerEvent event: the event to send
|
||||
"""
|
||||
|
||||
with self._listeners_lock:
|
||||
listeners = tuple(self._listeners)
|
||||
|
||||
for cb, mask in listeners:
|
||||
if event.code & mask:
|
||||
try:
|
||||
cb(event)
|
||||
except:
|
||||
self._logger.exception('Error notifying listener')
|
||||
|
||||
def _real_add_job(self, job, jobstore_alias, replace_existing, wakeup):
|
||||
"""
|
||||
:param Job job: the job to add
|
||||
:param bool replace_existing: ``True`` to use update_job() in case the job already exists in the store
|
||||
:param bool wakeup: ``True`` to wake up the scheduler after adding the job
|
||||
"""
|
||||
|
||||
# Fill in undefined values with defaults
|
||||
replacements = {}
|
||||
for key, value in six.iteritems(self._job_defaults):
|
||||
if not hasattr(job, key):
|
||||
replacements[key] = value
|
||||
|
||||
# Calculate the next run time if there is none defined
|
||||
if not hasattr(job, 'next_run_time'):
|
||||
now = datetime.now(self.timezone)
|
||||
replacements['next_run_time'] = job.trigger.get_next_fire_time(None, now)
|
||||
|
||||
# Apply any replacements
|
||||
job._modify(**replacements)
|
||||
|
||||
# Add the job to the given job store
|
||||
store = self._lookup_jobstore(jobstore_alias)
|
||||
try:
|
||||
store.add_job(job)
|
||||
except ConflictingIdError:
|
||||
if replace_existing:
|
||||
store.update_job(job)
|
||||
else:
|
||||
raise
|
||||
|
||||
# Mark the job as no longer pending
|
||||
job._jobstore_alias = jobstore_alias
|
||||
|
||||
# Notify listeners that a new job has been added
|
||||
event = JobEvent(EVENT_JOB_ADDED, job.id, jobstore_alias)
|
||||
self._dispatch_event(event)
|
||||
|
||||
self._logger.info('Added job "%s" to job store "%s"', job.name, jobstore_alias)
|
||||
|
||||
# Notify the scheduler about the new job
|
||||
if wakeup:
|
||||
self.wakeup()
|
||||
|
||||
def _create_plugin_instance(self, type_, alias, constructor_kwargs):
|
||||
"""Creates an instance of the given plugin type, loading the plugin first if necessary."""
|
||||
|
||||
plugin_container, class_container, base_class = {
|
||||
'trigger': (self._trigger_plugins, self._trigger_classes, BaseTrigger),
|
||||
'jobstore': (self._jobstore_plugins, self._jobstore_classes, BaseJobStore),
|
||||
'executor': (self._executor_plugins, self._executor_classes, BaseExecutor)
|
||||
}[type_]
|
||||
|
||||
try:
|
||||
plugin_cls = class_container[alias]
|
||||
except KeyError:
|
||||
if alias in plugin_container:
|
||||
plugin_cls = class_container[alias] = plugin_container[alias].load()
|
||||
if not issubclass(plugin_cls, base_class):
|
||||
raise TypeError('The {0} entry point does not point to a {0} class'.format(type_))
|
||||
else:
|
||||
raise LookupError('No {0} by the name "{1}" was found'.format(type_, alias))
|
||||
|
||||
return plugin_cls(**constructor_kwargs)
|
||||
|
||||
def _create_trigger(self, trigger, trigger_args):
|
||||
if isinstance(trigger, BaseTrigger):
|
||||
return trigger
|
||||
elif trigger is None:
|
||||
trigger = 'date'
|
||||
elif not isinstance(trigger, six.string_types):
|
||||
raise TypeError('Expected a trigger instance or string, got %s instead' % trigger.__class__.__name__)
|
||||
|
||||
# Use the scheduler's time zone if nothing else is specified
|
||||
trigger_args.setdefault('timezone', self.timezone)
|
||||
|
||||
# Instantiate the trigger class
|
||||
return self._create_plugin_instance('trigger', trigger, trigger_args)
|
||||
|
||||
def _create_lock(self):
|
||||
"""Creates a reentrant lock object."""
|
||||
|
||||
return RLock()
|
||||
|
||||
def _process_jobs(self):
|
||||
"""
|
||||
Iterates through jobs in every jobstore, starts jobs that are due and figures out how long to wait for the next
|
||||
round.
|
||||
"""
|
||||
|
||||
self._logger.debug('Looking for jobs to run')
|
||||
now = datetime.now(self.timezone)
|
||||
next_wakeup_time = None
|
||||
|
||||
with self._jobstores_lock:
|
||||
for jobstore_alias, jobstore in six.iteritems(self._jobstores):
|
||||
for job in jobstore.get_due_jobs(now):
|
||||
# Look up the job's executor
|
||||
try:
|
||||
executor = self._lookup_executor(job.executor)
|
||||
except:
|
||||
self._logger.error(
|
||||
'Executor lookup ("%s") failed for job "%s" -- removing it from the job store',
|
||||
job.executor, job)
|
||||
self.remove_job(job.id, jobstore_alias)
|
||||
continue
|
||||
|
||||
run_times = job._get_run_times(now)
|
||||
run_times = run_times[-1:] if run_times and job.coalesce else run_times
|
||||
if run_times:
|
||||
try:
|
||||
executor.submit_job(job, run_times)
|
||||
except MaxInstancesReachedError:
|
||||
self._logger.warning(
|
||||
'Execution of job "%s" skipped: maximum number of running instances reached (%d)',
|
||||
job, job.max_instances)
|
||||
except:
|
||||
self._logger.exception('Error submitting job "%s" to executor "%s"', job, job.executor)
|
||||
|
||||
# Update the job if it has a next execution time. Otherwise remove it from the job store.
|
||||
job_next_run = job.trigger.get_next_fire_time(run_times[-1], now)
|
||||
if job_next_run:
|
||||
job._modify(next_run_time=job_next_run)
|
||||
jobstore.update_job(job)
|
||||
else:
|
||||
self.remove_job(job.id, jobstore_alias)
|
||||
|
||||
# Set a new next wakeup time if there isn't one yet or the jobstore has an even earlier one
|
||||
jobstore_next_run_time = jobstore.get_next_run_time()
|
||||
if jobstore_next_run_time and (next_wakeup_time is None or jobstore_next_run_time < next_wakeup_time):
|
||||
next_wakeup_time = jobstore_next_run_time
|
||||
|
||||
# Determine the delay until this method should be called again
|
||||
if next_wakeup_time is not None:
|
||||
wait_seconds = max(timedelta_seconds(next_wakeup_time - now), 0)
|
||||
self._logger.debug('Next wakeup is due at %s (in %f seconds)', next_wakeup_time, wait_seconds)
|
||||
else:
|
||||
wait_seconds = None
|
||||
self._logger.debug('No jobs; waiting until a job is added')
|
||||
|
||||
return wait_seconds
|
||||
32
lib/apscheduler/schedulers/blocking.py
Normal file
32
lib/apscheduler/schedulers/blocking.py
Normal file
@@ -0,0 +1,32 @@
|
||||
from __future__ import absolute_import
|
||||
from threading import Event
|
||||
|
||||
from apscheduler.schedulers.base import BaseScheduler
|
||||
|
||||
|
||||
class BlockingScheduler(BaseScheduler):
|
||||
"""
|
||||
A scheduler that runs in the foreground (:meth:`~apscheduler.schedulers.base.BaseScheduler.start` will block).
|
||||
"""
|
||||
|
||||
MAX_WAIT_TIME = 4294967 # Maximum value accepted by Event.wait() on Windows
|
||||
|
||||
_event = None
|
||||
|
||||
def start(self):
|
||||
super(BlockingScheduler, self).start()
|
||||
self._event = Event()
|
||||
self._main_loop()
|
||||
|
||||
def shutdown(self, wait=True):
|
||||
super(BlockingScheduler, self).shutdown(wait)
|
||||
self._event.set()
|
||||
|
||||
def _main_loop(self):
|
||||
while self.running:
|
||||
wait_seconds = self._process_jobs()
|
||||
self._event.wait(wait_seconds if wait_seconds is not None else self.MAX_WAIT_TIME)
|
||||
self._event.clear()
|
||||
|
||||
def wakeup(self):
|
||||
self._event.set()
|
||||
35
lib/apscheduler/schedulers/gevent.py
Normal file
35
lib/apscheduler/schedulers/gevent.py
Normal file
@@ -0,0 +1,35 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
from apscheduler.schedulers.blocking import BlockingScheduler
|
||||
from apscheduler.schedulers.base import BaseScheduler
|
||||
|
||||
try:
|
||||
from gevent.event import Event
|
||||
from gevent.lock import RLock
|
||||
import gevent
|
||||
except ImportError: # pragma: nocover
|
||||
raise ImportError('GeventScheduler requires gevent installed')
|
||||
|
||||
|
||||
class GeventScheduler(BlockingScheduler):
|
||||
"""A scheduler that runs as a Gevent greenlet."""
|
||||
|
||||
_greenlet = None
|
||||
|
||||
def start(self):
|
||||
BaseScheduler.start(self)
|
||||
self._event = Event()
|
||||
self._greenlet = gevent.spawn(self._main_loop)
|
||||
return self._greenlet
|
||||
|
||||
def shutdown(self, wait=True):
|
||||
super(GeventScheduler, self).shutdown(wait)
|
||||
self._greenlet.join()
|
||||
del self._greenlet
|
||||
|
||||
def _create_lock(self):
|
||||
return RLock()
|
||||
|
||||
def _create_default_executor(self):
|
||||
from apscheduler.executors.gevent import GeventExecutor
|
||||
return GeventExecutor()
|
||||
46
lib/apscheduler/schedulers/qt.py
Normal file
46
lib/apscheduler/schedulers/qt.py
Normal file
@@ -0,0 +1,46 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
from apscheduler.schedulers.base import BaseScheduler
|
||||
|
||||
try:
|
||||
from PyQt5.QtCore import QObject, QTimer
|
||||
except ImportError: # pragma: nocover
|
||||
try:
|
||||
from PyQt4.QtCore import QObject, QTimer
|
||||
except ImportError:
|
||||
try:
|
||||
from PySide.QtCore import QObject, QTimer # flake8: noqa
|
||||
except ImportError:
|
||||
raise ImportError('QtScheduler requires either PyQt5, PyQt4 or PySide installed')
|
||||
|
||||
|
||||
class QtScheduler(BaseScheduler):
|
||||
"""A scheduler that runs in a Qt event loop."""
|
||||
|
||||
_timer = None
|
||||
|
||||
def start(self):
|
||||
super(QtScheduler, self).start()
|
||||
self.wakeup()
|
||||
|
||||
def shutdown(self, wait=True):
|
||||
super(QtScheduler, self).shutdown(wait)
|
||||
self._stop_timer()
|
||||
|
||||
def _start_timer(self, wait_seconds):
|
||||
self._stop_timer()
|
||||
if wait_seconds is not None:
|
||||
self._timer = QTimer.singleShot(wait_seconds * 1000, self._process_jobs)
|
||||
|
||||
def _stop_timer(self):
|
||||
if self._timer:
|
||||
if self._timer.isActive():
|
||||
self._timer.stop()
|
||||
del self._timer
|
||||
|
||||
def wakeup(self):
|
||||
self._start_timer(0)
|
||||
|
||||
def _process_jobs(self):
|
||||
wait_seconds = super(QtScheduler, self)._process_jobs()
|
||||
self._start_timer(wait_seconds)
|
||||
60
lib/apscheduler/schedulers/tornado.py
Normal file
60
lib/apscheduler/schedulers/tornado.py
Normal file
@@ -0,0 +1,60 @@
|
||||
from __future__ import absolute_import
|
||||
from datetime import timedelta
|
||||
from functools import wraps
|
||||
|
||||
from apscheduler.schedulers.base import BaseScheduler
|
||||
from apscheduler.util import maybe_ref
|
||||
|
||||
try:
|
||||
from tornado.ioloop import IOLoop
|
||||
except ImportError: # pragma: nocover
|
||||
raise ImportError('TornadoScheduler requires tornado installed')
|
||||
|
||||
|
||||
def run_in_ioloop(func):
|
||||
@wraps(func)
|
||||
def wrapper(self, *args, **kwargs):
|
||||
self._ioloop.add_callback(func, self, *args, **kwargs)
|
||||
return wrapper
|
||||
|
||||
|
||||
class TornadoScheduler(BaseScheduler):
|
||||
"""
|
||||
A scheduler that runs on a Tornado IOLoop.
|
||||
|
||||
=========== ===============================================================
|
||||
``io_loop`` Tornado IOLoop instance to use (defaults to the global IO loop)
|
||||
=========== ===============================================================
|
||||
"""
|
||||
|
||||
_ioloop = None
|
||||
_timeout = None
|
||||
|
||||
def start(self):
|
||||
super(TornadoScheduler, self).start()
|
||||
self.wakeup()
|
||||
|
||||
@run_in_ioloop
|
||||
def shutdown(self, wait=True):
|
||||
super(TornadoScheduler, self).shutdown(wait)
|
||||
self._stop_timer()
|
||||
|
||||
def _configure(self, config):
|
||||
self._ioloop = maybe_ref(config.pop('io_loop', None)) or IOLoop.current()
|
||||
super(TornadoScheduler, self)._configure(config)
|
||||
|
||||
def _start_timer(self, wait_seconds):
|
||||
self._stop_timer()
|
||||
if wait_seconds is not None:
|
||||
self._timeout = self._ioloop.add_timeout(timedelta(seconds=wait_seconds), self.wakeup)
|
||||
|
||||
def _stop_timer(self):
|
||||
if self._timeout:
|
||||
self._ioloop.remove_timeout(self._timeout)
|
||||
del self._timeout
|
||||
|
||||
@run_in_ioloop
|
||||
def wakeup(self):
|
||||
self._stop_timer()
|
||||
wait_seconds = self._process_jobs()
|
||||
self._start_timer(wait_seconds)
|
||||
65
lib/apscheduler/schedulers/twisted.py
Normal file
65
lib/apscheduler/schedulers/twisted.py
Normal file
@@ -0,0 +1,65 @@
|
||||
from __future__ import absolute_import
|
||||
from functools import wraps
|
||||
|
||||
from apscheduler.schedulers.base import BaseScheduler
|
||||
from apscheduler.util import maybe_ref
|
||||
|
||||
try:
|
||||
from twisted.internet import reactor as default_reactor
|
||||
except ImportError: # pragma: nocover
|
||||
raise ImportError('TwistedScheduler requires Twisted installed')
|
||||
|
||||
|
||||
def run_in_reactor(func):
|
||||
@wraps(func)
|
||||
def wrapper(self, *args, **kwargs):
|
||||
self._reactor.callFromThread(func, self, *args, **kwargs)
|
||||
return wrapper
|
||||
|
||||
|
||||
class TwistedScheduler(BaseScheduler):
|
||||
"""
|
||||
A scheduler that runs on a Twisted reactor.
|
||||
|
||||
Extra options:
|
||||
|
||||
=========== ========================================================
|
||||
``reactor`` Reactor instance to use (defaults to the global reactor)
|
||||
=========== ========================================================
|
||||
"""
|
||||
|
||||
_reactor = None
|
||||
_delayedcall = None
|
||||
|
||||
def _configure(self, config):
|
||||
self._reactor = maybe_ref(config.pop('reactor', default_reactor))
|
||||
super(TwistedScheduler, self)._configure(config)
|
||||
|
||||
def start(self):
|
||||
super(TwistedScheduler, self).start()
|
||||
self.wakeup()
|
||||
|
||||
@run_in_reactor
|
||||
def shutdown(self, wait=True):
|
||||
super(TwistedScheduler, self).shutdown(wait)
|
||||
self._stop_timer()
|
||||
|
||||
def _start_timer(self, wait_seconds):
|
||||
self._stop_timer()
|
||||
if wait_seconds is not None:
|
||||
self._delayedcall = self._reactor.callLater(wait_seconds, self.wakeup)
|
||||
|
||||
def _stop_timer(self):
|
||||
if self._delayedcall and self._delayedcall.active():
|
||||
self._delayedcall.cancel()
|
||||
del self._delayedcall
|
||||
|
||||
@run_in_reactor
|
||||
def wakeup(self):
|
||||
self._stop_timer()
|
||||
wait_seconds = self._process_jobs()
|
||||
self._start_timer(wait_seconds)
|
||||
|
||||
def _create_default_executor(self):
|
||||
from apscheduler.executors.twisted import TwistedExecutor
|
||||
return TwistedExecutor()
|
||||
0
lib/apscheduler/triggers/__init__.py
Normal file
0
lib/apscheduler/triggers/__init__.py
Normal file
16
lib/apscheduler/triggers/base.py
Normal file
16
lib/apscheduler/triggers/base.py
Normal file
@@ -0,0 +1,16 @@
|
||||
from abc import ABCMeta, abstractmethod
|
||||
|
||||
import six
|
||||
|
||||
|
||||
class BaseTrigger(six.with_metaclass(ABCMeta)):
|
||||
"""Abstract base class that defines the interface that every trigger must implement."""
|
||||
|
||||
@abstractmethod
|
||||
def get_next_fire_time(self, previous_fire_time, now):
|
||||
"""
|
||||
Returns the next datetime to fire on, If no such datetime can be calculated, returns ``None``.
|
||||
|
||||
:param datetime.datetime previous_fire_time: the previous time the trigger was fired
|
||||
:param datetime.datetime now: current datetime
|
||||
"""
|
||||
176
lib/apscheduler/triggers/cron/__init__.py
Normal file
176
lib/apscheduler/triggers/cron/__init__.py
Normal file
@@ -0,0 +1,176 @@
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from tzlocal import get_localzone
|
||||
import six
|
||||
|
||||
from apscheduler.triggers.base import BaseTrigger
|
||||
from apscheduler.triggers.cron.fields import BaseField, WeekField, DayOfMonthField, DayOfWeekField, DEFAULT_VALUES
|
||||
from apscheduler.util import datetime_ceil, convert_to_datetime, datetime_repr, astimezone
|
||||
|
||||
|
||||
class CronTrigger(BaseTrigger):
|
||||
"""
|
||||
Triggers when current time matches all specified time constraints, similarly to how the UNIX cron scheduler works.
|
||||
|
||||
:param int|str year: 4-digit year
|
||||
:param int|str month: month (1-12)
|
||||
:param int|str day: day of the (1-31)
|
||||
:param int|str week: ISO week (1-53)
|
||||
:param int|str day_of_week: number or name of weekday (0-6 or mon,tue,wed,thu,fri,sat,sun)
|
||||
:param int|str hour: hour (0-23)
|
||||
:param int|str minute: minute (0-59)
|
||||
:param int|str second: second (0-59)
|
||||
:param datetime|str start_date: earliest possible date/time to trigger on (inclusive)
|
||||
:param datetime|str end_date: latest possible date/time to trigger on (inclusive)
|
||||
:param datetime.tzinfo|str timezone: time zone to use for the date/time calculations
|
||||
(defaults to scheduler timezone)
|
||||
|
||||
.. note:: The first weekday is always **monday**.
|
||||
"""
|
||||
|
||||
FIELD_NAMES = ('year', 'month', 'day', 'week', 'day_of_week', 'hour', 'minute', 'second')
|
||||
FIELDS_MAP = {
|
||||
'year': BaseField,
|
||||
'month': BaseField,
|
||||
'week': WeekField,
|
||||
'day': DayOfMonthField,
|
||||
'day_of_week': DayOfWeekField,
|
||||
'hour': BaseField,
|
||||
'minute': BaseField,
|
||||
'second': BaseField
|
||||
}
|
||||
|
||||
__slots__ = 'timezone', 'start_date', 'end_date', 'fields'
|
||||
|
||||
def __init__(self, year=None, month=None, day=None, week=None, day_of_week=None, hour=None, minute=None,
|
||||
second=None, start_date=None, end_date=None, timezone=None):
|
||||
if timezone:
|
||||
self.timezone = astimezone(timezone)
|
||||
elif start_date and start_date.tzinfo:
|
||||
self.timezone = start_date.tzinfo
|
||||
elif end_date and end_date.tzinfo:
|
||||
self.timezone = end_date.tzinfo
|
||||
else:
|
||||
self.timezone = get_localzone()
|
||||
|
||||
self.start_date = convert_to_datetime(start_date, self.timezone, 'start_date')
|
||||
self.end_date = convert_to_datetime(end_date, self.timezone, 'end_date')
|
||||
|
||||
values = dict((key, value) for (key, value) in six.iteritems(locals())
|
||||
if key in self.FIELD_NAMES and value is not None)
|
||||
self.fields = []
|
||||
assign_defaults = False
|
||||
for field_name in self.FIELD_NAMES:
|
||||
if field_name in values:
|
||||
exprs = values.pop(field_name)
|
||||
is_default = False
|
||||
assign_defaults = not values
|
||||
elif assign_defaults:
|
||||
exprs = DEFAULT_VALUES[field_name]
|
||||
is_default = True
|
||||
else:
|
||||
exprs = '*'
|
||||
is_default = True
|
||||
|
||||
field_class = self.FIELDS_MAP[field_name]
|
||||
field = field_class(field_name, exprs, is_default)
|
||||
self.fields.append(field)
|
||||
|
||||
def _increment_field_value(self, dateval, fieldnum):
|
||||
"""
|
||||
Increments the designated field and resets all less significant fields to their minimum values.
|
||||
|
||||
:type dateval: datetime
|
||||
:type fieldnum: int
|
||||
:return: a tuple containing the new date, and the number of the field that was actually incremented
|
||||
:rtype: tuple
|
||||
"""
|
||||
|
||||
values = {}
|
||||
i = 0
|
||||
while i < len(self.fields):
|
||||
field = self.fields[i]
|
||||
if not field.REAL:
|
||||
if i == fieldnum:
|
||||
fieldnum -= 1
|
||||
i -= 1
|
||||
else:
|
||||
i += 1
|
||||
continue
|
||||
|
||||
if i < fieldnum:
|
||||
values[field.name] = field.get_value(dateval)
|
||||
i += 1
|
||||
elif i > fieldnum:
|
||||
values[field.name] = field.get_min(dateval)
|
||||
i += 1
|
||||
else:
|
||||
value = field.get_value(dateval)
|
||||
maxval = field.get_max(dateval)
|
||||
if value == maxval:
|
||||
fieldnum -= 1
|
||||
i -= 1
|
||||
else:
|
||||
values[field.name] = value + 1
|
||||
i += 1
|
||||
|
||||
difference = datetime(**values) - dateval.replace(tzinfo=None)
|
||||
return self.timezone.normalize(dateval + difference), fieldnum
|
||||
|
||||
def _set_field_value(self, dateval, fieldnum, new_value):
|
||||
values = {}
|
||||
for i, field in enumerate(self.fields):
|
||||
if field.REAL:
|
||||
if i < fieldnum:
|
||||
values[field.name] = field.get_value(dateval)
|
||||
elif i > fieldnum:
|
||||
values[field.name] = field.get_min(dateval)
|
||||
else:
|
||||
values[field.name] = new_value
|
||||
|
||||
difference = datetime(**values) - dateval.replace(tzinfo=None)
|
||||
return self.timezone.normalize(dateval + difference)
|
||||
|
||||
def get_next_fire_time(self, previous_fire_time, now):
|
||||
if previous_fire_time:
|
||||
start_date = max(now, previous_fire_time + timedelta(microseconds=1))
|
||||
else:
|
||||
start_date = max(now, self.start_date) if self.start_date else now
|
||||
|
||||
fieldnum = 0
|
||||
next_date = datetime_ceil(start_date).astimezone(self.timezone)
|
||||
while 0 <= fieldnum < len(self.fields):
|
||||
field = self.fields[fieldnum]
|
||||
curr_value = field.get_value(next_date)
|
||||
next_value = field.get_next_value(next_date)
|
||||
|
||||
if next_value is None:
|
||||
# No valid value was found
|
||||
next_date, fieldnum = self._increment_field_value(next_date, fieldnum - 1)
|
||||
elif next_value > curr_value:
|
||||
# A valid, but higher than the starting value, was found
|
||||
if field.REAL:
|
||||
next_date = self._set_field_value(next_date, fieldnum, next_value)
|
||||
fieldnum += 1
|
||||
else:
|
||||
next_date, fieldnum = self._increment_field_value(next_date, fieldnum)
|
||||
else:
|
||||
# A valid value was found, no changes necessary
|
||||
fieldnum += 1
|
||||
|
||||
# Return if the date has rolled past the end date
|
||||
if self.end_date and next_date > self.end_date:
|
||||
return None
|
||||
|
||||
if fieldnum >= 0:
|
||||
return next_date
|
||||
|
||||
def __str__(self):
|
||||
options = ["%s='%s'" % (f.name, f) for f in self.fields if not f.is_default]
|
||||
return 'cron[%s]' % (', '.join(options))
|
||||
|
||||
def __repr__(self):
|
||||
options = ["%s='%s'" % (f.name, f) for f in self.fields if not f.is_default]
|
||||
if self.start_date:
|
||||
options.append("start_date='%s'" % datetime_repr(self.start_date))
|
||||
return '<%s (%s)>' % (self.__class__.__name__, ', '.join(options))
|
||||
188
lib/apscheduler/triggers/cron/expressions.py
Normal file
188
lib/apscheduler/triggers/cron/expressions.py
Normal file
@@ -0,0 +1,188 @@
|
||||
"""
|
||||
This module contains the expressions applicable for CronTrigger's fields.
|
||||
"""
|
||||
|
||||
from calendar import monthrange
|
||||
import re
|
||||
|
||||
from apscheduler.util import asint
|
||||
|
||||
__all__ = ('AllExpression', 'RangeExpression', 'WeekdayRangeExpression', 'WeekdayPositionExpression',
|
||||
'LastDayOfMonthExpression')
|
||||
|
||||
|
||||
WEEKDAYS = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']
|
||||
|
||||
|
||||
class AllExpression(object):
|
||||
value_re = re.compile(r'\*(?:/(?P<step>\d+))?$')
|
||||
|
||||
def __init__(self, step=None):
|
||||
self.step = asint(step)
|
||||
if self.step == 0:
|
||||
raise ValueError('Increment must be higher than 0')
|
||||
|
||||
def get_next_value(self, date, field):
|
||||
start = field.get_value(date)
|
||||
minval = field.get_min(date)
|
||||
maxval = field.get_max(date)
|
||||
start = max(start, minval)
|
||||
|
||||
if not self.step:
|
||||
next = start
|
||||
else:
|
||||
distance_to_next = (self.step - (start - minval)) % self.step
|
||||
next = start + distance_to_next
|
||||
|
||||
if next <= maxval:
|
||||
return next
|
||||
|
||||
def __str__(self):
|
||||
if self.step:
|
||||
return '*/%d' % self.step
|
||||
return '*'
|
||||
|
||||
def __repr__(self):
|
||||
return "%s(%s)" % (self.__class__.__name__, self.step)
|
||||
|
||||
|
||||
class RangeExpression(AllExpression):
|
||||
value_re = re.compile(
|
||||
r'(?P<first>\d+)(?:-(?P<last>\d+))?(?:/(?P<step>\d+))?$')
|
||||
|
||||
def __init__(self, first, last=None, step=None):
|
||||
AllExpression.__init__(self, step)
|
||||
first = asint(first)
|
||||
last = asint(last)
|
||||
if last is None and step is None:
|
||||
last = first
|
||||
if last is not None and first > last:
|
||||
raise ValueError('The minimum value in a range must not be higher than the maximum')
|
||||
self.first = first
|
||||
self.last = last
|
||||
|
||||
def get_next_value(self, date, field):
|
||||
start = field.get_value(date)
|
||||
minval = field.get_min(date)
|
||||
maxval = field.get_max(date)
|
||||
|
||||
# Apply range limits
|
||||
minval = max(minval, self.first)
|
||||
if self.last is not None:
|
||||
maxval = min(maxval, self.last)
|
||||
start = max(start, minval)
|
||||
|
||||
if not self.step:
|
||||
next = start
|
||||
else:
|
||||
distance_to_next = (self.step - (start - minval)) % self.step
|
||||
next = start + distance_to_next
|
||||
|
||||
if next <= maxval:
|
||||
return next
|
||||
|
||||
def __str__(self):
|
||||
if self.last != self.first and self.last is not None:
|
||||
range = '%d-%d' % (self.first, self.last)
|
||||
else:
|
||||
range = str(self.first)
|
||||
|
||||
if self.step:
|
||||
return '%s/%d' % (range, self.step)
|
||||
return range
|
||||
|
||||
def __repr__(self):
|
||||
args = [str(self.first)]
|
||||
if self.last != self.first and self.last is not None or self.step:
|
||||
args.append(str(self.last))
|
||||
if self.step:
|
||||
args.append(str(self.step))
|
||||
return "%s(%s)" % (self.__class__.__name__, ', '.join(args))
|
||||
|
||||
|
||||
class WeekdayRangeExpression(RangeExpression):
|
||||
value_re = re.compile(r'(?P<first>[a-z]+)(?:-(?P<last>[a-z]+))?', re.IGNORECASE)
|
||||
|
||||
def __init__(self, first, last=None):
|
||||
try:
|
||||
first_num = WEEKDAYS.index(first.lower())
|
||||
except ValueError:
|
||||
raise ValueError('Invalid weekday name "%s"' % first)
|
||||
|
||||
if last:
|
||||
try:
|
||||
last_num = WEEKDAYS.index(last.lower())
|
||||
except ValueError:
|
||||
raise ValueError('Invalid weekday name "%s"' % last)
|
||||
else:
|
||||
last_num = None
|
||||
|
||||
RangeExpression.__init__(self, first_num, last_num)
|
||||
|
||||
def __str__(self):
|
||||
if self.last != self.first and self.last is not None:
|
||||
return '%s-%s' % (WEEKDAYS[self.first], WEEKDAYS[self.last])
|
||||
return WEEKDAYS[self.first]
|
||||
|
||||
def __repr__(self):
|
||||
args = ["'%s'" % WEEKDAYS[self.first]]
|
||||
if self.last != self.first and self.last is not None:
|
||||
args.append("'%s'" % WEEKDAYS[self.last])
|
||||
return "%s(%s)" % (self.__class__.__name__, ', '.join(args))
|
||||
|
||||
|
||||
class WeekdayPositionExpression(AllExpression):
|
||||
options = ['1st', '2nd', '3rd', '4th', '5th', 'last']
|
||||
value_re = re.compile(r'(?P<option_name>%s) +(?P<weekday_name>(?:\d+|\w+))' % '|'.join(options), re.IGNORECASE)
|
||||
|
||||
def __init__(self, option_name, weekday_name):
|
||||
try:
|
||||
self.option_num = self.options.index(option_name.lower())
|
||||
except ValueError:
|
||||
raise ValueError('Invalid weekday position "%s"' % option_name)
|
||||
|
||||
try:
|
||||
self.weekday = WEEKDAYS.index(weekday_name.lower())
|
||||
except ValueError:
|
||||
raise ValueError('Invalid weekday name "%s"' % weekday_name)
|
||||
|
||||
def get_next_value(self, date, field):
|
||||
# Figure out the weekday of the month's first day and the number
|
||||
# of days in that month
|
||||
first_day_wday, last_day = monthrange(date.year, date.month)
|
||||
|
||||
# Calculate which day of the month is the first of the target weekdays
|
||||
first_hit_day = self.weekday - first_day_wday + 1
|
||||
if first_hit_day <= 0:
|
||||
first_hit_day += 7
|
||||
|
||||
# Calculate what day of the month the target weekday would be
|
||||
if self.option_num < 5:
|
||||
target_day = first_hit_day + self.option_num * 7
|
||||
else:
|
||||
target_day = first_hit_day + ((last_day - first_hit_day) / 7) * 7
|
||||
|
||||
if target_day <= last_day and target_day >= date.day:
|
||||
return target_day
|
||||
|
||||
def __str__(self):
|
||||
return '%s %s' % (self.options[self.option_num], WEEKDAYS[self.weekday])
|
||||
|
||||
def __repr__(self):
|
||||
return "%s('%s', '%s')" % (self.__class__.__name__, self.options[self.option_num], WEEKDAYS[self.weekday])
|
||||
|
||||
|
||||
class LastDayOfMonthExpression(AllExpression):
|
||||
value_re = re.compile(r'last', re.IGNORECASE)
|
||||
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
def get_next_value(self, date, field):
|
||||
return monthrange(date.year, date.month)[1]
|
||||
|
||||
def __str__(self):
|
||||
return 'last'
|
||||
|
||||
def __repr__(self):
|
||||
return "%s()" % self.__class__.__name__
|
||||
97
lib/apscheduler/triggers/cron/fields.py
Normal file
97
lib/apscheduler/triggers/cron/fields.py
Normal file
@@ -0,0 +1,97 @@
|
||||
"""
|
||||
Fields represent CronTrigger options which map to :class:`~datetime.datetime`
|
||||
fields.
|
||||
"""
|
||||
|
||||
from calendar import monthrange
|
||||
|
||||
from apscheduler.triggers.cron.expressions import (
|
||||
AllExpression, RangeExpression, WeekdayPositionExpression, LastDayOfMonthExpression, WeekdayRangeExpression)
|
||||
|
||||
|
||||
__all__ = ('MIN_VALUES', 'MAX_VALUES', 'DEFAULT_VALUES', 'BaseField', 'WeekField', 'DayOfMonthField', 'DayOfWeekField')
|
||||
|
||||
|
||||
MIN_VALUES = {'year': 1970, 'month': 1, 'day': 1, 'week': 1, 'day_of_week': 0, 'hour': 0, 'minute': 0, 'second': 0}
|
||||
MAX_VALUES = {'year': 2 ** 63, 'month': 12, 'day:': 31, 'week': 53, 'day_of_week': 6, 'hour': 23, 'minute': 59,
|
||||
'second': 59}
|
||||
DEFAULT_VALUES = {'year': '*', 'month': 1, 'day': 1, 'week': '*', 'day_of_week': '*', 'hour': 0, 'minute': 0,
|
||||
'second': 0}
|
||||
|
||||
|
||||
class BaseField(object):
|
||||
REAL = True
|
||||
COMPILERS = [AllExpression, RangeExpression]
|
||||
|
||||
def __init__(self, name, exprs, is_default=False):
|
||||
self.name = name
|
||||
self.is_default = is_default
|
||||
self.compile_expressions(exprs)
|
||||
|
||||
def get_min(self, dateval):
|
||||
return MIN_VALUES[self.name]
|
||||
|
||||
def get_max(self, dateval):
|
||||
return MAX_VALUES[self.name]
|
||||
|
||||
def get_value(self, dateval):
|
||||
return getattr(dateval, self.name)
|
||||
|
||||
def get_next_value(self, dateval):
|
||||
smallest = None
|
||||
for expr in self.expressions:
|
||||
value = expr.get_next_value(dateval, self)
|
||||
if smallest is None or (value is not None and value < smallest):
|
||||
smallest = value
|
||||
|
||||
return smallest
|
||||
|
||||
def compile_expressions(self, exprs):
|
||||
self.expressions = []
|
||||
|
||||
# Split a comma-separated expression list, if any
|
||||
exprs = str(exprs).strip()
|
||||
if ',' in exprs:
|
||||
for expr in exprs.split(','):
|
||||
self.compile_expression(expr)
|
||||
else:
|
||||
self.compile_expression(exprs)
|
||||
|
||||
def compile_expression(self, expr):
|
||||
for compiler in self.COMPILERS:
|
||||
match = compiler.value_re.match(expr)
|
||||
if match:
|
||||
compiled_expr = compiler(**match.groupdict())
|
||||
self.expressions.append(compiled_expr)
|
||||
return
|
||||
|
||||
raise ValueError('Unrecognized expression "%s" for field "%s"' % (expr, self.name))
|
||||
|
||||
def __str__(self):
|
||||
expr_strings = (str(e) for e in self.expressions)
|
||||
return ','.join(expr_strings)
|
||||
|
||||
def __repr__(self):
|
||||
return "%s('%s', '%s')" % (self.__class__.__name__, self.name, self)
|
||||
|
||||
|
||||
class WeekField(BaseField):
|
||||
REAL = False
|
||||
|
||||
def get_value(self, dateval):
|
||||
return dateval.isocalendar()[1]
|
||||
|
||||
|
||||
class DayOfMonthField(BaseField):
|
||||
COMPILERS = BaseField.COMPILERS + [WeekdayPositionExpression, LastDayOfMonthExpression]
|
||||
|
||||
def get_max(self, dateval):
|
||||
return monthrange(dateval.year, dateval.month)[1]
|
||||
|
||||
|
||||
class DayOfWeekField(BaseField):
|
||||
REAL = False
|
||||
COMPILERS = BaseField.COMPILERS + [WeekdayRangeExpression]
|
||||
|
||||
def get_value(self, dateval):
|
||||
return dateval.weekday()
|
||||
30
lib/apscheduler/triggers/date.py
Normal file
30
lib/apscheduler/triggers/date.py
Normal file
@@ -0,0 +1,30 @@
|
||||
from datetime import datetime
|
||||
|
||||
from tzlocal import get_localzone
|
||||
|
||||
from apscheduler.triggers.base import BaseTrigger
|
||||
from apscheduler.util import convert_to_datetime, datetime_repr, astimezone
|
||||
|
||||
|
||||
class DateTrigger(BaseTrigger):
|
||||
"""
|
||||
Triggers once on the given datetime. If ``run_date`` is left empty, current time is used.
|
||||
|
||||
:param datetime|str run_date: the date/time to run the job at
|
||||
:param datetime.tzinfo|str timezone: time zone for ``run_date`` if it doesn't have one already
|
||||
"""
|
||||
|
||||
__slots__ = 'timezone', 'run_date'
|
||||
|
||||
def __init__(self, run_date=None, timezone=None):
|
||||
timezone = astimezone(timezone) or get_localzone()
|
||||
self.run_date = convert_to_datetime(run_date or datetime.now(), timezone, 'run_date')
|
||||
|
||||
def get_next_fire_time(self, previous_fire_time, now):
|
||||
return self.run_date if previous_fire_time is None else None
|
||||
|
||||
def __str__(self):
|
||||
return 'date[%s]' % datetime_repr(self.run_date)
|
||||
|
||||
def __repr__(self):
|
||||
return "<%s (run_date='%s')>" % (self.__class__.__name__, datetime_repr(self.run_date))
|
||||
65
lib/apscheduler/triggers/interval.py
Normal file
65
lib/apscheduler/triggers/interval.py
Normal file
@@ -0,0 +1,65 @@
|
||||
from datetime import timedelta, datetime
|
||||
from math import ceil
|
||||
|
||||
from tzlocal import get_localzone
|
||||
|
||||
from apscheduler.triggers.base import BaseTrigger
|
||||
from apscheduler.util import convert_to_datetime, timedelta_seconds, datetime_repr, astimezone
|
||||
|
||||
|
||||
class IntervalTrigger(BaseTrigger):
|
||||
"""
|
||||
Triggers on specified intervals, starting on ``start_date`` if specified, ``datetime.now()`` + interval
|
||||
otherwise.
|
||||
|
||||
:param int weeks: number of weeks to wait
|
||||
:param int days: number of days to wait
|
||||
:param int hours: number of hours to wait
|
||||
:param int minutes: number of minutes to wait
|
||||
:param int seconds: number of seconds to wait
|
||||
:param datetime|str start_date: starting point for the interval calculation
|
||||
:param datetime|str end_date: latest possible date/time to trigger on
|
||||
:param datetime.tzinfo|str timezone: time zone to use for the date/time calculations
|
||||
"""
|
||||
|
||||
__slots__ = 'timezone', 'start_date', 'end_date', 'interval'
|
||||
|
||||
def __init__(self, weeks=0, days=0, hours=0, minutes=0, seconds=0, start_date=None, end_date=None, timezone=None):
|
||||
self.interval = timedelta(weeks=weeks, days=days, hours=hours, minutes=minutes, seconds=seconds)
|
||||
self.interval_length = timedelta_seconds(self.interval)
|
||||
if self.interval_length == 0:
|
||||
self.interval = timedelta(seconds=1)
|
||||
self.interval_length = 1
|
||||
|
||||
if timezone:
|
||||
self.timezone = astimezone(timezone)
|
||||
elif start_date and start_date.tzinfo:
|
||||
self.timezone = start_date.tzinfo
|
||||
elif end_date and end_date.tzinfo:
|
||||
self.timezone = end_date.tzinfo
|
||||
else:
|
||||
self.timezone = get_localzone()
|
||||
|
||||
start_date = start_date or (datetime.now(self.timezone) + self.interval)
|
||||
self.start_date = convert_to_datetime(start_date, self.timezone, 'start_date')
|
||||
self.end_date = convert_to_datetime(end_date, self.timezone, 'end_date')
|
||||
|
||||
def get_next_fire_time(self, previous_fire_time, now):
|
||||
if previous_fire_time:
|
||||
next_fire_time = previous_fire_time + self.interval
|
||||
elif self.start_date > now:
|
||||
next_fire_time = self.start_date
|
||||
else:
|
||||
timediff_seconds = timedelta_seconds(now - self.start_date)
|
||||
next_interval_num = int(ceil(timediff_seconds / self.interval_length))
|
||||
next_fire_time = self.start_date + self.interval * next_interval_num
|
||||
|
||||
if not self.end_date or next_fire_time <= self.end_date:
|
||||
return self.timezone.normalize(next_fire_time)
|
||||
|
||||
def __str__(self):
|
||||
return 'interval[%s]' % str(self.interval)
|
||||
|
||||
def __repr__(self):
|
||||
return "<%s (interval=%r, start_date='%s')>" % (self.__class__.__name__, self.interval,
|
||||
datetime_repr(self.start_date))
|
||||
385
lib/apscheduler/util.py
Normal file
385
lib/apscheduler/util.py
Normal file
@@ -0,0 +1,385 @@
|
||||
"""This module contains several handy functions primarily meant for internal use."""
|
||||
|
||||
from __future__ import division
|
||||
from datetime import date, datetime, time, timedelta, tzinfo
|
||||
from inspect import isfunction, ismethod, getargspec
|
||||
from calendar import timegm
|
||||
import re
|
||||
|
||||
from pytz import timezone, utc
|
||||
import six
|
||||
|
||||
try:
|
||||
from inspect import signature
|
||||
except ImportError: # pragma: nocover
|
||||
try:
|
||||
from funcsigs import signature
|
||||
except ImportError:
|
||||
signature = None
|
||||
|
||||
__all__ = ('asint', 'asbool', 'astimezone', 'convert_to_datetime', 'datetime_to_utc_timestamp',
|
||||
'utc_timestamp_to_datetime', 'timedelta_seconds', 'datetime_ceil', 'get_callable_name', 'obj_to_ref',
|
||||
'ref_to_obj', 'maybe_ref', 'repr_escape', 'check_callable_args')
|
||||
|
||||
|
||||
class _Undefined(object):
|
||||
def __nonzero__(self):
|
||||
return False
|
||||
|
||||
def __bool__(self):
|
||||
return False
|
||||
|
||||
def __repr__(self):
|
||||
return '<undefined>'
|
||||
|
||||
undefined = _Undefined() #: a unique object that only signifies that no value is defined
|
||||
|
||||
|
||||
def asint(text):
|
||||
"""
|
||||
Safely converts a string to an integer, returning None if the string is None.
|
||||
|
||||
:type text: str
|
||||
:rtype: int
|
||||
"""
|
||||
|
||||
if text is not None:
|
||||
return int(text)
|
||||
|
||||
|
||||
def asbool(obj):
|
||||
"""
|
||||
Interprets an object as a boolean value.
|
||||
|
||||
:rtype: bool
|
||||
"""
|
||||
|
||||
if isinstance(obj, str):
|
||||
obj = obj.strip().lower()
|
||||
if obj in ('true', 'yes', 'on', 'y', 't', '1'):
|
||||
return True
|
||||
if obj in ('false', 'no', 'off', 'n', 'f', '0'):
|
||||
return False
|
||||
raise ValueError('Unable to interpret value "%s" as boolean' % obj)
|
||||
return bool(obj)
|
||||
|
||||
|
||||
def astimezone(obj):
|
||||
"""
|
||||
Interprets an object as a timezone.
|
||||
|
||||
:rtype: tzinfo
|
||||
"""
|
||||
|
||||
if isinstance(obj, six.string_types):
|
||||
return timezone(obj)
|
||||
if isinstance(obj, tzinfo):
|
||||
if not hasattr(obj, 'localize') or not hasattr(obj, 'normalize'):
|
||||
raise TypeError('Only timezones from the pytz library are supported')
|
||||
if obj.zone == 'local':
|
||||
raise ValueError('Unable to determine the name of the local timezone -- use an explicit timezone instead')
|
||||
return obj
|
||||
if obj is not None:
|
||||
raise TypeError('Expected tzinfo, got %s instead' % obj.__class__.__name__)
|
||||
|
||||
|
||||
_DATE_REGEX = re.compile(
|
||||
r'(?P<year>\d{4})-(?P<month>\d{1,2})-(?P<day>\d{1,2})'
|
||||
r'(?: (?P<hour>\d{1,2}):(?P<minute>\d{1,2}):(?P<second>\d{1,2})'
|
||||
r'(?:\.(?P<microsecond>\d{1,6}))?)?')
|
||||
|
||||
|
||||
def convert_to_datetime(input, tz, arg_name):
|
||||
"""
|
||||
Converts the given object to a timezone aware datetime object.
|
||||
If a timezone aware datetime object is passed, it is returned unmodified.
|
||||
If a native datetime object is passed, it is given the specified timezone.
|
||||
If the input is a string, it is parsed as a datetime with the given timezone.
|
||||
|
||||
Date strings are accepted in three different forms: date only (Y-m-d),
|
||||
date with time (Y-m-d H:M:S) or with date+time with microseconds
|
||||
(Y-m-d H:M:S.micro).
|
||||
|
||||
:param str|datetime input: the datetime or string to convert to a timezone aware datetime
|
||||
:param datetime.tzinfo tz: timezone to interpret ``input`` in
|
||||
:param str arg_name: the name of the argument (used in an error message)
|
||||
:rtype: datetime
|
||||
"""
|
||||
|
||||
if input is None:
|
||||
return
|
||||
elif isinstance(input, datetime):
|
||||
datetime_ = input
|
||||
elif isinstance(input, date):
|
||||
datetime_ = datetime.combine(input, time())
|
||||
elif isinstance(input, six.string_types):
|
||||
m = _DATE_REGEX.match(input)
|
||||
if not m:
|
||||
raise ValueError('Invalid date string')
|
||||
values = [(k, int(v or 0)) for k, v in m.groupdict().items()]
|
||||
values = dict(values)
|
||||
datetime_ = datetime(**values)
|
||||
else:
|
||||
raise TypeError('Unsupported type for %s: %s' % (arg_name, input.__class__.__name__))
|
||||
|
||||
if datetime_.tzinfo is not None:
|
||||
return datetime_
|
||||
if tz is None:
|
||||
raise ValueError('The "tz" argument must be specified if %s has no timezone information' % arg_name)
|
||||
if isinstance(tz, six.string_types):
|
||||
tz = timezone(tz)
|
||||
|
||||
try:
|
||||
return tz.localize(datetime_, is_dst=None)
|
||||
except AttributeError:
|
||||
raise TypeError('Only pytz timezones are supported (need the localize() and normalize() methods)')
|
||||
|
||||
|
||||
def datetime_to_utc_timestamp(timeval):
|
||||
"""
|
||||
Converts a datetime instance to a timestamp.
|
||||
|
||||
:type timeval: datetime
|
||||
:rtype: float
|
||||
"""
|
||||
|
||||
if timeval is not None:
|
||||
return timegm(timeval.utctimetuple()) + timeval.microsecond / 1000000
|
||||
|
||||
|
||||
def utc_timestamp_to_datetime(timestamp):
|
||||
"""
|
||||
Converts the given timestamp to a datetime instance.
|
||||
|
||||
:type timestamp: float
|
||||
:rtype: datetime
|
||||
"""
|
||||
|
||||
if timestamp is not None:
|
||||
return datetime.fromtimestamp(timestamp, utc)
|
||||
|
||||
|
||||
def timedelta_seconds(delta):
|
||||
"""
|
||||
Converts the given timedelta to seconds.
|
||||
|
||||
:type delta: timedelta
|
||||
:rtype: float
|
||||
"""
|
||||
|
||||
return delta.days * 24 * 60 * 60 + delta.seconds + \
|
||||
delta.microseconds / 1000000.0
|
||||
|
||||
|
||||
def datetime_ceil(dateval):
|
||||
"""
|
||||
Rounds the given datetime object upwards.
|
||||
|
||||
:type dateval: datetime
|
||||
"""
|
||||
|
||||
if dateval.microsecond > 0:
|
||||
return dateval + timedelta(seconds=1, microseconds=-dateval.microsecond)
|
||||
return dateval
|
||||
|
||||
|
||||
def datetime_repr(dateval):
|
||||
return dateval.strftime('%Y-%m-%d %H:%M:%S %Z') if dateval else 'None'
|
||||
|
||||
|
||||
def get_callable_name(func):
|
||||
"""
|
||||
Returns the best available display name for the given function/callable.
|
||||
|
||||
:rtype: str
|
||||
"""
|
||||
|
||||
# the easy case (on Python 3.3+)
|
||||
if hasattr(func, '__qualname__'):
|
||||
return func.__qualname__
|
||||
|
||||
# class methods, bound and unbound methods
|
||||
f_self = getattr(func, '__self__', None) or getattr(func, 'im_self', None)
|
||||
if f_self and hasattr(func, '__name__'):
|
||||
f_class = f_self if isinstance(f_self, type) else f_self.__class__
|
||||
else:
|
||||
f_class = getattr(func, 'im_class', None)
|
||||
|
||||
if f_class and hasattr(func, '__name__'):
|
||||
return '%s.%s' % (f_class.__name__, func.__name__)
|
||||
|
||||
# class or class instance
|
||||
if hasattr(func, '__call__'):
|
||||
# class
|
||||
if hasattr(func, '__name__'):
|
||||
return func.__name__
|
||||
|
||||
# instance of a class with a __call__ method
|
||||
return func.__class__.__name__
|
||||
|
||||
raise TypeError('Unable to determine a name for %r -- maybe it is not a callable?' % func)
|
||||
|
||||
|
||||
def obj_to_ref(obj):
|
||||
"""
|
||||
Returns the path to the given object.
|
||||
|
||||
:rtype: str
|
||||
"""
|
||||
|
||||
try:
|
||||
ref = '%s:%s' % (obj.__module__, get_callable_name(obj))
|
||||
obj2 = ref_to_obj(ref)
|
||||
if obj != obj2:
|
||||
raise ValueError
|
||||
except Exception:
|
||||
raise ValueError('Cannot determine the reference to %r' % obj)
|
||||
|
||||
return ref
|
||||
|
||||
|
||||
def ref_to_obj(ref):
|
||||
"""
|
||||
Returns the object pointed to by ``ref``.
|
||||
|
||||
:type ref: str
|
||||
"""
|
||||
|
||||
if not isinstance(ref, six.string_types):
|
||||
raise TypeError('References must be strings')
|
||||
if ':' not in ref:
|
||||
raise ValueError('Invalid reference')
|
||||
|
||||
modulename, rest = ref.split(':', 1)
|
||||
try:
|
||||
obj = __import__(modulename)
|
||||
except ImportError:
|
||||
raise LookupError('Error resolving reference %s: could not import module' % ref)
|
||||
|
||||
try:
|
||||
for name in modulename.split('.')[1:] + rest.split('.'):
|
||||
obj = getattr(obj, name)
|
||||
return obj
|
||||
except Exception:
|
||||
raise LookupError('Error resolving reference %s: error looking up object' % ref)
|
||||
|
||||
|
||||
def maybe_ref(ref):
|
||||
"""
|
||||
Returns the object that the given reference points to, if it is indeed a reference.
|
||||
If it is not a reference, the object is returned as-is.
|
||||
"""
|
||||
|
||||
if not isinstance(ref, str):
|
||||
return ref
|
||||
return ref_to_obj(ref)
|
||||
|
||||
|
||||
if six.PY2:
|
||||
def repr_escape(string):
|
||||
if isinstance(string, six.text_type):
|
||||
return string.encode('ascii', 'backslashreplace')
|
||||
return string
|
||||
else:
|
||||
repr_escape = lambda string: string
|
||||
|
||||
|
||||
def check_callable_args(func, args, kwargs):
|
||||
"""
|
||||
Ensures that the given callable can be called with the given arguments.
|
||||
|
||||
:type args: tuple
|
||||
:type kwargs: dict
|
||||
"""
|
||||
|
||||
pos_kwargs_conflicts = [] # parameters that have a match in both args and kwargs
|
||||
positional_only_kwargs = [] # positional-only parameters that have a match in kwargs
|
||||
unsatisfied_args = [] # parameters in signature that don't have a match in args or kwargs
|
||||
unsatisfied_kwargs = [] # keyword-only arguments that don't have a match in kwargs
|
||||
unmatched_args = list(args) # args that didn't match any of the parameters in the signature
|
||||
unmatched_kwargs = list(kwargs) # kwargs that didn't match any of the parameters in the signature
|
||||
has_varargs = has_var_kwargs = False # indicates if the signature defines *args and **kwargs respectively
|
||||
|
||||
if signature:
|
||||
try:
|
||||
sig = signature(func)
|
||||
except ValueError:
|
||||
return # signature() doesn't work against every kind of callable
|
||||
|
||||
for param in six.itervalues(sig.parameters):
|
||||
if param.kind == param.POSITIONAL_OR_KEYWORD:
|
||||
if param.name in unmatched_kwargs and unmatched_args:
|
||||
pos_kwargs_conflicts.append(param.name)
|
||||
elif unmatched_args:
|
||||
del unmatched_args[0]
|
||||
elif param.name in unmatched_kwargs:
|
||||
unmatched_kwargs.remove(param.name)
|
||||
elif param.default is param.empty:
|
||||
unsatisfied_args.append(param.name)
|
||||
elif param.kind == param.POSITIONAL_ONLY:
|
||||
if unmatched_args:
|
||||
del unmatched_args[0]
|
||||
elif param.name in unmatched_kwargs:
|
||||
unmatched_kwargs.remove(param.name)
|
||||
positional_only_kwargs.append(param.name)
|
||||
elif param.default is param.empty:
|
||||
unsatisfied_args.append(param.name)
|
||||
elif param.kind == param.KEYWORD_ONLY:
|
||||
if param.name in unmatched_kwargs:
|
||||
unmatched_kwargs.remove(param.name)
|
||||
elif param.default is param.empty:
|
||||
unsatisfied_kwargs.append(param.name)
|
||||
elif param.kind == param.VAR_POSITIONAL:
|
||||
has_varargs = True
|
||||
elif param.kind == param.VAR_KEYWORD:
|
||||
has_var_kwargs = True
|
||||
else:
|
||||
if not isfunction(func) and not ismethod(func) and hasattr(func, '__call__'):
|
||||
func = func.__call__
|
||||
|
||||
try:
|
||||
argspec = getargspec(func)
|
||||
except TypeError:
|
||||
return # getargspec() doesn't work certain callables
|
||||
|
||||
argspec_args = argspec.args if not ismethod(func) else argspec.args[1:]
|
||||
has_varargs = bool(argspec.varargs)
|
||||
has_var_kwargs = bool(argspec.keywords)
|
||||
for arg, default in six.moves.zip_longest(argspec_args, argspec.defaults or (), fillvalue=undefined):
|
||||
if arg in unmatched_kwargs and unmatched_args:
|
||||
pos_kwargs_conflicts.append(arg)
|
||||
elif unmatched_args:
|
||||
del unmatched_args[0]
|
||||
elif arg in unmatched_kwargs:
|
||||
unmatched_kwargs.remove(arg)
|
||||
elif default is undefined:
|
||||
unsatisfied_args.append(arg)
|
||||
|
||||
# Make sure there are no conflicts between args and kwargs
|
||||
if pos_kwargs_conflicts:
|
||||
raise ValueError('The following arguments are supplied in both args and kwargs: %s' %
|
||||
', '.join(pos_kwargs_conflicts))
|
||||
|
||||
# Check if keyword arguments are being fed to positional-only parameters
|
||||
if positional_only_kwargs:
|
||||
raise ValueError('The following arguments cannot be given as keyword arguments: %s' %
|
||||
', '.join(positional_only_kwargs))
|
||||
|
||||
# Check that the number of positional arguments minus the number of matched kwargs matches the argspec
|
||||
if unsatisfied_args:
|
||||
raise ValueError('The following arguments have not been supplied: %s' % ', '.join(unsatisfied_args))
|
||||
|
||||
# Check that all keyword-only arguments have been supplied
|
||||
if unsatisfied_kwargs:
|
||||
raise ValueError('The following keyword-only arguments have not been supplied in kwargs: %s' %
|
||||
', '.join(unsatisfied_kwargs))
|
||||
|
||||
# Check that the callable can accept the given number of positional arguments
|
||||
if not has_varargs and unmatched_args:
|
||||
raise ValueError('The list of positional arguments is longer than the target callable can handle '
|
||||
'(allowed: %d, given in args: %d)' % (len(args) - len(unmatched_args), len(args)))
|
||||
|
||||
# Check that the callable can accept the given keyword arguments
|
||||
if not has_var_kwargs and unmatched_kwargs:
|
||||
raise ValueError('The target callable does not accept the following keyword arguments: %s' %
|
||||
', '.join(unmatched_kwargs))
|
||||
2386
lib/argparse.py
Normal file
2386
lib/argparse.py
Normal file
File diff suppressed because it is too large
Load Diff
131
lib/bencode.py
Normal file
131
lib/bencode.py
Normal file
@@ -0,0 +1,131 @@
|
||||
# Got this from here: https://gist.github.com/1126793
|
||||
|
||||
# The contents of this file are subject to the Python Software Foundation
|
||||
# License Version 2.3 (the License). You may not copy or use this file, in
|
||||
# either source code or executable form, except in compliance with the License.
|
||||
# You may obtain a copy of the License at http://www.python.org/license.
|
||||
#
|
||||
# Software distributed under the License is distributed on an AS IS basis,
|
||||
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
# for the specific language governing rights and limitations under the
|
||||
# License.
|
||||
|
||||
# Written by Petru Paler
|
||||
|
||||
# Minor modifications made by Andrew Resch to replace the BTFailure errors with Exceptions
|
||||
|
||||
def decode_int(x, f):
|
||||
f += 1
|
||||
newf = x.index('e', f)
|
||||
n = int(x[f:newf])
|
||||
if x[f] == '-':
|
||||
if x[f + 1] == '0':
|
||||
raise ValueError
|
||||
elif x[f] == '0' and newf != f+1:
|
||||
raise ValueError
|
||||
return (n, newf+1)
|
||||
|
||||
def decode_string(x, f):
|
||||
colon = x.index(':', f)
|
||||
n = int(x[f:colon])
|
||||
if x[f] == '0' and colon != f+1:
|
||||
raise ValueError
|
||||
colon += 1
|
||||
return (x[colon:colon+n], colon+n)
|
||||
|
||||
def decode_list(x, f):
|
||||
r, f = [], f+1
|
||||
while x[f] != 'e':
|
||||
v, f = decode_func[x[f]](x, f)
|
||||
r.append(v)
|
||||
return (r, f + 1)
|
||||
|
||||
def decode_dict(x, f):
|
||||
r, f = {}, f+1
|
||||
while x[f] != 'e':
|
||||
k, f = decode_string(x, f)
|
||||
r[k], f = decode_func[x[f]](x, f)
|
||||
return (r, f + 1)
|
||||
|
||||
decode_func = {}
|
||||
decode_func['l'] = decode_list
|
||||
decode_func['d'] = decode_dict
|
||||
decode_func['i'] = decode_int
|
||||
decode_func['0'] = decode_string
|
||||
decode_func['1'] = decode_string
|
||||
decode_func['2'] = decode_string
|
||||
decode_func['3'] = decode_string
|
||||
decode_func['4'] = decode_string
|
||||
decode_func['5'] = decode_string
|
||||
decode_func['6'] = decode_string
|
||||
decode_func['7'] = decode_string
|
||||
decode_func['8'] = decode_string
|
||||
decode_func['9'] = decode_string
|
||||
|
||||
def bdecode(x):
|
||||
try:
|
||||
r, l = decode_func[x[0]](x, 0)
|
||||
except (IndexError, KeyError, ValueError):
|
||||
raise Exception("not a valid bencoded string")
|
||||
|
||||
return r
|
||||
|
||||
from types import StringType, IntType, LongType, DictType, ListType, TupleType
|
||||
|
||||
|
||||
class Bencached(object):
|
||||
|
||||
__slots__ = ['bencoded']
|
||||
|
||||
def __init__(self, s):
|
||||
self.bencoded = s
|
||||
|
||||
def encode_bencached(x,r):
|
||||
r.append(x.bencoded)
|
||||
|
||||
def encode_int(x, r):
|
||||
r.extend(('i', str(x), 'e'))
|
||||
|
||||
def encode_bool(x, r):
|
||||
if x:
|
||||
encode_int(1, r)
|
||||
else:
|
||||
encode_int(0, r)
|
||||
|
||||
def encode_string(x, r):
|
||||
r.extend((str(len(x)), ':', x))
|
||||
|
||||
def encode_list(x, r):
|
||||
r.append('l')
|
||||
for i in x:
|
||||
encode_func[type(i)](i, r)
|
||||
r.append('e')
|
||||
|
||||
def encode_dict(x,r):
|
||||
r.append('d')
|
||||
ilist = x.items()
|
||||
ilist.sort()
|
||||
for k, v in ilist:
|
||||
r.extend((str(len(k)), ':', k))
|
||||
encode_func[type(v)](v, r)
|
||||
r.append('e')
|
||||
|
||||
encode_func = {}
|
||||
encode_func[Bencached] = encode_bencached
|
||||
encode_func[IntType] = encode_int
|
||||
encode_func[LongType] = encode_int
|
||||
encode_func[StringType] = encode_string
|
||||
encode_func[ListType] = encode_list
|
||||
encode_func[TupleType] = encode_list
|
||||
encode_func[DictType] = encode_dict
|
||||
|
||||
try:
|
||||
from types import BooleanType
|
||||
encode_func[BooleanType] = encode_bool
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
def bencode(x):
|
||||
r = []
|
||||
encode_func[type(x)](x, r)
|
||||
return ''.join(r)
|
||||
803
lib/biplist/__init__.py
Executable file
803
lib/biplist/__init__.py
Executable file
@@ -0,0 +1,803 @@
|
||||
"""biplist -- a library for reading and writing binary property list files.
|
||||
|
||||
Binary Property List (plist) files provide a faster and smaller serialization
|
||||
format for property lists on OS X. This is a library for generating binary
|
||||
plists which can be read by OS X, iOS, or other clients.
|
||||
|
||||
The API models the plistlib API, and will call through to plistlib when
|
||||
XML serialization or deserialization is required.
|
||||
|
||||
To generate plists with UID values, wrap the values with the Uid object. The
|
||||
value must be an int.
|
||||
|
||||
To generate plists with NSData/CFData values, wrap the values with the
|
||||
Data object. The value must be a string.
|
||||
|
||||
Date values can only be datetime.datetime objects.
|
||||
|
||||
The exceptions InvalidPlistException and NotBinaryPlistException may be
|
||||
thrown to indicate that the data cannot be serialized or deserialized as
|
||||
a binary plist.
|
||||
|
||||
Plist generation example:
|
||||
|
||||
from biplist import *
|
||||
from datetime import datetime
|
||||
plist = {'aKey':'aValue',
|
||||
'0':1.322,
|
||||
'now':datetime.now(),
|
||||
'list':[1,2,3],
|
||||
'tuple':('a','b','c')
|
||||
}
|
||||
try:
|
||||
writePlist(plist, "example.plist")
|
||||
except (InvalidPlistException, NotBinaryPlistException), e:
|
||||
print "Something bad happened:", e
|
||||
|
||||
Plist parsing example:
|
||||
|
||||
from biplist import *
|
||||
try:
|
||||
plist = readPlist("example.plist")
|
||||
print plist
|
||||
except (InvalidPlistException, NotBinaryPlistException), e:
|
||||
print "Not a plist:", e
|
||||
"""
|
||||
|
||||
import sys
|
||||
from collections import namedtuple
|
||||
import datetime
|
||||
import io
|
||||
import math
|
||||
import plistlib
|
||||
from struct import pack, unpack
|
||||
from struct import error as struct_error
|
||||
import sys
|
||||
import time
|
||||
|
||||
try:
|
||||
unicode
|
||||
unicodeEmpty = r''
|
||||
except NameError:
|
||||
unicode = str
|
||||
unicodeEmpty = ''
|
||||
try:
|
||||
long
|
||||
except NameError:
|
||||
long = int
|
||||
try:
|
||||
{}.iteritems
|
||||
iteritems = lambda x: x.iteritems()
|
||||
except AttributeError:
|
||||
iteritems = lambda x: x.items()
|
||||
|
||||
__all__ = [
|
||||
'Uid', 'Data', 'readPlist', 'writePlist', 'readPlistFromString',
|
||||
'writePlistToString', 'InvalidPlistException', 'NotBinaryPlistException'
|
||||
]
|
||||
|
||||
# Apple uses Jan 1, 2001 as a base for all plist date/times.
|
||||
apple_reference_date = datetime.datetime.utcfromtimestamp(978307200)
|
||||
|
||||
class Uid(int):
|
||||
"""Wrapper around integers for representing UID values. This
|
||||
is used in keyed archiving."""
|
||||
def __repr__(self):
|
||||
return "Uid(%d)" % self
|
||||
|
||||
class Data(bytes):
|
||||
"""Wrapper around str types for representing Data values."""
|
||||
pass
|
||||
|
||||
class InvalidPlistException(Exception):
|
||||
"""Raised when the plist is incorrectly formatted."""
|
||||
pass
|
||||
|
||||
class NotBinaryPlistException(Exception):
|
||||
"""Raised when a binary plist was expected but not encountered."""
|
||||
pass
|
||||
|
||||
def readPlist(pathOrFile):
|
||||
"""Raises NotBinaryPlistException, InvalidPlistException"""
|
||||
didOpen = False
|
||||
result = None
|
||||
if isinstance(pathOrFile, (bytes, unicode)):
|
||||
pathOrFile = open(pathOrFile, 'rb')
|
||||
didOpen = True
|
||||
try:
|
||||
reader = PlistReader(pathOrFile)
|
||||
result = reader.parse()
|
||||
except NotBinaryPlistException as e:
|
||||
try:
|
||||
pathOrFile.seek(0)
|
||||
result = None
|
||||
if hasattr(plistlib, 'loads'):
|
||||
contents = None
|
||||
if isinstance(pathOrFile, (bytes, unicode)):
|
||||
with open(pathOrFile, 'rb') as f:
|
||||
contents = f.read()
|
||||
else:
|
||||
contents = pathOrFile.read()
|
||||
result = plistlib.loads(contents)
|
||||
else:
|
||||
result = plistlib.readPlist(pathOrFile)
|
||||
result = wrapDataObject(result, for_binary=True)
|
||||
except Exception as e:
|
||||
raise InvalidPlistException(e)
|
||||
finally:
|
||||
if didOpen:
|
||||
pathOrFile.close()
|
||||
return result
|
||||
|
||||
def wrapDataObject(o, for_binary=False):
|
||||
if isinstance(o, Data) and not for_binary:
|
||||
v = sys.version_info
|
||||
if not (v[0] >= 3 and v[1] >= 4):
|
||||
o = plistlib.Data(o)
|
||||
elif isinstance(o, (bytes, plistlib.Data)) and for_binary:
|
||||
if hasattr(o, 'data'):
|
||||
o = Data(o.data)
|
||||
elif isinstance(o, tuple):
|
||||
o = wrapDataObject(list(o), for_binary)
|
||||
o = tuple(o)
|
||||
elif isinstance(o, list):
|
||||
for i in range(len(o)):
|
||||
o[i] = wrapDataObject(o[i], for_binary)
|
||||
elif isinstance(o, dict):
|
||||
for k in o:
|
||||
o[k] = wrapDataObject(o[k], for_binary)
|
||||
return o
|
||||
|
||||
def writePlist(rootObject, pathOrFile, binary=True):
|
||||
if not binary:
|
||||
rootObject = wrapDataObject(rootObject, binary)
|
||||
if hasattr(plistlib, "dump"):
|
||||
if isinstance(pathOrFile, (bytes, unicode)):
|
||||
with open(pathOrFile, 'wb') as f:
|
||||
return plistlib.dump(rootObject, f)
|
||||
else:
|
||||
return plistlib.dump(rootObject, pathOrFile)
|
||||
else:
|
||||
return plistlib.writePlist(rootObject, pathOrFile)
|
||||
else:
|
||||
didOpen = False
|
||||
if isinstance(pathOrFile, (bytes, unicode)):
|
||||
pathOrFile = open(pathOrFile, 'wb')
|
||||
didOpen = True
|
||||
writer = PlistWriter(pathOrFile)
|
||||
result = writer.writeRoot(rootObject)
|
||||
if didOpen:
|
||||
pathOrFile.close()
|
||||
return result
|
||||
|
||||
def readPlistFromString(data):
|
||||
return readPlist(io.BytesIO(data))
|
||||
|
||||
def writePlistToString(rootObject, binary=True):
|
||||
if not binary:
|
||||
rootObject = wrapDataObject(rootObject, binary)
|
||||
if hasattr(plistlib, "dumps"):
|
||||
return plistlib.dumps(rootObject)
|
||||
elif hasattr(plistlib, "writePlistToBytes"):
|
||||
return plistlib.writePlistToBytes(rootObject)
|
||||
else:
|
||||
return plistlib.writePlistToString(rootObject)
|
||||
else:
|
||||
ioObject = io.BytesIO()
|
||||
writer = PlistWriter(ioObject)
|
||||
writer.writeRoot(rootObject)
|
||||
return ioObject.getvalue()
|
||||
|
||||
def is_stream_binary_plist(stream):
|
||||
stream.seek(0)
|
||||
header = stream.read(7)
|
||||
if header == b'bplist0':
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
PlistTrailer = namedtuple('PlistTrailer', 'offsetSize, objectRefSize, offsetCount, topLevelObjectNumber, offsetTableOffset')
|
||||
PlistByteCounts = namedtuple('PlistByteCounts', 'nullBytes, boolBytes, intBytes, realBytes, dateBytes, dataBytes, stringBytes, uidBytes, arrayBytes, setBytes, dictBytes')
|
||||
|
||||
class PlistReader(object):
|
||||
file = None
|
||||
contents = ''
|
||||
offsets = None
|
||||
trailer = None
|
||||
currentOffset = 0
|
||||
|
||||
def __init__(self, fileOrStream):
|
||||
"""Raises NotBinaryPlistException."""
|
||||
self.reset()
|
||||
self.file = fileOrStream
|
||||
|
||||
def parse(self):
|
||||
return self.readRoot()
|
||||
|
||||
def reset(self):
|
||||
self.trailer = None
|
||||
self.contents = ''
|
||||
self.offsets = []
|
||||
self.currentOffset = 0
|
||||
|
||||
def readRoot(self):
|
||||
result = None
|
||||
self.reset()
|
||||
# Get the header, make sure it's a valid file.
|
||||
if not is_stream_binary_plist(self.file):
|
||||
raise NotBinaryPlistException()
|
||||
self.file.seek(0)
|
||||
self.contents = self.file.read()
|
||||
if len(self.contents) < 32:
|
||||
raise InvalidPlistException("File is too short.")
|
||||
trailerContents = self.contents[-32:]
|
||||
try:
|
||||
self.trailer = PlistTrailer._make(unpack("!xxxxxxBBQQQ", trailerContents))
|
||||
offset_size = self.trailer.offsetSize * self.trailer.offsetCount
|
||||
offset = self.trailer.offsetTableOffset
|
||||
offset_contents = self.contents[offset:offset+offset_size]
|
||||
offset_i = 0
|
||||
while offset_i < self.trailer.offsetCount:
|
||||
begin = self.trailer.offsetSize*offset_i
|
||||
tmp_contents = offset_contents[begin:begin+self.trailer.offsetSize]
|
||||
tmp_sized = self.getSizedInteger(tmp_contents, self.trailer.offsetSize)
|
||||
self.offsets.append(tmp_sized)
|
||||
offset_i += 1
|
||||
self.setCurrentOffsetToObjectNumber(self.trailer.topLevelObjectNumber)
|
||||
result = self.readObject()
|
||||
except TypeError as e:
|
||||
raise InvalidPlistException(e)
|
||||
return result
|
||||
|
||||
def setCurrentOffsetToObjectNumber(self, objectNumber):
|
||||
self.currentOffset = self.offsets[objectNumber]
|
||||
|
||||
def readObject(self):
|
||||
result = None
|
||||
tmp_byte = self.contents[self.currentOffset:self.currentOffset+1]
|
||||
marker_byte = unpack("!B", tmp_byte)[0]
|
||||
format = (marker_byte >> 4) & 0x0f
|
||||
extra = marker_byte & 0x0f
|
||||
self.currentOffset += 1
|
||||
|
||||
def proc_extra(extra):
|
||||
if extra == 0b1111:
|
||||
#self.currentOffset += 1
|
||||
extra = self.readObject()
|
||||
return extra
|
||||
|
||||
# bool, null, or fill byte
|
||||
if format == 0b0000:
|
||||
if extra == 0b0000:
|
||||
result = None
|
||||
elif extra == 0b1000:
|
||||
result = False
|
||||
elif extra == 0b1001:
|
||||
result = True
|
||||
elif extra == 0b1111:
|
||||
pass # fill byte
|
||||
else:
|
||||
raise InvalidPlistException("Invalid object found at offset: %d" % (self.currentOffset - 1))
|
||||
# int
|
||||
elif format == 0b0001:
|
||||
extra = proc_extra(extra)
|
||||
result = self.readInteger(pow(2, extra))
|
||||
# real
|
||||
elif format == 0b0010:
|
||||
extra = proc_extra(extra)
|
||||
result = self.readReal(extra)
|
||||
# date
|
||||
elif format == 0b0011 and extra == 0b0011:
|
||||
result = self.readDate()
|
||||
# data
|
||||
elif format == 0b0100:
|
||||
extra = proc_extra(extra)
|
||||
result = self.readData(extra)
|
||||
# ascii string
|
||||
elif format == 0b0101:
|
||||
extra = proc_extra(extra)
|
||||
result = self.readAsciiString(extra)
|
||||
# Unicode string
|
||||
elif format == 0b0110:
|
||||
extra = proc_extra(extra)
|
||||
result = self.readUnicode(extra)
|
||||
# uid
|
||||
elif format == 0b1000:
|
||||
result = self.readUid(extra)
|
||||
# array
|
||||
elif format == 0b1010:
|
||||
extra = proc_extra(extra)
|
||||
result = self.readArray(extra)
|
||||
# set
|
||||
elif format == 0b1100:
|
||||
extra = proc_extra(extra)
|
||||
result = set(self.readArray(extra))
|
||||
# dict
|
||||
elif format == 0b1101:
|
||||
extra = proc_extra(extra)
|
||||
result = self.readDict(extra)
|
||||
else:
|
||||
raise InvalidPlistException("Invalid object found: {format: %s, extra: %s}" % (bin(format), bin(extra)))
|
||||
return result
|
||||
|
||||
def readInteger(self, byteSize):
|
||||
result = 0
|
||||
original_offset = self.currentOffset
|
||||
data = self.contents[self.currentOffset:self.currentOffset + byteSize]
|
||||
result = self.getSizedInteger(data, byteSize, as_number=True)
|
||||
self.currentOffset = original_offset + byteSize
|
||||
return result
|
||||
|
||||
def readReal(self, length):
|
||||
result = 0.0
|
||||
to_read = pow(2, length)
|
||||
data = self.contents[self.currentOffset:self.currentOffset+to_read]
|
||||
if length == 2: # 4 bytes
|
||||
result = unpack('>f', data)[0]
|
||||
elif length == 3: # 8 bytes
|
||||
result = unpack('>d', data)[0]
|
||||
else:
|
||||
raise InvalidPlistException("Unknown real of length %d bytes" % to_read)
|
||||
return result
|
||||
|
||||
def readRefs(self, count):
|
||||
refs = []
|
||||
i = 0
|
||||
while i < count:
|
||||
fragment = self.contents[self.currentOffset:self.currentOffset+self.trailer.objectRefSize]
|
||||
ref = self.getSizedInteger(fragment, len(fragment))
|
||||
refs.append(ref)
|
||||
self.currentOffset += self.trailer.objectRefSize
|
||||
i += 1
|
||||
return refs
|
||||
|
||||
def readArray(self, count):
|
||||
result = []
|
||||
values = self.readRefs(count)
|
||||
i = 0
|
||||
while i < len(values):
|
||||
self.setCurrentOffsetToObjectNumber(values[i])
|
||||
value = self.readObject()
|
||||
result.append(value)
|
||||
i += 1
|
||||
return result
|
||||
|
||||
def readDict(self, count):
|
||||
result = {}
|
||||
keys = self.readRefs(count)
|
||||
values = self.readRefs(count)
|
||||
i = 0
|
||||
while i < len(keys):
|
||||
self.setCurrentOffsetToObjectNumber(keys[i])
|
||||
key = self.readObject()
|
||||
self.setCurrentOffsetToObjectNumber(values[i])
|
||||
value = self.readObject()
|
||||
result[key] = value
|
||||
i += 1
|
||||
return result
|
||||
|
||||
def readAsciiString(self, length):
|
||||
result = unpack("!%ds" % length, self.contents[self.currentOffset:self.currentOffset+length])[0]
|
||||
self.currentOffset += length
|
||||
return result
|
||||
|
||||
def readUnicode(self, length):
|
||||
actual_length = length*2
|
||||
data = self.contents[self.currentOffset:self.currentOffset+actual_length]
|
||||
# unpack not needed?!! data = unpack(">%ds" % (actual_length), data)[0]
|
||||
self.currentOffset += actual_length
|
||||
return data.decode('utf_16_be')
|
||||
|
||||
def readDate(self):
|
||||
result = unpack(">d", self.contents[self.currentOffset:self.currentOffset+8])[0]
|
||||
# Use timedelta to workaround time_t size limitation on 32-bit python.
|
||||
result = datetime.timedelta(seconds=result) + apple_reference_date
|
||||
self.currentOffset += 8
|
||||
return result
|
||||
|
||||
def readData(self, length):
|
||||
result = self.contents[self.currentOffset:self.currentOffset+length]
|
||||
self.currentOffset += length
|
||||
return Data(result)
|
||||
|
||||
def readUid(self, length):
|
||||
return Uid(self.readInteger(length+1))
|
||||
|
||||
def getSizedInteger(self, data, byteSize, as_number=False):
|
||||
"""Numbers of 8 bytes are signed integers when they refer to numbers, but unsigned otherwise."""
|
||||
result = 0
|
||||
# 1, 2, and 4 byte integers are unsigned
|
||||
if byteSize == 1:
|
||||
result = unpack('>B', data)[0]
|
||||
elif byteSize == 2:
|
||||
result = unpack('>H', data)[0]
|
||||
elif byteSize == 4:
|
||||
result = unpack('>L', data)[0]
|
||||
elif byteSize == 8:
|
||||
if as_number:
|
||||
result = unpack('>q', data)[0]
|
||||
else:
|
||||
result = unpack('>Q', data)[0]
|
||||
elif byteSize <= 16:
|
||||
# Handle odd-sized or integers larger than 8 bytes
|
||||
# Don't naively go over 16 bytes, in order to prevent infinite loops.
|
||||
result = 0
|
||||
if hasattr(int, 'from_bytes'):
|
||||
result = int.from_bytes(data, 'big')
|
||||
else:
|
||||
for byte in data:
|
||||
result = (result << 8) | unpack('>B', byte)[0]
|
||||
else:
|
||||
raise InvalidPlistException("Encountered integer longer than 16 bytes.")
|
||||
return result
|
||||
|
||||
class HashableWrapper(object):
|
||||
def __init__(self, value):
|
||||
self.value = value
|
||||
def __repr__(self):
|
||||
return "<HashableWrapper: %s>" % [self.value]
|
||||
|
||||
class BoolWrapper(object):
|
||||
def __init__(self, value):
|
||||
self.value = value
|
||||
def __repr__(self):
|
||||
return "<BoolWrapper: %s>" % self.value
|
||||
|
||||
class FloatWrapper(object):
|
||||
_instances = {}
|
||||
def __new__(klass, value):
|
||||
# Ensure FloatWrapper(x) for a given float x is always the same object
|
||||
wrapper = klass._instances.get(value)
|
||||
if wrapper is None:
|
||||
wrapper = object.__new__(klass)
|
||||
wrapper.value = value
|
||||
klass._instances[value] = wrapper
|
||||
return wrapper
|
||||
def __repr__(self):
|
||||
return "<FloatWrapper: %s>" % self.value
|
||||
|
||||
class PlistWriter(object):
|
||||
header = b'bplist00bybiplist1.0'
|
||||
file = None
|
||||
byteCounts = None
|
||||
trailer = None
|
||||
computedUniques = None
|
||||
writtenReferences = None
|
||||
referencePositions = None
|
||||
wrappedTrue = None
|
||||
wrappedFalse = None
|
||||
|
||||
def __init__(self, file):
|
||||
self.reset()
|
||||
self.file = file
|
||||
self.wrappedTrue = BoolWrapper(True)
|
||||
self.wrappedFalse = BoolWrapper(False)
|
||||
|
||||
def reset(self):
|
||||
self.byteCounts = PlistByteCounts(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
|
||||
self.trailer = PlistTrailer(0, 0, 0, 0, 0)
|
||||
|
||||
# A set of all the uniques which have been computed.
|
||||
self.computedUniques = set()
|
||||
# A list of all the uniques which have been written.
|
||||
self.writtenReferences = {}
|
||||
# A dict of the positions of the written uniques.
|
||||
self.referencePositions = {}
|
||||
|
||||
def positionOfObjectReference(self, obj):
|
||||
"""If the given object has been written already, return its
|
||||
position in the offset table. Otherwise, return None."""
|
||||
return self.writtenReferences.get(obj)
|
||||
|
||||
def writeRoot(self, root):
|
||||
"""
|
||||
Strategy is:
|
||||
- write header
|
||||
- wrap root object so everything is hashable
|
||||
- compute size of objects which will be written
|
||||
- need to do this in order to know how large the object refs
|
||||
will be in the list/dict/set reference lists
|
||||
- write objects
|
||||
- keep objects in writtenReferences
|
||||
- keep positions of object references in referencePositions
|
||||
- write object references with the length computed previously
|
||||
- computer object reference length
|
||||
- write object reference positions
|
||||
- write trailer
|
||||
"""
|
||||
output = self.header
|
||||
wrapped_root = self.wrapRoot(root)
|
||||
should_reference_root = True#not isinstance(wrapped_root, HashableWrapper)
|
||||
self.computeOffsets(wrapped_root, asReference=should_reference_root, isRoot=True)
|
||||
self.trailer = self.trailer._replace(**{'objectRefSize':self.intSize(len(self.computedUniques))})
|
||||
(_, output) = self.writeObjectReference(wrapped_root, output)
|
||||
output = self.writeObject(wrapped_root, output, setReferencePosition=True)
|
||||
|
||||
# output size at this point is an upper bound on how big the
|
||||
# object reference offsets need to be.
|
||||
self.trailer = self.trailer._replace(**{
|
||||
'offsetSize':self.intSize(len(output)),
|
||||
'offsetCount':len(self.computedUniques),
|
||||
'offsetTableOffset':len(output),
|
||||
'topLevelObjectNumber':0
|
||||
})
|
||||
|
||||
output = self.writeOffsetTable(output)
|
||||
output += pack('!xxxxxxBBQQQ', *self.trailer)
|
||||
self.file.write(output)
|
||||
|
||||
def wrapRoot(self, root):
|
||||
if isinstance(root, bool):
|
||||
if root is True:
|
||||
return self.wrappedTrue
|
||||
else:
|
||||
return self.wrappedFalse
|
||||
elif isinstance(root, float):
|
||||
return FloatWrapper(root)
|
||||
elif isinstance(root, set):
|
||||
n = set()
|
||||
for value in root:
|
||||
n.add(self.wrapRoot(value))
|
||||
return HashableWrapper(n)
|
||||
elif isinstance(root, dict):
|
||||
n = {}
|
||||
for key, value in iteritems(root):
|
||||
n[self.wrapRoot(key)] = self.wrapRoot(value)
|
||||
return HashableWrapper(n)
|
||||
elif isinstance(root, list):
|
||||
n = []
|
||||
for value in root:
|
||||
n.append(self.wrapRoot(value))
|
||||
return HashableWrapper(n)
|
||||
elif isinstance(root, tuple):
|
||||
n = tuple([self.wrapRoot(value) for value in root])
|
||||
return HashableWrapper(n)
|
||||
else:
|
||||
return root
|
||||
|
||||
def incrementByteCount(self, field, incr=1):
|
||||
self.byteCounts = self.byteCounts._replace(**{field:self.byteCounts.__getattribute__(field) + incr})
|
||||
|
||||
def computeOffsets(self, obj, asReference=False, isRoot=False):
|
||||
def check_key(key):
|
||||
if key is None:
|
||||
raise InvalidPlistException('Dictionary keys cannot be null in plists.')
|
||||
elif isinstance(key, Data):
|
||||
raise InvalidPlistException('Data cannot be dictionary keys in plists.')
|
||||
elif not isinstance(key, (bytes, unicode)):
|
||||
raise InvalidPlistException('Keys must be strings.')
|
||||
|
||||
def proc_size(size):
|
||||
if size > 0b1110:
|
||||
size += self.intSize(size)
|
||||
return size
|
||||
# If this should be a reference, then we keep a record of it in the
|
||||
# uniques table.
|
||||
if asReference:
|
||||
if obj in self.computedUniques:
|
||||
return
|
||||
else:
|
||||
self.computedUniques.add(obj)
|
||||
|
||||
if obj is None:
|
||||
self.incrementByteCount('nullBytes')
|
||||
elif isinstance(obj, BoolWrapper):
|
||||
self.incrementByteCount('boolBytes')
|
||||
elif isinstance(obj, Uid):
|
||||
size = self.intSize(obj)
|
||||
self.incrementByteCount('uidBytes', incr=1+size)
|
||||
elif isinstance(obj, (int, long)):
|
||||
size = self.intSize(obj)
|
||||
self.incrementByteCount('intBytes', incr=1+size)
|
||||
elif isinstance(obj, FloatWrapper):
|
||||
size = self.realSize(obj)
|
||||
self.incrementByteCount('realBytes', incr=1+size)
|
||||
elif isinstance(obj, datetime.datetime):
|
||||
self.incrementByteCount('dateBytes', incr=2)
|
||||
elif isinstance(obj, Data):
|
||||
size = proc_size(len(obj))
|
||||
self.incrementByteCount('dataBytes', incr=1+size)
|
||||
elif isinstance(obj, (unicode, bytes)):
|
||||
size = proc_size(len(obj))
|
||||
self.incrementByteCount('stringBytes', incr=1+size)
|
||||
elif isinstance(obj, HashableWrapper):
|
||||
obj = obj.value
|
||||
if isinstance(obj, set):
|
||||
size = proc_size(len(obj))
|
||||
self.incrementByteCount('setBytes', incr=1+size)
|
||||
for value in obj:
|
||||
self.computeOffsets(value, asReference=True)
|
||||
elif isinstance(obj, (list, tuple)):
|
||||
size = proc_size(len(obj))
|
||||
self.incrementByteCount('arrayBytes', incr=1+size)
|
||||
for value in obj:
|
||||
asRef = True
|
||||
self.computeOffsets(value, asReference=True)
|
||||
elif isinstance(obj, dict):
|
||||
size = proc_size(len(obj))
|
||||
self.incrementByteCount('dictBytes', incr=1+size)
|
||||
for key, value in iteritems(obj):
|
||||
check_key(key)
|
||||
self.computeOffsets(key, asReference=True)
|
||||
self.computeOffsets(value, asReference=True)
|
||||
else:
|
||||
raise InvalidPlistException("Unknown object type.")
|
||||
|
||||
def writeObjectReference(self, obj, output):
|
||||
"""Tries to write an object reference, adding it to the references
|
||||
table. Does not write the actual object bytes or set the reference
|
||||
position. Returns a tuple of whether the object was a new reference
|
||||
(True if it was, False if it already was in the reference table)
|
||||
and the new output.
|
||||
"""
|
||||
position = self.positionOfObjectReference(obj)
|
||||
if position is None:
|
||||
self.writtenReferences[obj] = len(self.writtenReferences)
|
||||
output += self.binaryInt(len(self.writtenReferences) - 1, byteSize=self.trailer.objectRefSize)
|
||||
return (True, output)
|
||||
else:
|
||||
output += self.binaryInt(position, byteSize=self.trailer.objectRefSize)
|
||||
return (False, output)
|
||||
|
||||
def writeObject(self, obj, output, setReferencePosition=False):
|
||||
"""Serializes the given object to the output. Returns output.
|
||||
If setReferencePosition is True, will set the position the
|
||||
object was written.
|
||||
"""
|
||||
def proc_variable_length(format, length):
|
||||
result = b''
|
||||
if length > 0b1110:
|
||||
result += pack('!B', (format << 4) | 0b1111)
|
||||
result = self.writeObject(length, result)
|
||||
else:
|
||||
result += pack('!B', (format << 4) | length)
|
||||
return result
|
||||
|
||||
if isinstance(obj, (str, unicode)) and obj == unicodeEmpty:
|
||||
# The Apple Plist decoder can't decode a zero length Unicode string.
|
||||
obj = b''
|
||||
|
||||
if setReferencePosition:
|
||||
self.referencePositions[obj] = len(output)
|
||||
|
||||
if obj is None:
|
||||
output += pack('!B', 0b00000000)
|
||||
elif isinstance(obj, BoolWrapper):
|
||||
if obj.value is False:
|
||||
output += pack('!B', 0b00001000)
|
||||
else:
|
||||
output += pack('!B', 0b00001001)
|
||||
elif isinstance(obj, Uid):
|
||||
size = self.intSize(obj)
|
||||
output += pack('!B', (0b1000 << 4) | size - 1)
|
||||
output += self.binaryInt(obj)
|
||||
elif isinstance(obj, (int, long)):
|
||||
byteSize = self.intSize(obj)
|
||||
root = math.log(byteSize, 2)
|
||||
output += pack('!B', (0b0001 << 4) | int(root))
|
||||
output += self.binaryInt(obj, as_number=True)
|
||||
elif isinstance(obj, FloatWrapper):
|
||||
# just use doubles
|
||||
output += pack('!B', (0b0010 << 4) | 3)
|
||||
output += self.binaryReal(obj)
|
||||
elif isinstance(obj, datetime.datetime):
|
||||
timestamp = (obj - apple_reference_date).total_seconds()
|
||||
output += pack('!B', 0b00110011)
|
||||
output += pack('!d', float(timestamp))
|
||||
elif isinstance(obj, Data):
|
||||
output += proc_variable_length(0b0100, len(obj))
|
||||
output += obj
|
||||
elif isinstance(obj, unicode):
|
||||
byteData = obj.encode('utf_16_be')
|
||||
output += proc_variable_length(0b0110, len(byteData)//2)
|
||||
output += byteData
|
||||
elif isinstance(obj, bytes):
|
||||
output += proc_variable_length(0b0101, len(obj))
|
||||
output += obj
|
||||
elif isinstance(obj, HashableWrapper):
|
||||
obj = obj.value
|
||||
if isinstance(obj, (set, list, tuple)):
|
||||
if isinstance(obj, set):
|
||||
output += proc_variable_length(0b1100, len(obj))
|
||||
else:
|
||||
output += proc_variable_length(0b1010, len(obj))
|
||||
|
||||
objectsToWrite = []
|
||||
for objRef in obj:
|
||||
(isNew, output) = self.writeObjectReference(objRef, output)
|
||||
if isNew:
|
||||
objectsToWrite.append(objRef)
|
||||
for objRef in objectsToWrite:
|
||||
output = self.writeObject(objRef, output, setReferencePosition=True)
|
||||
elif isinstance(obj, dict):
|
||||
output += proc_variable_length(0b1101, len(obj))
|
||||
keys = []
|
||||
values = []
|
||||
objectsToWrite = []
|
||||
for key, value in iteritems(obj):
|
||||
keys.append(key)
|
||||
values.append(value)
|
||||
for key in keys:
|
||||
(isNew, output) = self.writeObjectReference(key, output)
|
||||
if isNew:
|
||||
objectsToWrite.append(key)
|
||||
for value in values:
|
||||
(isNew, output) = self.writeObjectReference(value, output)
|
||||
if isNew:
|
||||
objectsToWrite.append(value)
|
||||
for objRef in objectsToWrite:
|
||||
output = self.writeObject(objRef, output, setReferencePosition=True)
|
||||
return output
|
||||
|
||||
def writeOffsetTable(self, output):
|
||||
"""Writes all of the object reference offsets."""
|
||||
all_positions = []
|
||||
writtenReferences = list(self.writtenReferences.items())
|
||||
writtenReferences.sort(key=lambda x: x[1])
|
||||
for obj,order in writtenReferences:
|
||||
# Porting note: Elsewhere we deliberately replace empty unicdoe strings
|
||||
# with empty binary strings, but the empty unicode string
|
||||
# goes into writtenReferences. This isn't an issue in Py2
|
||||
# because u'' and b'' have the same hash; but it is in
|
||||
# Py3, where they don't.
|
||||
if bytes != str and obj == unicodeEmpty:
|
||||
obj = b''
|
||||
position = self.referencePositions.get(obj)
|
||||
if position is None:
|
||||
raise InvalidPlistException("Error while writing offsets table. Object not found. %s" % obj)
|
||||
output += self.binaryInt(position, self.trailer.offsetSize)
|
||||
all_positions.append(position)
|
||||
return output
|
||||
|
||||
def binaryReal(self, obj):
|
||||
# just use doubles
|
||||
result = pack('>d', obj.value)
|
||||
return result
|
||||
|
||||
def binaryInt(self, obj, byteSize=None, as_number=False):
|
||||
result = b''
|
||||
if byteSize is None:
|
||||
byteSize = self.intSize(obj)
|
||||
if byteSize == 1:
|
||||
result += pack('>B', obj)
|
||||
elif byteSize == 2:
|
||||
result += pack('>H', obj)
|
||||
elif byteSize == 4:
|
||||
result += pack('>L', obj)
|
||||
elif byteSize == 8:
|
||||
if as_number:
|
||||
result += pack('>q', obj)
|
||||
else:
|
||||
result += pack('>Q', obj)
|
||||
elif byteSize <= 16:
|
||||
try:
|
||||
result = pack('>Q', 0) + pack('>Q', obj)
|
||||
except struct_error as e:
|
||||
raise InvalidPlistException("Unable to pack integer %d: %s" % (obj, e))
|
||||
else:
|
||||
raise InvalidPlistException("Core Foundation can't handle integers with size greater than 16 bytes.")
|
||||
return result
|
||||
|
||||
def intSize(self, obj):
|
||||
"""Returns the number of bytes necessary to store the given integer."""
|
||||
# SIGNED
|
||||
if obj < 0: # Signed integer, always 8 bytes
|
||||
return 8
|
||||
# UNSIGNED
|
||||
elif obj <= 0xFF: # 1 byte
|
||||
return 1
|
||||
elif obj <= 0xFFFF: # 2 bytes
|
||||
return 2
|
||||
elif obj <= 0xFFFFFFFF: # 4 bytes
|
||||
return 4
|
||||
# SIGNED
|
||||
# 0x7FFFFFFFFFFFFFFF is the max.
|
||||
elif obj <= 0x7FFFFFFFFFFFFFFF: # 8 bytes signed
|
||||
return 8
|
||||
elif obj <= 0xffffffffffffffff: # 8 bytes unsigned
|
||||
return 16
|
||||
else:
|
||||
raise InvalidPlistException("Core Foundation can't handle integers with size greater than 8 bytes.")
|
||||
|
||||
def realSize(self, obj):
|
||||
return 8
|
||||
406
lib/bs4/__init__.py
Normal file
406
lib/bs4/__init__.py
Normal file
@@ -0,0 +1,406 @@
|
||||
"""Beautiful Soup
|
||||
Elixir and Tonic
|
||||
"The Screen-Scraper's Friend"
|
||||
http://www.crummy.com/software/BeautifulSoup/
|
||||
|
||||
Beautiful Soup uses a pluggable XML or HTML parser to parse a
|
||||
(possibly invalid) document into a tree representation. Beautiful Soup
|
||||
provides provides methods and Pythonic idioms that make it easy to
|
||||
navigate, search, and modify the parse tree.
|
||||
|
||||
Beautiful Soup works with Python 2.6 and up. It works better if lxml
|
||||
and/or html5lib is installed.
|
||||
|
||||
For more than you ever wanted to know about Beautiful Soup, see the
|
||||
documentation:
|
||||
http://www.crummy.com/software/BeautifulSoup/bs4/doc/
|
||||
"""
|
||||
|
||||
__author__ = "Leonard Richardson (leonardr@segfault.org)"
|
||||
__version__ = "4.3.2"
|
||||
__copyright__ = "Copyright (c) 2004-2013 Leonard Richardson"
|
||||
__license__ = "MIT"
|
||||
|
||||
__all__ = ['BeautifulSoup']
|
||||
|
||||
import os
|
||||
import re
|
||||
import warnings
|
||||
|
||||
from .builder import builder_registry, ParserRejectedMarkup
|
||||
from .dammit import UnicodeDammit
|
||||
from .element import (
|
||||
CData,
|
||||
Comment,
|
||||
DEFAULT_OUTPUT_ENCODING,
|
||||
Declaration,
|
||||
Doctype,
|
||||
NavigableString,
|
||||
PageElement,
|
||||
ProcessingInstruction,
|
||||
ResultSet,
|
||||
SoupStrainer,
|
||||
Tag,
|
||||
)
|
||||
|
||||
# The very first thing we do is give a useful error if someone is
|
||||
# running this code under Python 3 without converting it.
|
||||
syntax_error = u'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work. You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).'
|
||||
|
||||
class BeautifulSoup(Tag):
|
||||
"""
|
||||
This class defines the basic interface called by the tree builders.
|
||||
|
||||
These methods will be called by the parser:
|
||||
reset()
|
||||
feed(markup)
|
||||
|
||||
The tree builder may call these methods from its feed() implementation:
|
||||
handle_starttag(name, attrs) # See note about return value
|
||||
handle_endtag(name)
|
||||
handle_data(data) # Appends to the current data node
|
||||
endData(containerClass=NavigableString) # Ends the current data node
|
||||
|
||||
No matter how complicated the underlying parser is, you should be
|
||||
able to build a tree using 'start tag' events, 'end tag' events,
|
||||
'data' events, and "done with data" events.
|
||||
|
||||
If you encounter an empty-element tag (aka a self-closing tag,
|
||||
like HTML's <br> tag), call handle_starttag and then
|
||||
handle_endtag.
|
||||
"""
|
||||
ROOT_TAG_NAME = u'[document]'
|
||||
|
||||
# If the end-user gives no indication which tree builder they
|
||||
# want, look for one with these features.
|
||||
DEFAULT_BUILDER_FEATURES = ['html', 'fast']
|
||||
|
||||
ASCII_SPACES = '\x20\x0a\x09\x0c\x0d'
|
||||
|
||||
def __init__(self, markup="", features=None, builder=None,
|
||||
parse_only=None, from_encoding=None, **kwargs):
|
||||
"""The Soup object is initialized as the 'root tag', and the
|
||||
provided markup (which can be a string or a file-like object)
|
||||
is fed into the underlying parser."""
|
||||
|
||||
if 'convertEntities' in kwargs:
|
||||
warnings.warn(
|
||||
"BS4 does not respect the convertEntities argument to the "
|
||||
"BeautifulSoup constructor. Entities are always converted "
|
||||
"to Unicode characters.")
|
||||
|
||||
if 'markupMassage' in kwargs:
|
||||
del kwargs['markupMassage']
|
||||
warnings.warn(
|
||||
"BS4 does not respect the markupMassage argument to the "
|
||||
"BeautifulSoup constructor. The tree builder is responsible "
|
||||
"for any necessary markup massage.")
|
||||
|
||||
if 'smartQuotesTo' in kwargs:
|
||||
del kwargs['smartQuotesTo']
|
||||
warnings.warn(
|
||||
"BS4 does not respect the smartQuotesTo argument to the "
|
||||
"BeautifulSoup constructor. Smart quotes are always converted "
|
||||
"to Unicode characters.")
|
||||
|
||||
if 'selfClosingTags' in kwargs:
|
||||
del kwargs['selfClosingTags']
|
||||
warnings.warn(
|
||||
"BS4 does not respect the selfClosingTags argument to the "
|
||||
"BeautifulSoup constructor. The tree builder is responsible "
|
||||
"for understanding self-closing tags.")
|
||||
|
||||
if 'isHTML' in kwargs:
|
||||
del kwargs['isHTML']
|
||||
warnings.warn(
|
||||
"BS4 does not respect the isHTML argument to the "
|
||||
"BeautifulSoup constructor. You can pass in features='html' "
|
||||
"or features='xml' to get a builder capable of handling "
|
||||
"one or the other.")
|
||||
|
||||
def deprecated_argument(old_name, new_name):
|
||||
if old_name in kwargs:
|
||||
warnings.warn(
|
||||
'The "%s" argument to the BeautifulSoup constructor '
|
||||
'has been renamed to "%s."' % (old_name, new_name))
|
||||
value = kwargs[old_name]
|
||||
del kwargs[old_name]
|
||||
return value
|
||||
return None
|
||||
|
||||
parse_only = parse_only or deprecated_argument(
|
||||
"parseOnlyThese", "parse_only")
|
||||
|
||||
from_encoding = from_encoding or deprecated_argument(
|
||||
"fromEncoding", "from_encoding")
|
||||
|
||||
if len(kwargs) > 0:
|
||||
arg = kwargs.keys().pop()
|
||||
raise TypeError(
|
||||
"__init__() got an unexpected keyword argument '%s'" % arg)
|
||||
|
||||
if builder is None:
|
||||
if isinstance(features, basestring):
|
||||
features = [features]
|
||||
if features is None or len(features) == 0:
|
||||
features = self.DEFAULT_BUILDER_FEATURES
|
||||
builder_class = builder_registry.lookup(*features)
|
||||
if builder_class is None:
|
||||
raise FeatureNotFound(
|
||||
"Couldn't find a tree builder with the features you "
|
||||
"requested: %s. Do you need to install a parser library?"
|
||||
% ",".join(features))
|
||||
builder = builder_class()
|
||||
self.builder = builder
|
||||
self.is_xml = builder.is_xml
|
||||
self.builder.soup = self
|
||||
|
||||
self.parse_only = parse_only
|
||||
|
||||
if hasattr(markup, 'read'): # It's a file-type object.
|
||||
markup = markup.read()
|
||||
elif len(markup) <= 256:
|
||||
# Print out warnings for a couple beginner problems
|
||||
# involving passing non-markup to Beautiful Soup.
|
||||
# Beautiful Soup will still parse the input as markup,
|
||||
# just in case that's what the user really wants.
|
||||
if (isinstance(markup, unicode)
|
||||
and not os.path.supports_unicode_filenames):
|
||||
possible_filename = markup.encode("utf8")
|
||||
else:
|
||||
possible_filename = markup
|
||||
is_file = False
|
||||
try:
|
||||
is_file = os.path.exists(possible_filename)
|
||||
except Exception, e:
|
||||
# This is almost certainly a problem involving
|
||||
# characters not valid in filenames on this
|
||||
# system. Just let it go.
|
||||
pass
|
||||
if is_file:
|
||||
warnings.warn(
|
||||
'"%s" looks like a filename, not markup. You should probably open this file and pass the filehandle into Beautiful Soup.' % markup)
|
||||
if markup[:5] == "http:" or markup[:6] == "https:":
|
||||
# TODO: This is ugly but I couldn't get it to work in
|
||||
# Python 3 otherwise.
|
||||
if ((isinstance(markup, bytes) and not b' ' in markup)
|
||||
or (isinstance(markup, unicode) and not u' ' in markup)):
|
||||
warnings.warn(
|
||||
'"%s" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client to get the document behind the URL, and feed that document to Beautiful Soup.' % markup)
|
||||
|
||||
for (self.markup, self.original_encoding, self.declared_html_encoding,
|
||||
self.contains_replacement_characters) in (
|
||||
self.builder.prepare_markup(markup, from_encoding)):
|
||||
self.reset()
|
||||
try:
|
||||
self._feed()
|
||||
break
|
||||
except ParserRejectedMarkup:
|
||||
pass
|
||||
|
||||
# Clear out the markup and remove the builder's circular
|
||||
# reference to this object.
|
||||
self.markup = None
|
||||
self.builder.soup = None
|
||||
|
||||
def _feed(self):
|
||||
# Convert the document to Unicode.
|
||||
self.builder.reset()
|
||||
|
||||
self.builder.feed(self.markup)
|
||||
# Close out any unfinished strings and close all the open tags.
|
||||
self.endData()
|
||||
while self.currentTag.name != self.ROOT_TAG_NAME:
|
||||
self.popTag()
|
||||
|
||||
def reset(self):
|
||||
Tag.__init__(self, self, self.builder, self.ROOT_TAG_NAME)
|
||||
self.hidden = 1
|
||||
self.builder.reset()
|
||||
self.current_data = []
|
||||
self.currentTag = None
|
||||
self.tagStack = []
|
||||
self.preserve_whitespace_tag_stack = []
|
||||
self.pushTag(self)
|
||||
|
||||
def new_tag(self, name, namespace=None, nsprefix=None, **attrs):
|
||||
"""Create a new tag associated with this soup."""
|
||||
return Tag(None, self.builder, name, namespace, nsprefix, attrs)
|
||||
|
||||
def new_string(self, s, subclass=NavigableString):
|
||||
"""Create a new NavigableString associated with this soup."""
|
||||
navigable = subclass(s)
|
||||
navigable.setup()
|
||||
return navigable
|
||||
|
||||
def insert_before(self, successor):
|
||||
raise NotImplementedError("BeautifulSoup objects don't support insert_before().")
|
||||
|
||||
def insert_after(self, successor):
|
||||
raise NotImplementedError("BeautifulSoup objects don't support insert_after().")
|
||||
|
||||
def popTag(self):
|
||||
tag = self.tagStack.pop()
|
||||
if self.preserve_whitespace_tag_stack and tag == self.preserve_whitespace_tag_stack[-1]:
|
||||
self.preserve_whitespace_tag_stack.pop()
|
||||
#print "Pop", tag.name
|
||||
if self.tagStack:
|
||||
self.currentTag = self.tagStack[-1]
|
||||
return self.currentTag
|
||||
|
||||
def pushTag(self, tag):
|
||||
#print "Push", tag.name
|
||||
if self.currentTag:
|
||||
self.currentTag.contents.append(tag)
|
||||
self.tagStack.append(tag)
|
||||
self.currentTag = self.tagStack[-1]
|
||||
if tag.name in self.builder.preserve_whitespace_tags:
|
||||
self.preserve_whitespace_tag_stack.append(tag)
|
||||
|
||||
def endData(self, containerClass=NavigableString):
|
||||
if self.current_data:
|
||||
current_data = u''.join(self.current_data)
|
||||
# If whitespace is not preserved, and this string contains
|
||||
# nothing but ASCII spaces, replace it with a single space
|
||||
# or newline.
|
||||
if not self.preserve_whitespace_tag_stack:
|
||||
strippable = True
|
||||
for i in current_data:
|
||||
if i not in self.ASCII_SPACES:
|
||||
strippable = False
|
||||
break
|
||||
if strippable:
|
||||
if '\n' in current_data:
|
||||
current_data = '\n'
|
||||
else:
|
||||
current_data = ' '
|
||||
|
||||
# Reset the data collector.
|
||||
self.current_data = []
|
||||
|
||||
# Should we add this string to the tree at all?
|
||||
if self.parse_only and len(self.tagStack) <= 1 and \
|
||||
(not self.parse_only.text or \
|
||||
not self.parse_only.search(current_data)):
|
||||
return
|
||||
|
||||
o = containerClass(current_data)
|
||||
self.object_was_parsed(o)
|
||||
|
||||
def object_was_parsed(self, o, parent=None, most_recent_element=None):
|
||||
"""Add an object to the parse tree."""
|
||||
parent = parent or self.currentTag
|
||||
most_recent_element = most_recent_element or self._most_recent_element
|
||||
o.setup(parent, most_recent_element)
|
||||
|
||||
if most_recent_element is not None:
|
||||
most_recent_element.next_element = o
|
||||
self._most_recent_element = o
|
||||
parent.contents.append(o)
|
||||
|
||||
def _popToTag(self, name, nsprefix=None, inclusivePop=True):
|
||||
"""Pops the tag stack up to and including the most recent
|
||||
instance of the given tag. If inclusivePop is false, pops the tag
|
||||
stack up to but *not* including the most recent instqance of
|
||||
the given tag."""
|
||||
#print "Popping to %s" % name
|
||||
if name == self.ROOT_TAG_NAME:
|
||||
# The BeautifulSoup object itself can never be popped.
|
||||
return
|
||||
|
||||
most_recently_popped = None
|
||||
|
||||
stack_size = len(self.tagStack)
|
||||
for i in range(stack_size - 1, 0, -1):
|
||||
t = self.tagStack[i]
|
||||
if (name == t.name and nsprefix == t.prefix):
|
||||
if inclusivePop:
|
||||
most_recently_popped = self.popTag()
|
||||
break
|
||||
most_recently_popped = self.popTag()
|
||||
|
||||
return most_recently_popped
|
||||
|
||||
def handle_starttag(self, name, namespace, nsprefix, attrs):
|
||||
"""Push a start tag on to the stack.
|
||||
|
||||
If this method returns None, the tag was rejected by the
|
||||
SoupStrainer. You should proceed as if the tag had not occured
|
||||
in the document. For instance, if this was a self-closing tag,
|
||||
don't call handle_endtag.
|
||||
"""
|
||||
|
||||
# print "Start tag %s: %s" % (name, attrs)
|
||||
self.endData()
|
||||
|
||||
if (self.parse_only and len(self.tagStack) <= 1
|
||||
and (self.parse_only.text
|
||||
or not self.parse_only.search_tag(name, attrs))):
|
||||
return None
|
||||
|
||||
tag = Tag(self, self.builder, name, namespace, nsprefix, attrs,
|
||||
self.currentTag, self._most_recent_element)
|
||||
if tag is None:
|
||||
return tag
|
||||
if self._most_recent_element:
|
||||
self._most_recent_element.next_element = tag
|
||||
self._most_recent_element = tag
|
||||
self.pushTag(tag)
|
||||
return tag
|
||||
|
||||
def handle_endtag(self, name, nsprefix=None):
|
||||
#print "End tag: " + name
|
||||
self.endData()
|
||||
self._popToTag(name, nsprefix)
|
||||
|
||||
def handle_data(self, data):
|
||||
self.current_data.append(data)
|
||||
|
||||
def decode(self, pretty_print=False,
|
||||
eventual_encoding=DEFAULT_OUTPUT_ENCODING,
|
||||
formatter="minimal"):
|
||||
"""Returns a string or Unicode representation of this document.
|
||||
To get Unicode, pass None for encoding."""
|
||||
|
||||
if self.is_xml:
|
||||
# Print the XML declaration
|
||||
encoding_part = ''
|
||||
if eventual_encoding != None:
|
||||
encoding_part = ' encoding="%s"' % eventual_encoding
|
||||
prefix = u'<?xml version="1.0"%s?>\n' % encoding_part
|
||||
else:
|
||||
prefix = u''
|
||||
if not pretty_print:
|
||||
indent_level = None
|
||||
else:
|
||||
indent_level = 0
|
||||
return prefix + super(BeautifulSoup, self).decode(
|
||||
indent_level, eventual_encoding, formatter)
|
||||
|
||||
# Alias to make it easier to type import: 'from bs4 import _soup'
|
||||
_s = BeautifulSoup
|
||||
_soup = BeautifulSoup
|
||||
|
||||
class BeautifulStoneSoup(BeautifulSoup):
|
||||
"""Deprecated interface to an XML parser."""
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
kwargs['features'] = 'xml'
|
||||
warnings.warn(
|
||||
'The BeautifulStoneSoup class is deprecated. Instead of using '
|
||||
'it, pass features="xml" into the BeautifulSoup constructor.')
|
||||
super(BeautifulStoneSoup, self).__init__(*args, **kwargs)
|
||||
|
||||
|
||||
class StopParsing(Exception):
|
||||
pass
|
||||
|
||||
class FeatureNotFound(ValueError):
|
||||
pass
|
||||
|
||||
|
||||
#By default, act as an HTML pretty-printer.
|
||||
if __name__ == '__main__':
|
||||
import sys
|
||||
soup = BeautifulSoup(sys.stdin)
|
||||
print soup.prettify()
|
||||
321
lib/bs4/builder/__init__.py
Normal file
321
lib/bs4/builder/__init__.py
Normal file
@@ -0,0 +1,321 @@
|
||||
from collections import defaultdict
|
||||
import itertools
|
||||
import sys
|
||||
from bs4.element import (
|
||||
CharsetMetaAttributeValue,
|
||||
ContentMetaAttributeValue,
|
||||
whitespace_re
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
'HTMLTreeBuilder',
|
||||
'SAXTreeBuilder',
|
||||
'TreeBuilder',
|
||||
'TreeBuilderRegistry',
|
||||
]
|
||||
|
||||
# Some useful features for a TreeBuilder to have.
|
||||
FAST = 'fast'
|
||||
PERMISSIVE = 'permissive'
|
||||
STRICT = 'strict'
|
||||
XML = 'xml'
|
||||
HTML = 'html'
|
||||
HTML_5 = 'html5'
|
||||
|
||||
|
||||
class TreeBuilderRegistry(object):
|
||||
|
||||
def __init__(self):
|
||||
self.builders_for_feature = defaultdict(list)
|
||||
self.builders = []
|
||||
|
||||
def register(self, treebuilder_class):
|
||||
"""Register a treebuilder based on its advertised features."""
|
||||
for feature in treebuilder_class.features:
|
||||
self.builders_for_feature[feature].insert(0, treebuilder_class)
|
||||
self.builders.insert(0, treebuilder_class)
|
||||
|
||||
def lookup(self, *features):
|
||||
if len(self.builders) == 0:
|
||||
# There are no builders at all.
|
||||
return None
|
||||
|
||||
if len(features) == 0:
|
||||
# They didn't ask for any features. Give them the most
|
||||
# recently registered builder.
|
||||
return self.builders[0]
|
||||
|
||||
# Go down the list of features in order, and eliminate any builders
|
||||
# that don't match every feature.
|
||||
features = list(features)
|
||||
features.reverse()
|
||||
candidates = None
|
||||
candidate_set = None
|
||||
while len(features) > 0:
|
||||
feature = features.pop()
|
||||
we_have_the_feature = self.builders_for_feature.get(feature, [])
|
||||
if len(we_have_the_feature) > 0:
|
||||
if candidates is None:
|
||||
candidates = we_have_the_feature
|
||||
candidate_set = set(candidates)
|
||||
else:
|
||||
# Eliminate any candidates that don't have this feature.
|
||||
candidate_set = candidate_set.intersection(
|
||||
set(we_have_the_feature))
|
||||
|
||||
# The only valid candidates are the ones in candidate_set.
|
||||
# Go through the original list of candidates and pick the first one
|
||||
# that's in candidate_set.
|
||||
if candidate_set is None:
|
||||
return None
|
||||
for candidate in candidates:
|
||||
if candidate in candidate_set:
|
||||
return candidate
|
||||
return None
|
||||
|
||||
# The BeautifulSoup class will take feature lists from developers and use them
|
||||
# to look up builders in this registry.
|
||||
builder_registry = TreeBuilderRegistry()
|
||||
|
||||
class TreeBuilder(object):
|
||||
"""Turn a document into a Beautiful Soup object tree."""
|
||||
|
||||
features = []
|
||||
|
||||
is_xml = False
|
||||
preserve_whitespace_tags = set()
|
||||
empty_element_tags = None # A tag will be considered an empty-element
|
||||
# tag when and only when it has no contents.
|
||||
|
||||
# A value for these tag/attribute combinations is a space- or
|
||||
# comma-separated list of CDATA, rather than a single CDATA.
|
||||
cdata_list_attributes = {}
|
||||
|
||||
|
||||
def __init__(self):
|
||||
self.soup = None
|
||||
|
||||
def reset(self):
|
||||
pass
|
||||
|
||||
def can_be_empty_element(self, tag_name):
|
||||
"""Might a tag with this name be an empty-element tag?
|
||||
|
||||
The final markup may or may not actually present this tag as
|
||||
self-closing.
|
||||
|
||||
For instance: an HTMLBuilder does not consider a <p> tag to be
|
||||
an empty-element tag (it's not in
|
||||
HTMLBuilder.empty_element_tags). This means an empty <p> tag
|
||||
will be presented as "<p></p>", not "<p />".
|
||||
|
||||
The default implementation has no opinion about which tags are
|
||||
empty-element tags, so a tag will be presented as an
|
||||
empty-element tag if and only if it has no contents.
|
||||
"<foo></foo>" will become "<foo />", and "<foo>bar</foo>" will
|
||||
be left alone.
|
||||
"""
|
||||
if self.empty_element_tags is None:
|
||||
return True
|
||||
return tag_name in self.empty_element_tags
|
||||
|
||||
def feed(self, markup):
|
||||
raise NotImplementedError()
|
||||
|
||||
def prepare_markup(self, markup, user_specified_encoding=None,
|
||||
document_declared_encoding=None):
|
||||
return markup, None, None, False
|
||||
|
||||
def test_fragment_to_document(self, fragment):
|
||||
"""Wrap an HTML fragment to make it look like a document.
|
||||
|
||||
Different parsers do this differently. For instance, lxml
|
||||
introduces an empty <head> tag, and html5lib
|
||||
doesn't. Abstracting this away lets us write simple tests
|
||||
which run HTML fragments through the parser and compare the
|
||||
results against other HTML fragments.
|
||||
|
||||
This method should not be used outside of tests.
|
||||
"""
|
||||
return fragment
|
||||
|
||||
def set_up_substitutions(self, tag):
|
||||
return False
|
||||
|
||||
def _replace_cdata_list_attribute_values(self, tag_name, attrs):
|
||||
"""Replaces class="foo bar" with class=["foo", "bar"]
|
||||
|
||||
Modifies its input in place.
|
||||
"""
|
||||
if not attrs:
|
||||
return attrs
|
||||
if self.cdata_list_attributes:
|
||||
universal = self.cdata_list_attributes.get('*', [])
|
||||
tag_specific = self.cdata_list_attributes.get(
|
||||
tag_name.lower(), None)
|
||||
for attr in attrs.keys():
|
||||
if attr in universal or (tag_specific and attr in tag_specific):
|
||||
# We have a "class"-type attribute whose string
|
||||
# value is a whitespace-separated list of
|
||||
# values. Split it into a list.
|
||||
value = attrs[attr]
|
||||
if isinstance(value, basestring):
|
||||
values = whitespace_re.split(value)
|
||||
else:
|
||||
# html5lib sometimes calls setAttributes twice
|
||||
# for the same tag when rearranging the parse
|
||||
# tree. On the second call the attribute value
|
||||
# here is already a list. If this happens,
|
||||
# leave the value alone rather than trying to
|
||||
# split it again.
|
||||
values = value
|
||||
attrs[attr] = values
|
||||
return attrs
|
||||
|
||||
class SAXTreeBuilder(TreeBuilder):
|
||||
"""A Beautiful Soup treebuilder that listens for SAX events."""
|
||||
|
||||
def feed(self, markup):
|
||||
raise NotImplementedError()
|
||||
|
||||
def close(self):
|
||||
pass
|
||||
|
||||
def startElement(self, name, attrs):
|
||||
attrs = dict((key[1], value) for key, value in list(attrs.items()))
|
||||
#print "Start %s, %r" % (name, attrs)
|
||||
self.soup.handle_starttag(name, attrs)
|
||||
|
||||
def endElement(self, name):
|
||||
#print "End %s" % name
|
||||
self.soup.handle_endtag(name)
|
||||
|
||||
def startElementNS(self, nsTuple, nodeName, attrs):
|
||||
# Throw away (ns, nodeName) for now.
|
||||
self.startElement(nodeName, attrs)
|
||||
|
||||
def endElementNS(self, nsTuple, nodeName):
|
||||
# Throw away (ns, nodeName) for now.
|
||||
self.endElement(nodeName)
|
||||
#handler.endElementNS((ns, node.nodeName), node.nodeName)
|
||||
|
||||
def startPrefixMapping(self, prefix, nodeValue):
|
||||
# Ignore the prefix for now.
|
||||
pass
|
||||
|
||||
def endPrefixMapping(self, prefix):
|
||||
# Ignore the prefix for now.
|
||||
# handler.endPrefixMapping(prefix)
|
||||
pass
|
||||
|
||||
def characters(self, content):
|
||||
self.soup.handle_data(content)
|
||||
|
||||
def startDocument(self):
|
||||
pass
|
||||
|
||||
def endDocument(self):
|
||||
pass
|
||||
|
||||
|
||||
class HTMLTreeBuilder(TreeBuilder):
|
||||
"""This TreeBuilder knows facts about HTML.
|
||||
|
||||
Such as which tags are empty-element tags.
|
||||
"""
|
||||
|
||||
preserve_whitespace_tags = set(['pre', 'textarea'])
|
||||
empty_element_tags = set(['br' , 'hr', 'input', 'img', 'meta',
|
||||
'spacer', 'link', 'frame', 'base'])
|
||||
|
||||
# The HTML standard defines these attributes as containing a
|
||||
# space-separated list of values, not a single value. That is,
|
||||
# class="foo bar" means that the 'class' attribute has two values,
|
||||
# 'foo' and 'bar', not the single value 'foo bar'. When we
|
||||
# encounter one of these attributes, we will parse its value into
|
||||
# a list of values if possible. Upon output, the list will be
|
||||
# converted back into a string.
|
||||
cdata_list_attributes = {
|
||||
"*" : ['class', 'accesskey', 'dropzone'],
|
||||
"a" : ['rel', 'rev'],
|
||||
"link" : ['rel', 'rev'],
|
||||
"td" : ["headers"],
|
||||
"th" : ["headers"],
|
||||
"td" : ["headers"],
|
||||
"form" : ["accept-charset"],
|
||||
"object" : ["archive"],
|
||||
|
||||
# These are HTML5 specific, as are *.accesskey and *.dropzone above.
|
||||
"area" : ["rel"],
|
||||
"icon" : ["sizes"],
|
||||
"iframe" : ["sandbox"],
|
||||
"output" : ["for"],
|
||||
}
|
||||
|
||||
def set_up_substitutions(self, tag):
|
||||
# We are only interested in <meta> tags
|
||||
if tag.name != 'meta':
|
||||
return False
|
||||
|
||||
http_equiv = tag.get('http-equiv')
|
||||
content = tag.get('content')
|
||||
charset = tag.get('charset')
|
||||
|
||||
# We are interested in <meta> tags that say what encoding the
|
||||
# document was originally in. This means HTML 5-style <meta>
|
||||
# tags that provide the "charset" attribute. It also means
|
||||
# HTML 4-style <meta> tags that provide the "content"
|
||||
# attribute and have "http-equiv" set to "content-type".
|
||||
#
|
||||
# In both cases we will replace the value of the appropriate
|
||||
# attribute with a standin object that can take on any
|
||||
# encoding.
|
||||
meta_encoding = None
|
||||
if charset is not None:
|
||||
# HTML 5 style:
|
||||
# <meta charset="utf8">
|
||||
meta_encoding = charset
|
||||
tag['charset'] = CharsetMetaAttributeValue(charset)
|
||||
|
||||
elif (content is not None and http_equiv is not None
|
||||
and http_equiv.lower() == 'content-type'):
|
||||
# HTML 4 style:
|
||||
# <meta http-equiv="content-type" content="text/html; charset=utf8">
|
||||
tag['content'] = ContentMetaAttributeValue(content)
|
||||
|
||||
return (meta_encoding is not None)
|
||||
|
||||
def register_treebuilders_from(module):
|
||||
"""Copy TreeBuilders from the given module into this module."""
|
||||
# I'm fairly sure this is not the best way to do this.
|
||||
this_module = sys.modules['bs4.builder']
|
||||
for name in module.__all__:
|
||||
obj = getattr(module, name)
|
||||
|
||||
if issubclass(obj, TreeBuilder):
|
||||
setattr(this_module, name, obj)
|
||||
this_module.__all__.append(name)
|
||||
# Register the builder while we're at it.
|
||||
this_module.builder_registry.register(obj)
|
||||
|
||||
class ParserRejectedMarkup(Exception):
|
||||
pass
|
||||
|
||||
# Builders are registered in reverse order of priority, so that custom
|
||||
# builder registrations will take precedence. In general, we want lxml
|
||||
# to take precedence over html5lib, because it's faster. And we only
|
||||
# want to use HTMLParser as a last result.
|
||||
from . import _htmlparser
|
||||
register_treebuilders_from(_htmlparser)
|
||||
try:
|
||||
from . import _html5lib
|
||||
register_treebuilders_from(_html5lib)
|
||||
except ImportError:
|
||||
# They don't have html5lib installed.
|
||||
pass
|
||||
try:
|
||||
from . import _lxml
|
||||
register_treebuilders_from(_lxml)
|
||||
except ImportError:
|
||||
# They don't have lxml installed.
|
||||
pass
|
||||
285
lib/bs4/builder/_html5lib.py
Normal file
285
lib/bs4/builder/_html5lib.py
Normal file
@@ -0,0 +1,285 @@
|
||||
__all__ = [
|
||||
'HTML5TreeBuilder',
|
||||
]
|
||||
|
||||
import warnings
|
||||
from bs4.builder import (
|
||||
PERMISSIVE,
|
||||
HTML,
|
||||
HTML_5,
|
||||
HTMLTreeBuilder,
|
||||
)
|
||||
from bs4.element import NamespacedAttribute
|
||||
import html5lib
|
||||
from html5lib.constants import namespaces
|
||||
from bs4.element import (
|
||||
Comment,
|
||||
Doctype,
|
||||
NavigableString,
|
||||
Tag,
|
||||
)
|
||||
|
||||
class HTML5TreeBuilder(HTMLTreeBuilder):
|
||||
"""Use html5lib to build a tree."""
|
||||
|
||||
features = ['html5lib', PERMISSIVE, HTML_5, HTML]
|
||||
|
||||
def prepare_markup(self, markup, user_specified_encoding):
|
||||
# Store the user-specified encoding for use later on.
|
||||
self.user_specified_encoding = user_specified_encoding
|
||||
yield (markup, None, None, False)
|
||||
|
||||
# These methods are defined by Beautiful Soup.
|
||||
def feed(self, markup):
|
||||
if self.soup.parse_only is not None:
|
||||
warnings.warn("You provided a value for parse_only, but the html5lib tree builder doesn't support parse_only. The entire document will be parsed.")
|
||||
parser = html5lib.HTMLParser(tree=self.create_treebuilder)
|
||||
doc = parser.parse(markup, encoding=self.user_specified_encoding)
|
||||
|
||||
# Set the character encoding detected by the tokenizer.
|
||||
if isinstance(markup, unicode):
|
||||
# We need to special-case this because html5lib sets
|
||||
# charEncoding to UTF-8 if it gets Unicode input.
|
||||
doc.original_encoding = None
|
||||
else:
|
||||
doc.original_encoding = parser.tokenizer.stream.charEncoding[0]
|
||||
|
||||
def create_treebuilder(self, namespaceHTMLElements):
|
||||
self.underlying_builder = TreeBuilderForHtml5lib(
|
||||
self.soup, namespaceHTMLElements)
|
||||
return self.underlying_builder
|
||||
|
||||
def test_fragment_to_document(self, fragment):
|
||||
"""See `TreeBuilder`."""
|
||||
return u'<html><head></head><body>%s</body></html>' % fragment
|
||||
|
||||
|
||||
class TreeBuilderForHtml5lib(html5lib.treebuilders._base.TreeBuilder):
|
||||
|
||||
def __init__(self, soup, namespaceHTMLElements):
|
||||
self.soup = soup
|
||||
super(TreeBuilderForHtml5lib, self).__init__(namespaceHTMLElements)
|
||||
|
||||
def documentClass(self):
|
||||
self.soup.reset()
|
||||
return Element(self.soup, self.soup, None)
|
||||
|
||||
def insertDoctype(self, token):
|
||||
name = token["name"]
|
||||
publicId = token["publicId"]
|
||||
systemId = token["systemId"]
|
||||
|
||||
doctype = Doctype.for_name_and_ids(name, publicId, systemId)
|
||||
self.soup.object_was_parsed(doctype)
|
||||
|
||||
def elementClass(self, name, namespace):
|
||||
tag = self.soup.new_tag(name, namespace)
|
||||
return Element(tag, self.soup, namespace)
|
||||
|
||||
def commentClass(self, data):
|
||||
return TextNode(Comment(data), self.soup)
|
||||
|
||||
def fragmentClass(self):
|
||||
self.soup = BeautifulSoup("")
|
||||
self.soup.name = "[document_fragment]"
|
||||
return Element(self.soup, self.soup, None)
|
||||
|
||||
def appendChild(self, node):
|
||||
# XXX This code is not covered by the BS4 tests.
|
||||
self.soup.append(node.element)
|
||||
|
||||
def getDocument(self):
|
||||
return self.soup
|
||||
|
||||
def getFragment(self):
|
||||
return html5lib.treebuilders._base.TreeBuilder.getFragment(self).element
|
||||
|
||||
class AttrList(object):
|
||||
def __init__(self, element):
|
||||
self.element = element
|
||||
self.attrs = dict(self.element.attrs)
|
||||
def __iter__(self):
|
||||
return list(self.attrs.items()).__iter__()
|
||||
def __setitem__(self, name, value):
|
||||
"set attr", name, value
|
||||
self.element[name] = value
|
||||
def items(self):
|
||||
return list(self.attrs.items())
|
||||
def keys(self):
|
||||
return list(self.attrs.keys())
|
||||
def __len__(self):
|
||||
return len(self.attrs)
|
||||
def __getitem__(self, name):
|
||||
return self.attrs[name]
|
||||
def __contains__(self, name):
|
||||
return name in list(self.attrs.keys())
|
||||
|
||||
|
||||
class Element(html5lib.treebuilders._base.Node):
|
||||
def __init__(self, element, soup, namespace):
|
||||
html5lib.treebuilders._base.Node.__init__(self, element.name)
|
||||
self.element = element
|
||||
self.soup = soup
|
||||
self.namespace = namespace
|
||||
|
||||
def appendChild(self, node):
|
||||
string_child = child = None
|
||||
if isinstance(node, basestring):
|
||||
# Some other piece of code decided to pass in a string
|
||||
# instead of creating a TextElement object to contain the
|
||||
# string.
|
||||
string_child = child = node
|
||||
elif isinstance(node, Tag):
|
||||
# Some other piece of code decided to pass in a Tag
|
||||
# instead of creating an Element object to contain the
|
||||
# Tag.
|
||||
child = node
|
||||
elif node.element.__class__ == NavigableString:
|
||||
string_child = child = node.element
|
||||
else:
|
||||
child = node.element
|
||||
|
||||
if not isinstance(child, basestring) and child.parent is not None:
|
||||
node.element.extract()
|
||||
|
||||
if (string_child and self.element.contents
|
||||
and self.element.contents[-1].__class__ == NavigableString):
|
||||
# We are appending a string onto another string.
|
||||
# TODO This has O(n^2) performance, for input like
|
||||
# "a</a>a</a>a</a>..."
|
||||
old_element = self.element.contents[-1]
|
||||
new_element = self.soup.new_string(old_element + string_child)
|
||||
old_element.replace_with(new_element)
|
||||
self.soup._most_recent_element = new_element
|
||||
else:
|
||||
if isinstance(node, basestring):
|
||||
# Create a brand new NavigableString from this string.
|
||||
child = self.soup.new_string(node)
|
||||
|
||||
# Tell Beautiful Soup to act as if it parsed this element
|
||||
# immediately after the parent's last descendant. (Or
|
||||
# immediately after the parent, if it has no children.)
|
||||
if self.element.contents:
|
||||
most_recent_element = self.element._last_descendant(False)
|
||||
else:
|
||||
most_recent_element = self.element
|
||||
|
||||
self.soup.object_was_parsed(
|
||||
child, parent=self.element,
|
||||
most_recent_element=most_recent_element)
|
||||
|
||||
def getAttributes(self):
|
||||
return AttrList(self.element)
|
||||
|
||||
def setAttributes(self, attributes):
|
||||
if attributes is not None and len(attributes) > 0:
|
||||
|
||||
converted_attributes = []
|
||||
for name, value in list(attributes.items()):
|
||||
if isinstance(name, tuple):
|
||||
new_name = NamespacedAttribute(*name)
|
||||
del attributes[name]
|
||||
attributes[new_name] = value
|
||||
|
||||
self.soup.builder._replace_cdata_list_attribute_values(
|
||||
self.name, attributes)
|
||||
for name, value in attributes.items():
|
||||
self.element[name] = value
|
||||
|
||||
# The attributes may contain variables that need substitution.
|
||||
# Call set_up_substitutions manually.
|
||||
#
|
||||
# The Tag constructor called this method when the Tag was created,
|
||||
# but we just set/changed the attributes, so call it again.
|
||||
self.soup.builder.set_up_substitutions(self.element)
|
||||
attributes = property(getAttributes, setAttributes)
|
||||
|
||||
def insertText(self, data, insertBefore=None):
|
||||
if insertBefore:
|
||||
text = TextNode(self.soup.new_string(data), self.soup)
|
||||
self.insertBefore(data, insertBefore)
|
||||
else:
|
||||
self.appendChild(data)
|
||||
|
||||
def insertBefore(self, node, refNode):
|
||||
index = self.element.index(refNode.element)
|
||||
if (node.element.__class__ == NavigableString and self.element.contents
|
||||
and self.element.contents[index-1].__class__ == NavigableString):
|
||||
# (See comments in appendChild)
|
||||
old_node = self.element.contents[index-1]
|
||||
new_str = self.soup.new_string(old_node + node.element)
|
||||
old_node.replace_with(new_str)
|
||||
else:
|
||||
self.element.insert(index, node.element)
|
||||
node.parent = self
|
||||
|
||||
def removeChild(self, node):
|
||||
node.element.extract()
|
||||
|
||||
def reparentChildren(self, new_parent):
|
||||
"""Move all of this tag's children into another tag."""
|
||||
element = self.element
|
||||
new_parent_element = new_parent.element
|
||||
# Determine what this tag's next_element will be once all the children
|
||||
# are removed.
|
||||
final_next_element = element.next_sibling
|
||||
|
||||
new_parents_last_descendant = new_parent_element._last_descendant(False, False)
|
||||
if len(new_parent_element.contents) > 0:
|
||||
# The new parent already contains children. We will be
|
||||
# appending this tag's children to the end.
|
||||
new_parents_last_child = new_parent_element.contents[-1]
|
||||
new_parents_last_descendant_next_element = new_parents_last_descendant.next_element
|
||||
else:
|
||||
# The new parent contains no children.
|
||||
new_parents_last_child = None
|
||||
new_parents_last_descendant_next_element = new_parent_element.next_element
|
||||
|
||||
to_append = element.contents
|
||||
append_after = new_parent.element.contents
|
||||
if len(to_append) > 0:
|
||||
# Set the first child's previous_element and previous_sibling
|
||||
# to elements within the new parent
|
||||
first_child = to_append[0]
|
||||
first_child.previous_element = new_parents_last_descendant
|
||||
first_child.previous_sibling = new_parents_last_child
|
||||
|
||||
# Fix the last child's next_element and next_sibling
|
||||
last_child = to_append[-1]
|
||||
last_child.next_element = new_parents_last_descendant_next_element
|
||||
last_child.next_sibling = None
|
||||
|
||||
for child in to_append:
|
||||
child.parent = new_parent_element
|
||||
new_parent_element.contents.append(child)
|
||||
|
||||
# Now that this element has no children, change its .next_element.
|
||||
element.contents = []
|
||||
element.next_element = final_next_element
|
||||
|
||||
def cloneNode(self):
|
||||
tag = self.soup.new_tag(self.element.name, self.namespace)
|
||||
node = Element(tag, self.soup, self.namespace)
|
||||
for key,value in self.attributes:
|
||||
node.attributes[key] = value
|
||||
return node
|
||||
|
||||
def hasContent(self):
|
||||
return self.element.contents
|
||||
|
||||
def getNameTuple(self):
|
||||
if self.namespace is None:
|
||||
return namespaces["html"], self.name
|
||||
else:
|
||||
return self.namespace, self.name
|
||||
|
||||
nameTuple = property(getNameTuple)
|
||||
|
||||
class TextNode(Element):
|
||||
def __init__(self, element, soup):
|
||||
html5lib.treebuilders._base.Node.__init__(self, None)
|
||||
self.element = element
|
||||
self.soup = soup
|
||||
|
||||
def cloneNode(self):
|
||||
raise NotImplementedError
|
||||
258
lib/bs4/builder/_htmlparser.py
Normal file
258
lib/bs4/builder/_htmlparser.py
Normal file
@@ -0,0 +1,258 @@
|
||||
"""Use the HTMLParser library to parse HTML files that aren't too bad."""
|
||||
|
||||
__all__ = [
|
||||
'HTMLParserTreeBuilder',
|
||||
]
|
||||
|
||||
from HTMLParser import (
|
||||
HTMLParser,
|
||||
HTMLParseError,
|
||||
)
|
||||
import sys
|
||||
import warnings
|
||||
|
||||
# Starting in Python 3.2, the HTMLParser constructor takes a 'strict'
|
||||
# argument, which we'd like to set to False. Unfortunately,
|
||||
# http://bugs.python.org/issue13273 makes strict=True a better bet
|
||||
# before Python 3.2.3.
|
||||
#
|
||||
# At the end of this file, we monkeypatch HTMLParser so that
|
||||
# strict=True works well on Python 3.2.2.
|
||||
major, minor, release = sys.version_info[:3]
|
||||
CONSTRUCTOR_TAKES_STRICT = (
|
||||
major > 3
|
||||
or (major == 3 and minor > 2)
|
||||
or (major == 3 and minor == 2 and release >= 3))
|
||||
|
||||
from bs4.element import (
|
||||
CData,
|
||||
Comment,
|
||||
Declaration,
|
||||
Doctype,
|
||||
ProcessingInstruction,
|
||||
)
|
||||
from bs4.dammit import EntitySubstitution, UnicodeDammit
|
||||
|
||||
from bs4.builder import (
|
||||
HTML,
|
||||
HTMLTreeBuilder,
|
||||
STRICT,
|
||||
)
|
||||
|
||||
|
||||
HTMLPARSER = 'html.parser'
|
||||
|
||||
class BeautifulSoupHTMLParser(HTMLParser):
|
||||
def handle_starttag(self, name, attrs):
|
||||
# XXX namespace
|
||||
attr_dict = {}
|
||||
for key, value in attrs:
|
||||
# Change None attribute values to the empty string
|
||||
# for consistency with the other tree builders.
|
||||
if value is None:
|
||||
value = ''
|
||||
attr_dict[key] = value
|
||||
attrvalue = '""'
|
||||
self.soup.handle_starttag(name, None, None, attr_dict)
|
||||
|
||||
def handle_endtag(self, name):
|
||||
self.soup.handle_endtag(name)
|
||||
|
||||
def handle_data(self, data):
|
||||
self.soup.handle_data(data)
|
||||
|
||||
def handle_charref(self, name):
|
||||
# XXX workaround for a bug in HTMLParser. Remove this once
|
||||
# it's fixed.
|
||||
if name.startswith('x'):
|
||||
real_name = int(name.lstrip('x'), 16)
|
||||
elif name.startswith('X'):
|
||||
real_name = int(name.lstrip('X'), 16)
|
||||
else:
|
||||
real_name = int(name)
|
||||
|
||||
try:
|
||||
data = unichr(real_name)
|
||||
except (ValueError, OverflowError), e:
|
||||
data = u"\N{REPLACEMENT CHARACTER}"
|
||||
|
||||
self.handle_data(data)
|
||||
|
||||
def handle_entityref(self, name):
|
||||
character = EntitySubstitution.HTML_ENTITY_TO_CHARACTER.get(name)
|
||||
if character is not None:
|
||||
data = character
|
||||
else:
|
||||
data = "&%s;" % name
|
||||
self.handle_data(data)
|
||||
|
||||
def handle_comment(self, data):
|
||||
self.soup.endData()
|
||||
self.soup.handle_data(data)
|
||||
self.soup.endData(Comment)
|
||||
|
||||
def handle_decl(self, data):
|
||||
self.soup.endData()
|
||||
if data.startswith("DOCTYPE "):
|
||||
data = data[len("DOCTYPE "):]
|
||||
elif data == 'DOCTYPE':
|
||||
# i.e. "<!DOCTYPE>"
|
||||
data = ''
|
||||
self.soup.handle_data(data)
|
||||
self.soup.endData(Doctype)
|
||||
|
||||
def unknown_decl(self, data):
|
||||
if data.upper().startswith('CDATA['):
|
||||
cls = CData
|
||||
data = data[len('CDATA['):]
|
||||
else:
|
||||
cls = Declaration
|
||||
self.soup.endData()
|
||||
self.soup.handle_data(data)
|
||||
self.soup.endData(cls)
|
||||
|
||||
def handle_pi(self, data):
|
||||
self.soup.endData()
|
||||
if data.endswith("?") and data.lower().startswith("xml"):
|
||||
# "An XHTML processing instruction using the trailing '?'
|
||||
# will cause the '?' to be included in data." - HTMLParser
|
||||
# docs.
|
||||
#
|
||||
# Strip the question mark so we don't end up with two
|
||||
# question marks.
|
||||
data = data[:-1]
|
||||
self.soup.handle_data(data)
|
||||
self.soup.endData(ProcessingInstruction)
|
||||
|
||||
|
||||
class HTMLParserTreeBuilder(HTMLTreeBuilder):
|
||||
|
||||
is_xml = False
|
||||
features = [HTML, STRICT, HTMLPARSER]
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
if CONSTRUCTOR_TAKES_STRICT:
|
||||
kwargs['strict'] = False
|
||||
self.parser_args = (args, kwargs)
|
||||
|
||||
def prepare_markup(self, markup, user_specified_encoding=None,
|
||||
document_declared_encoding=None):
|
||||
"""
|
||||
:return: A 4-tuple (markup, original encoding, encoding
|
||||
declared within markup, whether any characters had to be
|
||||
replaced with REPLACEMENT CHARACTER).
|
||||
"""
|
||||
if isinstance(markup, unicode):
|
||||
yield (markup, None, None, False)
|
||||
return
|
||||
|
||||
try_encodings = [user_specified_encoding, document_declared_encoding]
|
||||
dammit = UnicodeDammit(markup, try_encodings, is_html=True)
|
||||
yield (dammit.markup, dammit.original_encoding,
|
||||
dammit.declared_html_encoding,
|
||||
dammit.contains_replacement_characters)
|
||||
|
||||
def feed(self, markup):
|
||||
args, kwargs = self.parser_args
|
||||
parser = BeautifulSoupHTMLParser(*args, **kwargs)
|
||||
parser.soup = self.soup
|
||||
try:
|
||||
parser.feed(markup)
|
||||
except HTMLParseError, e:
|
||||
warnings.warn(RuntimeWarning(
|
||||
"Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
|
||||
raise e
|
||||
|
||||
# Patch 3.2 versions of HTMLParser earlier than 3.2.3 to use some
|
||||
# 3.2.3 code. This ensures they don't treat markup like <p></p> as a
|
||||
# string.
|
||||
#
|
||||
# XXX This code can be removed once most Python 3 users are on 3.2.3.
|
||||
if major == 3 and minor == 2 and not CONSTRUCTOR_TAKES_STRICT:
|
||||
import re
|
||||
attrfind_tolerant = re.compile(
|
||||
r'\s*((?<=[\'"\s])[^\s/>][^\s/=>]*)(\s*=+\s*'
|
||||
r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?')
|
||||
HTMLParserTreeBuilder.attrfind_tolerant = attrfind_tolerant
|
||||
|
||||
locatestarttagend = re.compile(r"""
|
||||
<[a-zA-Z][-.a-zA-Z0-9:_]* # tag name
|
||||
(?:\s+ # whitespace before attribute name
|
||||
(?:[a-zA-Z_][-.:a-zA-Z0-9_]* # attribute name
|
||||
(?:\s*=\s* # value indicator
|
||||
(?:'[^']*' # LITA-enclosed value
|
||||
|\"[^\"]*\" # LIT-enclosed value
|
||||
|[^'\">\s]+ # bare value
|
||||
)
|
||||
)?
|
||||
)
|
||||
)*
|
||||
\s* # trailing whitespace
|
||||
""", re.VERBOSE)
|
||||
BeautifulSoupHTMLParser.locatestarttagend = locatestarttagend
|
||||
|
||||
from html.parser import tagfind, attrfind
|
||||
|
||||
def parse_starttag(self, i):
|
||||
self.__starttag_text = None
|
||||
endpos = self.check_for_whole_start_tag(i)
|
||||
if endpos < 0:
|
||||
return endpos
|
||||
rawdata = self.rawdata
|
||||
self.__starttag_text = rawdata[i:endpos]
|
||||
|
||||
# Now parse the data between i+1 and j into a tag and attrs
|
||||
attrs = []
|
||||
match = tagfind.match(rawdata, i+1)
|
||||
assert match, 'unexpected call to parse_starttag()'
|
||||
k = match.end()
|
||||
self.lasttag = tag = rawdata[i+1:k].lower()
|
||||
while k < endpos:
|
||||
if self.strict:
|
||||
m = attrfind.match(rawdata, k)
|
||||
else:
|
||||
m = attrfind_tolerant.match(rawdata, k)
|
||||
if not m:
|
||||
break
|
||||
attrname, rest, attrvalue = m.group(1, 2, 3)
|
||||
if not rest:
|
||||
attrvalue = None
|
||||
elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
|
||||
attrvalue[:1] == '"' == attrvalue[-1:]:
|
||||
attrvalue = attrvalue[1:-1]
|
||||
if attrvalue:
|
||||
attrvalue = self.unescape(attrvalue)
|
||||
attrs.append((attrname.lower(), attrvalue))
|
||||
k = m.end()
|
||||
|
||||
end = rawdata[k:endpos].strip()
|
||||
if end not in (">", "/>"):
|
||||
lineno, offset = self.getpos()
|
||||
if "\n" in self.__starttag_text:
|
||||
lineno = lineno + self.__starttag_text.count("\n")
|
||||
offset = len(self.__starttag_text) \
|
||||
- self.__starttag_text.rfind("\n")
|
||||
else:
|
||||
offset = offset + len(self.__starttag_text)
|
||||
if self.strict:
|
||||
self.error("junk characters in start tag: %r"
|
||||
% (rawdata[k:endpos][:20],))
|
||||
self.handle_data(rawdata[i:endpos])
|
||||
return endpos
|
||||
if end.endswith('/>'):
|
||||
# XHTML-style empty tag: <span attr="value" />
|
||||
self.handle_startendtag(tag, attrs)
|
||||
else:
|
||||
self.handle_starttag(tag, attrs)
|
||||
if tag in self.CDATA_CONTENT_ELEMENTS:
|
||||
self.set_cdata_mode(tag)
|
||||
return endpos
|
||||
|
||||
def set_cdata_mode(self, elem):
|
||||
self.cdata_elem = elem.lower()
|
||||
self.interesting = re.compile(r'</\s*%s\s*>' % self.cdata_elem, re.I)
|
||||
|
||||
BeautifulSoupHTMLParser.parse_starttag = parse_starttag
|
||||
BeautifulSoupHTMLParser.set_cdata_mode = set_cdata_mode
|
||||
|
||||
CONSTRUCTOR_TAKES_STRICT = True
|
||||
233
lib/bs4/builder/_lxml.py
Normal file
233
lib/bs4/builder/_lxml.py
Normal file
@@ -0,0 +1,233 @@
|
||||
__all__ = [
|
||||
'LXMLTreeBuilderForXML',
|
||||
'LXMLTreeBuilder',
|
||||
]
|
||||
|
||||
from io import BytesIO
|
||||
from StringIO import StringIO
|
||||
import collections
|
||||
from lxml import etree
|
||||
from bs4.element import Comment, Doctype, NamespacedAttribute
|
||||
from bs4.builder import (
|
||||
FAST,
|
||||
HTML,
|
||||
HTMLTreeBuilder,
|
||||
PERMISSIVE,
|
||||
ParserRejectedMarkup,
|
||||
TreeBuilder,
|
||||
XML)
|
||||
from bs4.dammit import EncodingDetector
|
||||
|
||||
LXML = 'lxml'
|
||||
|
||||
class LXMLTreeBuilderForXML(TreeBuilder):
|
||||
DEFAULT_PARSER_CLASS = etree.XMLParser
|
||||
|
||||
is_xml = True
|
||||
|
||||
# Well, it's permissive by XML parser standards.
|
||||
features = [LXML, XML, FAST, PERMISSIVE]
|
||||
|
||||
CHUNK_SIZE = 512
|
||||
|
||||
# This namespace mapping is specified in the XML Namespace
|
||||
# standard.
|
||||
DEFAULT_NSMAPS = {'http://www.w3.org/XML/1998/namespace' : "xml"}
|
||||
|
||||
def default_parser(self, encoding):
|
||||
# This can either return a parser object or a class, which
|
||||
# will be instantiated with default arguments.
|
||||
if self._default_parser is not None:
|
||||
return self._default_parser
|
||||
return etree.XMLParser(
|
||||
target=self, strip_cdata=False, recover=True, encoding=encoding)
|
||||
|
||||
def parser_for(self, encoding):
|
||||
# Use the default parser.
|
||||
parser = self.default_parser(encoding)
|
||||
|
||||
if isinstance(parser, collections.Callable):
|
||||
# Instantiate the parser with default arguments
|
||||
parser = parser(target=self, strip_cdata=False, encoding=encoding)
|
||||
return parser
|
||||
|
||||
def __init__(self, parser=None, empty_element_tags=None):
|
||||
# TODO: Issue a warning if parser is present but not a
|
||||
# callable, since that means there's no way to create new
|
||||
# parsers for different encodings.
|
||||
self._default_parser = parser
|
||||
if empty_element_tags is not None:
|
||||
self.empty_element_tags = set(empty_element_tags)
|
||||
self.soup = None
|
||||
self.nsmaps = [self.DEFAULT_NSMAPS]
|
||||
|
||||
def _getNsTag(self, tag):
|
||||
# Split the namespace URL out of a fully-qualified lxml tag
|
||||
# name. Copied from lxml's src/lxml/sax.py.
|
||||
if tag[0] == '{':
|
||||
return tuple(tag[1:].split('}', 1))
|
||||
else:
|
||||
return (None, tag)
|
||||
|
||||
def prepare_markup(self, markup, user_specified_encoding=None,
|
||||
document_declared_encoding=None):
|
||||
"""
|
||||
:yield: A series of 4-tuples.
|
||||
(markup, encoding, declared encoding,
|
||||
has undergone character replacement)
|
||||
|
||||
Each 4-tuple represents a strategy for parsing the document.
|
||||
"""
|
||||
if isinstance(markup, unicode):
|
||||
# We were given Unicode. Maybe lxml can parse Unicode on
|
||||
# this system?
|
||||
yield markup, None, document_declared_encoding, False
|
||||
|
||||
if isinstance(markup, unicode):
|
||||
# No, apparently not. Convert the Unicode to UTF-8 and
|
||||
# tell lxml to parse it as UTF-8.
|
||||
yield (markup.encode("utf8"), "utf8",
|
||||
document_declared_encoding, False)
|
||||
|
||||
# Instead of using UnicodeDammit to convert the bytestring to
|
||||
# Unicode using different encodings, use EncodingDetector to
|
||||
# iterate over the encodings, and tell lxml to try to parse
|
||||
# the document as each one in turn.
|
||||
is_html = not self.is_xml
|
||||
try_encodings = [user_specified_encoding, document_declared_encoding]
|
||||
detector = EncodingDetector(markup, try_encodings, is_html)
|
||||
for encoding in detector.encodings:
|
||||
yield (detector.markup, encoding, document_declared_encoding, False)
|
||||
|
||||
def feed(self, markup):
|
||||
if isinstance(markup, bytes):
|
||||
markup = BytesIO(markup)
|
||||
elif isinstance(markup, unicode):
|
||||
markup = StringIO(markup)
|
||||
|
||||
# Call feed() at least once, even if the markup is empty,
|
||||
# or the parser won't be initialized.
|
||||
data = markup.read(self.CHUNK_SIZE)
|
||||
try:
|
||||
self.parser = self.parser_for(self.soup.original_encoding)
|
||||
self.parser.feed(data)
|
||||
while len(data) != 0:
|
||||
# Now call feed() on the rest of the data, chunk by chunk.
|
||||
data = markup.read(self.CHUNK_SIZE)
|
||||
if len(data) != 0:
|
||||
self.parser.feed(data)
|
||||
self.parser.close()
|
||||
except (UnicodeDecodeError, LookupError, etree.ParserError), e:
|
||||
raise ParserRejectedMarkup(str(e))
|
||||
|
||||
def close(self):
|
||||
self.nsmaps = [self.DEFAULT_NSMAPS]
|
||||
|
||||
def start(self, name, attrs, nsmap={}):
|
||||
# Make sure attrs is a mutable dict--lxml may send an immutable dictproxy.
|
||||
attrs = dict(attrs)
|
||||
nsprefix = None
|
||||
# Invert each namespace map as it comes in.
|
||||
if len(self.nsmaps) > 1:
|
||||
# There are no new namespaces for this tag, but
|
||||
# non-default namespaces are in play, so we need a
|
||||
# separate tag stack to know when they end.
|
||||
self.nsmaps.append(None)
|
||||
elif len(nsmap) > 0:
|
||||
# A new namespace mapping has come into play.
|
||||
inverted_nsmap = dict((value, key) for key, value in nsmap.items())
|
||||
self.nsmaps.append(inverted_nsmap)
|
||||
# Also treat the namespace mapping as a set of attributes on the
|
||||
# tag, so we can recreate it later.
|
||||
attrs = attrs.copy()
|
||||
for prefix, namespace in nsmap.items():
|
||||
attribute = NamespacedAttribute(
|
||||
"xmlns", prefix, "http://www.w3.org/2000/xmlns/")
|
||||
attrs[attribute] = namespace
|
||||
|
||||
# Namespaces are in play. Find any attributes that came in
|
||||
# from lxml with namespaces attached to their names, and
|
||||
# turn then into NamespacedAttribute objects.
|
||||
new_attrs = {}
|
||||
for attr, value in attrs.items():
|
||||
namespace, attr = self._getNsTag(attr)
|
||||
if namespace is None:
|
||||
new_attrs[attr] = value
|
||||
else:
|
||||
nsprefix = self._prefix_for_namespace(namespace)
|
||||
attr = NamespacedAttribute(nsprefix, attr, namespace)
|
||||
new_attrs[attr] = value
|
||||
attrs = new_attrs
|
||||
|
||||
namespace, name = self._getNsTag(name)
|
||||
nsprefix = self._prefix_for_namespace(namespace)
|
||||
self.soup.handle_starttag(name, namespace, nsprefix, attrs)
|
||||
|
||||
def _prefix_for_namespace(self, namespace):
|
||||
"""Find the currently active prefix for the given namespace."""
|
||||
if namespace is None:
|
||||
return None
|
||||
for inverted_nsmap in reversed(self.nsmaps):
|
||||
if inverted_nsmap is not None and namespace in inverted_nsmap:
|
||||
return inverted_nsmap[namespace]
|
||||
return None
|
||||
|
||||
def end(self, name):
|
||||
self.soup.endData()
|
||||
completed_tag = self.soup.tagStack[-1]
|
||||
namespace, name = self._getNsTag(name)
|
||||
nsprefix = None
|
||||
if namespace is not None:
|
||||
for inverted_nsmap in reversed(self.nsmaps):
|
||||
if inverted_nsmap is not None and namespace in inverted_nsmap:
|
||||
nsprefix = inverted_nsmap[namespace]
|
||||
break
|
||||
self.soup.handle_endtag(name, nsprefix)
|
||||
if len(self.nsmaps) > 1:
|
||||
# This tag, or one of its parents, introduced a namespace
|
||||
# mapping, so pop it off the stack.
|
||||
self.nsmaps.pop()
|
||||
|
||||
def pi(self, target, data):
|
||||
pass
|
||||
|
||||
def data(self, content):
|
||||
self.soup.handle_data(content)
|
||||
|
||||
def doctype(self, name, pubid, system):
|
||||
self.soup.endData()
|
||||
doctype = Doctype.for_name_and_ids(name, pubid, system)
|
||||
self.soup.object_was_parsed(doctype)
|
||||
|
||||
def comment(self, content):
|
||||
"Handle comments as Comment objects."
|
||||
self.soup.endData()
|
||||
self.soup.handle_data(content)
|
||||
self.soup.endData(Comment)
|
||||
|
||||
def test_fragment_to_document(self, fragment):
|
||||
"""See `TreeBuilder`."""
|
||||
return u'<?xml version="1.0" encoding="utf-8"?>\n%s' % fragment
|
||||
|
||||
|
||||
class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):
|
||||
|
||||
features = [LXML, HTML, FAST, PERMISSIVE]
|
||||
is_xml = False
|
||||
|
||||
def default_parser(self, encoding):
|
||||
return etree.HTMLParser
|
||||
|
||||
def feed(self, markup):
|
||||
encoding = self.soup.original_encoding
|
||||
try:
|
||||
self.parser = self.parser_for(encoding)
|
||||
self.parser.feed(markup)
|
||||
self.parser.close()
|
||||
except (UnicodeDecodeError, LookupError, etree.ParserError), e:
|
||||
raise ParserRejectedMarkup(str(e))
|
||||
|
||||
|
||||
def test_fragment_to_document(self, fragment):
|
||||
"""See `TreeBuilder`."""
|
||||
return u'<html><body>%s</body></html>' % fragment
|
||||
829
lib/bs4/dammit.py
Normal file
829
lib/bs4/dammit.py
Normal file
@@ -0,0 +1,829 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""Beautiful Soup bonus library: Unicode, Dammit
|
||||
|
||||
This library converts a bytestream to Unicode through any means
|
||||
necessary. It is heavily based on code from Mark Pilgrim's Universal
|
||||
Feed Parser. It works best on XML and XML, but it does not rewrite the
|
||||
XML or HTML to reflect a new encoding; that's the tree builder's job.
|
||||
"""
|
||||
|
||||
import codecs
|
||||
from htmlentitydefs import codepoint2name
|
||||
import re
|
||||
import logging
|
||||
import string
|
||||
|
||||
# Import a library to autodetect character encodings.
|
||||
chardet_type = None
|
||||
try:
|
||||
# First try the fast C implementation.
|
||||
# PyPI package: cchardet
|
||||
import cchardet
|
||||
def chardet_dammit(s):
|
||||
return cchardet.detect(s)['encoding']
|
||||
except ImportError:
|
||||
try:
|
||||
# Fall back to the pure Python implementation
|
||||
# Debian package: python-chardet
|
||||
# PyPI package: chardet
|
||||
import chardet
|
||||
def chardet_dammit(s):
|
||||
return chardet.detect(s)['encoding']
|
||||
#import chardet.constants
|
||||
#chardet.constants._debug = 1
|
||||
except ImportError:
|
||||
# No chardet available.
|
||||
def chardet_dammit(s):
|
||||
return None
|
||||
|
||||
# Available from http://cjkpython.i18n.org/.
|
||||
try:
|
||||
import iconv_codec
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
xml_encoding_re = re.compile(
|
||||
'^<\?.*encoding=[\'"](.*?)[\'"].*\?>'.encode(), re.I)
|
||||
html_meta_re = re.compile(
|
||||
'<\s*meta[^>]+charset\s*=\s*["\']?([^>]*?)[ /;\'">]'.encode(), re.I)
|
||||
|
||||
class EntitySubstitution(object):
|
||||
|
||||
"""Substitute XML or HTML entities for the corresponding characters."""
|
||||
|
||||
def _populate_class_variables():
|
||||
lookup = {}
|
||||
reverse_lookup = {}
|
||||
characters_for_re = []
|
||||
for codepoint, name in list(codepoint2name.items()):
|
||||
character = unichr(codepoint)
|
||||
if codepoint != 34:
|
||||
# There's no point in turning the quotation mark into
|
||||
# ", unless it happens within an attribute value, which
|
||||
# is handled elsewhere.
|
||||
characters_for_re.append(character)
|
||||
lookup[character] = name
|
||||
# But we do want to turn " into the quotation mark.
|
||||
reverse_lookup[name] = character
|
||||
re_definition = "[%s]" % "".join(characters_for_re)
|
||||
return lookup, reverse_lookup, re.compile(re_definition)
|
||||
(CHARACTER_TO_HTML_ENTITY, HTML_ENTITY_TO_CHARACTER,
|
||||
CHARACTER_TO_HTML_ENTITY_RE) = _populate_class_variables()
|
||||
|
||||
CHARACTER_TO_XML_ENTITY = {
|
||||
"'": "apos",
|
||||
'"': "quot",
|
||||
"&": "amp",
|
||||
"<": "lt",
|
||||
">": "gt",
|
||||
}
|
||||
|
||||
BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|"
|
||||
"&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)"
|
||||
")")
|
||||
|
||||
AMPERSAND_OR_BRACKET = re.compile("([<>&])")
|
||||
|
||||
@classmethod
|
||||
def _substitute_html_entity(cls, matchobj):
|
||||
entity = cls.CHARACTER_TO_HTML_ENTITY.get(matchobj.group(0))
|
||||
return "&%s;" % entity
|
||||
|
||||
@classmethod
|
||||
def _substitute_xml_entity(cls, matchobj):
|
||||
"""Used with a regular expression to substitute the
|
||||
appropriate XML entity for an XML special character."""
|
||||
entity = cls.CHARACTER_TO_XML_ENTITY[matchobj.group(0)]
|
||||
return "&%s;" % entity
|
||||
|
||||
@classmethod
|
||||
def quoted_attribute_value(self, value):
|
||||
"""Make a value into a quoted XML attribute, possibly escaping it.
|
||||
|
||||
Most strings will be quoted using double quotes.
|
||||
|
||||
Bob's Bar -> "Bob's Bar"
|
||||
|
||||
If a string contains double quotes, it will be quoted using
|
||||
single quotes.
|
||||
|
||||
Welcome to "my bar" -> 'Welcome to "my bar"'
|
||||
|
||||
If a string contains both single and double quotes, the
|
||||
double quotes will be escaped, and the string will be quoted
|
||||
using double quotes.
|
||||
|
||||
Welcome to "Bob's Bar" -> "Welcome to "Bob's bar"
|
||||
"""
|
||||
quote_with = '"'
|
||||
if '"' in value:
|
||||
if "'" in value:
|
||||
# The string contains both single and double
|
||||
# quotes. Turn the double quotes into
|
||||
# entities. We quote the double quotes rather than
|
||||
# the single quotes because the entity name is
|
||||
# """ whether this is HTML or XML. If we
|
||||
# quoted the single quotes, we'd have to decide
|
||||
# between ' and &squot;.
|
||||
replace_with = """
|
||||
value = value.replace('"', replace_with)
|
||||
else:
|
||||
# There are double quotes but no single quotes.
|
||||
# We can use single quotes to quote the attribute.
|
||||
quote_with = "'"
|
||||
return quote_with + value + quote_with
|
||||
|
||||
@classmethod
|
||||
def substitute_xml(cls, value, make_quoted_attribute=False):
|
||||
"""Substitute XML entities for special XML characters.
|
||||
|
||||
:param value: A string to be substituted. The less-than sign
|
||||
will become <, the greater-than sign will become >,
|
||||
and any ampersands will become &. If you want ampersands
|
||||
that appear to be part of an entity definition to be left
|
||||
alone, use substitute_xml_containing_entities() instead.
|
||||
|
||||
:param make_quoted_attribute: If True, then the string will be
|
||||
quoted, as befits an attribute value.
|
||||
"""
|
||||
# Escape angle brackets and ampersands.
|
||||
value = cls.AMPERSAND_OR_BRACKET.sub(
|
||||
cls._substitute_xml_entity, value)
|
||||
|
||||
if make_quoted_attribute:
|
||||
value = cls.quoted_attribute_value(value)
|
||||
return value
|
||||
|
||||
@classmethod
|
||||
def substitute_xml_containing_entities(
|
||||
cls, value, make_quoted_attribute=False):
|
||||
"""Substitute XML entities for special XML characters.
|
||||
|
||||
:param value: A string to be substituted. The less-than sign will
|
||||
become <, the greater-than sign will become >, and any
|
||||
ampersands that are not part of an entity defition will
|
||||
become &.
|
||||
|
||||
:param make_quoted_attribute: If True, then the string will be
|
||||
quoted, as befits an attribute value.
|
||||
"""
|
||||
# Escape angle brackets, and ampersands that aren't part of
|
||||
# entities.
|
||||
value = cls.BARE_AMPERSAND_OR_BRACKET.sub(
|
||||
cls._substitute_xml_entity, value)
|
||||
|
||||
if make_quoted_attribute:
|
||||
value = cls.quoted_attribute_value(value)
|
||||
return value
|
||||
|
||||
@classmethod
|
||||
def substitute_html(cls, s):
|
||||
"""Replace certain Unicode characters with named HTML entities.
|
||||
|
||||
This differs from data.encode(encoding, 'xmlcharrefreplace')
|
||||
in that the goal is to make the result more readable (to those
|
||||
with ASCII displays) rather than to recover from
|
||||
errors. There's absolutely nothing wrong with a UTF-8 string
|
||||
containg a LATIN SMALL LETTER E WITH ACUTE, but replacing that
|
||||
character with "é" will make it more readable to some
|
||||
people.
|
||||
"""
|
||||
return cls.CHARACTER_TO_HTML_ENTITY_RE.sub(
|
||||
cls._substitute_html_entity, s)
|
||||
|
||||
|
||||
class EncodingDetector:
|
||||
"""Suggests a number of possible encodings for a bytestring.
|
||||
|
||||
Order of precedence:
|
||||
|
||||
1. Encodings you specifically tell EncodingDetector to try first
|
||||
(the override_encodings argument to the constructor).
|
||||
|
||||
2. An encoding declared within the bytestring itself, either in an
|
||||
XML declaration (if the bytestring is to be interpreted as an XML
|
||||
document), or in a <meta> tag (if the bytestring is to be
|
||||
interpreted as an HTML document.)
|
||||
|
||||
3. An encoding detected through textual analysis by chardet,
|
||||
cchardet, or a similar external library.
|
||||
|
||||
4. UTF-8.
|
||||
|
||||
5. Windows-1252.
|
||||
"""
|
||||
def __init__(self, markup, override_encodings=None, is_html=False):
|
||||
self.override_encodings = override_encodings or []
|
||||
self.chardet_encoding = None
|
||||
self.is_html = is_html
|
||||
self.declared_encoding = None
|
||||
|
||||
# First order of business: strip a byte-order mark.
|
||||
self.markup, self.sniffed_encoding = self.strip_byte_order_mark(markup)
|
||||
|
||||
def _usable(self, encoding, tried):
|
||||
if encoding is not None:
|
||||
encoding = encoding.lower()
|
||||
if encoding not in tried:
|
||||
tried.add(encoding)
|
||||
return True
|
||||
return False
|
||||
|
||||
@property
|
||||
def encodings(self):
|
||||
"""Yield a number of encodings that might work for this markup."""
|
||||
tried = set()
|
||||
for e in self.override_encodings:
|
||||
if self._usable(e, tried):
|
||||
yield e
|
||||
|
||||
# Did the document originally start with a byte-order mark
|
||||
# that indicated its encoding?
|
||||
if self._usable(self.sniffed_encoding, tried):
|
||||
yield self.sniffed_encoding
|
||||
|
||||
# Look within the document for an XML or HTML encoding
|
||||
# declaration.
|
||||
if self.declared_encoding is None:
|
||||
self.declared_encoding = self.find_declared_encoding(
|
||||
self.markup, self.is_html)
|
||||
if self._usable(self.declared_encoding, tried):
|
||||
yield self.declared_encoding
|
||||
|
||||
# Use third-party character set detection to guess at the
|
||||
# encoding.
|
||||
if self.chardet_encoding is None:
|
||||
self.chardet_encoding = chardet_dammit(self.markup)
|
||||
if self._usable(self.chardet_encoding, tried):
|
||||
yield self.chardet_encoding
|
||||
|
||||
# As a last-ditch effort, try utf-8 and windows-1252.
|
||||
for e in ('utf-8', 'windows-1252'):
|
||||
if self._usable(e, tried):
|
||||
yield e
|
||||
|
||||
@classmethod
|
||||
def strip_byte_order_mark(cls, data):
|
||||
"""If a byte-order mark is present, strip it and return the encoding it implies."""
|
||||
encoding = None
|
||||
if (len(data) >= 4) and (data[:2] == b'\xfe\xff') \
|
||||
and (data[2:4] != '\x00\x00'):
|
||||
encoding = 'utf-16be'
|
||||
data = data[2:]
|
||||
elif (len(data) >= 4) and (data[:2] == b'\xff\xfe') \
|
||||
and (data[2:4] != '\x00\x00'):
|
||||
encoding = 'utf-16le'
|
||||
data = data[2:]
|
||||
elif data[:3] == b'\xef\xbb\xbf':
|
||||
encoding = 'utf-8'
|
||||
data = data[3:]
|
||||
elif data[:4] == b'\x00\x00\xfe\xff':
|
||||
encoding = 'utf-32be'
|
||||
data = data[4:]
|
||||
elif data[:4] == b'\xff\xfe\x00\x00':
|
||||
encoding = 'utf-32le'
|
||||
data = data[4:]
|
||||
return data, encoding
|
||||
|
||||
@classmethod
|
||||
def find_declared_encoding(cls, markup, is_html=False, search_entire_document=False):
|
||||
"""Given a document, tries to find its declared encoding.
|
||||
|
||||
An XML encoding is declared at the beginning of the document.
|
||||
|
||||
An HTML encoding is declared in a <meta> tag, hopefully near the
|
||||
beginning of the document.
|
||||
"""
|
||||
if search_entire_document:
|
||||
xml_endpos = html_endpos = len(markup)
|
||||
else:
|
||||
xml_endpos = 1024
|
||||
html_endpos = max(2048, int(len(markup) * 0.05))
|
||||
|
||||
declared_encoding = None
|
||||
declared_encoding_match = xml_encoding_re.search(markup, endpos=xml_endpos)
|
||||
if not declared_encoding_match and is_html:
|
||||
declared_encoding_match = html_meta_re.search(markup, endpos=html_endpos)
|
||||
if declared_encoding_match is not None:
|
||||
declared_encoding = declared_encoding_match.groups()[0].decode(
|
||||
'ascii')
|
||||
if declared_encoding:
|
||||
return declared_encoding.lower()
|
||||
return None
|
||||
|
||||
class UnicodeDammit:
|
||||
"""A class for detecting the encoding of a *ML document and
|
||||
converting it to a Unicode string. If the source encoding is
|
||||
windows-1252, can replace MS smart quotes with their HTML or XML
|
||||
equivalents."""
|
||||
|
||||
# This dictionary maps commonly seen values for "charset" in HTML
|
||||
# meta tags to the corresponding Python codec names. It only covers
|
||||
# values that aren't in Python's aliases and can't be determined
|
||||
# by the heuristics in find_codec.
|
||||
CHARSET_ALIASES = {"macintosh": "mac-roman",
|
||||
"x-sjis": "shift-jis"}
|
||||
|
||||
ENCODINGS_WITH_SMART_QUOTES = [
|
||||
"windows-1252",
|
||||
"iso-8859-1",
|
||||
"iso-8859-2",
|
||||
]
|
||||
|
||||
def __init__(self, markup, override_encodings=[],
|
||||
smart_quotes_to=None, is_html=False):
|
||||
self.smart_quotes_to = smart_quotes_to
|
||||
self.tried_encodings = []
|
||||
self.contains_replacement_characters = False
|
||||
self.is_html = is_html
|
||||
|
||||
self.detector = EncodingDetector(markup, override_encodings, is_html)
|
||||
|
||||
# Short-circuit if the data is in Unicode to begin with.
|
||||
if isinstance(markup, unicode) or markup == '':
|
||||
self.markup = markup
|
||||
self.unicode_markup = unicode(markup)
|
||||
self.original_encoding = None
|
||||
return
|
||||
|
||||
# The encoding detector may have stripped a byte-order mark.
|
||||
# Use the stripped markup from this point on.
|
||||
self.markup = self.detector.markup
|
||||
|
||||
u = None
|
||||
for encoding in self.detector.encodings:
|
||||
markup = self.detector.markup
|
||||
u = self._convert_from(encoding)
|
||||
if u is not None:
|
||||
break
|
||||
|
||||
if not u:
|
||||
# None of the encodings worked. As an absolute last resort,
|
||||
# try them again with character replacement.
|
||||
|
||||
for encoding in self.detector.encodings:
|
||||
if encoding != "ascii":
|
||||
u = self._convert_from(encoding, "replace")
|
||||
if u is not None:
|
||||
logging.warning(
|
||||
"Some characters could not be decoded, and were "
|
||||
"replaced with REPLACEMENT CHARACTER.")
|
||||
self.contains_replacement_characters = True
|
||||
break
|
||||
|
||||
# If none of that worked, we could at this point force it to
|
||||
# ASCII, but that would destroy so much data that I think
|
||||
# giving up is better.
|
||||
self.unicode_markup = u
|
||||
if not u:
|
||||
self.original_encoding = None
|
||||
|
||||
def _sub_ms_char(self, match):
|
||||
"""Changes a MS smart quote character to an XML or HTML
|
||||
entity, or an ASCII character."""
|
||||
orig = match.group(1)
|
||||
if self.smart_quotes_to == 'ascii':
|
||||
sub = self.MS_CHARS_TO_ASCII.get(orig).encode()
|
||||
else:
|
||||
sub = self.MS_CHARS.get(orig)
|
||||
if type(sub) == tuple:
|
||||
if self.smart_quotes_to == 'xml':
|
||||
sub = '&#x'.encode() + sub[1].encode() + ';'.encode()
|
||||
else:
|
||||
sub = '&'.encode() + sub[0].encode() + ';'.encode()
|
||||
else:
|
||||
sub = sub.encode()
|
||||
return sub
|
||||
|
||||
def _convert_from(self, proposed, errors="strict"):
|
||||
proposed = self.find_codec(proposed)
|
||||
if not proposed or (proposed, errors) in self.tried_encodings:
|
||||
return None
|
||||
self.tried_encodings.append((proposed, errors))
|
||||
markup = self.markup
|
||||
# Convert smart quotes to HTML if coming from an encoding
|
||||
# that might have them.
|
||||
if (self.smart_quotes_to is not None
|
||||
and proposed in self.ENCODINGS_WITH_SMART_QUOTES):
|
||||
smart_quotes_re = b"([\x80-\x9f])"
|
||||
smart_quotes_compiled = re.compile(smart_quotes_re)
|
||||
markup = smart_quotes_compiled.sub(self._sub_ms_char, markup)
|
||||
|
||||
try:
|
||||
#print "Trying to convert document to %s (errors=%s)" % (
|
||||
# proposed, errors)
|
||||
u = self._to_unicode(markup, proposed, errors)
|
||||
self.markup = u
|
||||
self.original_encoding = proposed
|
||||
except Exception as e:
|
||||
#print "That didn't work!"
|
||||
#print e
|
||||
return None
|
||||
#print "Correct encoding: %s" % proposed
|
||||
return self.markup
|
||||
|
||||
def _to_unicode(self, data, encoding, errors="strict"):
|
||||
'''Given a string and its encoding, decodes the string into Unicode.
|
||||
%encoding is a string recognized by encodings.aliases'''
|
||||
return unicode(data, encoding, errors)
|
||||
|
||||
@property
|
||||
def declared_html_encoding(self):
|
||||
if not self.is_html:
|
||||
return None
|
||||
return self.detector.declared_encoding
|
||||
|
||||
def find_codec(self, charset):
|
||||
value = (self._codec(self.CHARSET_ALIASES.get(charset, charset))
|
||||
or (charset and self._codec(charset.replace("-", "")))
|
||||
or (charset and self._codec(charset.replace("-", "_")))
|
||||
or (charset and charset.lower())
|
||||
or charset
|
||||
)
|
||||
if value:
|
||||
return value.lower()
|
||||
return None
|
||||
|
||||
def _codec(self, charset):
|
||||
if not charset:
|
||||
return charset
|
||||
codec = None
|
||||
try:
|
||||
codecs.lookup(charset)
|
||||
codec = charset
|
||||
except (LookupError, ValueError):
|
||||
pass
|
||||
return codec
|
||||
|
||||
|
||||
# A partial mapping of ISO-Latin-1 to HTML entities/XML numeric entities.
|
||||
MS_CHARS = {b'\x80': ('euro', '20AC'),
|
||||
b'\x81': ' ',
|
||||
b'\x82': ('sbquo', '201A'),
|
||||
b'\x83': ('fnof', '192'),
|
||||
b'\x84': ('bdquo', '201E'),
|
||||
b'\x85': ('hellip', '2026'),
|
||||
b'\x86': ('dagger', '2020'),
|
||||
b'\x87': ('Dagger', '2021'),
|
||||
b'\x88': ('circ', '2C6'),
|
||||
b'\x89': ('permil', '2030'),
|
||||
b'\x8A': ('Scaron', '160'),
|
||||
b'\x8B': ('lsaquo', '2039'),
|
||||
b'\x8C': ('OElig', '152'),
|
||||
b'\x8D': '?',
|
||||
b'\x8E': ('#x17D', '17D'),
|
||||
b'\x8F': '?',
|
||||
b'\x90': '?',
|
||||
b'\x91': ('lsquo', '2018'),
|
||||
b'\x92': ('rsquo', '2019'),
|
||||
b'\x93': ('ldquo', '201C'),
|
||||
b'\x94': ('rdquo', '201D'),
|
||||
b'\x95': ('bull', '2022'),
|
||||
b'\x96': ('ndash', '2013'),
|
||||
b'\x97': ('mdash', '2014'),
|
||||
b'\x98': ('tilde', '2DC'),
|
||||
b'\x99': ('trade', '2122'),
|
||||
b'\x9a': ('scaron', '161'),
|
||||
b'\x9b': ('rsaquo', '203A'),
|
||||
b'\x9c': ('oelig', '153'),
|
||||
b'\x9d': '?',
|
||||
b'\x9e': ('#x17E', '17E'),
|
||||
b'\x9f': ('Yuml', ''),}
|
||||
|
||||
# A parochial partial mapping of ISO-Latin-1 to ASCII. Contains
|
||||
# horrors like stripping diacritical marks to turn á into a, but also
|
||||
# contains non-horrors like turning “ into ".
|
||||
MS_CHARS_TO_ASCII = {
|
||||
b'\x80' : 'EUR',
|
||||
b'\x81' : ' ',
|
||||
b'\x82' : ',',
|
||||
b'\x83' : 'f',
|
||||
b'\x84' : ',,',
|
||||
b'\x85' : '...',
|
||||
b'\x86' : '+',
|
||||
b'\x87' : '++',
|
||||
b'\x88' : '^',
|
||||
b'\x89' : '%',
|
||||
b'\x8a' : 'S',
|
||||
b'\x8b' : '<',
|
||||
b'\x8c' : 'OE',
|
||||
b'\x8d' : '?',
|
||||
b'\x8e' : 'Z',
|
||||
b'\x8f' : '?',
|
||||
b'\x90' : '?',
|
||||
b'\x91' : "'",
|
||||
b'\x92' : "'",
|
||||
b'\x93' : '"',
|
||||
b'\x94' : '"',
|
||||
b'\x95' : '*',
|
||||
b'\x96' : '-',
|
||||
b'\x97' : '--',
|
||||
b'\x98' : '~',
|
||||
b'\x99' : '(TM)',
|
||||
b'\x9a' : 's',
|
||||
b'\x9b' : '>',
|
||||
b'\x9c' : 'oe',
|
||||
b'\x9d' : '?',
|
||||
b'\x9e' : 'z',
|
||||
b'\x9f' : 'Y',
|
||||
b'\xa0' : ' ',
|
||||
b'\xa1' : '!',
|
||||
b'\xa2' : 'c',
|
||||
b'\xa3' : 'GBP',
|
||||
b'\xa4' : '$', #This approximation is especially parochial--this is the
|
||||
#generic currency symbol.
|
||||
b'\xa5' : 'YEN',
|
||||
b'\xa6' : '|',
|
||||
b'\xa7' : 'S',
|
||||
b'\xa8' : '..',
|
||||
b'\xa9' : '',
|
||||
b'\xaa' : '(th)',
|
||||
b'\xab' : '<<',
|
||||
b'\xac' : '!',
|
||||
b'\xad' : ' ',
|
||||
b'\xae' : '(R)',
|
||||
b'\xaf' : '-',
|
||||
b'\xb0' : 'o',
|
||||
b'\xb1' : '+-',
|
||||
b'\xb2' : '2',
|
||||
b'\xb3' : '3',
|
||||
b'\xb4' : ("'", 'acute'),
|
||||
b'\xb5' : 'u',
|
||||
b'\xb6' : 'P',
|
||||
b'\xb7' : '*',
|
||||
b'\xb8' : ',',
|
||||
b'\xb9' : '1',
|
||||
b'\xba' : '(th)',
|
||||
b'\xbb' : '>>',
|
||||
b'\xbc' : '1/4',
|
||||
b'\xbd' : '1/2',
|
||||
b'\xbe' : '3/4',
|
||||
b'\xbf' : '?',
|
||||
b'\xc0' : 'A',
|
||||
b'\xc1' : 'A',
|
||||
b'\xc2' : 'A',
|
||||
b'\xc3' : 'A',
|
||||
b'\xc4' : 'A',
|
||||
b'\xc5' : 'A',
|
||||
b'\xc6' : 'AE',
|
||||
b'\xc7' : 'C',
|
||||
b'\xc8' : 'E',
|
||||
b'\xc9' : 'E',
|
||||
b'\xca' : 'E',
|
||||
b'\xcb' : 'E',
|
||||
b'\xcc' : 'I',
|
||||
b'\xcd' : 'I',
|
||||
b'\xce' : 'I',
|
||||
b'\xcf' : 'I',
|
||||
b'\xd0' : 'D',
|
||||
b'\xd1' : 'N',
|
||||
b'\xd2' : 'O',
|
||||
b'\xd3' : 'O',
|
||||
b'\xd4' : 'O',
|
||||
b'\xd5' : 'O',
|
||||
b'\xd6' : 'O',
|
||||
b'\xd7' : '*',
|
||||
b'\xd8' : 'O',
|
||||
b'\xd9' : 'U',
|
||||
b'\xda' : 'U',
|
||||
b'\xdb' : 'U',
|
||||
b'\xdc' : 'U',
|
||||
b'\xdd' : 'Y',
|
||||
b'\xde' : 'b',
|
||||
b'\xdf' : 'B',
|
||||
b'\xe0' : 'a',
|
||||
b'\xe1' : 'a',
|
||||
b'\xe2' : 'a',
|
||||
b'\xe3' : 'a',
|
||||
b'\xe4' : 'a',
|
||||
b'\xe5' : 'a',
|
||||
b'\xe6' : 'ae',
|
||||
b'\xe7' : 'c',
|
||||
b'\xe8' : 'e',
|
||||
b'\xe9' : 'e',
|
||||
b'\xea' : 'e',
|
||||
b'\xeb' : 'e',
|
||||
b'\xec' : 'i',
|
||||
b'\xed' : 'i',
|
||||
b'\xee' : 'i',
|
||||
b'\xef' : 'i',
|
||||
b'\xf0' : 'o',
|
||||
b'\xf1' : 'n',
|
||||
b'\xf2' : 'o',
|
||||
b'\xf3' : 'o',
|
||||
b'\xf4' : 'o',
|
||||
b'\xf5' : 'o',
|
||||
b'\xf6' : 'o',
|
||||
b'\xf7' : '/',
|
||||
b'\xf8' : 'o',
|
||||
b'\xf9' : 'u',
|
||||
b'\xfa' : 'u',
|
||||
b'\xfb' : 'u',
|
||||
b'\xfc' : 'u',
|
||||
b'\xfd' : 'y',
|
||||
b'\xfe' : 'b',
|
||||
b'\xff' : 'y',
|
||||
}
|
||||
|
||||
# A map used when removing rogue Windows-1252/ISO-8859-1
|
||||
# characters in otherwise UTF-8 documents.
|
||||
#
|
||||
# Note that \x81, \x8d, \x8f, \x90, and \x9d are undefined in
|
||||
# Windows-1252.
|
||||
WINDOWS_1252_TO_UTF8 = {
|
||||
0x80 : b'\xe2\x82\xac', # €
|
||||
0x82 : b'\xe2\x80\x9a', # ‚
|
||||
0x83 : b'\xc6\x92', # ƒ
|
||||
0x84 : b'\xe2\x80\x9e', # „
|
||||
0x85 : b'\xe2\x80\xa6', # …
|
||||
0x86 : b'\xe2\x80\xa0', # †
|
||||
0x87 : b'\xe2\x80\xa1', # ‡
|
||||
0x88 : b'\xcb\x86', # ˆ
|
||||
0x89 : b'\xe2\x80\xb0', # ‰
|
||||
0x8a : b'\xc5\xa0', # Š
|
||||
0x8b : b'\xe2\x80\xb9', # ‹
|
||||
0x8c : b'\xc5\x92', # Œ
|
||||
0x8e : b'\xc5\xbd', # Ž
|
||||
0x91 : b'\xe2\x80\x98', # ‘
|
||||
0x92 : b'\xe2\x80\x99', # ’
|
||||
0x93 : b'\xe2\x80\x9c', # “
|
||||
0x94 : b'\xe2\x80\x9d', # ”
|
||||
0x95 : b'\xe2\x80\xa2', # •
|
||||
0x96 : b'\xe2\x80\x93', # –
|
||||
0x97 : b'\xe2\x80\x94', # —
|
||||
0x98 : b'\xcb\x9c', # ˜
|
||||
0x99 : b'\xe2\x84\xa2', # ™
|
||||
0x9a : b'\xc5\xa1', # š
|
||||
0x9b : b'\xe2\x80\xba', # ›
|
||||
0x9c : b'\xc5\x93', # œ
|
||||
0x9e : b'\xc5\xbe', # ž
|
||||
0x9f : b'\xc5\xb8', # Ÿ
|
||||
0xa0 : b'\xc2\xa0', #
|
||||
0xa1 : b'\xc2\xa1', # ¡
|
||||
0xa2 : b'\xc2\xa2', # ¢
|
||||
0xa3 : b'\xc2\xa3', # £
|
||||
0xa4 : b'\xc2\xa4', # ¤
|
||||
0xa5 : b'\xc2\xa5', # ¥
|
||||
0xa6 : b'\xc2\xa6', # ¦
|
||||
0xa7 : b'\xc2\xa7', # §
|
||||
0xa8 : b'\xc2\xa8', # ¨
|
||||
0xa9 : b'\xc2\xa9', # ©
|
||||
0xaa : b'\xc2\xaa', # ª
|
||||
0xab : b'\xc2\xab', # «
|
||||
0xac : b'\xc2\xac', # ¬
|
||||
0xad : b'\xc2\xad', #
|
||||
0xae : b'\xc2\xae', # ®
|
||||
0xaf : b'\xc2\xaf', # ¯
|
||||
0xb0 : b'\xc2\xb0', # °
|
||||
0xb1 : b'\xc2\xb1', # ±
|
||||
0xb2 : b'\xc2\xb2', # ²
|
||||
0xb3 : b'\xc2\xb3', # ³
|
||||
0xb4 : b'\xc2\xb4', # ´
|
||||
0xb5 : b'\xc2\xb5', # µ
|
||||
0xb6 : b'\xc2\xb6', # ¶
|
||||
0xb7 : b'\xc2\xb7', # ·
|
||||
0xb8 : b'\xc2\xb8', # ¸
|
||||
0xb9 : b'\xc2\xb9', # ¹
|
||||
0xba : b'\xc2\xba', # º
|
||||
0xbb : b'\xc2\xbb', # »
|
||||
0xbc : b'\xc2\xbc', # ¼
|
||||
0xbd : b'\xc2\xbd', # ½
|
||||
0xbe : b'\xc2\xbe', # ¾
|
||||
0xbf : b'\xc2\xbf', # ¿
|
||||
0xc0 : b'\xc3\x80', # À
|
||||
0xc1 : b'\xc3\x81', # Á
|
||||
0xc2 : b'\xc3\x82', # Â
|
||||
0xc3 : b'\xc3\x83', # Ã
|
||||
0xc4 : b'\xc3\x84', # Ä
|
||||
0xc5 : b'\xc3\x85', # Å
|
||||
0xc6 : b'\xc3\x86', # Æ
|
||||
0xc7 : b'\xc3\x87', # Ç
|
||||
0xc8 : b'\xc3\x88', # È
|
||||
0xc9 : b'\xc3\x89', # É
|
||||
0xca : b'\xc3\x8a', # Ê
|
||||
0xcb : b'\xc3\x8b', # Ë
|
||||
0xcc : b'\xc3\x8c', # Ì
|
||||
0xcd : b'\xc3\x8d', # Í
|
||||
0xce : b'\xc3\x8e', # Î
|
||||
0xcf : b'\xc3\x8f', # Ï
|
||||
0xd0 : b'\xc3\x90', # Ð
|
||||
0xd1 : b'\xc3\x91', # Ñ
|
||||
0xd2 : b'\xc3\x92', # Ò
|
||||
0xd3 : b'\xc3\x93', # Ó
|
||||
0xd4 : b'\xc3\x94', # Ô
|
||||
0xd5 : b'\xc3\x95', # Õ
|
||||
0xd6 : b'\xc3\x96', # Ö
|
||||
0xd7 : b'\xc3\x97', # ×
|
||||
0xd8 : b'\xc3\x98', # Ø
|
||||
0xd9 : b'\xc3\x99', # Ù
|
||||
0xda : b'\xc3\x9a', # Ú
|
||||
0xdb : b'\xc3\x9b', # Û
|
||||
0xdc : b'\xc3\x9c', # Ü
|
||||
0xdd : b'\xc3\x9d', # Ý
|
||||
0xde : b'\xc3\x9e', # Þ
|
||||
0xdf : b'\xc3\x9f', # ß
|
||||
0xe0 : b'\xc3\xa0', # à
|
||||
0xe1 : b'\xa1', # á
|
||||
0xe2 : b'\xc3\xa2', # â
|
||||
0xe3 : b'\xc3\xa3', # ã
|
||||
0xe4 : b'\xc3\xa4', # ä
|
||||
0xe5 : b'\xc3\xa5', # å
|
||||
0xe6 : b'\xc3\xa6', # æ
|
||||
0xe7 : b'\xc3\xa7', # ç
|
||||
0xe8 : b'\xc3\xa8', # è
|
||||
0xe9 : b'\xc3\xa9', # é
|
||||
0xea : b'\xc3\xaa', # ê
|
||||
0xeb : b'\xc3\xab', # ë
|
||||
0xec : b'\xc3\xac', # ì
|
||||
0xed : b'\xc3\xad', # í
|
||||
0xee : b'\xc3\xae', # î
|
||||
0xef : b'\xc3\xaf', # ï
|
||||
0xf0 : b'\xc3\xb0', # ð
|
||||
0xf1 : b'\xc3\xb1', # ñ
|
||||
0xf2 : b'\xc3\xb2', # ò
|
||||
0xf3 : b'\xc3\xb3', # ó
|
||||
0xf4 : b'\xc3\xb4', # ô
|
||||
0xf5 : b'\xc3\xb5', # õ
|
||||
0xf6 : b'\xc3\xb6', # ö
|
||||
0xf7 : b'\xc3\xb7', # ÷
|
||||
0xf8 : b'\xc3\xb8', # ø
|
||||
0xf9 : b'\xc3\xb9', # ù
|
||||
0xfa : b'\xc3\xba', # ú
|
||||
0xfb : b'\xc3\xbb', # û
|
||||
0xfc : b'\xc3\xbc', # ü
|
||||
0xfd : b'\xc3\xbd', # ý
|
||||
0xfe : b'\xc3\xbe', # þ
|
||||
}
|
||||
|
||||
MULTIBYTE_MARKERS_AND_SIZES = [
|
||||
(0xc2, 0xdf, 2), # 2-byte characters start with a byte C2-DF
|
||||
(0xe0, 0xef, 3), # 3-byte characters start with E0-EF
|
||||
(0xf0, 0xf4, 4), # 4-byte characters start with F0-F4
|
||||
]
|
||||
|
||||
FIRST_MULTIBYTE_MARKER = MULTIBYTE_MARKERS_AND_SIZES[0][0]
|
||||
LAST_MULTIBYTE_MARKER = MULTIBYTE_MARKERS_AND_SIZES[-1][1]
|
||||
|
||||
@classmethod
|
||||
def detwingle(cls, in_bytes, main_encoding="utf8",
|
||||
embedded_encoding="windows-1252"):
|
||||
"""Fix characters from one encoding embedded in some other encoding.
|
||||
|
||||
Currently the only situation supported is Windows-1252 (or its
|
||||
subset ISO-8859-1), embedded in UTF-8.
|
||||
|
||||
The input must be a bytestring. If you've already converted
|
||||
the document to Unicode, you're too late.
|
||||
|
||||
The output is a bytestring in which `embedded_encoding`
|
||||
characters have been converted to their `main_encoding`
|
||||
equivalents.
|
||||
"""
|
||||
if embedded_encoding.replace('_', '-').lower() not in (
|
||||
'windows-1252', 'windows_1252'):
|
||||
raise NotImplementedError(
|
||||
"Windows-1252 and ISO-8859-1 are the only currently supported "
|
||||
"embedded encodings.")
|
||||
|
||||
if main_encoding.lower() not in ('utf8', 'utf-8'):
|
||||
raise NotImplementedError(
|
||||
"UTF-8 is the only currently supported main encoding.")
|
||||
|
||||
byte_chunks = []
|
||||
|
||||
chunk_start = 0
|
||||
pos = 0
|
||||
while pos < len(in_bytes):
|
||||
byte = in_bytes[pos]
|
||||
if not isinstance(byte, int):
|
||||
# Python 2.x
|
||||
byte = ord(byte)
|
||||
if (byte >= cls.FIRST_MULTIBYTE_MARKER
|
||||
and byte <= cls.LAST_MULTIBYTE_MARKER):
|
||||
# This is the start of a UTF-8 multibyte character. Skip
|
||||
# to the end.
|
||||
for start, end, size in cls.MULTIBYTE_MARKERS_AND_SIZES:
|
||||
if byte >= start and byte <= end:
|
||||
pos += size
|
||||
break
|
||||
elif byte >= 0x80 and byte in cls.WINDOWS_1252_TO_UTF8:
|
||||
# We found a Windows-1252 character!
|
||||
# Save the string up to this point as a chunk.
|
||||
byte_chunks.append(in_bytes[chunk_start:pos])
|
||||
|
||||
# Now translate the Windows-1252 character into UTF-8
|
||||
# and add it as another, one-byte chunk.
|
||||
byte_chunks.append(cls.WINDOWS_1252_TO_UTF8[byte])
|
||||
pos += 1
|
||||
chunk_start = pos
|
||||
else:
|
||||
# Go on to the next character.
|
||||
pos += 1
|
||||
if chunk_start == 0:
|
||||
# The string is unchanged.
|
||||
return in_bytes
|
||||
else:
|
||||
# Store the final chunk.
|
||||
byte_chunks.append(in_bytes[chunk_start:])
|
||||
return b''.join(byte_chunks)
|
||||
|
||||
204
lib/bs4/diagnose.py
Normal file
204
lib/bs4/diagnose.py
Normal file
@@ -0,0 +1,204 @@
|
||||
"""Diagnostic functions, mainly for use when doing tech support."""
|
||||
import cProfile
|
||||
from StringIO import StringIO
|
||||
from HTMLParser import HTMLParser
|
||||
import bs4
|
||||
from bs4 import BeautifulSoup, __version__
|
||||
from bs4.builder import builder_registry
|
||||
|
||||
import os
|
||||
import pstats
|
||||
import random
|
||||
import tempfile
|
||||
import time
|
||||
import traceback
|
||||
import sys
|
||||
import cProfile
|
||||
|
||||
def diagnose(data):
|
||||
"""Diagnostic suite for isolating common problems."""
|
||||
print "Diagnostic running on Beautiful Soup %s" % __version__
|
||||
print "Python version %s" % sys.version
|
||||
|
||||
basic_parsers = ["html.parser", "html5lib", "lxml"]
|
||||
for name in basic_parsers:
|
||||
for builder in builder_registry.builders:
|
||||
if name in builder.features:
|
||||
break
|
||||
else:
|
||||
basic_parsers.remove(name)
|
||||
print (
|
||||
"I noticed that %s is not installed. Installing it may help." %
|
||||
name)
|
||||
|
||||
if 'lxml' in basic_parsers:
|
||||
basic_parsers.append(["lxml", "xml"])
|
||||
from lxml import etree
|
||||
print "Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION))
|
||||
|
||||
if 'html5lib' in basic_parsers:
|
||||
import html5lib
|
||||
print "Found html5lib version %s" % html5lib.__version__
|
||||
|
||||
if hasattr(data, 'read'):
|
||||
data = data.read()
|
||||
elif os.path.exists(data):
|
||||
print '"%s" looks like a filename. Reading data from the file.' % data
|
||||
data = open(data).read()
|
||||
elif data.startswith("http:") or data.startswith("https:"):
|
||||
print '"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data
|
||||
print "You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup."
|
||||
return
|
||||
print
|
||||
|
||||
for parser in basic_parsers:
|
||||
print "Trying to parse your markup with %s" % parser
|
||||
success = False
|
||||
try:
|
||||
soup = BeautifulSoup(data, parser)
|
||||
success = True
|
||||
except Exception, e:
|
||||
print "%s could not parse the markup." % parser
|
||||
traceback.print_exc()
|
||||
if success:
|
||||
print "Here's what %s did with the markup:" % parser
|
||||
print soup.prettify()
|
||||
|
||||
print "-" * 80
|
||||
|
||||
def lxml_trace(data, html=True, **kwargs):
|
||||
"""Print out the lxml events that occur during parsing.
|
||||
|
||||
This lets you see how lxml parses a document when no Beautiful
|
||||
Soup code is running.
|
||||
"""
|
||||
from lxml import etree
|
||||
for event, element in etree.iterparse(StringIO(data), html=html, **kwargs):
|
||||
print("%s, %4s, %s" % (event, element.tag, element.text))
|
||||
|
||||
class AnnouncingParser(HTMLParser):
|
||||
"""Announces HTMLParser parse events, without doing anything else."""
|
||||
|
||||
def _p(self, s):
|
||||
print(s)
|
||||
|
||||
def handle_starttag(self, name, attrs):
|
||||
self._p("%s START" % name)
|
||||
|
||||
def handle_endtag(self, name):
|
||||
self._p("%s END" % name)
|
||||
|
||||
def handle_data(self, data):
|
||||
self._p("%s DATA" % data)
|
||||
|
||||
def handle_charref(self, name):
|
||||
self._p("%s CHARREF" % name)
|
||||
|
||||
def handle_entityref(self, name):
|
||||
self._p("%s ENTITYREF" % name)
|
||||
|
||||
def handle_comment(self, data):
|
||||
self._p("%s COMMENT" % data)
|
||||
|
||||
def handle_decl(self, data):
|
||||
self._p("%s DECL" % data)
|
||||
|
||||
def unknown_decl(self, data):
|
||||
self._p("%s UNKNOWN-DECL" % data)
|
||||
|
||||
def handle_pi(self, data):
|
||||
self._p("%s PI" % data)
|
||||
|
||||
def htmlparser_trace(data):
|
||||
"""Print out the HTMLParser events that occur during parsing.
|
||||
|
||||
This lets you see how HTMLParser parses a document when no
|
||||
Beautiful Soup code is running.
|
||||
"""
|
||||
parser = AnnouncingParser()
|
||||
parser.feed(data)
|
||||
|
||||
_vowels = "aeiou"
|
||||
_consonants = "bcdfghjklmnpqrstvwxyz"
|
||||
|
||||
def rword(length=5):
|
||||
"Generate a random word-like string."
|
||||
s = ''
|
||||
for i in range(length):
|
||||
if i % 2 == 0:
|
||||
t = _consonants
|
||||
else:
|
||||
t = _vowels
|
||||
s += random.choice(t)
|
||||
return s
|
||||
|
||||
def rsentence(length=4):
|
||||
"Generate a random sentence-like string."
|
||||
return " ".join(rword(random.randint(4,9)) for i in range(length))
|
||||
|
||||
def rdoc(num_elements=1000):
|
||||
"""Randomly generate an invalid HTML document."""
|
||||
tag_names = ['p', 'div', 'span', 'i', 'b', 'script', 'table']
|
||||
elements = []
|
||||
for i in range(num_elements):
|
||||
choice = random.randint(0,3)
|
||||
if choice == 0:
|
||||
# New tag.
|
||||
tag_name = random.choice(tag_names)
|
||||
elements.append("<%s>" % tag_name)
|
||||
elif choice == 1:
|
||||
elements.append(rsentence(random.randint(1,4)))
|
||||
elif choice == 2:
|
||||
# Close a tag.
|
||||
tag_name = random.choice(tag_names)
|
||||
elements.append("</%s>" % tag_name)
|
||||
return "<html>" + "\n".join(elements) + "</html>"
|
||||
|
||||
def benchmark_parsers(num_elements=100000):
|
||||
"""Very basic head-to-head performance benchmark."""
|
||||
print "Comparative parser benchmark on Beautiful Soup %s" % __version__
|
||||
data = rdoc(num_elements)
|
||||
print "Generated a large invalid HTML document (%d bytes)." % len(data)
|
||||
|
||||
for parser in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]:
|
||||
success = False
|
||||
try:
|
||||
a = time.time()
|
||||
soup = BeautifulSoup(data, parser)
|
||||
b = time.time()
|
||||
success = True
|
||||
except Exception, e:
|
||||
print "%s could not parse the markup." % parser
|
||||
traceback.print_exc()
|
||||
if success:
|
||||
print "BS4+%s parsed the markup in %.2fs." % (parser, b-a)
|
||||
|
||||
from lxml import etree
|
||||
a = time.time()
|
||||
etree.HTML(data)
|
||||
b = time.time()
|
||||
print "Raw lxml parsed the markup in %.2fs." % (b-a)
|
||||
|
||||
import html5lib
|
||||
parser = html5lib.HTMLParser()
|
||||
a = time.time()
|
||||
parser.parse(data)
|
||||
b = time.time()
|
||||
print "Raw html5lib parsed the markup in %.2fs." % (b-a)
|
||||
|
||||
def profile(num_elements=100000, parser="lxml"):
|
||||
|
||||
filehandle = tempfile.NamedTemporaryFile()
|
||||
filename = filehandle.name
|
||||
|
||||
data = rdoc(num_elements)
|
||||
vars = dict(bs4=bs4, data=data, parser=parser)
|
||||
cProfile.runctx('bs4.BeautifulSoup(data, parser)' , vars, vars, filename)
|
||||
|
||||
stats = pstats.Stats(filename)
|
||||
# stats.strip_dirs()
|
||||
stats.sort_stats("cumulative")
|
||||
stats.print_stats('_html5lib|bs4', 50)
|
||||
|
||||
if __name__ == '__main__':
|
||||
diagnose(sys.stdin.read())
|
||||
1611
lib/bs4/element.py
Normal file
1611
lib/bs4/element.py
Normal file
File diff suppressed because it is too large
Load Diff
592
lib/bs4/testing.py
Normal file
592
lib/bs4/testing.py
Normal file
@@ -0,0 +1,592 @@
|
||||
"""Helper classes for tests."""
|
||||
|
||||
import copy
|
||||
import functools
|
||||
import unittest
|
||||
from unittest import TestCase
|
||||
from bs4 import BeautifulSoup
|
||||
from bs4.element import (
|
||||
CharsetMetaAttributeValue,
|
||||
Comment,
|
||||
ContentMetaAttributeValue,
|
||||
Doctype,
|
||||
SoupStrainer,
|
||||
)
|
||||
|
||||
from bs4.builder import HTMLParserTreeBuilder
|
||||
default_builder = HTMLParserTreeBuilder
|
||||
|
||||
|
||||
class SoupTest(unittest.TestCase):
|
||||
|
||||
@property
|
||||
def default_builder(self):
|
||||
return default_builder()
|
||||
|
||||
def soup(self, markup, **kwargs):
|
||||
"""Build a Beautiful Soup object from markup."""
|
||||
builder = kwargs.pop('builder', self.default_builder)
|
||||
return BeautifulSoup(markup, builder=builder, **kwargs)
|
||||
|
||||
def document_for(self, markup):
|
||||
"""Turn an HTML fragment into a document.
|
||||
|
||||
The details depend on the builder.
|
||||
"""
|
||||
return self.default_builder.test_fragment_to_document(markup)
|
||||
|
||||
def assertSoupEquals(self, to_parse, compare_parsed_to=None):
|
||||
builder = self.default_builder
|
||||
obj = BeautifulSoup(to_parse, builder=builder)
|
||||
if compare_parsed_to is None:
|
||||
compare_parsed_to = to_parse
|
||||
|
||||
self.assertEqual(obj.decode(), self.document_for(compare_parsed_to))
|
||||
|
||||
|
||||
class HTMLTreeBuilderSmokeTest(object):
|
||||
|
||||
"""A basic test of a treebuilder's competence.
|
||||
|
||||
Any HTML treebuilder, present or future, should be able to pass
|
||||
these tests. With invalid markup, there's room for interpretation,
|
||||
and different parsers can handle it differently. But with the
|
||||
markup in these tests, there's not much room for interpretation.
|
||||
"""
|
||||
|
||||
def assertDoctypeHandled(self, doctype_fragment):
|
||||
"""Assert that a given doctype string is handled correctly."""
|
||||
doctype_str, soup = self._document_with_doctype(doctype_fragment)
|
||||
|
||||
# Make sure a Doctype object was created.
|
||||
doctype = soup.contents[0]
|
||||
self.assertEqual(doctype.__class__, Doctype)
|
||||
self.assertEqual(doctype, doctype_fragment)
|
||||
self.assertEqual(str(soup)[:len(doctype_str)], doctype_str)
|
||||
|
||||
# Make sure that the doctype was correctly associated with the
|
||||
# parse tree and that the rest of the document parsed.
|
||||
self.assertEqual(soup.p.contents[0], 'foo')
|
||||
|
||||
def _document_with_doctype(self, doctype_fragment):
|
||||
"""Generate and parse a document with the given doctype."""
|
||||
doctype = '<!DOCTYPE %s>' % doctype_fragment
|
||||
markup = doctype + '\n<p>foo</p>'
|
||||
soup = self.soup(markup)
|
||||
return doctype, soup
|
||||
|
||||
def test_normal_doctypes(self):
|
||||
"""Make sure normal, everyday HTML doctypes are handled correctly."""
|
||||
self.assertDoctypeHandled("html")
|
||||
self.assertDoctypeHandled(
|
||||
'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"')
|
||||
|
||||
def test_empty_doctype(self):
|
||||
soup = self.soup("<!DOCTYPE>")
|
||||
doctype = soup.contents[0]
|
||||
self.assertEqual("", doctype.strip())
|
||||
|
||||
def test_public_doctype_with_url(self):
|
||||
doctype = 'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"'
|
||||
self.assertDoctypeHandled(doctype)
|
||||
|
||||
def test_system_doctype(self):
|
||||
self.assertDoctypeHandled('foo SYSTEM "http://www.example.com/"')
|
||||
|
||||
def test_namespaced_system_doctype(self):
|
||||
# We can handle a namespaced doctype with a system ID.
|
||||
self.assertDoctypeHandled('xsl:stylesheet SYSTEM "htmlent.dtd"')
|
||||
|
||||
def test_namespaced_public_doctype(self):
|
||||
# Test a namespaced doctype with a public id.
|
||||
self.assertDoctypeHandled('xsl:stylesheet PUBLIC "htmlent.dtd"')
|
||||
|
||||
def test_real_xhtml_document(self):
|
||||
"""A real XHTML document should come out more or less the same as it went in."""
|
||||
markup = b"""<?xml version="1.0" encoding="utf-8"?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head><title>Hello.</title></head>
|
||||
<body>Goodbye.</body>
|
||||
</html>"""
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(
|
||||
soup.encode("utf-8").replace(b"\n", b""),
|
||||
markup.replace(b"\n", b""))
|
||||
|
||||
def test_deepcopy(self):
|
||||
"""Make sure you can copy the tree builder.
|
||||
|
||||
This is important because the builder is part of a
|
||||
BeautifulSoup object, and we want to be able to copy that.
|
||||
"""
|
||||
copy.deepcopy(self.default_builder)
|
||||
|
||||
def test_p_tag_is_never_empty_element(self):
|
||||
"""A <p> tag is never designated as an empty-element tag.
|
||||
|
||||
Even if the markup shows it as an empty-element tag, it
|
||||
shouldn't be presented that way.
|
||||
"""
|
||||
soup = self.soup("<p/>")
|
||||
self.assertFalse(soup.p.is_empty_element)
|
||||
self.assertEqual(str(soup.p), "<p></p>")
|
||||
|
||||
def test_unclosed_tags_get_closed(self):
|
||||
"""A tag that's not closed by the end of the document should be closed.
|
||||
|
||||
This applies to all tags except empty-element tags.
|
||||
"""
|
||||
self.assertSoupEquals("<p>", "<p></p>")
|
||||
self.assertSoupEquals("<b>", "<b></b>")
|
||||
|
||||
self.assertSoupEquals("<br>", "<br/>")
|
||||
|
||||
def test_br_is_always_empty_element_tag(self):
|
||||
"""A <br> tag is designated as an empty-element tag.
|
||||
|
||||
Some parsers treat <br></br> as one <br/> tag, some parsers as
|
||||
two tags, but it should always be an empty-element tag.
|
||||
"""
|
||||
soup = self.soup("<br></br>")
|
||||
self.assertTrue(soup.br.is_empty_element)
|
||||
self.assertEqual(str(soup.br), "<br/>")
|
||||
|
||||
def test_nested_formatting_elements(self):
|
||||
self.assertSoupEquals("<em><em></em></em>")
|
||||
|
||||
def test_comment(self):
|
||||
# Comments are represented as Comment objects.
|
||||
markup = "<p>foo<!--foobar-->baz</p>"
|
||||
self.assertSoupEquals(markup)
|
||||
|
||||
soup = self.soup(markup)
|
||||
comment = soup.find(text="foobar")
|
||||
self.assertEqual(comment.__class__, Comment)
|
||||
|
||||
# The comment is properly integrated into the tree.
|
||||
foo = soup.find(text="foo")
|
||||
self.assertEqual(comment, foo.next_element)
|
||||
baz = soup.find(text="baz")
|
||||
self.assertEqual(comment, baz.previous_element)
|
||||
|
||||
def test_preserved_whitespace_in_pre_and_textarea(self):
|
||||
"""Whitespace must be preserved in <pre> and <textarea> tags."""
|
||||
self.assertSoupEquals("<pre> </pre>")
|
||||
self.assertSoupEquals("<textarea> woo </textarea>")
|
||||
|
||||
def test_nested_inline_elements(self):
|
||||
"""Inline elements can be nested indefinitely."""
|
||||
b_tag = "<b>Inside a B tag</b>"
|
||||
self.assertSoupEquals(b_tag)
|
||||
|
||||
nested_b_tag = "<p>A <i>nested <b>tag</b></i></p>"
|
||||
self.assertSoupEquals(nested_b_tag)
|
||||
|
||||
double_nested_b_tag = "<p>A <a>doubly <i>nested <b>tag</b></i></a></p>"
|
||||
self.assertSoupEquals(nested_b_tag)
|
||||
|
||||
def test_nested_block_level_elements(self):
|
||||
"""Block elements can be nested."""
|
||||
soup = self.soup('<blockquote><p><b>Foo</b></p></blockquote>')
|
||||
blockquote = soup.blockquote
|
||||
self.assertEqual(blockquote.p.b.string, 'Foo')
|
||||
self.assertEqual(blockquote.b.string, 'Foo')
|
||||
|
||||
def test_correctly_nested_tables(self):
|
||||
"""One table can go inside another one."""
|
||||
markup = ('<table id="1">'
|
||||
'<tr>'
|
||||
"<td>Here's another table:"
|
||||
'<table id="2">'
|
||||
'<tr><td>foo</td></tr>'
|
||||
'</table></td>')
|
||||
|
||||
self.assertSoupEquals(
|
||||
markup,
|
||||
'<table id="1"><tr><td>Here\'s another table:'
|
||||
'<table id="2"><tr><td>foo</td></tr></table>'
|
||||
'</td></tr></table>')
|
||||
|
||||
self.assertSoupEquals(
|
||||
"<table><thead><tr><td>Foo</td></tr></thead>"
|
||||
"<tbody><tr><td>Bar</td></tr></tbody>"
|
||||
"<tfoot><tr><td>Baz</td></tr></tfoot></table>")
|
||||
|
||||
def test_deeply_nested_multivalued_attribute(self):
|
||||
# html5lib can set the attributes of the same tag many times
|
||||
# as it rearranges the tree. This has caused problems with
|
||||
# multivalued attributes.
|
||||
markup = '<table><div><div class="css"></div></div></table>'
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(["css"], soup.div.div['class'])
|
||||
|
||||
def test_angle_brackets_in_attribute_values_are_escaped(self):
|
||||
self.assertSoupEquals('<a b="<a>"></a>', '<a b="<a>"></a>')
|
||||
|
||||
def test_entities_in_attributes_converted_to_unicode(self):
|
||||
expect = u'<p id="pi\N{LATIN SMALL LETTER N WITH TILDE}ata"></p>'
|
||||
self.assertSoupEquals('<p id="piñata"></p>', expect)
|
||||
self.assertSoupEquals('<p id="piñata"></p>', expect)
|
||||
self.assertSoupEquals('<p id="piñata"></p>', expect)
|
||||
self.assertSoupEquals('<p id="piñata"></p>', expect)
|
||||
|
||||
def test_entities_in_text_converted_to_unicode(self):
|
||||
expect = u'<p>pi\N{LATIN SMALL LETTER N WITH TILDE}ata</p>'
|
||||
self.assertSoupEquals("<p>piñata</p>", expect)
|
||||
self.assertSoupEquals("<p>piñata</p>", expect)
|
||||
self.assertSoupEquals("<p>piñata</p>", expect)
|
||||
self.assertSoupEquals("<p>piñata</p>", expect)
|
||||
|
||||
def test_quot_entity_converted_to_quotation_mark(self):
|
||||
self.assertSoupEquals("<p>I said "good day!"</p>",
|
||||
'<p>I said "good day!"</p>')
|
||||
|
||||
def test_out_of_range_entity(self):
|
||||
expect = u"\N{REPLACEMENT CHARACTER}"
|
||||
self.assertSoupEquals("�", expect)
|
||||
self.assertSoupEquals("�", expect)
|
||||
self.assertSoupEquals("�", expect)
|
||||
|
||||
def test_multipart_strings(self):
|
||||
"Mostly to prevent a recurrence of a bug in the html5lib treebuilder."
|
||||
soup = self.soup("<html><h2>\nfoo</h2><p></p></html>")
|
||||
self.assertEqual("p", soup.h2.string.next_element.name)
|
||||
self.assertEqual("p", soup.p.name)
|
||||
|
||||
def test_basic_namespaces(self):
|
||||
"""Parsers don't need to *understand* namespaces, but at the
|
||||
very least they should not choke on namespaces or lose
|
||||
data."""
|
||||
|
||||
markup = b'<html xmlns="http://www.w3.org/1999/xhtml" xmlns:mathml="http://www.w3.org/1998/Math/MathML" xmlns:svg="http://www.w3.org/2000/svg"><head></head><body><mathml:msqrt>4</mathml:msqrt><b svg:fill="red"></b></body></html>'
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(markup, soup.encode())
|
||||
html = soup.html
|
||||
self.assertEqual('http://www.w3.org/1999/xhtml', soup.html['xmlns'])
|
||||
self.assertEqual(
|
||||
'http://www.w3.org/1998/Math/MathML', soup.html['xmlns:mathml'])
|
||||
self.assertEqual(
|
||||
'http://www.w3.org/2000/svg', soup.html['xmlns:svg'])
|
||||
|
||||
def test_multivalued_attribute_value_becomes_list(self):
|
||||
markup = b'<a class="foo bar">'
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(['foo', 'bar'], soup.a['class'])
|
||||
|
||||
#
|
||||
# Generally speaking, tests below this point are more tests of
|
||||
# Beautiful Soup than tests of the tree builders. But parsers are
|
||||
# weird, so we run these tests separately for every tree builder
|
||||
# to detect any differences between them.
|
||||
#
|
||||
|
||||
def test_can_parse_unicode_document(self):
|
||||
# A seemingly innocuous document... but it's in Unicode! And
|
||||
# it contains characters that can't be represented in the
|
||||
# encoding found in the declaration! The horror!
|
||||
markup = u'<html><head><meta encoding="euc-jp"></head><body>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</body>'
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(u'Sacr\xe9 bleu!', soup.body.string)
|
||||
|
||||
def test_soupstrainer(self):
|
||||
"""Parsers should be able to work with SoupStrainers."""
|
||||
strainer = SoupStrainer("b")
|
||||
soup = self.soup("A <b>bold</b> <meta/> <i>statement</i>",
|
||||
parse_only=strainer)
|
||||
self.assertEqual(soup.decode(), "<b>bold</b>")
|
||||
|
||||
def test_single_quote_attribute_values_become_double_quotes(self):
|
||||
self.assertSoupEquals("<foo attr='bar'></foo>",
|
||||
'<foo attr="bar"></foo>')
|
||||
|
||||
def test_attribute_values_with_nested_quotes_are_left_alone(self):
|
||||
text = """<foo attr='bar "brawls" happen'>a</foo>"""
|
||||
self.assertSoupEquals(text)
|
||||
|
||||
def test_attribute_values_with_double_nested_quotes_get_quoted(self):
|
||||
text = """<foo attr='bar "brawls" happen'>a</foo>"""
|
||||
soup = self.soup(text)
|
||||
soup.foo['attr'] = 'Brawls happen at "Bob\'s Bar"'
|
||||
self.assertSoupEquals(
|
||||
soup.foo.decode(),
|
||||
"""<foo attr="Brawls happen at "Bob\'s Bar"">a</foo>""")
|
||||
|
||||
def test_ampersand_in_attribute_value_gets_escaped(self):
|
||||
self.assertSoupEquals('<this is="really messed up & stuff"></this>',
|
||||
'<this is="really messed up & stuff"></this>')
|
||||
|
||||
self.assertSoupEquals(
|
||||
'<a href="http://example.org?a=1&b=2;3">foo</a>',
|
||||
'<a href="http://example.org?a=1&b=2;3">foo</a>')
|
||||
|
||||
def test_escaped_ampersand_in_attribute_value_is_left_alone(self):
|
||||
self.assertSoupEquals('<a href="http://example.org?a=1&b=2;3"></a>')
|
||||
|
||||
def test_entities_in_strings_converted_during_parsing(self):
|
||||
# Both XML and HTML entities are converted to Unicode characters
|
||||
# during parsing.
|
||||
text = "<p><<sacré bleu!>></p>"
|
||||
expected = u"<p><<sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></p>"
|
||||
self.assertSoupEquals(text, expected)
|
||||
|
||||
def test_smart_quotes_converted_on_the_way_in(self):
|
||||
# Microsoft smart quotes are converted to Unicode characters during
|
||||
# parsing.
|
||||
quote = b"<p>\x91Foo\x92</p>"
|
||||
soup = self.soup(quote)
|
||||
self.assertEqual(
|
||||
soup.p.string,
|
||||
u"\N{LEFT SINGLE QUOTATION MARK}Foo\N{RIGHT SINGLE QUOTATION MARK}")
|
||||
|
||||
def test_non_breaking_spaces_converted_on_the_way_in(self):
|
||||
soup = self.soup("<a> </a>")
|
||||
self.assertEqual(soup.a.string, u"\N{NO-BREAK SPACE}" * 2)
|
||||
|
||||
def test_entities_converted_on_the_way_out(self):
|
||||
text = "<p><<sacré bleu!>></p>"
|
||||
expected = u"<p><<sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></p>".encode("utf-8")
|
||||
soup = self.soup(text)
|
||||
self.assertEqual(soup.p.encode("utf-8"), expected)
|
||||
|
||||
def test_real_iso_latin_document(self):
|
||||
# Smoke test of interrelated functionality, using an
|
||||
# easy-to-understand document.
|
||||
|
||||
# Here it is in Unicode. Note that it claims to be in ISO-Latin-1.
|
||||
unicode_html = u'<html><head><meta content="text/html; charset=ISO-Latin-1" http-equiv="Content-type"/></head><body><p>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</p></body></html>'
|
||||
|
||||
# That's because we're going to encode it into ISO-Latin-1, and use
|
||||
# that to test.
|
||||
iso_latin_html = unicode_html.encode("iso-8859-1")
|
||||
|
||||
# Parse the ISO-Latin-1 HTML.
|
||||
soup = self.soup(iso_latin_html)
|
||||
# Encode it to UTF-8.
|
||||
result = soup.encode("utf-8")
|
||||
|
||||
# What do we expect the result to look like? Well, it would
|
||||
# look like unicode_html, except that the META tag would say
|
||||
# UTF-8 instead of ISO-Latin-1.
|
||||
expected = unicode_html.replace("ISO-Latin-1", "utf-8")
|
||||
|
||||
# And, of course, it would be in UTF-8, not Unicode.
|
||||
expected = expected.encode("utf-8")
|
||||
|
||||
# Ta-da!
|
||||
self.assertEqual(result, expected)
|
||||
|
||||
def test_real_shift_jis_document(self):
|
||||
# Smoke test to make sure the parser can handle a document in
|
||||
# Shift-JIS encoding, without choking.
|
||||
shift_jis_html = (
|
||||
b'<html><head></head><body><pre>'
|
||||
b'\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f'
|
||||
b'\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c'
|
||||
b'\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B'
|
||||
b'</pre></body></html>')
|
||||
unicode_html = shift_jis_html.decode("shift-jis")
|
||||
soup = self.soup(unicode_html)
|
||||
|
||||
# Make sure the parse tree is correctly encoded to various
|
||||
# encodings.
|
||||
self.assertEqual(soup.encode("utf-8"), unicode_html.encode("utf-8"))
|
||||
self.assertEqual(soup.encode("euc_jp"), unicode_html.encode("euc_jp"))
|
||||
|
||||
def test_real_hebrew_document(self):
|
||||
# A real-world test to make sure we can convert ISO-8859-9 (a
|
||||
# Hebrew encoding) to UTF-8.
|
||||
hebrew_document = b'<html><head><title>Hebrew (ISO 8859-8) in Visual Directionality</title></head><body><h1>Hebrew (ISO 8859-8) in Visual Directionality</h1>\xed\xe5\xec\xf9</body></html>'
|
||||
soup = self.soup(
|
||||
hebrew_document, from_encoding="iso8859-8")
|
||||
self.assertEqual(soup.original_encoding, 'iso8859-8')
|
||||
self.assertEqual(
|
||||
soup.encode('utf-8'),
|
||||
hebrew_document.decode("iso8859-8").encode("utf-8"))
|
||||
|
||||
def test_meta_tag_reflects_current_encoding(self):
|
||||
# Here's the <meta> tag saying that a document is
|
||||
# encoded in Shift-JIS.
|
||||
meta_tag = ('<meta content="text/html; charset=x-sjis" '
|
||||
'http-equiv="Content-type"/>')
|
||||
|
||||
# Here's a document incorporating that meta tag.
|
||||
shift_jis_html = (
|
||||
'<html><head>\n%s\n'
|
||||
'<meta http-equiv="Content-language" content="ja"/>'
|
||||
'</head><body>Shift-JIS markup goes here.') % meta_tag
|
||||
soup = self.soup(shift_jis_html)
|
||||
|
||||
# Parse the document, and the charset is seemingly unaffected.
|
||||
parsed_meta = soup.find('meta', {'http-equiv': 'Content-type'})
|
||||
content = parsed_meta['content']
|
||||
self.assertEqual('text/html; charset=x-sjis', content)
|
||||
|
||||
# But that value is actually a ContentMetaAttributeValue object.
|
||||
self.assertTrue(isinstance(content, ContentMetaAttributeValue))
|
||||
|
||||
# And it will take on a value that reflects its current
|
||||
# encoding.
|
||||
self.assertEqual('text/html; charset=utf8', content.encode("utf8"))
|
||||
|
||||
# For the rest of the story, see TestSubstitutions in
|
||||
# test_tree.py.
|
||||
|
||||
def test_html5_style_meta_tag_reflects_current_encoding(self):
|
||||
# Here's the <meta> tag saying that a document is
|
||||
# encoded in Shift-JIS.
|
||||
meta_tag = ('<meta id="encoding" charset="x-sjis" />')
|
||||
|
||||
# Here's a document incorporating that meta tag.
|
||||
shift_jis_html = (
|
||||
'<html><head>\n%s\n'
|
||||
'<meta http-equiv="Content-language" content="ja"/>'
|
||||
'</head><body>Shift-JIS markup goes here.') % meta_tag
|
||||
soup = self.soup(shift_jis_html)
|
||||
|
||||
# Parse the document, and the charset is seemingly unaffected.
|
||||
parsed_meta = soup.find('meta', id="encoding")
|
||||
charset = parsed_meta['charset']
|
||||
self.assertEqual('x-sjis', charset)
|
||||
|
||||
# But that value is actually a CharsetMetaAttributeValue object.
|
||||
self.assertTrue(isinstance(charset, CharsetMetaAttributeValue))
|
||||
|
||||
# And it will take on a value that reflects its current
|
||||
# encoding.
|
||||
self.assertEqual('utf8', charset.encode("utf8"))
|
||||
|
||||
def test_tag_with_no_attributes_can_have_attributes_added(self):
|
||||
data = self.soup("<a>text</a>")
|
||||
data.a['foo'] = 'bar'
|
||||
self.assertEqual('<a foo="bar">text</a>', data.a.decode())
|
||||
|
||||
class XMLTreeBuilderSmokeTest(object):
|
||||
|
||||
def test_docstring_generated(self):
|
||||
soup = self.soup("<root/>")
|
||||
self.assertEqual(
|
||||
soup.encode(), b'<?xml version="1.0" encoding="utf-8"?>\n<root/>')
|
||||
|
||||
def test_real_xhtml_document(self):
|
||||
"""A real XHTML document should come out *exactly* the same as it went in."""
|
||||
markup = b"""<?xml version="1.0" encoding="utf-8"?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head><title>Hello.</title></head>
|
||||
<body>Goodbye.</body>
|
||||
</html>"""
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(
|
||||
soup.encode("utf-8"), markup)
|
||||
|
||||
def test_formatter_processes_script_tag_for_xml_documents(self):
|
||||
doc = """
|
||||
<script type="text/javascript">
|
||||
</script>
|
||||
"""
|
||||
soup = BeautifulSoup(doc, "xml")
|
||||
# lxml would have stripped this while parsing, but we can add
|
||||
# it later.
|
||||
soup.script.string = 'console.log("< < hey > > ");'
|
||||
encoded = soup.encode()
|
||||
self.assertTrue(b"< < hey > >" in encoded)
|
||||
|
||||
def test_can_parse_unicode_document(self):
|
||||
markup = u'<?xml version="1.0" encoding="euc-jp"><root>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</root>'
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(u'Sacr\xe9 bleu!', soup.root.string)
|
||||
|
||||
def test_popping_namespaced_tag(self):
|
||||
markup = '<rss xmlns:dc="foo"><dc:creator>b</dc:creator><dc:date>2012-07-02T20:33:42Z</dc:date><dc:rights>c</dc:rights><image>d</image></rss>'
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(
|
||||
unicode(soup.rss), markup)
|
||||
|
||||
def test_docstring_includes_correct_encoding(self):
|
||||
soup = self.soup("<root/>")
|
||||
self.assertEqual(
|
||||
soup.encode("latin1"),
|
||||
b'<?xml version="1.0" encoding="latin1"?>\n<root/>')
|
||||
|
||||
def test_large_xml_document(self):
|
||||
"""A large XML document should come out the same as it went in."""
|
||||
markup = (b'<?xml version="1.0" encoding="utf-8"?>\n<root>'
|
||||
+ b'0' * (2**12)
|
||||
+ b'</root>')
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(soup.encode("utf-8"), markup)
|
||||
|
||||
|
||||
def test_tags_are_empty_element_if_and_only_if_they_are_empty(self):
|
||||
self.assertSoupEquals("<p>", "<p/>")
|
||||
self.assertSoupEquals("<p>foo</p>")
|
||||
|
||||
def test_namespaces_are_preserved(self):
|
||||
markup = '<root xmlns:a="http://example.com/" xmlns:b="http://example.net/"><a:foo>This tag is in the a namespace</a:foo><b:foo>This tag is in the b namespace</b:foo></root>'
|
||||
soup = self.soup(markup)
|
||||
root = soup.root
|
||||
self.assertEqual("http://example.com/", root['xmlns:a'])
|
||||
self.assertEqual("http://example.net/", root['xmlns:b'])
|
||||
|
||||
def test_closing_namespaced_tag(self):
|
||||
markup = '<p xmlns:dc="http://purl.org/dc/elements/1.1/"><dc:date>20010504</dc:date></p>'
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(unicode(soup.p), markup)
|
||||
|
||||
def test_namespaced_attributes(self):
|
||||
markup = '<foo xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"><bar xsi:schemaLocation="http://www.example.com"/></foo>'
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(unicode(soup.foo), markup)
|
||||
|
||||
def test_namespaced_attributes_xml_namespace(self):
|
||||
markup = '<foo xml:lang="fr">bar</foo>'
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(unicode(soup.foo), markup)
|
||||
|
||||
class HTML5TreeBuilderSmokeTest(HTMLTreeBuilderSmokeTest):
|
||||
"""Smoke test for a tree builder that supports HTML5."""
|
||||
|
||||
def test_real_xhtml_document(self):
|
||||
# Since XHTML is not HTML5, HTML5 parsers are not tested to handle
|
||||
# XHTML documents in any particular way.
|
||||
pass
|
||||
|
||||
def test_html_tags_have_namespace(self):
|
||||
markup = "<a>"
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual("http://www.w3.org/1999/xhtml", soup.a.namespace)
|
||||
|
||||
def test_svg_tags_have_namespace(self):
|
||||
markup = '<svg><circle/></svg>'
|
||||
soup = self.soup(markup)
|
||||
namespace = "http://www.w3.org/2000/svg"
|
||||
self.assertEqual(namespace, soup.svg.namespace)
|
||||
self.assertEqual(namespace, soup.circle.namespace)
|
||||
|
||||
|
||||
def test_mathml_tags_have_namespace(self):
|
||||
markup = '<math><msqrt>5</msqrt></math>'
|
||||
soup = self.soup(markup)
|
||||
namespace = 'http://www.w3.org/1998/Math/MathML'
|
||||
self.assertEqual(namespace, soup.math.namespace)
|
||||
self.assertEqual(namespace, soup.msqrt.namespace)
|
||||
|
||||
def test_xml_declaration_becomes_comment(self):
|
||||
markup = '<?xml version="1.0" encoding="utf-8"?><html></html>'
|
||||
soup = self.soup(markup)
|
||||
self.assertTrue(isinstance(soup.contents[0], Comment))
|
||||
self.assertEqual(soup.contents[0], '?xml version="1.0" encoding="utf-8"?')
|
||||
self.assertEqual("html", soup.contents[0].next_element.name)
|
||||
|
||||
def skipIf(condition, reason):
|
||||
def nothing(test, *args, **kwargs):
|
||||
return None
|
||||
|
||||
def decorator(test_item):
|
||||
if condition:
|
||||
return nothing
|
||||
else:
|
||||
return test_item
|
||||
|
||||
return decorator
|
||||
82
lib/certgen.py
Normal file
82
lib/certgen.py
Normal file
@@ -0,0 +1,82 @@
|
||||
# -*- coding: latin-1 -*-
|
||||
#
|
||||
# Copyright (C) Martin Sj<53>gren and AB Strakt 2001, All rights reserved
|
||||
# Copyright (C) Jean-Paul Calderone 2008, All rights reserved
|
||||
# This file is licenced under the GNU LESSER GENERAL PUBLIC LICENSE Version 2.1 or later (aka LGPL v2.1)
|
||||
# Please see LGPL2.1.txt for more information
|
||||
"""
|
||||
Certificate generation module.
|
||||
"""
|
||||
|
||||
from OpenSSL import crypto
|
||||
import time
|
||||
|
||||
TYPE_RSA = crypto.TYPE_RSA
|
||||
TYPE_DSA = crypto.TYPE_DSA
|
||||
|
||||
serial = int(time.time())
|
||||
|
||||
|
||||
def createKeyPair(type, bits):
|
||||
"""
|
||||
Create a public/private key pair.
|
||||
|
||||
Arguments: type - Key type, must be one of TYPE_RSA and TYPE_DSA
|
||||
bits - Number of bits to use in the key
|
||||
Returns: The public/private key pair in a PKey object
|
||||
"""
|
||||
pkey = crypto.PKey()
|
||||
pkey.generate_key(type, bits)
|
||||
return pkey
|
||||
|
||||
def createCertRequest(pkey, digest="md5", **name):
|
||||
"""
|
||||
Create a certificate request.
|
||||
|
||||
Arguments: pkey - The key to associate with the request
|
||||
digest - Digestion method to use for signing, default is md5
|
||||
**name - The name of the subject of the request, possible
|
||||
arguments are:
|
||||
C - Country name
|
||||
ST - State or province name
|
||||
L - Locality name
|
||||
O - Organization name
|
||||
OU - Organizational unit name
|
||||
CN - Common name
|
||||
emailAddress - E-mail address
|
||||
Returns: The certificate request in an X509Req object
|
||||
"""
|
||||
req = crypto.X509Req()
|
||||
subj = req.get_subject()
|
||||
|
||||
for (key,value) in name.items():
|
||||
setattr(subj, key, value)
|
||||
|
||||
req.set_pubkey(pkey)
|
||||
req.sign(pkey, digest)
|
||||
return req
|
||||
|
||||
def createCertificate(req, (issuerCert, issuerKey), serial, (notBefore, notAfter), digest="md5"):
|
||||
"""
|
||||
Generate a certificate given a certificate request.
|
||||
|
||||
Arguments: req - Certificate reqeust to use
|
||||
issuerCert - The certificate of the issuer
|
||||
issuerKey - The private key of the issuer
|
||||
serial - Serial number for the certificate
|
||||
notBefore - Timestamp (relative to now) when the certificate
|
||||
starts being valid
|
||||
notAfter - Timestamp (relative to now) when the certificate
|
||||
stops being valid
|
||||
digest - Digest method to use for signing, default is md5
|
||||
Returns: The signed certificate in an X509 object
|
||||
"""
|
||||
cert = crypto.X509()
|
||||
cert.set_serial_number(serial)
|
||||
cert.gmtime_adj_notBefore(notBefore)
|
||||
cert.gmtime_adj_notAfter(notAfter)
|
||||
cert.set_issuer(issuerCert.get_subject())
|
||||
cert.set_subject(req.get_subject())
|
||||
cert.set_pubkey(req.get_pubkey())
|
||||
cert.sign(issuerKey, digest)
|
||||
return cert
|
||||
25
lib/cherrypy/LICENSE.txt
Normal file
25
lib/cherrypy/LICENSE.txt
Normal file
@@ -0,0 +1,25 @@
|
||||
Copyright (c) 2004-2011, CherryPy Team (team@cherrypy.org)
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
* Neither the name of the CherryPy Team nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
652
lib/cherrypy/__init__.py
Normal file
652
lib/cherrypy/__init__.py
Normal file
@@ -0,0 +1,652 @@
|
||||
"""CherryPy is a pythonic, object-oriented HTTP framework.
|
||||
|
||||
|
||||
CherryPy consists of not one, but four separate API layers.
|
||||
|
||||
The APPLICATION LAYER is the simplest. CherryPy applications are written as
|
||||
a tree of classes and methods, where each branch in the tree corresponds to
|
||||
a branch in the URL path. Each method is a 'page handler', which receives
|
||||
GET and POST params as keyword arguments, and returns or yields the (HTML)
|
||||
body of the response. The special method name 'index' is used for paths
|
||||
that end in a slash, and the special method name 'default' is used to
|
||||
handle multiple paths via a single handler. This layer also includes:
|
||||
|
||||
* the 'exposed' attribute (and cherrypy.expose)
|
||||
* cherrypy.quickstart()
|
||||
* _cp_config attributes
|
||||
* cherrypy.tools (including cherrypy.session)
|
||||
* cherrypy.url()
|
||||
|
||||
The ENVIRONMENT LAYER is used by developers at all levels. It provides
|
||||
information about the current request and response, plus the application
|
||||
and server environment, via a (default) set of top-level objects:
|
||||
|
||||
* cherrypy.request
|
||||
* cherrypy.response
|
||||
* cherrypy.engine
|
||||
* cherrypy.server
|
||||
* cherrypy.tree
|
||||
* cherrypy.config
|
||||
* cherrypy.thread_data
|
||||
* cherrypy.log
|
||||
* cherrypy.HTTPError, NotFound, and HTTPRedirect
|
||||
* cherrypy.lib
|
||||
|
||||
The EXTENSION LAYER allows advanced users to construct and share their own
|
||||
plugins. It consists of:
|
||||
|
||||
* Hook API
|
||||
* Tool API
|
||||
* Toolbox API
|
||||
* Dispatch API
|
||||
* Config Namespace API
|
||||
|
||||
Finally, there is the CORE LAYER, which uses the core API's to construct
|
||||
the default components which are available at higher layers. You can think
|
||||
of the default components as the 'reference implementation' for CherryPy.
|
||||
Megaframeworks (and advanced users) may replace the default components
|
||||
with customized or extended components. The core API's are:
|
||||
|
||||
* Application API
|
||||
* Engine API
|
||||
* Request API
|
||||
* Server API
|
||||
* WSGI API
|
||||
|
||||
These API's are described in the `CherryPy specification <https://bitbucket.org/cherrypy/cherrypy/wiki/CherryPySpec>`_.
|
||||
"""
|
||||
|
||||
__version__ = "3.6.0"
|
||||
|
||||
from cherrypy._cpcompat import urljoin as _urljoin, urlencode as _urlencode
|
||||
from cherrypy._cpcompat import basestring, unicodestr, set
|
||||
|
||||
from cherrypy._cperror import HTTPError, HTTPRedirect, InternalRedirect
|
||||
from cherrypy._cperror import NotFound, CherryPyException, TimeoutError
|
||||
|
||||
from cherrypy import _cpdispatch as dispatch
|
||||
|
||||
from cherrypy import _cptools
|
||||
tools = _cptools.default_toolbox
|
||||
Tool = _cptools.Tool
|
||||
|
||||
from cherrypy import _cprequest
|
||||
from cherrypy.lib import httputil as _httputil
|
||||
|
||||
from cherrypy import _cptree
|
||||
tree = _cptree.Tree()
|
||||
from cherrypy._cptree import Application
|
||||
from cherrypy import _cpwsgi as wsgi
|
||||
|
||||
from cherrypy import process
|
||||
try:
|
||||
from cherrypy.process import win32
|
||||
engine = win32.Win32Bus()
|
||||
engine.console_control_handler = win32.ConsoleCtrlHandler(engine)
|
||||
del win32
|
||||
except ImportError:
|
||||
engine = process.bus
|
||||
|
||||
|
||||
# Timeout monitor. We add two channels to the engine
|
||||
# to which cherrypy.Application will publish.
|
||||
engine.listeners['before_request'] = set()
|
||||
engine.listeners['after_request'] = set()
|
||||
|
||||
|
||||
class _TimeoutMonitor(process.plugins.Monitor):
|
||||
|
||||
def __init__(self, bus):
|
||||
self.servings = []
|
||||
process.plugins.Monitor.__init__(self, bus, self.run)
|
||||
|
||||
def before_request(self):
|
||||
self.servings.append((serving.request, serving.response))
|
||||
|
||||
def after_request(self):
|
||||
try:
|
||||
self.servings.remove((serving.request, serving.response))
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
def run(self):
|
||||
"""Check timeout on all responses. (Internal)"""
|
||||
for req, resp in self.servings:
|
||||
resp.check_timeout()
|
||||
engine.timeout_monitor = _TimeoutMonitor(engine)
|
||||
engine.timeout_monitor.subscribe()
|
||||
|
||||
engine.autoreload = process.plugins.Autoreloader(engine)
|
||||
engine.autoreload.subscribe()
|
||||
|
||||
engine.thread_manager = process.plugins.ThreadManager(engine)
|
||||
engine.thread_manager.subscribe()
|
||||
|
||||
engine.signal_handler = process.plugins.SignalHandler(engine)
|
||||
|
||||
|
||||
class _HandleSignalsPlugin(object):
|
||||
|
||||
"""Handle signals from other processes based on the configured
|
||||
platform handlers above."""
|
||||
|
||||
def __init__(self, bus):
|
||||
self.bus = bus
|
||||
|
||||
def subscribe(self):
|
||||
"""Add the handlers based on the platform"""
|
||||
if hasattr(self.bus, "signal_handler"):
|
||||
self.bus.signal_handler.subscribe()
|
||||
if hasattr(self.bus, "console_control_handler"):
|
||||
self.bus.console_control_handler.subscribe()
|
||||
|
||||
engine.signals = _HandleSignalsPlugin(engine)
|
||||
|
||||
|
||||
from cherrypy import _cpserver
|
||||
server = _cpserver.Server()
|
||||
server.subscribe()
|
||||
|
||||
|
||||
def quickstart(root=None, script_name="", config=None):
|
||||
"""Mount the given root, start the builtin server (and engine), then block.
|
||||
|
||||
root: an instance of a "controller class" (a collection of page handler
|
||||
methods) which represents the root of the application.
|
||||
script_name: a string containing the "mount point" of the application.
|
||||
This should start with a slash, and be the path portion of the URL
|
||||
at which to mount the given root. For example, if root.index() will
|
||||
handle requests to "http://www.example.com:8080/dept/app1/", then
|
||||
the script_name argument would be "/dept/app1".
|
||||
|
||||
It MUST NOT end in a slash. If the script_name refers to the root
|
||||
of the URI, it MUST be an empty string (not "/").
|
||||
config: a file or dict containing application config. If this contains
|
||||
a [global] section, those entries will be used in the global
|
||||
(site-wide) config.
|
||||
"""
|
||||
if config:
|
||||
_global_conf_alias.update(config)
|
||||
|
||||
tree.mount(root, script_name, config)
|
||||
|
||||
engine.signals.subscribe()
|
||||
engine.start()
|
||||
engine.block()
|
||||
|
||||
|
||||
from cherrypy._cpcompat import threadlocal as _local
|
||||
|
||||
|
||||
class _Serving(_local):
|
||||
|
||||
"""An interface for registering request and response objects.
|
||||
|
||||
Rather than have a separate "thread local" object for the request and
|
||||
the response, this class works as a single threadlocal container for
|
||||
both objects (and any others which developers wish to define). In this
|
||||
way, we can easily dump those objects when we stop/start a new HTTP
|
||||
conversation, yet still refer to them as module-level globals in a
|
||||
thread-safe way.
|
||||
"""
|
||||
|
||||
request = _cprequest.Request(_httputil.Host("127.0.0.1", 80),
|
||||
_httputil.Host("127.0.0.1", 1111))
|
||||
"""
|
||||
The request object for the current thread. In the main thread,
|
||||
and any threads which are not receiving HTTP requests, this is None."""
|
||||
|
||||
response = _cprequest.Response()
|
||||
"""
|
||||
The response object for the current thread. In the main thread,
|
||||
and any threads which are not receiving HTTP requests, this is None."""
|
||||
|
||||
def load(self, request, response):
|
||||
self.request = request
|
||||
self.response = response
|
||||
|
||||
def clear(self):
|
||||
"""Remove all attributes of self."""
|
||||
self.__dict__.clear()
|
||||
|
||||
serving = _Serving()
|
||||
|
||||
|
||||
class _ThreadLocalProxy(object):
|
||||
|
||||
__slots__ = ['__attrname__', '__dict__']
|
||||
|
||||
def __init__(self, attrname):
|
||||
self.__attrname__ = attrname
|
||||
|
||||
def __getattr__(self, name):
|
||||
child = getattr(serving, self.__attrname__)
|
||||
return getattr(child, name)
|
||||
|
||||
def __setattr__(self, name, value):
|
||||
if name in ("__attrname__", ):
|
||||
object.__setattr__(self, name, value)
|
||||
else:
|
||||
child = getattr(serving, self.__attrname__)
|
||||
setattr(child, name, value)
|
||||
|
||||
def __delattr__(self, name):
|
||||
child = getattr(serving, self.__attrname__)
|
||||
delattr(child, name)
|
||||
|
||||
def _get_dict(self):
|
||||
child = getattr(serving, self.__attrname__)
|
||||
d = child.__class__.__dict__.copy()
|
||||
d.update(child.__dict__)
|
||||
return d
|
||||
__dict__ = property(_get_dict)
|
||||
|
||||
def __getitem__(self, key):
|
||||
child = getattr(serving, self.__attrname__)
|
||||
return child[key]
|
||||
|
||||
def __setitem__(self, key, value):
|
||||
child = getattr(serving, self.__attrname__)
|
||||
child[key] = value
|
||||
|
||||
def __delitem__(self, key):
|
||||
child = getattr(serving, self.__attrname__)
|
||||
del child[key]
|
||||
|
||||
def __contains__(self, key):
|
||||
child = getattr(serving, self.__attrname__)
|
||||
return key in child
|
||||
|
||||
def __len__(self):
|
||||
child = getattr(serving, self.__attrname__)
|
||||
return len(child)
|
||||
|
||||
def __nonzero__(self):
|
||||
child = getattr(serving, self.__attrname__)
|
||||
return bool(child)
|
||||
# Python 3
|
||||
__bool__ = __nonzero__
|
||||
|
||||
# Create request and response object (the same objects will be used
|
||||
# throughout the entire life of the webserver, but will redirect
|
||||
# to the "serving" object)
|
||||
request = _ThreadLocalProxy('request')
|
||||
response = _ThreadLocalProxy('response')
|
||||
|
||||
# Create thread_data object as a thread-specific all-purpose storage
|
||||
|
||||
|
||||
class _ThreadData(_local):
|
||||
|
||||
"""A container for thread-specific data."""
|
||||
thread_data = _ThreadData()
|
||||
|
||||
|
||||
# Monkeypatch pydoc to allow help() to go through the threadlocal proxy.
|
||||
# Jan 2007: no Googleable examples of anyone else replacing pydoc.resolve.
|
||||
# The only other way would be to change what is returned from type(request)
|
||||
# and that's not possible in pure Python (you'd have to fake ob_type).
|
||||
def _cherrypy_pydoc_resolve(thing, forceload=0):
|
||||
"""Given an object or a path to an object, get the object and its name."""
|
||||
if isinstance(thing, _ThreadLocalProxy):
|
||||
thing = getattr(serving, thing.__attrname__)
|
||||
return _pydoc._builtin_resolve(thing, forceload)
|
||||
|
||||
try:
|
||||
import pydoc as _pydoc
|
||||
_pydoc._builtin_resolve = _pydoc.resolve
|
||||
_pydoc.resolve = _cherrypy_pydoc_resolve
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
|
||||
from cherrypy import _cplogging
|
||||
|
||||
|
||||
class _GlobalLogManager(_cplogging.LogManager):
|
||||
|
||||
"""A site-wide LogManager; routes to app.log or global log as appropriate.
|
||||
|
||||
This :class:`LogManager<cherrypy._cplogging.LogManager>` implements
|
||||
cherrypy.log() and cherrypy.log.access(). If either
|
||||
function is called during a request, the message will be sent to the
|
||||
logger for the current Application. If they are called outside of a
|
||||
request, the message will be sent to the site-wide logger.
|
||||
"""
|
||||
|
||||
def __call__(self, *args, **kwargs):
|
||||
"""Log the given message to the app.log or global log as appropriate.
|
||||
"""
|
||||
# Do NOT use try/except here. See
|
||||
# https://bitbucket.org/cherrypy/cherrypy/issue/945
|
||||
if hasattr(request, 'app') and hasattr(request.app, 'log'):
|
||||
log = request.app.log
|
||||
else:
|
||||
log = self
|
||||
return log.error(*args, **kwargs)
|
||||
|
||||
def access(self):
|
||||
"""Log an access message to the app.log or global log as appropriate.
|
||||
"""
|
||||
try:
|
||||
return request.app.log.access()
|
||||
except AttributeError:
|
||||
return _cplogging.LogManager.access(self)
|
||||
|
||||
|
||||
log = _GlobalLogManager()
|
||||
# Set a default screen handler on the global log.
|
||||
log.screen = True
|
||||
log.error_file = ''
|
||||
# Using an access file makes CP about 10% slower. Leave off by default.
|
||||
log.access_file = ''
|
||||
|
||||
|
||||
def _buslog(msg, level):
|
||||
log.error(msg, 'ENGINE', severity=level)
|
||||
engine.subscribe('log', _buslog)
|
||||
|
||||
# Helper functions for CP apps #
|
||||
|
||||
|
||||
def expose(func=None, alias=None):
|
||||
"""Expose the function, optionally providing an alias or set of aliases."""
|
||||
def expose_(func):
|
||||
func.exposed = True
|
||||
if alias is not None:
|
||||
if isinstance(alias, basestring):
|
||||
parents[alias.replace(".", "_")] = func
|
||||
else:
|
||||
for a in alias:
|
||||
parents[a.replace(".", "_")] = func
|
||||
return func
|
||||
|
||||
import sys
|
||||
import types
|
||||
if isinstance(func, (types.FunctionType, types.MethodType)):
|
||||
if alias is None:
|
||||
# @expose
|
||||
func.exposed = True
|
||||
return func
|
||||
else:
|
||||
# func = expose(func, alias)
|
||||
parents = sys._getframe(1).f_locals
|
||||
return expose_(func)
|
||||
elif func is None:
|
||||
if alias is None:
|
||||
# @expose()
|
||||
parents = sys._getframe(1).f_locals
|
||||
return expose_
|
||||
else:
|
||||
# @expose(alias="alias") or
|
||||
# @expose(alias=["alias1", "alias2"])
|
||||
parents = sys._getframe(1).f_locals
|
||||
return expose_
|
||||
else:
|
||||
# @expose("alias") or
|
||||
# @expose(["alias1", "alias2"])
|
||||
parents = sys._getframe(1).f_locals
|
||||
alias = func
|
||||
return expose_
|
||||
|
||||
|
||||
def popargs(*args, **kwargs):
|
||||
"""A decorator for _cp_dispatch
|
||||
(cherrypy.dispatch.Dispatcher.dispatch_method_name).
|
||||
|
||||
Optional keyword argument: handler=(Object or Function)
|
||||
|
||||
Provides a _cp_dispatch function that pops off path segments into
|
||||
cherrypy.request.params under the names specified. The dispatch
|
||||
is then forwarded on to the next vpath element.
|
||||
|
||||
Note that any existing (and exposed) member function of the class that
|
||||
popargs is applied to will override that value of the argument. For
|
||||
instance, if you have a method named "list" on the class decorated with
|
||||
popargs, then accessing "/list" will call that function instead of popping
|
||||
it off as the requested parameter. This restriction applies to all
|
||||
_cp_dispatch functions. The only way around this restriction is to create
|
||||
a "blank class" whose only function is to provide _cp_dispatch.
|
||||
|
||||
If there are path elements after the arguments, or more arguments
|
||||
are requested than are available in the vpath, then the 'handler'
|
||||
keyword argument specifies the next object to handle the parameterized
|
||||
request. If handler is not specified or is None, then self is used.
|
||||
If handler is a function rather than an instance, then that function
|
||||
will be called with the args specified and the return value from that
|
||||
function used as the next object INSTEAD of adding the parameters to
|
||||
cherrypy.request.args.
|
||||
|
||||
This decorator may be used in one of two ways:
|
||||
|
||||
As a class decorator:
|
||||
@cherrypy.popargs('year', 'month', 'day')
|
||||
class Blog:
|
||||
def index(self, year=None, month=None, day=None):
|
||||
#Process the parameters here; any url like
|
||||
#/, /2009, /2009/12, or /2009/12/31
|
||||
#will fill in the appropriate parameters.
|
||||
|
||||
def create(self):
|
||||
#This link will still be available at /create. Defined functions
|
||||
#take precedence over arguments.
|
||||
|
||||
Or as a member of a class:
|
||||
class Blog:
|
||||
_cp_dispatch = cherrypy.popargs('year', 'month', 'day')
|
||||
#...
|
||||
|
||||
The handler argument may be used to mix arguments with built in functions.
|
||||
For instance, the following setup allows different activities at the
|
||||
day, month, and year level:
|
||||
|
||||
class DayHandler:
|
||||
def index(self, year, month, day):
|
||||
#Do something with this day; probably list entries
|
||||
|
||||
def delete(self, year, month, day):
|
||||
#Delete all entries for this day
|
||||
|
||||
@cherrypy.popargs('day', handler=DayHandler())
|
||||
class MonthHandler:
|
||||
def index(self, year, month):
|
||||
#Do something with this month; probably list entries
|
||||
|
||||
def delete(self, year, month):
|
||||
#Delete all entries for this month
|
||||
|
||||
@cherrypy.popargs('month', handler=MonthHandler())
|
||||
class YearHandler:
|
||||
def index(self, year):
|
||||
#Do something with this year
|
||||
|
||||
#...
|
||||
|
||||
@cherrypy.popargs('year', handler=YearHandler())
|
||||
class Root:
|
||||
def index(self):
|
||||
#...
|
||||
|
||||
"""
|
||||
|
||||
# Since keyword arg comes after *args, we have to process it ourselves
|
||||
# for lower versions of python.
|
||||
|
||||
handler = None
|
||||
handler_call = False
|
||||
for k, v in kwargs.items():
|
||||
if k == 'handler':
|
||||
handler = v
|
||||
else:
|
||||
raise TypeError(
|
||||
"cherrypy.popargs() got an unexpected keyword argument '{0}'"
|
||||
.format(k)
|
||||
)
|
||||
|
||||
import inspect
|
||||
|
||||
if handler is not None \
|
||||
and (hasattr(handler, '__call__') or inspect.isclass(handler)):
|
||||
handler_call = True
|
||||
|
||||
def decorated(cls_or_self=None, vpath=None):
|
||||
if inspect.isclass(cls_or_self):
|
||||
# cherrypy.popargs is a class decorator
|
||||
cls = cls_or_self
|
||||
setattr(cls, dispatch.Dispatcher.dispatch_method_name, decorated)
|
||||
return cls
|
||||
|
||||
# We're in the actual function
|
||||
self = cls_or_self
|
||||
parms = {}
|
||||
for arg in args:
|
||||
if not vpath:
|
||||
break
|
||||
parms[arg] = vpath.pop(0)
|
||||
|
||||
if handler is not None:
|
||||
if handler_call:
|
||||
return handler(**parms)
|
||||
else:
|
||||
request.params.update(parms)
|
||||
return handler
|
||||
|
||||
request.params.update(parms)
|
||||
|
||||
# If we are the ultimate handler, then to prevent our _cp_dispatch
|
||||
# from being called again, we will resolve remaining elements through
|
||||
# getattr() directly.
|
||||
if vpath:
|
||||
return getattr(self, vpath.pop(0), None)
|
||||
else:
|
||||
return self
|
||||
|
||||
return decorated
|
||||
|
||||
|
||||
def url(path="", qs="", script_name=None, base=None, relative=None):
|
||||
"""Create an absolute URL for the given path.
|
||||
|
||||
If 'path' starts with a slash ('/'), this will return
|
||||
(base + script_name + path + qs).
|
||||
If it does not start with a slash, this returns
|
||||
(base + script_name [+ request.path_info] + path + qs).
|
||||
|
||||
If script_name is None, cherrypy.request will be used
|
||||
to find a script_name, if available.
|
||||
|
||||
If base is None, cherrypy.request.base will be used (if available).
|
||||
Note that you can use cherrypy.tools.proxy to change this.
|
||||
|
||||
Finally, note that this function can be used to obtain an absolute URL
|
||||
for the current request path (minus the querystring) by passing no args.
|
||||
If you call url(qs=cherrypy.request.query_string), you should get the
|
||||
original browser URL (assuming no internal redirections).
|
||||
|
||||
If relative is None or not provided, request.app.relative_urls will
|
||||
be used (if available, else False). If False, the output will be an
|
||||
absolute URL (including the scheme, host, vhost, and script_name).
|
||||
If True, the output will instead be a URL that is relative to the
|
||||
current request path, perhaps including '..' atoms. If relative is
|
||||
the string 'server', the output will instead be a URL that is
|
||||
relative to the server root; i.e., it will start with a slash.
|
||||
"""
|
||||
if isinstance(qs, (tuple, list, dict)):
|
||||
qs = _urlencode(qs)
|
||||
if qs:
|
||||
qs = '?' + qs
|
||||
|
||||
if request.app:
|
||||
if not path.startswith("/"):
|
||||
# Append/remove trailing slash from path_info as needed
|
||||
# (this is to support mistyped URL's without redirecting;
|
||||
# if you want to redirect, use tools.trailing_slash).
|
||||
pi = request.path_info
|
||||
if request.is_index is True:
|
||||
if not pi.endswith('/'):
|
||||
pi = pi + '/'
|
||||
elif request.is_index is False:
|
||||
if pi.endswith('/') and pi != '/':
|
||||
pi = pi[:-1]
|
||||
|
||||
if path == "":
|
||||
path = pi
|
||||
else:
|
||||
path = _urljoin(pi, path)
|
||||
|
||||
if script_name is None:
|
||||
script_name = request.script_name
|
||||
if base is None:
|
||||
base = request.base
|
||||
|
||||
newurl = base + script_name + path + qs
|
||||
else:
|
||||
# No request.app (we're being called outside a request).
|
||||
# We'll have to guess the base from server.* attributes.
|
||||
# This will produce very different results from the above
|
||||
# if you're using vhosts or tools.proxy.
|
||||
if base is None:
|
||||
base = server.base()
|
||||
|
||||
path = (script_name or "") + path
|
||||
newurl = base + path + qs
|
||||
|
||||
if './' in newurl:
|
||||
# Normalize the URL by removing ./ and ../
|
||||
atoms = []
|
||||
for atom in newurl.split('/'):
|
||||
if atom == '.':
|
||||
pass
|
||||
elif atom == '..':
|
||||
atoms.pop()
|
||||
else:
|
||||
atoms.append(atom)
|
||||
newurl = '/'.join(atoms)
|
||||
|
||||
# At this point, we should have a fully-qualified absolute URL.
|
||||
|
||||
if relative is None:
|
||||
relative = getattr(request.app, "relative_urls", False)
|
||||
|
||||
# See http://www.ietf.org/rfc/rfc2396.txt
|
||||
if relative == 'server':
|
||||
# "A relative reference beginning with a single slash character is
|
||||
# termed an absolute-path reference, as defined by <abs_path>..."
|
||||
# This is also sometimes called "server-relative".
|
||||
newurl = '/' + '/'.join(newurl.split('/', 3)[3:])
|
||||
elif relative:
|
||||
# "A relative reference that does not begin with a scheme name
|
||||
# or a slash character is termed a relative-path reference."
|
||||
old = url(relative=False).split('/')[:-1]
|
||||
new = newurl.split('/')
|
||||
while old and new:
|
||||
a, b = old[0], new[0]
|
||||
if a != b:
|
||||
break
|
||||
old.pop(0)
|
||||
new.pop(0)
|
||||
new = (['..'] * len(old)) + new
|
||||
newurl = '/'.join(new)
|
||||
|
||||
return newurl
|
||||
|
||||
|
||||
# import _cpconfig last so it can reference other top-level objects
|
||||
from cherrypy import _cpconfig
|
||||
# Use _global_conf_alias so quickstart can use 'config' as an arg
|
||||
# without shadowing cherrypy.config.
|
||||
config = _global_conf_alias = _cpconfig.Config()
|
||||
config.defaults = {
|
||||
'tools.log_tracebacks.on': True,
|
||||
'tools.log_headers.on': True,
|
||||
'tools.trailing_slash.on': True,
|
||||
'tools.encode.on': True
|
||||
}
|
||||
config.namespaces["log"] = lambda k, v: setattr(log, k, v)
|
||||
config.namespaces["checker"] = lambda k, v: setattr(checker, k, v)
|
||||
# Must reset to get our defaults applied.
|
||||
config.reset()
|
||||
|
||||
from cherrypy import _cpchecker
|
||||
checker = _cpchecker.Checker()
|
||||
engine.subscribe('start', checker)
|
||||
332
lib/cherrypy/_cpchecker.py
Normal file
332
lib/cherrypy/_cpchecker.py
Normal file
@@ -0,0 +1,332 @@
|
||||
import os
|
||||
import warnings
|
||||
|
||||
import cherrypy
|
||||
from cherrypy._cpcompat import iteritems, copykeys, builtins
|
||||
|
||||
|
||||
class Checker(object):
|
||||
|
||||
"""A checker for CherryPy sites and their mounted applications.
|
||||
|
||||
When this object is called at engine startup, it executes each
|
||||
of its own methods whose names start with ``check_``. If you wish
|
||||
to disable selected checks, simply add a line in your global
|
||||
config which sets the appropriate method to False::
|
||||
|
||||
[global]
|
||||
checker.check_skipped_app_config = False
|
||||
|
||||
You may also dynamically add or replace ``check_*`` methods in this way.
|
||||
"""
|
||||
|
||||
on = True
|
||||
"""If True (the default), run all checks; if False, turn off all checks."""
|
||||
|
||||
def __init__(self):
|
||||
self._populate_known_types()
|
||||
|
||||
def __call__(self):
|
||||
"""Run all check_* methods."""
|
||||
if self.on:
|
||||
oldformatwarning = warnings.formatwarning
|
||||
warnings.formatwarning = self.formatwarning
|
||||
try:
|
||||
for name in dir(self):
|
||||
if name.startswith("check_"):
|
||||
method = getattr(self, name)
|
||||
if method and hasattr(method, '__call__'):
|
||||
method()
|
||||
finally:
|
||||
warnings.formatwarning = oldformatwarning
|
||||
|
||||
def formatwarning(self, message, category, filename, lineno, line=None):
|
||||
"""Function to format a warning."""
|
||||
return "CherryPy Checker:\n%s\n\n" % message
|
||||
|
||||
# This value should be set inside _cpconfig.
|
||||
global_config_contained_paths = False
|
||||
|
||||
def check_app_config_entries_dont_start_with_script_name(self):
|
||||
"""Check for Application config with sections that repeat script_name.
|
||||
"""
|
||||
for sn, app in cherrypy.tree.apps.items():
|
||||
if not isinstance(app, cherrypy.Application):
|
||||
continue
|
||||
if not app.config:
|
||||
continue
|
||||
if sn == '':
|
||||
continue
|
||||
sn_atoms = sn.strip("/").split("/")
|
||||
for key in app.config.keys():
|
||||
key_atoms = key.strip("/").split("/")
|
||||
if key_atoms[:len(sn_atoms)] == sn_atoms:
|
||||
warnings.warn(
|
||||
"The application mounted at %r has config "
|
||||
"entries that start with its script name: %r" % (sn,
|
||||
key))
|
||||
|
||||
def check_site_config_entries_in_app_config(self):
|
||||
"""Check for mounted Applications that have site-scoped config."""
|
||||
for sn, app in iteritems(cherrypy.tree.apps):
|
||||
if not isinstance(app, cherrypy.Application):
|
||||
continue
|
||||
|
||||
msg = []
|
||||
for section, entries in iteritems(app.config):
|
||||
if section.startswith('/'):
|
||||
for key, value in iteritems(entries):
|
||||
for n in ("engine.", "server.", "tree.", "checker."):
|
||||
if key.startswith(n):
|
||||
msg.append("[%s] %s = %s" %
|
||||
(section, key, value))
|
||||
if msg:
|
||||
msg.insert(0,
|
||||
"The application mounted at %r contains the "
|
||||
"following config entries, which are only allowed "
|
||||
"in site-wide config. Move them to a [global] "
|
||||
"section and pass them to cherrypy.config.update() "
|
||||
"instead of tree.mount()." % sn)
|
||||
warnings.warn(os.linesep.join(msg))
|
||||
|
||||
def check_skipped_app_config(self):
|
||||
"""Check for mounted Applications that have no config."""
|
||||
for sn, app in cherrypy.tree.apps.items():
|
||||
if not isinstance(app, cherrypy.Application):
|
||||
continue
|
||||
if not app.config:
|
||||
msg = "The Application mounted at %r has an empty config." % sn
|
||||
if self.global_config_contained_paths:
|
||||
msg += (" It looks like the config you passed to "
|
||||
"cherrypy.config.update() contains application-"
|
||||
"specific sections. You must explicitly pass "
|
||||
"application config via "
|
||||
"cherrypy.tree.mount(..., config=app_config)")
|
||||
warnings.warn(msg)
|
||||
return
|
||||
|
||||
def check_app_config_brackets(self):
|
||||
"""Check for Application config with extraneous brackets in section
|
||||
names.
|
||||
"""
|
||||
for sn, app in cherrypy.tree.apps.items():
|
||||
if not isinstance(app, cherrypy.Application):
|
||||
continue
|
||||
if not app.config:
|
||||
continue
|
||||
for key in app.config.keys():
|
||||
if key.startswith("[") or key.endswith("]"):
|
||||
warnings.warn(
|
||||
"The application mounted at %r has config "
|
||||
"section names with extraneous brackets: %r. "
|
||||
"Config *files* need brackets; config *dicts* "
|
||||
"(e.g. passed to tree.mount) do not." % (sn, key))
|
||||
|
||||
def check_static_paths(self):
|
||||
"""Check Application config for incorrect static paths."""
|
||||
# Use the dummy Request object in the main thread.
|
||||
request = cherrypy.request
|
||||
for sn, app in cherrypy.tree.apps.items():
|
||||
if not isinstance(app, cherrypy.Application):
|
||||
continue
|
||||
request.app = app
|
||||
for section in app.config:
|
||||
# get_resource will populate request.config
|
||||
request.get_resource(section + "/dummy.html")
|
||||
conf = request.config.get
|
||||
|
||||
if conf("tools.staticdir.on", False):
|
||||
msg = ""
|
||||
root = conf("tools.staticdir.root")
|
||||
dir = conf("tools.staticdir.dir")
|
||||
if dir is None:
|
||||
msg = "tools.staticdir.dir is not set."
|
||||
else:
|
||||
fulldir = ""
|
||||
if os.path.isabs(dir):
|
||||
fulldir = dir
|
||||
if root:
|
||||
msg = ("dir is an absolute path, even "
|
||||
"though a root is provided.")
|
||||
testdir = os.path.join(root, dir[1:])
|
||||
if os.path.exists(testdir):
|
||||
msg += (
|
||||
"\nIf you meant to serve the "
|
||||
"filesystem folder at %r, remove the "
|
||||
"leading slash from dir." % (testdir,))
|
||||
else:
|
||||
if not root:
|
||||
msg = (
|
||||
"dir is a relative path and "
|
||||
"no root provided.")
|
||||
else:
|
||||
fulldir = os.path.join(root, dir)
|
||||
if not os.path.isabs(fulldir):
|
||||
msg = ("%r is not an absolute path." % (
|
||||
fulldir,))
|
||||
|
||||
if fulldir and not os.path.exists(fulldir):
|
||||
if msg:
|
||||
msg += "\n"
|
||||
msg += ("%r (root + dir) is not an existing "
|
||||
"filesystem path." % fulldir)
|
||||
|
||||
if msg:
|
||||
warnings.warn("%s\nsection: [%s]\nroot: %r\ndir: %r"
|
||||
% (msg, section, root, dir))
|
||||
|
||||
# -------------------------- Compatibility -------------------------- #
|
||||
obsolete = {
|
||||
'server.default_content_type': 'tools.response_headers.headers',
|
||||
'log_access_file': 'log.access_file',
|
||||
'log_config_options': None,
|
||||
'log_file': 'log.error_file',
|
||||
'log_file_not_found': None,
|
||||
'log_request_headers': 'tools.log_headers.on',
|
||||
'log_to_screen': 'log.screen',
|
||||
'show_tracebacks': 'request.show_tracebacks',
|
||||
'throw_errors': 'request.throw_errors',
|
||||
'profiler.on': ('cherrypy.tree.mount(profiler.make_app('
|
||||
'cherrypy.Application(Root())))'),
|
||||
}
|
||||
|
||||
deprecated = {}
|
||||
|
||||
def _compat(self, config):
|
||||
"""Process config and warn on each obsolete or deprecated entry."""
|
||||
for section, conf in config.items():
|
||||
if isinstance(conf, dict):
|
||||
for k, v in conf.items():
|
||||
if k in self.obsolete:
|
||||
warnings.warn("%r is obsolete. Use %r instead.\n"
|
||||
"section: [%s]" %
|
||||
(k, self.obsolete[k], section))
|
||||
elif k in self.deprecated:
|
||||
warnings.warn("%r is deprecated. Use %r instead.\n"
|
||||
"section: [%s]" %
|
||||
(k, self.deprecated[k], section))
|
||||
else:
|
||||
if section in self.obsolete:
|
||||
warnings.warn("%r is obsolete. Use %r instead."
|
||||
% (section, self.obsolete[section]))
|
||||
elif section in self.deprecated:
|
||||
warnings.warn("%r is deprecated. Use %r instead."
|
||||
% (section, self.deprecated[section]))
|
||||
|
||||
def check_compatibility(self):
|
||||
"""Process config and warn on each obsolete or deprecated entry."""
|
||||
self._compat(cherrypy.config)
|
||||
for sn, app in cherrypy.tree.apps.items():
|
||||
if not isinstance(app, cherrypy.Application):
|
||||
continue
|
||||
self._compat(app.config)
|
||||
|
||||
# ------------------------ Known Namespaces ------------------------ #
|
||||
extra_config_namespaces = []
|
||||
|
||||
def _known_ns(self, app):
|
||||
ns = ["wsgi"]
|
||||
ns.extend(copykeys(app.toolboxes))
|
||||
ns.extend(copykeys(app.namespaces))
|
||||
ns.extend(copykeys(app.request_class.namespaces))
|
||||
ns.extend(copykeys(cherrypy.config.namespaces))
|
||||
ns += self.extra_config_namespaces
|
||||
|
||||
for section, conf in app.config.items():
|
||||
is_path_section = section.startswith("/")
|
||||
if is_path_section and isinstance(conf, dict):
|
||||
for k, v in conf.items():
|
||||
atoms = k.split(".")
|
||||
if len(atoms) > 1:
|
||||
if atoms[0] not in ns:
|
||||
# Spit out a special warning if a known
|
||||
# namespace is preceded by "cherrypy."
|
||||
if atoms[0] == "cherrypy" and atoms[1] in ns:
|
||||
msg = (
|
||||
"The config entry %r is invalid; "
|
||||
"try %r instead.\nsection: [%s]"
|
||||
% (k, ".".join(atoms[1:]), section))
|
||||
else:
|
||||
msg = (
|
||||
"The config entry %r is invalid, "
|
||||
"because the %r config namespace "
|
||||
"is unknown.\n"
|
||||
"section: [%s]" % (k, atoms[0], section))
|
||||
warnings.warn(msg)
|
||||
elif atoms[0] == "tools":
|
||||
if atoms[1] not in dir(cherrypy.tools):
|
||||
msg = (
|
||||
"The config entry %r may be invalid, "
|
||||
"because the %r tool was not found.\n"
|
||||
"section: [%s]" % (k, atoms[1], section))
|
||||
warnings.warn(msg)
|
||||
|
||||
def check_config_namespaces(self):
|
||||
"""Process config and warn on each unknown config namespace."""
|
||||
for sn, app in cherrypy.tree.apps.items():
|
||||
if not isinstance(app, cherrypy.Application):
|
||||
continue
|
||||
self._known_ns(app)
|
||||
|
||||
# -------------------------- Config Types -------------------------- #
|
||||
known_config_types = {}
|
||||
|
||||
def _populate_known_types(self):
|
||||
b = [x for x in vars(builtins).values()
|
||||
if type(x) is type(str)]
|
||||
|
||||
def traverse(obj, namespace):
|
||||
for name in dir(obj):
|
||||
# Hack for 3.2's warning about body_params
|
||||
if name == 'body_params':
|
||||
continue
|
||||
vtype = type(getattr(obj, name, None))
|
||||
if vtype in b:
|
||||
self.known_config_types[namespace + "." + name] = vtype
|
||||
|
||||
traverse(cherrypy.request, "request")
|
||||
traverse(cherrypy.response, "response")
|
||||
traverse(cherrypy.server, "server")
|
||||
traverse(cherrypy.engine, "engine")
|
||||
traverse(cherrypy.log, "log")
|
||||
|
||||
def _known_types(self, config):
|
||||
msg = ("The config entry %r in section %r is of type %r, "
|
||||
"which does not match the expected type %r.")
|
||||
|
||||
for section, conf in config.items():
|
||||
if isinstance(conf, dict):
|
||||
for k, v in conf.items():
|
||||
if v is not None:
|
||||
expected_type = self.known_config_types.get(k, None)
|
||||
vtype = type(v)
|
||||
if expected_type and vtype != expected_type:
|
||||
warnings.warn(msg % (k, section, vtype.__name__,
|
||||
expected_type.__name__))
|
||||
else:
|
||||
k, v = section, conf
|
||||
if v is not None:
|
||||
expected_type = self.known_config_types.get(k, None)
|
||||
vtype = type(v)
|
||||
if expected_type and vtype != expected_type:
|
||||
warnings.warn(msg % (k, section, vtype.__name__,
|
||||
expected_type.__name__))
|
||||
|
||||
def check_config_types(self):
|
||||
"""Assert that config values are of the same type as default values."""
|
||||
self._known_types(cherrypy.config)
|
||||
for sn, app in cherrypy.tree.apps.items():
|
||||
if not isinstance(app, cherrypy.Application):
|
||||
continue
|
||||
self._known_types(app.config)
|
||||
|
||||
# -------------------- Specific config warnings -------------------- #
|
||||
def check_localhost(self):
|
||||
"""Warn if any socket_host is 'localhost'. See #711."""
|
||||
for k, v in cherrypy.config.items():
|
||||
if k == 'server.socket_host' and v == 'localhost':
|
||||
warnings.warn("The use of 'localhost' as a socket host can "
|
||||
"cause problems on newer systems, since "
|
||||
"'localhost' can map to either an IPv4 or an "
|
||||
"IPv6 address. You should use '127.0.0.1' "
|
||||
"or '[::1]' instead.")
|
||||
383
lib/cherrypy/_cpcompat.py
Normal file
383
lib/cherrypy/_cpcompat.py
Normal file
@@ -0,0 +1,383 @@
|
||||
"""Compatibility code for using CherryPy with various versions of Python.
|
||||
|
||||
CherryPy 3.2 is compatible with Python versions 2.3+. This module provides a
|
||||
useful abstraction over the differences between Python versions, sometimes by
|
||||
preferring a newer idiom, sometimes an older one, and sometimes a custom one.
|
||||
|
||||
In particular, Python 2 uses str and '' for byte strings, while Python 3
|
||||
uses str and '' for unicode strings. We will call each of these the 'native
|
||||
string' type for each version. Because of this major difference, this module
|
||||
provides new 'bytestr', 'unicodestr', and 'nativestr' attributes, as well as
|
||||
two functions: 'ntob', which translates native strings (of type 'str') into
|
||||
byte strings regardless of Python version, and 'ntou', which translates native
|
||||
strings to unicode strings. This also provides a 'BytesIO' name for dealing
|
||||
specifically with bytes, and a 'StringIO' name for dealing with native strings.
|
||||
It also provides a 'base64_decode' function with native strings as input and
|
||||
output.
|
||||
"""
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import threading
|
||||
|
||||
if sys.version_info >= (3, 0):
|
||||
py3k = True
|
||||
bytestr = bytes
|
||||
unicodestr = str
|
||||
nativestr = unicodestr
|
||||
basestring = (bytes, str)
|
||||
|
||||
def ntob(n, encoding='ISO-8859-1'):
|
||||
"""Return the given native string as a byte string in the given
|
||||
encoding.
|
||||
"""
|
||||
assert_native(n)
|
||||
# In Python 3, the native string type is unicode
|
||||
return n.encode(encoding)
|
||||
|
||||
def ntou(n, encoding='ISO-8859-1'):
|
||||
"""Return the given native string as a unicode string with the given
|
||||
encoding.
|
||||
"""
|
||||
assert_native(n)
|
||||
# In Python 3, the native string type is unicode
|
||||
return n
|
||||
|
||||
def tonative(n, encoding='ISO-8859-1'):
|
||||
"""Return the given string as a native string in the given encoding."""
|
||||
# In Python 3, the native string type is unicode
|
||||
if isinstance(n, bytes):
|
||||
return n.decode(encoding)
|
||||
return n
|
||||
# type("")
|
||||
from io import StringIO
|
||||
# bytes:
|
||||
from io import BytesIO as BytesIO
|
||||
else:
|
||||
# Python 2
|
||||
py3k = False
|
||||
bytestr = str
|
||||
unicodestr = unicode
|
||||
nativestr = bytestr
|
||||
basestring = basestring
|
||||
|
||||
def ntob(n, encoding='ISO-8859-1'):
|
||||
"""Return the given native string as a byte string in the given
|
||||
encoding.
|
||||
"""
|
||||
assert_native(n)
|
||||
# In Python 2, the native string type is bytes. Assume it's already
|
||||
# in the given encoding, which for ISO-8859-1 is almost always what
|
||||
# was intended.
|
||||
return n
|
||||
|
||||
def ntou(n, encoding='ISO-8859-1'):
|
||||
"""Return the given native string as a unicode string with the given
|
||||
encoding.
|
||||
"""
|
||||
assert_native(n)
|
||||
# In Python 2, the native string type is bytes.
|
||||
# First, check for the special encoding 'escape'. The test suite uses
|
||||
# this to signal that it wants to pass a string with embedded \uXXXX
|
||||
# escapes, but without having to prefix it with u'' for Python 2,
|
||||
# but no prefix for Python 3.
|
||||
if encoding == 'escape':
|
||||
return unicode(
|
||||
re.sub(r'\\u([0-9a-zA-Z]{4})',
|
||||
lambda m: unichr(int(m.group(1), 16)),
|
||||
n.decode('ISO-8859-1')))
|
||||
# Assume it's already in the given encoding, which for ISO-8859-1
|
||||
# is almost always what was intended.
|
||||
return n.decode(encoding)
|
||||
|
||||
def tonative(n, encoding='ISO-8859-1'):
|
||||
"""Return the given string as a native string in the given encoding."""
|
||||
# In Python 2, the native string type is bytes.
|
||||
if isinstance(n, unicode):
|
||||
return n.encode(encoding)
|
||||
return n
|
||||
try:
|
||||
# type("")
|
||||
from cStringIO import StringIO
|
||||
except ImportError:
|
||||
# type("")
|
||||
from StringIO import StringIO
|
||||
# bytes:
|
||||
BytesIO = StringIO
|
||||
|
||||
|
||||
def assert_native(n):
|
||||
if not isinstance(n, nativestr):
|
||||
raise TypeError("n must be a native str (got %s)" % type(n).__name__)
|
||||
|
||||
try:
|
||||
set = set
|
||||
except NameError:
|
||||
from sets import Set as set
|
||||
|
||||
try:
|
||||
# Python 3.1+
|
||||
from base64 import decodebytes as _base64_decodebytes
|
||||
except ImportError:
|
||||
# Python 3.0-
|
||||
# since CherryPy claims compability with Python 2.3, we must use
|
||||
# the legacy API of base64
|
||||
from base64 import decodestring as _base64_decodebytes
|
||||
|
||||
|
||||
def base64_decode(n, encoding='ISO-8859-1'):
|
||||
"""Return the native string base64-decoded (as a native string)."""
|
||||
if isinstance(n, unicodestr):
|
||||
b = n.encode(encoding)
|
||||
else:
|
||||
b = n
|
||||
b = _base64_decodebytes(b)
|
||||
if nativestr is unicodestr:
|
||||
return b.decode(encoding)
|
||||
else:
|
||||
return b
|
||||
|
||||
try:
|
||||
# Python 2.5+
|
||||
from hashlib import md5
|
||||
except ImportError:
|
||||
from md5 import new as md5
|
||||
|
||||
try:
|
||||
# Python 2.5+
|
||||
from hashlib import sha1 as sha
|
||||
except ImportError:
|
||||
from sha import new as sha
|
||||
|
||||
try:
|
||||
sorted = sorted
|
||||
except NameError:
|
||||
def sorted(i):
|
||||
i = i[:]
|
||||
i.sort()
|
||||
return i
|
||||
|
||||
try:
|
||||
reversed = reversed
|
||||
except NameError:
|
||||
def reversed(x):
|
||||
i = len(x)
|
||||
while i > 0:
|
||||
i -= 1
|
||||
yield x[i]
|
||||
|
||||
try:
|
||||
# Python 3
|
||||
from urllib.parse import urljoin, urlencode
|
||||
from urllib.parse import quote, quote_plus
|
||||
from urllib.request import unquote, urlopen
|
||||
from urllib.request import parse_http_list, parse_keqv_list
|
||||
except ImportError:
|
||||
# Python 2
|
||||
from urlparse import urljoin
|
||||
from urllib import urlencode, urlopen
|
||||
from urllib import quote, quote_plus
|
||||
from urllib import unquote
|
||||
from urllib2 import parse_http_list, parse_keqv_list
|
||||
|
||||
try:
|
||||
from threading import local as threadlocal
|
||||
except ImportError:
|
||||
from cherrypy._cpthreadinglocal import local as threadlocal
|
||||
|
||||
try:
|
||||
dict.iteritems
|
||||
# Python 2
|
||||
iteritems = lambda d: d.iteritems()
|
||||
copyitems = lambda d: d.items()
|
||||
except AttributeError:
|
||||
# Python 3
|
||||
iteritems = lambda d: d.items()
|
||||
copyitems = lambda d: list(d.items())
|
||||
|
||||
try:
|
||||
dict.iterkeys
|
||||
# Python 2
|
||||
iterkeys = lambda d: d.iterkeys()
|
||||
copykeys = lambda d: d.keys()
|
||||
except AttributeError:
|
||||
# Python 3
|
||||
iterkeys = lambda d: d.keys()
|
||||
copykeys = lambda d: list(d.keys())
|
||||
|
||||
try:
|
||||
dict.itervalues
|
||||
# Python 2
|
||||
itervalues = lambda d: d.itervalues()
|
||||
copyvalues = lambda d: d.values()
|
||||
except AttributeError:
|
||||
# Python 3
|
||||
itervalues = lambda d: d.values()
|
||||
copyvalues = lambda d: list(d.values())
|
||||
|
||||
try:
|
||||
# Python 3
|
||||
import builtins
|
||||
except ImportError:
|
||||
# Python 2
|
||||
import __builtin__ as builtins
|
||||
|
||||
try:
|
||||
# Python 2. We try Python 2 first clients on Python 2
|
||||
# don't try to import the 'http' module from cherrypy.lib
|
||||
from Cookie import SimpleCookie, CookieError
|
||||
from httplib import BadStatusLine, HTTPConnection, IncompleteRead
|
||||
from httplib import NotConnected
|
||||
from BaseHTTPServer import BaseHTTPRequestHandler
|
||||
except ImportError:
|
||||
# Python 3
|
||||
from http.cookies import SimpleCookie, CookieError
|
||||
from http.client import BadStatusLine, HTTPConnection, IncompleteRead
|
||||
from http.client import NotConnected
|
||||
from http.server import BaseHTTPRequestHandler
|
||||
|
||||
# Some platforms don't expose HTTPSConnection, so handle it separately
|
||||
if py3k:
|
||||
try:
|
||||
from http.client import HTTPSConnection
|
||||
except ImportError:
|
||||
# Some platforms which don't have SSL don't expose HTTPSConnection
|
||||
HTTPSConnection = None
|
||||
else:
|
||||
try:
|
||||
from httplib import HTTPSConnection
|
||||
except ImportError:
|
||||
HTTPSConnection = None
|
||||
|
||||
try:
|
||||
# Python 2
|
||||
xrange = xrange
|
||||
except NameError:
|
||||
# Python 3
|
||||
xrange = range
|
||||
|
||||
import threading
|
||||
if hasattr(threading.Thread, "daemon"):
|
||||
# Python 2.6+
|
||||
def get_daemon(t):
|
||||
return t.daemon
|
||||
|
||||
def set_daemon(t, val):
|
||||
t.daemon = val
|
||||
else:
|
||||
def get_daemon(t):
|
||||
return t.isDaemon()
|
||||
|
||||
def set_daemon(t, val):
|
||||
t.setDaemon(val)
|
||||
|
||||
try:
|
||||
from email.utils import formatdate
|
||||
|
||||
def HTTPDate(timeval=None):
|
||||
return formatdate(timeval, usegmt=True)
|
||||
except ImportError:
|
||||
from rfc822 import formatdate as HTTPDate
|
||||
|
||||
try:
|
||||
# Python 3
|
||||
from urllib.parse import unquote as parse_unquote
|
||||
|
||||
def unquote_qs(atom, encoding, errors='strict'):
|
||||
return parse_unquote(
|
||||
atom.replace('+', ' '),
|
||||
encoding=encoding,
|
||||
errors=errors)
|
||||
except ImportError:
|
||||
# Python 2
|
||||
from urllib import unquote as parse_unquote
|
||||
|
||||
def unquote_qs(atom, encoding, errors='strict'):
|
||||
return parse_unquote(atom.replace('+', ' ')).decode(encoding, errors)
|
||||
|
||||
try:
|
||||
# Prefer simplejson, which is usually more advanced than the builtin
|
||||
# module.
|
||||
import simplejson as json
|
||||
json_decode = json.JSONDecoder().decode
|
||||
_json_encode = json.JSONEncoder().iterencode
|
||||
except ImportError:
|
||||
if sys.version_info >= (2, 6):
|
||||
# Python >=2.6 : json is part of the standard library
|
||||
import json
|
||||
json_decode = json.JSONDecoder().decode
|
||||
_json_encode = json.JSONEncoder().iterencode
|
||||
else:
|
||||
json = None
|
||||
|
||||
def json_decode(s):
|
||||
raise ValueError('No JSON library is available')
|
||||
|
||||
def _json_encode(s):
|
||||
raise ValueError('No JSON library is available')
|
||||
finally:
|
||||
if json and py3k:
|
||||
# The two Python 3 implementations (simplejson/json)
|
||||
# outputs str. We need bytes.
|
||||
def json_encode(value):
|
||||
for chunk in _json_encode(value):
|
||||
yield chunk.encode('utf8')
|
||||
else:
|
||||
json_encode = _json_encode
|
||||
|
||||
|
||||
try:
|
||||
import cPickle as pickle
|
||||
except ImportError:
|
||||
# In Python 2, pickle is a Python version.
|
||||
# In Python 3, pickle is the sped-up C version.
|
||||
import pickle
|
||||
|
||||
try:
|
||||
os.urandom(20)
|
||||
import binascii
|
||||
|
||||
def random20():
|
||||
return binascii.hexlify(os.urandom(20)).decode('ascii')
|
||||
except (AttributeError, NotImplementedError):
|
||||
import random
|
||||
# os.urandom not available until Python 2.4. Fall back to random.random.
|
||||
|
||||
def random20():
|
||||
return sha('%s' % random.random()).hexdigest()
|
||||
|
||||
try:
|
||||
from _thread import get_ident as get_thread_ident
|
||||
except ImportError:
|
||||
from thread import get_ident as get_thread_ident
|
||||
|
||||
try:
|
||||
# Python 3
|
||||
next = next
|
||||
except NameError:
|
||||
# Python 2
|
||||
def next(i):
|
||||
return i.next()
|
||||
|
||||
if sys.version_info >= (3, 3):
|
||||
Timer = threading.Timer
|
||||
Event = threading.Event
|
||||
else:
|
||||
# Python 3.2 and earlier
|
||||
Timer = threading._Timer
|
||||
Event = threading._Event
|
||||
|
||||
# Prior to Python 2.6, the Thread class did not have a .daemon property.
|
||||
# This mix-in adds that property.
|
||||
|
||||
|
||||
class SetDaemonProperty:
|
||||
|
||||
def __get_daemon(self):
|
||||
return self.isDaemon()
|
||||
|
||||
def __set_daemon(self, daemon):
|
||||
self.setDaemon(daemon)
|
||||
|
||||
if sys.version_info < (2, 6):
|
||||
daemon = property(__get_daemon, __set_daemon)
|
||||
1544
lib/cherrypy/_cpcompat_subprocess.py
Normal file
1544
lib/cherrypy/_cpcompat_subprocess.py
Normal file
File diff suppressed because it is too large
Load Diff
317
lib/cherrypy/_cpconfig.py
Normal file
317
lib/cherrypy/_cpconfig.py
Normal file
@@ -0,0 +1,317 @@
|
||||
"""
|
||||
Configuration system for CherryPy.
|
||||
|
||||
Configuration in CherryPy is implemented via dictionaries. Keys are strings
|
||||
which name the mapped value, which may be of any type.
|
||||
|
||||
|
||||
Architecture
|
||||
------------
|
||||
|
||||
CherryPy Requests are part of an Application, which runs in a global context,
|
||||
and configuration data may apply to any of those three scopes:
|
||||
|
||||
Global
|
||||
Configuration entries which apply everywhere are stored in
|
||||
cherrypy.config.
|
||||
|
||||
Application
|
||||
Entries which apply to each mounted application are stored
|
||||
on the Application object itself, as 'app.config'. This is a two-level
|
||||
dict where each key is a path, or "relative URL" (for example, "/" or
|
||||
"/path/to/my/page"), and each value is a config dict. Usually, this
|
||||
data is provided in the call to tree.mount(root(), config=conf),
|
||||
although you may also use app.merge(conf).
|
||||
|
||||
Request
|
||||
Each Request object possesses a single 'Request.config' dict.
|
||||
Early in the request process, this dict is populated by merging global
|
||||
config entries, Application entries (whose path equals or is a parent
|
||||
of Request.path_info), and any config acquired while looking up the
|
||||
page handler (see next).
|
||||
|
||||
|
||||
Declaration
|
||||
-----------
|
||||
|
||||
Configuration data may be supplied as a Python dictionary, as a filename,
|
||||
or as an open file object. When you supply a filename or file, CherryPy
|
||||
uses Python's builtin ConfigParser; you declare Application config by
|
||||
writing each path as a section header::
|
||||
|
||||
[/path/to/my/page]
|
||||
request.stream = True
|
||||
|
||||
To declare global configuration entries, place them in a [global] section.
|
||||
|
||||
You may also declare config entries directly on the classes and methods
|
||||
(page handlers) that make up your CherryPy application via the ``_cp_config``
|
||||
attribute. For example::
|
||||
|
||||
class Demo:
|
||||
_cp_config = {'tools.gzip.on': True}
|
||||
|
||||
def index(self):
|
||||
return "Hello world"
|
||||
index.exposed = True
|
||||
index._cp_config = {'request.show_tracebacks': False}
|
||||
|
||||
.. note::
|
||||
|
||||
This behavior is only guaranteed for the default dispatcher.
|
||||
Other dispatchers may have different restrictions on where
|
||||
you can attach _cp_config attributes.
|
||||
|
||||
|
||||
Namespaces
|
||||
----------
|
||||
|
||||
Configuration keys are separated into namespaces by the first "." in the key.
|
||||
Current namespaces:
|
||||
|
||||
engine
|
||||
Controls the 'application engine', including autoreload.
|
||||
These can only be declared in the global config.
|
||||
|
||||
tree
|
||||
Grafts cherrypy.Application objects onto cherrypy.tree.
|
||||
These can only be declared in the global config.
|
||||
|
||||
hooks
|
||||
Declares additional request-processing functions.
|
||||
|
||||
log
|
||||
Configures the logging for each application.
|
||||
These can only be declared in the global or / config.
|
||||
|
||||
request
|
||||
Adds attributes to each Request.
|
||||
|
||||
response
|
||||
Adds attributes to each Response.
|
||||
|
||||
server
|
||||
Controls the default HTTP server via cherrypy.server.
|
||||
These can only be declared in the global config.
|
||||
|
||||
tools
|
||||
Runs and configures additional request-processing packages.
|
||||
|
||||
wsgi
|
||||
Adds WSGI middleware to an Application's "pipeline".
|
||||
These can only be declared in the app's root config ("/").
|
||||
|
||||
checker
|
||||
Controls the 'checker', which looks for common errors in
|
||||
app state (including config) when the engine starts.
|
||||
Global config only.
|
||||
|
||||
The only key that does not exist in a namespace is the "environment" entry.
|
||||
This special entry 'imports' other config entries from a template stored in
|
||||
cherrypy._cpconfig.environments[environment]. It only applies to the global
|
||||
config, and only when you use cherrypy.config.update.
|
||||
|
||||
You can define your own namespaces to be called at the Global, Application,
|
||||
or Request level, by adding a named handler to cherrypy.config.namespaces,
|
||||
app.namespaces, or app.request_class.namespaces. The name can
|
||||
be any string, and the handler must be either a callable or a (Python 2.5
|
||||
style) context manager.
|
||||
"""
|
||||
|
||||
import cherrypy
|
||||
from cherrypy._cpcompat import set, basestring
|
||||
from cherrypy.lib import reprconf
|
||||
|
||||
# Deprecated in CherryPy 3.2--remove in 3.3
|
||||
NamespaceSet = reprconf.NamespaceSet
|
||||
|
||||
|
||||
def merge(base, other):
|
||||
"""Merge one app config (from a dict, file, or filename) into another.
|
||||
|
||||
If the given config is a filename, it will be appended to
|
||||
the list of files to monitor for "autoreload" changes.
|
||||
"""
|
||||
if isinstance(other, basestring):
|
||||
cherrypy.engine.autoreload.files.add(other)
|
||||
|
||||
# Load other into base
|
||||
for section, value_map in reprconf.as_dict(other).items():
|
||||
if not isinstance(value_map, dict):
|
||||
raise ValueError(
|
||||
"Application config must include section headers, but the "
|
||||
"config you tried to merge doesn't have any sections. "
|
||||
"Wrap your config in another dict with paths as section "
|
||||
"headers, for example: {'/': config}.")
|
||||
base.setdefault(section, {}).update(value_map)
|
||||
|
||||
|
||||
class Config(reprconf.Config):
|
||||
|
||||
"""The 'global' configuration data for the entire CherryPy process."""
|
||||
|
||||
def update(self, config):
|
||||
"""Update self from a dict, file or filename."""
|
||||
if isinstance(config, basestring):
|
||||
# Filename
|
||||
cherrypy.engine.autoreload.files.add(config)
|
||||
reprconf.Config.update(self, config)
|
||||
|
||||
def _apply(self, config):
|
||||
"""Update self from a dict."""
|
||||
if isinstance(config.get("global"), dict):
|
||||
if len(config) > 1:
|
||||
cherrypy.checker.global_config_contained_paths = True
|
||||
config = config["global"]
|
||||
if 'tools.staticdir.dir' in config:
|
||||
config['tools.staticdir.section'] = "global"
|
||||
reprconf.Config._apply(self, config)
|
||||
|
||||
def __call__(self, *args, **kwargs):
|
||||
"""Decorator for page handlers to set _cp_config."""
|
||||
if args:
|
||||
raise TypeError(
|
||||
"The cherrypy.config decorator does not accept positional "
|
||||
"arguments; you must use keyword arguments.")
|
||||
|
||||
def tool_decorator(f):
|
||||
if not hasattr(f, "_cp_config"):
|
||||
f._cp_config = {}
|
||||
for k, v in kwargs.items():
|
||||
f._cp_config[k] = v
|
||||
return f
|
||||
return tool_decorator
|
||||
|
||||
|
||||
# Sphinx begin config.environments
|
||||
Config.environments = environments = {
|
||||
"staging": {
|
||||
'engine.autoreload.on': False,
|
||||
'checker.on': False,
|
||||
'tools.log_headers.on': False,
|
||||
'request.show_tracebacks': False,
|
||||
'request.show_mismatched_params': False,
|
||||
},
|
||||
"production": {
|
||||
'engine.autoreload.on': False,
|
||||
'checker.on': False,
|
||||
'tools.log_headers.on': False,
|
||||
'request.show_tracebacks': False,
|
||||
'request.show_mismatched_params': False,
|
||||
'log.screen': False,
|
||||
},
|
||||
"embedded": {
|
||||
# For use with CherryPy embedded in another deployment stack.
|
||||
'engine.autoreload.on': False,
|
||||
'checker.on': False,
|
||||
'tools.log_headers.on': False,
|
||||
'request.show_tracebacks': False,
|
||||
'request.show_mismatched_params': False,
|
||||
'log.screen': False,
|
||||
'engine.SIGHUP': None,
|
||||
'engine.SIGTERM': None,
|
||||
},
|
||||
"test_suite": {
|
||||
'engine.autoreload.on': False,
|
||||
'checker.on': False,
|
||||
'tools.log_headers.on': False,
|
||||
'request.show_tracebacks': True,
|
||||
'request.show_mismatched_params': True,
|
||||
'log.screen': False,
|
||||
},
|
||||
}
|
||||
# Sphinx end config.environments
|
||||
|
||||
|
||||
def _server_namespace_handler(k, v):
|
||||
"""Config handler for the "server" namespace."""
|
||||
atoms = k.split(".", 1)
|
||||
if len(atoms) > 1:
|
||||
# Special-case config keys of the form 'server.servername.socket_port'
|
||||
# to configure additional HTTP servers.
|
||||
if not hasattr(cherrypy, "servers"):
|
||||
cherrypy.servers = {}
|
||||
|
||||
servername, k = atoms
|
||||
if servername not in cherrypy.servers:
|
||||
from cherrypy import _cpserver
|
||||
cherrypy.servers[servername] = _cpserver.Server()
|
||||
# On by default, but 'on = False' can unsubscribe it (see below).
|
||||
cherrypy.servers[servername].subscribe()
|
||||
|
||||
if k == 'on':
|
||||
if v:
|
||||
cherrypy.servers[servername].subscribe()
|
||||
else:
|
||||
cherrypy.servers[servername].unsubscribe()
|
||||
else:
|
||||
setattr(cherrypy.servers[servername], k, v)
|
||||
else:
|
||||
setattr(cherrypy.server, k, v)
|
||||
Config.namespaces["server"] = _server_namespace_handler
|
||||
|
||||
|
||||
def _engine_namespace_handler(k, v):
|
||||
"""Backward compatibility handler for the "engine" namespace."""
|
||||
engine = cherrypy.engine
|
||||
|
||||
deprecated = {
|
||||
'autoreload_on': 'autoreload.on',
|
||||
'autoreload_frequency': 'autoreload.frequency',
|
||||
'autoreload_match': 'autoreload.match',
|
||||
'reload_files': 'autoreload.files',
|
||||
'deadlock_poll_freq': 'timeout_monitor.frequency'
|
||||
}
|
||||
|
||||
if k in deprecated:
|
||||
engine.log(
|
||||
'WARNING: Use of engine.%s is deprecated and will be removed in a '
|
||||
'future version. Use engine.%s instead.' % (k, deprecated[k]))
|
||||
|
||||
if k == 'autoreload_on':
|
||||
if v:
|
||||
engine.autoreload.subscribe()
|
||||
else:
|
||||
engine.autoreload.unsubscribe()
|
||||
elif k == 'autoreload_frequency':
|
||||
engine.autoreload.frequency = v
|
||||
elif k == 'autoreload_match':
|
||||
engine.autoreload.match = v
|
||||
elif k == 'reload_files':
|
||||
engine.autoreload.files = set(v)
|
||||
elif k == 'deadlock_poll_freq':
|
||||
engine.timeout_monitor.frequency = v
|
||||
elif k == 'SIGHUP':
|
||||
engine.listeners['SIGHUP'] = set([v])
|
||||
elif k == 'SIGTERM':
|
||||
engine.listeners['SIGTERM'] = set([v])
|
||||
elif "." in k:
|
||||
plugin, attrname = k.split(".", 1)
|
||||
plugin = getattr(engine, plugin)
|
||||
if attrname == 'on':
|
||||
if v and hasattr(getattr(plugin, 'subscribe', None), '__call__'):
|
||||
plugin.subscribe()
|
||||
return
|
||||
elif (
|
||||
(not v) and
|
||||
hasattr(getattr(plugin, 'unsubscribe', None), '__call__')
|
||||
):
|
||||
plugin.unsubscribe()
|
||||
return
|
||||
setattr(plugin, attrname, v)
|
||||
else:
|
||||
setattr(engine, k, v)
|
||||
Config.namespaces["engine"] = _engine_namespace_handler
|
||||
|
||||
|
||||
def _tree_namespace_handler(k, v):
|
||||
"""Namespace handler for the 'tree' config namespace."""
|
||||
if isinstance(v, dict):
|
||||
for script_name, app in v.items():
|
||||
cherrypy.tree.graft(app, script_name)
|
||||
cherrypy.engine.log("Mounted: %s on %s" %
|
||||
(app, script_name or "/"))
|
||||
else:
|
||||
cherrypy.tree.graft(v, v.script_name)
|
||||
cherrypy.engine.log("Mounted: %s on %s" % (v, v.script_name or "/"))
|
||||
Config.namespaces["tree"] = _tree_namespace_handler
|
||||
686
lib/cherrypy/_cpdispatch.py
Normal file
686
lib/cherrypy/_cpdispatch.py
Normal file
@@ -0,0 +1,686 @@
|
||||
"""CherryPy dispatchers.
|
||||
|
||||
A 'dispatcher' is the object which looks up the 'page handler' callable
|
||||
and collects config for the current request based on the path_info, other
|
||||
request attributes, and the application architecture. The core calls the
|
||||
dispatcher as early as possible, passing it a 'path_info' argument.
|
||||
|
||||
The default dispatcher discovers the page handler by matching path_info
|
||||
to a hierarchical arrangement of objects, starting at request.app.root.
|
||||
"""
|
||||
|
||||
import string
|
||||
import sys
|
||||
import types
|
||||
try:
|
||||
classtype = (type, types.ClassType)
|
||||
except AttributeError:
|
||||
classtype = type
|
||||
|
||||
import cherrypy
|
||||
from cherrypy._cpcompat import set
|
||||
|
||||
|
||||
class PageHandler(object):
|
||||
|
||||
"""Callable which sets response.body."""
|
||||
|
||||
def __init__(self, callable, *args, **kwargs):
|
||||
self.callable = callable
|
||||
self.args = args
|
||||
self.kwargs = kwargs
|
||||
|
||||
def get_args(self):
|
||||
return cherrypy.serving.request.args
|
||||
|
||||
def set_args(self, args):
|
||||
cherrypy.serving.request.args = args
|
||||
return cherrypy.serving.request.args
|
||||
|
||||
args = property(
|
||||
get_args,
|
||||
set_args,
|
||||
doc="The ordered args should be accessible from post dispatch hooks"
|
||||
)
|
||||
|
||||
def get_kwargs(self):
|
||||
return cherrypy.serving.request.kwargs
|
||||
|
||||
def set_kwargs(self, kwargs):
|
||||
cherrypy.serving.request.kwargs = kwargs
|
||||
return cherrypy.serving.request.kwargs
|
||||
|
||||
kwargs = property(
|
||||
get_kwargs,
|
||||
set_kwargs,
|
||||
doc="The named kwargs should be accessible from post dispatch hooks"
|
||||
)
|
||||
|
||||
def __call__(self):
|
||||
try:
|
||||
return self.callable(*self.args, **self.kwargs)
|
||||
except TypeError:
|
||||
x = sys.exc_info()[1]
|
||||
try:
|
||||
test_callable_spec(self.callable, self.args, self.kwargs)
|
||||
except cherrypy.HTTPError:
|
||||
raise sys.exc_info()[1]
|
||||
except:
|
||||
raise x
|
||||
raise
|
||||
|
||||
|
||||
def test_callable_spec(callable, callable_args, callable_kwargs):
|
||||
"""
|
||||
Inspect callable and test to see if the given args are suitable for it.
|
||||
|
||||
When an error occurs during the handler's invoking stage there are 2
|
||||
erroneous cases:
|
||||
1. Too many parameters passed to a function which doesn't define
|
||||
one of *args or **kwargs.
|
||||
2. Too little parameters are passed to the function.
|
||||
|
||||
There are 3 sources of parameters to a cherrypy handler.
|
||||
1. query string parameters are passed as keyword parameters to the
|
||||
handler.
|
||||
2. body parameters are also passed as keyword parameters.
|
||||
3. when partial matching occurs, the final path atoms are passed as
|
||||
positional args.
|
||||
Both the query string and path atoms are part of the URI. If they are
|
||||
incorrect, then a 404 Not Found should be raised. Conversely the body
|
||||
parameters are part of the request; if they are invalid a 400 Bad Request.
|
||||
"""
|
||||
show_mismatched_params = getattr(
|
||||
cherrypy.serving.request, 'show_mismatched_params', False)
|
||||
try:
|
||||
(args, varargs, varkw, defaults) = getargspec(callable)
|
||||
except TypeError:
|
||||
if isinstance(callable, object) and hasattr(callable, '__call__'):
|
||||
(args, varargs, varkw,
|
||||
defaults) = getargspec(callable.__call__)
|
||||
else:
|
||||
# If it wasn't one of our own types, re-raise
|
||||
# the original error
|
||||
raise
|
||||
|
||||
if args and args[0] == 'self':
|
||||
args = args[1:]
|
||||
|
||||
arg_usage = dict([(arg, 0,) for arg in args])
|
||||
vararg_usage = 0
|
||||
varkw_usage = 0
|
||||
extra_kwargs = set()
|
||||
|
||||
for i, value in enumerate(callable_args):
|
||||
try:
|
||||
arg_usage[args[i]] += 1
|
||||
except IndexError:
|
||||
vararg_usage += 1
|
||||
|
||||
for key in callable_kwargs.keys():
|
||||
try:
|
||||
arg_usage[key] += 1
|
||||
except KeyError:
|
||||
varkw_usage += 1
|
||||
extra_kwargs.add(key)
|
||||
|
||||
# figure out which args have defaults.
|
||||
args_with_defaults = args[-len(defaults or []):]
|
||||
for i, val in enumerate(defaults or []):
|
||||
# Defaults take effect only when the arg hasn't been used yet.
|
||||
if arg_usage[args_with_defaults[i]] == 0:
|
||||
arg_usage[args_with_defaults[i]] += 1
|
||||
|
||||
missing_args = []
|
||||
multiple_args = []
|
||||
for key, usage in arg_usage.items():
|
||||
if usage == 0:
|
||||
missing_args.append(key)
|
||||
elif usage > 1:
|
||||
multiple_args.append(key)
|
||||
|
||||
if missing_args:
|
||||
# In the case where the method allows body arguments
|
||||
# there are 3 potential errors:
|
||||
# 1. not enough query string parameters -> 404
|
||||
# 2. not enough body parameters -> 400
|
||||
# 3. not enough path parts (partial matches) -> 404
|
||||
#
|
||||
# We can't actually tell which case it is,
|
||||
# so I'm raising a 404 because that covers 2/3 of the
|
||||
# possibilities
|
||||
#
|
||||
# In the case where the method does not allow body
|
||||
# arguments it's definitely a 404.
|
||||
message = None
|
||||
if show_mismatched_params:
|
||||
message = "Missing parameters: %s" % ",".join(missing_args)
|
||||
raise cherrypy.HTTPError(404, message=message)
|
||||
|
||||
# the extra positional arguments come from the path - 404 Not Found
|
||||
if not varargs and vararg_usage > 0:
|
||||
raise cherrypy.HTTPError(404)
|
||||
|
||||
body_params = cherrypy.serving.request.body.params or {}
|
||||
body_params = set(body_params.keys())
|
||||
qs_params = set(callable_kwargs.keys()) - body_params
|
||||
|
||||
if multiple_args:
|
||||
if qs_params.intersection(set(multiple_args)):
|
||||
# If any of the multiple parameters came from the query string then
|
||||
# it's a 404 Not Found
|
||||
error = 404
|
||||
else:
|
||||
# Otherwise it's a 400 Bad Request
|
||||
error = 400
|
||||
|
||||
message = None
|
||||
if show_mismatched_params:
|
||||
message = "Multiple values for parameters: "\
|
||||
"%s" % ",".join(multiple_args)
|
||||
raise cherrypy.HTTPError(error, message=message)
|
||||
|
||||
if not varkw and varkw_usage > 0:
|
||||
|
||||
# If there were extra query string parameters, it's a 404 Not Found
|
||||
extra_qs_params = set(qs_params).intersection(extra_kwargs)
|
||||
if extra_qs_params:
|
||||
message = None
|
||||
if show_mismatched_params:
|
||||
message = "Unexpected query string "\
|
||||
"parameters: %s" % ", ".join(extra_qs_params)
|
||||
raise cherrypy.HTTPError(404, message=message)
|
||||
|
||||
# If there were any extra body parameters, it's a 400 Not Found
|
||||
extra_body_params = set(body_params).intersection(extra_kwargs)
|
||||
if extra_body_params:
|
||||
message = None
|
||||
if show_mismatched_params:
|
||||
message = "Unexpected body parameters: "\
|
||||
"%s" % ", ".join(extra_body_params)
|
||||
raise cherrypy.HTTPError(400, message=message)
|
||||
|
||||
|
||||
try:
|
||||
import inspect
|
||||
except ImportError:
|
||||
test_callable_spec = lambda callable, args, kwargs: None
|
||||
else:
|
||||
getargspec = inspect.getargspec
|
||||
# Python 3 requires using getfullargspec if keyword-only arguments are present
|
||||
if hasattr(inspect, 'getfullargspec'):
|
||||
def getargspec(callable):
|
||||
return inspect.getfullargspec(callable)[:4]
|
||||
|
||||
|
||||
class LateParamPageHandler(PageHandler):
|
||||
|
||||
"""When passing cherrypy.request.params to the page handler, we do not
|
||||
want to capture that dict too early; we want to give tools like the
|
||||
decoding tool a chance to modify the params dict in-between the lookup
|
||||
of the handler and the actual calling of the handler. This subclass
|
||||
takes that into account, and allows request.params to be 'bound late'
|
||||
(it's more complicated than that, but that's the effect).
|
||||
"""
|
||||
|
||||
def _get_kwargs(self):
|
||||
kwargs = cherrypy.serving.request.params.copy()
|
||||
if self._kwargs:
|
||||
kwargs.update(self._kwargs)
|
||||
return kwargs
|
||||
|
||||
def _set_kwargs(self, kwargs):
|
||||
cherrypy.serving.request.kwargs = kwargs
|
||||
self._kwargs = kwargs
|
||||
|
||||
kwargs = property(_get_kwargs, _set_kwargs,
|
||||
doc='page handler kwargs (with '
|
||||
'cherrypy.request.params copied in)')
|
||||
|
||||
|
||||
if sys.version_info < (3, 0):
|
||||
punctuation_to_underscores = string.maketrans(
|
||||
string.punctuation, '_' * len(string.punctuation))
|
||||
|
||||
def validate_translator(t):
|
||||
if not isinstance(t, str) or len(t) != 256:
|
||||
raise ValueError(
|
||||
"The translate argument must be a str of len 256.")
|
||||
else:
|
||||
punctuation_to_underscores = str.maketrans(
|
||||
string.punctuation, '_' * len(string.punctuation))
|
||||
|
||||
def validate_translator(t):
|
||||
if not isinstance(t, dict):
|
||||
raise ValueError("The translate argument must be a dict.")
|
||||
|
||||
|
||||
class Dispatcher(object):
|
||||
|
||||
"""CherryPy Dispatcher which walks a tree of objects to find a handler.
|
||||
|
||||
The tree is rooted at cherrypy.request.app.root, and each hierarchical
|
||||
component in the path_info argument is matched to a corresponding nested
|
||||
attribute of the root object. Matching handlers must have an 'exposed'
|
||||
attribute which evaluates to True. The special method name "index"
|
||||
matches a URI which ends in a slash ("/"). The special method name
|
||||
"default" may match a portion of the path_info (but only when no longer
|
||||
substring of the path_info matches some other object).
|
||||
|
||||
This is the default, built-in dispatcher for CherryPy.
|
||||
"""
|
||||
|
||||
dispatch_method_name = '_cp_dispatch'
|
||||
"""
|
||||
The name of the dispatch method that nodes may optionally implement
|
||||
to provide their own dynamic dispatch algorithm.
|
||||
"""
|
||||
|
||||
def __init__(self, dispatch_method_name=None,
|
||||
translate=punctuation_to_underscores):
|
||||
validate_translator(translate)
|
||||
self.translate = translate
|
||||
if dispatch_method_name:
|
||||
self.dispatch_method_name = dispatch_method_name
|
||||
|
||||
def __call__(self, path_info):
|
||||
"""Set handler and config for the current request."""
|
||||
request = cherrypy.serving.request
|
||||
func, vpath = self.find_handler(path_info)
|
||||
|
||||
if func:
|
||||
# Decode any leftover %2F in the virtual_path atoms.
|
||||
vpath = [x.replace("%2F", "/") for x in vpath]
|
||||
request.handler = LateParamPageHandler(func, *vpath)
|
||||
else:
|
||||
request.handler = cherrypy.NotFound()
|
||||
|
||||
def find_handler(self, path):
|
||||
"""Return the appropriate page handler, plus any virtual path.
|
||||
|
||||
This will return two objects. The first will be a callable,
|
||||
which can be used to generate page output. Any parameters from
|
||||
the query string or request body will be sent to that callable
|
||||
as keyword arguments.
|
||||
|
||||
The callable is found by traversing the application's tree,
|
||||
starting from cherrypy.request.app.root, and matching path
|
||||
components to successive objects in the tree. For example, the
|
||||
URL "/path/to/handler" might return root.path.to.handler.
|
||||
|
||||
The second object returned will be a list of names which are
|
||||
'virtual path' components: parts of the URL which are dynamic,
|
||||
and were not used when looking up the handler.
|
||||
These virtual path components are passed to the handler as
|
||||
positional arguments.
|
||||
"""
|
||||
request = cherrypy.serving.request
|
||||
app = request.app
|
||||
root = app.root
|
||||
dispatch_name = self.dispatch_method_name
|
||||
|
||||
# Get config for the root object/path.
|
||||
fullpath = [x for x in path.strip('/').split('/') if x] + ['index']
|
||||
fullpath_len = len(fullpath)
|
||||
segleft = fullpath_len
|
||||
nodeconf = {}
|
||||
if hasattr(root, "_cp_config"):
|
||||
nodeconf.update(root._cp_config)
|
||||
if "/" in app.config:
|
||||
nodeconf.update(app.config["/"])
|
||||
object_trail = [['root', root, nodeconf, segleft]]
|
||||
|
||||
node = root
|
||||
iternames = fullpath[:]
|
||||
while iternames:
|
||||
name = iternames[0]
|
||||
# map to legal Python identifiers (e.g. replace '.' with '_')
|
||||
objname = name.translate(self.translate)
|
||||
|
||||
nodeconf = {}
|
||||
subnode = getattr(node, objname, None)
|
||||
pre_len = len(iternames)
|
||||
if subnode is None:
|
||||
dispatch = getattr(node, dispatch_name, None)
|
||||
if dispatch and hasattr(dispatch, '__call__') and not \
|
||||
getattr(dispatch, 'exposed', False) and \
|
||||
pre_len > 1:
|
||||
# Don't expose the hidden 'index' token to _cp_dispatch
|
||||
# We skip this if pre_len == 1 since it makes no sense
|
||||
# to call a dispatcher when we have no tokens left.
|
||||
index_name = iternames.pop()
|
||||
subnode = dispatch(vpath=iternames)
|
||||
iternames.append(index_name)
|
||||
else:
|
||||
# We didn't find a path, but keep processing in case there
|
||||
# is a default() handler.
|
||||
iternames.pop(0)
|
||||
else:
|
||||
# We found the path, remove the vpath entry
|
||||
iternames.pop(0)
|
||||
segleft = len(iternames)
|
||||
if segleft > pre_len:
|
||||
# No path segment was removed. Raise an error.
|
||||
raise cherrypy.CherryPyException(
|
||||
"A vpath segment was added. Custom dispatchers may only "
|
||||
+ "remove elements. While trying to process "
|
||||
+ "{0} in {1}".format(name, fullpath)
|
||||
)
|
||||
elif segleft == pre_len:
|
||||
# Assume that the handler used the current path segment, but
|
||||
# did not pop it. This allows things like
|
||||
# return getattr(self, vpath[0], None)
|
||||
iternames.pop(0)
|
||||
segleft -= 1
|
||||
node = subnode
|
||||
|
||||
if node is not None:
|
||||
# Get _cp_config attached to this node.
|
||||
if hasattr(node, "_cp_config"):
|
||||
nodeconf.update(node._cp_config)
|
||||
|
||||
# Mix in values from app.config for this path.
|
||||
existing_len = fullpath_len - pre_len
|
||||
if existing_len != 0:
|
||||
curpath = '/' + '/'.join(fullpath[0:existing_len])
|
||||
else:
|
||||
curpath = ''
|
||||
new_segs = fullpath[fullpath_len - pre_len:fullpath_len - segleft]
|
||||
for seg in new_segs:
|
||||
curpath += '/' + seg
|
||||
if curpath in app.config:
|
||||
nodeconf.update(app.config[curpath])
|
||||
|
||||
object_trail.append([name, node, nodeconf, segleft])
|
||||
|
||||
def set_conf():
|
||||
"""Collapse all object_trail config into cherrypy.request.config.
|
||||
"""
|
||||
base = cherrypy.config.copy()
|
||||
# Note that we merge the config from each node
|
||||
# even if that node was None.
|
||||
for name, obj, conf, segleft in object_trail:
|
||||
base.update(conf)
|
||||
if 'tools.staticdir.dir' in conf:
|
||||
base['tools.staticdir.section'] = '/' + \
|
||||
'/'.join(fullpath[0:fullpath_len - segleft])
|
||||
return base
|
||||
|
||||
# Try successive objects (reverse order)
|
||||
num_candidates = len(object_trail) - 1
|
||||
for i in range(num_candidates, -1, -1):
|
||||
|
||||
name, candidate, nodeconf, segleft = object_trail[i]
|
||||
if candidate is None:
|
||||
continue
|
||||
|
||||
# Try a "default" method on the current leaf.
|
||||
if hasattr(candidate, "default"):
|
||||
defhandler = candidate.default
|
||||
if getattr(defhandler, 'exposed', False):
|
||||
# Insert any extra _cp_config from the default handler.
|
||||
conf = getattr(defhandler, "_cp_config", {})
|
||||
object_trail.insert(
|
||||
i + 1, ["default", defhandler, conf, segleft])
|
||||
request.config = set_conf()
|
||||
# See https://bitbucket.org/cherrypy/cherrypy/issue/613
|
||||
request.is_index = path.endswith("/")
|
||||
return defhandler, fullpath[fullpath_len - segleft:-1]
|
||||
|
||||
# Uncomment the next line to restrict positional params to
|
||||
# "default".
|
||||
# if i < num_candidates - 2: continue
|
||||
|
||||
# Try the current leaf.
|
||||
if getattr(candidate, 'exposed', False):
|
||||
request.config = set_conf()
|
||||
if i == num_candidates:
|
||||
# We found the extra ".index". Mark request so tools
|
||||
# can redirect if path_info has no trailing slash.
|
||||
request.is_index = True
|
||||
else:
|
||||
# We're not at an 'index' handler. Mark request so tools
|
||||
# can redirect if path_info has NO trailing slash.
|
||||
# Note that this also includes handlers which take
|
||||
# positional parameters (virtual paths).
|
||||
request.is_index = False
|
||||
return candidate, fullpath[fullpath_len - segleft:-1]
|
||||
|
||||
# We didn't find anything
|
||||
request.config = set_conf()
|
||||
return None, []
|
||||
|
||||
|
||||
class MethodDispatcher(Dispatcher):
|
||||
|
||||
"""Additional dispatch based on cherrypy.request.method.upper().
|
||||
|
||||
Methods named GET, POST, etc will be called on an exposed class.
|
||||
The method names must be all caps; the appropriate Allow header
|
||||
will be output showing all capitalized method names as allowable
|
||||
HTTP verbs.
|
||||
|
||||
Note that the containing class must be exposed, not the methods.
|
||||
"""
|
||||
|
||||
def __call__(self, path_info):
|
||||
"""Set handler and config for the current request."""
|
||||
request = cherrypy.serving.request
|
||||
resource, vpath = self.find_handler(path_info)
|
||||
|
||||
if resource:
|
||||
# Set Allow header
|
||||
avail = [m for m in dir(resource) if m.isupper()]
|
||||
if "GET" in avail and "HEAD" not in avail:
|
||||
avail.append("HEAD")
|
||||
avail.sort()
|
||||
cherrypy.serving.response.headers['Allow'] = ", ".join(avail)
|
||||
|
||||
# Find the subhandler
|
||||
meth = request.method.upper()
|
||||
func = getattr(resource, meth, None)
|
||||
if func is None and meth == "HEAD":
|
||||
func = getattr(resource, "GET", None)
|
||||
if func:
|
||||
# Grab any _cp_config on the subhandler.
|
||||
if hasattr(func, "_cp_config"):
|
||||
request.config.update(func._cp_config)
|
||||
|
||||
# Decode any leftover %2F in the virtual_path atoms.
|
||||
vpath = [x.replace("%2F", "/") for x in vpath]
|
||||
request.handler = LateParamPageHandler(func, *vpath)
|
||||
else:
|
||||
request.handler = cherrypy.HTTPError(405)
|
||||
else:
|
||||
request.handler = cherrypy.NotFound()
|
||||
|
||||
|
||||
class RoutesDispatcher(object):
|
||||
|
||||
"""A Routes based dispatcher for CherryPy."""
|
||||
|
||||
def __init__(self, full_result=False, **mapper_options):
|
||||
"""
|
||||
Routes dispatcher
|
||||
|
||||
Set full_result to True if you wish the controller
|
||||
and the action to be passed on to the page handler
|
||||
parameters. By default they won't be.
|
||||
"""
|
||||
import routes
|
||||
self.full_result = full_result
|
||||
self.controllers = {}
|
||||
self.mapper = routes.Mapper(**mapper_options)
|
||||
self.mapper.controller_scan = self.controllers.keys
|
||||
|
||||
def connect(self, name, route, controller, **kwargs):
|
||||
self.controllers[name] = controller
|
||||
self.mapper.connect(name, route, controller=name, **kwargs)
|
||||
|
||||
def redirect(self, url):
|
||||
raise cherrypy.HTTPRedirect(url)
|
||||
|
||||
def __call__(self, path_info):
|
||||
"""Set handler and config for the current request."""
|
||||
func = self.find_handler(path_info)
|
||||
if func:
|
||||
cherrypy.serving.request.handler = LateParamPageHandler(func)
|
||||
else:
|
||||
cherrypy.serving.request.handler = cherrypy.NotFound()
|
||||
|
||||
def find_handler(self, path_info):
|
||||
"""Find the right page handler, and set request.config."""
|
||||
import routes
|
||||
|
||||
request = cherrypy.serving.request
|
||||
|
||||
config = routes.request_config()
|
||||
config.mapper = self.mapper
|
||||
if hasattr(request, 'wsgi_environ'):
|
||||
config.environ = request.wsgi_environ
|
||||
config.host = request.headers.get('Host', None)
|
||||
config.protocol = request.scheme
|
||||
config.redirect = self.redirect
|
||||
|
||||
result = self.mapper.match(path_info)
|
||||
|
||||
config.mapper_dict = result
|
||||
params = {}
|
||||
if result:
|
||||
params = result.copy()
|
||||
if not self.full_result:
|
||||
params.pop('controller', None)
|
||||
params.pop('action', None)
|
||||
request.params.update(params)
|
||||
|
||||
# Get config for the root object/path.
|
||||
request.config = base = cherrypy.config.copy()
|
||||
curpath = ""
|
||||
|
||||
def merge(nodeconf):
|
||||
if 'tools.staticdir.dir' in nodeconf:
|
||||
nodeconf['tools.staticdir.section'] = curpath or "/"
|
||||
base.update(nodeconf)
|
||||
|
||||
app = request.app
|
||||
root = app.root
|
||||
if hasattr(root, "_cp_config"):
|
||||
merge(root._cp_config)
|
||||
if "/" in app.config:
|
||||
merge(app.config["/"])
|
||||
|
||||
# Mix in values from app.config.
|
||||
atoms = [x for x in path_info.split("/") if x]
|
||||
if atoms:
|
||||
last = atoms.pop()
|
||||
else:
|
||||
last = None
|
||||
for atom in atoms:
|
||||
curpath = "/".join((curpath, atom))
|
||||
if curpath in app.config:
|
||||
merge(app.config[curpath])
|
||||
|
||||
handler = None
|
||||
if result:
|
||||
controller = result.get('controller')
|
||||
controller = self.controllers.get(controller, controller)
|
||||
if controller:
|
||||
if isinstance(controller, classtype):
|
||||
controller = controller()
|
||||
# Get config from the controller.
|
||||
if hasattr(controller, "_cp_config"):
|
||||
merge(controller._cp_config)
|
||||
|
||||
action = result.get('action')
|
||||
if action is not None:
|
||||
handler = getattr(controller, action, None)
|
||||
# Get config from the handler
|
||||
if hasattr(handler, "_cp_config"):
|
||||
merge(handler._cp_config)
|
||||
else:
|
||||
handler = controller
|
||||
|
||||
# Do the last path atom here so it can
|
||||
# override the controller's _cp_config.
|
||||
if last:
|
||||
curpath = "/".join((curpath, last))
|
||||
if curpath in app.config:
|
||||
merge(app.config[curpath])
|
||||
|
||||
return handler
|
||||
|
||||
|
||||
def XMLRPCDispatcher(next_dispatcher=Dispatcher()):
|
||||
from cherrypy.lib import xmlrpcutil
|
||||
|
||||
def xmlrpc_dispatch(path_info):
|
||||
path_info = xmlrpcutil.patched_path(path_info)
|
||||
return next_dispatcher(path_info)
|
||||
return xmlrpc_dispatch
|
||||
|
||||
|
||||
def VirtualHost(next_dispatcher=Dispatcher(), use_x_forwarded_host=True,
|
||||
**domains):
|
||||
"""
|
||||
Select a different handler based on the Host header.
|
||||
|
||||
This can be useful when running multiple sites within one CP server.
|
||||
It allows several domains to point to different parts of a single
|
||||
website structure. For example::
|
||||
|
||||
http://www.domain.example -> root
|
||||
http://www.domain2.example -> root/domain2/
|
||||
http://www.domain2.example:443 -> root/secure
|
||||
|
||||
can be accomplished via the following config::
|
||||
|
||||
[/]
|
||||
request.dispatch = cherrypy.dispatch.VirtualHost(
|
||||
**{'www.domain2.example': '/domain2',
|
||||
'www.domain2.example:443': '/secure',
|
||||
})
|
||||
|
||||
next_dispatcher
|
||||
The next dispatcher object in the dispatch chain.
|
||||
The VirtualHost dispatcher adds a prefix to the URL and calls
|
||||
another dispatcher. Defaults to cherrypy.dispatch.Dispatcher().
|
||||
|
||||
use_x_forwarded_host
|
||||
If True (the default), any "X-Forwarded-Host"
|
||||
request header will be used instead of the "Host" header. This
|
||||
is commonly added by HTTP servers (such as Apache) when proxying.
|
||||
|
||||
``**domains``
|
||||
A dict of {host header value: virtual prefix} pairs.
|
||||
The incoming "Host" request header is looked up in this dict,
|
||||
and, if a match is found, the corresponding "virtual prefix"
|
||||
value will be prepended to the URL path before calling the
|
||||
next dispatcher. Note that you often need separate entries
|
||||
for "example.com" and "www.example.com". In addition, "Host"
|
||||
headers may contain the port number.
|
||||
"""
|
||||
from cherrypy.lib import httputil
|
||||
|
||||
def vhost_dispatch(path_info):
|
||||
request = cherrypy.serving.request
|
||||
header = request.headers.get
|
||||
|
||||
domain = header('Host', '')
|
||||
if use_x_forwarded_host:
|
||||
domain = header("X-Forwarded-Host", domain)
|
||||
|
||||
prefix = domains.get(domain, "")
|
||||
if prefix:
|
||||
path_info = httputil.urljoin(prefix, path_info)
|
||||
|
||||
result = next_dispatcher(path_info)
|
||||
|
||||
# Touch up staticdir config. See
|
||||
# https://bitbucket.org/cherrypy/cherrypy/issue/614.
|
||||
section = request.config.get('tools.staticdir.section')
|
||||
if section:
|
||||
section = section[len(prefix):]
|
||||
request.config['tools.staticdir.section'] = section
|
||||
|
||||
return result
|
||||
return vhost_dispatch
|
||||
609
lib/cherrypy/_cperror.py
Normal file
609
lib/cherrypy/_cperror.py
Normal file
@@ -0,0 +1,609 @@
|
||||
"""Exception classes for CherryPy.
|
||||
|
||||
CherryPy provides (and uses) exceptions for declaring that the HTTP response
|
||||
should be a status other than the default "200 OK". You can ``raise`` them like
|
||||
normal Python exceptions. You can also call them and they will raise
|
||||
themselves; this means you can set an
|
||||
:class:`HTTPError<cherrypy._cperror.HTTPError>`
|
||||
or :class:`HTTPRedirect<cherrypy._cperror.HTTPRedirect>` as the
|
||||
:attr:`request.handler<cherrypy._cprequest.Request.handler>`.
|
||||
|
||||
.. _redirectingpost:
|
||||
|
||||
Redirecting POST
|
||||
================
|
||||
|
||||
When you GET a resource and are redirected by the server to another Location,
|
||||
there's generally no problem since GET is both a "safe method" (there should
|
||||
be no side-effects) and an "idempotent method" (multiple calls are no different
|
||||
than a single call).
|
||||
|
||||
POST, however, is neither safe nor idempotent--if you
|
||||
charge a credit card, you don't want to be charged twice by a redirect!
|
||||
|
||||
For this reason, *none* of the 3xx responses permit a user-agent (browser) to
|
||||
resubmit a POST on redirection without first confirming the action with the
|
||||
user:
|
||||
|
||||
===== ================================= ===========
|
||||
300 Multiple Choices Confirm with the user
|
||||
301 Moved Permanently Confirm with the user
|
||||
302 Found (Object moved temporarily) Confirm with the user
|
||||
303 See Other GET the new URI--no confirmation
|
||||
304 Not modified (for conditional GET only--POST should not raise this error)
|
||||
305 Use Proxy Confirm with the user
|
||||
307 Temporary Redirect Confirm with the user
|
||||
===== ================================= ===========
|
||||
|
||||
However, browsers have historically implemented these restrictions poorly;
|
||||
in particular, many browsers do not force the user to confirm 301, 302
|
||||
or 307 when redirecting POST. For this reason, CherryPy defaults to 303,
|
||||
which most user-agents appear to have implemented correctly. Therefore, if
|
||||
you raise HTTPRedirect for a POST request, the user-agent will most likely
|
||||
attempt to GET the new URI (without asking for confirmation from the user).
|
||||
We realize this is confusing for developers, but it's the safest thing we
|
||||
could do. You are of course free to raise ``HTTPRedirect(uri, status=302)``
|
||||
or any other 3xx status if you know what you're doing, but given the
|
||||
environment, we couldn't let any of those be the default.
|
||||
|
||||
Custom Error Handling
|
||||
=====================
|
||||
|
||||
.. image:: /refman/cperrors.gif
|
||||
|
||||
Anticipated HTTP responses
|
||||
--------------------------
|
||||
|
||||
The 'error_page' config namespace can be used to provide custom HTML output for
|
||||
expected responses (like 404 Not Found). Supply a filename from which the
|
||||
output will be read. The contents will be interpolated with the values
|
||||
%(status)s, %(message)s, %(traceback)s, and %(version)s using plain old Python
|
||||
`string formatting <http://docs.python.org/2/library/stdtypes.html#string-formatting-operations>`_.
|
||||
|
||||
::
|
||||
|
||||
_cp_config = {
|
||||
'error_page.404': os.path.join(localDir, "static/index.html")
|
||||
}
|
||||
|
||||
|
||||
Beginning in version 3.1, you may also provide a function or other callable as
|
||||
an error_page entry. It will be passed the same status, message, traceback and
|
||||
version arguments that are interpolated into templates::
|
||||
|
||||
def error_page_402(status, message, traceback, version):
|
||||
return "Error %s - Well, I'm very sorry but you haven't paid!" % status
|
||||
cherrypy.config.update({'error_page.402': error_page_402})
|
||||
|
||||
Also in 3.1, in addition to the numbered error codes, you may also supply
|
||||
"error_page.default" to handle all codes which do not have their own error_page
|
||||
entry.
|
||||
|
||||
|
||||
|
||||
Unanticipated errors
|
||||
--------------------
|
||||
|
||||
CherryPy also has a generic error handling mechanism: whenever an unanticipated
|
||||
error occurs in your code, it will call
|
||||
:func:`Request.error_response<cherrypy._cprequest.Request.error_response>` to
|
||||
set the response status, headers, and body. By default, this is the same
|
||||
output as
|
||||
:class:`HTTPError(500) <cherrypy._cperror.HTTPError>`. If you want to provide
|
||||
some other behavior, you generally replace "request.error_response".
|
||||
|
||||
Here is some sample code that shows how to display a custom error message and
|
||||
send an e-mail containing the error::
|
||||
|
||||
from cherrypy import _cperror
|
||||
|
||||
def handle_error():
|
||||
cherrypy.response.status = 500
|
||||
cherrypy.response.body = [
|
||||
"<html><body>Sorry, an error occured</body></html>"
|
||||
]
|
||||
sendMail('error@domain.com',
|
||||
'Error in your web app',
|
||||
_cperror.format_exc())
|
||||
|
||||
class Root:
|
||||
_cp_config = {'request.error_response': handle_error}
|
||||
|
||||
|
||||
Note that you have to explicitly set
|
||||
:attr:`response.body <cherrypy._cprequest.Response.body>`
|
||||
and not simply return an error message as a result.
|
||||
"""
|
||||
|
||||
from cgi import escape as _escape
|
||||
from sys import exc_info as _exc_info
|
||||
from traceback import format_exception as _format_exception
|
||||
from cherrypy._cpcompat import basestring, bytestr, iteritems, ntob
|
||||
from cherrypy._cpcompat import tonative, urljoin as _urljoin
|
||||
from cherrypy.lib import httputil as _httputil
|
||||
|
||||
|
||||
class CherryPyException(Exception):
|
||||
|
||||
"""A base class for CherryPy exceptions."""
|
||||
pass
|
||||
|
||||
|
||||
class TimeoutError(CherryPyException):
|
||||
|
||||
"""Exception raised when Response.timed_out is detected."""
|
||||
pass
|
||||
|
||||
|
||||
class InternalRedirect(CherryPyException):
|
||||
|
||||
"""Exception raised to switch to the handler for a different URL.
|
||||
|
||||
This exception will redirect processing to another path within the site
|
||||
(without informing the client). Provide the new path as an argument when
|
||||
raising the exception. Provide any params in the querystring for the new
|
||||
URL.
|
||||
"""
|
||||
|
||||
def __init__(self, path, query_string=""):
|
||||
import cherrypy
|
||||
self.request = cherrypy.serving.request
|
||||
|
||||
self.query_string = query_string
|
||||
if "?" in path:
|
||||
# Separate any params included in the path
|
||||
path, self.query_string = path.split("?", 1)
|
||||
|
||||
# Note that urljoin will "do the right thing" whether url is:
|
||||
# 1. a URL relative to root (e.g. "/dummy")
|
||||
# 2. a URL relative to the current path
|
||||
# Note that any query string will be discarded.
|
||||
path = _urljoin(self.request.path_info, path)
|
||||
|
||||
# Set a 'path' member attribute so that code which traps this
|
||||
# error can have access to it.
|
||||
self.path = path
|
||||
|
||||
CherryPyException.__init__(self, path, self.query_string)
|
||||
|
||||
|
||||
class HTTPRedirect(CherryPyException):
|
||||
|
||||
"""Exception raised when the request should be redirected.
|
||||
|
||||
This exception will force a HTTP redirect to the URL or URL's you give it.
|
||||
The new URL must be passed as the first argument to the Exception,
|
||||
e.g., HTTPRedirect(newUrl). Multiple URLs are allowed in a list.
|
||||
If a URL is absolute, it will be used as-is. If it is relative, it is
|
||||
assumed to be relative to the current cherrypy.request.path_info.
|
||||
|
||||
If one of the provided URL is a unicode object, it will be encoded
|
||||
using the default encoding or the one passed in parameter.
|
||||
|
||||
There are multiple types of redirect, from which you can select via the
|
||||
``status`` argument. If you do not provide a ``status`` arg, it defaults to
|
||||
303 (or 302 if responding with HTTP/1.0).
|
||||
|
||||
Examples::
|
||||
|
||||
raise cherrypy.HTTPRedirect("")
|
||||
raise cherrypy.HTTPRedirect("/abs/path", 307)
|
||||
raise cherrypy.HTTPRedirect(["path1", "path2?a=1&b=2"], 301)
|
||||
|
||||
See :ref:`redirectingpost` for additional caveats.
|
||||
"""
|
||||
|
||||
status = None
|
||||
"""The integer HTTP status code to emit."""
|
||||
|
||||
urls = None
|
||||
"""The list of URL's to emit."""
|
||||
|
||||
encoding = 'utf-8'
|
||||
"""The encoding when passed urls are not native strings"""
|
||||
|
||||
def __init__(self, urls, status=None, encoding=None):
|
||||
import cherrypy
|
||||
request = cherrypy.serving.request
|
||||
|
||||
if isinstance(urls, basestring):
|
||||
urls = [urls]
|
||||
|
||||
abs_urls = []
|
||||
for url in urls:
|
||||
url = tonative(url, encoding or self.encoding)
|
||||
|
||||
# Note that urljoin will "do the right thing" whether url is:
|
||||
# 1. a complete URL with host (e.g. "http://www.example.com/test")
|
||||
# 2. a URL relative to root (e.g. "/dummy")
|
||||
# 3. a URL relative to the current path
|
||||
# Note that any query string in cherrypy.request is discarded.
|
||||
url = _urljoin(cherrypy.url(), url)
|
||||
abs_urls.append(url)
|
||||
self.urls = abs_urls
|
||||
|
||||
# RFC 2616 indicates a 301 response code fits our goal; however,
|
||||
# browser support for 301 is quite messy. Do 302/303 instead. See
|
||||
# http://www.alanflavell.org.uk/www/post-redirect.html
|
||||
if status is None:
|
||||
if request.protocol >= (1, 1):
|
||||
status = 303
|
||||
else:
|
||||
status = 302
|
||||
else:
|
||||
status = int(status)
|
||||
if status < 300 or status > 399:
|
||||
raise ValueError("status must be between 300 and 399.")
|
||||
|
||||
self.status = status
|
||||
CherryPyException.__init__(self, abs_urls, status)
|
||||
|
||||
def set_response(self):
|
||||
"""Modify cherrypy.response status, headers, and body to represent
|
||||
self.
|
||||
|
||||
CherryPy uses this internally, but you can also use it to create an
|
||||
HTTPRedirect object and set its output without *raising* the exception.
|
||||
"""
|
||||
import cherrypy
|
||||
response = cherrypy.serving.response
|
||||
response.status = status = self.status
|
||||
|
||||
if status in (300, 301, 302, 303, 307):
|
||||
response.headers['Content-Type'] = "text/html;charset=utf-8"
|
||||
# "The ... URI SHOULD be given by the Location field
|
||||
# in the response."
|
||||
response.headers['Location'] = self.urls[0]
|
||||
|
||||
# "Unless the request method was HEAD, the entity of the response
|
||||
# SHOULD contain a short hypertext note with a hyperlink to the
|
||||
# new URI(s)."
|
||||
msg = {
|
||||
300: "This resource can be found at ",
|
||||
301: "This resource has permanently moved to ",
|
||||
302: "This resource resides temporarily at ",
|
||||
303: "This resource can be found at ",
|
||||
307: "This resource has moved temporarily to ",
|
||||
}[status]
|
||||
msg += '<a href=%s>%s</a>.'
|
||||
from xml.sax import saxutils
|
||||
msgs = [msg % (saxutils.quoteattr(u), u) for u in self.urls]
|
||||
response.body = ntob("<br />\n".join(msgs), 'utf-8')
|
||||
# Previous code may have set C-L, so we have to reset it
|
||||
# (allow finalize to set it).
|
||||
response.headers.pop('Content-Length', None)
|
||||
elif status == 304:
|
||||
# Not Modified.
|
||||
# "The response MUST include the following header fields:
|
||||
# Date, unless its omission is required by section 14.18.1"
|
||||
# The "Date" header should have been set in Response.__init__
|
||||
|
||||
# "...the response SHOULD NOT include other entity-headers."
|
||||
for key in ('Allow', 'Content-Encoding', 'Content-Language',
|
||||
'Content-Length', 'Content-Location', 'Content-MD5',
|
||||
'Content-Range', 'Content-Type', 'Expires',
|
||||
'Last-Modified'):
|
||||
if key in response.headers:
|
||||
del response.headers[key]
|
||||
|
||||
# "The 304 response MUST NOT contain a message-body."
|
||||
response.body = None
|
||||
# Previous code may have set C-L, so we have to reset it.
|
||||
response.headers.pop('Content-Length', None)
|
||||
elif status == 305:
|
||||
# Use Proxy.
|
||||
# self.urls[0] should be the URI of the proxy.
|
||||
response.headers['Location'] = self.urls[0]
|
||||
response.body = None
|
||||
# Previous code may have set C-L, so we have to reset it.
|
||||
response.headers.pop('Content-Length', None)
|
||||
else:
|
||||
raise ValueError("The %s status code is unknown." % status)
|
||||
|
||||
def __call__(self):
|
||||
"""Use this exception as a request.handler (raise self)."""
|
||||
raise self
|
||||
|
||||
|
||||
def clean_headers(status):
|
||||
"""Remove any headers which should not apply to an error response."""
|
||||
import cherrypy
|
||||
|
||||
response = cherrypy.serving.response
|
||||
|
||||
# Remove headers which applied to the original content,
|
||||
# but do not apply to the error page.
|
||||
respheaders = response.headers
|
||||
for key in ["Accept-Ranges", "Age", "ETag", "Location", "Retry-After",
|
||||
"Vary", "Content-Encoding", "Content-Length", "Expires",
|
||||
"Content-Location", "Content-MD5", "Last-Modified"]:
|
||||
if key in respheaders:
|
||||
del respheaders[key]
|
||||
|
||||
if status != 416:
|
||||
# A server sending a response with status code 416 (Requested
|
||||
# range not satisfiable) SHOULD include a Content-Range field
|
||||
# with a byte-range-resp-spec of "*". The instance-length
|
||||
# specifies the current length of the selected resource.
|
||||
# A response with status code 206 (Partial Content) MUST NOT
|
||||
# include a Content-Range field with a byte-range- resp-spec of "*".
|
||||
if "Content-Range" in respheaders:
|
||||
del respheaders["Content-Range"]
|
||||
|
||||
|
||||
class HTTPError(CherryPyException):
|
||||
|
||||
"""Exception used to return an HTTP error code (4xx-5xx) to the client.
|
||||
|
||||
This exception can be used to automatically send a response using a
|
||||
http status code, with an appropriate error page. It takes an optional
|
||||
``status`` argument (which must be between 400 and 599); it defaults to 500
|
||||
("Internal Server Error"). It also takes an optional ``message`` argument,
|
||||
which will be returned in the response body. See
|
||||
`RFC2616 <http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.4>`_
|
||||
for a complete list of available error codes and when to use them.
|
||||
|
||||
Examples::
|
||||
|
||||
raise cherrypy.HTTPError(403)
|
||||
raise cherrypy.HTTPError(
|
||||
"403 Forbidden", "You are not allowed to access this resource.")
|
||||
"""
|
||||
|
||||
status = None
|
||||
"""The HTTP status code. May be of type int or str (with a Reason-Phrase).
|
||||
"""
|
||||
|
||||
code = None
|
||||
"""The integer HTTP status code."""
|
||||
|
||||
reason = None
|
||||
"""The HTTP Reason-Phrase string."""
|
||||
|
||||
def __init__(self, status=500, message=None):
|
||||
self.status = status
|
||||
try:
|
||||
self.code, self.reason, defaultmsg = _httputil.valid_status(status)
|
||||
except ValueError:
|
||||
raise self.__class__(500, _exc_info()[1].args[0])
|
||||
|
||||
if self.code < 400 or self.code > 599:
|
||||
raise ValueError("status must be between 400 and 599.")
|
||||
|
||||
# See http://www.python.org/dev/peps/pep-0352/
|
||||
# self.message = message
|
||||
self._message = message or defaultmsg
|
||||
CherryPyException.__init__(self, status, message)
|
||||
|
||||
def set_response(self):
|
||||
"""Modify cherrypy.response status, headers, and body to represent
|
||||
self.
|
||||
|
||||
CherryPy uses this internally, but you can also use it to create an
|
||||
HTTPError object and set its output without *raising* the exception.
|
||||
"""
|
||||
import cherrypy
|
||||
|
||||
response = cherrypy.serving.response
|
||||
|
||||
clean_headers(self.code)
|
||||
|
||||
# In all cases, finalize will be called after this method,
|
||||
# so don't bother cleaning up response values here.
|
||||
response.status = self.status
|
||||
tb = None
|
||||
if cherrypy.serving.request.show_tracebacks:
|
||||
tb = format_exc()
|
||||
|
||||
response.headers.pop('Content-Length', None)
|
||||
|
||||
content = self.get_error_page(self.status, traceback=tb,
|
||||
message=self._message)
|
||||
response.body = content
|
||||
|
||||
_be_ie_unfriendly(self.code)
|
||||
|
||||
def get_error_page(self, *args, **kwargs):
|
||||
return get_error_page(*args, **kwargs)
|
||||
|
||||
def __call__(self):
|
||||
"""Use this exception as a request.handler (raise self)."""
|
||||
raise self
|
||||
|
||||
|
||||
class NotFound(HTTPError):
|
||||
|
||||
"""Exception raised when a URL could not be mapped to any handler (404).
|
||||
|
||||
This is equivalent to raising
|
||||
:class:`HTTPError("404 Not Found") <cherrypy._cperror.HTTPError>`.
|
||||
"""
|
||||
|
||||
def __init__(self, path=None):
|
||||
if path is None:
|
||||
import cherrypy
|
||||
request = cherrypy.serving.request
|
||||
path = request.script_name + request.path_info
|
||||
self.args = (path,)
|
||||
HTTPError.__init__(self, 404, "The path '%s' was not found." % path)
|
||||
|
||||
|
||||
_HTTPErrorTemplate = '''<!DOCTYPE html PUBLIC
|
||||
"-//W3C//DTD XHTML 1.0 Transitional//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"></meta>
|
||||
<title>%(status)s</title>
|
||||
<style type="text/css">
|
||||
#powered_by {
|
||||
margin-top: 20px;
|
||||
border-top: 2px solid black;
|
||||
font-style: italic;
|
||||
}
|
||||
|
||||
#traceback {
|
||||
color: red;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<h2>%(status)s</h2>
|
||||
<p>%(message)s</p>
|
||||
<pre id="traceback">%(traceback)s</pre>
|
||||
<div id="powered_by">
|
||||
<span>
|
||||
Powered by <a href="http://www.cherrypy.org">CherryPy %(version)s</a>
|
||||
</span>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
'''
|
||||
|
||||
|
||||
def get_error_page(status, **kwargs):
|
||||
"""Return an HTML page, containing a pretty error response.
|
||||
|
||||
status should be an int or a str.
|
||||
kwargs will be interpolated into the page template.
|
||||
"""
|
||||
import cherrypy
|
||||
|
||||
try:
|
||||
code, reason, message = _httputil.valid_status(status)
|
||||
except ValueError:
|
||||
raise cherrypy.HTTPError(500, _exc_info()[1].args[0])
|
||||
|
||||
# We can't use setdefault here, because some
|
||||
# callers send None for kwarg values.
|
||||
if kwargs.get('status') is None:
|
||||
kwargs['status'] = "%s %s" % (code, reason)
|
||||
if kwargs.get('message') is None:
|
||||
kwargs['message'] = message
|
||||
if kwargs.get('traceback') is None:
|
||||
kwargs['traceback'] = ''
|
||||
if kwargs.get('version') is None:
|
||||
kwargs['version'] = cherrypy.__version__
|
||||
|
||||
for k, v in iteritems(kwargs):
|
||||
if v is None:
|
||||
kwargs[k] = ""
|
||||
else:
|
||||
kwargs[k] = _escape(kwargs[k])
|
||||
|
||||
# Use a custom template or callable for the error page?
|
||||
pages = cherrypy.serving.request.error_page
|
||||
error_page = pages.get(code) or pages.get('default')
|
||||
|
||||
# Default template, can be overridden below.
|
||||
template = _HTTPErrorTemplate
|
||||
if error_page:
|
||||
try:
|
||||
if hasattr(error_page, '__call__'):
|
||||
# The caller function may be setting headers manually,
|
||||
# so we delegate to it completely. We may be returning
|
||||
# an iterator as well as a string here.
|
||||
#
|
||||
# We *must* make sure any content is not unicode.
|
||||
result = error_page(**kwargs)
|
||||
if cherrypy.lib.is_iterator(result):
|
||||
from cherrypy.lib.encoding import UTF8StreamEncoder
|
||||
return UTF8StreamEncoder(result)
|
||||
elif isinstance(result, cherrypy._cpcompat.unicodestr):
|
||||
return result.encode('utf-8')
|
||||
else:
|
||||
if not isinstance(result, cherrypy._cpcompat.bytestr):
|
||||
raise ValueError('error page function did not '
|
||||
'return a bytestring, unicodestring or an '
|
||||
'iterator - returned object of type %s.'
|
||||
% (type(result).__name__))
|
||||
return result
|
||||
else:
|
||||
# Load the template from this path.
|
||||
template = tonative(open(error_page, 'rb').read())
|
||||
except:
|
||||
e = _format_exception(*_exc_info())[-1]
|
||||
m = kwargs['message']
|
||||
if m:
|
||||
m += "<br />"
|
||||
m += "In addition, the custom error page failed:\n<br />%s" % e
|
||||
kwargs['message'] = m
|
||||
|
||||
response = cherrypy.serving.response
|
||||
response.headers['Content-Type'] = "text/html;charset=utf-8"
|
||||
result = template % kwargs
|
||||
return result.encode('utf-8')
|
||||
|
||||
|
||||
|
||||
_ie_friendly_error_sizes = {
|
||||
400: 512, 403: 256, 404: 512, 405: 256,
|
||||
406: 512, 408: 512, 409: 512, 410: 256,
|
||||
500: 512, 501: 512, 505: 512,
|
||||
}
|
||||
|
||||
|
||||
def _be_ie_unfriendly(status):
|
||||
import cherrypy
|
||||
response = cherrypy.serving.response
|
||||
|
||||
# For some statuses, Internet Explorer 5+ shows "friendly error
|
||||
# messages" instead of our response.body if the body is smaller
|
||||
# than a given size. Fix this by returning a body over that size
|
||||
# (by adding whitespace).
|
||||
# See http://support.microsoft.com/kb/q218155/
|
||||
s = _ie_friendly_error_sizes.get(status, 0)
|
||||
if s:
|
||||
s += 1
|
||||
# Since we are issuing an HTTP error status, we assume that
|
||||
# the entity is short, and we should just collapse it.
|
||||
content = response.collapse_body()
|
||||
l = len(content)
|
||||
if l and l < s:
|
||||
# IN ADDITION: the response must be written to IE
|
||||
# in one chunk or it will still get replaced! Bah.
|
||||
content = content + (ntob(" ") * (s - l))
|
||||
response.body = content
|
||||
response.headers['Content-Length'] = str(len(content))
|
||||
|
||||
|
||||
def format_exc(exc=None):
|
||||
"""Return exc (or sys.exc_info if None), formatted."""
|
||||
try:
|
||||
if exc is None:
|
||||
exc = _exc_info()
|
||||
if exc == (None, None, None):
|
||||
return ""
|
||||
import traceback
|
||||
return "".join(traceback.format_exception(*exc))
|
||||
finally:
|
||||
del exc
|
||||
|
||||
|
||||
def bare_error(extrabody=None):
|
||||
"""Produce status, headers, body for a critical error.
|
||||
|
||||
Returns a triple without calling any other questionable functions,
|
||||
so it should be as error-free as possible. Call it from an HTTP server
|
||||
if you get errors outside of the request.
|
||||
|
||||
If extrabody is None, a friendly but rather unhelpful error message
|
||||
is set in the body. If extrabody is a string, it will be appended
|
||||
as-is to the body.
|
||||
"""
|
||||
|
||||
# The whole point of this function is to be a last line-of-defense
|
||||
# in handling errors. That is, it must not raise any errors itself;
|
||||
# it cannot be allowed to fail. Therefore, don't add to it!
|
||||
# In particular, don't call any other CP functions.
|
||||
|
||||
body = ntob("Unrecoverable error in the server.")
|
||||
if extrabody is not None:
|
||||
if not isinstance(extrabody, bytestr):
|
||||
extrabody = extrabody.encode('utf-8')
|
||||
body += ntob("\n") + extrabody
|
||||
|
||||
return (ntob("500 Internal Server Error"),
|
||||
[(ntob('Content-Type'), ntob('text/plain')),
|
||||
(ntob('Content-Length'), ntob(str(len(body)), 'ISO-8859-1'))],
|
||||
[body])
|
||||
459
lib/cherrypy/_cplogging.py
Normal file
459
lib/cherrypy/_cplogging.py
Normal file
@@ -0,0 +1,459 @@
|
||||
"""
|
||||
Simple config
|
||||
=============
|
||||
|
||||
Although CherryPy uses the :mod:`Python logging module <logging>`, it does so
|
||||
behind the scenes so that simple logging is simple, but complicated logging
|
||||
is still possible. "Simple" logging means that you can log to the screen
|
||||
(i.e. console/stdout) or to a file, and that you can easily have separate
|
||||
error and access log files.
|
||||
|
||||
Here are the simplified logging settings. You use these by adding lines to
|
||||
your config file or dict. You should set these at either the global level or
|
||||
per application (see next), but generally not both.
|
||||
|
||||
* ``log.screen``: Set this to True to have both "error" and "access" messages
|
||||
printed to stdout.
|
||||
* ``log.access_file``: Set this to an absolute filename where you want
|
||||
"access" messages written.
|
||||
* ``log.error_file``: Set this to an absolute filename where you want "error"
|
||||
messages written.
|
||||
|
||||
Many events are automatically logged; to log your own application events, call
|
||||
:func:`cherrypy.log`.
|
||||
|
||||
Architecture
|
||||
============
|
||||
|
||||
Separate scopes
|
||||
---------------
|
||||
|
||||
CherryPy provides log managers at both the global and application layers.
|
||||
This means you can have one set of logging rules for your entire site,
|
||||
and another set of rules specific to each application. The global log
|
||||
manager is found at :func:`cherrypy.log`, and the log manager for each
|
||||
application is found at :attr:`app.log<cherrypy._cptree.Application.log>`.
|
||||
If you're inside a request, the latter is reachable from
|
||||
``cherrypy.request.app.log``; if you're outside a request, you'll have to
|
||||
obtain a reference to the ``app``: either the return value of
|
||||
:func:`tree.mount()<cherrypy._cptree.Tree.mount>` or, if you used
|
||||
:func:`quickstart()<cherrypy.quickstart>` instead, via
|
||||
``cherrypy.tree.apps['/']``.
|
||||
|
||||
By default, the global logs are named "cherrypy.error" and "cherrypy.access",
|
||||
and the application logs are named "cherrypy.error.2378745" and
|
||||
"cherrypy.access.2378745" (the number is the id of the Application object).
|
||||
This means that the application logs "bubble up" to the site logs, so if your
|
||||
application has no log handlers, the site-level handlers will still log the
|
||||
messages.
|
||||
|
||||
Errors vs. Access
|
||||
-----------------
|
||||
|
||||
Each log manager handles both "access" messages (one per HTTP request) and
|
||||
"error" messages (everything else). Note that the "error" log is not just for
|
||||
errors! The format of access messages is highly formalized, but the error log
|
||||
isn't--it receives messages from a variety of sources (including full error
|
||||
tracebacks, if enabled).
|
||||
|
||||
If you are logging the access log and error log to the same source, then there
|
||||
is a possibility that a specially crafted error message may replicate an access
|
||||
log message as described in CWE-117. In this case it is the application
|
||||
developer's responsibility to manually escape data before using CherryPy's log()
|
||||
functionality, or they may create an application that is vulnerable to CWE-117.
|
||||
This would be achieved by using a custom handler escape any special characters,
|
||||
and attached as described below.
|
||||
|
||||
Custom Handlers
|
||||
===============
|
||||
|
||||
The simple settings above work by manipulating Python's standard :mod:`logging`
|
||||
module. So when you need something more complex, the full power of the standard
|
||||
module is yours to exploit. You can borrow or create custom handlers, formats,
|
||||
filters, and much more. Here's an example that skips the standard FileHandler
|
||||
and uses a RotatingFileHandler instead:
|
||||
|
||||
::
|
||||
|
||||
#python
|
||||
log = app.log
|
||||
|
||||
# Remove the default FileHandlers if present.
|
||||
log.error_file = ""
|
||||
log.access_file = ""
|
||||
|
||||
maxBytes = getattr(log, "rot_maxBytes", 10000000)
|
||||
backupCount = getattr(log, "rot_backupCount", 1000)
|
||||
|
||||
# Make a new RotatingFileHandler for the error log.
|
||||
fname = getattr(log, "rot_error_file", "error.log")
|
||||
h = handlers.RotatingFileHandler(fname, 'a', maxBytes, backupCount)
|
||||
h.setLevel(DEBUG)
|
||||
h.setFormatter(_cplogging.logfmt)
|
||||
log.error_log.addHandler(h)
|
||||
|
||||
# Make a new RotatingFileHandler for the access log.
|
||||
fname = getattr(log, "rot_access_file", "access.log")
|
||||
h = handlers.RotatingFileHandler(fname, 'a', maxBytes, backupCount)
|
||||
h.setLevel(DEBUG)
|
||||
h.setFormatter(_cplogging.logfmt)
|
||||
log.access_log.addHandler(h)
|
||||
|
||||
|
||||
The ``rot_*`` attributes are pulled straight from the application log object.
|
||||
Since "log.*" config entries simply set attributes on the log object, you can
|
||||
add custom attributes to your heart's content. Note that these handlers are
|
||||
used ''instead'' of the default, simple handlers outlined above (so don't set
|
||||
the "log.error_file" config entry, for example).
|
||||
"""
|
||||
|
||||
import datetime
|
||||
import logging
|
||||
# Silence the no-handlers "warning" (stderr write!) in stdlib logging
|
||||
logging.Logger.manager.emittedNoHandlerWarning = 1
|
||||
logfmt = logging.Formatter("%(message)s")
|
||||
import os
|
||||
import sys
|
||||
|
||||
import cherrypy
|
||||
from cherrypy import _cperror
|
||||
from cherrypy._cpcompat import ntob, py3k
|
||||
|
||||
|
||||
class NullHandler(logging.Handler):
|
||||
|
||||
"""A no-op logging handler to silence the logging.lastResort handler."""
|
||||
|
||||
def handle(self, record):
|
||||
pass
|
||||
|
||||
def emit(self, record):
|
||||
pass
|
||||
|
||||
def createLock(self):
|
||||
self.lock = None
|
||||
|
||||
|
||||
class LogManager(object):
|
||||
|
||||
"""An object to assist both simple and advanced logging.
|
||||
|
||||
``cherrypy.log`` is an instance of this class.
|
||||
"""
|
||||
|
||||
appid = None
|
||||
"""The id() of the Application object which owns this log manager. If this
|
||||
is a global log manager, appid is None."""
|
||||
|
||||
error_log = None
|
||||
"""The actual :class:`logging.Logger` instance for error messages."""
|
||||
|
||||
access_log = None
|
||||
"""The actual :class:`logging.Logger` instance for access messages."""
|
||||
|
||||
if py3k:
|
||||
access_log_format = \
|
||||
'{h} {l} {u} {t} "{r}" {s} {b} "{f}" "{a}"'
|
||||
else:
|
||||
access_log_format = \
|
||||
'%(h)s %(l)s %(u)s %(t)s "%(r)s" %(s)s %(b)s "%(f)s" "%(a)s"'
|
||||
|
||||
logger_root = None
|
||||
"""The "top-level" logger name.
|
||||
|
||||
This string will be used as the first segment in the Logger names.
|
||||
The default is "cherrypy", for example, in which case the Logger names
|
||||
will be of the form::
|
||||
|
||||
cherrypy.error.<appid>
|
||||
cherrypy.access.<appid>
|
||||
"""
|
||||
|
||||
def __init__(self, appid=None, logger_root="cherrypy"):
|
||||
self.logger_root = logger_root
|
||||
self.appid = appid
|
||||
if appid is None:
|
||||
self.error_log = logging.getLogger("%s.error" % logger_root)
|
||||
self.access_log = logging.getLogger("%s.access" % logger_root)
|
||||
else:
|
||||
self.error_log = logging.getLogger(
|
||||
"%s.error.%s" % (logger_root, appid))
|
||||
self.access_log = logging.getLogger(
|
||||
"%s.access.%s" % (logger_root, appid))
|
||||
self.error_log.setLevel(logging.INFO)
|
||||
self.access_log.setLevel(logging.INFO)
|
||||
|
||||
# Silence the no-handlers "warning" (stderr write!) in stdlib logging
|
||||
self.error_log.addHandler(NullHandler())
|
||||
self.access_log.addHandler(NullHandler())
|
||||
|
||||
cherrypy.engine.subscribe('graceful', self.reopen_files)
|
||||
|
||||
def reopen_files(self):
|
||||
"""Close and reopen all file handlers."""
|
||||
for log in (self.error_log, self.access_log):
|
||||
for h in log.handlers:
|
||||
if isinstance(h, logging.FileHandler):
|
||||
h.acquire()
|
||||
h.stream.close()
|
||||
h.stream = open(h.baseFilename, h.mode)
|
||||
h.release()
|
||||
|
||||
def error(self, msg='', context='', severity=logging.INFO,
|
||||
traceback=False):
|
||||
"""Write the given ``msg`` to the error log.
|
||||
|
||||
This is not just for errors! Applications may call this at any time
|
||||
to log application-specific information.
|
||||
|
||||
If ``traceback`` is True, the traceback of the current exception
|
||||
(if any) will be appended to ``msg``.
|
||||
"""
|
||||
if traceback:
|
||||
msg += _cperror.format_exc()
|
||||
self.error_log.log(severity, ' '.join((self.time(), context, msg)))
|
||||
|
||||
def __call__(self, *args, **kwargs):
|
||||
"""An alias for ``error``."""
|
||||
return self.error(*args, **kwargs)
|
||||
|
||||
def access(self):
|
||||
"""Write to the access log (in Apache/NCSA Combined Log format).
|
||||
|
||||
See the
|
||||
`apache documentation <http://httpd.apache.org/docs/current/logs.html#combined>`_
|
||||
for format details.
|
||||
|
||||
CherryPy calls this automatically for you. Note there are no arguments;
|
||||
it collects the data itself from
|
||||
:class:`cherrypy.request<cherrypy._cprequest.Request>`.
|
||||
|
||||
Like Apache started doing in 2.0.46, non-printable and other special
|
||||
characters in %r (and we expand that to all parts) are escaped using
|
||||
\\xhh sequences, where hh stands for the hexadecimal representation
|
||||
of the raw byte. Exceptions from this rule are " and \\, which are
|
||||
escaped by prepending a backslash, and all whitespace characters,
|
||||
which are written in their C-style notation (\\n, \\t, etc).
|
||||
"""
|
||||
request = cherrypy.serving.request
|
||||
remote = request.remote
|
||||
response = cherrypy.serving.response
|
||||
outheaders = response.headers
|
||||
inheaders = request.headers
|
||||
if response.output_status is None:
|
||||
status = "-"
|
||||
else:
|
||||
status = response.output_status.split(ntob(" "), 1)[0]
|
||||
if py3k:
|
||||
status = status.decode('ISO-8859-1')
|
||||
|
||||
atoms = {'h': remote.name or remote.ip,
|
||||
'l': '-',
|
||||
'u': getattr(request, "login", None) or "-",
|
||||
't': self.time(),
|
||||
'r': request.request_line,
|
||||
's': status,
|
||||
'b': dict.get(outheaders, 'Content-Length', '') or "-",
|
||||
'f': dict.get(inheaders, 'Referer', ''),
|
||||
'a': dict.get(inheaders, 'User-Agent', ''),
|
||||
'o': dict.get(inheaders, 'Host', '-'),
|
||||
}
|
||||
if py3k:
|
||||
for k, v in atoms.items():
|
||||
if not isinstance(v, str):
|
||||
v = str(v)
|
||||
v = v.replace('"', '\\"').encode('utf8')
|
||||
# Fortunately, repr(str) escapes unprintable chars, \n, \t, etc
|
||||
# and backslash for us. All we have to do is strip the quotes.
|
||||
v = repr(v)[2:-1]
|
||||
|
||||
# in python 3.0 the repr of bytes (as returned by encode)
|
||||
# uses double \'s. But then the logger escapes them yet, again
|
||||
# resulting in quadruple slashes. Remove the extra one here.
|
||||
v = v.replace('\\\\', '\\')
|
||||
|
||||
# Escape double-quote.
|
||||
atoms[k] = v
|
||||
|
||||
try:
|
||||
self.access_log.log(
|
||||
logging.INFO, self.access_log_format.format(**atoms))
|
||||
except:
|
||||
self(traceback=True)
|
||||
else:
|
||||
for k, v in atoms.items():
|
||||
if isinstance(v, unicode):
|
||||
v = v.encode('utf8')
|
||||
elif not isinstance(v, str):
|
||||
v = str(v)
|
||||
# Fortunately, repr(str) escapes unprintable chars, \n, \t, etc
|
||||
# and backslash for us. All we have to do is strip the quotes.
|
||||
v = repr(v)[1:-1]
|
||||
# Escape double-quote.
|
||||
atoms[k] = v.replace('"', '\\"')
|
||||
|
||||
try:
|
||||
self.access_log.log(
|
||||
logging.INFO, self.access_log_format % atoms)
|
||||
except:
|
||||
self(traceback=True)
|
||||
|
||||
def time(self):
|
||||
"""Return now() in Apache Common Log Format (no timezone)."""
|
||||
now = datetime.datetime.now()
|
||||
monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun',
|
||||
'jul', 'aug', 'sep', 'oct', 'nov', 'dec']
|
||||
month = monthnames[now.month - 1].capitalize()
|
||||
return ('[%02d/%s/%04d:%02d:%02d:%02d]' %
|
||||
(now.day, month, now.year, now.hour, now.minute, now.second))
|
||||
|
||||
def _get_builtin_handler(self, log, key):
|
||||
for h in log.handlers:
|
||||
if getattr(h, "_cpbuiltin", None) == key:
|
||||
return h
|
||||
|
||||
# ------------------------- Screen handlers ------------------------- #
|
||||
def _set_screen_handler(self, log, enable, stream=None):
|
||||
h = self._get_builtin_handler(log, "screen")
|
||||
if enable:
|
||||
if not h:
|
||||
if stream is None:
|
||||
stream = sys.stderr
|
||||
h = logging.StreamHandler(stream)
|
||||
h.setFormatter(logfmt)
|
||||
h._cpbuiltin = "screen"
|
||||
log.addHandler(h)
|
||||
elif h:
|
||||
log.handlers.remove(h)
|
||||
|
||||
def _get_screen(self):
|
||||
h = self._get_builtin_handler
|
||||
has_h = h(self.error_log, "screen") or h(self.access_log, "screen")
|
||||
return bool(has_h)
|
||||
|
||||
def _set_screen(self, newvalue):
|
||||
self._set_screen_handler(self.error_log, newvalue, stream=sys.stderr)
|
||||
self._set_screen_handler(self.access_log, newvalue, stream=sys.stdout)
|
||||
screen = property(_get_screen, _set_screen,
|
||||
doc="""Turn stderr/stdout logging on or off.
|
||||
|
||||
If you set this to True, it'll add the appropriate StreamHandler for
|
||||
you. If you set it to False, it will remove the handler.
|
||||
""")
|
||||
|
||||
# -------------------------- File handlers -------------------------- #
|
||||
|
||||
def _add_builtin_file_handler(self, log, fname):
|
||||
h = logging.FileHandler(fname)
|
||||
h.setFormatter(logfmt)
|
||||
h._cpbuiltin = "file"
|
||||
log.addHandler(h)
|
||||
|
||||
def _set_file_handler(self, log, filename):
|
||||
h = self._get_builtin_handler(log, "file")
|
||||
if filename:
|
||||
if h:
|
||||
if h.baseFilename != os.path.abspath(filename):
|
||||
h.close()
|
||||
log.handlers.remove(h)
|
||||
self._add_builtin_file_handler(log, filename)
|
||||
else:
|
||||
self._add_builtin_file_handler(log, filename)
|
||||
else:
|
||||
if h:
|
||||
h.close()
|
||||
log.handlers.remove(h)
|
||||
|
||||
def _get_error_file(self):
|
||||
h = self._get_builtin_handler(self.error_log, "file")
|
||||
if h:
|
||||
return h.baseFilename
|
||||
return ''
|
||||
|
||||
def _set_error_file(self, newvalue):
|
||||
self._set_file_handler(self.error_log, newvalue)
|
||||
error_file = property(_get_error_file, _set_error_file,
|
||||
doc="""The filename for self.error_log.
|
||||
|
||||
If you set this to a string, it'll add the appropriate FileHandler for
|
||||
you. If you set it to ``None`` or ``''``, it will remove the handler.
|
||||
""")
|
||||
|
||||
def _get_access_file(self):
|
||||
h = self._get_builtin_handler(self.access_log, "file")
|
||||
if h:
|
||||
return h.baseFilename
|
||||
return ''
|
||||
|
||||
def _set_access_file(self, newvalue):
|
||||
self._set_file_handler(self.access_log, newvalue)
|
||||
access_file = property(_get_access_file, _set_access_file,
|
||||
doc="""The filename for self.access_log.
|
||||
|
||||
If you set this to a string, it'll add the appropriate FileHandler for
|
||||
you. If you set it to ``None`` or ``''``, it will remove the handler.
|
||||
""")
|
||||
|
||||
# ------------------------- WSGI handlers ------------------------- #
|
||||
|
||||
def _set_wsgi_handler(self, log, enable):
|
||||
h = self._get_builtin_handler(log, "wsgi")
|
||||
if enable:
|
||||
if not h:
|
||||
h = WSGIErrorHandler()
|
||||
h.setFormatter(logfmt)
|
||||
h._cpbuiltin = "wsgi"
|
||||
log.addHandler(h)
|
||||
elif h:
|
||||
log.handlers.remove(h)
|
||||
|
||||
def _get_wsgi(self):
|
||||
return bool(self._get_builtin_handler(self.error_log, "wsgi"))
|
||||
|
||||
def _set_wsgi(self, newvalue):
|
||||
self._set_wsgi_handler(self.error_log, newvalue)
|
||||
wsgi = property(_get_wsgi, _set_wsgi,
|
||||
doc="""Write errors to wsgi.errors.
|
||||
|
||||
If you set this to True, it'll add the appropriate
|
||||
:class:`WSGIErrorHandler<cherrypy._cplogging.WSGIErrorHandler>` for you
|
||||
(which writes errors to ``wsgi.errors``).
|
||||
If you set it to False, it will remove the handler.
|
||||
""")
|
||||
|
||||
|
||||
class WSGIErrorHandler(logging.Handler):
|
||||
|
||||
"A handler class which writes logging records to environ['wsgi.errors']."
|
||||
|
||||
def flush(self):
|
||||
"""Flushes the stream."""
|
||||
try:
|
||||
stream = cherrypy.serving.request.wsgi_environ.get('wsgi.errors')
|
||||
except (AttributeError, KeyError):
|
||||
pass
|
||||
else:
|
||||
stream.flush()
|
||||
|
||||
def emit(self, record):
|
||||
"""Emit a record."""
|
||||
try:
|
||||
stream = cherrypy.serving.request.wsgi_environ.get('wsgi.errors')
|
||||
except (AttributeError, KeyError):
|
||||
pass
|
||||
else:
|
||||
try:
|
||||
msg = self.format(record)
|
||||
fs = "%s\n"
|
||||
import types
|
||||
# if no unicode support...
|
||||
if not hasattr(types, "UnicodeType"):
|
||||
stream.write(fs % msg)
|
||||
else:
|
||||
try:
|
||||
stream.write(fs % msg)
|
||||
except UnicodeError:
|
||||
stream.write(fs % msg.encode("UTF-8"))
|
||||
self.flush()
|
||||
except:
|
||||
self.handleError(record)
|
||||
353
lib/cherrypy/_cpmodpy.py
Normal file
353
lib/cherrypy/_cpmodpy.py
Normal file
@@ -0,0 +1,353 @@
|
||||
"""Native adapter for serving CherryPy via mod_python
|
||||
|
||||
Basic usage:
|
||||
|
||||
##########################################
|
||||
# Application in a module called myapp.py
|
||||
##########################################
|
||||
|
||||
import cherrypy
|
||||
|
||||
class Root:
|
||||
@cherrypy.expose
|
||||
def index(self):
|
||||
return 'Hi there, Ho there, Hey there'
|
||||
|
||||
|
||||
# We will use this method from the mod_python configuration
|
||||
# as the entry point to our application
|
||||
def setup_server():
|
||||
cherrypy.tree.mount(Root())
|
||||
cherrypy.config.update({'environment': 'production',
|
||||
'log.screen': False,
|
||||
'show_tracebacks': False})
|
||||
|
||||
##########################################
|
||||
# mod_python settings for apache2
|
||||
# This should reside in your httpd.conf
|
||||
# or a file that will be loaded at
|
||||
# apache startup
|
||||
##########################################
|
||||
|
||||
# Start
|
||||
DocumentRoot "/"
|
||||
Listen 8080
|
||||
LoadModule python_module /usr/lib/apache2/modules/mod_python.so
|
||||
|
||||
<Location "/">
|
||||
PythonPath "sys.path+['/path/to/my/application']"
|
||||
SetHandler python-program
|
||||
PythonHandler cherrypy._cpmodpy::handler
|
||||
PythonOption cherrypy.setup myapp::setup_server
|
||||
PythonDebug On
|
||||
</Location>
|
||||
# End
|
||||
|
||||
The actual path to your mod_python.so is dependent on your
|
||||
environment. In this case we suppose a global mod_python
|
||||
installation on a Linux distribution such as Ubuntu.
|
||||
|
||||
We do set the PythonPath configuration setting so that
|
||||
your application can be found by from the user running
|
||||
the apache2 instance. Of course if your application
|
||||
resides in the global site-package this won't be needed.
|
||||
|
||||
Then restart apache2 and access http://127.0.0.1:8080
|
||||
"""
|
||||
|
||||
import logging
|
||||
import sys
|
||||
|
||||
import cherrypy
|
||||
from cherrypy._cpcompat import BytesIO, copyitems, ntob
|
||||
from cherrypy._cperror import format_exc, bare_error
|
||||
from cherrypy.lib import httputil
|
||||
|
||||
|
||||
# ------------------------------ Request-handling
|
||||
|
||||
|
||||
def setup(req):
|
||||
from mod_python import apache
|
||||
|
||||
# Run any setup functions defined by a "PythonOption cherrypy.setup"
|
||||
# directive.
|
||||
options = req.get_options()
|
||||
if 'cherrypy.setup' in options:
|
||||
for function in options['cherrypy.setup'].split():
|
||||
atoms = function.split('::', 1)
|
||||
if len(atoms) == 1:
|
||||
mod = __import__(atoms[0], globals(), locals())
|
||||
else:
|
||||
modname, fname = atoms
|
||||
mod = __import__(modname, globals(), locals(), [fname])
|
||||
func = getattr(mod, fname)
|
||||
func()
|
||||
|
||||
cherrypy.config.update({'log.screen': False,
|
||||
"tools.ignore_headers.on": True,
|
||||
"tools.ignore_headers.headers": ['Range'],
|
||||
})
|
||||
|
||||
engine = cherrypy.engine
|
||||
if hasattr(engine, "signal_handler"):
|
||||
engine.signal_handler.unsubscribe()
|
||||
if hasattr(engine, "console_control_handler"):
|
||||
engine.console_control_handler.unsubscribe()
|
||||
engine.autoreload.unsubscribe()
|
||||
cherrypy.server.unsubscribe()
|
||||
|
||||
def _log(msg, level):
|
||||
newlevel = apache.APLOG_ERR
|
||||
if logging.DEBUG >= level:
|
||||
newlevel = apache.APLOG_DEBUG
|
||||
elif logging.INFO >= level:
|
||||
newlevel = apache.APLOG_INFO
|
||||
elif logging.WARNING >= level:
|
||||
newlevel = apache.APLOG_WARNING
|
||||
# On Windows, req.server is required or the msg will vanish. See
|
||||
# http://www.modpython.org/pipermail/mod_python/2003-October/014291.html
|
||||
# Also, "When server is not specified...LogLevel does not apply..."
|
||||
apache.log_error(msg, newlevel, req.server)
|
||||
engine.subscribe('log', _log)
|
||||
|
||||
engine.start()
|
||||
|
||||
def cherrypy_cleanup(data):
|
||||
engine.exit()
|
||||
try:
|
||||
# apache.register_cleanup wasn't available until 3.1.4.
|
||||
apache.register_cleanup(cherrypy_cleanup)
|
||||
except AttributeError:
|
||||
req.server.register_cleanup(req, cherrypy_cleanup)
|
||||
|
||||
|
||||
class _ReadOnlyRequest:
|
||||
expose = ('read', 'readline', 'readlines')
|
||||
|
||||
def __init__(self, req):
|
||||
for method in self.expose:
|
||||
self.__dict__[method] = getattr(req, method)
|
||||
|
||||
|
||||
recursive = False
|
||||
|
||||
_isSetUp = False
|
||||
|
||||
|
||||
def handler(req):
|
||||
from mod_python import apache
|
||||
try:
|
||||
global _isSetUp
|
||||
if not _isSetUp:
|
||||
setup(req)
|
||||
_isSetUp = True
|
||||
|
||||
# Obtain a Request object from CherryPy
|
||||
local = req.connection.local_addr
|
||||
local = httputil.Host(
|
||||
local[0], local[1], req.connection.local_host or "")
|
||||
remote = req.connection.remote_addr
|
||||
remote = httputil.Host(
|
||||
remote[0], remote[1], req.connection.remote_host or "")
|
||||
|
||||
scheme = req.parsed_uri[0] or 'http'
|
||||
req.get_basic_auth_pw()
|
||||
|
||||
try:
|
||||
# apache.mpm_query only became available in mod_python 3.1
|
||||
q = apache.mpm_query
|
||||
threaded = q(apache.AP_MPMQ_IS_THREADED)
|
||||
forked = q(apache.AP_MPMQ_IS_FORKED)
|
||||
except AttributeError:
|
||||
bad_value = ("You must provide a PythonOption '%s', "
|
||||
"either 'on' or 'off', when running a version "
|
||||
"of mod_python < 3.1")
|
||||
|
||||
threaded = options.get('multithread', '').lower()
|
||||
if threaded == 'on':
|
||||
threaded = True
|
||||
elif threaded == 'off':
|
||||
threaded = False
|
||||
else:
|
||||
raise ValueError(bad_value % "multithread")
|
||||
|
||||
forked = options.get('multiprocess', '').lower()
|
||||
if forked == 'on':
|
||||
forked = True
|
||||
elif forked == 'off':
|
||||
forked = False
|
||||
else:
|
||||
raise ValueError(bad_value % "multiprocess")
|
||||
|
||||
sn = cherrypy.tree.script_name(req.uri or "/")
|
||||
if sn is None:
|
||||
send_response(req, '404 Not Found', [], '')
|
||||
else:
|
||||
app = cherrypy.tree.apps[sn]
|
||||
method = req.method
|
||||
path = req.uri
|
||||
qs = req.args or ""
|
||||
reqproto = req.protocol
|
||||
headers = copyitems(req.headers_in)
|
||||
rfile = _ReadOnlyRequest(req)
|
||||
prev = None
|
||||
|
||||
try:
|
||||
redirections = []
|
||||
while True:
|
||||
request, response = app.get_serving(local, remote, scheme,
|
||||
"HTTP/1.1")
|
||||
request.login = req.user
|
||||
request.multithread = bool(threaded)
|
||||
request.multiprocess = bool(forked)
|
||||
request.app = app
|
||||
request.prev = prev
|
||||
|
||||
# Run the CherryPy Request object and obtain the response
|
||||
try:
|
||||
request.run(method, path, qs, reqproto, headers, rfile)
|
||||
break
|
||||
except cherrypy.InternalRedirect:
|
||||
ir = sys.exc_info()[1]
|
||||
app.release_serving()
|
||||
prev = request
|
||||
|
||||
if not recursive:
|
||||
if ir.path in redirections:
|
||||
raise RuntimeError(
|
||||
"InternalRedirector visited the same URL "
|
||||
"twice: %r" % ir.path)
|
||||
else:
|
||||
# Add the *previous* path_info + qs to
|
||||
# redirections.
|
||||
if qs:
|
||||
qs = "?" + qs
|
||||
redirections.append(sn + path + qs)
|
||||
|
||||
# Munge environment and try again.
|
||||
method = "GET"
|
||||
path = ir.path
|
||||
qs = ir.query_string
|
||||
rfile = BytesIO()
|
||||
|
||||
send_response(
|
||||
req, response.output_status, response.header_list,
|
||||
response.body, response.stream)
|
||||
finally:
|
||||
app.release_serving()
|
||||
except:
|
||||
tb = format_exc()
|
||||
cherrypy.log(tb, 'MOD_PYTHON', severity=logging.ERROR)
|
||||
s, h, b = bare_error()
|
||||
send_response(req, s, h, b)
|
||||
return apache.OK
|
||||
|
||||
|
||||
def send_response(req, status, headers, body, stream=False):
|
||||
# Set response status
|
||||
req.status = int(status[:3])
|
||||
|
||||
# Set response headers
|
||||
req.content_type = "text/plain"
|
||||
for header, value in headers:
|
||||
if header.lower() == 'content-type':
|
||||
req.content_type = value
|
||||
continue
|
||||
req.headers_out.add(header, value)
|
||||
|
||||
if stream:
|
||||
# Flush now so the status and headers are sent immediately.
|
||||
req.flush()
|
||||
|
||||
# Set response body
|
||||
if isinstance(body, basestring):
|
||||
req.write(body)
|
||||
else:
|
||||
for seg in body:
|
||||
req.write(seg)
|
||||
|
||||
|
||||
# --------------- Startup tools for CherryPy + mod_python --------------- #
|
||||
import os
|
||||
import re
|
||||
try:
|
||||
import subprocess
|
||||
|
||||
def popen(fullcmd):
|
||||
p = subprocess.Popen(fullcmd, shell=True,
|
||||
stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
|
||||
close_fds=True)
|
||||
return p.stdout
|
||||
except ImportError:
|
||||
def popen(fullcmd):
|
||||
pipein, pipeout = os.popen4(fullcmd)
|
||||
return pipeout
|
||||
|
||||
|
||||
def read_process(cmd, args=""):
|
||||
fullcmd = "%s %s" % (cmd, args)
|
||||
pipeout = popen(fullcmd)
|
||||
try:
|
||||
firstline = pipeout.readline()
|
||||
cmd_not_found = re.search(
|
||||
ntob("(not recognized|No such file|not found)"),
|
||||
firstline,
|
||||
re.IGNORECASE
|
||||
)
|
||||
if cmd_not_found:
|
||||
raise IOError('%s must be on your system path.' % cmd)
|
||||
output = firstline + pipeout.read()
|
||||
finally:
|
||||
pipeout.close()
|
||||
return output
|
||||
|
||||
|
||||
class ModPythonServer(object):
|
||||
|
||||
template = """
|
||||
# Apache2 server configuration file for running CherryPy with mod_python.
|
||||
|
||||
DocumentRoot "/"
|
||||
Listen %(port)s
|
||||
LoadModule python_module modules/mod_python.so
|
||||
|
||||
<Location %(loc)s>
|
||||
SetHandler python-program
|
||||
PythonHandler %(handler)s
|
||||
PythonDebug On
|
||||
%(opts)s
|
||||
</Location>
|
||||
"""
|
||||
|
||||
def __init__(self, loc="/", port=80, opts=None, apache_path="apache",
|
||||
handler="cherrypy._cpmodpy::handler"):
|
||||
self.loc = loc
|
||||
self.port = port
|
||||
self.opts = opts
|
||||
self.apache_path = apache_path
|
||||
self.handler = handler
|
||||
|
||||
def start(self):
|
||||
opts = "".join([" PythonOption %s %s\n" % (k, v)
|
||||
for k, v in self.opts])
|
||||
conf_data = self.template % {"port": self.port,
|
||||
"loc": self.loc,
|
||||
"opts": opts,
|
||||
"handler": self.handler,
|
||||
}
|
||||
|
||||
mpconf = os.path.join(os.path.dirname(__file__), "cpmodpy.conf")
|
||||
f = open(mpconf, 'wb')
|
||||
try:
|
||||
f.write(conf_data)
|
||||
finally:
|
||||
f.close()
|
||||
|
||||
response = read_process(self.apache_path, "-k start -f %s" % mpconf)
|
||||
self.ready = True
|
||||
return response
|
||||
|
||||
def stop(self):
|
||||
os.popen("apache -k stop")
|
||||
self.ready = False
|
||||
154
lib/cherrypy/_cpnative_server.py
Normal file
154
lib/cherrypy/_cpnative_server.py
Normal file
@@ -0,0 +1,154 @@
|
||||
"""Native adapter for serving CherryPy via its builtin server."""
|
||||
|
||||
import logging
|
||||
import sys
|
||||
|
||||
import cherrypy
|
||||
from cherrypy._cpcompat import BytesIO
|
||||
from cherrypy._cperror import format_exc, bare_error
|
||||
from cherrypy.lib import httputil
|
||||
from cherrypy import wsgiserver
|
||||
|
||||
|
||||
class NativeGateway(wsgiserver.Gateway):
|
||||
|
||||
recursive = False
|
||||
|
||||
def respond(self):
|
||||
req = self.req
|
||||
try:
|
||||
# Obtain a Request object from CherryPy
|
||||
local = req.server.bind_addr
|
||||
local = httputil.Host(local[0], local[1], "")
|
||||
remote = req.conn.remote_addr, req.conn.remote_port
|
||||
remote = httputil.Host(remote[0], remote[1], "")
|
||||
|
||||
scheme = req.scheme
|
||||
sn = cherrypy.tree.script_name(req.uri or "/")
|
||||
if sn is None:
|
||||
self.send_response('404 Not Found', [], [''])
|
||||
else:
|
||||
app = cherrypy.tree.apps[sn]
|
||||
method = req.method
|
||||
path = req.path
|
||||
qs = req.qs or ""
|
||||
headers = req.inheaders.items()
|
||||
rfile = req.rfile
|
||||
prev = None
|
||||
|
||||
try:
|
||||
redirections = []
|
||||
while True:
|
||||
request, response = app.get_serving(
|
||||
local, remote, scheme, "HTTP/1.1")
|
||||
request.multithread = True
|
||||
request.multiprocess = False
|
||||
request.app = app
|
||||
request.prev = prev
|
||||
|
||||
# Run the CherryPy Request object and obtain the
|
||||
# response
|
||||
try:
|
||||
request.run(method, path, qs,
|
||||
req.request_protocol, headers, rfile)
|
||||
break
|
||||
except cherrypy.InternalRedirect:
|
||||
ir = sys.exc_info()[1]
|
||||
app.release_serving()
|
||||
prev = request
|
||||
|
||||
if not self.recursive:
|
||||
if ir.path in redirections:
|
||||
raise RuntimeError(
|
||||
"InternalRedirector visited the same "
|
||||
"URL twice: %r" % ir.path)
|
||||
else:
|
||||
# Add the *previous* path_info + qs to
|
||||
# redirections.
|
||||
if qs:
|
||||
qs = "?" + qs
|
||||
redirections.append(sn + path + qs)
|
||||
|
||||
# Munge environment and try again.
|
||||
method = "GET"
|
||||
path = ir.path
|
||||
qs = ir.query_string
|
||||
rfile = BytesIO()
|
||||
|
||||
self.send_response(
|
||||
response.output_status, response.header_list,
|
||||
response.body)
|
||||
finally:
|
||||
app.release_serving()
|
||||
except:
|
||||
tb = format_exc()
|
||||
# print tb
|
||||
cherrypy.log(tb, 'NATIVE_ADAPTER', severity=logging.ERROR)
|
||||
s, h, b = bare_error()
|
||||
self.send_response(s, h, b)
|
||||
|
||||
def send_response(self, status, headers, body):
|
||||
req = self.req
|
||||
|
||||
# Set response status
|
||||
req.status = str(status or "500 Server Error")
|
||||
|
||||
# Set response headers
|
||||
for header, value in headers:
|
||||
req.outheaders.append((header, value))
|
||||
if (req.ready and not req.sent_headers):
|
||||
req.sent_headers = True
|
||||
req.send_headers()
|
||||
|
||||
# Set response body
|
||||
for seg in body:
|
||||
req.write(seg)
|
||||
|
||||
|
||||
class CPHTTPServer(wsgiserver.HTTPServer):
|
||||
|
||||
"""Wrapper for wsgiserver.HTTPServer.
|
||||
|
||||
wsgiserver has been designed to not reference CherryPy in any way,
|
||||
so that it can be used in other frameworks and applications.
|
||||
Therefore, we wrap it here, so we can apply some attributes
|
||||
from config -> cherrypy.server -> HTTPServer.
|
||||
"""
|
||||
|
||||
def __init__(self, server_adapter=cherrypy.server):
|
||||
self.server_adapter = server_adapter
|
||||
|
||||
server_name = (self.server_adapter.socket_host or
|
||||
self.server_adapter.socket_file or
|
||||
None)
|
||||
|
||||
wsgiserver.HTTPServer.__init__(
|
||||
self, server_adapter.bind_addr, NativeGateway,
|
||||
minthreads=server_adapter.thread_pool,
|
||||
maxthreads=server_adapter.thread_pool_max,
|
||||
server_name=server_name)
|
||||
|
||||
self.max_request_header_size = (
|
||||
self.server_adapter.max_request_header_size or 0)
|
||||
self.max_request_body_size = (
|
||||
self.server_adapter.max_request_body_size or 0)
|
||||
self.request_queue_size = self.server_adapter.socket_queue_size
|
||||
self.timeout = self.server_adapter.socket_timeout
|
||||
self.shutdown_timeout = self.server_adapter.shutdown_timeout
|
||||
self.protocol = self.server_adapter.protocol_version
|
||||
self.nodelay = self.server_adapter.nodelay
|
||||
|
||||
ssl_module = self.server_adapter.ssl_module or 'pyopenssl'
|
||||
if self.server_adapter.ssl_context:
|
||||
adapter_class = wsgiserver.get_ssl_adapter_class(ssl_module)
|
||||
self.ssl_adapter = adapter_class(
|
||||
self.server_adapter.ssl_certificate,
|
||||
self.server_adapter.ssl_private_key,
|
||||
self.server_adapter.ssl_certificate_chain)
|
||||
self.ssl_adapter.context = self.server_adapter.ssl_context
|
||||
elif self.server_adapter.ssl_certificate:
|
||||
adapter_class = wsgiserver.get_ssl_adapter_class(ssl_module)
|
||||
self.ssl_adapter = adapter_class(
|
||||
self.server_adapter.ssl_certificate,
|
||||
self.server_adapter.ssl_private_key,
|
||||
self.server_adapter.ssl_certificate_chain)
|
||||
1013
lib/cherrypy/_cpreqbody.py
Normal file
1013
lib/cherrypy/_cpreqbody.py
Normal file
File diff suppressed because it is too large
Load Diff
973
lib/cherrypy/_cprequest.py
Normal file
973
lib/cherrypy/_cprequest.py
Normal file
@@ -0,0 +1,973 @@
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import warnings
|
||||
|
||||
import cherrypy
|
||||
from cherrypy._cpcompat import basestring, copykeys, ntob, unicodestr
|
||||
from cherrypy._cpcompat import SimpleCookie, CookieError, py3k
|
||||
from cherrypy import _cpreqbody, _cpconfig
|
||||
from cherrypy._cperror import format_exc, bare_error
|
||||
from cherrypy.lib import httputil, file_generator
|
||||
|
||||
|
||||
class Hook(object):
|
||||
|
||||
"""A callback and its metadata: failsafe, priority, and kwargs."""
|
||||
|
||||
callback = None
|
||||
"""
|
||||
The bare callable that this Hook object is wrapping, which will
|
||||
be called when the Hook is called."""
|
||||
|
||||
failsafe = False
|
||||
"""
|
||||
If True, the callback is guaranteed to run even if other callbacks
|
||||
from the same call point raise exceptions."""
|
||||
|
||||
priority = 50
|
||||
"""
|
||||
Defines the order of execution for a list of Hooks. Priority numbers
|
||||
should be limited to the closed interval [0, 100], but values outside
|
||||
this range are acceptable, as are fractional values."""
|
||||
|
||||
kwargs = {}
|
||||
"""
|
||||
A set of keyword arguments that will be passed to the
|
||||
callable on each call."""
|
||||
|
||||
def __init__(self, callback, failsafe=None, priority=None, **kwargs):
|
||||
self.callback = callback
|
||||
|
||||
if failsafe is None:
|
||||
failsafe = getattr(callback, "failsafe", False)
|
||||
self.failsafe = failsafe
|
||||
|
||||
if priority is None:
|
||||
priority = getattr(callback, "priority", 50)
|
||||
self.priority = priority
|
||||
|
||||
self.kwargs = kwargs
|
||||
|
||||
def __lt__(self, other):
|
||||
# Python 3
|
||||
return self.priority < other.priority
|
||||
|
||||
def __cmp__(self, other):
|
||||
# Python 2
|
||||
return cmp(self.priority, other.priority)
|
||||
|
||||
def __call__(self):
|
||||
"""Run self.callback(**self.kwargs)."""
|
||||
return self.callback(**self.kwargs)
|
||||
|
||||
def __repr__(self):
|
||||
cls = self.__class__
|
||||
return ("%s.%s(callback=%r, failsafe=%r, priority=%r, %s)"
|
||||
% (cls.__module__, cls.__name__, self.callback,
|
||||
self.failsafe, self.priority,
|
||||
", ".join(['%s=%r' % (k, v)
|
||||
for k, v in self.kwargs.items()])))
|
||||
|
||||
|
||||
class HookMap(dict):
|
||||
|
||||
"""A map of call points to lists of callbacks (Hook objects)."""
|
||||
|
||||
def __new__(cls, points=None):
|
||||
d = dict.__new__(cls)
|
||||
for p in points or []:
|
||||
d[p] = []
|
||||
return d
|
||||
|
||||
def __init__(self, *a, **kw):
|
||||
pass
|
||||
|
||||
def attach(self, point, callback, failsafe=None, priority=None, **kwargs):
|
||||
"""Append a new Hook made from the supplied arguments."""
|
||||
self[point].append(Hook(callback, failsafe, priority, **kwargs))
|
||||
|
||||
def run(self, point):
|
||||
"""Execute all registered Hooks (callbacks) for the given point."""
|
||||
exc = None
|
||||
hooks = self[point]
|
||||
hooks.sort()
|
||||
for hook in hooks:
|
||||
# Some hooks are guaranteed to run even if others at
|
||||
# the same hookpoint fail. We will still log the failure,
|
||||
# but proceed on to the next hook. The only way
|
||||
# to stop all processing from one of these hooks is
|
||||
# to raise SystemExit and stop the whole server.
|
||||
if exc is None or hook.failsafe:
|
||||
try:
|
||||
hook()
|
||||
except (KeyboardInterrupt, SystemExit):
|
||||
raise
|
||||
except (cherrypy.HTTPError, cherrypy.HTTPRedirect,
|
||||
cherrypy.InternalRedirect):
|
||||
exc = sys.exc_info()[1]
|
||||
except:
|
||||
exc = sys.exc_info()[1]
|
||||
cherrypy.log(traceback=True, severity=40)
|
||||
if exc:
|
||||
raise exc
|
||||
|
||||
def __copy__(self):
|
||||
newmap = self.__class__()
|
||||
# We can't just use 'update' because we want copies of the
|
||||
# mutable values (each is a list) as well.
|
||||
for k, v in self.items():
|
||||
newmap[k] = v[:]
|
||||
return newmap
|
||||
copy = __copy__
|
||||
|
||||
def __repr__(self):
|
||||
cls = self.__class__
|
||||
return "%s.%s(points=%r)" % (
|
||||
cls.__module__,
|
||||
cls.__name__,
|
||||
copykeys(self)
|
||||
)
|
||||
|
||||
|
||||
# Config namespace handlers
|
||||
|
||||
def hooks_namespace(k, v):
|
||||
"""Attach bare hooks declared in config."""
|
||||
# Use split again to allow multiple hooks for a single
|
||||
# hookpoint per path (e.g. "hooks.before_handler.1").
|
||||
# Little-known fact you only get from reading source ;)
|
||||
hookpoint = k.split(".", 1)[0]
|
||||
if isinstance(v, basestring):
|
||||
v = cherrypy.lib.attributes(v)
|
||||
if not isinstance(v, Hook):
|
||||
v = Hook(v)
|
||||
cherrypy.serving.request.hooks[hookpoint].append(v)
|
||||
|
||||
|
||||
def request_namespace(k, v):
|
||||
"""Attach request attributes declared in config."""
|
||||
# Provides config entries to set request.body attrs (like
|
||||
# attempt_charsets).
|
||||
if k[:5] == 'body.':
|
||||
setattr(cherrypy.serving.request.body, k[5:], v)
|
||||
else:
|
||||
setattr(cherrypy.serving.request, k, v)
|
||||
|
||||
|
||||
def response_namespace(k, v):
|
||||
"""Attach response attributes declared in config."""
|
||||
# Provides config entries to set default response headers
|
||||
# http://cherrypy.org/ticket/889
|
||||
if k[:8] == 'headers.':
|
||||
cherrypy.serving.response.headers[k.split('.', 1)[1]] = v
|
||||
else:
|
||||
setattr(cherrypy.serving.response, k, v)
|
||||
|
||||
|
||||
def error_page_namespace(k, v):
|
||||
"""Attach error pages declared in config."""
|
||||
if k != 'default':
|
||||
k = int(k)
|
||||
cherrypy.serving.request.error_page[k] = v
|
||||
|
||||
|
||||
hookpoints = ['on_start_resource', 'before_request_body',
|
||||
'before_handler', 'before_finalize',
|
||||
'on_end_resource', 'on_end_request',
|
||||
'before_error_response', 'after_error_response']
|
||||
|
||||
|
||||
class Request(object):
|
||||
|
||||
"""An HTTP request.
|
||||
|
||||
This object represents the metadata of an HTTP request message;
|
||||
that is, it contains attributes which describe the environment
|
||||
in which the request URL, headers, and body were sent (if you
|
||||
want tools to interpret the headers and body, those are elsewhere,
|
||||
mostly in Tools). This 'metadata' consists of socket data,
|
||||
transport characteristics, and the Request-Line. This object
|
||||
also contains data regarding the configuration in effect for
|
||||
the given URL, and the execution plan for generating a response.
|
||||
"""
|
||||
|
||||
prev = None
|
||||
"""
|
||||
The previous Request object (if any). This should be None
|
||||
unless we are processing an InternalRedirect."""
|
||||
|
||||
# Conversation/connection attributes
|
||||
local = httputil.Host("127.0.0.1", 80)
|
||||
"An httputil.Host(ip, port, hostname) object for the server socket."
|
||||
|
||||
remote = httputil.Host("127.0.0.1", 1111)
|
||||
"An httputil.Host(ip, port, hostname) object for the client socket."
|
||||
|
||||
scheme = "http"
|
||||
"""
|
||||
The protocol used between client and server. In most cases,
|
||||
this will be either 'http' or 'https'."""
|
||||
|
||||
server_protocol = "HTTP/1.1"
|
||||
"""
|
||||
The HTTP version for which the HTTP server is at least
|
||||
conditionally compliant."""
|
||||
|
||||
base = ""
|
||||
"""The (scheme://host) portion of the requested URL.
|
||||
In some cases (e.g. when proxying via mod_rewrite), this may contain
|
||||
path segments which cherrypy.url uses when constructing url's, but
|
||||
which otherwise are ignored by CherryPy. Regardless, this value
|
||||
MUST NOT end in a slash."""
|
||||
|
||||
# Request-Line attributes
|
||||
request_line = ""
|
||||
"""
|
||||
The complete Request-Line received from the client. This is a
|
||||
single string consisting of the request method, URI, and protocol
|
||||
version (joined by spaces). Any final CRLF is removed."""
|
||||
|
||||
method = "GET"
|
||||
"""
|
||||
Indicates the HTTP method to be performed on the resource identified
|
||||
by the Request-URI. Common methods include GET, HEAD, POST, PUT, and
|
||||
DELETE. CherryPy allows any extension method; however, various HTTP
|
||||
servers and gateways may restrict the set of allowable methods.
|
||||
CherryPy applications SHOULD restrict the set (on a per-URI basis)."""
|
||||
|
||||
query_string = ""
|
||||
"""
|
||||
The query component of the Request-URI, a string of information to be
|
||||
interpreted by the resource. The query portion of a URI follows the
|
||||
path component, and is separated by a '?'. For example, the URI
|
||||
'http://www.cherrypy.org/wiki?a=3&b=4' has the query component,
|
||||
'a=3&b=4'."""
|
||||
|
||||
query_string_encoding = 'utf8'
|
||||
"""
|
||||
The encoding expected for query string arguments after % HEX HEX decoding).
|
||||
If a query string is provided that cannot be decoded with this encoding,
|
||||
404 is raised (since technically it's a different URI). If you want
|
||||
arbitrary encodings to not error, set this to 'Latin-1'; you can then
|
||||
encode back to bytes and re-decode to whatever encoding you like later.
|
||||
"""
|
||||
|
||||
protocol = (1, 1)
|
||||
"""The HTTP protocol version corresponding to the set
|
||||
of features which should be allowed in the response. If BOTH
|
||||
the client's request message AND the server's level of HTTP
|
||||
compliance is HTTP/1.1, this attribute will be the tuple (1, 1).
|
||||
If either is 1.0, this attribute will be the tuple (1, 0).
|
||||
Lower HTTP protocol versions are not explicitly supported."""
|
||||
|
||||
params = {}
|
||||
"""
|
||||
A dict which combines query string (GET) and request entity (POST)
|
||||
variables. This is populated in two stages: GET params are added
|
||||
before the 'on_start_resource' hook, and POST params are added
|
||||
between the 'before_request_body' and 'before_handler' hooks."""
|
||||
|
||||
# Message attributes
|
||||
header_list = []
|
||||
"""
|
||||
A list of the HTTP request headers as (name, value) tuples.
|
||||
In general, you should use request.headers (a dict) instead."""
|
||||
|
||||
headers = httputil.HeaderMap()
|
||||
"""
|
||||
A dict-like object containing the request headers. Keys are header
|
||||
names (in Title-Case format); however, you may get and set them in
|
||||
a case-insensitive manner. That is, headers['Content-Type'] and
|
||||
headers['content-type'] refer to the same value. Values are header
|
||||
values (decoded according to :rfc:`2047` if necessary). See also:
|
||||
httputil.HeaderMap, httputil.HeaderElement."""
|
||||
|
||||
cookie = SimpleCookie()
|
||||
"""See help(Cookie)."""
|
||||
|
||||
rfile = None
|
||||
"""
|
||||
If the request included an entity (body), it will be available
|
||||
as a stream in this attribute. However, the rfile will normally
|
||||
be read for you between the 'before_request_body' hook and the
|
||||
'before_handler' hook, and the resulting string is placed into
|
||||
either request.params or the request.body attribute.
|
||||
|
||||
You may disable the automatic consumption of the rfile by setting
|
||||
request.process_request_body to False, either in config for the desired
|
||||
path, or in an 'on_start_resource' or 'before_request_body' hook.
|
||||
|
||||
WARNING: In almost every case, you should not attempt to read from the
|
||||
rfile stream after CherryPy's automatic mechanism has read it. If you
|
||||
turn off the automatic parsing of rfile, you should read exactly the
|
||||
number of bytes specified in request.headers['Content-Length'].
|
||||
Ignoring either of these warnings may result in a hung request thread
|
||||
or in corruption of the next (pipelined) request.
|
||||
"""
|
||||
|
||||
process_request_body = True
|
||||
"""
|
||||
If True, the rfile (if any) is automatically read and parsed,
|
||||
and the result placed into request.params or request.body."""
|
||||
|
||||
methods_with_bodies = ("POST", "PUT")
|
||||
"""
|
||||
A sequence of HTTP methods for which CherryPy will automatically
|
||||
attempt to read a body from the rfile. If you are going to change
|
||||
this property, modify it on the configuration (recommended)
|
||||
or on the "hook point" `on_start_resource`.
|
||||
"""
|
||||
|
||||
body = None
|
||||
"""
|
||||
If the request Content-Type is 'application/x-www-form-urlencoded'
|
||||
or multipart, this will be None. Otherwise, this will be an instance
|
||||
of :class:`RequestBody<cherrypy._cpreqbody.RequestBody>` (which you
|
||||
can .read()); this value is set between the 'before_request_body' and
|
||||
'before_handler' hooks (assuming that process_request_body is True)."""
|
||||
|
||||
# Dispatch attributes
|
||||
dispatch = cherrypy.dispatch.Dispatcher()
|
||||
"""
|
||||
The object which looks up the 'page handler' callable and collects
|
||||
config for the current request based on the path_info, other
|
||||
request attributes, and the application architecture. The core
|
||||
calls the dispatcher as early as possible, passing it a 'path_info'
|
||||
argument.
|
||||
|
||||
The default dispatcher discovers the page handler by matching path_info
|
||||
to a hierarchical arrangement of objects, starting at request.app.root.
|
||||
See help(cherrypy.dispatch) for more information."""
|
||||
|
||||
script_name = ""
|
||||
"""
|
||||
The 'mount point' of the application which is handling this request.
|
||||
|
||||
This attribute MUST NOT end in a slash. If the script_name refers to
|
||||
the root of the URI, it MUST be an empty string (not "/").
|
||||
"""
|
||||
|
||||
path_info = "/"
|
||||
"""
|
||||
The 'relative path' portion of the Request-URI. This is relative
|
||||
to the script_name ('mount point') of the application which is
|
||||
handling this request."""
|
||||
|
||||
login = None
|
||||
"""
|
||||
When authentication is used during the request processing this is
|
||||
set to 'False' if it failed and to the 'username' value if it succeeded.
|
||||
The default 'None' implies that no authentication happened."""
|
||||
|
||||
# Note that cherrypy.url uses "if request.app:" to determine whether
|
||||
# the call is during a real HTTP request or not. So leave this None.
|
||||
app = None
|
||||
"""The cherrypy.Application object which is handling this request."""
|
||||
|
||||
handler = None
|
||||
"""
|
||||
The function, method, or other callable which CherryPy will call to
|
||||
produce the response. The discovery of the handler and the arguments
|
||||
it will receive are determined by the request.dispatch object.
|
||||
By default, the handler is discovered by walking a tree of objects
|
||||
starting at request.app.root, and is then passed all HTTP params
|
||||
(from the query string and POST body) as keyword arguments."""
|
||||
|
||||
toolmaps = {}
|
||||
"""
|
||||
A nested dict of all Toolboxes and Tools in effect for this request,
|
||||
of the form: {Toolbox.namespace: {Tool.name: config dict}}."""
|
||||
|
||||
config = None
|
||||
"""
|
||||
A flat dict of all configuration entries which apply to the
|
||||
current request. These entries are collected from global config,
|
||||
application config (based on request.path_info), and from handler
|
||||
config (exactly how is governed by the request.dispatch object in
|
||||
effect for this request; by default, handler config can be attached
|
||||
anywhere in the tree between request.app.root and the final handler,
|
||||
and inherits downward)."""
|
||||
|
||||
is_index = None
|
||||
"""
|
||||
This will be True if the current request is mapped to an 'index'
|
||||
resource handler (also, a 'default' handler if path_info ends with
|
||||
a slash). The value may be used to automatically redirect the
|
||||
user-agent to a 'more canonical' URL which either adds or removes
|
||||
the trailing slash. See cherrypy.tools.trailing_slash."""
|
||||
|
||||
hooks = HookMap(hookpoints)
|
||||
"""
|
||||
A HookMap (dict-like object) of the form: {hookpoint: [hook, ...]}.
|
||||
Each key is a str naming the hook point, and each value is a list
|
||||
of hooks which will be called at that hook point during this request.
|
||||
The list of hooks is generally populated as early as possible (mostly
|
||||
from Tools specified in config), but may be extended at any time.
|
||||
See also: _cprequest.Hook, _cprequest.HookMap, and cherrypy.tools."""
|
||||
|
||||
error_response = cherrypy.HTTPError(500).set_response
|
||||
"""
|
||||
The no-arg callable which will handle unexpected, untrapped errors
|
||||
during request processing. This is not used for expected exceptions
|
||||
(like NotFound, HTTPError, or HTTPRedirect) which are raised in
|
||||
response to expected conditions (those should be customized either
|
||||
via request.error_page or by overriding HTTPError.set_response).
|
||||
By default, error_response uses HTTPError(500) to return a generic
|
||||
error response to the user-agent."""
|
||||
|
||||
error_page = {}
|
||||
"""
|
||||
A dict of {error code: response filename or callable} pairs.
|
||||
|
||||
The error code must be an int representing a given HTTP error code,
|
||||
or the string 'default', which will be used if no matching entry
|
||||
is found for a given numeric code.
|
||||
|
||||
If a filename is provided, the file should contain a Python string-
|
||||
formatting template, and can expect by default to receive format
|
||||
values with the mapping keys %(status)s, %(message)s, %(traceback)s,
|
||||
and %(version)s. The set of format mappings can be extended by
|
||||
overriding HTTPError.set_response.
|
||||
|
||||
If a callable is provided, it will be called by default with keyword
|
||||
arguments 'status', 'message', 'traceback', and 'version', as for a
|
||||
string-formatting template. The callable must return a string or
|
||||
iterable of strings which will be set to response.body. It may also
|
||||
override headers or perform any other processing.
|
||||
|
||||
If no entry is given for an error code, and no 'default' entry exists,
|
||||
a default template will be used.
|
||||
"""
|
||||
|
||||
show_tracebacks = True
|
||||
"""
|
||||
If True, unexpected errors encountered during request processing will
|
||||
include a traceback in the response body."""
|
||||
|
||||
show_mismatched_params = True
|
||||
"""
|
||||
If True, mismatched parameters encountered during PageHandler invocation
|
||||
processing will be included in the response body."""
|
||||
|
||||
throws = (KeyboardInterrupt, SystemExit, cherrypy.InternalRedirect)
|
||||
"""The sequence of exceptions which Request.run does not trap."""
|
||||
|
||||
throw_errors = False
|
||||
"""
|
||||
If True, Request.run will not trap any errors (except HTTPRedirect and
|
||||
HTTPError, which are more properly called 'exceptions', not errors)."""
|
||||
|
||||
closed = False
|
||||
"""True once the close method has been called, False otherwise."""
|
||||
|
||||
stage = None
|
||||
"""
|
||||
A string containing the stage reached in the request-handling process.
|
||||
This is useful when debugging a live server with hung requests."""
|
||||
|
||||
namespaces = _cpconfig.NamespaceSet(
|
||||
**{"hooks": hooks_namespace,
|
||||
"request": request_namespace,
|
||||
"response": response_namespace,
|
||||
"error_page": error_page_namespace,
|
||||
"tools": cherrypy.tools,
|
||||
})
|
||||
|
||||
def __init__(self, local_host, remote_host, scheme="http",
|
||||
server_protocol="HTTP/1.1"):
|
||||
"""Populate a new Request object.
|
||||
|
||||
local_host should be an httputil.Host object with the server info.
|
||||
remote_host should be an httputil.Host object with the client info.
|
||||
scheme should be a string, either "http" or "https".
|
||||
"""
|
||||
self.local = local_host
|
||||
self.remote = remote_host
|
||||
self.scheme = scheme
|
||||
self.server_protocol = server_protocol
|
||||
|
||||
self.closed = False
|
||||
|
||||
# Put a *copy* of the class error_page into self.
|
||||
self.error_page = self.error_page.copy()
|
||||
|
||||
# Put a *copy* of the class namespaces into self.
|
||||
self.namespaces = self.namespaces.copy()
|
||||
|
||||
self.stage = None
|
||||
|
||||
def close(self):
|
||||
"""Run cleanup code. (Core)"""
|
||||
if not self.closed:
|
||||
self.closed = True
|
||||
self.stage = 'on_end_request'
|
||||
self.hooks.run('on_end_request')
|
||||
self.stage = 'close'
|
||||
|
||||
def run(self, method, path, query_string, req_protocol, headers, rfile):
|
||||
r"""Process the Request. (Core)
|
||||
|
||||
method, path, query_string, and req_protocol should be pulled directly
|
||||
from the Request-Line (e.g. "GET /path?key=val HTTP/1.0").
|
||||
|
||||
path
|
||||
This should be %XX-unquoted, but query_string should not be.
|
||||
|
||||
When using Python 2, they both MUST be byte strings,
|
||||
not unicode strings.
|
||||
|
||||
When using Python 3, they both MUST be unicode strings,
|
||||
not byte strings, and preferably not bytes \x00-\xFF
|
||||
disguised as unicode.
|
||||
|
||||
headers
|
||||
A list of (name, value) tuples.
|
||||
|
||||
rfile
|
||||
A file-like object containing the HTTP request entity.
|
||||
|
||||
When run() is done, the returned object should have 3 attributes:
|
||||
|
||||
* status, e.g. "200 OK"
|
||||
* header_list, a list of (name, value) tuples
|
||||
* body, an iterable yielding strings
|
||||
|
||||
Consumer code (HTTP servers) should then access these response
|
||||
attributes to build the outbound stream.
|
||||
|
||||
"""
|
||||
response = cherrypy.serving.response
|
||||
self.stage = 'run'
|
||||
try:
|
||||
self.error_response = cherrypy.HTTPError(500).set_response
|
||||
|
||||
self.method = method
|
||||
path = path or "/"
|
||||
self.query_string = query_string or ''
|
||||
self.params = {}
|
||||
|
||||
# Compare request and server HTTP protocol versions, in case our
|
||||
# server does not support the requested protocol. Limit our output
|
||||
# to min(req, server). We want the following output:
|
||||
# request server actual written supported response
|
||||
# protocol protocol response protocol feature set
|
||||
# a 1.0 1.0 1.0 1.0
|
||||
# b 1.0 1.1 1.1 1.0
|
||||
# c 1.1 1.0 1.0 1.0
|
||||
# d 1.1 1.1 1.1 1.1
|
||||
# Notice that, in (b), the response will be "HTTP/1.1" even though
|
||||
# the client only understands 1.0. RFC 2616 10.5.6 says we should
|
||||
# only return 505 if the _major_ version is different.
|
||||
rp = int(req_protocol[5]), int(req_protocol[7])
|
||||
sp = int(self.server_protocol[5]), int(self.server_protocol[7])
|
||||
self.protocol = min(rp, sp)
|
||||
response.headers.protocol = self.protocol
|
||||
|
||||
# Rebuild first line of the request (e.g. "GET /path HTTP/1.0").
|
||||
url = path
|
||||
if query_string:
|
||||
url += '?' + query_string
|
||||
self.request_line = '%s %s %s' % (method, url, req_protocol)
|
||||
|
||||
self.header_list = list(headers)
|
||||
self.headers = httputil.HeaderMap()
|
||||
|
||||
self.rfile = rfile
|
||||
self.body = None
|
||||
|
||||
self.cookie = SimpleCookie()
|
||||
self.handler = None
|
||||
|
||||
# path_info should be the path from the
|
||||
# app root (script_name) to the handler.
|
||||
self.script_name = self.app.script_name
|
||||
self.path_info = pi = path[len(self.script_name):]
|
||||
|
||||
self.stage = 'respond'
|
||||
self.respond(pi)
|
||||
|
||||
except self.throws:
|
||||
raise
|
||||
except:
|
||||
if self.throw_errors:
|
||||
raise
|
||||
else:
|
||||
# Failure in setup, error handler or finalize. Bypass them.
|
||||
# Can't use handle_error because we may not have hooks yet.
|
||||
cherrypy.log(traceback=True, severity=40)
|
||||
if self.show_tracebacks:
|
||||
body = format_exc()
|
||||
else:
|
||||
body = ""
|
||||
r = bare_error(body)
|
||||
response.output_status, response.header_list, response.body = r
|
||||
|
||||
if self.method == "HEAD":
|
||||
# HEAD requests MUST NOT return a message-body in the response.
|
||||
response.body = []
|
||||
|
||||
try:
|
||||
cherrypy.log.access()
|
||||
except:
|
||||
cherrypy.log.error(traceback=True)
|
||||
|
||||
if response.timed_out:
|
||||
raise cherrypy.TimeoutError()
|
||||
|
||||
return response
|
||||
|
||||
# Uncomment for stage debugging
|
||||
# stage = property(lambda self: self._stage, lambda self, v: print(v))
|
||||
|
||||
def respond(self, path_info):
|
||||
"""Generate a response for the resource at self.path_info. (Core)"""
|
||||
response = cherrypy.serving.response
|
||||
try:
|
||||
try:
|
||||
try:
|
||||
if self.app is None:
|
||||
raise cherrypy.NotFound()
|
||||
|
||||
# Get the 'Host' header, so we can HTTPRedirect properly.
|
||||
self.stage = 'process_headers'
|
||||
self.process_headers()
|
||||
|
||||
# Make a copy of the class hooks
|
||||
self.hooks = self.__class__.hooks.copy()
|
||||
self.toolmaps = {}
|
||||
|
||||
self.stage = 'get_resource'
|
||||
self.get_resource(path_info)
|
||||
|
||||
self.body = _cpreqbody.RequestBody(
|
||||
self.rfile, self.headers, request_params=self.params)
|
||||
|
||||
self.namespaces(self.config)
|
||||
|
||||
self.stage = 'on_start_resource'
|
||||
self.hooks.run('on_start_resource')
|
||||
|
||||
# Parse the querystring
|
||||
self.stage = 'process_query_string'
|
||||
self.process_query_string()
|
||||
|
||||
# Process the body
|
||||
if self.process_request_body:
|
||||
if self.method not in self.methods_with_bodies:
|
||||
self.process_request_body = False
|
||||
self.stage = 'before_request_body'
|
||||
self.hooks.run('before_request_body')
|
||||
if self.process_request_body:
|
||||
self.body.process()
|
||||
|
||||
# Run the handler
|
||||
self.stage = 'before_handler'
|
||||
self.hooks.run('before_handler')
|
||||
if self.handler:
|
||||
self.stage = 'handler'
|
||||
response.body = self.handler()
|
||||
|
||||
# Finalize
|
||||
self.stage = 'before_finalize'
|
||||
self.hooks.run('before_finalize')
|
||||
response.finalize()
|
||||
except (cherrypy.HTTPRedirect, cherrypy.HTTPError):
|
||||
inst = sys.exc_info()[1]
|
||||
inst.set_response()
|
||||
self.stage = 'before_finalize (HTTPError)'
|
||||
self.hooks.run('before_finalize')
|
||||
response.finalize()
|
||||
finally:
|
||||
self.stage = 'on_end_resource'
|
||||
self.hooks.run('on_end_resource')
|
||||
except self.throws:
|
||||
raise
|
||||
except:
|
||||
if self.throw_errors:
|
||||
raise
|
||||
self.handle_error()
|
||||
|
||||
def process_query_string(self):
|
||||
"""Parse the query string into Python structures. (Core)"""
|
||||
try:
|
||||
p = httputil.parse_query_string(
|
||||
self.query_string, encoding=self.query_string_encoding)
|
||||
except UnicodeDecodeError:
|
||||
raise cherrypy.HTTPError(
|
||||
404, "The given query string could not be processed. Query "
|
||||
"strings for this resource must be encoded with %r." %
|
||||
self.query_string_encoding)
|
||||
|
||||
# Python 2 only: keyword arguments must be byte strings (type 'str').
|
||||
if not py3k:
|
||||
for key, value in p.items():
|
||||
if isinstance(key, unicode):
|
||||
del p[key]
|
||||
p[key.encode(self.query_string_encoding)] = value
|
||||
self.params.update(p)
|
||||
|
||||
def process_headers(self):
|
||||
"""Parse HTTP header data into Python structures. (Core)"""
|
||||
# Process the headers into self.headers
|
||||
headers = self.headers
|
||||
for name, value in self.header_list:
|
||||
# Call title() now (and use dict.__method__(headers))
|
||||
# so title doesn't have to be called twice.
|
||||
name = name.title()
|
||||
value = value.strip()
|
||||
|
||||
# Warning: if there is more than one header entry for cookies
|
||||
# (AFAIK, only Konqueror does that), only the last one will
|
||||
# remain in headers (but they will be correctly stored in
|
||||
# request.cookie).
|
||||
if "=?" in value:
|
||||
dict.__setitem__(headers, name, httputil.decode_TEXT(value))
|
||||
else:
|
||||
dict.__setitem__(headers, name, value)
|
||||
|
||||
# Handle cookies differently because on Konqueror, multiple
|
||||
# cookies come on different lines with the same key
|
||||
if name == 'Cookie':
|
||||
try:
|
||||
self.cookie.load(value)
|
||||
except CookieError:
|
||||
msg = "Illegal cookie name %s" % value.split('=')[0]
|
||||
raise cherrypy.HTTPError(400, msg)
|
||||
|
||||
if not dict.__contains__(headers, 'Host'):
|
||||
# All Internet-based HTTP/1.1 servers MUST respond with a 400
|
||||
# (Bad Request) status code to any HTTP/1.1 request message
|
||||
# which lacks a Host header field.
|
||||
if self.protocol >= (1, 1):
|
||||
msg = "HTTP/1.1 requires a 'Host' request header."
|
||||
raise cherrypy.HTTPError(400, msg)
|
||||
host = dict.get(headers, 'Host')
|
||||
if not host:
|
||||
host = self.local.name or self.local.ip
|
||||
self.base = "%s://%s" % (self.scheme, host)
|
||||
|
||||
def get_resource(self, path):
|
||||
"""Call a dispatcher (which sets self.handler and .config). (Core)"""
|
||||
# First, see if there is a custom dispatch at this URI. Custom
|
||||
# dispatchers can only be specified in app.config, not in _cp_config
|
||||
# (since custom dispatchers may not even have an app.root).
|
||||
dispatch = self.app.find_config(
|
||||
path, "request.dispatch", self.dispatch)
|
||||
|
||||
# dispatch() should set self.handler and self.config
|
||||
dispatch(path)
|
||||
|
||||
def handle_error(self):
|
||||
"""Handle the last unanticipated exception. (Core)"""
|
||||
try:
|
||||
self.hooks.run("before_error_response")
|
||||
if self.error_response:
|
||||
self.error_response()
|
||||
self.hooks.run("after_error_response")
|
||||
cherrypy.serving.response.finalize()
|
||||
except cherrypy.HTTPRedirect:
|
||||
inst = sys.exc_info()[1]
|
||||
inst.set_response()
|
||||
cherrypy.serving.response.finalize()
|
||||
|
||||
# ------------------------- Properties ------------------------- #
|
||||
|
||||
def _get_body_params(self):
|
||||
warnings.warn(
|
||||
"body_params is deprecated in CherryPy 3.2, will be removed in "
|
||||
"CherryPy 3.3.",
|
||||
DeprecationWarning
|
||||
)
|
||||
return self.body.params
|
||||
body_params = property(_get_body_params,
|
||||
doc="""
|
||||
If the request Content-Type is 'application/x-www-form-urlencoded' or
|
||||
multipart, this will be a dict of the params pulled from the entity
|
||||
body; that is, it will be the portion of request.params that come
|
||||
from the message body (sometimes called "POST params", although they
|
||||
can be sent with various HTTP method verbs). This value is set between
|
||||
the 'before_request_body' and 'before_handler' hooks (assuming that
|
||||
process_request_body is True).
|
||||
|
||||
Deprecated in 3.2, will be removed for 3.3 in favor of
|
||||
:attr:`request.body.params<cherrypy._cprequest.RequestBody.params>`.""")
|
||||
|
||||
|
||||
class ResponseBody(object):
|
||||
|
||||
"""The body of the HTTP response (the response entity)."""
|
||||
|
||||
if py3k:
|
||||
unicode_err = ("Page handlers MUST return bytes. Use tools.encode "
|
||||
"if you wish to return unicode.")
|
||||
|
||||
def __get__(self, obj, objclass=None):
|
||||
if obj is None:
|
||||
# When calling on the class instead of an instance...
|
||||
return self
|
||||
else:
|
||||
return obj._body
|
||||
|
||||
def __set__(self, obj, value):
|
||||
# Convert the given value to an iterable object.
|
||||
if py3k and isinstance(value, str):
|
||||
raise ValueError(self.unicode_err)
|
||||
|
||||
if isinstance(value, basestring):
|
||||
# strings get wrapped in a list because iterating over a single
|
||||
# item list is much faster than iterating over every character
|
||||
# in a long string.
|
||||
if value:
|
||||
value = [value]
|
||||
else:
|
||||
# [''] doesn't evaluate to False, so replace it with [].
|
||||
value = []
|
||||
elif py3k and isinstance(value, list):
|
||||
# every item in a list must be bytes...
|
||||
for i, item in enumerate(value):
|
||||
if isinstance(item, str):
|
||||
raise ValueError(self.unicode_err)
|
||||
# Don't use isinstance here; io.IOBase which has an ABC takes
|
||||
# 1000 times as long as, say, isinstance(value, str)
|
||||
elif hasattr(value, 'read'):
|
||||
value = file_generator(value)
|
||||
elif value is None:
|
||||
value = []
|
||||
obj._body = value
|
||||
|
||||
|
||||
class Response(object):
|
||||
|
||||
"""An HTTP Response, including status, headers, and body."""
|
||||
|
||||
status = ""
|
||||
"""The HTTP Status-Code and Reason-Phrase."""
|
||||
|
||||
header_list = []
|
||||
"""
|
||||
A list of the HTTP response headers as (name, value) tuples.
|
||||
In general, you should use response.headers (a dict) instead. This
|
||||
attribute is generated from response.headers and is not valid until
|
||||
after the finalize phase."""
|
||||
|
||||
headers = httputil.HeaderMap()
|
||||
"""
|
||||
A dict-like object containing the response headers. Keys are header
|
||||
names (in Title-Case format); however, you may get and set them in
|
||||
a case-insensitive manner. That is, headers['Content-Type'] and
|
||||
headers['content-type'] refer to the same value. Values are header
|
||||
values (decoded according to :rfc:`2047` if necessary).
|
||||
|
||||
.. seealso:: classes :class:`HeaderMap`, :class:`HeaderElement`
|
||||
"""
|
||||
|
||||
cookie = SimpleCookie()
|
||||
"""See help(Cookie)."""
|
||||
|
||||
body = ResponseBody()
|
||||
"""The body (entity) of the HTTP response."""
|
||||
|
||||
time = None
|
||||
"""The value of time.time() when created. Use in HTTP dates."""
|
||||
|
||||
timeout = 300
|
||||
"""Seconds after which the response will be aborted."""
|
||||
|
||||
timed_out = False
|
||||
"""
|
||||
Flag to indicate the response should be aborted, because it has
|
||||
exceeded its timeout."""
|
||||
|
||||
stream = False
|
||||
"""If False, buffer the response body."""
|
||||
|
||||
def __init__(self):
|
||||
self.status = None
|
||||
self.header_list = None
|
||||
self._body = []
|
||||
self.time = time.time()
|
||||
|
||||
self.headers = httputil.HeaderMap()
|
||||
# Since we know all our keys are titled strings, we can
|
||||
# bypass HeaderMap.update and get a big speed boost.
|
||||
dict.update(self.headers, {
|
||||
"Content-Type": 'text/html',
|
||||
"Server": "CherryPy/" + cherrypy.__version__,
|
||||
"Date": httputil.HTTPDate(self.time),
|
||||
})
|
||||
self.cookie = SimpleCookie()
|
||||
|
||||
def collapse_body(self):
|
||||
"""Collapse self.body to a single string; replace it and return it."""
|
||||
if isinstance(self.body, basestring):
|
||||
return self.body
|
||||
|
||||
newbody = []
|
||||
for chunk in self.body:
|
||||
if py3k and not isinstance(chunk, bytes):
|
||||
raise TypeError("Chunk %s is not of type 'bytes'." %
|
||||
repr(chunk))
|
||||
newbody.append(chunk)
|
||||
newbody = ntob('').join(newbody)
|
||||
|
||||
self.body = newbody
|
||||
return newbody
|
||||
|
||||
def finalize(self):
|
||||
"""Transform headers (and cookies) into self.header_list. (Core)"""
|
||||
try:
|
||||
code, reason, _ = httputil.valid_status(self.status)
|
||||
except ValueError:
|
||||
raise cherrypy.HTTPError(500, sys.exc_info()[1].args[0])
|
||||
|
||||
headers = self.headers
|
||||
|
||||
self.status = "%s %s" % (code, reason)
|
||||
self.output_status = ntob(str(code), 'ascii') + \
|
||||
ntob(" ") + headers.encode(reason)
|
||||
|
||||
if self.stream:
|
||||
# The upshot: wsgiserver will chunk the response if
|
||||
# you pop Content-Length (or set it explicitly to None).
|
||||
# Note that lib.static sets C-L to the file's st_size.
|
||||
if dict.get(headers, 'Content-Length') is None:
|
||||
dict.pop(headers, 'Content-Length', None)
|
||||
elif code < 200 or code in (204, 205, 304):
|
||||
# "All 1xx (informational), 204 (no content),
|
||||
# and 304 (not modified) responses MUST NOT
|
||||
# include a message-body."
|
||||
dict.pop(headers, 'Content-Length', None)
|
||||
self.body = ntob("")
|
||||
else:
|
||||
# Responses which are not streamed should have a Content-Length,
|
||||
# but allow user code to set Content-Length if desired.
|
||||
if dict.get(headers, 'Content-Length') is None:
|
||||
content = self.collapse_body()
|
||||
dict.__setitem__(headers, 'Content-Length', len(content))
|
||||
|
||||
# Transform our header dict into a list of tuples.
|
||||
self.header_list = h = headers.output()
|
||||
|
||||
cookie = self.cookie.output()
|
||||
if cookie:
|
||||
for line in cookie.split("\n"):
|
||||
if line.endswith("\r"):
|
||||
# Python 2.4 emits cookies joined by LF but 2.5+ by CRLF.
|
||||
line = line[:-1]
|
||||
name, value = line.split(": ", 1)
|
||||
if isinstance(name, unicodestr):
|
||||
name = name.encode("ISO-8859-1")
|
||||
if isinstance(value, unicodestr):
|
||||
value = headers.encode(value)
|
||||
h.append((name, value))
|
||||
|
||||
def check_timeout(self):
|
||||
"""If now > self.time + self.timeout, set self.timed_out.
|
||||
|
||||
This purposefully sets a flag, rather than raising an error,
|
||||
so that a monitor thread can interrupt the Response thread.
|
||||
"""
|
||||
if time.time() > self.time + self.timeout:
|
||||
self.timed_out = True
|
||||
226
lib/cherrypy/_cpserver.py
Normal file
226
lib/cherrypy/_cpserver.py
Normal file
@@ -0,0 +1,226 @@
|
||||
"""Manage HTTP servers with CherryPy."""
|
||||
|
||||
import warnings
|
||||
|
||||
import cherrypy
|
||||
from cherrypy.lib import attributes
|
||||
from cherrypy._cpcompat import basestring, py3k
|
||||
|
||||
# We import * because we want to export check_port
|
||||
# et al as attributes of this module.
|
||||
from cherrypy.process.servers import *
|
||||
|
||||
|
||||
class Server(ServerAdapter):
|
||||
|
||||
"""An adapter for an HTTP server.
|
||||
|
||||
You can set attributes (like socket_host and socket_port)
|
||||
on *this* object (which is probably cherrypy.server), and call
|
||||
quickstart. For example::
|
||||
|
||||
cherrypy.server.socket_port = 80
|
||||
cherrypy.quickstart()
|
||||
"""
|
||||
|
||||
socket_port = 8080
|
||||
"""The TCP port on which to listen for connections."""
|
||||
|
||||
_socket_host = '127.0.0.1'
|
||||
|
||||
def _get_socket_host(self):
|
||||
return self._socket_host
|
||||
|
||||
def _set_socket_host(self, value):
|
||||
if value == '':
|
||||
raise ValueError("The empty string ('') is not an allowed value. "
|
||||
"Use '0.0.0.0' instead to listen on all active "
|
||||
"interfaces (INADDR_ANY).")
|
||||
self._socket_host = value
|
||||
socket_host = property(
|
||||
_get_socket_host,
|
||||
_set_socket_host,
|
||||
doc="""The hostname or IP address on which to listen for connections.
|
||||
|
||||
Host values may be any IPv4 or IPv6 address, or any valid hostname.
|
||||
The string 'localhost' is a synonym for '127.0.0.1' (or '::1', if
|
||||
your hosts file prefers IPv6). The string '0.0.0.0' is a special
|
||||
IPv4 entry meaning "any active interface" (INADDR_ANY), and '::'
|
||||
is the similar IN6ADDR_ANY for IPv6. The empty string or None are
|
||||
not allowed.""")
|
||||
|
||||
socket_file = None
|
||||
"""If given, the name of the UNIX socket to use instead of TCP/IP.
|
||||
|
||||
When this option is not None, the `socket_host` and `socket_port` options
|
||||
are ignored."""
|
||||
|
||||
socket_queue_size = 5
|
||||
"""The 'backlog' argument to socket.listen(); specifies the maximum number
|
||||
of queued connections (default 5)."""
|
||||
|
||||
socket_timeout = 10
|
||||
"""The timeout in seconds for accepted connections (default 10)."""
|
||||
|
||||
accepted_queue_size = -1
|
||||
"""The maximum number of requests which will be queued up before
|
||||
the server refuses to accept it (default -1, meaning no limit)."""
|
||||
|
||||
accepted_queue_timeout = 10
|
||||
"""The timeout in seconds for attempting to add a request to the
|
||||
queue when the queue is full (default 10)."""
|
||||
|
||||
shutdown_timeout = 5
|
||||
"""The time to wait for HTTP worker threads to clean up."""
|
||||
|
||||
protocol_version = 'HTTP/1.1'
|
||||
"""The version string to write in the Status-Line of all HTTP responses,
|
||||
for example, "HTTP/1.1" (the default). Depending on the HTTP server used,
|
||||
this should also limit the supported features used in the response."""
|
||||
|
||||
thread_pool = 10
|
||||
"""The number of worker threads to start up in the pool."""
|
||||
|
||||
thread_pool_max = -1
|
||||
"""The maximum size of the worker-thread pool. Use -1 to indicate no limit.
|
||||
"""
|
||||
|
||||
max_request_header_size = 500 * 1024
|
||||
"""The maximum number of bytes allowable in the request headers.
|
||||
If exceeded, the HTTP server should return "413 Request Entity Too Large".
|
||||
"""
|
||||
|
||||
max_request_body_size = 100 * 1024 * 1024
|
||||
"""The maximum number of bytes allowable in the request body. If exceeded,
|
||||
the HTTP server should return "413 Request Entity Too Large"."""
|
||||
|
||||
instance = None
|
||||
"""If not None, this should be an HTTP server instance (such as
|
||||
CPWSGIServer) which cherrypy.server will control. Use this when you need
|
||||
more control over object instantiation than is available in the various
|
||||
configuration options."""
|
||||
|
||||
ssl_context = None
|
||||
"""When using PyOpenSSL, an instance of SSL.Context."""
|
||||
|
||||
ssl_certificate = None
|
||||
"""The filename of the SSL certificate to use."""
|
||||
|
||||
ssl_certificate_chain = None
|
||||
"""When using PyOpenSSL, the certificate chain to pass to
|
||||
Context.load_verify_locations."""
|
||||
|
||||
ssl_private_key = None
|
||||
"""The filename of the private key to use with SSL."""
|
||||
|
||||
if py3k:
|
||||
ssl_module = 'builtin'
|
||||
"""The name of a registered SSL adaptation module to use with
|
||||
the builtin WSGI server. Builtin options are: 'builtin' (to
|
||||
use the SSL library built into recent versions of Python).
|
||||
You may also register your own classes in the
|
||||
wsgiserver.ssl_adapters dict."""
|
||||
else:
|
||||
ssl_module = 'pyopenssl'
|
||||
"""The name of a registered SSL adaptation module to use with the
|
||||
builtin WSGI server. Builtin options are 'builtin' (to use the SSL
|
||||
library built into recent versions of Python) and 'pyopenssl' (to
|
||||
use the PyOpenSSL project, which you must install separately). You
|
||||
may also register your own classes in the wsgiserver.ssl_adapters
|
||||
dict."""
|
||||
|
||||
statistics = False
|
||||
"""Turns statistics-gathering on or off for aware HTTP servers."""
|
||||
|
||||
nodelay = True
|
||||
"""If True (the default since 3.1), sets the TCP_NODELAY socket option."""
|
||||
|
||||
wsgi_version = (1, 0)
|
||||
"""The WSGI version tuple to use with the builtin WSGI server.
|
||||
The provided options are (1, 0) [which includes support for PEP 3333,
|
||||
which declares it covers WSGI version 1.0.1 but still mandates the
|
||||
wsgi.version (1, 0)] and ('u', 0), an experimental unicode version.
|
||||
You may create and register your own experimental versions of the WSGI
|
||||
protocol by adding custom classes to the wsgiserver.wsgi_gateways dict."""
|
||||
|
||||
def __init__(self):
|
||||
self.bus = cherrypy.engine
|
||||
self.httpserver = None
|
||||
self.interrupt = None
|
||||
self.running = False
|
||||
|
||||
def httpserver_from_self(self, httpserver=None):
|
||||
"""Return a (httpserver, bind_addr) pair based on self attributes."""
|
||||
if httpserver is None:
|
||||
httpserver = self.instance
|
||||
if httpserver is None:
|
||||
from cherrypy import _cpwsgi_server
|
||||
httpserver = _cpwsgi_server.CPWSGIServer(self)
|
||||
if isinstance(httpserver, basestring):
|
||||
# Is anyone using this? Can I add an arg?
|
||||
httpserver = attributes(httpserver)(self)
|
||||
return httpserver, self.bind_addr
|
||||
|
||||
def start(self):
|
||||
"""Start the HTTP server."""
|
||||
if not self.httpserver:
|
||||
self.httpserver, self.bind_addr = self.httpserver_from_self()
|
||||
ServerAdapter.start(self)
|
||||
start.priority = 75
|
||||
|
||||
def _get_bind_addr(self):
|
||||
if self.socket_file:
|
||||
return self.socket_file
|
||||
if self.socket_host is None and self.socket_port is None:
|
||||
return None
|
||||
return (self.socket_host, self.socket_port)
|
||||
|
||||
def _set_bind_addr(self, value):
|
||||
if value is None:
|
||||
self.socket_file = None
|
||||
self.socket_host = None
|
||||
self.socket_port = None
|
||||
elif isinstance(value, basestring):
|
||||
self.socket_file = value
|
||||
self.socket_host = None
|
||||
self.socket_port = None
|
||||
else:
|
||||
try:
|
||||
self.socket_host, self.socket_port = value
|
||||
self.socket_file = None
|
||||
except ValueError:
|
||||
raise ValueError("bind_addr must be a (host, port) tuple "
|
||||
"(for TCP sockets) or a string (for Unix "
|
||||
"domain sockets), not %r" % value)
|
||||
bind_addr = property(
|
||||
_get_bind_addr,
|
||||
_set_bind_addr,
|
||||
doc='A (host, port) tuple for TCP sockets or '
|
||||
'a str for Unix domain sockets.')
|
||||
|
||||
def base(self):
|
||||
"""Return the base (scheme://host[:port] or sock file) for this server.
|
||||
"""
|
||||
if self.socket_file:
|
||||
return self.socket_file
|
||||
|
||||
host = self.socket_host
|
||||
if host in ('0.0.0.0', '::'):
|
||||
# 0.0.0.0 is INADDR_ANY and :: is IN6ADDR_ANY.
|
||||
# Look up the host name, which should be the
|
||||
# safest thing to spit out in a URL.
|
||||
import socket
|
||||
host = socket.gethostname()
|
||||
|
||||
port = self.socket_port
|
||||
|
||||
if self.ssl_certificate:
|
||||
scheme = "https"
|
||||
if port != 443:
|
||||
host += ":%s" % port
|
||||
else:
|
||||
scheme = "http"
|
||||
if port != 80:
|
||||
host += ":%s" % port
|
||||
|
||||
return "%s://%s" % (scheme, host)
|
||||
241
lib/cherrypy/_cpthreadinglocal.py
Normal file
241
lib/cherrypy/_cpthreadinglocal.py
Normal file
@@ -0,0 +1,241 @@
|
||||
# This is a backport of Python-2.4's threading.local() implementation
|
||||
|
||||
"""Thread-local objects
|
||||
|
||||
(Note that this module provides a Python version of thread
|
||||
threading.local class. Depending on the version of Python you're
|
||||
using, there may be a faster one available. You should always import
|
||||
the local class from threading.)
|
||||
|
||||
Thread-local objects support the management of thread-local data.
|
||||
If you have data that you want to be local to a thread, simply create
|
||||
a thread-local object and use its attributes:
|
||||
|
||||
>>> mydata = local()
|
||||
>>> mydata.number = 42
|
||||
>>> mydata.number
|
||||
42
|
||||
|
||||
You can also access the local-object's dictionary:
|
||||
|
||||
>>> mydata.__dict__
|
||||
{'number': 42}
|
||||
>>> mydata.__dict__.setdefault('widgets', [])
|
||||
[]
|
||||
>>> mydata.widgets
|
||||
[]
|
||||
|
||||
What's important about thread-local objects is that their data are
|
||||
local to a thread. If we access the data in a different thread:
|
||||
|
||||
>>> log = []
|
||||
>>> def f():
|
||||
... items = mydata.__dict__.items()
|
||||
... items.sort()
|
||||
... log.append(items)
|
||||
... mydata.number = 11
|
||||
... log.append(mydata.number)
|
||||
|
||||
>>> import threading
|
||||
>>> thread = threading.Thread(target=f)
|
||||
>>> thread.start()
|
||||
>>> thread.join()
|
||||
>>> log
|
||||
[[], 11]
|
||||
|
||||
we get different data. Furthermore, changes made in the other thread
|
||||
don't affect data seen in this thread:
|
||||
|
||||
>>> mydata.number
|
||||
42
|
||||
|
||||
Of course, values you get from a local object, including a __dict__
|
||||
attribute, are for whatever thread was current at the time the
|
||||
attribute was read. For that reason, you generally don't want to save
|
||||
these values across threads, as they apply only to the thread they
|
||||
came from.
|
||||
|
||||
You can create custom local objects by subclassing the local class:
|
||||
|
||||
>>> class MyLocal(local):
|
||||
... number = 2
|
||||
... initialized = False
|
||||
... def __init__(self, **kw):
|
||||
... if self.initialized:
|
||||
... raise SystemError('__init__ called too many times')
|
||||
... self.initialized = True
|
||||
... self.__dict__.update(kw)
|
||||
... def squared(self):
|
||||
... return self.number ** 2
|
||||
|
||||
This can be useful to support default values, methods and
|
||||
initialization. Note that if you define an __init__ method, it will be
|
||||
called each time the local object is used in a separate thread. This
|
||||
is necessary to initialize each thread's dictionary.
|
||||
|
||||
Now if we create a local object:
|
||||
|
||||
>>> mydata = MyLocal(color='red')
|
||||
|
||||
Now we have a default number:
|
||||
|
||||
>>> mydata.number
|
||||
2
|
||||
|
||||
an initial color:
|
||||
|
||||
>>> mydata.color
|
||||
'red'
|
||||
>>> del mydata.color
|
||||
|
||||
And a method that operates on the data:
|
||||
|
||||
>>> mydata.squared()
|
||||
4
|
||||
|
||||
As before, we can access the data in a separate thread:
|
||||
|
||||
>>> log = []
|
||||
>>> thread = threading.Thread(target=f)
|
||||
>>> thread.start()
|
||||
>>> thread.join()
|
||||
>>> log
|
||||
[[('color', 'red'), ('initialized', True)], 11]
|
||||
|
||||
without affecting this thread's data:
|
||||
|
||||
>>> mydata.number
|
||||
2
|
||||
>>> mydata.color
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
AttributeError: 'MyLocal' object has no attribute 'color'
|
||||
|
||||
Note that subclasses can define slots, but they are not thread
|
||||
local. They are shared across threads:
|
||||
|
||||
>>> class MyLocal(local):
|
||||
... __slots__ = 'number'
|
||||
|
||||
>>> mydata = MyLocal()
|
||||
>>> mydata.number = 42
|
||||
>>> mydata.color = 'red'
|
||||
|
||||
So, the separate thread:
|
||||
|
||||
>>> thread = threading.Thread(target=f)
|
||||
>>> thread.start()
|
||||
>>> thread.join()
|
||||
|
||||
affects what we see:
|
||||
|
||||
>>> mydata.number
|
||||
11
|
||||
|
||||
>>> del mydata
|
||||
"""
|
||||
|
||||
# Threading import is at end
|
||||
|
||||
|
||||
class _localbase(object):
|
||||
__slots__ = '_local__key', '_local__args', '_local__lock'
|
||||
|
||||
def __new__(cls, *args, **kw):
|
||||
self = object.__new__(cls)
|
||||
key = 'thread.local.' + str(id(self))
|
||||
object.__setattr__(self, '_local__key', key)
|
||||
object.__setattr__(self, '_local__args', (args, kw))
|
||||
object.__setattr__(self, '_local__lock', RLock())
|
||||
|
||||
if args or kw and (cls.__init__ is object.__init__):
|
||||
raise TypeError("Initialization arguments are not supported")
|
||||
|
||||
# We need to create the thread dict in anticipation of
|
||||
# __init__ being called, to make sure we don't call it
|
||||
# again ourselves.
|
||||
dict = object.__getattribute__(self, '__dict__')
|
||||
currentThread().__dict__[key] = dict
|
||||
|
||||
return self
|
||||
|
||||
|
||||
def _patch(self):
|
||||
key = object.__getattribute__(self, '_local__key')
|
||||
d = currentThread().__dict__.get(key)
|
||||
if d is None:
|
||||
d = {}
|
||||
currentThread().__dict__[key] = d
|
||||
object.__setattr__(self, '__dict__', d)
|
||||
|
||||
# we have a new instance dict, so call out __init__ if we have
|
||||
# one
|
||||
cls = type(self)
|
||||
if cls.__init__ is not object.__init__:
|
||||
args, kw = object.__getattribute__(self, '_local__args')
|
||||
cls.__init__(self, *args, **kw)
|
||||
else:
|
||||
object.__setattr__(self, '__dict__', d)
|
||||
|
||||
|
||||
class local(_localbase):
|
||||
|
||||
def __getattribute__(self, name):
|
||||
lock = object.__getattribute__(self, '_local__lock')
|
||||
lock.acquire()
|
||||
try:
|
||||
_patch(self)
|
||||
return object.__getattribute__(self, name)
|
||||
finally:
|
||||
lock.release()
|
||||
|
||||
def __setattr__(self, name, value):
|
||||
lock = object.__getattribute__(self, '_local__lock')
|
||||
lock.acquire()
|
||||
try:
|
||||
_patch(self)
|
||||
return object.__setattr__(self, name, value)
|
||||
finally:
|
||||
lock.release()
|
||||
|
||||
def __delattr__(self, name):
|
||||
lock = object.__getattribute__(self, '_local__lock')
|
||||
lock.acquire()
|
||||
try:
|
||||
_patch(self)
|
||||
return object.__delattr__(self, name)
|
||||
finally:
|
||||
lock.release()
|
||||
|
||||
def __del__():
|
||||
threading_enumerate = enumerate
|
||||
__getattribute__ = object.__getattribute__
|
||||
|
||||
def __del__(self):
|
||||
key = __getattribute__(self, '_local__key')
|
||||
|
||||
try:
|
||||
threads = list(threading_enumerate())
|
||||
except:
|
||||
# if enumerate fails, as it seems to do during
|
||||
# shutdown, we'll skip cleanup under the assumption
|
||||
# that there is nothing to clean up
|
||||
return
|
||||
|
||||
for thread in threads:
|
||||
try:
|
||||
__dict__ = thread.__dict__
|
||||
except AttributeError:
|
||||
# Thread is dying, rest in peace
|
||||
continue
|
||||
|
||||
if key in __dict__:
|
||||
try:
|
||||
del __dict__[key]
|
||||
except KeyError:
|
||||
pass # didn't have anything in this thread
|
||||
|
||||
return __del__
|
||||
__del__ = __del__()
|
||||
|
||||
from threading import currentThread, enumerate, RLock
|
||||
529
lib/cherrypy/_cptools.py
Normal file
529
lib/cherrypy/_cptools.py
Normal file
@@ -0,0 +1,529 @@
|
||||
"""CherryPy tools. A "tool" is any helper, adapted to CP.
|
||||
|
||||
Tools are usually designed to be used in a variety of ways (although some
|
||||
may only offer one if they choose):
|
||||
|
||||
Library calls
|
||||
All tools are callables that can be used wherever needed.
|
||||
The arguments are straightforward and should be detailed within the
|
||||
docstring.
|
||||
|
||||
Function decorators
|
||||
All tools, when called, may be used as decorators which configure
|
||||
individual CherryPy page handlers (methods on the CherryPy tree).
|
||||
That is, "@tools.anytool()" should "turn on" the tool via the
|
||||
decorated function's _cp_config attribute.
|
||||
|
||||
CherryPy config
|
||||
If a tool exposes a "_setup" callable, it will be called
|
||||
once per Request (if the feature is "turned on" via config).
|
||||
|
||||
Tools may be implemented as any object with a namespace. The builtins
|
||||
are generally either modules or instances of the tools.Tool class.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import warnings
|
||||
|
||||
import cherrypy
|
||||
|
||||
|
||||
def _getargs(func):
|
||||
"""Return the names of all static arguments to the given function."""
|
||||
# Use this instead of importing inspect for less mem overhead.
|
||||
import types
|
||||
if sys.version_info >= (3, 0):
|
||||
if isinstance(func, types.MethodType):
|
||||
func = func.__func__
|
||||
co = func.__code__
|
||||
else:
|
||||
if isinstance(func, types.MethodType):
|
||||
func = func.im_func
|
||||
co = func.func_code
|
||||
return co.co_varnames[:co.co_argcount]
|
||||
|
||||
|
||||
_attr_error = (
|
||||
"CherryPy Tools cannot be turned on directly. Instead, turn them "
|
||||
"on via config, or use them as decorators on your page handlers."
|
||||
)
|
||||
|
||||
|
||||
class Tool(object):
|
||||
|
||||
"""A registered function for use with CherryPy request-processing hooks.
|
||||
|
||||
help(tool.callable) should give you more information about this Tool.
|
||||
"""
|
||||
|
||||
namespace = "tools"
|
||||
|
||||
def __init__(self, point, callable, name=None, priority=50):
|
||||
self._point = point
|
||||
self.callable = callable
|
||||
self._name = name
|
||||
self._priority = priority
|
||||
self.__doc__ = self.callable.__doc__
|
||||
self._setargs()
|
||||
|
||||
def _get_on(self):
|
||||
raise AttributeError(_attr_error)
|
||||
|
||||
def _set_on(self, value):
|
||||
raise AttributeError(_attr_error)
|
||||
on = property(_get_on, _set_on)
|
||||
|
||||
def _setargs(self):
|
||||
"""Copy func parameter names to obj attributes."""
|
||||
try:
|
||||
for arg in _getargs(self.callable):
|
||||
setattr(self, arg, None)
|
||||
except (TypeError, AttributeError):
|
||||
if hasattr(self.callable, "__call__"):
|
||||
for arg in _getargs(self.callable.__call__):
|
||||
setattr(self, arg, None)
|
||||
# IronPython 1.0 raises NotImplementedError because
|
||||
# inspect.getargspec tries to access Python bytecode
|
||||
# in co_code attribute.
|
||||
except NotImplementedError:
|
||||
pass
|
||||
# IronPython 1B1 may raise IndexError in some cases,
|
||||
# but if we trap it here it doesn't prevent CP from working.
|
||||
except IndexError:
|
||||
pass
|
||||
|
||||
def _merged_args(self, d=None):
|
||||
"""Return a dict of configuration entries for this Tool."""
|
||||
if d:
|
||||
conf = d.copy()
|
||||
else:
|
||||
conf = {}
|
||||
|
||||
tm = cherrypy.serving.request.toolmaps[self.namespace]
|
||||
if self._name in tm:
|
||||
conf.update(tm[self._name])
|
||||
|
||||
if "on" in conf:
|
||||
del conf["on"]
|
||||
|
||||
return conf
|
||||
|
||||
def __call__(self, *args, **kwargs):
|
||||
"""Compile-time decorator (turn on the tool in config).
|
||||
|
||||
For example::
|
||||
|
||||
@tools.proxy()
|
||||
def whats_my_base(self):
|
||||
return cherrypy.request.base
|
||||
whats_my_base.exposed = True
|
||||
"""
|
||||
if args:
|
||||
raise TypeError("The %r Tool does not accept positional "
|
||||
"arguments; you must use keyword arguments."
|
||||
% self._name)
|
||||
|
||||
def tool_decorator(f):
|
||||
if not hasattr(f, "_cp_config"):
|
||||
f._cp_config = {}
|
||||
subspace = self.namespace + "." + self._name + "."
|
||||
f._cp_config[subspace + "on"] = True
|
||||
for k, v in kwargs.items():
|
||||
f._cp_config[subspace + k] = v
|
||||
return f
|
||||
return tool_decorator
|
||||
|
||||
def _setup(self):
|
||||
"""Hook this tool into cherrypy.request.
|
||||
|
||||
The standard CherryPy request object will automatically call this
|
||||
method when the tool is "turned on" in config.
|
||||
"""
|
||||
conf = self._merged_args()
|
||||
p = conf.pop("priority", None)
|
||||
if p is None:
|
||||
p = getattr(self.callable, "priority", self._priority)
|
||||
cherrypy.serving.request.hooks.attach(self._point, self.callable,
|
||||
priority=p, **conf)
|
||||
|
||||
|
||||
class HandlerTool(Tool):
|
||||
|
||||
"""Tool which is called 'before main', that may skip normal handlers.
|
||||
|
||||
If the tool successfully handles the request (by setting response.body),
|
||||
if should return True. This will cause CherryPy to skip any 'normal' page
|
||||
handler. If the tool did not handle the request, it should return False
|
||||
to tell CherryPy to continue on and call the normal page handler. If the
|
||||
tool is declared AS a page handler (see the 'handler' method), returning
|
||||
False will raise NotFound.
|
||||
"""
|
||||
|
||||
def __init__(self, callable, name=None):
|
||||
Tool.__init__(self, 'before_handler', callable, name)
|
||||
|
||||
def handler(self, *args, **kwargs):
|
||||
"""Use this tool as a CherryPy page handler.
|
||||
|
||||
For example::
|
||||
|
||||
class Root:
|
||||
nav = tools.staticdir.handler(section="/nav", dir="nav",
|
||||
root=absDir)
|
||||
"""
|
||||
def handle_func(*a, **kw):
|
||||
handled = self.callable(*args, **self._merged_args(kwargs))
|
||||
if not handled:
|
||||
raise cherrypy.NotFound()
|
||||
return cherrypy.serving.response.body
|
||||
handle_func.exposed = True
|
||||
return handle_func
|
||||
|
||||
def _wrapper(self, **kwargs):
|
||||
if self.callable(**kwargs):
|
||||
cherrypy.serving.request.handler = None
|
||||
|
||||
def _setup(self):
|
||||
"""Hook this tool into cherrypy.request.
|
||||
|
||||
The standard CherryPy request object will automatically call this
|
||||
method when the tool is "turned on" in config.
|
||||
"""
|
||||
conf = self._merged_args()
|
||||
p = conf.pop("priority", None)
|
||||
if p is None:
|
||||
p = getattr(self.callable, "priority", self._priority)
|
||||
cherrypy.serving.request.hooks.attach(self._point, self._wrapper,
|
||||
priority=p, **conf)
|
||||
|
||||
|
||||
class HandlerWrapperTool(Tool):
|
||||
|
||||
"""Tool which wraps request.handler in a provided wrapper function.
|
||||
|
||||
The 'newhandler' arg must be a handler wrapper function that takes a
|
||||
'next_handler' argument, plus ``*args`` and ``**kwargs``. Like all
|
||||
page handler
|
||||
functions, it must return an iterable for use as cherrypy.response.body.
|
||||
|
||||
For example, to allow your 'inner' page handlers to return dicts
|
||||
which then get interpolated into a template::
|
||||
|
||||
def interpolator(next_handler, *args, **kwargs):
|
||||
filename = cherrypy.request.config.get('template')
|
||||
cherrypy.response.template = env.get_template(filename)
|
||||
response_dict = next_handler(*args, **kwargs)
|
||||
return cherrypy.response.template.render(**response_dict)
|
||||
cherrypy.tools.jinja = HandlerWrapperTool(interpolator)
|
||||
"""
|
||||
|
||||
def __init__(self, newhandler, point='before_handler', name=None,
|
||||
priority=50):
|
||||
self.newhandler = newhandler
|
||||
self._point = point
|
||||
self._name = name
|
||||
self._priority = priority
|
||||
|
||||
def callable(self, *args, **kwargs):
|
||||
innerfunc = cherrypy.serving.request.handler
|
||||
|
||||
def wrap(*args, **kwargs):
|
||||
return self.newhandler(innerfunc, *args, **kwargs)
|
||||
cherrypy.serving.request.handler = wrap
|
||||
|
||||
|
||||
class ErrorTool(Tool):
|
||||
|
||||
"""Tool which is used to replace the default request.error_response."""
|
||||
|
||||
def __init__(self, callable, name=None):
|
||||
Tool.__init__(self, None, callable, name)
|
||||
|
||||
def _wrapper(self):
|
||||
self.callable(**self._merged_args())
|
||||
|
||||
def _setup(self):
|
||||
"""Hook this tool into cherrypy.request.
|
||||
|
||||
The standard CherryPy request object will automatically call this
|
||||
method when the tool is "turned on" in config.
|
||||
"""
|
||||
cherrypy.serving.request.error_response = self._wrapper
|
||||
|
||||
|
||||
# Builtin tools #
|
||||
|
||||
from cherrypy.lib import cptools, encoding, auth, static, jsontools
|
||||
from cherrypy.lib import sessions as _sessions, xmlrpcutil as _xmlrpc
|
||||
from cherrypy.lib import caching as _caching
|
||||
from cherrypy.lib import auth_basic, auth_digest
|
||||
|
||||
|
||||
class SessionTool(Tool):
|
||||
|
||||
"""Session Tool for CherryPy.
|
||||
|
||||
sessions.locking
|
||||
When 'implicit' (the default), the session will be locked for you,
|
||||
just before running the page handler.
|
||||
|
||||
When 'early', the session will be locked before reading the request
|
||||
body. This is off by default for safety reasons; for example,
|
||||
a large upload would block the session, denying an AJAX
|
||||
progress meter
|
||||
(`issue <https://bitbucket.org/cherrypy/cherrypy/issue/630>`_).
|
||||
|
||||
When 'explicit' (or any other value), you need to call
|
||||
cherrypy.session.acquire_lock() yourself before using
|
||||
session data.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
# _sessions.init must be bound after headers are read
|
||||
Tool.__init__(self, 'before_request_body', _sessions.init)
|
||||
|
||||
def _lock_session(self):
|
||||
cherrypy.serving.session.acquire_lock()
|
||||
|
||||
def _setup(self):
|
||||
"""Hook this tool into cherrypy.request.
|
||||
|
||||
The standard CherryPy request object will automatically call this
|
||||
method when the tool is "turned on" in config.
|
||||
"""
|
||||
hooks = cherrypy.serving.request.hooks
|
||||
|
||||
conf = self._merged_args()
|
||||
|
||||
p = conf.pop("priority", None)
|
||||
if p is None:
|
||||
p = getattr(self.callable, "priority", self._priority)
|
||||
|
||||
hooks.attach(self._point, self.callable, priority=p, **conf)
|
||||
|
||||
locking = conf.pop('locking', 'implicit')
|
||||
if locking == 'implicit':
|
||||
hooks.attach('before_handler', self._lock_session)
|
||||
elif locking == 'early':
|
||||
# Lock before the request body (but after _sessions.init runs!)
|
||||
hooks.attach('before_request_body', self._lock_session,
|
||||
priority=60)
|
||||
else:
|
||||
# Don't lock
|
||||
pass
|
||||
|
||||
hooks.attach('before_finalize', _sessions.save)
|
||||
hooks.attach('on_end_request', _sessions.close)
|
||||
|
||||
def regenerate(self):
|
||||
"""Drop the current session and make a new one (with a new id)."""
|
||||
sess = cherrypy.serving.session
|
||||
sess.regenerate()
|
||||
|
||||
# Grab cookie-relevant tool args
|
||||
conf = dict([(k, v) for k, v in self._merged_args().items()
|
||||
if k in ('path', 'path_header', 'name', 'timeout',
|
||||
'domain', 'secure')])
|
||||
_sessions.set_response_cookie(**conf)
|
||||
|
||||
|
||||
class XMLRPCController(object):
|
||||
|
||||
"""A Controller (page handler collection) for XML-RPC.
|
||||
|
||||
To use it, have your controllers subclass this base class (it will
|
||||
turn on the tool for you).
|
||||
|
||||
You can also supply the following optional config entries::
|
||||
|
||||
tools.xmlrpc.encoding: 'utf-8'
|
||||
tools.xmlrpc.allow_none: 0
|
||||
|
||||
XML-RPC is a rather discontinuous layer over HTTP; dispatching to the
|
||||
appropriate handler must first be performed according to the URL, and
|
||||
then a second dispatch step must take place according to the RPC method
|
||||
specified in the request body. It also allows a superfluous "/RPC2"
|
||||
prefix in the URL, supplies its own handler args in the body, and
|
||||
requires a 200 OK "Fault" response instead of 404 when the desired
|
||||
method is not found.
|
||||
|
||||
Therefore, XML-RPC cannot be implemented for CherryPy via a Tool alone.
|
||||
This Controller acts as the dispatch target for the first half (based
|
||||
on the URL); it then reads the RPC method from the request body and
|
||||
does its own second dispatch step based on that method. It also reads
|
||||
body params, and returns a Fault on error.
|
||||
|
||||
The XMLRPCDispatcher strips any /RPC2 prefix; if you aren't using /RPC2
|
||||
in your URL's, you can safely skip turning on the XMLRPCDispatcher.
|
||||
Otherwise, you need to use declare it in config::
|
||||
|
||||
request.dispatch: cherrypy.dispatch.XMLRPCDispatcher()
|
||||
"""
|
||||
|
||||
# Note we're hard-coding this into the 'tools' namespace. We could do
|
||||
# a huge amount of work to make it relocatable, but the only reason why
|
||||
# would be if someone actually disabled the default_toolbox. Meh.
|
||||
_cp_config = {'tools.xmlrpc.on': True}
|
||||
|
||||
def default(self, *vpath, **params):
|
||||
rpcparams, rpcmethod = _xmlrpc.process_body()
|
||||
|
||||
subhandler = self
|
||||
for attr in str(rpcmethod).split('.'):
|
||||
subhandler = getattr(subhandler, attr, None)
|
||||
|
||||
if subhandler and getattr(subhandler, "exposed", False):
|
||||
body = subhandler(*(vpath + rpcparams), **params)
|
||||
|
||||
else:
|
||||
# https://bitbucket.org/cherrypy/cherrypy/issue/533
|
||||
# if a method is not found, an xmlrpclib.Fault should be returned
|
||||
# raising an exception here will do that; see
|
||||
# cherrypy.lib.xmlrpcutil.on_error
|
||||
raise Exception('method "%s" is not supported' % attr)
|
||||
|
||||
conf = cherrypy.serving.request.toolmaps['tools'].get("xmlrpc", {})
|
||||
_xmlrpc.respond(body,
|
||||
conf.get('encoding', 'utf-8'),
|
||||
conf.get('allow_none', 0))
|
||||
return cherrypy.serving.response.body
|
||||
default.exposed = True
|
||||
|
||||
|
||||
class SessionAuthTool(HandlerTool):
|
||||
|
||||
def _setargs(self):
|
||||
for name in dir(cptools.SessionAuth):
|
||||
if not name.startswith("__"):
|
||||
setattr(self, name, None)
|
||||
|
||||
|
||||
class CachingTool(Tool):
|
||||
|
||||
"""Caching Tool for CherryPy."""
|
||||
|
||||
def _wrapper(self, **kwargs):
|
||||
request = cherrypy.serving.request
|
||||
if _caching.get(**kwargs):
|
||||
request.handler = None
|
||||
else:
|
||||
if request.cacheable:
|
||||
# Note the devious technique here of adding hooks on the fly
|
||||
request.hooks.attach('before_finalize', _caching.tee_output,
|
||||
priority=90)
|
||||
_wrapper.priority = 20
|
||||
|
||||
def _setup(self):
|
||||
"""Hook caching into cherrypy.request."""
|
||||
conf = self._merged_args()
|
||||
|
||||
p = conf.pop("priority", None)
|
||||
cherrypy.serving.request.hooks.attach('before_handler', self._wrapper,
|
||||
priority=p, **conf)
|
||||
|
||||
|
||||
class Toolbox(object):
|
||||
|
||||
"""A collection of Tools.
|
||||
|
||||
This object also functions as a config namespace handler for itself.
|
||||
Custom toolboxes should be added to each Application's toolboxes dict.
|
||||
"""
|
||||
|
||||
def __init__(self, namespace):
|
||||
self.namespace = namespace
|
||||
|
||||
def __setattr__(self, name, value):
|
||||
# If the Tool._name is None, supply it from the attribute name.
|
||||
if isinstance(value, Tool):
|
||||
if value._name is None:
|
||||
value._name = name
|
||||
value.namespace = self.namespace
|
||||
object.__setattr__(self, name, value)
|
||||
|
||||
def __enter__(self):
|
||||
"""Populate request.toolmaps from tools specified in config."""
|
||||
cherrypy.serving.request.toolmaps[self.namespace] = map = {}
|
||||
|
||||
def populate(k, v):
|
||||
toolname, arg = k.split(".", 1)
|
||||
bucket = map.setdefault(toolname, {})
|
||||
bucket[arg] = v
|
||||
return populate
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
"""Run tool._setup() for each tool in our toolmap."""
|
||||
map = cherrypy.serving.request.toolmaps.get(self.namespace)
|
||||
if map:
|
||||
for name, settings in map.items():
|
||||
if settings.get("on", False):
|
||||
tool = getattr(self, name)
|
||||
tool._setup()
|
||||
|
||||
|
||||
class DeprecatedTool(Tool):
|
||||
|
||||
_name = None
|
||||
warnmsg = "This Tool is deprecated."
|
||||
|
||||
def __init__(self, point, warnmsg=None):
|
||||
self.point = point
|
||||
if warnmsg is not None:
|
||||
self.warnmsg = warnmsg
|
||||
|
||||
def __call__(self, *args, **kwargs):
|
||||
warnings.warn(self.warnmsg)
|
||||
|
||||
def tool_decorator(f):
|
||||
return f
|
||||
return tool_decorator
|
||||
|
||||
def _setup(self):
|
||||
warnings.warn(self.warnmsg)
|
||||
|
||||
|
||||
default_toolbox = _d = Toolbox("tools")
|
||||
_d.session_auth = SessionAuthTool(cptools.session_auth)
|
||||
_d.allow = Tool('on_start_resource', cptools.allow)
|
||||
_d.proxy = Tool('before_request_body', cptools.proxy, priority=30)
|
||||
_d.response_headers = Tool('on_start_resource', cptools.response_headers)
|
||||
_d.log_tracebacks = Tool('before_error_response', cptools.log_traceback)
|
||||
_d.log_headers = Tool('before_error_response', cptools.log_request_headers)
|
||||
_d.log_hooks = Tool('on_end_request', cptools.log_hooks, priority=100)
|
||||
_d.err_redirect = ErrorTool(cptools.redirect)
|
||||
_d.etags = Tool('before_finalize', cptools.validate_etags, priority=75)
|
||||
_d.decode = Tool('before_request_body', encoding.decode)
|
||||
# the order of encoding, gzip, caching is important
|
||||
_d.encode = Tool('before_handler', encoding.ResponseEncoder, priority=70)
|
||||
_d.gzip = Tool('before_finalize', encoding.gzip, priority=80)
|
||||
_d.staticdir = HandlerTool(static.staticdir)
|
||||
_d.staticfile = HandlerTool(static.staticfile)
|
||||
_d.sessions = SessionTool()
|
||||
_d.xmlrpc = ErrorTool(_xmlrpc.on_error)
|
||||
_d.caching = CachingTool('before_handler', _caching.get, 'caching')
|
||||
_d.expires = Tool('before_finalize', _caching.expires)
|
||||
_d.tidy = DeprecatedTool(
|
||||
'before_finalize',
|
||||
"The tidy tool has been removed from the standard distribution of "
|
||||
"CherryPy. The most recent version can be found at "
|
||||
"http://tools.cherrypy.org/browser.")
|
||||
_d.nsgmls = DeprecatedTool(
|
||||
'before_finalize',
|
||||
"The nsgmls tool has been removed from the standard distribution of "
|
||||
"CherryPy. The most recent version can be found at "
|
||||
"http://tools.cherrypy.org/browser.")
|
||||
_d.ignore_headers = Tool('before_request_body', cptools.ignore_headers)
|
||||
_d.referer = Tool('before_request_body', cptools.referer)
|
||||
_d.basic_auth = Tool('on_start_resource', auth.basic_auth)
|
||||
_d.digest_auth = Tool('on_start_resource', auth.digest_auth)
|
||||
_d.trailing_slash = Tool('before_handler', cptools.trailing_slash, priority=60)
|
||||
_d.flatten = Tool('before_finalize', cptools.flatten)
|
||||
_d.accept = Tool('on_start_resource', cptools.accept)
|
||||
_d.redirect = Tool('on_start_resource', cptools.redirect)
|
||||
_d.autovary = Tool('on_start_resource', cptools.autovary, priority=0)
|
||||
_d.json_in = Tool('before_request_body', jsontools.json_in, priority=30)
|
||||
_d.json_out = Tool('before_handler', jsontools.json_out, priority=30)
|
||||
_d.auth_basic = Tool('before_handler', auth_basic.basic_auth, priority=1)
|
||||
_d.auth_digest = Tool('before_handler', auth_digest.digest_auth, priority=1)
|
||||
|
||||
del _d, cptools, encoding, auth, static
|
||||
299
lib/cherrypy/_cptree.py
Normal file
299
lib/cherrypy/_cptree.py
Normal file
@@ -0,0 +1,299 @@
|
||||
"""CherryPy Application and Tree objects."""
|
||||
|
||||
import os
|
||||
|
||||
import cherrypy
|
||||
from cherrypy._cpcompat import ntou, py3k
|
||||
from cherrypy import _cpconfig, _cplogging, _cprequest, _cpwsgi, tools
|
||||
from cherrypy.lib import httputil
|
||||
|
||||
|
||||
class Application(object):
|
||||
|
||||
"""A CherryPy Application.
|
||||
|
||||
Servers and gateways should not instantiate Request objects directly.
|
||||
Instead, they should ask an Application object for a request object.
|
||||
|
||||
An instance of this class may also be used as a WSGI callable
|
||||
(WSGI application object) for itself.
|
||||
"""
|
||||
|
||||
root = None
|
||||
"""The top-most container of page handlers for this app. Handlers should
|
||||
be arranged in a hierarchy of attributes, matching the expected URI
|
||||
hierarchy; the default dispatcher then searches this hierarchy for a
|
||||
matching handler. When using a dispatcher other than the default,
|
||||
this value may be None."""
|
||||
|
||||
config = {}
|
||||
"""A dict of {path: pathconf} pairs, where 'pathconf' is itself a dict
|
||||
of {key: value} pairs."""
|
||||
|
||||
namespaces = _cpconfig.NamespaceSet()
|
||||
toolboxes = {'tools': cherrypy.tools}
|
||||
|
||||
log = None
|
||||
"""A LogManager instance. See _cplogging."""
|
||||
|
||||
wsgiapp = None
|
||||
"""A CPWSGIApp instance. See _cpwsgi."""
|
||||
|
||||
request_class = _cprequest.Request
|
||||
response_class = _cprequest.Response
|
||||
|
||||
relative_urls = False
|
||||
|
||||
def __init__(self, root, script_name="", config=None):
|
||||
self.log = _cplogging.LogManager(id(self), cherrypy.log.logger_root)
|
||||
self.root = root
|
||||
self.script_name = script_name
|
||||
self.wsgiapp = _cpwsgi.CPWSGIApp(self)
|
||||
|
||||
self.namespaces = self.namespaces.copy()
|
||||
self.namespaces["log"] = lambda k, v: setattr(self.log, k, v)
|
||||
self.namespaces["wsgi"] = self.wsgiapp.namespace_handler
|
||||
|
||||
self.config = self.__class__.config.copy()
|
||||
if config:
|
||||
self.merge(config)
|
||||
|
||||
def __repr__(self):
|
||||
return "%s.%s(%r, %r)" % (self.__module__, self.__class__.__name__,
|
||||
self.root, self.script_name)
|
||||
|
||||
script_name_doc = """The URI "mount point" for this app. A mount point
|
||||
is that portion of the URI which is constant for all URIs that are
|
||||
serviced by this application; it does not include scheme, host, or proxy
|
||||
("virtual host") portions of the URI.
|
||||
|
||||
For example, if script_name is "/my/cool/app", then the URL
|
||||
"http://www.example.com/my/cool/app/page1" might be handled by a
|
||||
"page1" method on the root object.
|
||||
|
||||
The value of script_name MUST NOT end in a slash. If the script_name
|
||||
refers to the root of the URI, it MUST be an empty string (not "/").
|
||||
|
||||
If script_name is explicitly set to None, then the script_name will be
|
||||
provided for each call from request.wsgi_environ['SCRIPT_NAME'].
|
||||
"""
|
||||
|
||||
def _get_script_name(self):
|
||||
if self._script_name is not None:
|
||||
return self._script_name
|
||||
|
||||
# A `_script_name` with a value of None signals that the script name
|
||||
# should be pulled from WSGI environ.
|
||||
return cherrypy.serving.request.wsgi_environ['SCRIPT_NAME'].rstrip("/")
|
||||
|
||||
def _set_script_name(self, value):
|
||||
if value:
|
||||
value = value.rstrip("/")
|
||||
self._script_name = value
|
||||
script_name = property(fget=_get_script_name, fset=_set_script_name,
|
||||
doc=script_name_doc)
|
||||
|
||||
def merge(self, config):
|
||||
"""Merge the given config into self.config."""
|
||||
_cpconfig.merge(self.config, config)
|
||||
|
||||
# Handle namespaces specified in config.
|
||||
self.namespaces(self.config.get("/", {}))
|
||||
|
||||
def find_config(self, path, key, default=None):
|
||||
"""Return the most-specific value for key along path, or default."""
|
||||
trail = path or "/"
|
||||
while trail:
|
||||
nodeconf = self.config.get(trail, {})
|
||||
|
||||
if key in nodeconf:
|
||||
return nodeconf[key]
|
||||
|
||||
lastslash = trail.rfind("/")
|
||||
if lastslash == -1:
|
||||
break
|
||||
elif lastslash == 0 and trail != "/":
|
||||
trail = "/"
|
||||
else:
|
||||
trail = trail[:lastslash]
|
||||
|
||||
return default
|
||||
|
||||
def get_serving(self, local, remote, scheme, sproto):
|
||||
"""Create and return a Request and Response object."""
|
||||
req = self.request_class(local, remote, scheme, sproto)
|
||||
req.app = self
|
||||
|
||||
for name, toolbox in self.toolboxes.items():
|
||||
req.namespaces[name] = toolbox
|
||||
|
||||
resp = self.response_class()
|
||||
cherrypy.serving.load(req, resp)
|
||||
cherrypy.engine.publish('acquire_thread')
|
||||
cherrypy.engine.publish('before_request')
|
||||
|
||||
return req, resp
|
||||
|
||||
def release_serving(self):
|
||||
"""Release the current serving (request and response)."""
|
||||
req = cherrypy.serving.request
|
||||
|
||||
cherrypy.engine.publish('after_request')
|
||||
|
||||
try:
|
||||
req.close()
|
||||
except:
|
||||
cherrypy.log(traceback=True, severity=40)
|
||||
|
||||
cherrypy.serving.clear()
|
||||
|
||||
def __call__(self, environ, start_response):
|
||||
return self.wsgiapp(environ, start_response)
|
||||
|
||||
|
||||
class Tree(object):
|
||||
|
||||
"""A registry of CherryPy applications, mounted at diverse points.
|
||||
|
||||
An instance of this class may also be used as a WSGI callable
|
||||
(WSGI application object), in which case it dispatches to all
|
||||
mounted apps.
|
||||
"""
|
||||
|
||||
apps = {}
|
||||
"""
|
||||
A dict of the form {script name: application}, where "script name"
|
||||
is a string declaring the URI mount point (no trailing slash), and
|
||||
"application" is an instance of cherrypy.Application (or an arbitrary
|
||||
WSGI callable if you happen to be using a WSGI server)."""
|
||||
|
||||
def __init__(self):
|
||||
self.apps = {}
|
||||
|
||||
def mount(self, root, script_name="", config=None):
|
||||
"""Mount a new app from a root object, script_name, and config.
|
||||
|
||||
root
|
||||
An instance of a "controller class" (a collection of page
|
||||
handler methods) which represents the root of the application.
|
||||
This may also be an Application instance, or None if using
|
||||
a dispatcher other than the default.
|
||||
|
||||
script_name
|
||||
A string containing the "mount point" of the application.
|
||||
This should start with a slash, and be the path portion of the
|
||||
URL at which to mount the given root. For example, if root.index()
|
||||
will handle requests to "http://www.example.com:8080/dept/app1/",
|
||||
then the script_name argument would be "/dept/app1".
|
||||
|
||||
It MUST NOT end in a slash. If the script_name refers to the
|
||||
root of the URI, it MUST be an empty string (not "/").
|
||||
|
||||
config
|
||||
A file or dict containing application config.
|
||||
"""
|
||||
if script_name is None:
|
||||
raise TypeError(
|
||||
"The 'script_name' argument may not be None. Application "
|
||||
"objects may, however, possess a script_name of None (in "
|
||||
"order to inpect the WSGI environ for SCRIPT_NAME upon each "
|
||||
"request). You cannot mount such Applications on this Tree; "
|
||||
"you must pass them to a WSGI server interface directly.")
|
||||
|
||||
# Next line both 1) strips trailing slash and 2) maps "/" -> "".
|
||||
script_name = script_name.rstrip("/")
|
||||
|
||||
if isinstance(root, Application):
|
||||
app = root
|
||||
if script_name != "" and script_name != app.script_name:
|
||||
raise ValueError(
|
||||
"Cannot specify a different script name and pass an "
|
||||
"Application instance to cherrypy.mount")
|
||||
script_name = app.script_name
|
||||
else:
|
||||
app = Application(root, script_name)
|
||||
|
||||
# If mounted at "", add favicon.ico
|
||||
if (script_name == "" and root is not None
|
||||
and not hasattr(root, "favicon_ico")):
|
||||
favicon = os.path.join(os.getcwd(), os.path.dirname(__file__),
|
||||
"favicon.ico")
|
||||
root.favicon_ico = tools.staticfile.handler(favicon)
|
||||
|
||||
if config:
|
||||
app.merge(config)
|
||||
|
||||
self.apps[script_name] = app
|
||||
|
||||
return app
|
||||
|
||||
def graft(self, wsgi_callable, script_name=""):
|
||||
"""Mount a wsgi callable at the given script_name."""
|
||||
# Next line both 1) strips trailing slash and 2) maps "/" -> "".
|
||||
script_name = script_name.rstrip("/")
|
||||
self.apps[script_name] = wsgi_callable
|
||||
|
||||
def script_name(self, path=None):
|
||||
"""The script_name of the app at the given path, or None.
|
||||
|
||||
If path is None, cherrypy.request is used.
|
||||
"""
|
||||
if path is None:
|
||||
try:
|
||||
request = cherrypy.serving.request
|
||||
path = httputil.urljoin(request.script_name,
|
||||
request.path_info)
|
||||
except AttributeError:
|
||||
return None
|
||||
|
||||
while True:
|
||||
if path in self.apps:
|
||||
return path
|
||||
|
||||
if path == "":
|
||||
return None
|
||||
|
||||
# Move one node up the tree and try again.
|
||||
path = path[:path.rfind("/")]
|
||||
|
||||
def __call__(self, environ, start_response):
|
||||
# If you're calling this, then you're probably setting SCRIPT_NAME
|
||||
# to '' (some WSGI servers always set SCRIPT_NAME to '').
|
||||
# Try to look up the app using the full path.
|
||||
env1x = environ
|
||||
if environ.get(ntou('wsgi.version')) == (ntou('u'), 0):
|
||||
env1x = _cpwsgi.downgrade_wsgi_ux_to_1x(environ)
|
||||
path = httputil.urljoin(env1x.get('SCRIPT_NAME', ''),
|
||||
env1x.get('PATH_INFO', ''))
|
||||
sn = self.script_name(path or "/")
|
||||
if sn is None:
|
||||
start_response('404 Not Found', [])
|
||||
return []
|
||||
|
||||
app = self.apps[sn]
|
||||
|
||||
# Correct the SCRIPT_NAME and PATH_INFO environ entries.
|
||||
environ = environ.copy()
|
||||
if not py3k:
|
||||
if environ.get(ntou('wsgi.version')) == (ntou('u'), 0):
|
||||
# Python 2/WSGI u.0: all strings MUST be of type unicode
|
||||
enc = environ[ntou('wsgi.url_encoding')]
|
||||
environ[ntou('SCRIPT_NAME')] = sn.decode(enc)
|
||||
environ[ntou('PATH_INFO')] = path[
|
||||
len(sn.rstrip("/")):].decode(enc)
|
||||
else:
|
||||
# Python 2/WSGI 1.x: all strings MUST be of type str
|
||||
environ['SCRIPT_NAME'] = sn
|
||||
environ['PATH_INFO'] = path[len(sn.rstrip("/")):]
|
||||
else:
|
||||
if environ.get(ntou('wsgi.version')) == (ntou('u'), 0):
|
||||
# Python 3/WSGI u.0: all strings MUST be full unicode
|
||||
environ['SCRIPT_NAME'] = sn
|
||||
environ['PATH_INFO'] = path[len(sn.rstrip("/")):]
|
||||
else:
|
||||
# Python 3/WSGI 1.x: all strings MUST be ISO-8859-1 str
|
||||
environ['SCRIPT_NAME'] = sn.encode(
|
||||
'utf-8').decode('ISO-8859-1')
|
||||
environ['PATH_INFO'] = path[
|
||||
len(sn.rstrip("/")):].encode('utf-8').decode('ISO-8859-1')
|
||||
return app(environ, start_response)
|
||||
438
lib/cherrypy/_cpwsgi.py
Normal file
438
lib/cherrypy/_cpwsgi.py
Normal file
@@ -0,0 +1,438 @@
|
||||
"""WSGI interface (see PEP 333 and 3333).
|
||||
|
||||
Note that WSGI environ keys and values are 'native strings'; that is,
|
||||
whatever the type of "" is. For Python 2, that's a byte string; for Python 3,
|
||||
it's a unicode string. But PEP 3333 says: "even if Python's str type is
|
||||
actually Unicode "under the hood", the content of native strings must
|
||||
still be translatable to bytes via the Latin-1 encoding!"
|
||||
"""
|
||||
|
||||
import sys as _sys
|
||||
|
||||
import cherrypy as _cherrypy
|
||||
from cherrypy._cpcompat import BytesIO, bytestr, ntob, ntou, py3k, unicodestr
|
||||
from cherrypy import _cperror
|
||||
from cherrypy.lib import httputil
|
||||
from cherrypy.lib import is_closable_iterator
|
||||
|
||||
def downgrade_wsgi_ux_to_1x(environ):
|
||||
"""Return a new environ dict for WSGI 1.x from the given WSGI u.x environ.
|
||||
"""
|
||||
env1x = {}
|
||||
|
||||
url_encoding = environ[ntou('wsgi.url_encoding')]
|
||||
for k, v in list(environ.items()):
|
||||
if k in [ntou('PATH_INFO'), ntou('SCRIPT_NAME'), ntou('QUERY_STRING')]:
|
||||
v = v.encode(url_encoding)
|
||||
elif isinstance(v, unicodestr):
|
||||
v = v.encode('ISO-8859-1')
|
||||
env1x[k.encode('ISO-8859-1')] = v
|
||||
|
||||
return env1x
|
||||
|
||||
|
||||
class VirtualHost(object):
|
||||
|
||||
"""Select a different WSGI application based on the Host header.
|
||||
|
||||
This can be useful when running multiple sites within one CP server.
|
||||
It allows several domains to point to different applications. For example::
|
||||
|
||||
root = Root()
|
||||
RootApp = cherrypy.Application(root)
|
||||
Domain2App = cherrypy.Application(root)
|
||||
SecureApp = cherrypy.Application(Secure())
|
||||
|
||||
vhost = cherrypy._cpwsgi.VirtualHost(RootApp,
|
||||
domains={'www.domain2.example': Domain2App,
|
||||
'www.domain2.example:443': SecureApp,
|
||||
})
|
||||
|
||||
cherrypy.tree.graft(vhost)
|
||||
"""
|
||||
default = None
|
||||
"""Required. The default WSGI application."""
|
||||
|
||||
use_x_forwarded_host = True
|
||||
"""If True (the default), any "X-Forwarded-Host"
|
||||
request header will be used instead of the "Host" header. This
|
||||
is commonly added by HTTP servers (such as Apache) when proxying."""
|
||||
|
||||
domains = {}
|
||||
"""A dict of {host header value: application} pairs.
|
||||
The incoming "Host" request header is looked up in this dict,
|
||||
and, if a match is found, the corresponding WSGI application
|
||||
will be called instead of the default. Note that you often need
|
||||
separate entries for "example.com" and "www.example.com".
|
||||
In addition, "Host" headers may contain the port number.
|
||||
"""
|
||||
|
||||
def __init__(self, default, domains=None, use_x_forwarded_host=True):
|
||||
self.default = default
|
||||
self.domains = domains or {}
|
||||
self.use_x_forwarded_host = use_x_forwarded_host
|
||||
|
||||
def __call__(self, environ, start_response):
|
||||
domain = environ.get('HTTP_HOST', '')
|
||||
if self.use_x_forwarded_host:
|
||||
domain = environ.get("HTTP_X_FORWARDED_HOST", domain)
|
||||
|
||||
nextapp = self.domains.get(domain)
|
||||
if nextapp is None:
|
||||
nextapp = self.default
|
||||
return nextapp(environ, start_response)
|
||||
|
||||
|
||||
class InternalRedirector(object):
|
||||
|
||||
"""WSGI middleware that handles raised cherrypy.InternalRedirect."""
|
||||
|
||||
def __init__(self, nextapp, recursive=False):
|
||||
self.nextapp = nextapp
|
||||
self.recursive = recursive
|
||||
|
||||
def __call__(self, environ, start_response):
|
||||
redirections = []
|
||||
while True:
|
||||
environ = environ.copy()
|
||||
try:
|
||||
return self.nextapp(environ, start_response)
|
||||
except _cherrypy.InternalRedirect:
|
||||
ir = _sys.exc_info()[1]
|
||||
sn = environ.get('SCRIPT_NAME', '')
|
||||
path = environ.get('PATH_INFO', '')
|
||||
qs = environ.get('QUERY_STRING', '')
|
||||
|
||||
# Add the *previous* path_info + qs to redirections.
|
||||
old_uri = sn + path
|
||||
if qs:
|
||||
old_uri += "?" + qs
|
||||
redirections.append(old_uri)
|
||||
|
||||
if not self.recursive:
|
||||
# Check to see if the new URI has been redirected to
|
||||
# already
|
||||
new_uri = sn + ir.path
|
||||
if ir.query_string:
|
||||
new_uri += "?" + ir.query_string
|
||||
if new_uri in redirections:
|
||||
ir.request.close()
|
||||
raise RuntimeError("InternalRedirector visited the "
|
||||
"same URL twice: %r" % new_uri)
|
||||
|
||||
# Munge the environment and try again.
|
||||
environ['REQUEST_METHOD'] = "GET"
|
||||
environ['PATH_INFO'] = ir.path
|
||||
environ['QUERY_STRING'] = ir.query_string
|
||||
environ['wsgi.input'] = BytesIO()
|
||||
environ['CONTENT_LENGTH'] = "0"
|
||||
environ['cherrypy.previous_request'] = ir.request
|
||||
|
||||
|
||||
class ExceptionTrapper(object):
|
||||
|
||||
"""WSGI middleware that traps exceptions."""
|
||||
|
||||
def __init__(self, nextapp, throws=(KeyboardInterrupt, SystemExit)):
|
||||
self.nextapp = nextapp
|
||||
self.throws = throws
|
||||
|
||||
def __call__(self, environ, start_response):
|
||||
return _TrappedResponse(
|
||||
self.nextapp,
|
||||
environ,
|
||||
start_response,
|
||||
self.throws
|
||||
)
|
||||
|
||||
|
||||
class _TrappedResponse(object):
|
||||
|
||||
response = iter([])
|
||||
|
||||
def __init__(self, nextapp, environ, start_response, throws):
|
||||
self.nextapp = nextapp
|
||||
self.environ = environ
|
||||
self.start_response = start_response
|
||||
self.throws = throws
|
||||
self.started_response = False
|
||||
self.response = self.trap(
|
||||
self.nextapp, self.environ, self.start_response)
|
||||
self.iter_response = iter(self.response)
|
||||
|
||||
def __iter__(self):
|
||||
self.started_response = True
|
||||
return self
|
||||
|
||||
if py3k:
|
||||
def __next__(self):
|
||||
return self.trap(next, self.iter_response)
|
||||
else:
|
||||
def next(self):
|
||||
return self.trap(self.iter_response.next)
|
||||
|
||||
def close(self):
|
||||
if hasattr(self.response, 'close'):
|
||||
self.response.close()
|
||||
|
||||
def trap(self, func, *args, **kwargs):
|
||||
try:
|
||||
return func(*args, **kwargs)
|
||||
except self.throws:
|
||||
raise
|
||||
except StopIteration:
|
||||
raise
|
||||
except:
|
||||
tb = _cperror.format_exc()
|
||||
#print('trapped (started %s):' % self.started_response, tb)
|
||||
_cherrypy.log(tb, severity=40)
|
||||
if not _cherrypy.request.show_tracebacks:
|
||||
tb = ""
|
||||
s, h, b = _cperror.bare_error(tb)
|
||||
if py3k:
|
||||
# What fun.
|
||||
s = s.decode('ISO-8859-1')
|
||||
h = [(k.decode('ISO-8859-1'), v.decode('ISO-8859-1'))
|
||||
for k, v in h]
|
||||
if self.started_response:
|
||||
# Empty our iterable (so future calls raise StopIteration)
|
||||
self.iter_response = iter([])
|
||||
else:
|
||||
self.iter_response = iter(b)
|
||||
|
||||
try:
|
||||
self.start_response(s, h, _sys.exc_info())
|
||||
except:
|
||||
# "The application must not trap any exceptions raised by
|
||||
# start_response, if it called start_response with exc_info.
|
||||
# Instead, it should allow such exceptions to propagate
|
||||
# back to the server or gateway."
|
||||
# But we still log and call close() to clean up ourselves.
|
||||
_cherrypy.log(traceback=True, severity=40)
|
||||
raise
|
||||
|
||||
if self.started_response:
|
||||
return ntob("").join(b)
|
||||
else:
|
||||
return b
|
||||
|
||||
|
||||
# WSGI-to-CP Adapter #
|
||||
|
||||
|
||||
class AppResponse(object):
|
||||
|
||||
"""WSGI response iterable for CherryPy applications."""
|
||||
|
||||
def __init__(self, environ, start_response, cpapp):
|
||||
self.cpapp = cpapp
|
||||
try:
|
||||
if not py3k:
|
||||
if environ.get(ntou('wsgi.version')) == (ntou('u'), 0):
|
||||
environ = downgrade_wsgi_ux_to_1x(environ)
|
||||
self.environ = environ
|
||||
self.run()
|
||||
|
||||
r = _cherrypy.serving.response
|
||||
|
||||
outstatus = r.output_status
|
||||
if not isinstance(outstatus, bytestr):
|
||||
raise TypeError("response.output_status is not a byte string.")
|
||||
|
||||
outheaders = []
|
||||
for k, v in r.header_list:
|
||||
if not isinstance(k, bytestr):
|
||||
raise TypeError(
|
||||
"response.header_list key %r is not a byte string." %
|
||||
k)
|
||||
if not isinstance(v, bytestr):
|
||||
raise TypeError(
|
||||
"response.header_list value %r is not a byte string." %
|
||||
v)
|
||||
outheaders.append((k, v))
|
||||
|
||||
if py3k:
|
||||
# According to PEP 3333, when using Python 3, the response
|
||||
# status and headers must be bytes masquerading as unicode;
|
||||
# that is, they must be of type "str" but are restricted to
|
||||
# code points in the "latin-1" set.
|
||||
outstatus = outstatus.decode('ISO-8859-1')
|
||||
outheaders = [(k.decode('ISO-8859-1'), v.decode('ISO-8859-1'))
|
||||
for k, v in outheaders]
|
||||
|
||||
self.iter_response = iter(r.body)
|
||||
self.write = start_response(outstatus, outheaders)
|
||||
except:
|
||||
self.close()
|
||||
raise
|
||||
|
||||
def __iter__(self):
|
||||
return self
|
||||
|
||||
if py3k:
|
||||
def __next__(self):
|
||||
return next(self.iter_response)
|
||||
else:
|
||||
def next(self):
|
||||
return self.iter_response.next()
|
||||
|
||||
def close(self):
|
||||
"""Close and de-reference the current request and response. (Core)"""
|
||||
streaming = _cherrypy.serving.response.stream
|
||||
self.cpapp.release_serving()
|
||||
|
||||
# We avoid the expense of examining the iterator to see if it's
|
||||
# closable unless we are streaming the response, as that's the
|
||||
# only situation where we are going to have an iterator which
|
||||
# may not have been exhausted yet.
|
||||
if streaming and is_closable_iterator(self.iter_response):
|
||||
iter_close = self.iter_response.close
|
||||
try:
|
||||
iter_close()
|
||||
except Exception:
|
||||
_cherrypy.log(traceback=True, severity=40)
|
||||
|
||||
def run(self):
|
||||
"""Create a Request object using environ."""
|
||||
env = self.environ.get
|
||||
|
||||
local = httputil.Host('', int(env('SERVER_PORT', 80)),
|
||||
env('SERVER_NAME', ''))
|
||||
remote = httputil.Host(env('REMOTE_ADDR', ''),
|
||||
int(env('REMOTE_PORT', -1) or -1),
|
||||
env('REMOTE_HOST', ''))
|
||||
scheme = env('wsgi.url_scheme')
|
||||
sproto = env('ACTUAL_SERVER_PROTOCOL', "HTTP/1.1")
|
||||
request, resp = self.cpapp.get_serving(local, remote, scheme, sproto)
|
||||
|
||||
# LOGON_USER is served by IIS, and is the name of the
|
||||
# user after having been mapped to a local account.
|
||||
# Both IIS and Apache set REMOTE_USER, when possible.
|
||||
request.login = env('LOGON_USER') or env('REMOTE_USER') or None
|
||||
request.multithread = self.environ['wsgi.multithread']
|
||||
request.multiprocess = self.environ['wsgi.multiprocess']
|
||||
request.wsgi_environ = self.environ
|
||||
request.prev = env('cherrypy.previous_request', None)
|
||||
|
||||
meth = self.environ['REQUEST_METHOD']
|
||||
|
||||
path = httputil.urljoin(self.environ.get('SCRIPT_NAME', ''),
|
||||
self.environ.get('PATH_INFO', ''))
|
||||
qs = self.environ.get('QUERY_STRING', '')
|
||||
|
||||
if py3k:
|
||||
# This isn't perfect; if the given PATH_INFO is in the
|
||||
# wrong encoding, it may fail to match the appropriate config
|
||||
# section URI. But meh.
|
||||
old_enc = self.environ.get('wsgi.url_encoding', 'ISO-8859-1')
|
||||
new_enc = self.cpapp.find_config(self.environ.get('PATH_INFO', ''),
|
||||
"request.uri_encoding", 'utf-8')
|
||||
if new_enc.lower() != old_enc.lower():
|
||||
# Even though the path and qs are unicode, the WSGI server
|
||||
# is required by PEP 3333 to coerce them to ISO-8859-1
|
||||
# masquerading as unicode. So we have to encode back to
|
||||
# bytes and then decode again using the "correct" encoding.
|
||||
try:
|
||||
u_path = path.encode(old_enc).decode(new_enc)
|
||||
u_qs = qs.encode(old_enc).decode(new_enc)
|
||||
except (UnicodeEncodeError, UnicodeDecodeError):
|
||||
# Just pass them through without transcoding and hope.
|
||||
pass
|
||||
else:
|
||||
# Only set transcoded values if they both succeed.
|
||||
path = u_path
|
||||
qs = u_qs
|
||||
|
||||
rproto = self.environ.get('SERVER_PROTOCOL')
|
||||
headers = self.translate_headers(self.environ)
|
||||
rfile = self.environ['wsgi.input']
|
||||
request.run(meth, path, qs, rproto, headers, rfile)
|
||||
|
||||
headerNames = {'HTTP_CGI_AUTHORIZATION': 'Authorization',
|
||||
'CONTENT_LENGTH': 'Content-Length',
|
||||
'CONTENT_TYPE': 'Content-Type',
|
||||
'REMOTE_HOST': 'Remote-Host',
|
||||
'REMOTE_ADDR': 'Remote-Addr',
|
||||
}
|
||||
|
||||
def translate_headers(self, environ):
|
||||
"""Translate CGI-environ header names to HTTP header names."""
|
||||
for cgiName in environ:
|
||||
# We assume all incoming header keys are uppercase already.
|
||||
if cgiName in self.headerNames:
|
||||
yield self.headerNames[cgiName], environ[cgiName]
|
||||
elif cgiName[:5] == "HTTP_":
|
||||
# Hackish attempt at recovering original header names.
|
||||
translatedHeader = cgiName[5:].replace("_", "-")
|
||||
yield translatedHeader, environ[cgiName]
|
||||
|
||||
|
||||
class CPWSGIApp(object):
|
||||
|
||||
"""A WSGI application object for a CherryPy Application."""
|
||||
|
||||
pipeline = [('ExceptionTrapper', ExceptionTrapper),
|
||||
('InternalRedirector', InternalRedirector),
|
||||
]
|
||||
"""A list of (name, wsgiapp) pairs. Each 'wsgiapp' MUST be a
|
||||
constructor that takes an initial, positional 'nextapp' argument,
|
||||
plus optional keyword arguments, and returns a WSGI application
|
||||
(that takes environ and start_response arguments). The 'name' can
|
||||
be any you choose, and will correspond to keys in self.config."""
|
||||
|
||||
head = None
|
||||
"""Rather than nest all apps in the pipeline on each call, it's only
|
||||
done the first time, and the result is memoized into self.head. Set
|
||||
this to None again if you change self.pipeline after calling self."""
|
||||
|
||||
config = {}
|
||||
"""A dict whose keys match names listed in the pipeline. Each
|
||||
value is a further dict which will be passed to the corresponding
|
||||
named WSGI callable (from the pipeline) as keyword arguments."""
|
||||
|
||||
response_class = AppResponse
|
||||
"""The class to instantiate and return as the next app in the WSGI chain.
|
||||
"""
|
||||
|
||||
def __init__(self, cpapp, pipeline=None):
|
||||
self.cpapp = cpapp
|
||||
self.pipeline = self.pipeline[:]
|
||||
if pipeline:
|
||||
self.pipeline.extend(pipeline)
|
||||
self.config = self.config.copy()
|
||||
|
||||
def tail(self, environ, start_response):
|
||||
"""WSGI application callable for the actual CherryPy application.
|
||||
|
||||
You probably shouldn't call this; call self.__call__ instead,
|
||||
so that any WSGI middleware in self.pipeline can run first.
|
||||
"""
|
||||
return self.response_class(environ, start_response, self.cpapp)
|
||||
|
||||
def __call__(self, environ, start_response):
|
||||
head = self.head
|
||||
if head is None:
|
||||
# Create and nest the WSGI apps in our pipeline (in reverse order).
|
||||
# Then memoize the result in self.head.
|
||||
head = self.tail
|
||||
for name, callable in self.pipeline[::-1]:
|
||||
conf = self.config.get(name, {})
|
||||
head = callable(head, **conf)
|
||||
self.head = head
|
||||
return head(environ, start_response)
|
||||
|
||||
def namespace_handler(self, k, v):
|
||||
"""Config handler for the 'wsgi' namespace."""
|
||||
if k == "pipeline":
|
||||
# Note this allows multiple 'wsgi.pipeline' config entries
|
||||
# (but each entry will be processed in a 'random' order).
|
||||
# It should also allow developers to set default middleware
|
||||
# in code (passed to self.__init__) that deployers can add to
|
||||
# (but not remove) via config.
|
||||
self.pipeline.extend(v)
|
||||
elif k == "response_class":
|
||||
self.response_class = v
|
||||
else:
|
||||
name, arg = k.split(".", 1)
|
||||
bucket = self.config.setdefault(name, {})
|
||||
bucket[arg] = v
|
||||
70
lib/cherrypy/_cpwsgi_server.py
Normal file
70
lib/cherrypy/_cpwsgi_server.py
Normal file
@@ -0,0 +1,70 @@
|
||||
"""WSGI server interface (see PEP 333). This adds some CP-specific bits to
|
||||
the framework-agnostic wsgiserver package.
|
||||
"""
|
||||
import sys
|
||||
|
||||
import cherrypy
|
||||
from cherrypy import wsgiserver
|
||||
|
||||
|
||||
class CPWSGIServer(wsgiserver.CherryPyWSGIServer):
|
||||
|
||||
"""Wrapper for wsgiserver.CherryPyWSGIServer.
|
||||
|
||||
wsgiserver has been designed to not reference CherryPy in any way,
|
||||
so that it can be used in other frameworks and applications. Therefore,
|
||||
we wrap it here, so we can set our own mount points from cherrypy.tree
|
||||
and apply some attributes from config -> cherrypy.server -> wsgiserver.
|
||||
"""
|
||||
|
||||
def __init__(self, server_adapter=cherrypy.server):
|
||||
self.server_adapter = server_adapter
|
||||
self.max_request_header_size = (
|
||||
self.server_adapter.max_request_header_size or 0
|
||||
)
|
||||
self.max_request_body_size = (
|
||||
self.server_adapter.max_request_body_size or 0
|
||||
)
|
||||
|
||||
server_name = (self.server_adapter.socket_host or
|
||||
self.server_adapter.socket_file or
|
||||
None)
|
||||
|
||||
self.wsgi_version = self.server_adapter.wsgi_version
|
||||
s = wsgiserver.CherryPyWSGIServer
|
||||
s.__init__(self, server_adapter.bind_addr, cherrypy.tree,
|
||||
self.server_adapter.thread_pool,
|
||||
server_name,
|
||||
max=self.server_adapter.thread_pool_max,
|
||||
request_queue_size=self.server_adapter.socket_queue_size,
|
||||
timeout=self.server_adapter.socket_timeout,
|
||||
shutdown_timeout=self.server_adapter.shutdown_timeout,
|
||||
accepted_queue_size=self.server_adapter.accepted_queue_size,
|
||||
accepted_queue_timeout=self.server_adapter.accepted_queue_timeout,
|
||||
)
|
||||
self.protocol = self.server_adapter.protocol_version
|
||||
self.nodelay = self.server_adapter.nodelay
|
||||
|
||||
if sys.version_info >= (3, 0):
|
||||
ssl_module = self.server_adapter.ssl_module or 'builtin'
|
||||
else:
|
||||
ssl_module = self.server_adapter.ssl_module or 'pyopenssl'
|
||||
if self.server_adapter.ssl_context:
|
||||
adapter_class = wsgiserver.get_ssl_adapter_class(ssl_module)
|
||||
self.ssl_adapter = adapter_class(
|
||||
self.server_adapter.ssl_certificate,
|
||||
self.server_adapter.ssl_private_key,
|
||||
self.server_adapter.ssl_certificate_chain)
|
||||
self.ssl_adapter.context = self.server_adapter.ssl_context
|
||||
elif self.server_adapter.ssl_certificate:
|
||||
adapter_class = wsgiserver.get_ssl_adapter_class(ssl_module)
|
||||
self.ssl_adapter = adapter_class(
|
||||
self.server_adapter.ssl_certificate,
|
||||
self.server_adapter.ssl_private_key,
|
||||
self.server_adapter.ssl_certificate_chain)
|
||||
|
||||
self.stats['Enabled'] = getattr(
|
||||
self.server_adapter, 'statistics', False)
|
||||
|
||||
def error_log(self, msg="", level=20, traceback=False):
|
||||
cherrypy.engine.log(msg, level, traceback)
|
||||
110
lib/cherrypy/cherryd
Normal file
110
lib/cherrypy/cherryd
Normal file
@@ -0,0 +1,110 @@
|
||||
#! /usr/bin/env python
|
||||
"""The CherryPy daemon."""
|
||||
|
||||
import sys
|
||||
|
||||
import cherrypy
|
||||
from cherrypy.process import plugins, servers
|
||||
from cherrypy import Application
|
||||
|
||||
|
||||
def start(configfiles=None, daemonize=False, environment=None,
|
||||
fastcgi=False, scgi=False, pidfile=None, imports=None,
|
||||
cgi=False):
|
||||
"""Subscribe all engine plugins and start the engine."""
|
||||
sys.path = [''] + sys.path
|
||||
for i in imports or []:
|
||||
exec("import %s" % i)
|
||||
|
||||
for c in configfiles or []:
|
||||
cherrypy.config.update(c)
|
||||
# If there's only one app mounted, merge config into it.
|
||||
if len(cherrypy.tree.apps) == 1:
|
||||
for app in cherrypy.tree.apps.values():
|
||||
if isinstance(app, Application):
|
||||
app.merge(c)
|
||||
|
||||
engine = cherrypy.engine
|
||||
|
||||
if environment is not None:
|
||||
cherrypy.config.update({'environment': environment})
|
||||
|
||||
# Only daemonize if asked to.
|
||||
if daemonize:
|
||||
# Don't print anything to stdout/sterr.
|
||||
cherrypy.config.update({'log.screen': False})
|
||||
plugins.Daemonizer(engine).subscribe()
|
||||
|
||||
if pidfile:
|
||||
plugins.PIDFile(engine, pidfile).subscribe()
|
||||
|
||||
if hasattr(engine, "signal_handler"):
|
||||
engine.signal_handler.subscribe()
|
||||
if hasattr(engine, "console_control_handler"):
|
||||
engine.console_control_handler.subscribe()
|
||||
|
||||
if (fastcgi and (scgi or cgi)) or (scgi and cgi):
|
||||
cherrypy.log.error("You may only specify one of the cgi, fastcgi, and "
|
||||
"scgi options.", 'ENGINE')
|
||||
sys.exit(1)
|
||||
elif fastcgi or scgi or cgi:
|
||||
# Turn off autoreload when using *cgi.
|
||||
cherrypy.config.update({'engine.autoreload_on': False})
|
||||
# Turn off the default HTTP server (which is subscribed by default).
|
||||
cherrypy.server.unsubscribe()
|
||||
|
||||
addr = cherrypy.server.bind_addr
|
||||
if fastcgi:
|
||||
f = servers.FlupFCGIServer(application=cherrypy.tree,
|
||||
bindAddress=addr)
|
||||
elif scgi:
|
||||
f = servers.FlupSCGIServer(application=cherrypy.tree,
|
||||
bindAddress=addr)
|
||||
else:
|
||||
f = servers.FlupCGIServer(application=cherrypy.tree,
|
||||
bindAddress=addr)
|
||||
s = servers.ServerAdapter(engine, httpserver=f, bind_addr=addr)
|
||||
s.subscribe()
|
||||
|
||||
# Always start the engine; this will start all other services
|
||||
try:
|
||||
engine.start()
|
||||
except:
|
||||
# Assume the error has been logged already via bus.log.
|
||||
sys.exit(1)
|
||||
else:
|
||||
engine.block()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
from optparse import OptionParser
|
||||
|
||||
p = OptionParser()
|
||||
p.add_option('-c', '--config', action="append", dest='config',
|
||||
help="specify config file(s)")
|
||||
p.add_option('-d', action="store_true", dest='daemonize',
|
||||
help="run the server as a daemon")
|
||||
p.add_option('-e', '--environment', dest='environment', default=None,
|
||||
help="apply the given config environment")
|
||||
p.add_option('-f', action="store_true", dest='fastcgi',
|
||||
help="start a fastcgi server instead of the default HTTP "
|
||||
"server")
|
||||
p.add_option('-s', action="store_true", dest='scgi',
|
||||
help="start a scgi server instead of the default HTTP server")
|
||||
p.add_option('-x', action="store_true", dest='cgi',
|
||||
help="start a cgi server instead of the default HTTP server")
|
||||
p.add_option('-i', '--import', action="append", dest='imports',
|
||||
help="specify modules to import")
|
||||
p.add_option('-p', '--pidfile', dest='pidfile', default=None,
|
||||
help="store the process id in the given file")
|
||||
p.add_option('-P', '--Path', action="append", dest='Path',
|
||||
help="add the given paths to sys.path")
|
||||
options, args = p.parse_args()
|
||||
|
||||
if options.Path:
|
||||
for p in options.Path:
|
||||
sys.path.insert(0, p)
|
||||
|
||||
start(options.config, options.daemonize,
|
||||
options.environment, options.fastcgi, options.scgi,
|
||||
options.pidfile, options.imports, options.cgi)
|
||||
BIN
lib/cherrypy/favicon.ico
Normal file
BIN
lib/cherrypy/favicon.ico
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 1.4 KiB |
85
lib/cherrypy/lib/__init__.py
Normal file
85
lib/cherrypy/lib/__init__.py
Normal file
@@ -0,0 +1,85 @@
|
||||
"""CherryPy Library"""
|
||||
|
||||
# Deprecated in CherryPy 3.2 -- remove in CherryPy 3.3
|
||||
from cherrypy.lib.reprconf import unrepr, modules, attributes
|
||||
|
||||
def is_iterator(obj):
|
||||
'''Returns a boolean indicating if the object provided implements
|
||||
the iterator protocol (i.e. like a generator). This will return
|
||||
false for objects which iterable, but not iterators themselves.'''
|
||||
from types import GeneratorType
|
||||
if isinstance(obj, GeneratorType):
|
||||
return True
|
||||
elif not hasattr(obj, '__iter__'):
|
||||
return False
|
||||
else:
|
||||
# Types which implement the protocol must return themselves when
|
||||
# invoking 'iter' upon them.
|
||||
return iter(obj) is obj
|
||||
|
||||
def is_closable_iterator(obj):
|
||||
|
||||
# Not an iterator.
|
||||
if not is_iterator(obj):
|
||||
return False
|
||||
|
||||
# A generator - the easiest thing to deal with.
|
||||
import inspect
|
||||
if inspect.isgenerator(obj):
|
||||
return True
|
||||
|
||||
# A custom iterator. Look for a close method...
|
||||
if not (hasattr(obj, 'close') and callable(obj.close)):
|
||||
return False
|
||||
|
||||
# ... which doesn't require any arguments.
|
||||
try:
|
||||
inspect.getcallargs(obj.close)
|
||||
except TypeError:
|
||||
return False
|
||||
else:
|
||||
return True
|
||||
|
||||
class file_generator(object):
|
||||
|
||||
"""Yield the given input (a file object) in chunks (default 64k). (Core)"""
|
||||
|
||||
def __init__(self, input, chunkSize=65536):
|
||||
self.input = input
|
||||
self.chunkSize = chunkSize
|
||||
|
||||
def __iter__(self):
|
||||
return self
|
||||
|
||||
def __next__(self):
|
||||
chunk = self.input.read(self.chunkSize)
|
||||
if chunk:
|
||||
return chunk
|
||||
else:
|
||||
if hasattr(self.input, 'close'):
|
||||
self.input.close()
|
||||
raise StopIteration()
|
||||
next = __next__
|
||||
|
||||
|
||||
def file_generator_limited(fileobj, count, chunk_size=65536):
|
||||
"""Yield the given file object in chunks, stopping after `count`
|
||||
bytes has been emitted. Default chunk size is 64kB. (Core)
|
||||
"""
|
||||
remaining = count
|
||||
while remaining > 0:
|
||||
chunk = fileobj.read(min(chunk_size, remaining))
|
||||
chunklen = len(chunk)
|
||||
if chunklen == 0:
|
||||
return
|
||||
remaining -= chunklen
|
||||
yield chunk
|
||||
|
||||
|
||||
def set_vary_header(response, header_name):
|
||||
"Add a Vary header to a response"
|
||||
varies = response.headers.get("Vary", "")
|
||||
varies = [x.strip() for x in varies.split(",") if x.strip()]
|
||||
if header_name not in varies:
|
||||
varies.append(header_name)
|
||||
response.headers['Vary'] = ", ".join(varies)
|
||||
97
lib/cherrypy/lib/auth.py
Normal file
97
lib/cherrypy/lib/auth.py
Normal file
@@ -0,0 +1,97 @@
|
||||
import cherrypy
|
||||
from cherrypy.lib import httpauth
|
||||
|
||||
|
||||
def check_auth(users, encrypt=None, realm=None):
|
||||
"""If an authorization header contains credentials, return True or False.
|
||||
"""
|
||||
request = cherrypy.serving.request
|
||||
if 'authorization' in request.headers:
|
||||
# make sure the provided credentials are correctly set
|
||||
ah = httpauth.parseAuthorization(request.headers['authorization'])
|
||||
if ah is None:
|
||||
raise cherrypy.HTTPError(400, 'Bad Request')
|
||||
|
||||
if not encrypt:
|
||||
encrypt = httpauth.DIGEST_AUTH_ENCODERS[httpauth.MD5]
|
||||
|
||||
if hasattr(users, '__call__'):
|
||||
try:
|
||||
# backward compatibility
|
||||
users = users() # expect it to return a dictionary
|
||||
|
||||
if not isinstance(users, dict):
|
||||
raise ValueError(
|
||||
"Authentication users must be a dictionary")
|
||||
|
||||
# fetch the user password
|
||||
password = users.get(ah["username"], None)
|
||||
except TypeError:
|
||||
# returns a password (encrypted or clear text)
|
||||
password = users(ah["username"])
|
||||
else:
|
||||
if not isinstance(users, dict):
|
||||
raise ValueError("Authentication users must be a dictionary")
|
||||
|
||||
# fetch the user password
|
||||
password = users.get(ah["username"], None)
|
||||
|
||||
# validate the authorization by re-computing it here
|
||||
# and compare it with what the user-agent provided
|
||||
if httpauth.checkResponse(ah, password, method=request.method,
|
||||
encrypt=encrypt, realm=realm):
|
||||
request.login = ah["username"]
|
||||
return True
|
||||
|
||||
request.login = False
|
||||
return False
|
||||
|
||||
|
||||
def basic_auth(realm, users, encrypt=None, debug=False):
|
||||
"""If auth fails, raise 401 with a basic authentication header.
|
||||
|
||||
realm
|
||||
A string containing the authentication realm.
|
||||
|
||||
users
|
||||
A dict of the form: {username: password} or a callable returning
|
||||
a dict.
|
||||
|
||||
encrypt
|
||||
callable used to encrypt the password returned from the user-agent.
|
||||
if None it defaults to a md5 encryption.
|
||||
|
||||
"""
|
||||
if check_auth(users, encrypt):
|
||||
if debug:
|
||||
cherrypy.log('Auth successful', 'TOOLS.BASIC_AUTH')
|
||||
return
|
||||
|
||||
# inform the user-agent this path is protected
|
||||
cherrypy.serving.response.headers[
|
||||
'www-authenticate'] = httpauth.basicAuth(realm)
|
||||
|
||||
raise cherrypy.HTTPError(
|
||||
401, "You are not authorized to access that resource")
|
||||
|
||||
|
||||
def digest_auth(realm, users, debug=False):
|
||||
"""If auth fails, raise 401 with a digest authentication header.
|
||||
|
||||
realm
|
||||
A string containing the authentication realm.
|
||||
users
|
||||
A dict of the form: {username: password} or a callable returning
|
||||
a dict.
|
||||
"""
|
||||
if check_auth(users, realm=realm):
|
||||
if debug:
|
||||
cherrypy.log('Auth successful', 'TOOLS.DIGEST_AUTH')
|
||||
return
|
||||
|
||||
# inform the user-agent this path is protected
|
||||
cherrypy.serving.response.headers[
|
||||
'www-authenticate'] = httpauth.digestAuth(realm)
|
||||
|
||||
raise cherrypy.HTTPError(
|
||||
401, "You are not authorized to access that resource")
|
||||
90
lib/cherrypy/lib/auth_basic.py
Normal file
90
lib/cherrypy/lib/auth_basic.py
Normal file
@@ -0,0 +1,90 @@
|
||||
# This file is part of CherryPy <http://www.cherrypy.org/>
|
||||
# -*- coding: utf-8 -*-
|
||||
# vim:ts=4:sw=4:expandtab:fileencoding=utf-8
|
||||
|
||||
__doc__ = """This module provides a CherryPy 3.x tool which implements
|
||||
the server-side of HTTP Basic Access Authentication, as described in
|
||||
:rfc:`2617`.
|
||||
|
||||
Example usage, using the built-in checkpassword_dict function which uses a dict
|
||||
as the credentials store::
|
||||
|
||||
userpassdict = {'bird' : 'bebop', 'ornette' : 'wayout'}
|
||||
checkpassword = cherrypy.lib.auth_basic.checkpassword_dict(userpassdict)
|
||||
basic_auth = {'tools.auth_basic.on': True,
|
||||
'tools.auth_basic.realm': 'earth',
|
||||
'tools.auth_basic.checkpassword': checkpassword,
|
||||
}
|
||||
app_config = { '/' : basic_auth }
|
||||
|
||||
"""
|
||||
|
||||
__author__ = 'visteya'
|
||||
__date__ = 'April 2009'
|
||||
|
||||
import binascii
|
||||
from cherrypy._cpcompat import base64_decode
|
||||
import cherrypy
|
||||
|
||||
|
||||
def checkpassword_dict(user_password_dict):
|
||||
"""Returns a checkpassword function which checks credentials
|
||||
against a dictionary of the form: {username : password}.
|
||||
|
||||
If you want a simple dictionary-based authentication scheme, use
|
||||
checkpassword_dict(my_credentials_dict) as the value for the
|
||||
checkpassword argument to basic_auth().
|
||||
"""
|
||||
def checkpassword(realm, user, password):
|
||||
p = user_password_dict.get(user)
|
||||
return p and p == password or False
|
||||
|
||||
return checkpassword
|
||||
|
||||
|
||||
def basic_auth(realm, checkpassword, debug=False):
|
||||
"""A CherryPy tool which hooks at before_handler to perform
|
||||
HTTP Basic Access Authentication, as specified in :rfc:`2617`.
|
||||
|
||||
If the request has an 'authorization' header with a 'Basic' scheme, this
|
||||
tool attempts to authenticate the credentials supplied in that header. If
|
||||
the request has no 'authorization' header, or if it does but the scheme is
|
||||
not 'Basic', or if authentication fails, the tool sends a 401 response with
|
||||
a 'WWW-Authenticate' Basic header.
|
||||
|
||||
realm
|
||||
A string containing the authentication realm.
|
||||
|
||||
checkpassword
|
||||
A callable which checks the authentication credentials.
|
||||
Its signature is checkpassword(realm, username, password). where
|
||||
username and password are the values obtained from the request's
|
||||
'authorization' header. If authentication succeeds, checkpassword
|
||||
returns True, else it returns False.
|
||||
|
||||
"""
|
||||
|
||||
if '"' in realm:
|
||||
raise ValueError('Realm cannot contain the " (quote) character.')
|
||||
request = cherrypy.serving.request
|
||||
|
||||
auth_header = request.headers.get('authorization')
|
||||
if auth_header is not None:
|
||||
try:
|
||||
scheme, params = auth_header.split(' ', 1)
|
||||
if scheme.lower() == 'basic':
|
||||
username, password = base64_decode(params).split(':', 1)
|
||||
if checkpassword(realm, username, password):
|
||||
if debug:
|
||||
cherrypy.log('Auth succeeded', 'TOOLS.AUTH_BASIC')
|
||||
request.login = username
|
||||
return # successful authentication
|
||||
# split() error, base64.decodestring() error
|
||||
except (ValueError, binascii.Error):
|
||||
raise cherrypy.HTTPError(400, 'Bad Request')
|
||||
|
||||
# Respond with 401 status and a WWW-Authenticate header
|
||||
cherrypy.serving.response.headers[
|
||||
'www-authenticate'] = 'Basic realm="%s"' % realm
|
||||
raise cherrypy.HTTPError(
|
||||
401, "You are not authorized to access that resource")
|
||||
390
lib/cherrypy/lib/auth_digest.py
Normal file
390
lib/cherrypy/lib/auth_digest.py
Normal file
@@ -0,0 +1,390 @@
|
||||
# This file is part of CherryPy <http://www.cherrypy.org/>
|
||||
# -*- coding: utf-8 -*-
|
||||
# vim:ts=4:sw=4:expandtab:fileencoding=utf-8
|
||||
|
||||
__doc__ = """An implementation of the server-side of HTTP Digest Access
|
||||
Authentication, which is described in :rfc:`2617`.
|
||||
|
||||
Example usage, using the built-in get_ha1_dict_plain function which uses a dict
|
||||
of plaintext passwords as the credentials store::
|
||||
|
||||
userpassdict = {'alice' : '4x5istwelve'}
|
||||
get_ha1 = cherrypy.lib.auth_digest.get_ha1_dict_plain(userpassdict)
|
||||
digest_auth = {'tools.auth_digest.on': True,
|
||||
'tools.auth_digest.realm': 'wonderland',
|
||||
'tools.auth_digest.get_ha1': get_ha1,
|
||||
'tools.auth_digest.key': 'a565c27146791cfb',
|
||||
}
|
||||
app_config = { '/' : digest_auth }
|
||||
"""
|
||||
|
||||
__author__ = 'visteya'
|
||||
__date__ = 'April 2009'
|
||||
|
||||
|
||||
import time
|
||||
from cherrypy._cpcompat import parse_http_list, parse_keqv_list
|
||||
|
||||
import cherrypy
|
||||
from cherrypy._cpcompat import md5, ntob
|
||||
md5_hex = lambda s: md5(ntob(s)).hexdigest()
|
||||
|
||||
qop_auth = 'auth'
|
||||
qop_auth_int = 'auth-int'
|
||||
valid_qops = (qop_auth, qop_auth_int)
|
||||
|
||||
valid_algorithms = ('MD5', 'MD5-sess')
|
||||
|
||||
|
||||
def TRACE(msg):
|
||||
cherrypy.log(msg, context='TOOLS.AUTH_DIGEST')
|
||||
|
||||
# Three helper functions for users of the tool, providing three variants
|
||||
# of get_ha1() functions for three different kinds of credential stores.
|
||||
|
||||
|
||||
def get_ha1_dict_plain(user_password_dict):
|
||||
"""Returns a get_ha1 function which obtains a plaintext password from a
|
||||
dictionary of the form: {username : password}.
|
||||
|
||||
If you want a simple dictionary-based authentication scheme, with plaintext
|
||||
passwords, use get_ha1_dict_plain(my_userpass_dict) as the value for the
|
||||
get_ha1 argument to digest_auth().
|
||||
"""
|
||||
def get_ha1(realm, username):
|
||||
password = user_password_dict.get(username)
|
||||
if password:
|
||||
return md5_hex('%s:%s:%s' % (username, realm, password))
|
||||
return None
|
||||
|
||||
return get_ha1
|
||||
|
||||
|
||||
def get_ha1_dict(user_ha1_dict):
|
||||
"""Returns a get_ha1 function which obtains a HA1 password hash from a
|
||||
dictionary of the form: {username : HA1}.
|
||||
|
||||
If you want a dictionary-based authentication scheme, but with
|
||||
pre-computed HA1 hashes instead of plain-text passwords, use
|
||||
get_ha1_dict(my_userha1_dict) as the value for the get_ha1
|
||||
argument to digest_auth().
|
||||
"""
|
||||
def get_ha1(realm, username):
|
||||
return user_ha1_dict.get(username)
|
||||
|
||||
return get_ha1
|
||||
|
||||
|
||||
def get_ha1_file_htdigest(filename):
|
||||
"""Returns a get_ha1 function which obtains a HA1 password hash from a
|
||||
flat file with lines of the same format as that produced by the Apache
|
||||
htdigest utility. For example, for realm 'wonderland', username 'alice',
|
||||
and password '4x5istwelve', the htdigest line would be::
|
||||
|
||||
alice:wonderland:3238cdfe91a8b2ed8e39646921a02d4c
|
||||
|
||||
If you want to use an Apache htdigest file as the credentials store,
|
||||
then use get_ha1_file_htdigest(my_htdigest_file) as the value for the
|
||||
get_ha1 argument to digest_auth(). It is recommended that the filename
|
||||
argument be an absolute path, to avoid problems.
|
||||
"""
|
||||
def get_ha1(realm, username):
|
||||
result = None
|
||||
f = open(filename, 'r')
|
||||
for line in f:
|
||||
u, r, ha1 = line.rstrip().split(':')
|
||||
if u == username and r == realm:
|
||||
result = ha1
|
||||
break
|
||||
f.close()
|
||||
return result
|
||||
|
||||
return get_ha1
|
||||
|
||||
|
||||
def synthesize_nonce(s, key, timestamp=None):
|
||||
"""Synthesize a nonce value which resists spoofing and can be checked
|
||||
for staleness. Returns a string suitable as the value for 'nonce' in
|
||||
the www-authenticate header.
|
||||
|
||||
s
|
||||
A string related to the resource, such as the hostname of the server.
|
||||
|
||||
key
|
||||
A secret string known only to the server.
|
||||
|
||||
timestamp
|
||||
An integer seconds-since-the-epoch timestamp
|
||||
|
||||
"""
|
||||
if timestamp is None:
|
||||
timestamp = int(time.time())
|
||||
h = md5_hex('%s:%s:%s' % (timestamp, s, key))
|
||||
nonce = '%s:%s' % (timestamp, h)
|
||||
return nonce
|
||||
|
||||
|
||||
def H(s):
|
||||
"""The hash function H"""
|
||||
return md5_hex(s)
|
||||
|
||||
|
||||
class HttpDigestAuthorization (object):
|
||||
|
||||
"""Class to parse a Digest Authorization header and perform re-calculation
|
||||
of the digest.
|
||||
"""
|
||||
|
||||
def errmsg(self, s):
|
||||
return 'Digest Authorization header: %s' % s
|
||||
|
||||
def __init__(self, auth_header, http_method, debug=False):
|
||||
self.http_method = http_method
|
||||
self.debug = debug
|
||||
scheme, params = auth_header.split(" ", 1)
|
||||
self.scheme = scheme.lower()
|
||||
if self.scheme != 'digest':
|
||||
raise ValueError('Authorization scheme is not "Digest"')
|
||||
|
||||
self.auth_header = auth_header
|
||||
|
||||
# make a dict of the params
|
||||
items = parse_http_list(params)
|
||||
paramsd = parse_keqv_list(items)
|
||||
|
||||
self.realm = paramsd.get('realm')
|
||||
self.username = paramsd.get('username')
|
||||
self.nonce = paramsd.get('nonce')
|
||||
self.uri = paramsd.get('uri')
|
||||
self.method = paramsd.get('method')
|
||||
self.response = paramsd.get('response') # the response digest
|
||||
self.algorithm = paramsd.get('algorithm', 'MD5').upper()
|
||||
self.cnonce = paramsd.get('cnonce')
|
||||
self.opaque = paramsd.get('opaque')
|
||||
self.qop = paramsd.get('qop') # qop
|
||||
self.nc = paramsd.get('nc') # nonce count
|
||||
|
||||
# perform some correctness checks
|
||||
if self.algorithm not in valid_algorithms:
|
||||
raise ValueError(
|
||||
self.errmsg("Unsupported value for algorithm: '%s'" %
|
||||
self.algorithm))
|
||||
|
||||
has_reqd = (
|
||||
self.username and
|
||||
self.realm and
|
||||
self.nonce and
|
||||
self.uri and
|
||||
self.response
|
||||
)
|
||||
if not has_reqd:
|
||||
raise ValueError(
|
||||
self.errmsg("Not all required parameters are present."))
|
||||
|
||||
if self.qop:
|
||||
if self.qop not in valid_qops:
|
||||
raise ValueError(
|
||||
self.errmsg("Unsupported value for qop: '%s'" % self.qop))
|
||||
if not (self.cnonce and self.nc):
|
||||
raise ValueError(
|
||||
self.errmsg("If qop is sent then "
|
||||
"cnonce and nc MUST be present"))
|
||||
else:
|
||||
if self.cnonce or self.nc:
|
||||
raise ValueError(
|
||||
self.errmsg("If qop is not sent, "
|
||||
"neither cnonce nor nc can be present"))
|
||||
|
||||
def __str__(self):
|
||||
return 'authorization : %s' % self.auth_header
|
||||
|
||||
def validate_nonce(self, s, key):
|
||||
"""Validate the nonce.
|
||||
Returns True if nonce was generated by synthesize_nonce() and the
|
||||
timestamp is not spoofed, else returns False.
|
||||
|
||||
s
|
||||
A string related to the resource, such as the hostname of
|
||||
the server.
|
||||
|
||||
key
|
||||
A secret string known only to the server.
|
||||
|
||||
Both s and key must be the same values which were used to synthesize
|
||||
the nonce we are trying to validate.
|
||||
"""
|
||||
try:
|
||||
timestamp, hashpart = self.nonce.split(':', 1)
|
||||
s_timestamp, s_hashpart = synthesize_nonce(
|
||||
s, key, timestamp).split(':', 1)
|
||||
is_valid = s_hashpart == hashpart
|
||||
if self.debug:
|
||||
TRACE('validate_nonce: %s' % is_valid)
|
||||
return is_valid
|
||||
except ValueError: # split() error
|
||||
pass
|
||||
return False
|
||||
|
||||
def is_nonce_stale(self, max_age_seconds=600):
|
||||
"""Returns True if a validated nonce is stale. The nonce contains a
|
||||
timestamp in plaintext and also a secure hash of the timestamp.
|
||||
You should first validate the nonce to ensure the plaintext
|
||||
timestamp is not spoofed.
|
||||
"""
|
||||
try:
|
||||
timestamp, hashpart = self.nonce.split(':', 1)
|
||||
if int(timestamp) + max_age_seconds > int(time.time()):
|
||||
return False
|
||||
except ValueError: # int() error
|
||||
pass
|
||||
if self.debug:
|
||||
TRACE("nonce is stale")
|
||||
return True
|
||||
|
||||
def HA2(self, entity_body=''):
|
||||
"""Returns the H(A2) string. See :rfc:`2617` section 3.2.2.3."""
|
||||
# RFC 2617 3.2.2.3
|
||||
# If the "qop" directive's value is "auth" or is unspecified,
|
||||
# then A2 is:
|
||||
# A2 = method ":" digest-uri-value
|
||||
#
|
||||
# If the "qop" value is "auth-int", then A2 is:
|
||||
# A2 = method ":" digest-uri-value ":" H(entity-body)
|
||||
if self.qop is None or self.qop == "auth":
|
||||
a2 = '%s:%s' % (self.http_method, self.uri)
|
||||
elif self.qop == "auth-int":
|
||||
a2 = "%s:%s:%s" % (self.http_method, self.uri, H(entity_body))
|
||||
else:
|
||||
# in theory, this should never happen, since I validate qop in
|
||||
# __init__()
|
||||
raise ValueError(self.errmsg("Unrecognized value for qop!"))
|
||||
return H(a2)
|
||||
|
||||
def request_digest(self, ha1, entity_body=''):
|
||||
"""Calculates the Request-Digest. See :rfc:`2617` section 3.2.2.1.
|
||||
|
||||
ha1
|
||||
The HA1 string obtained from the credentials store.
|
||||
|
||||
entity_body
|
||||
If 'qop' is set to 'auth-int', then A2 includes a hash
|
||||
of the "entity body". The entity body is the part of the
|
||||
message which follows the HTTP headers. See :rfc:`2617` section
|
||||
4.3. This refers to the entity the user agent sent in the
|
||||
request which has the Authorization header. Typically GET
|
||||
requests don't have an entity, and POST requests do.
|
||||
|
||||
"""
|
||||
ha2 = self.HA2(entity_body)
|
||||
# Request-Digest -- RFC 2617 3.2.2.1
|
||||
if self.qop:
|
||||
req = "%s:%s:%s:%s:%s" % (
|
||||
self.nonce, self.nc, self.cnonce, self.qop, ha2)
|
||||
else:
|
||||
req = "%s:%s" % (self.nonce, ha2)
|
||||
|
||||
# RFC 2617 3.2.2.2
|
||||
#
|
||||
# If the "algorithm" directive's value is "MD5" or is unspecified,
|
||||
# then A1 is:
|
||||
# A1 = unq(username-value) ":" unq(realm-value) ":" passwd
|
||||
#
|
||||
# If the "algorithm" directive's value is "MD5-sess", then A1 is
|
||||
# calculated only once - on the first request by the client following
|
||||
# receipt of a WWW-Authenticate challenge from the server.
|
||||
# A1 = H( unq(username-value) ":" unq(realm-value) ":" passwd )
|
||||
# ":" unq(nonce-value) ":" unq(cnonce-value)
|
||||
if self.algorithm == 'MD5-sess':
|
||||
ha1 = H('%s:%s:%s' % (ha1, self.nonce, self.cnonce))
|
||||
|
||||
digest = H('%s:%s' % (ha1, req))
|
||||
return digest
|
||||
|
||||
|
||||
def www_authenticate(realm, key, algorithm='MD5', nonce=None, qop=qop_auth,
|
||||
stale=False):
|
||||
"""Constructs a WWW-Authenticate header for Digest authentication."""
|
||||
if qop not in valid_qops:
|
||||
raise ValueError("Unsupported value for qop: '%s'" % qop)
|
||||
if algorithm not in valid_algorithms:
|
||||
raise ValueError("Unsupported value for algorithm: '%s'" % algorithm)
|
||||
|
||||
if nonce is None:
|
||||
nonce = synthesize_nonce(realm, key)
|
||||
s = 'Digest realm="%s", nonce="%s", algorithm="%s", qop="%s"' % (
|
||||
realm, nonce, algorithm, qop)
|
||||
if stale:
|
||||
s += ', stale="true"'
|
||||
return s
|
||||
|
||||
|
||||
def digest_auth(realm, get_ha1, key, debug=False):
|
||||
"""A CherryPy tool which hooks at before_handler to perform
|
||||
HTTP Digest Access Authentication, as specified in :rfc:`2617`.
|
||||
|
||||
If the request has an 'authorization' header with a 'Digest' scheme,
|
||||
this tool authenticates the credentials supplied in that header.
|
||||
If the request has no 'authorization' header, or if it does but the
|
||||
scheme is not "Digest", or if authentication fails, the tool sends
|
||||
a 401 response with a 'WWW-Authenticate' Digest header.
|
||||
|
||||
realm
|
||||
A string containing the authentication realm.
|
||||
|
||||
get_ha1
|
||||
A callable which looks up a username in a credentials store
|
||||
and returns the HA1 string, which is defined in the RFC to be
|
||||
MD5(username : realm : password). The function's signature is:
|
||||
``get_ha1(realm, username)``
|
||||
where username is obtained from the request's 'authorization' header.
|
||||
If username is not found in the credentials store, get_ha1() returns
|
||||
None.
|
||||
|
||||
key
|
||||
A secret string known only to the server, used in the synthesis
|
||||
of nonces.
|
||||
|
||||
"""
|
||||
request = cherrypy.serving.request
|
||||
|
||||
auth_header = request.headers.get('authorization')
|
||||
nonce_is_stale = False
|
||||
if auth_header is not None:
|
||||
try:
|
||||
auth = HttpDigestAuthorization(
|
||||
auth_header, request.method, debug=debug)
|
||||
except ValueError:
|
||||
raise cherrypy.HTTPError(
|
||||
400, "The Authorization header could not be parsed.")
|
||||
|
||||
if debug:
|
||||
TRACE(str(auth))
|
||||
|
||||
if auth.validate_nonce(realm, key):
|
||||
ha1 = get_ha1(realm, auth.username)
|
||||
if ha1 is not None:
|
||||
# note that for request.body to be available we need to
|
||||
# hook in at before_handler, not on_start_resource like
|
||||
# 3.1.x digest_auth does.
|
||||
digest = auth.request_digest(ha1, entity_body=request.body)
|
||||
if digest == auth.response: # authenticated
|
||||
if debug:
|
||||
TRACE("digest matches auth.response")
|
||||
# Now check if nonce is stale.
|
||||
# The choice of ten minutes' lifetime for nonce is somewhat
|
||||
# arbitrary
|
||||
nonce_is_stale = auth.is_nonce_stale(max_age_seconds=600)
|
||||
if not nonce_is_stale:
|
||||
request.login = auth.username
|
||||
if debug:
|
||||
TRACE("authentication of %s successful" %
|
||||
auth.username)
|
||||
return
|
||||
|
||||
# Respond with 401 status and a WWW-Authenticate header
|
||||
header = www_authenticate(realm, key, stale=nonce_is_stale)
|
||||
if debug:
|
||||
TRACE(header)
|
||||
cherrypy.serving.response.headers['WWW-Authenticate'] = header
|
||||
raise cherrypy.HTTPError(
|
||||
401, "You are not authorized to access that resource")
|
||||
470
lib/cherrypy/lib/caching.py
Normal file
470
lib/cherrypy/lib/caching.py
Normal file
@@ -0,0 +1,470 @@
|
||||
"""
|
||||
CherryPy implements a simple caching system as a pluggable Tool. This tool
|
||||
tries to be an (in-process) HTTP/1.1-compliant cache. It's not quite there
|
||||
yet, but it's probably good enough for most sites.
|
||||
|
||||
In general, GET responses are cached (along with selecting headers) and, if
|
||||
another request arrives for the same resource, the caching Tool will return 304
|
||||
Not Modified if possible, or serve the cached response otherwise. It also sets
|
||||
request.cached to True if serving a cached representation, and sets
|
||||
request.cacheable to False (so it doesn't get cached again).
|
||||
|
||||
If POST, PUT, or DELETE requests are made for a cached resource, they
|
||||
invalidate (delete) any cached response.
|
||||
|
||||
Usage
|
||||
=====
|
||||
|
||||
Configuration file example::
|
||||
|
||||
[/]
|
||||
tools.caching.on = True
|
||||
tools.caching.delay = 3600
|
||||
|
||||
You may use a class other than the default
|
||||
:class:`MemoryCache<cherrypy.lib.caching.MemoryCache>` by supplying the config
|
||||
entry ``cache_class``; supply the full dotted name of the replacement class
|
||||
as the config value. It must implement the basic methods ``get``, ``put``,
|
||||
``delete``, and ``clear``.
|
||||
|
||||
You may set any attribute, including overriding methods, on the cache
|
||||
instance by providing them in config. The above sets the
|
||||
:attr:`delay<cherrypy.lib.caching.MemoryCache.delay>` attribute, for example.
|
||||
"""
|
||||
|
||||
import datetime
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
|
||||
import cherrypy
|
||||
from cherrypy.lib import cptools, httputil
|
||||
from cherrypy._cpcompat import copyitems, ntob, set_daemon, sorted, Event
|
||||
|
||||
|
||||
class Cache(object):
|
||||
|
||||
"""Base class for Cache implementations."""
|
||||
|
||||
def get(self):
|
||||
"""Return the current variant if in the cache, else None."""
|
||||
raise NotImplemented
|
||||
|
||||
def put(self, obj, size):
|
||||
"""Store the current variant in the cache."""
|
||||
raise NotImplemented
|
||||
|
||||
def delete(self):
|
||||
"""Remove ALL cached variants of the current resource."""
|
||||
raise NotImplemented
|
||||
|
||||
def clear(self):
|
||||
"""Reset the cache to its initial, empty state."""
|
||||
raise NotImplemented
|
||||
|
||||
|
||||
# ------------------------------ Memory Cache ------------------------------- #
|
||||
class AntiStampedeCache(dict):
|
||||
|
||||
"""A storage system for cached items which reduces stampede collisions."""
|
||||
|
||||
def wait(self, key, timeout=5, debug=False):
|
||||
"""Return the cached value for the given key, or None.
|
||||
|
||||
If timeout is not None, and the value is already
|
||||
being calculated by another thread, wait until the given timeout has
|
||||
elapsed. If the value is available before the timeout expires, it is
|
||||
returned. If not, None is returned, and a sentinel placed in the cache
|
||||
to signal other threads to wait.
|
||||
|
||||
If timeout is None, no waiting is performed nor sentinels used.
|
||||
"""
|
||||
value = self.get(key)
|
||||
if isinstance(value, Event):
|
||||
if timeout is None:
|
||||
# Ignore the other thread and recalc it ourselves.
|
||||
if debug:
|
||||
cherrypy.log('No timeout', 'TOOLS.CACHING')
|
||||
return None
|
||||
|
||||
# Wait until it's done or times out.
|
||||
if debug:
|
||||
cherrypy.log('Waiting up to %s seconds' %
|
||||
timeout, 'TOOLS.CACHING')
|
||||
value.wait(timeout)
|
||||
if value.result is not None:
|
||||
# The other thread finished its calculation. Use it.
|
||||
if debug:
|
||||
cherrypy.log('Result!', 'TOOLS.CACHING')
|
||||
return value.result
|
||||
# Timed out. Stick an Event in the slot so other threads wait
|
||||
# on this one to finish calculating the value.
|
||||
if debug:
|
||||
cherrypy.log('Timed out', 'TOOLS.CACHING')
|
||||
e = threading.Event()
|
||||
e.result = None
|
||||
dict.__setitem__(self, key, e)
|
||||
|
||||
return None
|
||||
elif value is None:
|
||||
# Stick an Event in the slot so other threads wait
|
||||
# on this one to finish calculating the value.
|
||||
if debug:
|
||||
cherrypy.log('Timed out', 'TOOLS.CACHING')
|
||||
e = threading.Event()
|
||||
e.result = None
|
||||
dict.__setitem__(self, key, e)
|
||||
return value
|
||||
|
||||
def __setitem__(self, key, value):
|
||||
"""Set the cached value for the given key."""
|
||||
existing = self.get(key)
|
||||
dict.__setitem__(self, key, value)
|
||||
if isinstance(existing, Event):
|
||||
# Set Event.result so other threads waiting on it have
|
||||
# immediate access without needing to poll the cache again.
|
||||
existing.result = value
|
||||
existing.set()
|
||||
|
||||
|
||||
class MemoryCache(Cache):
|
||||
|
||||
"""An in-memory cache for varying response content.
|
||||
|
||||
Each key in self.store is a URI, and each value is an AntiStampedeCache.
|
||||
The response for any given URI may vary based on the values of
|
||||
"selecting request headers"; that is, those named in the Vary
|
||||
response header. We assume the list of header names to be constant
|
||||
for each URI throughout the lifetime of the application, and store
|
||||
that list in ``self.store[uri].selecting_headers``.
|
||||
|
||||
The items contained in ``self.store[uri]`` have keys which are tuples of
|
||||
request header values (in the same order as the names in its
|
||||
selecting_headers), and values which are the actual responses.
|
||||
"""
|
||||
|
||||
maxobjects = 1000
|
||||
"""The maximum number of cached objects; defaults to 1000."""
|
||||
|
||||
maxobj_size = 100000
|
||||
"""The maximum size of each cached object in bytes; defaults to 100 KB."""
|
||||
|
||||
maxsize = 10000000
|
||||
"""The maximum size of the entire cache in bytes; defaults to 10 MB."""
|
||||
|
||||
delay = 600
|
||||
"""Seconds until the cached content expires; defaults to 600 (10 minutes).
|
||||
"""
|
||||
|
||||
antistampede_timeout = 5
|
||||
"""Seconds to wait for other threads to release a cache lock."""
|
||||
|
||||
expire_freq = 0.1
|
||||
"""Seconds to sleep between cache expiration sweeps."""
|
||||
|
||||
debug = False
|
||||
|
||||
def __init__(self):
|
||||
self.clear()
|
||||
|
||||
# Run self.expire_cache in a separate daemon thread.
|
||||
t = threading.Thread(target=self.expire_cache, name='expire_cache')
|
||||
self.expiration_thread = t
|
||||
set_daemon(t, True)
|
||||
t.start()
|
||||
|
||||
def clear(self):
|
||||
"""Reset the cache to its initial, empty state."""
|
||||
self.store = {}
|
||||
self.expirations = {}
|
||||
self.tot_puts = 0
|
||||
self.tot_gets = 0
|
||||
self.tot_hist = 0
|
||||
self.tot_expires = 0
|
||||
self.tot_non_modified = 0
|
||||
self.cursize = 0
|
||||
|
||||
def expire_cache(self):
|
||||
"""Continuously examine cached objects, expiring stale ones.
|
||||
|
||||
This function is designed to be run in its own daemon thread,
|
||||
referenced at ``self.expiration_thread``.
|
||||
"""
|
||||
# It's possible that "time" will be set to None
|
||||
# arbitrarily, so we check "while time" to avoid exceptions.
|
||||
# See tickets #99 and #180 for more information.
|
||||
while time:
|
||||
now = time.time()
|
||||
# Must make a copy of expirations so it doesn't change size
|
||||
# during iteration
|
||||
for expiration_time, objects in copyitems(self.expirations):
|
||||
if expiration_time <= now:
|
||||
for obj_size, uri, sel_header_values in objects:
|
||||
try:
|
||||
del self.store[uri][tuple(sel_header_values)]
|
||||
self.tot_expires += 1
|
||||
self.cursize -= obj_size
|
||||
except KeyError:
|
||||
# the key may have been deleted elsewhere
|
||||
pass
|
||||
del self.expirations[expiration_time]
|
||||
time.sleep(self.expire_freq)
|
||||
|
||||
def get(self):
|
||||
"""Return the current variant if in the cache, else None."""
|
||||
request = cherrypy.serving.request
|
||||
self.tot_gets += 1
|
||||
|
||||
uri = cherrypy.url(qs=request.query_string)
|
||||
uricache = self.store.get(uri)
|
||||
if uricache is None:
|
||||
return None
|
||||
|
||||
header_values = [request.headers.get(h, '')
|
||||
for h in uricache.selecting_headers]
|
||||
variant = uricache.wait(key=tuple(sorted(header_values)),
|
||||
timeout=self.antistampede_timeout,
|
||||
debug=self.debug)
|
||||
if variant is not None:
|
||||
self.tot_hist += 1
|
||||
return variant
|
||||
|
||||
def put(self, variant, size):
|
||||
"""Store the current variant in the cache."""
|
||||
request = cherrypy.serving.request
|
||||
response = cherrypy.serving.response
|
||||
|
||||
uri = cherrypy.url(qs=request.query_string)
|
||||
uricache = self.store.get(uri)
|
||||
if uricache is None:
|
||||
uricache = AntiStampedeCache()
|
||||
uricache.selecting_headers = [
|
||||
e.value for e in response.headers.elements('Vary')]
|
||||
self.store[uri] = uricache
|
||||
|
||||
if len(self.store) < self.maxobjects:
|
||||
total_size = self.cursize + size
|
||||
|
||||
# checks if there's space for the object
|
||||
if (size < self.maxobj_size and total_size < self.maxsize):
|
||||
# add to the expirations list
|
||||
expiration_time = response.time + self.delay
|
||||
bucket = self.expirations.setdefault(expiration_time, [])
|
||||
bucket.append((size, uri, uricache.selecting_headers))
|
||||
|
||||
# add to the cache
|
||||
header_values = [request.headers.get(h, '')
|
||||
for h in uricache.selecting_headers]
|
||||
uricache[tuple(sorted(header_values))] = variant
|
||||
self.tot_puts += 1
|
||||
self.cursize = total_size
|
||||
|
||||
def delete(self):
|
||||
"""Remove ALL cached variants of the current resource."""
|
||||
uri = cherrypy.url(qs=cherrypy.serving.request.query_string)
|
||||
self.store.pop(uri, None)
|
||||
|
||||
|
||||
def get(invalid_methods=("POST", "PUT", "DELETE"), debug=False, **kwargs):
|
||||
"""Try to obtain cached output. If fresh enough, raise HTTPError(304).
|
||||
|
||||
If POST, PUT, or DELETE:
|
||||
* invalidates (deletes) any cached response for this resource
|
||||
* sets request.cached = False
|
||||
* sets request.cacheable = False
|
||||
|
||||
else if a cached copy exists:
|
||||
* sets request.cached = True
|
||||
* sets request.cacheable = False
|
||||
* sets response.headers to the cached values
|
||||
* checks the cached Last-Modified response header against the
|
||||
current If-(Un)Modified-Since request headers; raises 304
|
||||
if necessary.
|
||||
* sets response.status and response.body to the cached values
|
||||
* returns True
|
||||
|
||||
otherwise:
|
||||
* sets request.cached = False
|
||||
* sets request.cacheable = True
|
||||
* returns False
|
||||
"""
|
||||
request = cherrypy.serving.request
|
||||
response = cherrypy.serving.response
|
||||
|
||||
if not hasattr(cherrypy, "_cache"):
|
||||
# Make a process-wide Cache object.
|
||||
cherrypy._cache = kwargs.pop("cache_class", MemoryCache)()
|
||||
|
||||
# Take all remaining kwargs and set them on the Cache object.
|
||||
for k, v in kwargs.items():
|
||||
setattr(cherrypy._cache, k, v)
|
||||
cherrypy._cache.debug = debug
|
||||
|
||||
# POST, PUT, DELETE should invalidate (delete) the cached copy.
|
||||
# See http://www.w3.org/Protocols/rfc2616/rfc2616-sec13.html#sec13.10.
|
||||
if request.method in invalid_methods:
|
||||
if debug:
|
||||
cherrypy.log('request.method %r in invalid_methods %r' %
|
||||
(request.method, invalid_methods), 'TOOLS.CACHING')
|
||||
cherrypy._cache.delete()
|
||||
request.cached = False
|
||||
request.cacheable = False
|
||||
return False
|
||||
|
||||
if 'no-cache' in [e.value for e in request.headers.elements('Pragma')]:
|
||||
request.cached = False
|
||||
request.cacheable = True
|
||||
return False
|
||||
|
||||
cache_data = cherrypy._cache.get()
|
||||
request.cached = bool(cache_data)
|
||||
request.cacheable = not request.cached
|
||||
if request.cached:
|
||||
# Serve the cached copy.
|
||||
max_age = cherrypy._cache.delay
|
||||
for v in [e.value for e in request.headers.elements('Cache-Control')]:
|
||||
atoms = v.split('=', 1)
|
||||
directive = atoms.pop(0)
|
||||
if directive == 'max-age':
|
||||
if len(atoms) != 1 or not atoms[0].isdigit():
|
||||
raise cherrypy.HTTPError(
|
||||
400, "Invalid Cache-Control header")
|
||||
max_age = int(atoms[0])
|
||||
break
|
||||
elif directive == 'no-cache':
|
||||
if debug:
|
||||
cherrypy.log(
|
||||
'Ignoring cache due to Cache-Control: no-cache',
|
||||
'TOOLS.CACHING')
|
||||
request.cached = False
|
||||
request.cacheable = True
|
||||
return False
|
||||
|
||||
if debug:
|
||||
cherrypy.log('Reading response from cache', 'TOOLS.CACHING')
|
||||
s, h, b, create_time = cache_data
|
||||
age = int(response.time - create_time)
|
||||
if (age > max_age):
|
||||
if debug:
|
||||
cherrypy.log('Ignoring cache due to age > %d' % max_age,
|
||||
'TOOLS.CACHING')
|
||||
request.cached = False
|
||||
request.cacheable = True
|
||||
return False
|
||||
|
||||
# Copy the response headers. See
|
||||
# https://bitbucket.org/cherrypy/cherrypy/issue/721.
|
||||
response.headers = rh = httputil.HeaderMap()
|
||||
for k in h:
|
||||
dict.__setitem__(rh, k, dict.__getitem__(h, k))
|
||||
|
||||
# Add the required Age header
|
||||
response.headers["Age"] = str(age)
|
||||
|
||||
try:
|
||||
# Note that validate_since depends on a Last-Modified header;
|
||||
# this was put into the cached copy, and should have been
|
||||
# resurrected just above (response.headers = cache_data[1]).
|
||||
cptools.validate_since()
|
||||
except cherrypy.HTTPRedirect:
|
||||
x = sys.exc_info()[1]
|
||||
if x.status == 304:
|
||||
cherrypy._cache.tot_non_modified += 1
|
||||
raise
|
||||
|
||||
# serve it & get out from the request
|
||||
response.status = s
|
||||
response.body = b
|
||||
else:
|
||||
if debug:
|
||||
cherrypy.log('request is not cached', 'TOOLS.CACHING')
|
||||
return request.cached
|
||||
|
||||
|
||||
def tee_output():
|
||||
"""Tee response output to cache storage. Internal."""
|
||||
# Used by CachingTool by attaching to request.hooks
|
||||
|
||||
request = cherrypy.serving.request
|
||||
if 'no-store' in request.headers.values('Cache-Control'):
|
||||
return
|
||||
|
||||
def tee(body):
|
||||
"""Tee response.body into a list."""
|
||||
if ('no-cache' in response.headers.values('Pragma') or
|
||||
'no-store' in response.headers.values('Cache-Control')):
|
||||
for chunk in body:
|
||||
yield chunk
|
||||
return
|
||||
|
||||
output = []
|
||||
for chunk in body:
|
||||
output.append(chunk)
|
||||
yield chunk
|
||||
|
||||
# save the cache data
|
||||
body = ntob('').join(output)
|
||||
cherrypy._cache.put((response.status, response.headers or {},
|
||||
body, response.time), len(body))
|
||||
|
||||
response = cherrypy.serving.response
|
||||
response.body = tee(response.body)
|
||||
|
||||
|
||||
def expires(secs=0, force=False, debug=False):
|
||||
"""Tool for influencing cache mechanisms using the 'Expires' header.
|
||||
|
||||
secs
|
||||
Must be either an int or a datetime.timedelta, and indicates the
|
||||
number of seconds between response.time and when the response should
|
||||
expire. The 'Expires' header will be set to response.time + secs.
|
||||
If secs is zero, the 'Expires' header is set one year in the past, and
|
||||
the following "cache prevention" headers are also set:
|
||||
|
||||
* Pragma: no-cache
|
||||
* Cache-Control': no-cache, must-revalidate
|
||||
|
||||
force
|
||||
If False, the following headers are checked:
|
||||
|
||||
* Etag
|
||||
* Last-Modified
|
||||
* Age
|
||||
* Expires
|
||||
|
||||
If any are already present, none of the above response headers are set.
|
||||
|
||||
"""
|
||||
|
||||
response = cherrypy.serving.response
|
||||
headers = response.headers
|
||||
|
||||
cacheable = False
|
||||
if not force:
|
||||
# some header names that indicate that the response can be cached
|
||||
for indicator in ('Etag', 'Last-Modified', 'Age', 'Expires'):
|
||||
if indicator in headers:
|
||||
cacheable = True
|
||||
break
|
||||
|
||||
if not cacheable and not force:
|
||||
if debug:
|
||||
cherrypy.log('request is not cacheable', 'TOOLS.EXPIRES')
|
||||
else:
|
||||
if debug:
|
||||
cherrypy.log('request is cacheable', 'TOOLS.EXPIRES')
|
||||
if isinstance(secs, datetime.timedelta):
|
||||
secs = (86400 * secs.days) + secs.seconds
|
||||
|
||||
if secs == 0:
|
||||
if force or ("Pragma" not in headers):
|
||||
headers["Pragma"] = "no-cache"
|
||||
if cherrypy.serving.request.protocol >= (1, 1):
|
||||
if force or "Cache-Control" not in headers:
|
||||
headers["Cache-Control"] = "no-cache, must-revalidate"
|
||||
# Set an explicit Expires date in the past.
|
||||
expiry = httputil.HTTPDate(1169942400.0)
|
||||
else:
|
||||
expiry = httputil.HTTPDate(response.time + secs)
|
||||
if force or "Expires" not in headers:
|
||||
headers["Expires"] = expiry
|
||||
387
lib/cherrypy/lib/covercp.py
Normal file
387
lib/cherrypy/lib/covercp.py
Normal file
@@ -0,0 +1,387 @@
|
||||
"""Code-coverage tools for CherryPy.
|
||||
|
||||
To use this module, or the coverage tools in the test suite,
|
||||
you need to download 'coverage.py', either Gareth Rees' `original
|
||||
implementation <http://www.garethrees.org/2001/12/04/python-coverage/>`_
|
||||
or Ned Batchelder's `enhanced version:
|
||||
<http://www.nedbatchelder.com/code/modules/coverage.html>`_
|
||||
|
||||
To turn on coverage tracing, use the following code::
|
||||
|
||||
cherrypy.engine.subscribe('start', covercp.start)
|
||||
|
||||
DO NOT subscribe anything on the 'start_thread' channel, as previously
|
||||
recommended. Calling start once in the main thread should be sufficient
|
||||
to start coverage on all threads. Calling start again in each thread
|
||||
effectively clears any coverage data gathered up to that point.
|
||||
|
||||
Run your code, then use the ``covercp.serve()`` function to browse the
|
||||
results in a web browser. If you run this module from the command line,
|
||||
it will call ``serve()`` for you.
|
||||
"""
|
||||
|
||||
import re
|
||||
import sys
|
||||
import cgi
|
||||
from cherrypy._cpcompat import quote_plus
|
||||
import os
|
||||
import os.path
|
||||
localFile = os.path.join(os.path.dirname(__file__), "coverage.cache")
|
||||
|
||||
the_coverage = None
|
||||
try:
|
||||
from coverage import coverage
|
||||
the_coverage = coverage(data_file=localFile)
|
||||
|
||||
def start():
|
||||
the_coverage.start()
|
||||
except ImportError:
|
||||
# Setting the_coverage to None will raise errors
|
||||
# that need to be trapped downstream.
|
||||
the_coverage = None
|
||||
|
||||
import warnings
|
||||
warnings.warn(
|
||||
"No code coverage will be performed; "
|
||||
"coverage.py could not be imported.")
|
||||
|
||||
def start():
|
||||
pass
|
||||
start.priority = 20
|
||||
|
||||
TEMPLATE_MENU = """<html>
|
||||
<head>
|
||||
<title>CherryPy Coverage Menu</title>
|
||||
<style>
|
||||
body {font: 9pt Arial, serif;}
|
||||
#tree {
|
||||
font-size: 8pt;
|
||||
font-family: Andale Mono, monospace;
|
||||
white-space: pre;
|
||||
}
|
||||
#tree a:active, a:focus {
|
||||
background-color: black;
|
||||
padding: 1px;
|
||||
color: white;
|
||||
border: 0px solid #9999FF;
|
||||
-moz-outline-style: none;
|
||||
}
|
||||
.fail { color: red;}
|
||||
.pass { color: #888;}
|
||||
#pct { text-align: right;}
|
||||
h3 {
|
||||
font-size: small;
|
||||
font-weight: bold;
|
||||
font-style: italic;
|
||||
margin-top: 5px;
|
||||
}
|
||||
input { border: 1px solid #ccc; padding: 2px; }
|
||||
.directory {
|
||||
color: #933;
|
||||
font-style: italic;
|
||||
font-weight: bold;
|
||||
font-size: 10pt;
|
||||
}
|
||||
.file {
|
||||
color: #400;
|
||||
}
|
||||
a { text-decoration: none; }
|
||||
#crumbs {
|
||||
color: white;
|
||||
font-size: 8pt;
|
||||
font-family: Andale Mono, monospace;
|
||||
width: 100%;
|
||||
background-color: black;
|
||||
}
|
||||
#crumbs a {
|
||||
color: #f88;
|
||||
}
|
||||
#options {
|
||||
line-height: 2.3em;
|
||||
border: 1px solid black;
|
||||
background-color: #eee;
|
||||
padding: 4px;
|
||||
}
|
||||
#exclude {
|
||||
width: 100%;
|
||||
margin-bottom: 3px;
|
||||
border: 1px solid #999;
|
||||
}
|
||||
#submit {
|
||||
background-color: black;
|
||||
color: white;
|
||||
border: 0;
|
||||
margin-bottom: -9px;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<h2>CherryPy Coverage</h2>"""
|
||||
|
||||
TEMPLATE_FORM = """
|
||||
<div id="options">
|
||||
<form action='menu' method=GET>
|
||||
<input type='hidden' name='base' value='%(base)s' />
|
||||
Show percentages
|
||||
<input type='checkbox' %(showpct)s name='showpct' value='checked' /><br />
|
||||
Hide files over
|
||||
<input type='text' id='pct' name='pct' value='%(pct)s' size='3' />%%<br />
|
||||
Exclude files matching<br />
|
||||
<input type='text' id='exclude' name='exclude'
|
||||
value='%(exclude)s' size='20' />
|
||||
<br />
|
||||
|
||||
<input type='submit' value='Change view' id="submit"/>
|
||||
</form>
|
||||
</div>"""
|
||||
|
||||
TEMPLATE_FRAMESET = """<html>
|
||||
<head><title>CherryPy coverage data</title></head>
|
||||
<frameset cols='250, 1*'>
|
||||
<frame src='menu?base=%s' />
|
||||
<frame name='main' src='' />
|
||||
</frameset>
|
||||
</html>
|
||||
"""
|
||||
|
||||
TEMPLATE_COVERAGE = """<html>
|
||||
<head>
|
||||
<title>Coverage for %(name)s</title>
|
||||
<style>
|
||||
h2 { margin-bottom: .25em; }
|
||||
p { margin: .25em; }
|
||||
.covered { color: #000; background-color: #fff; }
|
||||
.notcovered { color: #fee; background-color: #500; }
|
||||
.excluded { color: #00f; background-color: #fff; }
|
||||
table .covered, table .notcovered, table .excluded
|
||||
{ font-family: Andale Mono, monospace;
|
||||
font-size: 10pt; white-space: pre; }
|
||||
|
||||
.lineno { background-color: #eee;}
|
||||
.notcovered .lineno { background-color: #000;}
|
||||
table { border-collapse: collapse;
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<h2>%(name)s</h2>
|
||||
<p>%(fullpath)s</p>
|
||||
<p>Coverage: %(pc)s%%</p>"""
|
||||
|
||||
TEMPLATE_LOC_COVERED = """<tr class="covered">
|
||||
<td class="lineno">%s </td>
|
||||
<td>%s</td>
|
||||
</tr>\n"""
|
||||
TEMPLATE_LOC_NOT_COVERED = """<tr class="notcovered">
|
||||
<td class="lineno">%s </td>
|
||||
<td>%s</td>
|
||||
</tr>\n"""
|
||||
TEMPLATE_LOC_EXCLUDED = """<tr class="excluded">
|
||||
<td class="lineno">%s </td>
|
||||
<td>%s</td>
|
||||
</tr>\n"""
|
||||
|
||||
TEMPLATE_ITEM = (
|
||||
"%s%s<a class='file' href='report?name=%s' target='main'>%s</a>\n"
|
||||
)
|
||||
|
||||
|
||||
def _percent(statements, missing):
|
||||
s = len(statements)
|
||||
e = s - len(missing)
|
||||
if s > 0:
|
||||
return int(round(100.0 * e / s))
|
||||
return 0
|
||||
|
||||
|
||||
def _show_branch(root, base, path, pct=0, showpct=False, exclude="",
|
||||
coverage=the_coverage):
|
||||
|
||||
# Show the directory name and any of our children
|
||||
dirs = [k for k, v in root.items() if v]
|
||||
dirs.sort()
|
||||
for name in dirs:
|
||||
newpath = os.path.join(path, name)
|
||||
|
||||
if newpath.lower().startswith(base):
|
||||
relpath = newpath[len(base):]
|
||||
yield "| " * relpath.count(os.sep)
|
||||
yield (
|
||||
"<a class='directory' "
|
||||
"href='menu?base=%s&exclude=%s'>%s</a>\n" %
|
||||
(newpath, quote_plus(exclude), name)
|
||||
)
|
||||
|
||||
for chunk in _show_branch(
|
||||
root[name], base, newpath, pct, showpct,
|
||||
exclude, coverage=coverage
|
||||
):
|
||||
yield chunk
|
||||
|
||||
# Now list the files
|
||||
if path.lower().startswith(base):
|
||||
relpath = path[len(base):]
|
||||
files = [k for k, v in root.items() if not v]
|
||||
files.sort()
|
||||
for name in files:
|
||||
newpath = os.path.join(path, name)
|
||||
|
||||
pc_str = ""
|
||||
if showpct:
|
||||
try:
|
||||
_, statements, _, missing, _ = coverage.analysis2(newpath)
|
||||
except:
|
||||
# Yes, we really want to pass on all errors.
|
||||
pass
|
||||
else:
|
||||
pc = _percent(statements, missing)
|
||||
pc_str = ("%3d%% " % pc).replace(' ', ' ')
|
||||
if pc < float(pct) or pc == -1:
|
||||
pc_str = "<span class='fail'>%s</span>" % pc_str
|
||||
else:
|
||||
pc_str = "<span class='pass'>%s</span>" % pc_str
|
||||
|
||||
yield TEMPLATE_ITEM % ("| " * (relpath.count(os.sep) + 1),
|
||||
pc_str, newpath, name)
|
||||
|
||||
|
||||
def _skip_file(path, exclude):
|
||||
if exclude:
|
||||
return bool(re.search(exclude, path))
|
||||
|
||||
|
||||
def _graft(path, tree):
|
||||
d = tree
|
||||
|
||||
p = path
|
||||
atoms = []
|
||||
while True:
|
||||
p, tail = os.path.split(p)
|
||||
if not tail:
|
||||
break
|
||||
atoms.append(tail)
|
||||
atoms.append(p)
|
||||
if p != "/":
|
||||
atoms.append("/")
|
||||
|
||||
atoms.reverse()
|
||||
for node in atoms:
|
||||
if node:
|
||||
d = d.setdefault(node, {})
|
||||
|
||||
|
||||
def get_tree(base, exclude, coverage=the_coverage):
|
||||
"""Return covered module names as a nested dict."""
|
||||
tree = {}
|
||||
runs = coverage.data.executed_files()
|
||||
for path in runs:
|
||||
if not _skip_file(path, exclude) and not os.path.isdir(path):
|
||||
_graft(path, tree)
|
||||
return tree
|
||||
|
||||
|
||||
class CoverStats(object):
|
||||
|
||||
def __init__(self, coverage, root=None):
|
||||
self.coverage = coverage
|
||||
if root is None:
|
||||
# Guess initial depth. Files outside this path will not be
|
||||
# reachable from the web interface.
|
||||
import cherrypy
|
||||
root = os.path.dirname(cherrypy.__file__)
|
||||
self.root = root
|
||||
|
||||
def index(self):
|
||||
return TEMPLATE_FRAMESET % self.root.lower()
|
||||
index.exposed = True
|
||||
|
||||
def menu(self, base="/", pct="50", showpct="",
|
||||
exclude=r'python\d\.\d|test|tut\d|tutorial'):
|
||||
|
||||
# The coverage module uses all-lower-case names.
|
||||
base = base.lower().rstrip(os.sep)
|
||||
|
||||
yield TEMPLATE_MENU
|
||||
yield TEMPLATE_FORM % locals()
|
||||
|
||||
# Start by showing links for parent paths
|
||||
yield "<div id='crumbs'>"
|
||||
path = ""
|
||||
atoms = base.split(os.sep)
|
||||
atoms.pop()
|
||||
for atom in atoms:
|
||||
path += atom + os.sep
|
||||
yield ("<a href='menu?base=%s&exclude=%s'>%s</a> %s"
|
||||
% (path, quote_plus(exclude), atom, os.sep))
|
||||
yield "</div>"
|
||||
|
||||
yield "<div id='tree'>"
|
||||
|
||||
# Then display the tree
|
||||
tree = get_tree(base, exclude, self.coverage)
|
||||
if not tree:
|
||||
yield "<p>No modules covered.</p>"
|
||||
else:
|
||||
for chunk in _show_branch(tree, base, "/", pct,
|
||||
showpct == 'checked', exclude,
|
||||
coverage=self.coverage):
|
||||
yield chunk
|
||||
|
||||
yield "</div>"
|
||||
yield "</body></html>"
|
||||
menu.exposed = True
|
||||
|
||||
def annotated_file(self, filename, statements, excluded, missing):
|
||||
source = open(filename, 'r')
|
||||
buffer = []
|
||||
for lineno, line in enumerate(source.readlines()):
|
||||
lineno += 1
|
||||
line = line.strip("\n\r")
|
||||
empty_the_buffer = True
|
||||
if lineno in excluded:
|
||||
template = TEMPLATE_LOC_EXCLUDED
|
||||
elif lineno in missing:
|
||||
template = TEMPLATE_LOC_NOT_COVERED
|
||||
elif lineno in statements:
|
||||
template = TEMPLATE_LOC_COVERED
|
||||
else:
|
||||
empty_the_buffer = False
|
||||
buffer.append((lineno, line))
|
||||
if empty_the_buffer:
|
||||
for lno, pastline in buffer:
|
||||
yield template % (lno, cgi.escape(pastline))
|
||||
buffer = []
|
||||
yield template % (lineno, cgi.escape(line))
|
||||
|
||||
def report(self, name):
|
||||
filename, statements, excluded, missing, _ = self.coverage.analysis2(
|
||||
name)
|
||||
pc = _percent(statements, missing)
|
||||
yield TEMPLATE_COVERAGE % dict(name=os.path.basename(name),
|
||||
fullpath=name,
|
||||
pc=pc)
|
||||
yield '<table>\n'
|
||||
for line in self.annotated_file(filename, statements, excluded,
|
||||
missing):
|
||||
yield line
|
||||
yield '</table>'
|
||||
yield '</body>'
|
||||
yield '</html>'
|
||||
report.exposed = True
|
||||
|
||||
|
||||
def serve(path=localFile, port=8080, root=None):
|
||||
if coverage is None:
|
||||
raise ImportError("The coverage module could not be imported.")
|
||||
from coverage import coverage
|
||||
cov = coverage(data_file=path)
|
||||
cov.load()
|
||||
|
||||
import cherrypy
|
||||
cherrypy.config.update({'server.socket_port': int(port),
|
||||
'server.thread_pool': 10,
|
||||
'environment': "production",
|
||||
})
|
||||
cherrypy.quickstart(CoverStats(cov, root))
|
||||
|
||||
if __name__ == "__main__":
|
||||
serve(*tuple(sys.argv[1:]))
|
||||
687
lib/cherrypy/lib/cpstats.py
Normal file
687
lib/cherrypy/lib/cpstats.py
Normal file
@@ -0,0 +1,687 @@
|
||||
"""CPStats, a package for collecting and reporting on program statistics.
|
||||
|
||||
Overview
|
||||
========
|
||||
|
||||
Statistics about program operation are an invaluable monitoring and debugging
|
||||
tool. Unfortunately, the gathering and reporting of these critical values is
|
||||
usually ad-hoc. This package aims to add a centralized place for gathering
|
||||
statistical performance data, a structure for recording that data which
|
||||
provides for extrapolation of that data into more useful information,
|
||||
and a method of serving that data to both human investigators and
|
||||
monitoring software. Let's examine each of those in more detail.
|
||||
|
||||
Data Gathering
|
||||
--------------
|
||||
|
||||
Just as Python's `logging` module provides a common importable for gathering
|
||||
and sending messages, performance statistics would benefit from a similar
|
||||
common mechanism, and one that does *not* require each package which wishes
|
||||
to collect stats to import a third-party module. Therefore, we choose to
|
||||
re-use the `logging` module by adding a `statistics` object to it.
|
||||
|
||||
That `logging.statistics` object is a nested dict. It is not a custom class,
|
||||
because that would:
|
||||
|
||||
1. require libraries and applications to import a third-party module in
|
||||
order to participate
|
||||
2. inhibit innovation in extrapolation approaches and in reporting tools, and
|
||||
3. be slow.
|
||||
|
||||
There are, however, some specifications regarding the structure of the dict.::
|
||||
|
||||
{
|
||||
+----"SQLAlchemy": {
|
||||
| "Inserts": 4389745,
|
||||
| "Inserts per Second":
|
||||
| lambda s: s["Inserts"] / (time() - s["Start"]),
|
||||
| C +---"Table Statistics": {
|
||||
| o | "widgets": {-----------+
|
||||
N | l | "Rows": 1.3M, | Record
|
||||
a | l | "Inserts": 400, |
|
||||
m | e | },---------------------+
|
||||
e | c | "froobles": {
|
||||
s | t | "Rows": 7845,
|
||||
p | i | "Inserts": 0,
|
||||
a | o | },
|
||||
c | n +---},
|
||||
e | "Slow Queries":
|
||||
| [{"Query": "SELECT * FROM widgets;",
|
||||
| "Processing Time": 47.840923343,
|
||||
| },
|
||||
| ],
|
||||
+----},
|
||||
}
|
||||
|
||||
The `logging.statistics` dict has four levels. The topmost level is nothing
|
||||
more than a set of names to introduce modularity, usually along the lines of
|
||||
package names. If the SQLAlchemy project wanted to participate, for example,
|
||||
it might populate the item `logging.statistics['SQLAlchemy']`, whose value
|
||||
would be a second-layer dict we call a "namespace". Namespaces help multiple
|
||||
packages to avoid collisions over key names, and make reports easier to read,
|
||||
to boot. The maintainers of SQLAlchemy should feel free to use more than one
|
||||
namespace if needed (such as 'SQLAlchemy ORM'). Note that there are no case
|
||||
or other syntax constraints on the namespace names; they should be chosen
|
||||
to be maximally readable by humans (neither too short nor too long).
|
||||
|
||||
Each namespace, then, is a dict of named statistical values, such as
|
||||
'Requests/sec' or 'Uptime'. You should choose names which will look
|
||||
good on a report: spaces and capitalization are just fine.
|
||||
|
||||
In addition to scalars, values in a namespace MAY be a (third-layer)
|
||||
dict, or a list, called a "collection". For example, the CherryPy
|
||||
:class:`StatsTool` keeps track of what each request is doing (or has most
|
||||
recently done) in a 'Requests' collection, where each key is a thread ID; each
|
||||
value in the subdict MUST be a fourth dict (whew!) of statistical data about
|
||||
each thread. We call each subdict in the collection a "record". Similarly,
|
||||
the :class:`StatsTool` also keeps a list of slow queries, where each record
|
||||
contains data about each slow query, in order.
|
||||
|
||||
Values in a namespace or record may also be functions, which brings us to:
|
||||
|
||||
Extrapolation
|
||||
-------------
|
||||
|
||||
The collection of statistical data needs to be fast, as close to unnoticeable
|
||||
as possible to the host program. That requires us to minimize I/O, for example,
|
||||
but in Python it also means we need to minimize function calls. So when you
|
||||
are designing your namespace and record values, try to insert the most basic
|
||||
scalar values you already have on hand.
|
||||
|
||||
When it comes time to report on the gathered data, however, we usually have
|
||||
much more freedom in what we can calculate. Therefore, whenever reporting
|
||||
tools (like the provided :class:`StatsPage` CherryPy class) fetch the contents
|
||||
of `logging.statistics` for reporting, they first call
|
||||
`extrapolate_statistics` (passing the whole `statistics` dict as the only
|
||||
argument). This makes a deep copy of the statistics dict so that the
|
||||
reporting tool can both iterate over it and even change it without harming
|
||||
the original. But it also expands any functions in the dict by calling them.
|
||||
For example, you might have a 'Current Time' entry in the namespace with the
|
||||
value "lambda scope: time.time()". The "scope" parameter is the current
|
||||
namespace dict (or record, if we're currently expanding one of those
|
||||
instead), allowing you access to existing static entries. If you're truly
|
||||
evil, you can even modify more than one entry at a time.
|
||||
|
||||
However, don't try to calculate an entry and then use its value in further
|
||||
extrapolations; the order in which the functions are called is not guaranteed.
|
||||
This can lead to a certain amount of duplicated work (or a redesign of your
|
||||
schema), but that's better than complicating the spec.
|
||||
|
||||
After the whole thing has been extrapolated, it's time for:
|
||||
|
||||
Reporting
|
||||
---------
|
||||
|
||||
The :class:`StatsPage` class grabs the `logging.statistics` dict, extrapolates
|
||||
it all, and then transforms it to HTML for easy viewing. Each namespace gets
|
||||
its own header and attribute table, plus an extra table for each collection.
|
||||
This is NOT part of the statistics specification; other tools can format how
|
||||
they like.
|
||||
|
||||
You can control which columns are output and how they are formatted by updating
|
||||
StatsPage.formatting, which is a dict that mirrors the keys and nesting of
|
||||
`logging.statistics`. The difference is that, instead of data values, it has
|
||||
formatting values. Use None for a given key to indicate to the StatsPage that a
|
||||
given column should not be output. Use a string with formatting
|
||||
(such as '%.3f') to interpolate the value(s), or use a callable (such as
|
||||
lambda v: v.isoformat()) for more advanced formatting. Any entry which is not
|
||||
mentioned in the formatting dict is output unchanged.
|
||||
|
||||
Monitoring
|
||||
----------
|
||||
|
||||
Although the HTML output takes pains to assign unique id's to each <td> with
|
||||
statistical data, you're probably better off fetching /cpstats/data, which
|
||||
outputs the whole (extrapolated) `logging.statistics` dict in JSON format.
|
||||
That is probably easier to parse, and doesn't have any formatting controls,
|
||||
so you get the "original" data in a consistently-serialized format.
|
||||
Note: there's no treatment yet for datetime objects. Try time.time() instead
|
||||
for now if you can. Nagios will probably thank you.
|
||||
|
||||
Turning Collection Off
|
||||
----------------------
|
||||
|
||||
It is recommended each namespace have an "Enabled" item which, if False,
|
||||
stops collection (but not reporting) of statistical data. Applications
|
||||
SHOULD provide controls to pause and resume collection by setting these
|
||||
entries to False or True, if present.
|
||||
|
||||
|
||||
Usage
|
||||
=====
|
||||
|
||||
To collect statistics on CherryPy applications::
|
||||
|
||||
from cherrypy.lib import cpstats
|
||||
appconfig['/']['tools.cpstats.on'] = True
|
||||
|
||||
To collect statistics on your own code::
|
||||
|
||||
import logging
|
||||
# Initialize the repository
|
||||
if not hasattr(logging, 'statistics'): logging.statistics = {}
|
||||
# Initialize my namespace
|
||||
mystats = logging.statistics.setdefault('My Stuff', {})
|
||||
# Initialize my namespace's scalars and collections
|
||||
mystats.update({
|
||||
'Enabled': True,
|
||||
'Start Time': time.time(),
|
||||
'Important Events': 0,
|
||||
'Events/Second': lambda s: (
|
||||
(s['Important Events'] / (time.time() - s['Start Time']))),
|
||||
})
|
||||
...
|
||||
for event in events:
|
||||
...
|
||||
# Collect stats
|
||||
if mystats.get('Enabled', False):
|
||||
mystats['Important Events'] += 1
|
||||
|
||||
To report statistics::
|
||||
|
||||
root.cpstats = cpstats.StatsPage()
|
||||
|
||||
To format statistics reports::
|
||||
|
||||
See 'Reporting', above.
|
||||
|
||||
"""
|
||||
|
||||
# ------------------------------- Statistics -------------------------------- #
|
||||
|
||||
import logging
|
||||
if not hasattr(logging, 'statistics'):
|
||||
logging.statistics = {}
|
||||
|
||||
|
||||
def extrapolate_statistics(scope):
|
||||
"""Return an extrapolated copy of the given scope."""
|
||||
c = {}
|
||||
for k, v in list(scope.items()):
|
||||
if isinstance(v, dict):
|
||||
v = extrapolate_statistics(v)
|
||||
elif isinstance(v, (list, tuple)):
|
||||
v = [extrapolate_statistics(record) for record in v]
|
||||
elif hasattr(v, '__call__'):
|
||||
v = v(scope)
|
||||
c[k] = v
|
||||
return c
|
||||
|
||||
|
||||
# -------------------- CherryPy Applications Statistics --------------------- #
|
||||
|
||||
import threading
|
||||
import time
|
||||
|
||||
import cherrypy
|
||||
|
||||
appstats = logging.statistics.setdefault('CherryPy Applications', {})
|
||||
appstats.update({
|
||||
'Enabled': True,
|
||||
'Bytes Read/Request': lambda s: (
|
||||
s['Total Requests'] and
|
||||
(s['Total Bytes Read'] / float(s['Total Requests'])) or
|
||||
0.0
|
||||
),
|
||||
'Bytes Read/Second': lambda s: s['Total Bytes Read'] / s['Uptime'](s),
|
||||
'Bytes Written/Request': lambda s: (
|
||||
s['Total Requests'] and
|
||||
(s['Total Bytes Written'] / float(s['Total Requests'])) or
|
||||
0.0
|
||||
),
|
||||
'Bytes Written/Second': lambda s: (
|
||||
s['Total Bytes Written'] / s['Uptime'](s)
|
||||
),
|
||||
'Current Time': lambda s: time.time(),
|
||||
'Current Requests': 0,
|
||||
'Requests/Second': lambda s: float(s['Total Requests']) / s['Uptime'](s),
|
||||
'Server Version': cherrypy.__version__,
|
||||
'Start Time': time.time(),
|
||||
'Total Bytes Read': 0,
|
||||
'Total Bytes Written': 0,
|
||||
'Total Requests': 0,
|
||||
'Total Time': 0,
|
||||
'Uptime': lambda s: time.time() - s['Start Time'],
|
||||
'Requests': {},
|
||||
})
|
||||
|
||||
proc_time = lambda s: time.time() - s['Start Time']
|
||||
|
||||
|
||||
class ByteCountWrapper(object):
|
||||
|
||||
"""Wraps a file-like object, counting the number of bytes read."""
|
||||
|
||||
def __init__(self, rfile):
|
||||
self.rfile = rfile
|
||||
self.bytes_read = 0
|
||||
|
||||
def read(self, size=-1):
|
||||
data = self.rfile.read(size)
|
||||
self.bytes_read += len(data)
|
||||
return data
|
||||
|
||||
def readline(self, size=-1):
|
||||
data = self.rfile.readline(size)
|
||||
self.bytes_read += len(data)
|
||||
return data
|
||||
|
||||
def readlines(self, sizehint=0):
|
||||
# Shamelessly stolen from StringIO
|
||||
total = 0
|
||||
lines = []
|
||||
line = self.readline()
|
||||
while line:
|
||||
lines.append(line)
|
||||
total += len(line)
|
||||
if 0 < sizehint <= total:
|
||||
break
|
||||
line = self.readline()
|
||||
return lines
|
||||
|
||||
def close(self):
|
||||
self.rfile.close()
|
||||
|
||||
def __iter__(self):
|
||||
return self
|
||||
|
||||
def next(self):
|
||||
data = self.rfile.next()
|
||||
self.bytes_read += len(data)
|
||||
return data
|
||||
|
||||
|
||||
average_uriset_time = lambda s: s['Count'] and (s['Sum'] / s['Count']) or 0
|
||||
|
||||
|
||||
class StatsTool(cherrypy.Tool):
|
||||
|
||||
"""Record various information about the current request."""
|
||||
|
||||
def __init__(self):
|
||||
cherrypy.Tool.__init__(self, 'on_end_request', self.record_stop)
|
||||
|
||||
def _setup(self):
|
||||
"""Hook this tool into cherrypy.request.
|
||||
|
||||
The standard CherryPy request object will automatically call this
|
||||
method when the tool is "turned on" in config.
|
||||
"""
|
||||
if appstats.get('Enabled', False):
|
||||
cherrypy.Tool._setup(self)
|
||||
self.record_start()
|
||||
|
||||
def record_start(self):
|
||||
"""Record the beginning of a request."""
|
||||
request = cherrypy.serving.request
|
||||
if not hasattr(request.rfile, 'bytes_read'):
|
||||
request.rfile = ByteCountWrapper(request.rfile)
|
||||
request.body.fp = request.rfile
|
||||
|
||||
r = request.remote
|
||||
|
||||
appstats['Current Requests'] += 1
|
||||
appstats['Total Requests'] += 1
|
||||
appstats['Requests'][threading._get_ident()] = {
|
||||
'Bytes Read': None,
|
||||
'Bytes Written': None,
|
||||
# Use a lambda so the ip gets updated by tools.proxy later
|
||||
'Client': lambda s: '%s:%s' % (r.ip, r.port),
|
||||
'End Time': None,
|
||||
'Processing Time': proc_time,
|
||||
'Request-Line': request.request_line,
|
||||
'Response Status': None,
|
||||
'Start Time': time.time(),
|
||||
}
|
||||
|
||||
def record_stop(
|
||||
self, uriset=None, slow_queries=1.0, slow_queries_count=100,
|
||||
debug=False, **kwargs):
|
||||
"""Record the end of a request."""
|
||||
resp = cherrypy.serving.response
|
||||
w = appstats['Requests'][threading._get_ident()]
|
||||
|
||||
r = cherrypy.request.rfile.bytes_read
|
||||
w['Bytes Read'] = r
|
||||
appstats['Total Bytes Read'] += r
|
||||
|
||||
if resp.stream:
|
||||
w['Bytes Written'] = 'chunked'
|
||||
else:
|
||||
cl = int(resp.headers.get('Content-Length', 0))
|
||||
w['Bytes Written'] = cl
|
||||
appstats['Total Bytes Written'] += cl
|
||||
|
||||
w['Response Status'] = getattr(
|
||||
resp, 'output_status', None) or resp.status
|
||||
|
||||
w['End Time'] = time.time()
|
||||
p = w['End Time'] - w['Start Time']
|
||||
w['Processing Time'] = p
|
||||
appstats['Total Time'] += p
|
||||
|
||||
appstats['Current Requests'] -= 1
|
||||
|
||||
if debug:
|
||||
cherrypy.log('Stats recorded: %s' % repr(w), 'TOOLS.CPSTATS')
|
||||
|
||||
if uriset:
|
||||
rs = appstats.setdefault('URI Set Tracking', {})
|
||||
r = rs.setdefault(uriset, {
|
||||
'Min': None, 'Max': None, 'Count': 0, 'Sum': 0,
|
||||
'Avg': average_uriset_time})
|
||||
if r['Min'] is None or p < r['Min']:
|
||||
r['Min'] = p
|
||||
if r['Max'] is None or p > r['Max']:
|
||||
r['Max'] = p
|
||||
r['Count'] += 1
|
||||
r['Sum'] += p
|
||||
|
||||
if slow_queries and p > slow_queries:
|
||||
sq = appstats.setdefault('Slow Queries', [])
|
||||
sq.append(w.copy())
|
||||
if len(sq) > slow_queries_count:
|
||||
sq.pop(0)
|
||||
|
||||
|
||||
import cherrypy
|
||||
cherrypy.tools.cpstats = StatsTool()
|
||||
|
||||
|
||||
# ---------------------- CherryPy Statistics Reporting ---------------------- #
|
||||
|
||||
import os
|
||||
thisdir = os.path.abspath(os.path.dirname(__file__))
|
||||
|
||||
try:
|
||||
import json
|
||||
except ImportError:
|
||||
try:
|
||||
import simplejson as json
|
||||
except ImportError:
|
||||
json = None
|
||||
|
||||
|
||||
missing = object()
|
||||
|
||||
locale_date = lambda v: time.strftime('%c', time.gmtime(v))
|
||||
iso_format = lambda v: time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(v))
|
||||
|
||||
|
||||
def pause_resume(ns):
|
||||
def _pause_resume(enabled):
|
||||
pause_disabled = ''
|
||||
resume_disabled = ''
|
||||
if enabled:
|
||||
resume_disabled = 'disabled="disabled" '
|
||||
else:
|
||||
pause_disabled = 'disabled="disabled" '
|
||||
return """
|
||||
<form action="pause" method="POST" style="display:inline">
|
||||
<input type="hidden" name="namespace" value="%s" />
|
||||
<input type="submit" value="Pause" %s/>
|
||||
</form>
|
||||
<form action="resume" method="POST" style="display:inline">
|
||||
<input type="hidden" name="namespace" value="%s" />
|
||||
<input type="submit" value="Resume" %s/>
|
||||
</form>
|
||||
""" % (ns, pause_disabled, ns, resume_disabled)
|
||||
return _pause_resume
|
||||
|
||||
|
||||
class StatsPage(object):
|
||||
|
||||
formatting = {
|
||||
'CherryPy Applications': {
|
||||
'Enabled': pause_resume('CherryPy Applications'),
|
||||
'Bytes Read/Request': '%.3f',
|
||||
'Bytes Read/Second': '%.3f',
|
||||
'Bytes Written/Request': '%.3f',
|
||||
'Bytes Written/Second': '%.3f',
|
||||
'Current Time': iso_format,
|
||||
'Requests/Second': '%.3f',
|
||||
'Start Time': iso_format,
|
||||
'Total Time': '%.3f',
|
||||
'Uptime': '%.3f',
|
||||
'Slow Queries': {
|
||||
'End Time': None,
|
||||
'Processing Time': '%.3f',
|
||||
'Start Time': iso_format,
|
||||
},
|
||||
'URI Set Tracking': {
|
||||
'Avg': '%.3f',
|
||||
'Max': '%.3f',
|
||||
'Min': '%.3f',
|
||||
'Sum': '%.3f',
|
||||
},
|
||||
'Requests': {
|
||||
'Bytes Read': '%s',
|
||||
'Bytes Written': '%s',
|
||||
'End Time': None,
|
||||
'Processing Time': '%.3f',
|
||||
'Start Time': None,
|
||||
},
|
||||
},
|
||||
'CherryPy WSGIServer': {
|
||||
'Enabled': pause_resume('CherryPy WSGIServer'),
|
||||
'Connections/second': '%.3f',
|
||||
'Start time': iso_format,
|
||||
},
|
||||
}
|
||||
|
||||
def index(self):
|
||||
# Transform the raw data into pretty output for HTML
|
||||
yield """
|
||||
<html>
|
||||
<head>
|
||||
<title>Statistics</title>
|
||||
<style>
|
||||
|
||||
th, td {
|
||||
padding: 0.25em 0.5em;
|
||||
border: 1px solid #666699;
|
||||
}
|
||||
|
||||
table {
|
||||
border-collapse: collapse;
|
||||
}
|
||||
|
||||
table.stats1 {
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
table.stats1 th {
|
||||
font-weight: bold;
|
||||
text-align: right;
|
||||
background-color: #CCD5DD;
|
||||
}
|
||||
|
||||
table.stats2, h2 {
|
||||
margin-left: 50px;
|
||||
}
|
||||
|
||||
table.stats2 th {
|
||||
font-weight: bold;
|
||||
text-align: center;
|
||||
background-color: #CCD5DD;
|
||||
}
|
||||
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
"""
|
||||
for title, scalars, collections in self.get_namespaces():
|
||||
yield """
|
||||
<h1>%s</h1>
|
||||
|
||||
<table class='stats1'>
|
||||
<tbody>
|
||||
""" % title
|
||||
for i, (key, value) in enumerate(scalars):
|
||||
colnum = i % 3
|
||||
if colnum == 0:
|
||||
yield """
|
||||
<tr>"""
|
||||
yield (
|
||||
"""
|
||||
<th>%(key)s</th><td id='%(title)s-%(key)s'>%(value)s</td>""" %
|
||||
vars()
|
||||
)
|
||||
if colnum == 2:
|
||||
yield """
|
||||
</tr>"""
|
||||
|
||||
if colnum == 0:
|
||||
yield """
|
||||
<th></th><td></td>
|
||||
<th></th><td></td>
|
||||
</tr>"""
|
||||
elif colnum == 1:
|
||||
yield """
|
||||
<th></th><td></td>
|
||||
</tr>"""
|
||||
yield """
|
||||
</tbody>
|
||||
</table>"""
|
||||
|
||||
for subtitle, headers, subrows in collections:
|
||||
yield """
|
||||
<h2>%s</h2>
|
||||
<table class='stats2'>
|
||||
<thead>
|
||||
<tr>""" % subtitle
|
||||
for key in headers:
|
||||
yield """
|
||||
<th>%s</th>""" % key
|
||||
yield """
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>"""
|
||||
for subrow in subrows:
|
||||
yield """
|
||||
<tr>"""
|
||||
for value in subrow:
|
||||
yield """
|
||||
<td>%s</td>""" % value
|
||||
yield """
|
||||
</tr>"""
|
||||
yield """
|
||||
</tbody>
|
||||
</table>"""
|
||||
yield """
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
index.exposed = True
|
||||
|
||||
def get_namespaces(self):
|
||||
"""Yield (title, scalars, collections) for each namespace."""
|
||||
s = extrapolate_statistics(logging.statistics)
|
||||
for title, ns in sorted(s.items()):
|
||||
scalars = []
|
||||
collections = []
|
||||
ns_fmt = self.formatting.get(title, {})
|
||||
for k, v in sorted(ns.items()):
|
||||
fmt = ns_fmt.get(k, {})
|
||||
if isinstance(v, dict):
|
||||
headers, subrows = self.get_dict_collection(v, fmt)
|
||||
collections.append((k, ['ID'] + headers, subrows))
|
||||
elif isinstance(v, (list, tuple)):
|
||||
headers, subrows = self.get_list_collection(v, fmt)
|
||||
collections.append((k, headers, subrows))
|
||||
else:
|
||||
format = ns_fmt.get(k, missing)
|
||||
if format is None:
|
||||
# Don't output this column.
|
||||
continue
|
||||
if hasattr(format, '__call__'):
|
||||
v = format(v)
|
||||
elif format is not missing:
|
||||
v = format % v
|
||||
scalars.append((k, v))
|
||||
yield title, scalars, collections
|
||||
|
||||
def get_dict_collection(self, v, formatting):
|
||||
"""Return ([headers], [rows]) for the given collection."""
|
||||
# E.g., the 'Requests' dict.
|
||||
headers = []
|
||||
for record in v.itervalues():
|
||||
for k3 in record:
|
||||
format = formatting.get(k3, missing)
|
||||
if format is None:
|
||||
# Don't output this column.
|
||||
continue
|
||||
if k3 not in headers:
|
||||
headers.append(k3)
|
||||
headers.sort()
|
||||
|
||||
subrows = []
|
||||
for k2, record in sorted(v.items()):
|
||||
subrow = [k2]
|
||||
for k3 in headers:
|
||||
v3 = record.get(k3, '')
|
||||
format = formatting.get(k3, missing)
|
||||
if format is None:
|
||||
# Don't output this column.
|
||||
continue
|
||||
if hasattr(format, '__call__'):
|
||||
v3 = format(v3)
|
||||
elif format is not missing:
|
||||
v3 = format % v3
|
||||
subrow.append(v3)
|
||||
subrows.append(subrow)
|
||||
|
||||
return headers, subrows
|
||||
|
||||
def get_list_collection(self, v, formatting):
|
||||
"""Return ([headers], [subrows]) for the given collection."""
|
||||
# E.g., the 'Slow Queries' list.
|
||||
headers = []
|
||||
for record in v:
|
||||
for k3 in record:
|
||||
format = formatting.get(k3, missing)
|
||||
if format is None:
|
||||
# Don't output this column.
|
||||
continue
|
||||
if k3 not in headers:
|
||||
headers.append(k3)
|
||||
headers.sort()
|
||||
|
||||
subrows = []
|
||||
for record in v:
|
||||
subrow = []
|
||||
for k3 in headers:
|
||||
v3 = record.get(k3, '')
|
||||
format = formatting.get(k3, missing)
|
||||
if format is None:
|
||||
# Don't output this column.
|
||||
continue
|
||||
if hasattr(format, '__call__'):
|
||||
v3 = format(v3)
|
||||
elif format is not missing:
|
||||
v3 = format % v3
|
||||
subrow.append(v3)
|
||||
subrows.append(subrow)
|
||||
|
||||
return headers, subrows
|
||||
|
||||
if json is not None:
|
||||
def data(self):
|
||||
s = extrapolate_statistics(logging.statistics)
|
||||
cherrypy.response.headers['Content-Type'] = 'application/json'
|
||||
return json.dumps(s, sort_keys=True, indent=4)
|
||||
data.exposed = True
|
||||
|
||||
def pause(self, namespace):
|
||||
logging.statistics.get(namespace, {})['Enabled'] = False
|
||||
raise cherrypy.HTTPRedirect('./')
|
||||
pause.exposed = True
|
||||
pause.cp_config = {'tools.allow.on': True,
|
||||
'tools.allow.methods': ['POST']}
|
||||
|
||||
def resume(self, namespace):
|
||||
logging.statistics.get(namespace, {})['Enabled'] = True
|
||||
raise cherrypy.HTTPRedirect('./')
|
||||
resume.exposed = True
|
||||
resume.cp_config = {'tools.allow.on': True,
|
||||
'tools.allow.methods': ['POST']}
|
||||
630
lib/cherrypy/lib/cptools.py
Normal file
630
lib/cherrypy/lib/cptools.py
Normal file
@@ -0,0 +1,630 @@
|
||||
"""Functions for builtin CherryPy tools."""
|
||||
|
||||
import logging
|
||||
import re
|
||||
|
||||
import cherrypy
|
||||
from cherrypy._cpcompat import basestring, md5, set, unicodestr
|
||||
from cherrypy.lib import httputil as _httputil
|
||||
from cherrypy.lib import is_iterator
|
||||
|
||||
|
||||
# Conditional HTTP request support #
|
||||
|
||||
def validate_etags(autotags=False, debug=False):
|
||||
"""Validate the current ETag against If-Match, If-None-Match headers.
|
||||
|
||||
If autotags is True, an ETag response-header value will be provided
|
||||
from an MD5 hash of the response body (unless some other code has
|
||||
already provided an ETag header). If False (the default), the ETag
|
||||
will not be automatic.
|
||||
|
||||
WARNING: the autotags feature is not designed for URL's which allow
|
||||
methods other than GET. For example, if a POST to the same URL returns
|
||||
no content, the automatic ETag will be incorrect, breaking a fundamental
|
||||
use for entity tags in a possibly destructive fashion. Likewise, if you
|
||||
raise 304 Not Modified, the response body will be empty, the ETag hash
|
||||
will be incorrect, and your application will break.
|
||||
See :rfc:`2616` Section 14.24.
|
||||
"""
|
||||
response = cherrypy.serving.response
|
||||
|
||||
# Guard against being run twice.
|
||||
if hasattr(response, "ETag"):
|
||||
return
|
||||
|
||||
status, reason, msg = _httputil.valid_status(response.status)
|
||||
|
||||
etag = response.headers.get('ETag')
|
||||
|
||||
# Automatic ETag generation. See warning in docstring.
|
||||
if etag:
|
||||
if debug:
|
||||
cherrypy.log('ETag already set: %s' % etag, 'TOOLS.ETAGS')
|
||||
elif not autotags:
|
||||
if debug:
|
||||
cherrypy.log('Autotags off', 'TOOLS.ETAGS')
|
||||
elif status != 200:
|
||||
if debug:
|
||||
cherrypy.log('Status not 200', 'TOOLS.ETAGS')
|
||||
else:
|
||||
etag = response.collapse_body()
|
||||
etag = '"%s"' % md5(etag).hexdigest()
|
||||
if debug:
|
||||
cherrypy.log('Setting ETag: %s' % etag, 'TOOLS.ETAGS')
|
||||
response.headers['ETag'] = etag
|
||||
|
||||
response.ETag = etag
|
||||
|
||||
# "If the request would, without the If-Match header field, result in
|
||||
# anything other than a 2xx or 412 status, then the If-Match header
|
||||
# MUST be ignored."
|
||||
if debug:
|
||||
cherrypy.log('Status: %s' % status, 'TOOLS.ETAGS')
|
||||
if status >= 200 and status <= 299:
|
||||
request = cherrypy.serving.request
|
||||
|
||||
conditions = request.headers.elements('If-Match') or []
|
||||
conditions = [str(x) for x in conditions]
|
||||
if debug:
|
||||
cherrypy.log('If-Match conditions: %s' % repr(conditions),
|
||||
'TOOLS.ETAGS')
|
||||
if conditions and not (conditions == ["*"] or etag in conditions):
|
||||
raise cherrypy.HTTPError(412, "If-Match failed: ETag %r did "
|
||||
"not match %r" % (etag, conditions))
|
||||
|
||||
conditions = request.headers.elements('If-None-Match') or []
|
||||
conditions = [str(x) for x in conditions]
|
||||
if debug:
|
||||
cherrypy.log('If-None-Match conditions: %s' % repr(conditions),
|
||||
'TOOLS.ETAGS')
|
||||
if conditions == ["*"] or etag in conditions:
|
||||
if debug:
|
||||
cherrypy.log('request.method: %s' %
|
||||
request.method, 'TOOLS.ETAGS')
|
||||
if request.method in ("GET", "HEAD"):
|
||||
raise cherrypy.HTTPRedirect([], 304)
|
||||
else:
|
||||
raise cherrypy.HTTPError(412, "If-None-Match failed: ETag %r "
|
||||
"matched %r" % (etag, conditions))
|
||||
|
||||
|
||||
def validate_since():
|
||||
"""Validate the current Last-Modified against If-Modified-Since headers.
|
||||
|
||||
If no code has set the Last-Modified response header, then no validation
|
||||
will be performed.
|
||||
"""
|
||||
response = cherrypy.serving.response
|
||||
lastmod = response.headers.get('Last-Modified')
|
||||
if lastmod:
|
||||
status, reason, msg = _httputil.valid_status(response.status)
|
||||
|
||||
request = cherrypy.serving.request
|
||||
|
||||
since = request.headers.get('If-Unmodified-Since')
|
||||
if since and since != lastmod:
|
||||
if (status >= 200 and status <= 299) or status == 412:
|
||||
raise cherrypy.HTTPError(412)
|
||||
|
||||
since = request.headers.get('If-Modified-Since')
|
||||
if since and since == lastmod:
|
||||
if (status >= 200 and status <= 299) or status == 304:
|
||||
if request.method in ("GET", "HEAD"):
|
||||
raise cherrypy.HTTPRedirect([], 304)
|
||||
else:
|
||||
raise cherrypy.HTTPError(412)
|
||||
|
||||
|
||||
# Tool code #
|
||||
|
||||
def allow(methods=None, debug=False):
|
||||
"""Raise 405 if request.method not in methods (default ['GET', 'HEAD']).
|
||||
|
||||
The given methods are case-insensitive, and may be in any order.
|
||||
If only one method is allowed, you may supply a single string;
|
||||
if more than one, supply a list of strings.
|
||||
|
||||
Regardless of whether the current method is allowed or not, this
|
||||
also emits an 'Allow' response header, containing the given methods.
|
||||
"""
|
||||
if not isinstance(methods, (tuple, list)):
|
||||
methods = [methods]
|
||||
methods = [m.upper() for m in methods if m]
|
||||
if not methods:
|
||||
methods = ['GET', 'HEAD']
|
||||
elif 'GET' in methods and 'HEAD' not in methods:
|
||||
methods.append('HEAD')
|
||||
|
||||
cherrypy.response.headers['Allow'] = ', '.join(methods)
|
||||
if cherrypy.request.method not in methods:
|
||||
if debug:
|
||||
cherrypy.log('request.method %r not in methods %r' %
|
||||
(cherrypy.request.method, methods), 'TOOLS.ALLOW')
|
||||
raise cherrypy.HTTPError(405)
|
||||
else:
|
||||
if debug:
|
||||
cherrypy.log('request.method %r in methods %r' %
|
||||
(cherrypy.request.method, methods), 'TOOLS.ALLOW')
|
||||
|
||||
|
||||
def proxy(base=None, local='X-Forwarded-Host', remote='X-Forwarded-For',
|
||||
scheme='X-Forwarded-Proto', debug=False):
|
||||
"""Change the base URL (scheme://host[:port][/path]).
|
||||
|
||||
For running a CP server behind Apache, lighttpd, or other HTTP server.
|
||||
|
||||
For Apache and lighttpd, you should leave the 'local' argument at the
|
||||
default value of 'X-Forwarded-Host'. For Squid, you probably want to set
|
||||
tools.proxy.local = 'Origin'.
|
||||
|
||||
If you want the new request.base to include path info (not just the host),
|
||||
you must explicitly set base to the full base path, and ALSO set 'local'
|
||||
to '', so that the X-Forwarded-Host request header (which never includes
|
||||
path info) does not override it. Regardless, the value for 'base' MUST
|
||||
NOT end in a slash.
|
||||
|
||||
cherrypy.request.remote.ip (the IP address of the client) will be
|
||||
rewritten if the header specified by the 'remote' arg is valid.
|
||||
By default, 'remote' is set to 'X-Forwarded-For'. If you do not
|
||||
want to rewrite remote.ip, set the 'remote' arg to an empty string.
|
||||
"""
|
||||
|
||||
request = cherrypy.serving.request
|
||||
|
||||
if scheme:
|
||||
s = request.headers.get(scheme, None)
|
||||
if debug:
|
||||
cherrypy.log('Testing scheme %r:%r' % (scheme, s), 'TOOLS.PROXY')
|
||||
if s == 'on' and 'ssl' in scheme.lower():
|
||||
# This handles e.g. webfaction's 'X-Forwarded-Ssl: on' header
|
||||
scheme = 'https'
|
||||
else:
|
||||
# This is for lighttpd/pound/Mongrel's 'X-Forwarded-Proto: https'
|
||||
scheme = s
|
||||
if not scheme:
|
||||
scheme = request.base[:request.base.find("://")]
|
||||
|
||||
if local:
|
||||
lbase = request.headers.get(local, None)
|
||||
if debug:
|
||||
cherrypy.log('Testing local %r:%r' % (local, lbase), 'TOOLS.PROXY')
|
||||
if lbase is not None:
|
||||
base = lbase.split(',')[0]
|
||||
if not base:
|
||||
port = request.local.port
|
||||
if port == 80:
|
||||
base = '127.0.0.1'
|
||||
else:
|
||||
base = '127.0.0.1:%s' % port
|
||||
|
||||
if base.find("://") == -1:
|
||||
# add http:// or https:// if needed
|
||||
base = scheme + "://" + base
|
||||
|
||||
request.base = base
|
||||
|
||||
if remote:
|
||||
xff = request.headers.get(remote)
|
||||
if debug:
|
||||
cherrypy.log('Testing remote %r:%r' % (remote, xff), 'TOOLS.PROXY')
|
||||
if xff:
|
||||
if remote == 'X-Forwarded-For':
|
||||
#Bug #1268
|
||||
xff = xff.split(',')[0].strip()
|
||||
request.remote.ip = xff
|
||||
|
||||
|
||||
def ignore_headers(headers=('Range',), debug=False):
|
||||
"""Delete request headers whose field names are included in 'headers'.
|
||||
|
||||
This is a useful tool for working behind certain HTTP servers;
|
||||
for example, Apache duplicates the work that CP does for 'Range'
|
||||
headers, and will doubly-truncate the response.
|
||||
"""
|
||||
request = cherrypy.serving.request
|
||||
for name in headers:
|
||||
if name in request.headers:
|
||||
if debug:
|
||||
cherrypy.log('Ignoring request header %r' % name,
|
||||
'TOOLS.IGNORE_HEADERS')
|
||||
del request.headers[name]
|
||||
|
||||
|
||||
def response_headers(headers=None, debug=False):
|
||||
"""Set headers on the response."""
|
||||
if debug:
|
||||
cherrypy.log('Setting response headers: %s' % repr(headers),
|
||||
'TOOLS.RESPONSE_HEADERS')
|
||||
for name, value in (headers or []):
|
||||
cherrypy.serving.response.headers[name] = value
|
||||
response_headers.failsafe = True
|
||||
|
||||
|
||||
def referer(pattern, accept=True, accept_missing=False, error=403,
|
||||
message='Forbidden Referer header.', debug=False):
|
||||
"""Raise HTTPError if Referer header does/does not match the given pattern.
|
||||
|
||||
pattern
|
||||
A regular expression pattern to test against the Referer.
|
||||
|
||||
accept
|
||||
If True, the Referer must match the pattern; if False,
|
||||
the Referer must NOT match the pattern.
|
||||
|
||||
accept_missing
|
||||
If True, permit requests with no Referer header.
|
||||
|
||||
error
|
||||
The HTTP error code to return to the client on failure.
|
||||
|
||||
message
|
||||
A string to include in the response body on failure.
|
||||
|
||||
"""
|
||||
try:
|
||||
ref = cherrypy.serving.request.headers['Referer']
|
||||
match = bool(re.match(pattern, ref))
|
||||
if debug:
|
||||
cherrypy.log('Referer %r matches %r' % (ref, pattern),
|
||||
'TOOLS.REFERER')
|
||||
if accept == match:
|
||||
return
|
||||
except KeyError:
|
||||
if debug:
|
||||
cherrypy.log('No Referer header', 'TOOLS.REFERER')
|
||||
if accept_missing:
|
||||
return
|
||||
|
||||
raise cherrypy.HTTPError(error, message)
|
||||
|
||||
|
||||
class SessionAuth(object):
|
||||
|
||||
"""Assert that the user is logged in."""
|
||||
|
||||
session_key = "username"
|
||||
debug = False
|
||||
|
||||
def check_username_and_password(self, username, password):
|
||||
pass
|
||||
|
||||
def anonymous(self):
|
||||
"""Provide a temporary user name for anonymous users."""
|
||||
pass
|
||||
|
||||
def on_login(self, username):
|
||||
pass
|
||||
|
||||
def on_logout(self, username):
|
||||
pass
|
||||
|
||||
def on_check(self, username):
|
||||
pass
|
||||
|
||||
def login_screen(self, from_page='..', username='', error_msg='',
|
||||
**kwargs):
|
||||
return (unicodestr("""<html><body>
|
||||
Message: %(error_msg)s
|
||||
<form method="post" action="do_login">
|
||||
Login: <input type="text" name="username" value="%(username)s" size="10" />
|
||||
<br />
|
||||
Password: <input type="password" name="password" size="10" />
|
||||
<br />
|
||||
<input type="hidden" name="from_page" value="%(from_page)s" />
|
||||
<br />
|
||||
<input type="submit" />
|
||||
</form>
|
||||
</body></html>""") % vars()).encode("utf-8")
|
||||
|
||||
def do_login(self, username, password, from_page='..', **kwargs):
|
||||
"""Login. May raise redirect, or return True if request handled."""
|
||||
response = cherrypy.serving.response
|
||||
error_msg = self.check_username_and_password(username, password)
|
||||
if error_msg:
|
||||
body = self.login_screen(from_page, username, error_msg)
|
||||
response.body = body
|
||||
if "Content-Length" in response.headers:
|
||||
# Delete Content-Length header so finalize() recalcs it.
|
||||
del response.headers["Content-Length"]
|
||||
return True
|
||||
else:
|
||||
cherrypy.serving.request.login = username
|
||||
cherrypy.session[self.session_key] = username
|
||||
self.on_login(username)
|
||||
raise cherrypy.HTTPRedirect(from_page or "/")
|
||||
|
||||
def do_logout(self, from_page='..', **kwargs):
|
||||
"""Logout. May raise redirect, or return True if request handled."""
|
||||
sess = cherrypy.session
|
||||
username = sess.get(self.session_key)
|
||||
sess[self.session_key] = None
|
||||
if username:
|
||||
cherrypy.serving.request.login = None
|
||||
self.on_logout(username)
|
||||
raise cherrypy.HTTPRedirect(from_page)
|
||||
|
||||
def do_check(self):
|
||||
"""Assert username. Raise redirect, or return True if request handled.
|
||||
"""
|
||||
sess = cherrypy.session
|
||||
request = cherrypy.serving.request
|
||||
response = cherrypy.serving.response
|
||||
|
||||
username = sess.get(self.session_key)
|
||||
if not username:
|
||||
sess[self.session_key] = username = self.anonymous()
|
||||
self._debug_message('No session[username], trying anonymous')
|
||||
if not username:
|
||||
url = cherrypy.url(qs=request.query_string)
|
||||
self._debug_message(
|
||||
'No username, routing to login_screen with from_page %(url)r',
|
||||
locals(),
|
||||
)
|
||||
response.body = self.login_screen(url)
|
||||
if "Content-Length" in response.headers:
|
||||
# Delete Content-Length header so finalize() recalcs it.
|
||||
del response.headers["Content-Length"]
|
||||
return True
|
||||
self._debug_message('Setting request.login to %(username)r', locals())
|
||||
request.login = username
|
||||
self.on_check(username)
|
||||
|
||||
def _debug_message(self, template, context={}):
|
||||
if not self.debug:
|
||||
return
|
||||
cherrypy.log(template % context, 'TOOLS.SESSAUTH')
|
||||
|
||||
def run(self):
|
||||
request = cherrypy.serving.request
|
||||
response = cherrypy.serving.response
|
||||
|
||||
path = request.path_info
|
||||
if path.endswith('login_screen'):
|
||||
self._debug_message('routing %(path)r to login_screen', locals())
|
||||
response.body = self.login_screen()
|
||||
return True
|
||||
elif path.endswith('do_login'):
|
||||
if request.method != 'POST':
|
||||
response.headers['Allow'] = "POST"
|
||||
self._debug_message('do_login requires POST')
|
||||
raise cherrypy.HTTPError(405)
|
||||
self._debug_message('routing %(path)r to do_login', locals())
|
||||
return self.do_login(**request.params)
|
||||
elif path.endswith('do_logout'):
|
||||
if request.method != 'POST':
|
||||
response.headers['Allow'] = "POST"
|
||||
raise cherrypy.HTTPError(405)
|
||||
self._debug_message('routing %(path)r to do_logout', locals())
|
||||
return self.do_logout(**request.params)
|
||||
else:
|
||||
self._debug_message('No special path, running do_check')
|
||||
return self.do_check()
|
||||
|
||||
|
||||
def session_auth(**kwargs):
|
||||
sa = SessionAuth()
|
||||
for k, v in kwargs.items():
|
||||
setattr(sa, k, v)
|
||||
return sa.run()
|
||||
session_auth.__doc__ = """Session authentication hook.
|
||||
|
||||
Any attribute of the SessionAuth class may be overridden via a keyword arg
|
||||
to this function:
|
||||
|
||||
""" + "\n".join(["%s: %s" % (k, type(getattr(SessionAuth, k)).__name__)
|
||||
for k in dir(SessionAuth) if not k.startswith("__")])
|
||||
|
||||
|
||||
def log_traceback(severity=logging.ERROR, debug=False):
|
||||
"""Write the last error's traceback to the cherrypy error log."""
|
||||
cherrypy.log("", "HTTP", severity=severity, traceback=True)
|
||||
|
||||
|
||||
def log_request_headers(debug=False):
|
||||
"""Write request headers to the cherrypy error log."""
|
||||
h = [" %s: %s" % (k, v) for k, v in cherrypy.serving.request.header_list]
|
||||
cherrypy.log('\nRequest Headers:\n' + '\n'.join(h), "HTTP")
|
||||
|
||||
|
||||
def log_hooks(debug=False):
|
||||
"""Write request.hooks to the cherrypy error log."""
|
||||
request = cherrypy.serving.request
|
||||
|
||||
msg = []
|
||||
# Sort by the standard points if possible.
|
||||
from cherrypy import _cprequest
|
||||
points = _cprequest.hookpoints
|
||||
for k in request.hooks.keys():
|
||||
if k not in points:
|
||||
points.append(k)
|
||||
|
||||
for k in points:
|
||||
msg.append(" %s:" % k)
|
||||
v = request.hooks.get(k, [])
|
||||
v.sort()
|
||||
for h in v:
|
||||
msg.append(" %r" % h)
|
||||
cherrypy.log('\nRequest Hooks for ' + cherrypy.url() +
|
||||
':\n' + '\n'.join(msg), "HTTP")
|
||||
|
||||
|
||||
def redirect(url='', internal=True, debug=False):
|
||||
"""Raise InternalRedirect or HTTPRedirect to the given url."""
|
||||
if debug:
|
||||
cherrypy.log('Redirecting %sto: %s' %
|
||||
({True: 'internal ', False: ''}[internal], url),
|
||||
'TOOLS.REDIRECT')
|
||||
if internal:
|
||||
raise cherrypy.InternalRedirect(url)
|
||||
else:
|
||||
raise cherrypy.HTTPRedirect(url)
|
||||
|
||||
|
||||
def trailing_slash(missing=True, extra=False, status=None, debug=False):
|
||||
"""Redirect if path_info has (missing|extra) trailing slash."""
|
||||
request = cherrypy.serving.request
|
||||
pi = request.path_info
|
||||
|
||||
if debug:
|
||||
cherrypy.log('is_index: %r, missing: %r, extra: %r, path_info: %r' %
|
||||
(request.is_index, missing, extra, pi),
|
||||
'TOOLS.TRAILING_SLASH')
|
||||
if request.is_index is True:
|
||||
if missing:
|
||||
if not pi.endswith('/'):
|
||||
new_url = cherrypy.url(pi + '/', request.query_string)
|
||||
raise cherrypy.HTTPRedirect(new_url, status=status or 301)
|
||||
elif request.is_index is False:
|
||||
if extra:
|
||||
# If pi == '/', don't redirect to ''!
|
||||
if pi.endswith('/') and pi != '/':
|
||||
new_url = cherrypy.url(pi[:-1], request.query_string)
|
||||
raise cherrypy.HTTPRedirect(new_url, status=status or 301)
|
||||
|
||||
|
||||
def flatten(debug=False):
|
||||
"""Wrap response.body in a generator that recursively iterates over body.
|
||||
|
||||
This allows cherrypy.response.body to consist of 'nested generators';
|
||||
that is, a set of generators that yield generators.
|
||||
"""
|
||||
def flattener(input):
|
||||
numchunks = 0
|
||||
for x in input:
|
||||
if not is_iterator(x):
|
||||
numchunks += 1
|
||||
yield x
|
||||
else:
|
||||
for y in flattener(x):
|
||||
numchunks += 1
|
||||
yield y
|
||||
if debug:
|
||||
cherrypy.log('Flattened %d chunks' % numchunks, 'TOOLS.FLATTEN')
|
||||
response = cherrypy.serving.response
|
||||
response.body = flattener(response.body)
|
||||
|
||||
|
||||
def accept(media=None, debug=False):
|
||||
"""Return the client's preferred media-type (from the given Content-Types).
|
||||
|
||||
If 'media' is None (the default), no test will be performed.
|
||||
|
||||
If 'media' is provided, it should be the Content-Type value (as a string)
|
||||
or values (as a list or tuple of strings) which the current resource
|
||||
can emit. The client's acceptable media ranges (as declared in the
|
||||
Accept request header) will be matched in order to these Content-Type
|
||||
values; the first such string is returned. That is, the return value
|
||||
will always be one of the strings provided in the 'media' arg (or None
|
||||
if 'media' is None).
|
||||
|
||||
If no match is found, then HTTPError 406 (Not Acceptable) is raised.
|
||||
Note that most web browsers send */* as a (low-quality) acceptable
|
||||
media range, which should match any Content-Type. In addition, "...if
|
||||
no Accept header field is present, then it is assumed that the client
|
||||
accepts all media types."
|
||||
|
||||
Matching types are checked in order of client preference first,
|
||||
and then in the order of the given 'media' values.
|
||||
|
||||
Note that this function does not honor accept-params (other than "q").
|
||||
"""
|
||||
if not media:
|
||||
return
|
||||
if isinstance(media, basestring):
|
||||
media = [media]
|
||||
request = cherrypy.serving.request
|
||||
|
||||
# Parse the Accept request header, and try to match one
|
||||
# of the requested media-ranges (in order of preference).
|
||||
ranges = request.headers.elements('Accept')
|
||||
if not ranges:
|
||||
# Any media type is acceptable.
|
||||
if debug:
|
||||
cherrypy.log('No Accept header elements', 'TOOLS.ACCEPT')
|
||||
return media[0]
|
||||
else:
|
||||
# Note that 'ranges' is sorted in order of preference
|
||||
for element in ranges:
|
||||
if element.qvalue > 0:
|
||||
if element.value == "*/*":
|
||||
# Matches any type or subtype
|
||||
if debug:
|
||||
cherrypy.log('Match due to */*', 'TOOLS.ACCEPT')
|
||||
return media[0]
|
||||
elif element.value.endswith("/*"):
|
||||
# Matches any subtype
|
||||
mtype = element.value[:-1] # Keep the slash
|
||||
for m in media:
|
||||
if m.startswith(mtype):
|
||||
if debug:
|
||||
cherrypy.log('Match due to %s' % element.value,
|
||||
'TOOLS.ACCEPT')
|
||||
return m
|
||||
else:
|
||||
# Matches exact value
|
||||
if element.value in media:
|
||||
if debug:
|
||||
cherrypy.log('Match due to %s' % element.value,
|
||||
'TOOLS.ACCEPT')
|
||||
return element.value
|
||||
|
||||
# No suitable media-range found.
|
||||
ah = request.headers.get('Accept')
|
||||
if ah is None:
|
||||
msg = "Your client did not send an Accept header."
|
||||
else:
|
||||
msg = "Your client sent this Accept header: %s." % ah
|
||||
msg += (" But this resource only emits these media types: %s." %
|
||||
", ".join(media))
|
||||
raise cherrypy.HTTPError(406, msg)
|
||||
|
||||
|
||||
class MonitoredHeaderMap(_httputil.HeaderMap):
|
||||
|
||||
def __init__(self):
|
||||
self.accessed_headers = set()
|
||||
|
||||
def __getitem__(self, key):
|
||||
self.accessed_headers.add(key)
|
||||
return _httputil.HeaderMap.__getitem__(self, key)
|
||||
|
||||
def __contains__(self, key):
|
||||
self.accessed_headers.add(key)
|
||||
return _httputil.HeaderMap.__contains__(self, key)
|
||||
|
||||
def get(self, key, default=None):
|
||||
self.accessed_headers.add(key)
|
||||
return _httputil.HeaderMap.get(self, key, default=default)
|
||||
|
||||
if hasattr({}, 'has_key'):
|
||||
# Python 2
|
||||
def has_key(self, key):
|
||||
self.accessed_headers.add(key)
|
||||
return _httputil.HeaderMap.has_key(self, key)
|
||||
|
||||
|
||||
def autovary(ignore=None, debug=False):
|
||||
"""Auto-populate the Vary response header based on request.header access.
|
||||
"""
|
||||
request = cherrypy.serving.request
|
||||
|
||||
req_h = request.headers
|
||||
request.headers = MonitoredHeaderMap()
|
||||
request.headers.update(req_h)
|
||||
if ignore is None:
|
||||
ignore = set(['Content-Disposition', 'Content-Length', 'Content-Type'])
|
||||
|
||||
def set_response_header():
|
||||
resp_h = cherrypy.serving.response.headers
|
||||
v = set([e.value for e in resp_h.elements('Vary')])
|
||||
if debug:
|
||||
cherrypy.log(
|
||||
'Accessed headers: %s' % request.headers.accessed_headers,
|
||||
'TOOLS.AUTOVARY')
|
||||
v = v.union(request.headers.accessed_headers)
|
||||
v = v.difference(ignore)
|
||||
v = list(v)
|
||||
v.sort()
|
||||
resp_h['Vary'] = ', '.join(v)
|
||||
request.hooks.attach('before_finalize', set_response_header, 95)
|
||||
421
lib/cherrypy/lib/encoding.py
Normal file
421
lib/cherrypy/lib/encoding.py
Normal file
@@ -0,0 +1,421 @@
|
||||
import struct
|
||||
import time
|
||||
|
||||
import cherrypy
|
||||
from cherrypy._cpcompat import basestring, BytesIO, ntob, set, unicodestr
|
||||
from cherrypy.lib import file_generator
|
||||
from cherrypy.lib import is_closable_iterator
|
||||
from cherrypy.lib import set_vary_header
|
||||
|
||||
|
||||
def decode(encoding=None, default_encoding='utf-8'):
|
||||
"""Replace or extend the list of charsets used to decode a request entity.
|
||||
|
||||
Either argument may be a single string or a list of strings.
|
||||
|
||||
encoding
|
||||
If not None, restricts the set of charsets attempted while decoding
|
||||
a request entity to the given set (even if a different charset is
|
||||
given in the Content-Type request header).
|
||||
|
||||
default_encoding
|
||||
Only in effect if the 'encoding' argument is not given.
|
||||
If given, the set of charsets attempted while decoding a request
|
||||
entity is *extended* with the given value(s).
|
||||
|
||||
"""
|
||||
body = cherrypy.request.body
|
||||
if encoding is not None:
|
||||
if not isinstance(encoding, list):
|
||||
encoding = [encoding]
|
||||
body.attempt_charsets = encoding
|
||||
elif default_encoding:
|
||||
if not isinstance(default_encoding, list):
|
||||
default_encoding = [default_encoding]
|
||||
body.attempt_charsets = body.attempt_charsets + default_encoding
|
||||
|
||||
class UTF8StreamEncoder:
|
||||
def __init__(self, iterator):
|
||||
self._iterator = iterator
|
||||
|
||||
def __iter__(self):
|
||||
return self
|
||||
|
||||
def next(self):
|
||||
return self.__next__()
|
||||
|
||||
def __next__(self):
|
||||
res = next(self._iterator)
|
||||
if isinstance(res, unicodestr):
|
||||
res = res.encode('utf-8')
|
||||
return res
|
||||
|
||||
def close(self):
|
||||
if is_closable_iterator(self._iterator):
|
||||
self._iterator.close()
|
||||
|
||||
def __getattr__(self, attr):
|
||||
if attr.startswith('__'):
|
||||
raise AttributeError(self, attr)
|
||||
return getattr(self._iterator, attr)
|
||||
|
||||
|
||||
class ResponseEncoder:
|
||||
|
||||
default_encoding = 'utf-8'
|
||||
failmsg = "Response body could not be encoded with %r."
|
||||
encoding = None
|
||||
errors = 'strict'
|
||||
text_only = True
|
||||
add_charset = True
|
||||
debug = False
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
for k, v in kwargs.items():
|
||||
setattr(self, k, v)
|
||||
|
||||
self.attempted_charsets = set()
|
||||
request = cherrypy.serving.request
|
||||
if request.handler is not None:
|
||||
# Replace request.handler with self
|
||||
if self.debug:
|
||||
cherrypy.log('Replacing request.handler', 'TOOLS.ENCODE')
|
||||
self.oldhandler = request.handler
|
||||
request.handler = self
|
||||
|
||||
def encode_stream(self, encoding):
|
||||
"""Encode a streaming response body.
|
||||
|
||||
Use a generator wrapper, and just pray it works as the stream is
|
||||
being written out.
|
||||
"""
|
||||
if encoding in self.attempted_charsets:
|
||||
return False
|
||||
self.attempted_charsets.add(encoding)
|
||||
|
||||
def encoder(body):
|
||||
for chunk in body:
|
||||
if isinstance(chunk, unicodestr):
|
||||
chunk = chunk.encode(encoding, self.errors)
|
||||
yield chunk
|
||||
self.body = encoder(self.body)
|
||||
return True
|
||||
|
||||
def encode_string(self, encoding):
|
||||
"""Encode a buffered response body."""
|
||||
if encoding in self.attempted_charsets:
|
||||
return False
|
||||
self.attempted_charsets.add(encoding)
|
||||
body = []
|
||||
for chunk in self.body:
|
||||
if isinstance(chunk, unicodestr):
|
||||
try:
|
||||
chunk = chunk.encode(encoding, self.errors)
|
||||
except (LookupError, UnicodeError):
|
||||
return False
|
||||
body.append(chunk)
|
||||
self.body = body
|
||||
return True
|
||||
|
||||
def find_acceptable_charset(self):
|
||||
request = cherrypy.serving.request
|
||||
response = cherrypy.serving.response
|
||||
|
||||
if self.debug:
|
||||
cherrypy.log('response.stream %r' %
|
||||
response.stream, 'TOOLS.ENCODE')
|
||||
if response.stream:
|
||||
encoder = self.encode_stream
|
||||
else:
|
||||
encoder = self.encode_string
|
||||
if "Content-Length" in response.headers:
|
||||
# Delete Content-Length header so finalize() recalcs it.
|
||||
# Encoded strings may be of different lengths from their
|
||||
# unicode equivalents, and even from each other. For example:
|
||||
# >>> t = u"\u7007\u3040"
|
||||
# >>> len(t)
|
||||
# 2
|
||||
# >>> len(t.encode("UTF-8"))
|
||||
# 6
|
||||
# >>> len(t.encode("utf7"))
|
||||
# 8
|
||||
del response.headers["Content-Length"]
|
||||
|
||||
# Parse the Accept-Charset request header, and try to provide one
|
||||
# of the requested charsets (in order of user preference).
|
||||
encs = request.headers.elements('Accept-Charset')
|
||||
charsets = [enc.value.lower() for enc in encs]
|
||||
if self.debug:
|
||||
cherrypy.log('charsets %s' % repr(charsets), 'TOOLS.ENCODE')
|
||||
|
||||
if self.encoding is not None:
|
||||
# If specified, force this encoding to be used, or fail.
|
||||
encoding = self.encoding.lower()
|
||||
if self.debug:
|
||||
cherrypy.log('Specified encoding %r' %
|
||||
encoding, 'TOOLS.ENCODE')
|
||||
if (not charsets) or "*" in charsets or encoding in charsets:
|
||||
if self.debug:
|
||||
cherrypy.log('Attempting encoding %r' %
|
||||
encoding, 'TOOLS.ENCODE')
|
||||
if encoder(encoding):
|
||||
return encoding
|
||||
else:
|
||||
if not encs:
|
||||
if self.debug:
|
||||
cherrypy.log('Attempting default encoding %r' %
|
||||
self.default_encoding, 'TOOLS.ENCODE')
|
||||
# Any character-set is acceptable.
|
||||
if encoder(self.default_encoding):
|
||||
return self.default_encoding
|
||||
else:
|
||||
raise cherrypy.HTTPError(500, self.failmsg %
|
||||
self.default_encoding)
|
||||
else:
|
||||
for element in encs:
|
||||
if element.qvalue > 0:
|
||||
if element.value == "*":
|
||||
# Matches any charset. Try our default.
|
||||
if self.debug:
|
||||
cherrypy.log('Attempting default encoding due '
|
||||
'to %r' % element, 'TOOLS.ENCODE')
|
||||
if encoder(self.default_encoding):
|
||||
return self.default_encoding
|
||||
else:
|
||||
encoding = element.value
|
||||
if self.debug:
|
||||
cherrypy.log('Attempting encoding %s (qvalue >'
|
||||
'0)' % element, 'TOOLS.ENCODE')
|
||||
if encoder(encoding):
|
||||
return encoding
|
||||
|
||||
if "*" not in charsets:
|
||||
# If no "*" is present in an Accept-Charset field, then all
|
||||
# character sets not explicitly mentioned get a quality
|
||||
# value of 0, except for ISO-8859-1, which gets a quality
|
||||
# value of 1 if not explicitly mentioned.
|
||||
iso = 'iso-8859-1'
|
||||
if iso not in charsets:
|
||||
if self.debug:
|
||||
cherrypy.log('Attempting ISO-8859-1 encoding',
|
||||
'TOOLS.ENCODE')
|
||||
if encoder(iso):
|
||||
return iso
|
||||
|
||||
# No suitable encoding found.
|
||||
ac = request.headers.get('Accept-Charset')
|
||||
if ac is None:
|
||||
msg = "Your client did not send an Accept-Charset header."
|
||||
else:
|
||||
msg = "Your client sent this Accept-Charset header: %s." % ac
|
||||
_charsets = ", ".join(sorted(self.attempted_charsets))
|
||||
msg += " We tried these charsets: %s." % (_charsets,)
|
||||
raise cherrypy.HTTPError(406, msg)
|
||||
|
||||
def __call__(self, *args, **kwargs):
|
||||
response = cherrypy.serving.response
|
||||
self.body = self.oldhandler(*args, **kwargs)
|
||||
|
||||
if isinstance(self.body, basestring):
|
||||
# strings get wrapped in a list because iterating over a single
|
||||
# item list is much faster than iterating over every character
|
||||
# in a long string.
|
||||
if self.body:
|
||||
self.body = [self.body]
|
||||
else:
|
||||
# [''] doesn't evaluate to False, so replace it with [].
|
||||
self.body = []
|
||||
elif hasattr(self.body, 'read'):
|
||||
self.body = file_generator(self.body)
|
||||
elif self.body is None:
|
||||
self.body = []
|
||||
|
||||
ct = response.headers.elements("Content-Type")
|
||||
if self.debug:
|
||||
cherrypy.log('Content-Type: %r' % [str(h)
|
||||
for h in ct], 'TOOLS.ENCODE')
|
||||
if ct and self.add_charset:
|
||||
ct = ct[0]
|
||||
if self.text_only:
|
||||
if ct.value.lower().startswith("text/"):
|
||||
if self.debug:
|
||||
cherrypy.log(
|
||||
'Content-Type %s starts with "text/"' % ct,
|
||||
'TOOLS.ENCODE')
|
||||
do_find = True
|
||||
else:
|
||||
if self.debug:
|
||||
cherrypy.log('Not finding because Content-Type %s '
|
||||
'does not start with "text/"' % ct,
|
||||
'TOOLS.ENCODE')
|
||||
do_find = False
|
||||
else:
|
||||
if self.debug:
|
||||
cherrypy.log('Finding because not text_only',
|
||||
'TOOLS.ENCODE')
|
||||
do_find = True
|
||||
|
||||
if do_find:
|
||||
# Set "charset=..." param on response Content-Type header
|
||||
ct.params['charset'] = self.find_acceptable_charset()
|
||||
if self.debug:
|
||||
cherrypy.log('Setting Content-Type %s' % ct,
|
||||
'TOOLS.ENCODE')
|
||||
response.headers["Content-Type"] = str(ct)
|
||||
|
||||
return self.body
|
||||
|
||||
# GZIP
|
||||
|
||||
|
||||
def compress(body, compress_level):
|
||||
"""Compress 'body' at the given compress_level."""
|
||||
import zlib
|
||||
|
||||
# See http://www.gzip.org/zlib/rfc-gzip.html
|
||||
yield ntob('\x1f\x8b') # ID1 and ID2: gzip marker
|
||||
yield ntob('\x08') # CM: compression method
|
||||
yield ntob('\x00') # FLG: none set
|
||||
# MTIME: 4 bytes
|
||||
yield struct.pack("<L", int(time.time()) & int('FFFFFFFF', 16))
|
||||
yield ntob('\x02') # XFL: max compression, slowest algo
|
||||
yield ntob('\xff') # OS: unknown
|
||||
|
||||
crc = zlib.crc32(ntob(""))
|
||||
size = 0
|
||||
zobj = zlib.compressobj(compress_level,
|
||||
zlib.DEFLATED, -zlib.MAX_WBITS,
|
||||
zlib.DEF_MEM_LEVEL, 0)
|
||||
for line in body:
|
||||
size += len(line)
|
||||
crc = zlib.crc32(line, crc)
|
||||
yield zobj.compress(line)
|
||||
yield zobj.flush()
|
||||
|
||||
# CRC32: 4 bytes
|
||||
yield struct.pack("<L", crc & int('FFFFFFFF', 16))
|
||||
# ISIZE: 4 bytes
|
||||
yield struct.pack("<L", size & int('FFFFFFFF', 16))
|
||||
|
||||
|
||||
def decompress(body):
|
||||
import gzip
|
||||
|
||||
zbuf = BytesIO()
|
||||
zbuf.write(body)
|
||||
zbuf.seek(0)
|
||||
zfile = gzip.GzipFile(mode='rb', fileobj=zbuf)
|
||||
data = zfile.read()
|
||||
zfile.close()
|
||||
return data
|
||||
|
||||
|
||||
def gzip(compress_level=5, mime_types=['text/html', 'text/plain'],
|
||||
debug=False):
|
||||
"""Try to gzip the response body if Content-Type in mime_types.
|
||||
|
||||
cherrypy.response.headers['Content-Type'] must be set to one of the
|
||||
values in the mime_types arg before calling this function.
|
||||
|
||||
The provided list of mime-types must be of one of the following form:
|
||||
* type/subtype
|
||||
* type/*
|
||||
* type/*+subtype
|
||||
|
||||
No compression is performed if any of the following hold:
|
||||
* The client sends no Accept-Encoding request header
|
||||
* No 'gzip' or 'x-gzip' is present in the Accept-Encoding header
|
||||
* No 'gzip' or 'x-gzip' with a qvalue > 0 is present
|
||||
* The 'identity' value is given with a qvalue > 0.
|
||||
|
||||
"""
|
||||
request = cherrypy.serving.request
|
||||
response = cherrypy.serving.response
|
||||
|
||||
set_vary_header(response, "Accept-Encoding")
|
||||
|
||||
if not response.body:
|
||||
# Response body is empty (might be a 304 for instance)
|
||||
if debug:
|
||||
cherrypy.log('No response body', context='TOOLS.GZIP')
|
||||
return
|
||||
|
||||
# If returning cached content (which should already have been gzipped),
|
||||
# don't re-zip.
|
||||
if getattr(request, "cached", False):
|
||||
if debug:
|
||||
cherrypy.log('Not gzipping cached response', context='TOOLS.GZIP')
|
||||
return
|
||||
|
||||
acceptable = request.headers.elements('Accept-Encoding')
|
||||
if not acceptable:
|
||||
# If no Accept-Encoding field is present in a request,
|
||||
# the server MAY assume that the client will accept any
|
||||
# content coding. In this case, if "identity" is one of
|
||||
# the available content-codings, then the server SHOULD use
|
||||
# the "identity" content-coding, unless it has additional
|
||||
# information that a different content-coding is meaningful
|
||||
# to the client.
|
||||
if debug:
|
||||
cherrypy.log('No Accept-Encoding', context='TOOLS.GZIP')
|
||||
return
|
||||
|
||||
ct = response.headers.get('Content-Type', '').split(';')[0]
|
||||
for coding in acceptable:
|
||||
if coding.value == 'identity' and coding.qvalue != 0:
|
||||
if debug:
|
||||
cherrypy.log('Non-zero identity qvalue: %s' % coding,
|
||||
context='TOOLS.GZIP')
|
||||
return
|
||||
if coding.value in ('gzip', 'x-gzip'):
|
||||
if coding.qvalue == 0:
|
||||
if debug:
|
||||
cherrypy.log('Zero gzip qvalue: %s' % coding,
|
||||
context='TOOLS.GZIP')
|
||||
return
|
||||
|
||||
if ct not in mime_types:
|
||||
# If the list of provided mime-types contains tokens
|
||||
# such as 'text/*' or 'application/*+xml',
|
||||
# we go through them and find the most appropriate one
|
||||
# based on the given content-type.
|
||||
# The pattern matching is only caring about the most
|
||||
# common cases, as stated above, and doesn't support
|
||||
# for extra parameters.
|
||||
found = False
|
||||
if '/' in ct:
|
||||
ct_media_type, ct_sub_type = ct.split('/')
|
||||
for mime_type in mime_types:
|
||||
if '/' in mime_type:
|
||||
media_type, sub_type = mime_type.split('/')
|
||||
if ct_media_type == media_type:
|
||||
if sub_type == '*':
|
||||
found = True
|
||||
break
|
||||
elif '+' in sub_type and '+' in ct_sub_type:
|
||||
ct_left, ct_right = ct_sub_type.split('+')
|
||||
left, right = sub_type.split('+')
|
||||
if left == '*' and ct_right == right:
|
||||
found = True
|
||||
break
|
||||
|
||||
if not found:
|
||||
if debug:
|
||||
cherrypy.log('Content-Type %s not in mime_types %r' %
|
||||
(ct, mime_types), context='TOOLS.GZIP')
|
||||
return
|
||||
|
||||
if debug:
|
||||
cherrypy.log('Gzipping', context='TOOLS.GZIP')
|
||||
# Return a generator that compresses the page
|
||||
response.headers['Content-Encoding'] = 'gzip'
|
||||
response.body = compress(response.body, compress_level)
|
||||
if "Content-Length" in response.headers:
|
||||
# Delete Content-Length header so finalize() recalcs it.
|
||||
del response.headers["Content-Length"]
|
||||
|
||||
return
|
||||
|
||||
if debug:
|
||||
cherrypy.log('No acceptable encoding found.', context='GZIP')
|
||||
cherrypy.HTTPError(406, "identity, gzip").set_response()
|
||||
217
lib/cherrypy/lib/gctools.py
Normal file
217
lib/cherrypy/lib/gctools.py
Normal file
@@ -0,0 +1,217 @@
|
||||
import gc
|
||||
import inspect
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
|
||||
try:
|
||||
import objgraph
|
||||
except ImportError:
|
||||
objgraph = None
|
||||
|
||||
import cherrypy
|
||||
from cherrypy import _cprequest, _cpwsgi
|
||||
from cherrypy.process.plugins import SimplePlugin
|
||||
|
||||
|
||||
class ReferrerTree(object):
|
||||
|
||||
"""An object which gathers all referrers of an object to a given depth."""
|
||||
|
||||
peek_length = 40
|
||||
|
||||
def __init__(self, ignore=None, maxdepth=2, maxparents=10):
|
||||
self.ignore = ignore or []
|
||||
self.ignore.append(inspect.currentframe().f_back)
|
||||
self.maxdepth = maxdepth
|
||||
self.maxparents = maxparents
|
||||
|
||||
def ascend(self, obj, depth=1):
|
||||
"""Return a nested list containing referrers of the given object."""
|
||||
depth += 1
|
||||
parents = []
|
||||
|
||||
# Gather all referrers in one step to minimize
|
||||
# cascading references due to repr() logic.
|
||||
refs = gc.get_referrers(obj)
|
||||
self.ignore.append(refs)
|
||||
if len(refs) > self.maxparents:
|
||||
return [("[%s referrers]" % len(refs), [])]
|
||||
|
||||
try:
|
||||
ascendcode = self.ascend.__code__
|
||||
except AttributeError:
|
||||
ascendcode = self.ascend.im_func.func_code
|
||||
for parent in refs:
|
||||
if inspect.isframe(parent) and parent.f_code is ascendcode:
|
||||
continue
|
||||
if parent in self.ignore:
|
||||
continue
|
||||
if depth <= self.maxdepth:
|
||||
parents.append((parent, self.ascend(parent, depth)))
|
||||
else:
|
||||
parents.append((parent, []))
|
||||
|
||||
return parents
|
||||
|
||||
def peek(self, s):
|
||||
"""Return s, restricted to a sane length."""
|
||||
if len(s) > (self.peek_length + 3):
|
||||
half = self.peek_length // 2
|
||||
return s[:half] + '...' + s[-half:]
|
||||
else:
|
||||
return s
|
||||
|
||||
def _format(self, obj, descend=True):
|
||||
"""Return a string representation of a single object."""
|
||||
if inspect.isframe(obj):
|
||||
filename, lineno, func, context, index = inspect.getframeinfo(obj)
|
||||
return "<frame of function '%s'>" % func
|
||||
|
||||
if not descend:
|
||||
return self.peek(repr(obj))
|
||||
|
||||
if isinstance(obj, dict):
|
||||
return "{" + ", ".join(["%s: %s" % (self._format(k, descend=False),
|
||||
self._format(v, descend=False))
|
||||
for k, v in obj.items()]) + "}"
|
||||
elif isinstance(obj, list):
|
||||
return "[" + ", ".join([self._format(item, descend=False)
|
||||
for item in obj]) + "]"
|
||||
elif isinstance(obj, tuple):
|
||||
return "(" + ", ".join([self._format(item, descend=False)
|
||||
for item in obj]) + ")"
|
||||
|
||||
r = self.peek(repr(obj))
|
||||
if isinstance(obj, (str, int, float)):
|
||||
return r
|
||||
return "%s: %s" % (type(obj), r)
|
||||
|
||||
def format(self, tree):
|
||||
"""Return a list of string reprs from a nested list of referrers."""
|
||||
output = []
|
||||
|
||||
def ascend(branch, depth=1):
|
||||
for parent, grandparents in branch:
|
||||
output.append((" " * depth) + self._format(parent))
|
||||
if grandparents:
|
||||
ascend(grandparents, depth + 1)
|
||||
ascend(tree)
|
||||
return output
|
||||
|
||||
|
||||
def get_instances(cls):
|
||||
return [x for x in gc.get_objects() if isinstance(x, cls)]
|
||||
|
||||
|
||||
class RequestCounter(SimplePlugin):
|
||||
|
||||
def start(self):
|
||||
self.count = 0
|
||||
|
||||
def before_request(self):
|
||||
self.count += 1
|
||||
|
||||
def after_request(self):
|
||||
self.count -= 1
|
||||
request_counter = RequestCounter(cherrypy.engine)
|
||||
request_counter.subscribe()
|
||||
|
||||
|
||||
def get_context(obj):
|
||||
if isinstance(obj, _cprequest.Request):
|
||||
return "path=%s;stage=%s" % (obj.path_info, obj.stage)
|
||||
elif isinstance(obj, _cprequest.Response):
|
||||
return "status=%s" % obj.status
|
||||
elif isinstance(obj, _cpwsgi.AppResponse):
|
||||
return "PATH_INFO=%s" % obj.environ.get('PATH_INFO', '')
|
||||
elif hasattr(obj, "tb_lineno"):
|
||||
return "tb_lineno=%s" % obj.tb_lineno
|
||||
return ""
|
||||
|
||||
|
||||
class GCRoot(object):
|
||||
|
||||
"""A CherryPy page handler for testing reference leaks."""
|
||||
|
||||
classes = [
|
||||
(_cprequest.Request, 2, 2,
|
||||
"Should be 1 in this request thread and 1 in the main thread."),
|
||||
(_cprequest.Response, 2, 2,
|
||||
"Should be 1 in this request thread and 1 in the main thread."),
|
||||
(_cpwsgi.AppResponse, 1, 1,
|
||||
"Should be 1 in this request thread only."),
|
||||
]
|
||||
|
||||
def index(self):
|
||||
return "Hello, world!"
|
||||
index.exposed = True
|
||||
|
||||
def stats(self):
|
||||
output = ["Statistics:"]
|
||||
|
||||
for trial in range(10):
|
||||
if request_counter.count > 0:
|
||||
break
|
||||
time.sleep(0.5)
|
||||
else:
|
||||
output.append("\nNot all requests closed properly.")
|
||||
|
||||
# gc_collect isn't perfectly synchronous, because it may
|
||||
# break reference cycles that then take time to fully
|
||||
# finalize. Call it thrice and hope for the best.
|
||||
gc.collect()
|
||||
gc.collect()
|
||||
unreachable = gc.collect()
|
||||
if unreachable:
|
||||
if objgraph is not None:
|
||||
final = objgraph.by_type('Nondestructible')
|
||||
if final:
|
||||
objgraph.show_backrefs(final, filename='finalizers.png')
|
||||
|
||||
trash = {}
|
||||
for x in gc.garbage:
|
||||
trash[type(x)] = trash.get(type(x), 0) + 1
|
||||
if trash:
|
||||
output.insert(0, "\n%s unreachable objects:" % unreachable)
|
||||
trash = [(v, k) for k, v in trash.items()]
|
||||
trash.sort()
|
||||
for pair in trash:
|
||||
output.append(" " + repr(pair))
|
||||
|
||||
# Check declared classes to verify uncollected instances.
|
||||
# These don't have to be part of a cycle; they can be
|
||||
# any objects that have unanticipated referrers that keep
|
||||
# them from being collected.
|
||||
allobjs = {}
|
||||
for cls, minobj, maxobj, msg in self.classes:
|
||||
allobjs[cls] = get_instances(cls)
|
||||
|
||||
for cls, minobj, maxobj, msg in self.classes:
|
||||
objs = allobjs[cls]
|
||||
lenobj = len(objs)
|
||||
if lenobj < minobj or lenobj > maxobj:
|
||||
if minobj == maxobj:
|
||||
output.append(
|
||||
"\nExpected %s %r references, got %s." %
|
||||
(minobj, cls, lenobj))
|
||||
else:
|
||||
output.append(
|
||||
"\nExpected %s to %s %r references, got %s." %
|
||||
(minobj, maxobj, cls, lenobj))
|
||||
|
||||
for obj in objs:
|
||||
if objgraph is not None:
|
||||
ig = [id(objs), id(inspect.currentframe())]
|
||||
fname = "graph_%s_%s.png" % (cls.__name__, id(obj))
|
||||
objgraph.show_backrefs(
|
||||
obj, extra_ignore=ig, max_depth=4, too_many=20,
|
||||
filename=fname, extra_info=get_context)
|
||||
output.append("\nReferrers for %s (refcount=%s):" %
|
||||
(repr(obj), sys.getrefcount(obj)))
|
||||
t = ReferrerTree(ignore=[objs], maxdepth=3)
|
||||
tree = t.ascend(obj)
|
||||
output.extend(t.format(tree))
|
||||
|
||||
return "\n".join(output)
|
||||
stats.exposed = True
|
||||
6
lib/cherrypy/lib/http.py
Normal file
6
lib/cherrypy/lib/http.py
Normal file
@@ -0,0 +1,6 @@
|
||||
import warnings
|
||||
warnings.warn('cherrypy.lib.http has been deprecated and will be removed '
|
||||
'in CherryPy 3.3 use cherrypy.lib.httputil instead.',
|
||||
DeprecationWarning)
|
||||
|
||||
from cherrypy.lib.httputil import *
|
||||
371
lib/cherrypy/lib/httpauth.py
Normal file
371
lib/cherrypy/lib/httpauth.py
Normal file
@@ -0,0 +1,371 @@
|
||||
"""
|
||||
This module defines functions to implement HTTP Digest Authentication
|
||||
(:rfc:`2617`).
|
||||
This has full compliance with 'Digest' and 'Basic' authentication methods. In
|
||||
'Digest' it supports both MD5 and MD5-sess algorithms.
|
||||
|
||||
Usage:
|
||||
First use 'doAuth' to request the client authentication for a
|
||||
certain resource. You should send an httplib.UNAUTHORIZED response to the
|
||||
client so he knows he has to authenticate itself.
|
||||
|
||||
Then use 'parseAuthorization' to retrieve the 'auth_map' used in
|
||||
'checkResponse'.
|
||||
|
||||
To use 'checkResponse' you must have already verified the password
|
||||
associated with the 'username' key in 'auth_map' dict. Then you use the
|
||||
'checkResponse' function to verify if the password matches the one sent
|
||||
by the client.
|
||||
|
||||
SUPPORTED_ALGORITHM - list of supported 'Digest' algorithms
|
||||
SUPPORTED_QOP - list of supported 'Digest' 'qop'.
|
||||
"""
|
||||
__version__ = 1, 0, 1
|
||||
__author__ = "Tiago Cogumbreiro <cogumbreiro@users.sf.net>"
|
||||
__credits__ = """
|
||||
Peter van Kampen for its recipe which implement most of Digest
|
||||
authentication:
|
||||
http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/302378
|
||||
"""
|
||||
|
||||
__license__ = """
|
||||
Copyright (c) 2005, Tiago Cogumbreiro <cogumbreiro@users.sf.net>
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
* Neither the name of Sylvain Hellegouarch nor the names of his
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
"""
|
||||
|
||||
__all__ = ("digestAuth", "basicAuth", "doAuth", "checkResponse",
|
||||
"parseAuthorization", "SUPPORTED_ALGORITHM", "md5SessionKey",
|
||||
"calculateNonce", "SUPPORTED_QOP")
|
||||
|
||||
##########################################################################
|
||||
import time
|
||||
from cherrypy._cpcompat import base64_decode, ntob, md5
|
||||
from cherrypy._cpcompat import parse_http_list, parse_keqv_list
|
||||
|
||||
MD5 = "MD5"
|
||||
MD5_SESS = "MD5-sess"
|
||||
AUTH = "auth"
|
||||
AUTH_INT = "auth-int"
|
||||
|
||||
SUPPORTED_ALGORITHM = (MD5, MD5_SESS)
|
||||
SUPPORTED_QOP = (AUTH, AUTH_INT)
|
||||
|
||||
##########################################################################
|
||||
# doAuth
|
||||
#
|
||||
DIGEST_AUTH_ENCODERS = {
|
||||
MD5: lambda val: md5(ntob(val)).hexdigest(),
|
||||
MD5_SESS: lambda val: md5(ntob(val)).hexdigest(),
|
||||
# SHA: lambda val: sha.new(ntob(val)).hexdigest (),
|
||||
}
|
||||
|
||||
|
||||
def calculateNonce(realm, algorithm=MD5):
|
||||
"""This is an auxaliary function that calculates 'nonce' value. It is used
|
||||
to handle sessions."""
|
||||
|
||||
global SUPPORTED_ALGORITHM, DIGEST_AUTH_ENCODERS
|
||||
assert algorithm in SUPPORTED_ALGORITHM
|
||||
|
||||
try:
|
||||
encoder = DIGEST_AUTH_ENCODERS[algorithm]
|
||||
except KeyError:
|
||||
raise NotImplementedError("The chosen algorithm (%s) does not have "
|
||||
"an implementation yet" % algorithm)
|
||||
|
||||
return encoder("%d:%s" % (time.time(), realm))
|
||||
|
||||
|
||||
def digestAuth(realm, algorithm=MD5, nonce=None, qop=AUTH):
|
||||
"""Challenges the client for a Digest authentication."""
|
||||
global SUPPORTED_ALGORITHM, DIGEST_AUTH_ENCODERS, SUPPORTED_QOP
|
||||
assert algorithm in SUPPORTED_ALGORITHM
|
||||
assert qop in SUPPORTED_QOP
|
||||
|
||||
if nonce is None:
|
||||
nonce = calculateNonce(realm, algorithm)
|
||||
|
||||
return 'Digest realm="%s", nonce="%s", algorithm="%s", qop="%s"' % (
|
||||
realm, nonce, algorithm, qop
|
||||
)
|
||||
|
||||
|
||||
def basicAuth(realm):
|
||||
"""Challengenes the client for a Basic authentication."""
|
||||
assert '"' not in realm, "Realms cannot contain the \" (quote) character."
|
||||
|
||||
return 'Basic realm="%s"' % realm
|
||||
|
||||
|
||||
def doAuth(realm):
|
||||
"""'doAuth' function returns the challenge string b giving priority over
|
||||
Digest and fallback to Basic authentication when the browser doesn't
|
||||
support the first one.
|
||||
|
||||
This should be set in the HTTP header under the key 'WWW-Authenticate'."""
|
||||
|
||||
return digestAuth(realm) + " " + basicAuth(realm)
|
||||
|
||||
|
||||
##########################################################################
|
||||
# Parse authorization parameters
|
||||
#
|
||||
def _parseDigestAuthorization(auth_params):
|
||||
# Convert the auth params to a dict
|
||||
items = parse_http_list(auth_params)
|
||||
params = parse_keqv_list(items)
|
||||
|
||||
# Now validate the params
|
||||
|
||||
# Check for required parameters
|
||||
required = ["username", "realm", "nonce", "uri", "response"]
|
||||
for k in required:
|
||||
if k not in params:
|
||||
return None
|
||||
|
||||
# If qop is sent then cnonce and nc MUST be present
|
||||
if "qop" in params and not ("cnonce" in params
|
||||
and "nc" in params):
|
||||
return None
|
||||
|
||||
# If qop is not sent, neither cnonce nor nc can be present
|
||||
if ("cnonce" in params or "nc" in params) and \
|
||||
"qop" not in params:
|
||||
return None
|
||||
|
||||
return params
|
||||
|
||||
|
||||
def _parseBasicAuthorization(auth_params):
|
||||
username, password = base64_decode(auth_params).split(":", 1)
|
||||
return {"username": username, "password": password}
|
||||
|
||||
AUTH_SCHEMES = {
|
||||
"basic": _parseBasicAuthorization,
|
||||
"digest": _parseDigestAuthorization,
|
||||
}
|
||||
|
||||
|
||||
def parseAuthorization(credentials):
|
||||
"""parseAuthorization will convert the value of the 'Authorization' key in
|
||||
the HTTP header to a map itself. If the parsing fails 'None' is returned.
|
||||
"""
|
||||
|
||||
global AUTH_SCHEMES
|
||||
|
||||
auth_scheme, auth_params = credentials.split(" ", 1)
|
||||
auth_scheme = auth_scheme.lower()
|
||||
|
||||
parser = AUTH_SCHEMES[auth_scheme]
|
||||
params = parser(auth_params)
|
||||
|
||||
if params is None:
|
||||
return
|
||||
|
||||
assert "auth_scheme" not in params
|
||||
params["auth_scheme"] = auth_scheme
|
||||
return params
|
||||
|
||||
|
||||
##########################################################################
|
||||
# Check provided response for a valid password
|
||||
#
|
||||
def md5SessionKey(params, password):
|
||||
"""
|
||||
If the "algorithm" directive's value is "MD5-sess", then A1
|
||||
[the session key] is calculated only once - on the first request by the
|
||||
client following receipt of a WWW-Authenticate challenge from the server.
|
||||
|
||||
This creates a 'session key' for the authentication of subsequent
|
||||
requests and responses which is different for each "authentication
|
||||
session", thus limiting the amount of material hashed with any one
|
||||
key.
|
||||
|
||||
Because the server need only use the hash of the user
|
||||
credentials in order to create the A1 value, this construction could
|
||||
be used in conjunction with a third party authentication service so
|
||||
that the web server would not need the actual password value. The
|
||||
specification of such a protocol is beyond the scope of this
|
||||
specification.
|
||||
"""
|
||||
|
||||
keys = ("username", "realm", "nonce", "cnonce")
|
||||
params_copy = {}
|
||||
for key in keys:
|
||||
params_copy[key] = params[key]
|
||||
|
||||
params_copy["algorithm"] = MD5_SESS
|
||||
return _A1(params_copy, password)
|
||||
|
||||
|
||||
def _A1(params, password):
|
||||
algorithm = params.get("algorithm", MD5)
|
||||
H = DIGEST_AUTH_ENCODERS[algorithm]
|
||||
|
||||
if algorithm == MD5:
|
||||
# If the "algorithm" directive's value is "MD5" or is
|
||||
# unspecified, then A1 is:
|
||||
# A1 = unq(username-value) ":" unq(realm-value) ":" passwd
|
||||
return "%s:%s:%s" % (params["username"], params["realm"], password)
|
||||
|
||||
elif algorithm == MD5_SESS:
|
||||
|
||||
# This is A1 if qop is set
|
||||
# A1 = H( unq(username-value) ":" unq(realm-value) ":" passwd )
|
||||
# ":" unq(nonce-value) ":" unq(cnonce-value)
|
||||
h_a1 = H("%s:%s:%s" % (params["username"], params["realm"], password))
|
||||
return "%s:%s:%s" % (h_a1, params["nonce"], params["cnonce"])
|
||||
|
||||
|
||||
def _A2(params, method, kwargs):
|
||||
# If the "qop" directive's value is "auth" or is unspecified, then A2 is:
|
||||
# A2 = Method ":" digest-uri-value
|
||||
|
||||
qop = params.get("qop", "auth")
|
||||
if qop == "auth":
|
||||
return method + ":" + params["uri"]
|
||||
elif qop == "auth-int":
|
||||
# If the "qop" value is "auth-int", then A2 is:
|
||||
# A2 = Method ":" digest-uri-value ":" H(entity-body)
|
||||
entity_body = kwargs.get("entity_body", "")
|
||||
H = kwargs["H"]
|
||||
|
||||
return "%s:%s:%s" % (
|
||||
method,
|
||||
params["uri"],
|
||||
H(entity_body)
|
||||
)
|
||||
|
||||
else:
|
||||
raise NotImplementedError("The 'qop' method is unknown: %s" % qop)
|
||||
|
||||
|
||||
def _computeDigestResponse(auth_map, password, method="GET", A1=None,
|
||||
**kwargs):
|
||||
"""
|
||||
Generates a response respecting the algorithm defined in RFC 2617
|
||||
"""
|
||||
params = auth_map
|
||||
|
||||
algorithm = params.get("algorithm", MD5)
|
||||
|
||||
H = DIGEST_AUTH_ENCODERS[algorithm]
|
||||
KD = lambda secret, data: H(secret + ":" + data)
|
||||
|
||||
qop = params.get("qop", None)
|
||||
|
||||
H_A2 = H(_A2(params, method, kwargs))
|
||||
|
||||
if algorithm == MD5_SESS and A1 is not None:
|
||||
H_A1 = H(A1)
|
||||
else:
|
||||
H_A1 = H(_A1(params, password))
|
||||
|
||||
if qop in ("auth", "auth-int"):
|
||||
# If the "qop" value is "auth" or "auth-int":
|
||||
# request-digest = <"> < KD ( H(A1), unq(nonce-value)
|
||||
# ":" nc-value
|
||||
# ":" unq(cnonce-value)
|
||||
# ":" unq(qop-value)
|
||||
# ":" H(A2)
|
||||
# ) <">
|
||||
request = "%s:%s:%s:%s:%s" % (
|
||||
params["nonce"],
|
||||
params["nc"],
|
||||
params["cnonce"],
|
||||
params["qop"],
|
||||
H_A2,
|
||||
)
|
||||
elif qop is None:
|
||||
# If the "qop" directive is not present (this construction is
|
||||
# for compatibility with RFC 2069):
|
||||
# request-digest =
|
||||
# <"> < KD ( H(A1), unq(nonce-value) ":" H(A2) ) > <">
|
||||
request = "%s:%s" % (params["nonce"], H_A2)
|
||||
|
||||
return KD(H_A1, request)
|
||||
|
||||
|
||||
def _checkDigestResponse(auth_map, password, method="GET", A1=None, **kwargs):
|
||||
"""This function is used to verify the response given by the client when
|
||||
he tries to authenticate.
|
||||
Optional arguments:
|
||||
entity_body - when 'qop' is set to 'auth-int' you MUST provide the
|
||||
raw data you are going to send to the client (usually the
|
||||
HTML page.
|
||||
request_uri - the uri from the request line compared with the 'uri'
|
||||
directive of the authorization map. They must represent
|
||||
the same resource (unused at this time).
|
||||
"""
|
||||
|
||||
if auth_map['realm'] != kwargs.get('realm', None):
|
||||
return False
|
||||
|
||||
response = _computeDigestResponse(
|
||||
auth_map, password, method, A1, **kwargs)
|
||||
|
||||
return response == auth_map["response"]
|
||||
|
||||
|
||||
def _checkBasicResponse(auth_map, password, method='GET', encrypt=None,
|
||||
**kwargs):
|
||||
# Note that the Basic response doesn't provide the realm value so we cannot
|
||||
# test it
|
||||
pass_through = lambda password, username=None: password
|
||||
encrypt = encrypt or pass_through
|
||||
try:
|
||||
candidate = encrypt(auth_map["password"], auth_map["username"])
|
||||
except TypeError:
|
||||
# if encrypt only takes one parameter, it's the password
|
||||
candidate = encrypt(auth_map["password"])
|
||||
return candidate == password
|
||||
|
||||
AUTH_RESPONSES = {
|
||||
"basic": _checkBasicResponse,
|
||||
"digest": _checkDigestResponse,
|
||||
}
|
||||
|
||||
|
||||
def checkResponse(auth_map, password, method="GET", encrypt=None, **kwargs):
|
||||
"""'checkResponse' compares the auth_map with the password and optionally
|
||||
other arguments that each implementation might need.
|
||||
|
||||
If the response is of type 'Basic' then the function has the following
|
||||
signature::
|
||||
|
||||
checkBasicResponse(auth_map, password) -> bool
|
||||
|
||||
If the response is of type 'Digest' then the function has the following
|
||||
signature::
|
||||
|
||||
checkDigestResponse(auth_map, password, method='GET', A1=None) -> bool
|
||||
|
||||
The 'A1' argument is only used in MD5_SESS algorithm based responses.
|
||||
Check md5SessionKey() for more info.
|
||||
"""
|
||||
checker = AUTH_RESPONSES[auth_map["auth_scheme"]]
|
||||
return checker(auth_map, password, method=method, encrypt=encrypt,
|
||||
**kwargs)
|
||||
536
lib/cherrypy/lib/httputil.py
Normal file
536
lib/cherrypy/lib/httputil.py
Normal file
@@ -0,0 +1,536 @@
|
||||
"""HTTP library functions.
|
||||
|
||||
This module contains functions for building an HTTP application
|
||||
framework: any one, not just one whose name starts with "Ch". ;) If you
|
||||
reference any modules from some popular framework inside *this* module,
|
||||
FuManChu will personally hang you up by your thumbs and submit you
|
||||
to a public caning.
|
||||
"""
|
||||
|
||||
from binascii import b2a_base64
|
||||
from cherrypy._cpcompat import BaseHTTPRequestHandler, HTTPDate, ntob, ntou
|
||||
from cherrypy._cpcompat import basestring, bytestr, iteritems, nativestr
|
||||
from cherrypy._cpcompat import reversed, sorted, unicodestr, unquote_qs
|
||||
response_codes = BaseHTTPRequestHandler.responses.copy()
|
||||
|
||||
# From https://bitbucket.org/cherrypy/cherrypy/issue/361
|
||||
response_codes[500] = ('Internal Server Error',
|
||||
'The server encountered an unexpected condition '
|
||||
'which prevented it from fulfilling the request.')
|
||||
response_codes[503] = ('Service Unavailable',
|
||||
'The server is currently unable to handle the '
|
||||
'request due to a temporary overloading or '
|
||||
'maintenance of the server.')
|
||||
|
||||
import re
|
||||
import urllib
|
||||
|
||||
|
||||
def urljoin(*atoms):
|
||||
"""Return the given path \*atoms, joined into a single URL.
|
||||
|
||||
This will correctly join a SCRIPT_NAME and PATH_INFO into the
|
||||
original URL, even if either atom is blank.
|
||||
"""
|
||||
url = "/".join([x for x in atoms if x])
|
||||
while "//" in url:
|
||||
url = url.replace("//", "/")
|
||||
# Special-case the final url of "", and return "/" instead.
|
||||
return url or "/"
|
||||
|
||||
|
||||
def urljoin_bytes(*atoms):
|
||||
"""Return the given path *atoms, joined into a single URL.
|
||||
|
||||
This will correctly join a SCRIPT_NAME and PATH_INFO into the
|
||||
original URL, even if either atom is blank.
|
||||
"""
|
||||
url = ntob("/").join([x for x in atoms if x])
|
||||
while ntob("//") in url:
|
||||
url = url.replace(ntob("//"), ntob("/"))
|
||||
# Special-case the final url of "", and return "/" instead.
|
||||
return url or ntob("/")
|
||||
|
||||
|
||||
def protocol_from_http(protocol_str):
|
||||
"""Return a protocol tuple from the given 'HTTP/x.y' string."""
|
||||
return int(protocol_str[5]), int(protocol_str[7])
|
||||
|
||||
|
||||
def get_ranges(headervalue, content_length):
|
||||
"""Return a list of (start, stop) indices from a Range header, or None.
|
||||
|
||||
Each (start, stop) tuple will be composed of two ints, which are suitable
|
||||
for use in a slicing operation. That is, the header "Range: bytes=3-6",
|
||||
if applied against a Python string, is requesting resource[3:7]. This
|
||||
function will return the list [(3, 7)].
|
||||
|
||||
If this function returns an empty list, you should return HTTP 416.
|
||||
"""
|
||||
|
||||
if not headervalue:
|
||||
return None
|
||||
|
||||
result = []
|
||||
bytesunit, byteranges = headervalue.split("=", 1)
|
||||
for brange in byteranges.split(","):
|
||||
start, stop = [x.strip() for x in brange.split("-", 1)]
|
||||
if start:
|
||||
if not stop:
|
||||
stop = content_length - 1
|
||||
start, stop = int(start), int(stop)
|
||||
if start >= content_length:
|
||||
# From rfc 2616 sec 14.16:
|
||||
# "If the server receives a request (other than one
|
||||
# including an If-Range request-header field) with an
|
||||
# unsatisfiable Range request-header field (that is,
|
||||
# all of whose byte-range-spec values have a first-byte-pos
|
||||
# value greater than the current length of the selected
|
||||
# resource), it SHOULD return a response code of 416
|
||||
# (Requested range not satisfiable)."
|
||||
continue
|
||||
if stop < start:
|
||||
# From rfc 2616 sec 14.16:
|
||||
# "If the server ignores a byte-range-spec because it
|
||||
# is syntactically invalid, the server SHOULD treat
|
||||
# the request as if the invalid Range header field
|
||||
# did not exist. (Normally, this means return a 200
|
||||
# response containing the full entity)."
|
||||
return None
|
||||
result.append((start, stop + 1))
|
||||
else:
|
||||
if not stop:
|
||||
# See rfc quote above.
|
||||
return None
|
||||
# Negative subscript (last N bytes)
|
||||
#
|
||||
# RFC 2616 Section 14.35.1:
|
||||
# If the entity is shorter than the specified suffix-length,
|
||||
# the entire entity-body is used.
|
||||
if int(stop) > content_length:
|
||||
result.append((0, content_length))
|
||||
else:
|
||||
result.append((content_length - int(stop), content_length))
|
||||
|
||||
return result
|
||||
|
||||
|
||||
class HeaderElement(object):
|
||||
|
||||
"""An element (with parameters) from an HTTP header's element list."""
|
||||
|
||||
def __init__(self, value, params=None):
|
||||
self.value = value
|
||||
if params is None:
|
||||
params = {}
|
||||
self.params = params
|
||||
|
||||
def __cmp__(self, other):
|
||||
return cmp(self.value, other.value)
|
||||
|
||||
def __lt__(self, other):
|
||||
return self.value < other.value
|
||||
|
||||
def __str__(self):
|
||||
p = [";%s=%s" % (k, v) for k, v in iteritems(self.params)]
|
||||
return str("%s%s" % (self.value, "".join(p)))
|
||||
|
||||
def __bytes__(self):
|
||||
return ntob(self.__str__())
|
||||
|
||||
def __unicode__(self):
|
||||
return ntou(self.__str__())
|
||||
|
||||
def parse(elementstr):
|
||||
"""Transform 'token;key=val' to ('token', {'key': 'val'})."""
|
||||
# Split the element into a value and parameters. The 'value' may
|
||||
# be of the form, "token=token", but we don't split that here.
|
||||
atoms = [x.strip() for x in elementstr.split(";") if x.strip()]
|
||||
if not atoms:
|
||||
initial_value = ''
|
||||
else:
|
||||
initial_value = atoms.pop(0).strip()
|
||||
params = {}
|
||||
for atom in atoms:
|
||||
atom = [x.strip() for x in atom.split("=", 1) if x.strip()]
|
||||
key = atom.pop(0)
|
||||
if atom:
|
||||
val = atom[0]
|
||||
else:
|
||||
val = ""
|
||||
params[key] = val
|
||||
return initial_value, params
|
||||
parse = staticmethod(parse)
|
||||
|
||||
def from_str(cls, elementstr):
|
||||
"""Construct an instance from a string of the form 'token;key=val'."""
|
||||
ival, params = cls.parse(elementstr)
|
||||
return cls(ival, params)
|
||||
from_str = classmethod(from_str)
|
||||
|
||||
|
||||
q_separator = re.compile(r'; *q *=')
|
||||
|
||||
|
||||
class AcceptElement(HeaderElement):
|
||||
|
||||
"""An element (with parameters) from an Accept* header's element list.
|
||||
|
||||
AcceptElement objects are comparable; the more-preferred object will be
|
||||
"less than" the less-preferred object. They are also therefore sortable;
|
||||
if you sort a list of AcceptElement objects, they will be listed in
|
||||
priority order; the most preferred value will be first. Yes, it should
|
||||
have been the other way around, but it's too late to fix now.
|
||||
"""
|
||||
|
||||
def from_str(cls, elementstr):
|
||||
qvalue = None
|
||||
# The first "q" parameter (if any) separates the initial
|
||||
# media-range parameter(s) (if any) from the accept-params.
|
||||
atoms = q_separator.split(elementstr, 1)
|
||||
media_range = atoms.pop(0).strip()
|
||||
if atoms:
|
||||
# The qvalue for an Accept header can have extensions. The other
|
||||
# headers cannot, but it's easier to parse them as if they did.
|
||||
qvalue = HeaderElement.from_str(atoms[0].strip())
|
||||
|
||||
media_type, params = cls.parse(media_range)
|
||||
if qvalue is not None:
|
||||
params["q"] = qvalue
|
||||
return cls(media_type, params)
|
||||
from_str = classmethod(from_str)
|
||||
|
||||
def qvalue(self):
|
||||
val = self.params.get("q", "1")
|
||||
if isinstance(val, HeaderElement):
|
||||
val = val.value
|
||||
return float(val)
|
||||
qvalue = property(qvalue, doc="The qvalue, or priority, of this value.")
|
||||
|
||||
def __cmp__(self, other):
|
||||
diff = cmp(self.qvalue, other.qvalue)
|
||||
if diff == 0:
|
||||
diff = cmp(str(self), str(other))
|
||||
return diff
|
||||
|
||||
def __lt__(self, other):
|
||||
if self.qvalue == other.qvalue:
|
||||
return str(self) < str(other)
|
||||
else:
|
||||
return self.qvalue < other.qvalue
|
||||
|
||||
RE_HEADER_SPLIT = re.compile(',(?=(?:[^"]*"[^"]*")*[^"]*$)')
|
||||
def header_elements(fieldname, fieldvalue):
|
||||
"""Return a sorted HeaderElement list from a comma-separated header string.
|
||||
"""
|
||||
if not fieldvalue:
|
||||
return []
|
||||
|
||||
result = []
|
||||
for element in RE_HEADER_SPLIT.split(fieldvalue):
|
||||
if fieldname.startswith("Accept") or fieldname == 'TE':
|
||||
hv = AcceptElement.from_str(element)
|
||||
else:
|
||||
hv = HeaderElement.from_str(element)
|
||||
result.append(hv)
|
||||
|
||||
return list(reversed(sorted(result)))
|
||||
|
||||
|
||||
def decode_TEXT(value):
|
||||
r"""Decode :rfc:`2047` TEXT (e.g. "=?utf-8?q?f=C3=BCr?=" -> "f\xfcr")."""
|
||||
try:
|
||||
# Python 3
|
||||
from email.header import decode_header
|
||||
except ImportError:
|
||||
from email.Header import decode_header
|
||||
atoms = decode_header(value)
|
||||
decodedvalue = ""
|
||||
for atom, charset in atoms:
|
||||
if charset is not None:
|
||||
atom = atom.decode(charset)
|
||||
decodedvalue += atom
|
||||
return decodedvalue
|
||||
|
||||
|
||||
def valid_status(status):
|
||||
"""Return legal HTTP status Code, Reason-phrase and Message.
|
||||
|
||||
The status arg must be an int, or a str that begins with an int.
|
||||
|
||||
If status is an int, or a str and no reason-phrase is supplied,
|
||||
a default reason-phrase will be provided.
|
||||
"""
|
||||
|
||||
if not status:
|
||||
status = 200
|
||||
|
||||
status = str(status)
|
||||
parts = status.split(" ", 1)
|
||||
if len(parts) == 1:
|
||||
# No reason supplied.
|
||||
code, = parts
|
||||
reason = None
|
||||
else:
|
||||
code, reason = parts
|
||||
reason = reason.strip()
|
||||
|
||||
try:
|
||||
code = int(code)
|
||||
except ValueError:
|
||||
raise ValueError("Illegal response status from server "
|
||||
"(%s is non-numeric)." % repr(code))
|
||||
|
||||
if code < 100 or code > 599:
|
||||
raise ValueError("Illegal response status from server "
|
||||
"(%s is out of range)." % repr(code))
|
||||
|
||||
if code not in response_codes:
|
||||
# code is unknown but not illegal
|
||||
default_reason, message = "", ""
|
||||
else:
|
||||
default_reason, message = response_codes[code]
|
||||
|
||||
if reason is None:
|
||||
reason = default_reason
|
||||
|
||||
return code, reason, message
|
||||
|
||||
|
||||
# NOTE: the parse_qs functions that follow are modified version of those
|
||||
# in the python3.0 source - we need to pass through an encoding to the unquote
|
||||
# method, but the default parse_qs function doesn't allow us to. These do.
|
||||
|
||||
def _parse_qs(qs, keep_blank_values=0, strict_parsing=0, encoding='utf-8'):
|
||||
"""Parse a query given as a string argument.
|
||||
|
||||
Arguments:
|
||||
|
||||
qs: URL-encoded query string to be parsed
|
||||
|
||||
keep_blank_values: flag indicating whether blank values in
|
||||
URL encoded queries should be treated as blank strings. A
|
||||
true value indicates that blanks should be retained as blank
|
||||
strings. The default false value indicates that blank values
|
||||
are to be ignored and treated as if they were not included.
|
||||
|
||||
strict_parsing: flag indicating what to do with parsing errors. If
|
||||
false (the default), errors are silently ignored. If true,
|
||||
errors raise a ValueError exception.
|
||||
|
||||
Returns a dict, as G-d intended.
|
||||
"""
|
||||
pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
|
||||
d = {}
|
||||
for name_value in pairs:
|
||||
if not name_value and not strict_parsing:
|
||||
continue
|
||||
nv = name_value.split('=', 1)
|
||||
if len(nv) != 2:
|
||||
if strict_parsing:
|
||||
raise ValueError("bad query field: %r" % (name_value,))
|
||||
# Handle case of a control-name with no equal sign
|
||||
if keep_blank_values:
|
||||
nv.append('')
|
||||
else:
|
||||
continue
|
||||
if len(nv[1]) or keep_blank_values:
|
||||
name = unquote_qs(nv[0], encoding)
|
||||
value = unquote_qs(nv[1], encoding)
|
||||
if name in d:
|
||||
if not isinstance(d[name], list):
|
||||
d[name] = [d[name]]
|
||||
d[name].append(value)
|
||||
else:
|
||||
d[name] = value
|
||||
return d
|
||||
|
||||
|
||||
image_map_pattern = re.compile(r"[0-9]+,[0-9]+")
|
||||
|
||||
|
||||
def parse_query_string(query_string, keep_blank_values=True, encoding='utf-8'):
|
||||
"""Build a params dictionary from a query_string.
|
||||
|
||||
Duplicate key/value pairs in the provided query_string will be
|
||||
returned as {'key': [val1, val2, ...]}. Single key/values will
|
||||
be returned as strings: {'key': 'value'}.
|
||||
"""
|
||||
if image_map_pattern.match(query_string):
|
||||
# Server-side image map. Map the coords to 'x' and 'y'
|
||||
# (like CGI::Request does).
|
||||
pm = query_string.split(",")
|
||||
pm = {'x': int(pm[0]), 'y': int(pm[1])}
|
||||
else:
|
||||
pm = _parse_qs(query_string, keep_blank_values, encoding=encoding)
|
||||
return pm
|
||||
|
||||
|
||||
class CaseInsensitiveDict(dict):
|
||||
|
||||
"""A case-insensitive dict subclass.
|
||||
|
||||
Each key is changed on entry to str(key).title().
|
||||
"""
|
||||
|
||||
def __getitem__(self, key):
|
||||
return dict.__getitem__(self, str(key).title())
|
||||
|
||||
def __setitem__(self, key, value):
|
||||
dict.__setitem__(self, str(key).title(), value)
|
||||
|
||||
def __delitem__(self, key):
|
||||
dict.__delitem__(self, str(key).title())
|
||||
|
||||
def __contains__(self, key):
|
||||
return dict.__contains__(self, str(key).title())
|
||||
|
||||
def get(self, key, default=None):
|
||||
return dict.get(self, str(key).title(), default)
|
||||
|
||||
if hasattr({}, 'has_key'):
|
||||
def has_key(self, key):
|
||||
return str(key).title() in self
|
||||
|
||||
def update(self, E):
|
||||
for k in E.keys():
|
||||
self[str(k).title()] = E[k]
|
||||
|
||||
def fromkeys(cls, seq, value=None):
|
||||
newdict = cls()
|
||||
for k in seq:
|
||||
newdict[str(k).title()] = value
|
||||
return newdict
|
||||
fromkeys = classmethod(fromkeys)
|
||||
|
||||
def setdefault(self, key, x=None):
|
||||
key = str(key).title()
|
||||
try:
|
||||
return self[key]
|
||||
except KeyError:
|
||||
self[key] = x
|
||||
return x
|
||||
|
||||
def pop(self, key, default):
|
||||
return dict.pop(self, str(key).title(), default)
|
||||
|
||||
|
||||
# TEXT = <any OCTET except CTLs, but including LWS>
|
||||
#
|
||||
# A CRLF is allowed in the definition of TEXT only as part of a header
|
||||
# field continuation. It is expected that the folding LWS will be
|
||||
# replaced with a single SP before interpretation of the TEXT value."
|
||||
if nativestr == bytestr:
|
||||
header_translate_table = ''.join([chr(i) for i in xrange(256)])
|
||||
header_translate_deletechars = ''.join(
|
||||
[chr(i) for i in xrange(32)]) + chr(127)
|
||||
else:
|
||||
header_translate_table = None
|
||||
header_translate_deletechars = bytes(range(32)) + bytes([127])
|
||||
|
||||
|
||||
class HeaderMap(CaseInsensitiveDict):
|
||||
|
||||
"""A dict subclass for HTTP request and response headers.
|
||||
|
||||
Each key is changed on entry to str(key).title(). This allows headers
|
||||
to be case-insensitive and avoid duplicates.
|
||||
|
||||
Values are header values (decoded according to :rfc:`2047` if necessary).
|
||||
"""
|
||||
|
||||
protocol = (1, 1)
|
||||
encodings = ["ISO-8859-1"]
|
||||
|
||||
# Someday, when http-bis is done, this will probably get dropped
|
||||
# since few servers, clients, or intermediaries do it. But until then,
|
||||
# we're going to obey the spec as is.
|
||||
# "Words of *TEXT MAY contain characters from character sets other than
|
||||
# ISO-8859-1 only when encoded according to the rules of RFC 2047."
|
||||
use_rfc_2047 = True
|
||||
|
||||
def elements(self, key):
|
||||
"""Return a sorted list of HeaderElements for the given header."""
|
||||
key = str(key).title()
|
||||
value = self.get(key)
|
||||
return header_elements(key, value)
|
||||
|
||||
def values(self, key):
|
||||
"""Return a sorted list of HeaderElement.value for the given header."""
|
||||
return [e.value for e in self.elements(key)]
|
||||
|
||||
def output(self):
|
||||
"""Transform self into a list of (name, value) tuples."""
|
||||
return list(self.encode_header_items(self.items()))
|
||||
|
||||
def encode_header_items(cls, header_items):
|
||||
"""
|
||||
Prepare the sequence of name, value tuples into a form suitable for
|
||||
transmitting on the wire for HTTP.
|
||||
"""
|
||||
for k, v in header_items:
|
||||
if isinstance(k, unicodestr):
|
||||
k = cls.encode(k)
|
||||
|
||||
if not isinstance(v, basestring):
|
||||
v = str(v)
|
||||
|
||||
if isinstance(v, unicodestr):
|
||||
v = cls.encode(v)
|
||||
|
||||
# See header_translate_* constants above.
|
||||
# Replace only if you really know what you're doing.
|
||||
k = k.translate(header_translate_table,
|
||||
header_translate_deletechars)
|
||||
v = v.translate(header_translate_table,
|
||||
header_translate_deletechars)
|
||||
|
||||
yield (k, v)
|
||||
encode_header_items = classmethod(encode_header_items)
|
||||
|
||||
def encode(cls, v):
|
||||
"""Return the given header name or value, encoded for HTTP output."""
|
||||
for enc in cls.encodings:
|
||||
try:
|
||||
return v.encode(enc)
|
||||
except UnicodeEncodeError:
|
||||
continue
|
||||
|
||||
if cls.protocol == (1, 1) and cls.use_rfc_2047:
|
||||
# Encode RFC-2047 TEXT
|
||||
# (e.g. u"\u8200" -> "=?utf-8?b?6IiA?=").
|
||||
# We do our own here instead of using the email module
|
||||
# because we never want to fold lines--folding has
|
||||
# been deprecated by the HTTP working group.
|
||||
v = b2a_base64(v.encode('utf-8'))
|
||||
return (ntob('=?utf-8?b?') + v.strip(ntob('\n')) + ntob('?='))
|
||||
|
||||
raise ValueError("Could not encode header part %r using "
|
||||
"any of the encodings %r." %
|
||||
(v, cls.encodings))
|
||||
encode = classmethod(encode)
|
||||
|
||||
|
||||
class Host(object):
|
||||
|
||||
"""An internet address.
|
||||
|
||||
name
|
||||
Should be the client's host name. If not available (because no DNS
|
||||
lookup is performed), the IP address should be used instead.
|
||||
|
||||
"""
|
||||
|
||||
ip = "0.0.0.0"
|
||||
port = 80
|
||||
name = "unknown.tld"
|
||||
|
||||
def __init__(self, ip, port, name=None):
|
||||
self.ip = ip
|
||||
self.port = port
|
||||
if name is None:
|
||||
name = ip
|
||||
self.name = name
|
||||
|
||||
def __repr__(self):
|
||||
return "httputil.Host(%r, %r, %r)" % (self.ip, self.port, self.name)
|
||||
96
lib/cherrypy/lib/jsontools.py
Normal file
96
lib/cherrypy/lib/jsontools.py
Normal file
@@ -0,0 +1,96 @@
|
||||
import cherrypy
|
||||
from cherrypy._cpcompat import basestring, ntou, json_encode, json_decode
|
||||
|
||||
|
||||
def json_processor(entity):
|
||||
"""Read application/json data into request.json."""
|
||||
if not entity.headers.get(ntou("Content-Length"), ntou("")):
|
||||
raise cherrypy.HTTPError(411)
|
||||
|
||||
body = entity.fp.read()
|
||||
try:
|
||||
cherrypy.serving.request.json = json_decode(body.decode('utf-8'))
|
||||
except ValueError:
|
||||
raise cherrypy.HTTPError(400, 'Invalid JSON document')
|
||||
|
||||
|
||||
def json_in(content_type=[ntou('application/json'), ntou('text/javascript')],
|
||||
force=True, debug=False, processor=json_processor):
|
||||
"""Add a processor to parse JSON request entities:
|
||||
The default processor places the parsed data into request.json.
|
||||
|
||||
Incoming request entities which match the given content_type(s) will
|
||||
be deserialized from JSON to the Python equivalent, and the result
|
||||
stored at cherrypy.request.json. The 'content_type' argument may
|
||||
be a Content-Type string or a list of allowable Content-Type strings.
|
||||
|
||||
If the 'force' argument is True (the default), then entities of other
|
||||
content types will not be allowed; "415 Unsupported Media Type" is
|
||||
raised instead.
|
||||
|
||||
Supply your own processor to use a custom decoder, or to handle the parsed
|
||||
data differently. The processor can be configured via
|
||||
tools.json_in.processor or via the decorator method.
|
||||
|
||||
Note that the deserializer requires the client send a Content-Length
|
||||
request header, or it will raise "411 Length Required". If for any
|
||||
other reason the request entity cannot be deserialized from JSON,
|
||||
it will raise "400 Bad Request: Invalid JSON document".
|
||||
|
||||
You must be using Python 2.6 or greater, or have the 'simplejson'
|
||||
package importable; otherwise, ValueError is raised during processing.
|
||||
"""
|
||||
request = cherrypy.serving.request
|
||||
if isinstance(content_type, basestring):
|
||||
content_type = [content_type]
|
||||
|
||||
if force:
|
||||
if debug:
|
||||
cherrypy.log('Removing body processors %s' %
|
||||
repr(request.body.processors.keys()), 'TOOLS.JSON_IN')
|
||||
request.body.processors.clear()
|
||||
request.body.default_proc = cherrypy.HTTPError(
|
||||
415, 'Expected an entity of content type %s' %
|
||||
', '.join(content_type))
|
||||
|
||||
for ct in content_type:
|
||||
if debug:
|
||||
cherrypy.log('Adding body processor for %s' % ct, 'TOOLS.JSON_IN')
|
||||
request.body.processors[ct] = processor
|
||||
|
||||
|
||||
def json_handler(*args, **kwargs):
|
||||
value = cherrypy.serving.request._json_inner_handler(*args, **kwargs)
|
||||
return json_encode(value)
|
||||
|
||||
|
||||
def json_out(content_type='application/json', debug=False,
|
||||
handler=json_handler):
|
||||
"""Wrap request.handler to serialize its output to JSON. Sets Content-Type.
|
||||
|
||||
If the given content_type is None, the Content-Type response header
|
||||
is not set.
|
||||
|
||||
Provide your own handler to use a custom encoder. For example
|
||||
cherrypy.config['tools.json_out.handler'] = <function>, or
|
||||
@json_out(handler=function).
|
||||
|
||||
You must be using Python 2.6 or greater, or have the 'simplejson'
|
||||
package importable; otherwise, ValueError is raised during processing.
|
||||
"""
|
||||
request = cherrypy.serving.request
|
||||
# request.handler may be set to None by e.g. the caching tool
|
||||
# to signal to all components that a response body has already
|
||||
# been attached, in which case we don't need to wrap anything.
|
||||
if request.handler is None:
|
||||
return
|
||||
if debug:
|
||||
cherrypy.log('Replacing %s with JSON handler' % request.handler,
|
||||
'TOOLS.JSON_OUT')
|
||||
request._json_inner_handler = request.handler
|
||||
request.handler = handler
|
||||
if content_type is not None:
|
||||
if debug:
|
||||
cherrypy.log('Setting Content-Type to %s' %
|
||||
content_type, 'TOOLS.JSON_OUT')
|
||||
cherrypy.serving.response.headers['Content-Type'] = content_type
|
||||
147
lib/cherrypy/lib/lockfile.py
Normal file
147
lib/cherrypy/lib/lockfile.py
Normal file
@@ -0,0 +1,147 @@
|
||||
"""
|
||||
Platform-independent file locking. Inspired by and modeled after zc.lockfile.
|
||||
"""
|
||||
|
||||
import os
|
||||
|
||||
try:
|
||||
import msvcrt
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
try:
|
||||
import fcntl
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
|
||||
class LockError(Exception):
|
||||
|
||||
"Could not obtain a lock"
|
||||
|
||||
msg = "Unable to lock %r"
|
||||
|
||||
def __init__(self, path):
|
||||
super(LockError, self).__init__(self.msg % path)
|
||||
|
||||
|
||||
class UnlockError(LockError):
|
||||
|
||||
"Could not release a lock"
|
||||
|
||||
msg = "Unable to unlock %r"
|
||||
|
||||
|
||||
# first, a default, naive locking implementation
|
||||
class LockFile(object):
|
||||
|
||||
"""
|
||||
A default, naive locking implementation. Always fails if the file
|
||||
already exists.
|
||||
"""
|
||||
|
||||
def __init__(self, path):
|
||||
self.path = path
|
||||
try:
|
||||
fd = os.open(path, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
|
||||
except OSError:
|
||||
raise LockError(self.path)
|
||||
os.close(fd)
|
||||
|
||||
def release(self):
|
||||
os.remove(self.path)
|
||||
|
||||
def remove(self):
|
||||
pass
|
||||
|
||||
|
||||
class SystemLockFile(object):
|
||||
|
||||
"""
|
||||
An abstract base class for platform-specific locking.
|
||||
"""
|
||||
|
||||
def __init__(self, path):
|
||||
self.path = path
|
||||
|
||||
try:
|
||||
# Open lockfile for writing without truncation:
|
||||
self.fp = open(path, 'r+')
|
||||
except IOError:
|
||||
# If the file doesn't exist, IOError is raised; Use a+ instead.
|
||||
# Note that there may be a race here. Multiple processes
|
||||
# could fail on the r+ open and open the file a+, but only
|
||||
# one will get the the lock and write a pid.
|
||||
self.fp = open(path, 'a+')
|
||||
|
||||
try:
|
||||
self._lock_file()
|
||||
except:
|
||||
self.fp.seek(1)
|
||||
self.fp.close()
|
||||
del self.fp
|
||||
raise
|
||||
|
||||
self.fp.write(" %s\n" % os.getpid())
|
||||
self.fp.truncate()
|
||||
self.fp.flush()
|
||||
|
||||
def release(self):
|
||||
if not hasattr(self, 'fp'):
|
||||
return
|
||||
self._unlock_file()
|
||||
self.fp.close()
|
||||
del self.fp
|
||||
|
||||
def remove(self):
|
||||
"""
|
||||
Attempt to remove the file
|
||||
"""
|
||||
try:
|
||||
os.remove(self.path)
|
||||
except:
|
||||
pass
|
||||
|
||||
#@abc.abstract_method
|
||||
# def _lock_file(self):
|
||||
# """Attempt to obtain the lock on self.fp. Raise LockError if not
|
||||
# acquired."""
|
||||
|
||||
def _unlock_file(self):
|
||||
"""Attempt to obtain the lock on self.fp. Raise UnlockError if not
|
||||
released."""
|
||||
|
||||
|
||||
class WindowsLockFile(SystemLockFile):
|
||||
|
||||
def _lock_file(self):
|
||||
# Lock just the first byte
|
||||
try:
|
||||
msvcrt.locking(self.fp.fileno(), msvcrt.LK_NBLCK, 1)
|
||||
except IOError:
|
||||
raise LockError(self.fp.name)
|
||||
|
||||
def _unlock_file(self):
|
||||
try:
|
||||
self.fp.seek(0)
|
||||
msvcrt.locking(self.fp.fileno(), msvcrt.LK_UNLCK, 1)
|
||||
except IOError:
|
||||
raise UnlockError(self.fp.name)
|
||||
|
||||
if 'msvcrt' in globals():
|
||||
LockFile = WindowsLockFile
|
||||
|
||||
|
||||
class UnixLockFile(SystemLockFile):
|
||||
|
||||
def _lock_file(self):
|
||||
flags = fcntl.LOCK_EX | fcntl.LOCK_NB
|
||||
try:
|
||||
fcntl.flock(self.fp.fileno(), flags)
|
||||
except IOError:
|
||||
raise LockError(self.fp.name)
|
||||
|
||||
# no need to implement _unlock_file, it will be unlocked on close()
|
||||
|
||||
if 'fcntl' in globals():
|
||||
LockFile = UnixLockFile
|
||||
47
lib/cherrypy/lib/locking.py
Normal file
47
lib/cherrypy/lib/locking.py
Normal file
@@ -0,0 +1,47 @@
|
||||
import datetime
|
||||
|
||||
|
||||
class NeverExpires(object):
|
||||
def expired(self):
|
||||
return False
|
||||
|
||||
|
||||
class Timer(object):
|
||||
"""
|
||||
A simple timer that will indicate when an expiration time has passed.
|
||||
"""
|
||||
def __init__(self, expiration):
|
||||
"Create a timer that expires at `expiration` (UTC datetime)"
|
||||
self.expiration = expiration
|
||||
|
||||
@classmethod
|
||||
def after(cls, elapsed):
|
||||
"""
|
||||
Return a timer that will expire after `elapsed` passes.
|
||||
"""
|
||||
return cls(datetime.datetime.utcnow() + elapsed)
|
||||
|
||||
def expired(self):
|
||||
return datetime.datetime.utcnow() >= self.expiration
|
||||
|
||||
|
||||
class LockTimeout(Exception):
|
||||
"An exception when a lock could not be acquired before a timeout period"
|
||||
|
||||
|
||||
class LockChecker(object):
|
||||
"""
|
||||
Keep track of the time and detect if a timeout has expired
|
||||
"""
|
||||
def __init__(self, session_id, timeout):
|
||||
self.session_id = session_id
|
||||
if timeout:
|
||||
self.timer = Timer.after(timeout)
|
||||
else:
|
||||
self.timer = NeverExpires()
|
||||
|
||||
def expired(self):
|
||||
if self.timer.expired():
|
||||
raise LockTimeout(
|
||||
"Timeout acquiring lock for %(session_id)s" % vars(self))
|
||||
return False
|
||||
216
lib/cherrypy/lib/profiler.py
Normal file
216
lib/cherrypy/lib/profiler.py
Normal file
@@ -0,0 +1,216 @@
|
||||
"""Profiler tools for CherryPy.
|
||||
|
||||
CherryPy users
|
||||
==============
|
||||
|
||||
You can profile any of your pages as follows::
|
||||
|
||||
from cherrypy.lib import profiler
|
||||
|
||||
class Root:
|
||||
p = profile.Profiler("/path/to/profile/dir")
|
||||
|
||||
def index(self):
|
||||
self.p.run(self._index)
|
||||
index.exposed = True
|
||||
|
||||
def _index(self):
|
||||
return "Hello, world!"
|
||||
|
||||
cherrypy.tree.mount(Root())
|
||||
|
||||
You can also turn on profiling for all requests
|
||||
using the ``make_app`` function as WSGI middleware.
|
||||
|
||||
CherryPy developers
|
||||
===================
|
||||
|
||||
This module can be used whenever you make changes to CherryPy,
|
||||
to get a quick sanity-check on overall CP performance. Use the
|
||||
``--profile`` flag when running the test suite. Then, use the ``serve()``
|
||||
function to browse the results in a web browser. If you run this
|
||||
module from the command line, it will call ``serve()`` for you.
|
||||
|
||||
"""
|
||||
|
||||
|
||||
def new_func_strip_path(func_name):
|
||||
"""Make profiler output more readable by adding `__init__` modules' parents
|
||||
"""
|
||||
filename, line, name = func_name
|
||||
if filename.endswith("__init__.py"):
|
||||
return os.path.basename(filename[:-12]) + filename[-12:], line, name
|
||||
return os.path.basename(filename), line, name
|
||||
|
||||
try:
|
||||
import profile
|
||||
import pstats
|
||||
pstats.func_strip_path = new_func_strip_path
|
||||
except ImportError:
|
||||
profile = None
|
||||
pstats = None
|
||||
|
||||
import os
|
||||
import os.path
|
||||
import sys
|
||||
import warnings
|
||||
|
||||
from cherrypy._cpcompat import StringIO
|
||||
|
||||
_count = 0
|
||||
|
||||
|
||||
class Profiler(object):
|
||||
|
||||
def __init__(self, path=None):
|
||||
if not path:
|
||||
path = os.path.join(os.path.dirname(__file__), "profile")
|
||||
self.path = path
|
||||
if not os.path.exists(path):
|
||||
os.makedirs(path)
|
||||
|
||||
def run(self, func, *args, **params):
|
||||
"""Dump profile data into self.path."""
|
||||
global _count
|
||||
c = _count = _count + 1
|
||||
path = os.path.join(self.path, "cp_%04d.prof" % c)
|
||||
prof = profile.Profile()
|
||||
result = prof.runcall(func, *args, **params)
|
||||
prof.dump_stats(path)
|
||||
return result
|
||||
|
||||
def statfiles(self):
|
||||
""":rtype: list of available profiles.
|
||||
"""
|
||||
return [f for f in os.listdir(self.path)
|
||||
if f.startswith("cp_") and f.endswith(".prof")]
|
||||
|
||||
def stats(self, filename, sortby='cumulative'):
|
||||
""":rtype stats(index): output of print_stats() for the given profile.
|
||||
"""
|
||||
sio = StringIO()
|
||||
if sys.version_info >= (2, 5):
|
||||
s = pstats.Stats(os.path.join(self.path, filename), stream=sio)
|
||||
s.strip_dirs()
|
||||
s.sort_stats(sortby)
|
||||
s.print_stats()
|
||||
else:
|
||||
# pstats.Stats before Python 2.5 didn't take a 'stream' arg,
|
||||
# but just printed to stdout. So re-route stdout.
|
||||
s = pstats.Stats(os.path.join(self.path, filename))
|
||||
s.strip_dirs()
|
||||
s.sort_stats(sortby)
|
||||
oldout = sys.stdout
|
||||
try:
|
||||
sys.stdout = sio
|
||||
s.print_stats()
|
||||
finally:
|
||||
sys.stdout = oldout
|
||||
response = sio.getvalue()
|
||||
sio.close()
|
||||
return response
|
||||
|
||||
def index(self):
|
||||
return """<html>
|
||||
<head><title>CherryPy profile data</title></head>
|
||||
<frameset cols='200, 1*'>
|
||||
<frame src='menu' />
|
||||
<frame name='main' src='' />
|
||||
</frameset>
|
||||
</html>
|
||||
"""
|
||||
index.exposed = True
|
||||
|
||||
def menu(self):
|
||||
yield "<h2>Profiling runs</h2>"
|
||||
yield "<p>Click on one of the runs below to see profiling data.</p>"
|
||||
runs = self.statfiles()
|
||||
runs.sort()
|
||||
for i in runs:
|
||||
yield "<a href='report?filename=%s' target='main'>%s</a><br />" % (
|
||||
i, i)
|
||||
menu.exposed = True
|
||||
|
||||
def report(self, filename):
|
||||
import cherrypy
|
||||
cherrypy.response.headers['Content-Type'] = 'text/plain'
|
||||
return self.stats(filename)
|
||||
report.exposed = True
|
||||
|
||||
|
||||
class ProfileAggregator(Profiler):
|
||||
|
||||
def __init__(self, path=None):
|
||||
Profiler.__init__(self, path)
|
||||
global _count
|
||||
self.count = _count = _count + 1
|
||||
self.profiler = profile.Profile()
|
||||
|
||||
def run(self, func, *args, **params):
|
||||
path = os.path.join(self.path, "cp_%04d.prof" % self.count)
|
||||
result = self.profiler.runcall(func, *args, **params)
|
||||
self.profiler.dump_stats(path)
|
||||
return result
|
||||
|
||||
|
||||
class make_app:
|
||||
|
||||
def __init__(self, nextapp, path=None, aggregate=False):
|
||||
"""Make a WSGI middleware app which wraps 'nextapp' with profiling.
|
||||
|
||||
nextapp
|
||||
the WSGI application to wrap, usually an instance of
|
||||
cherrypy.Application.
|
||||
|
||||
path
|
||||
where to dump the profiling output.
|
||||
|
||||
aggregate
|
||||
if True, profile data for all HTTP requests will go in
|
||||
a single file. If False (the default), each HTTP request will
|
||||
dump its profile data into a separate file.
|
||||
|
||||
"""
|
||||
if profile is None or pstats is None:
|
||||
msg = ("Your installation of Python does not have a profile "
|
||||
"module. If you're on Debian, try "
|
||||
"`sudo apt-get install python-profiler`. "
|
||||
"See http://www.cherrypy.org/wiki/ProfilingOnDebian "
|
||||
"for details.")
|
||||
warnings.warn(msg)
|
||||
|
||||
self.nextapp = nextapp
|
||||
self.aggregate = aggregate
|
||||
if aggregate:
|
||||
self.profiler = ProfileAggregator(path)
|
||||
else:
|
||||
self.profiler = Profiler(path)
|
||||
|
||||
def __call__(self, environ, start_response):
|
||||
def gather():
|
||||
result = []
|
||||
for line in self.nextapp(environ, start_response):
|
||||
result.append(line)
|
||||
return result
|
||||
return self.profiler.run(gather)
|
||||
|
||||
|
||||
def serve(path=None, port=8080):
|
||||
if profile is None or pstats is None:
|
||||
msg = ("Your installation of Python does not have a profile module. "
|
||||
"If you're on Debian, try "
|
||||
"`sudo apt-get install python-profiler`. "
|
||||
"See http://www.cherrypy.org/wiki/ProfilingOnDebian "
|
||||
"for details.")
|
||||
warnings.warn(msg)
|
||||
|
||||
import cherrypy
|
||||
cherrypy.config.update({'server.socket_port': int(port),
|
||||
'server.thread_pool': 10,
|
||||
'environment': "production",
|
||||
})
|
||||
cherrypy.quickstart(Profiler(path))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
serve(*tuple(sys.argv[1:]))
|
||||
503
lib/cherrypy/lib/reprconf.py
Normal file
503
lib/cherrypy/lib/reprconf.py
Normal file
@@ -0,0 +1,503 @@
|
||||
"""Generic configuration system using unrepr.
|
||||
|
||||
Configuration data may be supplied as a Python dictionary, as a filename,
|
||||
or as an open file object. When you supply a filename or file, Python's
|
||||
builtin ConfigParser is used (with some extensions).
|
||||
|
||||
Namespaces
|
||||
----------
|
||||
|
||||
Configuration keys are separated into namespaces by the first "." in the key.
|
||||
|
||||
The only key that cannot exist in a namespace is the "environment" entry.
|
||||
This special entry 'imports' other config entries from a template stored in
|
||||
the Config.environments dict.
|
||||
|
||||
You can define your own namespaces to be called when new config is merged
|
||||
by adding a named handler to Config.namespaces. The name can be any string,
|
||||
and the handler must be either a callable or a context manager.
|
||||
"""
|
||||
|
||||
try:
|
||||
# Python 3.0+
|
||||
from configparser import ConfigParser
|
||||
except ImportError:
|
||||
from ConfigParser import ConfigParser
|
||||
|
||||
try:
|
||||
set
|
||||
except NameError:
|
||||
from sets import Set as set
|
||||
|
||||
try:
|
||||
basestring
|
||||
except NameError:
|
||||
basestring = str
|
||||
|
||||
try:
|
||||
# Python 3
|
||||
import builtins
|
||||
except ImportError:
|
||||
# Python 2
|
||||
import __builtin__ as builtins
|
||||
|
||||
import operator as _operator
|
||||
import sys
|
||||
|
||||
|
||||
def as_dict(config):
|
||||
"""Return a dict from 'config' whether it is a dict, file, or filename."""
|
||||
if isinstance(config, basestring):
|
||||
config = Parser().dict_from_file(config)
|
||||
elif hasattr(config, 'read'):
|
||||
config = Parser().dict_from_file(config)
|
||||
return config
|
||||
|
||||
|
||||
class NamespaceSet(dict):
|
||||
|
||||
"""A dict of config namespace names and handlers.
|
||||
|
||||
Each config entry should begin with a namespace name; the corresponding
|
||||
namespace handler will be called once for each config entry in that
|
||||
namespace, and will be passed two arguments: the config key (with the
|
||||
namespace removed) and the config value.
|
||||
|
||||
Namespace handlers may be any Python callable; they may also be
|
||||
Python 2.5-style 'context managers', in which case their __enter__
|
||||
method should return a callable to be used as the handler.
|
||||
See cherrypy.tools (the Toolbox class) for an example.
|
||||
"""
|
||||
|
||||
def __call__(self, config):
|
||||
"""Iterate through config and pass it to each namespace handler.
|
||||
|
||||
config
|
||||
A flat dict, where keys use dots to separate
|
||||
namespaces, and values are arbitrary.
|
||||
|
||||
The first name in each config key is used to look up the corresponding
|
||||
namespace handler. For example, a config entry of {'tools.gzip.on': v}
|
||||
will call the 'tools' namespace handler with the args: ('gzip.on', v)
|
||||
"""
|
||||
# Separate the given config into namespaces
|
||||
ns_confs = {}
|
||||
for k in config:
|
||||
if "." in k:
|
||||
ns, name = k.split(".", 1)
|
||||
bucket = ns_confs.setdefault(ns, {})
|
||||
bucket[name] = config[k]
|
||||
|
||||
# I chose __enter__ and __exit__ so someday this could be
|
||||
# rewritten using Python 2.5's 'with' statement:
|
||||
# for ns, handler in self.iteritems():
|
||||
# with handler as callable:
|
||||
# for k, v in ns_confs.get(ns, {}).iteritems():
|
||||
# callable(k, v)
|
||||
for ns, handler in self.items():
|
||||
exit = getattr(handler, "__exit__", None)
|
||||
if exit:
|
||||
callable = handler.__enter__()
|
||||
no_exc = True
|
||||
try:
|
||||
try:
|
||||
for k, v in ns_confs.get(ns, {}).items():
|
||||
callable(k, v)
|
||||
except:
|
||||
# The exceptional case is handled here
|
||||
no_exc = False
|
||||
if exit is None:
|
||||
raise
|
||||
if not exit(*sys.exc_info()):
|
||||
raise
|
||||
# The exception is swallowed if exit() returns true
|
||||
finally:
|
||||
# The normal and non-local-goto cases are handled here
|
||||
if no_exc and exit:
|
||||
exit(None, None, None)
|
||||
else:
|
||||
for k, v in ns_confs.get(ns, {}).items():
|
||||
handler(k, v)
|
||||
|
||||
def __repr__(self):
|
||||
return "%s.%s(%s)" % (self.__module__, self.__class__.__name__,
|
||||
dict.__repr__(self))
|
||||
|
||||
def __copy__(self):
|
||||
newobj = self.__class__()
|
||||
newobj.update(self)
|
||||
return newobj
|
||||
copy = __copy__
|
||||
|
||||
|
||||
class Config(dict):
|
||||
|
||||
"""A dict-like set of configuration data, with defaults and namespaces.
|
||||
|
||||
May take a file, filename, or dict.
|
||||
"""
|
||||
|
||||
defaults = {}
|
||||
environments = {}
|
||||
namespaces = NamespaceSet()
|
||||
|
||||
def __init__(self, file=None, **kwargs):
|
||||
self.reset()
|
||||
if file is not None:
|
||||
self.update(file)
|
||||
if kwargs:
|
||||
self.update(kwargs)
|
||||
|
||||
def reset(self):
|
||||
"""Reset self to default values."""
|
||||
self.clear()
|
||||
dict.update(self, self.defaults)
|
||||
|
||||
def update(self, config):
|
||||
"""Update self from a dict, file or filename."""
|
||||
if isinstance(config, basestring):
|
||||
# Filename
|
||||
config = Parser().dict_from_file(config)
|
||||
elif hasattr(config, 'read'):
|
||||
# Open file object
|
||||
config = Parser().dict_from_file(config)
|
||||
else:
|
||||
config = config.copy()
|
||||
self._apply(config)
|
||||
|
||||
def _apply(self, config):
|
||||
"""Update self from a dict."""
|
||||
which_env = config.get('environment')
|
||||
if which_env:
|
||||
env = self.environments[which_env]
|
||||
for k in env:
|
||||
if k not in config:
|
||||
config[k] = env[k]
|
||||
|
||||
dict.update(self, config)
|
||||
self.namespaces(config)
|
||||
|
||||
def __setitem__(self, k, v):
|
||||
dict.__setitem__(self, k, v)
|
||||
self.namespaces({k: v})
|
||||
|
||||
|
||||
class Parser(ConfigParser):
|
||||
|
||||
"""Sub-class of ConfigParser that keeps the case of options and that
|
||||
raises an exception if the file cannot be read.
|
||||
"""
|
||||
|
||||
def optionxform(self, optionstr):
|
||||
return optionstr
|
||||
|
||||
def read(self, filenames):
|
||||
if isinstance(filenames, basestring):
|
||||
filenames = [filenames]
|
||||
for filename in filenames:
|
||||
# try:
|
||||
# fp = open(filename)
|
||||
# except IOError:
|
||||
# continue
|
||||
fp = open(filename)
|
||||
try:
|
||||
self._read(fp, filename)
|
||||
finally:
|
||||
fp.close()
|
||||
|
||||
def as_dict(self, raw=False, vars=None):
|
||||
"""Convert an INI file to a dictionary"""
|
||||
# Load INI file into a dict
|
||||
result = {}
|
||||
for section in self.sections():
|
||||
if section not in result:
|
||||
result[section] = {}
|
||||
for option in self.options(section):
|
||||
value = self.get(section, option, raw=raw, vars=vars)
|
||||
try:
|
||||
value = unrepr(value)
|
||||
except Exception:
|
||||
x = sys.exc_info()[1]
|
||||
msg = ("Config error in section: %r, option: %r, "
|
||||
"value: %r. Config values must be valid Python." %
|
||||
(section, option, value))
|
||||
raise ValueError(msg, x.__class__.__name__, x.args)
|
||||
result[section][option] = value
|
||||
return result
|
||||
|
||||
def dict_from_file(self, file):
|
||||
if hasattr(file, 'read'):
|
||||
self.readfp(file)
|
||||
else:
|
||||
self.read(file)
|
||||
return self.as_dict()
|
||||
|
||||
|
||||
# public domain "unrepr" implementation, found on the web and then improved.
|
||||
|
||||
|
||||
class _Builder2:
|
||||
|
||||
def build(self, o):
|
||||
m = getattr(self, 'build_' + o.__class__.__name__, None)
|
||||
if m is None:
|
||||
raise TypeError("unrepr does not recognize %s" %
|
||||
repr(o.__class__.__name__))
|
||||
return m(o)
|
||||
|
||||
def astnode(self, s):
|
||||
"""Return a Python2 ast Node compiled from a string."""
|
||||
try:
|
||||
import compiler
|
||||
except ImportError:
|
||||
# Fallback to eval when compiler package is not available,
|
||||
# e.g. IronPython 1.0.
|
||||
return eval(s)
|
||||
|
||||
p = compiler.parse("__tempvalue__ = " + s)
|
||||
return p.getChildren()[1].getChildren()[0].getChildren()[1]
|
||||
|
||||
def build_Subscript(self, o):
|
||||
expr, flags, subs = o.getChildren()
|
||||
expr = self.build(expr)
|
||||
subs = self.build(subs)
|
||||
return expr[subs]
|
||||
|
||||
def build_CallFunc(self, o):
|
||||
children = o.getChildren()
|
||||
# Build callee from first child
|
||||
callee = self.build(children[0])
|
||||
# Build args and kwargs from remaining children
|
||||
args = []
|
||||
kwargs = {}
|
||||
for child in children[1:]:
|
||||
class_name = child.__class__.__name__
|
||||
# None is ignored
|
||||
if class_name == 'NoneType':
|
||||
continue
|
||||
# Keywords become kwargs
|
||||
if class_name == 'Keyword':
|
||||
kwargs.update(self.build(child))
|
||||
# Everything else becomes args
|
||||
else :
|
||||
args.append(self.build(child))
|
||||
return callee(*args, **kwargs)
|
||||
|
||||
def build_Keyword(self, o):
|
||||
key, value_obj = o.getChildren()
|
||||
value = self.build(value_obj)
|
||||
kw_dict = {key: value}
|
||||
return kw_dict
|
||||
|
||||
def build_List(self, o):
|
||||
return map(self.build, o.getChildren())
|
||||
|
||||
def build_Const(self, o):
|
||||
return o.value
|
||||
|
||||
def build_Dict(self, o):
|
||||
d = {}
|
||||
i = iter(map(self.build, o.getChildren()))
|
||||
for el in i:
|
||||
d[el] = i.next()
|
||||
return d
|
||||
|
||||
def build_Tuple(self, o):
|
||||
return tuple(self.build_List(o))
|
||||
|
||||
def build_Name(self, o):
|
||||
name = o.name
|
||||
if name == 'None':
|
||||
return None
|
||||
if name == 'True':
|
||||
return True
|
||||
if name == 'False':
|
||||
return False
|
||||
|
||||
# See if the Name is a package or module. If it is, import it.
|
||||
try:
|
||||
return modules(name)
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
# See if the Name is in builtins.
|
||||
try:
|
||||
return getattr(builtins, name)
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
raise TypeError("unrepr could not resolve the name %s" % repr(name))
|
||||
|
||||
def build_Add(self, o):
|
||||
left, right = map(self.build, o.getChildren())
|
||||
return left + right
|
||||
|
||||
def build_Mul(self, o):
|
||||
left, right = map(self.build, o.getChildren())
|
||||
return left * right
|
||||
|
||||
def build_Getattr(self, o):
|
||||
parent = self.build(o.expr)
|
||||
return getattr(parent, o.attrname)
|
||||
|
||||
def build_NoneType(self, o):
|
||||
return None
|
||||
|
||||
def build_UnarySub(self, o):
|
||||
return -self.build(o.getChildren()[0])
|
||||
|
||||
def build_UnaryAdd(self, o):
|
||||
return self.build(o.getChildren()[0])
|
||||
|
||||
|
||||
class _Builder3:
|
||||
|
||||
def build(self, o):
|
||||
m = getattr(self, 'build_' + o.__class__.__name__, None)
|
||||
if m is None:
|
||||
raise TypeError("unrepr does not recognize %s" %
|
||||
repr(o.__class__.__name__))
|
||||
return m(o)
|
||||
|
||||
def astnode(self, s):
|
||||
"""Return a Python3 ast Node compiled from a string."""
|
||||
try:
|
||||
import ast
|
||||
except ImportError:
|
||||
# Fallback to eval when ast package is not available,
|
||||
# e.g. IronPython 1.0.
|
||||
return eval(s)
|
||||
|
||||
p = ast.parse("__tempvalue__ = " + s)
|
||||
return p.body[0].value
|
||||
|
||||
def build_Subscript(self, o):
|
||||
return self.build(o.value)[self.build(o.slice)]
|
||||
|
||||
def build_Index(self, o):
|
||||
return self.build(o.value)
|
||||
|
||||
def build_Call(self, o):
|
||||
callee = self.build(o.func)
|
||||
|
||||
if o.args is None:
|
||||
args = ()
|
||||
else:
|
||||
args = tuple([self.build(a) for a in o.args])
|
||||
|
||||
if o.starargs is None:
|
||||
starargs = ()
|
||||
else:
|
||||
starargs = self.build(o.starargs)
|
||||
|
||||
if o.kwargs is None:
|
||||
kwargs = {}
|
||||
else:
|
||||
kwargs = self.build(o.kwargs)
|
||||
|
||||
return callee(*(args + starargs), **kwargs)
|
||||
|
||||
def build_List(self, o):
|
||||
return list(map(self.build, o.elts))
|
||||
|
||||
def build_Str(self, o):
|
||||
return o.s
|
||||
|
||||
def build_Num(self, o):
|
||||
return o.n
|
||||
|
||||
def build_Dict(self, o):
|
||||
return dict([(self.build(k), self.build(v))
|
||||
for k, v in zip(o.keys, o.values)])
|
||||
|
||||
def build_Tuple(self, o):
|
||||
return tuple(self.build_List(o))
|
||||
|
||||
def build_Name(self, o):
|
||||
name = o.id
|
||||
if name == 'None':
|
||||
return None
|
||||
if name == 'True':
|
||||
return True
|
||||
if name == 'False':
|
||||
return False
|
||||
|
||||
# See if the Name is a package or module. If it is, import it.
|
||||
try:
|
||||
return modules(name)
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
# See if the Name is in builtins.
|
||||
try:
|
||||
import builtins
|
||||
return getattr(builtins, name)
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
raise TypeError("unrepr could not resolve the name %s" % repr(name))
|
||||
|
||||
def build_NameConstant(self, o):
|
||||
return o.value
|
||||
|
||||
def build_UnaryOp(self, o):
|
||||
op, operand = map(self.build, [o.op, o.operand])
|
||||
return op(operand)
|
||||
|
||||
def build_BinOp(self, o):
|
||||
left, op, right = map(self.build, [o.left, o.op, o.right])
|
||||
return op(left, right)
|
||||
|
||||
def build_Add(self, o):
|
||||
return _operator.add
|
||||
|
||||
def build_Mult(self, o):
|
||||
return _operator.mul
|
||||
|
||||
def build_USub(self, o):
|
||||
return _operator.neg
|
||||
|
||||
def build_Attribute(self, o):
|
||||
parent = self.build(o.value)
|
||||
return getattr(parent, o.attr)
|
||||
|
||||
def build_NoneType(self, o):
|
||||
return None
|
||||
|
||||
|
||||
def unrepr(s):
|
||||
"""Return a Python object compiled from a string."""
|
||||
if not s:
|
||||
return s
|
||||
if sys.version_info < (3, 0):
|
||||
b = _Builder2()
|
||||
else:
|
||||
b = _Builder3()
|
||||
obj = b.astnode(s)
|
||||
return b.build(obj)
|
||||
|
||||
|
||||
def modules(modulePath):
|
||||
"""Load a module and retrieve a reference to that module."""
|
||||
__import__(modulePath)
|
||||
return sys.modules[modulePath]
|
||||
|
||||
|
||||
def attributes(full_attribute_name):
|
||||
"""Load a module and retrieve an attribute of that module."""
|
||||
|
||||
# Parse out the path, module, and attribute
|
||||
last_dot = full_attribute_name.rfind(".")
|
||||
attr_name = full_attribute_name[last_dot + 1:]
|
||||
mod_path = full_attribute_name[:last_dot]
|
||||
|
||||
mod = modules(mod_path)
|
||||
# Let an AttributeError propagate outward.
|
||||
try:
|
||||
attr = getattr(mod, attr_name)
|
||||
except AttributeError:
|
||||
raise AttributeError("'%s' object has no attribute '%s'"
|
||||
% (mod_path, attr_name))
|
||||
|
||||
# Return a reference to the attribute.
|
||||
return attr
|
||||
968
lib/cherrypy/lib/sessions.py
Normal file
968
lib/cherrypy/lib/sessions.py
Normal file
@@ -0,0 +1,968 @@
|
||||
"""Session implementation for CherryPy.
|
||||
|
||||
You need to edit your config file to use sessions. Here's an example::
|
||||
|
||||
[/]
|
||||
tools.sessions.on = True
|
||||
tools.sessions.storage_type = "file"
|
||||
tools.sessions.storage_path = "/home/site/sessions"
|
||||
tools.sessions.timeout = 60
|
||||
|
||||
This sets the session to be stored in files in the directory
|
||||
/home/site/sessions, and the session timeout to 60 minutes. If you omit
|
||||
``storage_type`` the sessions will be saved in RAM.
|
||||
``tools.sessions.on`` is the only required line for working sessions,
|
||||
the rest are optional.
|
||||
|
||||
By default, the session ID is passed in a cookie, so the client's browser must
|
||||
have cookies enabled for your site.
|
||||
|
||||
To set data for the current session, use
|
||||
``cherrypy.session['fieldname'] = 'fieldvalue'``;
|
||||
to get data use ``cherrypy.session.get('fieldname')``.
|
||||
|
||||
================
|
||||
Locking sessions
|
||||
================
|
||||
|
||||
By default, the ``'locking'`` mode of sessions is ``'implicit'``, which means
|
||||
the session is locked early and unlocked late. Be mindful of this default mode
|
||||
for any requests that take a long time to process (streaming responses,
|
||||
expensive calculations, database lookups, API calls, etc), as other concurrent
|
||||
requests that also utilize sessions will hang until the session is unlocked.
|
||||
|
||||
If you want to control when the session data is locked and unlocked,
|
||||
set ``tools.sessions.locking = 'explicit'``. Then call
|
||||
``cherrypy.session.acquire_lock()`` and ``cherrypy.session.release_lock()``.
|
||||
Regardless of which mode you use, the session is guaranteed to be unlocked when
|
||||
the request is complete.
|
||||
|
||||
=================
|
||||
Expiring Sessions
|
||||
=================
|
||||
|
||||
You can force a session to expire with :func:`cherrypy.lib.sessions.expire`.
|
||||
Simply call that function at the point you want the session to expire, and it
|
||||
will cause the session cookie to expire client-side.
|
||||
|
||||
===========================
|
||||
Session Fixation Protection
|
||||
===========================
|
||||
|
||||
If CherryPy receives, via a request cookie, a session id that it does not
|
||||
recognize, it will reject that id and create a new one to return in the
|
||||
response cookie. This `helps prevent session fixation attacks
|
||||
<http://en.wikipedia.org/wiki/Session_fixation#Regenerate_SID_on_each_request>`_.
|
||||
However, CherryPy "recognizes" a session id by looking up the saved session
|
||||
data for that id. Therefore, if you never save any session data,
|
||||
**you will get a new session id for every request**.
|
||||
|
||||
================
|
||||
Sharing Sessions
|
||||
================
|
||||
|
||||
If you run multiple instances of CherryPy (for example via mod_python behind
|
||||
Apache prefork), you most likely cannot use the RAM session backend, since each
|
||||
instance of CherryPy will have its own memory space. Use a different backend
|
||||
instead, and verify that all instances are pointing at the same file or db
|
||||
location. Alternately, you might try a load balancer which makes sessions
|
||||
"sticky". Google is your friend, there.
|
||||
|
||||
================
|
||||
Expiration Dates
|
||||
================
|
||||
|
||||
The response cookie will possess an expiration date to inform the client at
|
||||
which point to stop sending the cookie back in requests. If the server time
|
||||
and client time differ, expect sessions to be unreliable. **Make sure the
|
||||
system time of your server is accurate**.
|
||||
|
||||
CherryPy defaults to a 60-minute session timeout, which also applies to the
|
||||
cookie which is sent to the client. Unfortunately, some versions of Safari
|
||||
("4 public beta" on Windows XP at least) appear to have a bug in their parsing
|
||||
of the GMT expiration date--they appear to interpret the date as one hour in
|
||||
the past. Sixty minutes minus one hour is pretty close to zero, so you may
|
||||
experience this bug as a new session id for every request, unless the requests
|
||||
are less than one second apart. To fix, try increasing the session.timeout.
|
||||
|
||||
On the other extreme, some users report Firefox sending cookies after their
|
||||
expiration date, although this was on a system with an inaccurate system time.
|
||||
Maybe FF doesn't trust system time.
|
||||
"""
|
||||
import sys
|
||||
import datetime
|
||||
import os
|
||||
import time
|
||||
import threading
|
||||
import types
|
||||
|
||||
import cherrypy
|
||||
from cherrypy._cpcompat import copyitems, pickle, random20, unicodestr
|
||||
from cherrypy.lib import httputil
|
||||
from cherrypy.lib import lockfile
|
||||
from cherrypy.lib import locking
|
||||
from cherrypy.lib import is_iterator
|
||||
|
||||
missing = object()
|
||||
|
||||
|
||||
class Session(object):
|
||||
|
||||
"""A CherryPy dict-like Session object (one per request)."""
|
||||
|
||||
_id = None
|
||||
|
||||
id_observers = None
|
||||
"A list of callbacks to which to pass new id's."
|
||||
|
||||
def _get_id(self):
|
||||
return self._id
|
||||
|
||||
def _set_id(self, value):
|
||||
self._id = value
|
||||
for o in self.id_observers:
|
||||
o(value)
|
||||
id = property(_get_id, _set_id, doc="The current session ID.")
|
||||
|
||||
timeout = 60
|
||||
"Number of minutes after which to delete session data."
|
||||
|
||||
locked = False
|
||||
"""
|
||||
If True, this session instance has exclusive read/write access
|
||||
to session data."""
|
||||
|
||||
loaded = False
|
||||
"""
|
||||
If True, data has been retrieved from storage. This should happen
|
||||
automatically on the first attempt to access session data."""
|
||||
|
||||
clean_thread = None
|
||||
"Class-level Monitor which calls self.clean_up."
|
||||
|
||||
clean_freq = 5
|
||||
"The poll rate for expired session cleanup in minutes."
|
||||
|
||||
originalid = None
|
||||
"The session id passed by the client. May be missing or unsafe."
|
||||
|
||||
missing = False
|
||||
"True if the session requested by the client did not exist."
|
||||
|
||||
regenerated = False
|
||||
"""
|
||||
True if the application called session.regenerate(). This is not set by
|
||||
internal calls to regenerate the session id."""
|
||||
|
||||
debug = False
|
||||
"If True, log debug information."
|
||||
|
||||
# --------------------- Session management methods --------------------- #
|
||||
|
||||
def __init__(self, id=None, **kwargs):
|
||||
self.id_observers = []
|
||||
self._data = {}
|
||||
|
||||
for k, v in kwargs.items():
|
||||
setattr(self, k, v)
|
||||
|
||||
self.originalid = id
|
||||
self.missing = False
|
||||
if id is None:
|
||||
if self.debug:
|
||||
cherrypy.log('No id given; making a new one', 'TOOLS.SESSIONS')
|
||||
self._regenerate()
|
||||
else:
|
||||
self.id = id
|
||||
if self._exists():
|
||||
if self.debug:
|
||||
cherrypy.log('Set id to %s.' % id, 'TOOLS.SESSIONS')
|
||||
else:
|
||||
if self.debug:
|
||||
cherrypy.log('Expired or malicious session %r; '
|
||||
'making a new one' % id, 'TOOLS.SESSIONS')
|
||||
# Expired or malicious session. Make a new one.
|
||||
# See https://bitbucket.org/cherrypy/cherrypy/issue/709.
|
||||
self.id = None
|
||||
self.missing = True
|
||||
self._regenerate()
|
||||
|
||||
def now(self):
|
||||
"""Generate the session specific concept of 'now'.
|
||||
|
||||
Other session providers can override this to use alternative,
|
||||
possibly timezone aware, versions of 'now'.
|
||||
"""
|
||||
return datetime.datetime.now()
|
||||
|
||||
def regenerate(self):
|
||||
"""Replace the current session (with a new id)."""
|
||||
self.regenerated = True
|
||||
self._regenerate()
|
||||
|
||||
def _regenerate(self):
|
||||
if self.id is not None:
|
||||
if self.debug:
|
||||
cherrypy.log(
|
||||
'Deleting the existing session %r before '
|
||||
'regeneration.' % self.id,
|
||||
'TOOLS.SESSIONS')
|
||||
self.delete()
|
||||
|
||||
old_session_was_locked = self.locked
|
||||
if old_session_was_locked:
|
||||
self.release_lock()
|
||||
if self.debug:
|
||||
cherrypy.log('Old lock released.', 'TOOLS.SESSIONS')
|
||||
|
||||
self.id = None
|
||||
while self.id is None:
|
||||
self.id = self.generate_id()
|
||||
# Assert that the generated id is not already stored.
|
||||
if self._exists():
|
||||
self.id = None
|
||||
if self.debug:
|
||||
cherrypy.log('Set id to generated %s.' % self.id,
|
||||
'TOOLS.SESSIONS')
|
||||
|
||||
if old_session_was_locked:
|
||||
self.acquire_lock()
|
||||
if self.debug:
|
||||
cherrypy.log('Regenerated lock acquired.', 'TOOLS.SESSIONS')
|
||||
|
||||
def clean_up(self):
|
||||
"""Clean up expired sessions."""
|
||||
pass
|
||||
|
||||
def generate_id(self):
|
||||
"""Return a new session id."""
|
||||
return random20()
|
||||
|
||||
def save(self):
|
||||
"""Save session data."""
|
||||
try:
|
||||
# If session data has never been loaded then it's never been
|
||||
# accessed: no need to save it
|
||||
if self.loaded:
|
||||
t = datetime.timedelta(seconds=self.timeout * 60)
|
||||
expiration_time = self.now() + t
|
||||
if self.debug:
|
||||
cherrypy.log('Saving session %r with expiry %s' %
|
||||
(self.id, expiration_time),
|
||||
'TOOLS.SESSIONS')
|
||||
self._save(expiration_time)
|
||||
else:
|
||||
if self.debug:
|
||||
cherrypy.log(
|
||||
'Skipping save of session %r (no session loaded).' %
|
||||
self.id, 'TOOLS.SESSIONS')
|
||||
finally:
|
||||
if self.locked:
|
||||
# Always release the lock if the user didn't release it
|
||||
self.release_lock()
|
||||
if self.debug:
|
||||
cherrypy.log('Lock released after save.', 'TOOLS.SESSIONS')
|
||||
|
||||
def load(self):
|
||||
"""Copy stored session data into this session instance."""
|
||||
data = self._load()
|
||||
# data is either None or a tuple (session_data, expiration_time)
|
||||
if data is None or data[1] < self.now():
|
||||
if self.debug:
|
||||
cherrypy.log('Expired session %r, flushing data.' % self.id,
|
||||
'TOOLS.SESSIONS')
|
||||
self._data = {}
|
||||
else:
|
||||
if self.debug:
|
||||
cherrypy.log('Data loaded for session %r.' % self.id,
|
||||
'TOOLS.SESSIONS')
|
||||
self._data = data[0]
|
||||
self.loaded = True
|
||||
|
||||
# Stick the clean_thread in the class, not the instance.
|
||||
# The instances are created and destroyed per-request.
|
||||
cls = self.__class__
|
||||
if self.clean_freq and not cls.clean_thread:
|
||||
# clean_up is an instancemethod and not a classmethod,
|
||||
# so that tool config can be accessed inside the method.
|
||||
t = cherrypy.process.plugins.Monitor(
|
||||
cherrypy.engine, self.clean_up, self.clean_freq * 60,
|
||||
name='Session cleanup')
|
||||
t.subscribe()
|
||||
cls.clean_thread = t
|
||||
t.start()
|
||||
if self.debug:
|
||||
cherrypy.log('Started cleanup thread.', 'TOOLS.SESSIONS')
|
||||
|
||||
def delete(self):
|
||||
"""Delete stored session data."""
|
||||
self._delete()
|
||||
if self.debug:
|
||||
cherrypy.log('Deleted session %s.' % self.id,
|
||||
'TOOLS.SESSIONS')
|
||||
|
||||
# -------------------- Application accessor methods -------------------- #
|
||||
|
||||
def __getitem__(self, key):
|
||||
if not self.loaded:
|
||||
self.load()
|
||||
return self._data[key]
|
||||
|
||||
def __setitem__(self, key, value):
|
||||
if not self.loaded:
|
||||
self.load()
|
||||
self._data[key] = value
|
||||
|
||||
def __delitem__(self, key):
|
||||
if not self.loaded:
|
||||
self.load()
|
||||
del self._data[key]
|
||||
|
||||
def pop(self, key, default=missing):
|
||||
"""Remove the specified key and return the corresponding value.
|
||||
If key is not found, default is returned if given,
|
||||
otherwise KeyError is raised.
|
||||
"""
|
||||
if not self.loaded:
|
||||
self.load()
|
||||
if default is missing:
|
||||
return self._data.pop(key)
|
||||
else:
|
||||
return self._data.pop(key, default)
|
||||
|
||||
def __contains__(self, key):
|
||||
if not self.loaded:
|
||||
self.load()
|
||||
return key in self._data
|
||||
|
||||
if hasattr({}, 'has_key'):
|
||||
def has_key(self, key):
|
||||
"""D.has_key(k) -> True if D has a key k, else False."""
|
||||
if not self.loaded:
|
||||
self.load()
|
||||
return key in self._data
|
||||
|
||||
def get(self, key, default=None):
|
||||
"""D.get(k[,d]) -> D[k] if k in D, else d. d defaults to None."""
|
||||
if not self.loaded:
|
||||
self.load()
|
||||
return self._data.get(key, default)
|
||||
|
||||
def update(self, d):
|
||||
"""D.update(E) -> None. Update D from E: for k in E: D[k] = E[k]."""
|
||||
if not self.loaded:
|
||||
self.load()
|
||||
self._data.update(d)
|
||||
|
||||
def setdefault(self, key, default=None):
|
||||
"""D.setdefault(k[,d]) -> D.get(k,d), also set D[k]=d if k not in D."""
|
||||
if not self.loaded:
|
||||
self.load()
|
||||
return self._data.setdefault(key, default)
|
||||
|
||||
def clear(self):
|
||||
"""D.clear() -> None. Remove all items from D."""
|
||||
if not self.loaded:
|
||||
self.load()
|
||||
self._data.clear()
|
||||
|
||||
def keys(self):
|
||||
"""D.keys() -> list of D's keys."""
|
||||
if not self.loaded:
|
||||
self.load()
|
||||
return self._data.keys()
|
||||
|
||||
def items(self):
|
||||
"""D.items() -> list of D's (key, value) pairs, as 2-tuples."""
|
||||
if not self.loaded:
|
||||
self.load()
|
||||
return self._data.items()
|
||||
|
||||
def values(self):
|
||||
"""D.values() -> list of D's values."""
|
||||
if not self.loaded:
|
||||
self.load()
|
||||
return self._data.values()
|
||||
|
||||
|
||||
class RamSession(Session):
|
||||
|
||||
# Class-level objects. Don't rebind these!
|
||||
cache = {}
|
||||
locks = {}
|
||||
|
||||
def clean_up(self):
|
||||
"""Clean up expired sessions."""
|
||||
|
||||
now = self.now()
|
||||
for _id, (data, expiration_time) in copyitems(self.cache):
|
||||
if expiration_time <= now:
|
||||
try:
|
||||
del self.cache[_id]
|
||||
except KeyError:
|
||||
pass
|
||||
try:
|
||||
if self.locks[_id].acquire(blocking=False):
|
||||
lock = self.locks.pop(_id)
|
||||
lock.release()
|
||||
except KeyError:
|
||||
pass
|
||||
|
||||
# added to remove obsolete lock objects
|
||||
for _id in list(self.locks):
|
||||
if _id not in self.cache and self.locks[_id].acquire(blocking=False):
|
||||
lock = self.locks.pop(_id)
|
||||
lock.release()
|
||||
|
||||
def _exists(self):
|
||||
return self.id in self.cache
|
||||
|
||||
def _load(self):
|
||||
return self.cache.get(self.id)
|
||||
|
||||
def _save(self, expiration_time):
|
||||
self.cache[self.id] = (self._data, expiration_time)
|
||||
|
||||
def _delete(self):
|
||||
self.cache.pop(self.id, None)
|
||||
|
||||
def acquire_lock(self):
|
||||
"""Acquire an exclusive lock on the currently-loaded session data."""
|
||||
self.locked = True
|
||||
self.locks.setdefault(self.id, threading.RLock()).acquire()
|
||||
|
||||
def release_lock(self):
|
||||
"""Release the lock on the currently-loaded session data."""
|
||||
self.locks[self.id].release()
|
||||
self.locked = False
|
||||
|
||||
def __len__(self):
|
||||
"""Return the number of active sessions."""
|
||||
return len(self.cache)
|
||||
|
||||
|
||||
class FileSession(Session):
|
||||
|
||||
"""Implementation of the File backend for sessions
|
||||
|
||||
storage_path
|
||||
The folder where session data will be saved. Each session
|
||||
will be saved as pickle.dump(data, expiration_time) in its own file;
|
||||
the filename will be self.SESSION_PREFIX + self.id.
|
||||
|
||||
lock_timeout
|
||||
A timedelta or numeric seconds indicating how long
|
||||
to block acquiring a lock. If None (default), acquiring a lock
|
||||
will block indefinitely.
|
||||
"""
|
||||
|
||||
SESSION_PREFIX = 'session-'
|
||||
LOCK_SUFFIX = '.lock'
|
||||
pickle_protocol = pickle.HIGHEST_PROTOCOL
|
||||
|
||||
def __init__(self, id=None, **kwargs):
|
||||
# The 'storage_path' arg is required for file-based sessions.
|
||||
kwargs['storage_path'] = os.path.abspath(kwargs['storage_path'])
|
||||
kwargs.setdefault('lock_timeout', None)
|
||||
|
||||
Session.__init__(self, id=id, **kwargs)
|
||||
|
||||
# validate self.lock_timeout
|
||||
if isinstance(self.lock_timeout, (int, float)):
|
||||
self.lock_timeout = datetime.timedelta(seconds=self.lock_timeout)
|
||||
if not isinstance(self.lock_timeout, (datetime.timedelta, type(None))):
|
||||
raise ValueError("Lock timeout must be numeric seconds or "
|
||||
"a timedelta instance.")
|
||||
|
||||
def setup(cls, **kwargs):
|
||||
"""Set up the storage system for file-based sessions.
|
||||
|
||||
This should only be called once per process; this will be done
|
||||
automatically when using sessions.init (as the built-in Tool does).
|
||||
"""
|
||||
# The 'storage_path' arg is required for file-based sessions.
|
||||
kwargs['storage_path'] = os.path.abspath(kwargs['storage_path'])
|
||||
|
||||
for k, v in kwargs.items():
|
||||
setattr(cls, k, v)
|
||||
setup = classmethod(setup)
|
||||
|
||||
def _get_file_path(self):
|
||||
f = os.path.join(self.storage_path, self.SESSION_PREFIX + self.id)
|
||||
if not os.path.abspath(f).startswith(self.storage_path):
|
||||
raise cherrypy.HTTPError(400, "Invalid session id in cookie.")
|
||||
return f
|
||||
|
||||
def _exists(self):
|
||||
path = self._get_file_path()
|
||||
return os.path.exists(path)
|
||||
|
||||
def _load(self, path=None):
|
||||
assert self.locked, ("The session load without being locked. "
|
||||
"Check your tools' priority levels.")
|
||||
if path is None:
|
||||
path = self._get_file_path()
|
||||
try:
|
||||
f = open(path, "rb")
|
||||
try:
|
||||
return pickle.load(f)
|
||||
finally:
|
||||
f.close()
|
||||
except (IOError, EOFError):
|
||||
e = sys.exc_info()[1]
|
||||
if self.debug:
|
||||
cherrypy.log("Error loading the session pickle: %s" %
|
||||
e, 'TOOLS.SESSIONS')
|
||||
return None
|
||||
|
||||
def _save(self, expiration_time):
|
||||
assert self.locked, ("The session was saved without being locked. "
|
||||
"Check your tools' priority levels.")
|
||||
f = open(self._get_file_path(), "wb")
|
||||
try:
|
||||
pickle.dump((self._data, expiration_time), f, self.pickle_protocol)
|
||||
finally:
|
||||
f.close()
|
||||
|
||||
def _delete(self):
|
||||
assert self.locked, ("The session deletion without being locked. "
|
||||
"Check your tools' priority levels.")
|
||||
try:
|
||||
os.unlink(self._get_file_path())
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
def acquire_lock(self, path=None):
|
||||
"""Acquire an exclusive lock on the currently-loaded session data."""
|
||||
if path is None:
|
||||
path = self._get_file_path()
|
||||
path += self.LOCK_SUFFIX
|
||||
checker = locking.LockChecker(self.id, self.lock_timeout)
|
||||
while not checker.expired():
|
||||
try:
|
||||
self.lock = lockfile.LockFile(path)
|
||||
except lockfile.LockError:
|
||||
time.sleep(0.1)
|
||||
else:
|
||||
break
|
||||
self.locked = True
|
||||
if self.debug:
|
||||
cherrypy.log('Lock acquired.', 'TOOLS.SESSIONS')
|
||||
|
||||
def release_lock(self, path=None):
|
||||
"""Release the lock on the currently-loaded session data."""
|
||||
self.lock.release()
|
||||
self.lock.remove()
|
||||
self.locked = False
|
||||
|
||||
def clean_up(self):
|
||||
"""Clean up expired sessions."""
|
||||
now = self.now()
|
||||
# Iterate over all session files in self.storage_path
|
||||
for fname in os.listdir(self.storage_path):
|
||||
if (fname.startswith(self.SESSION_PREFIX)
|
||||
and not fname.endswith(self.LOCK_SUFFIX)):
|
||||
# We have a session file: lock and load it and check
|
||||
# if it's expired. If it fails, nevermind.
|
||||
path = os.path.join(self.storage_path, fname)
|
||||
self.acquire_lock(path)
|
||||
if self.debug:
|
||||
# This is a bit of a hack, since we're calling clean_up
|
||||
# on the first instance rather than the entire class,
|
||||
# so depending on whether you have "debug" set on the
|
||||
# path of the first session called, this may not run.
|
||||
cherrypy.log('Cleanup lock acquired.', 'TOOLS.SESSIONS')
|
||||
|
||||
try:
|
||||
contents = self._load(path)
|
||||
# _load returns None on IOError
|
||||
if contents is not None:
|
||||
data, expiration_time = contents
|
||||
if expiration_time < now:
|
||||
# Session expired: deleting it
|
||||
os.unlink(path)
|
||||
finally:
|
||||
self.release_lock(path)
|
||||
|
||||
def __len__(self):
|
||||
"""Return the number of active sessions."""
|
||||
return len([fname for fname in os.listdir(self.storage_path)
|
||||
if (fname.startswith(self.SESSION_PREFIX)
|
||||
and not fname.endswith(self.LOCK_SUFFIX))])
|
||||
|
||||
|
||||
class PostgresqlSession(Session):
|
||||
|
||||
""" Implementation of the PostgreSQL backend for sessions. It assumes
|
||||
a table like this::
|
||||
|
||||
create table session (
|
||||
id varchar(40),
|
||||
data text,
|
||||
expiration_time timestamp
|
||||
)
|
||||
|
||||
You must provide your own get_db function.
|
||||
"""
|
||||
|
||||
pickle_protocol = pickle.HIGHEST_PROTOCOL
|
||||
|
||||
def __init__(self, id=None, **kwargs):
|
||||
Session.__init__(self, id, **kwargs)
|
||||
self.cursor = self.db.cursor()
|
||||
|
||||
def setup(cls, **kwargs):
|
||||
"""Set up the storage system for Postgres-based sessions.
|
||||
|
||||
This should only be called once per process; this will be done
|
||||
automatically when using sessions.init (as the built-in Tool does).
|
||||
"""
|
||||
for k, v in kwargs.items():
|
||||
setattr(cls, k, v)
|
||||
|
||||
self.db = self.get_db()
|
||||
setup = classmethod(setup)
|
||||
|
||||
def __del__(self):
|
||||
if self.cursor:
|
||||
self.cursor.close()
|
||||
self.db.commit()
|
||||
|
||||
def _exists(self):
|
||||
# Select session data from table
|
||||
self.cursor.execute('select data, expiration_time from session '
|
||||
'where id=%s', (self.id,))
|
||||
rows = self.cursor.fetchall()
|
||||
return bool(rows)
|
||||
|
||||
def _load(self):
|
||||
# Select session data from table
|
||||
self.cursor.execute('select data, expiration_time from session '
|
||||
'where id=%s', (self.id,))
|
||||
rows = self.cursor.fetchall()
|
||||
if not rows:
|
||||
return None
|
||||
|
||||
pickled_data, expiration_time = rows[0]
|
||||
data = pickle.loads(pickled_data)
|
||||
return data, expiration_time
|
||||
|
||||
def _save(self, expiration_time):
|
||||
pickled_data = pickle.dumps(self._data, self.pickle_protocol)
|
||||
self.cursor.execute('update session set data = %s, '
|
||||
'expiration_time = %s where id = %s',
|
||||
(pickled_data, expiration_time, self.id))
|
||||
|
||||
def _delete(self):
|
||||
self.cursor.execute('delete from session where id=%s', (self.id,))
|
||||
|
||||
def acquire_lock(self):
|
||||
"""Acquire an exclusive lock on the currently-loaded session data."""
|
||||
# We use the "for update" clause to lock the row
|
||||
self.locked = True
|
||||
self.cursor.execute('select id from session where id=%s for update',
|
||||
(self.id,))
|
||||
if self.debug:
|
||||
cherrypy.log('Lock acquired.', 'TOOLS.SESSIONS')
|
||||
|
||||
def release_lock(self):
|
||||
"""Release the lock on the currently-loaded session data."""
|
||||
# We just close the cursor and that will remove the lock
|
||||
# introduced by the "for update" clause
|
||||
self.cursor.close()
|
||||
self.locked = False
|
||||
|
||||
def clean_up(self):
|
||||
"""Clean up expired sessions."""
|
||||
self.cursor.execute('delete from session where expiration_time < %s',
|
||||
(self.now(),))
|
||||
|
||||
|
||||
class MemcachedSession(Session):
|
||||
|
||||
# The most popular memcached client for Python isn't thread-safe.
|
||||
# Wrap all .get and .set operations in a single lock.
|
||||
mc_lock = threading.RLock()
|
||||
|
||||
# This is a seperate set of locks per session id.
|
||||
locks = {}
|
||||
|
||||
servers = ['127.0.0.1:11211']
|
||||
|
||||
def setup(cls, **kwargs):
|
||||
"""Set up the storage system for memcached-based sessions.
|
||||
|
||||
This should only be called once per process; this will be done
|
||||
automatically when using sessions.init (as the built-in Tool does).
|
||||
"""
|
||||
for k, v in kwargs.items():
|
||||
setattr(cls, k, v)
|
||||
|
||||
import memcache
|
||||
cls.cache = memcache.Client(cls.servers)
|
||||
setup = classmethod(setup)
|
||||
|
||||
def _get_id(self):
|
||||
return self._id
|
||||
|
||||
def _set_id(self, value):
|
||||
# This encode() call is where we differ from the superclass.
|
||||
# Memcache keys MUST be byte strings, not unicode.
|
||||
if isinstance(value, unicodestr):
|
||||
value = value.encode('utf-8')
|
||||
|
||||
self._id = value
|
||||
for o in self.id_observers:
|
||||
o(value)
|
||||
id = property(_get_id, _set_id, doc="The current session ID.")
|
||||
|
||||
def _exists(self):
|
||||
self.mc_lock.acquire()
|
||||
try:
|
||||
return bool(self.cache.get(self.id))
|
||||
finally:
|
||||
self.mc_lock.release()
|
||||
|
||||
def _load(self):
|
||||
self.mc_lock.acquire()
|
||||
try:
|
||||
return self.cache.get(self.id)
|
||||
finally:
|
||||
self.mc_lock.release()
|
||||
|
||||
def _save(self, expiration_time):
|
||||
# Send the expiration time as "Unix time" (seconds since 1/1/1970)
|
||||
td = int(time.mktime(expiration_time.timetuple()))
|
||||
self.mc_lock.acquire()
|
||||
try:
|
||||
if not self.cache.set(self.id, (self._data, expiration_time), td):
|
||||
raise AssertionError(
|
||||
"Session data for id %r not set." % self.id)
|
||||
finally:
|
||||
self.mc_lock.release()
|
||||
|
||||
def _delete(self):
|
||||
self.cache.delete(self.id)
|
||||
|
||||
def acquire_lock(self):
|
||||
"""Acquire an exclusive lock on the currently-loaded session data."""
|
||||
self.locked = True
|
||||
self.locks.setdefault(self.id, threading.RLock()).acquire()
|
||||
if self.debug:
|
||||
cherrypy.log('Lock acquired.', 'TOOLS.SESSIONS')
|
||||
|
||||
def release_lock(self):
|
||||
"""Release the lock on the currently-loaded session data."""
|
||||
self.locks[self.id].release()
|
||||
self.locked = False
|
||||
|
||||
def __len__(self):
|
||||
"""Return the number of active sessions."""
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
# Hook functions (for CherryPy tools)
|
||||
|
||||
def save():
|
||||
"""Save any changed session data."""
|
||||
|
||||
if not hasattr(cherrypy.serving, "session"):
|
||||
return
|
||||
request = cherrypy.serving.request
|
||||
response = cherrypy.serving.response
|
||||
|
||||
# Guard against running twice
|
||||
if hasattr(request, "_sessionsaved"):
|
||||
return
|
||||
request._sessionsaved = True
|
||||
|
||||
if response.stream:
|
||||
# If the body is being streamed, we have to save the data
|
||||
# *after* the response has been written out
|
||||
request.hooks.attach('on_end_request', cherrypy.session.save)
|
||||
else:
|
||||
# If the body is not being streamed, we save the data now
|
||||
# (so we can release the lock).
|
||||
if is_iterator(response.body):
|
||||
response.collapse_body()
|
||||
cherrypy.session.save()
|
||||
save.failsafe = True
|
||||
|
||||
|
||||
def close():
|
||||
"""Close the session object for this request."""
|
||||
sess = getattr(cherrypy.serving, "session", None)
|
||||
if getattr(sess, "locked", False):
|
||||
# If the session is still locked we release the lock
|
||||
sess.release_lock()
|
||||
if sess.debug:
|
||||
cherrypy.log('Lock released on close.', 'TOOLS.SESSIONS')
|
||||
close.failsafe = True
|
||||
close.priority = 90
|
||||
|
||||
|
||||
def init(storage_type='ram', path=None, path_header=None, name='session_id',
|
||||
timeout=60, domain=None, secure=False, clean_freq=5,
|
||||
persistent=True, httponly=False, debug=False, **kwargs):
|
||||
"""Initialize session object (using cookies).
|
||||
|
||||
storage_type
|
||||
One of 'ram', 'file', 'postgresql', 'memcached'. This will be
|
||||
used to look up the corresponding class in cherrypy.lib.sessions
|
||||
globals. For example, 'file' will use the FileSession class.
|
||||
|
||||
path
|
||||
The 'path' value to stick in the response cookie metadata.
|
||||
|
||||
path_header
|
||||
If 'path' is None (the default), then the response
|
||||
cookie 'path' will be pulled from request.headers[path_header].
|
||||
|
||||
name
|
||||
The name of the cookie.
|
||||
|
||||
timeout
|
||||
The expiration timeout (in minutes) for the stored session data.
|
||||
If 'persistent' is True (the default), this is also the timeout
|
||||
for the cookie.
|
||||
|
||||
domain
|
||||
The cookie domain.
|
||||
|
||||
secure
|
||||
If False (the default) the cookie 'secure' value will not
|
||||
be set. If True, the cookie 'secure' value will be set (to 1).
|
||||
|
||||
clean_freq (minutes)
|
||||
The poll rate for expired session cleanup.
|
||||
|
||||
persistent
|
||||
If True (the default), the 'timeout' argument will be used
|
||||
to expire the cookie. If False, the cookie will not have an expiry,
|
||||
and the cookie will be a "session cookie" which expires when the
|
||||
browser is closed.
|
||||
|
||||
httponly
|
||||
If False (the default) the cookie 'httponly' value will not be set.
|
||||
If True, the cookie 'httponly' value will be set (to 1).
|
||||
|
||||
Any additional kwargs will be bound to the new Session instance,
|
||||
and may be specific to the storage type. See the subclass of Session
|
||||
you're using for more information.
|
||||
"""
|
||||
|
||||
request = cherrypy.serving.request
|
||||
|
||||
# Guard against running twice
|
||||
if hasattr(request, "_session_init_flag"):
|
||||
return
|
||||
request._session_init_flag = True
|
||||
|
||||
# Check if request came with a session ID
|
||||
id = None
|
||||
if name in request.cookie:
|
||||
id = request.cookie[name].value
|
||||
if debug:
|
||||
cherrypy.log('ID obtained from request.cookie: %r' % id,
|
||||
'TOOLS.SESSIONS')
|
||||
|
||||
# Find the storage class and call setup (first time only).
|
||||
storage_class = storage_type.title() + 'Session'
|
||||
storage_class = globals()[storage_class]
|
||||
if not hasattr(cherrypy, "session"):
|
||||
if hasattr(storage_class, "setup"):
|
||||
storage_class.setup(**kwargs)
|
||||
|
||||
# Create and attach a new Session instance to cherrypy.serving.
|
||||
# It will possess a reference to (and lock, and lazily load)
|
||||
# the requested session data.
|
||||
kwargs['timeout'] = timeout
|
||||
kwargs['clean_freq'] = clean_freq
|
||||
cherrypy.serving.session = sess = storage_class(id, **kwargs)
|
||||
sess.debug = debug
|
||||
|
||||
def update_cookie(id):
|
||||
"""Update the cookie every time the session id changes."""
|
||||
cherrypy.serving.response.cookie[name] = id
|
||||
sess.id_observers.append(update_cookie)
|
||||
|
||||
# Create cherrypy.session which will proxy to cherrypy.serving.session
|
||||
if not hasattr(cherrypy, "session"):
|
||||
cherrypy.session = cherrypy._ThreadLocalProxy('session')
|
||||
|
||||
if persistent:
|
||||
cookie_timeout = timeout
|
||||
else:
|
||||
# See http://support.microsoft.com/kb/223799/EN-US/
|
||||
# and http://support.mozilla.com/en-US/kb/Cookies
|
||||
cookie_timeout = None
|
||||
set_response_cookie(path=path, path_header=path_header, name=name,
|
||||
timeout=cookie_timeout, domain=domain, secure=secure,
|
||||
httponly=httponly)
|
||||
|
||||
|
||||
def set_response_cookie(path=None, path_header=None, name='session_id',
|
||||
timeout=60, domain=None, secure=False, httponly=False):
|
||||
"""Set a response cookie for the client.
|
||||
|
||||
path
|
||||
the 'path' value to stick in the response cookie metadata.
|
||||
|
||||
path_header
|
||||
if 'path' is None (the default), then the response
|
||||
cookie 'path' will be pulled from request.headers[path_header].
|
||||
|
||||
name
|
||||
the name of the cookie.
|
||||
|
||||
timeout
|
||||
the expiration timeout for the cookie. If 0 or other boolean
|
||||
False, no 'expires' param will be set, and the cookie will be a
|
||||
"session cookie" which expires when the browser is closed.
|
||||
|
||||
domain
|
||||
the cookie domain.
|
||||
|
||||
secure
|
||||
if False (the default) the cookie 'secure' value will not
|
||||
be set. If True, the cookie 'secure' value will be set (to 1).
|
||||
|
||||
httponly
|
||||
If False (the default) the cookie 'httponly' value will not be set.
|
||||
If True, the cookie 'httponly' value will be set (to 1).
|
||||
|
||||
"""
|
||||
# Set response cookie
|
||||
cookie = cherrypy.serving.response.cookie
|
||||
cookie[name] = cherrypy.serving.session.id
|
||||
cookie[name]['path'] = (
|
||||
path or
|
||||
cherrypy.serving.request.headers.get(path_header) or
|
||||
'/'
|
||||
)
|
||||
|
||||
# We'd like to use the "max-age" param as indicated in
|
||||
# http://www.faqs.org/rfcs/rfc2109.html but IE doesn't
|
||||
# save it to disk and the session is lost if people close
|
||||
# the browser. So we have to use the old "expires" ... sigh ...
|
||||
## cookie[name]['max-age'] = timeout * 60
|
||||
if timeout:
|
||||
e = time.time() + (timeout * 60)
|
||||
cookie[name]['expires'] = httputil.HTTPDate(e)
|
||||
if domain is not None:
|
||||
cookie[name]['domain'] = domain
|
||||
if secure:
|
||||
cookie[name]['secure'] = 1
|
||||
if httponly:
|
||||
if not cookie[name].isReservedKey('httponly'):
|
||||
raise ValueError("The httponly cookie token is not supported.")
|
||||
cookie[name]['httponly'] = 1
|
||||
|
||||
|
||||
def expire():
|
||||
"""Expire the current session cookie."""
|
||||
name = cherrypy.serving.request.config.get(
|
||||
'tools.sessions.name', 'session_id')
|
||||
one_year = 60 * 60 * 24 * 365
|
||||
e = time.time() - one_year
|
||||
cherrypy.serving.response.cookie[name]['expires'] = httputil.HTTPDate(e)
|
||||
378
lib/cherrypy/lib/static.py
Normal file
378
lib/cherrypy/lib/static.py
Normal file
@@ -0,0 +1,378 @@
|
||||
import os
|
||||
import re
|
||||
import stat
|
||||
import mimetypes
|
||||
|
||||
try:
|
||||
from io import UnsupportedOperation
|
||||
except ImportError:
|
||||
UnsupportedOperation = object()
|
||||
|
||||
import cherrypy
|
||||
from cherrypy._cpcompat import ntob, unquote
|
||||
from cherrypy.lib import cptools, httputil, file_generator_limited
|
||||
|
||||
|
||||
mimetypes.init()
|
||||
mimetypes.types_map['.dwg'] = 'image/x-dwg'
|
||||
mimetypes.types_map['.ico'] = 'image/x-icon'
|
||||
mimetypes.types_map['.bz2'] = 'application/x-bzip2'
|
||||
mimetypes.types_map['.gz'] = 'application/x-gzip'
|
||||
|
||||
|
||||
def serve_file(path, content_type=None, disposition=None, name=None,
|
||||
debug=False):
|
||||
"""Set status, headers, and body in order to serve the given path.
|
||||
|
||||
The Content-Type header will be set to the content_type arg, if provided.
|
||||
If not provided, the Content-Type will be guessed by the file extension
|
||||
of the 'path' argument.
|
||||
|
||||
If disposition is not None, the Content-Disposition header will be set
|
||||
to "<disposition>; filename=<name>". If name is None, it will be set
|
||||
to the basename of path. If disposition is None, no Content-Disposition
|
||||
header will be written.
|
||||
"""
|
||||
|
||||
response = cherrypy.serving.response
|
||||
|
||||
# If path is relative, users should fix it by making path absolute.
|
||||
# That is, CherryPy should not guess where the application root is.
|
||||
# It certainly should *not* use cwd (since CP may be invoked from a
|
||||
# variety of paths). If using tools.staticdir, you can make your relative
|
||||
# paths become absolute by supplying a value for "tools.staticdir.root".
|
||||
if not os.path.isabs(path):
|
||||
msg = "'%s' is not an absolute path." % path
|
||||
if debug:
|
||||
cherrypy.log(msg, 'TOOLS.STATICFILE')
|
||||
raise ValueError(msg)
|
||||
|
||||
try:
|
||||
st = os.stat(path)
|
||||
except OSError:
|
||||
if debug:
|
||||
cherrypy.log('os.stat(%r) failed' % path, 'TOOLS.STATIC')
|
||||
raise cherrypy.NotFound()
|
||||
|
||||
# Check if path is a directory.
|
||||
if stat.S_ISDIR(st.st_mode):
|
||||
# Let the caller deal with it as they like.
|
||||
if debug:
|
||||
cherrypy.log('%r is a directory' % path, 'TOOLS.STATIC')
|
||||
raise cherrypy.NotFound()
|
||||
|
||||
# Set the Last-Modified response header, so that
|
||||
# modified-since validation code can work.
|
||||
response.headers['Last-Modified'] = httputil.HTTPDate(st.st_mtime)
|
||||
cptools.validate_since()
|
||||
|
||||
if content_type is None:
|
||||
# Set content-type based on filename extension
|
||||
ext = ""
|
||||
i = path.rfind('.')
|
||||
if i != -1:
|
||||
ext = path[i:].lower()
|
||||
content_type = mimetypes.types_map.get(ext, None)
|
||||
if content_type is not None:
|
||||
response.headers['Content-Type'] = content_type
|
||||
if debug:
|
||||
cherrypy.log('Content-Type: %r' % content_type, 'TOOLS.STATIC')
|
||||
|
||||
cd = None
|
||||
if disposition is not None:
|
||||
if name is None:
|
||||
name = os.path.basename(path)
|
||||
cd = '%s; filename="%s"' % (disposition, name)
|
||||
response.headers["Content-Disposition"] = cd
|
||||
if debug:
|
||||
cherrypy.log('Content-Disposition: %r' % cd, 'TOOLS.STATIC')
|
||||
|
||||
# Set Content-Length and use an iterable (file object)
|
||||
# this way CP won't load the whole file in memory
|
||||
content_length = st.st_size
|
||||
fileobj = open(path, 'rb')
|
||||
return _serve_fileobj(fileobj, content_type, content_length, debug=debug)
|
||||
|
||||
|
||||
def serve_fileobj(fileobj, content_type=None, disposition=None, name=None,
|
||||
debug=False):
|
||||
"""Set status, headers, and body in order to serve the given file object.
|
||||
|
||||
The Content-Type header will be set to the content_type arg, if provided.
|
||||
|
||||
If disposition is not None, the Content-Disposition header will be set
|
||||
to "<disposition>; filename=<name>". If name is None, 'filename' will
|
||||
not be set. If disposition is None, no Content-Disposition header will
|
||||
be written.
|
||||
|
||||
CAUTION: If the request contains a 'Range' header, one or more seek()s will
|
||||
be performed on the file object. This may cause undesired behavior if
|
||||
the file object is not seekable. It could also produce undesired results
|
||||
if the caller set the read position of the file object prior to calling
|
||||
serve_fileobj(), expecting that the data would be served starting from that
|
||||
position.
|
||||
"""
|
||||
|
||||
response = cherrypy.serving.response
|
||||
|
||||
try:
|
||||
st = os.fstat(fileobj.fileno())
|
||||
except AttributeError:
|
||||
if debug:
|
||||
cherrypy.log('os has no fstat attribute', 'TOOLS.STATIC')
|
||||
content_length = None
|
||||
except UnsupportedOperation:
|
||||
content_length = None
|
||||
else:
|
||||
# Set the Last-Modified response header, so that
|
||||
# modified-since validation code can work.
|
||||
response.headers['Last-Modified'] = httputil.HTTPDate(st.st_mtime)
|
||||
cptools.validate_since()
|
||||
content_length = st.st_size
|
||||
|
||||
if content_type is not None:
|
||||
response.headers['Content-Type'] = content_type
|
||||
if debug:
|
||||
cherrypy.log('Content-Type: %r' % content_type, 'TOOLS.STATIC')
|
||||
|
||||
cd = None
|
||||
if disposition is not None:
|
||||
if name is None:
|
||||
cd = disposition
|
||||
else:
|
||||
cd = '%s; filename="%s"' % (disposition, name)
|
||||
response.headers["Content-Disposition"] = cd
|
||||
if debug:
|
||||
cherrypy.log('Content-Disposition: %r' % cd, 'TOOLS.STATIC')
|
||||
|
||||
return _serve_fileobj(fileobj, content_type, content_length, debug=debug)
|
||||
|
||||
|
||||
def _serve_fileobj(fileobj, content_type, content_length, debug=False):
|
||||
"""Internal. Set response.body to the given file object, perhaps ranged."""
|
||||
response = cherrypy.serving.response
|
||||
|
||||
# HTTP/1.0 didn't have Range/Accept-Ranges headers, or the 206 code
|
||||
request = cherrypy.serving.request
|
||||
if request.protocol >= (1, 1):
|
||||
response.headers["Accept-Ranges"] = "bytes"
|
||||
r = httputil.get_ranges(request.headers.get('Range'), content_length)
|
||||
if r == []:
|
||||
response.headers['Content-Range'] = "bytes */%s" % content_length
|
||||
message = ("Invalid Range (first-byte-pos greater than "
|
||||
"Content-Length)")
|
||||
if debug:
|
||||
cherrypy.log(message, 'TOOLS.STATIC')
|
||||
raise cherrypy.HTTPError(416, message)
|
||||
|
||||
if r:
|
||||
if len(r) == 1:
|
||||
# Return a single-part response.
|
||||
start, stop = r[0]
|
||||
if stop > content_length:
|
||||
stop = content_length
|
||||
r_len = stop - start
|
||||
if debug:
|
||||
cherrypy.log(
|
||||
'Single part; start: %r, stop: %r' % (start, stop),
|
||||
'TOOLS.STATIC')
|
||||
response.status = "206 Partial Content"
|
||||
response.headers['Content-Range'] = (
|
||||
"bytes %s-%s/%s" % (start, stop - 1, content_length))
|
||||
response.headers['Content-Length'] = r_len
|
||||
fileobj.seek(start)
|
||||
response.body = file_generator_limited(fileobj, r_len)
|
||||
else:
|
||||
# Return a multipart/byteranges response.
|
||||
response.status = "206 Partial Content"
|
||||
try:
|
||||
# Python 3
|
||||
from email.generator import _make_boundary as make_boundary
|
||||
except ImportError:
|
||||
# Python 2
|
||||
from mimetools import choose_boundary as make_boundary
|
||||
boundary = make_boundary()
|
||||
ct = "multipart/byteranges; boundary=%s" % boundary
|
||||
response.headers['Content-Type'] = ct
|
||||
if "Content-Length" in response.headers:
|
||||
# Delete Content-Length header so finalize() recalcs it.
|
||||
del response.headers["Content-Length"]
|
||||
|
||||
def file_ranges():
|
||||
# Apache compatibility:
|
||||
yield ntob("\r\n")
|
||||
|
||||
for start, stop in r:
|
||||
if debug:
|
||||
cherrypy.log(
|
||||
'Multipart; start: %r, stop: %r' % (
|
||||
start, stop),
|
||||
'TOOLS.STATIC')
|
||||
yield ntob("--" + boundary, 'ascii')
|
||||
yield ntob("\r\nContent-type: %s" % content_type,
|
||||
'ascii')
|
||||
yield ntob(
|
||||
"\r\nContent-range: bytes %s-%s/%s\r\n\r\n" % (
|
||||
start, stop - 1, content_length),
|
||||
'ascii')
|
||||
fileobj.seek(start)
|
||||
gen = file_generator_limited(fileobj, stop - start)
|
||||
for chunk in gen:
|
||||
yield chunk
|
||||
yield ntob("\r\n")
|
||||
# Final boundary
|
||||
yield ntob("--" + boundary + "--", 'ascii')
|
||||
|
||||
# Apache compatibility:
|
||||
yield ntob("\r\n")
|
||||
response.body = file_ranges()
|
||||
return response.body
|
||||
else:
|
||||
if debug:
|
||||
cherrypy.log('No byteranges requested', 'TOOLS.STATIC')
|
||||
|
||||
# Set Content-Length and use an iterable (file object)
|
||||
# this way CP won't load the whole file in memory
|
||||
response.headers['Content-Length'] = content_length
|
||||
response.body = fileobj
|
||||
return response.body
|
||||
|
||||
|
||||
def serve_download(path, name=None):
|
||||
"""Serve 'path' as an application/x-download attachment."""
|
||||
# This is such a common idiom I felt it deserved its own wrapper.
|
||||
return serve_file(path, "application/x-download", "attachment", name)
|
||||
|
||||
|
||||
def _attempt(filename, content_types, debug=False):
|
||||
if debug:
|
||||
cherrypy.log('Attempting %r (content_types %r)' %
|
||||
(filename, content_types), 'TOOLS.STATICDIR')
|
||||
try:
|
||||
# you can set the content types for a
|
||||
# complete directory per extension
|
||||
content_type = None
|
||||
if content_types:
|
||||
r, ext = os.path.splitext(filename)
|
||||
content_type = content_types.get(ext[1:], None)
|
||||
serve_file(filename, content_type=content_type, debug=debug)
|
||||
return True
|
||||
except cherrypy.NotFound:
|
||||
# If we didn't find the static file, continue handling the
|
||||
# request. We might find a dynamic handler instead.
|
||||
if debug:
|
||||
cherrypy.log('NotFound', 'TOOLS.STATICFILE')
|
||||
return False
|
||||
|
||||
|
||||
def staticdir(section, dir, root="", match="", content_types=None, index="",
|
||||
debug=False):
|
||||
"""Serve a static resource from the given (root +) dir.
|
||||
|
||||
match
|
||||
If given, request.path_info will be searched for the given
|
||||
regular expression before attempting to serve static content.
|
||||
|
||||
content_types
|
||||
If given, it should be a Python dictionary of
|
||||
{file-extension: content-type} pairs, where 'file-extension' is
|
||||
a string (e.g. "gif") and 'content-type' is the value to write
|
||||
out in the Content-Type response header (e.g. "image/gif").
|
||||
|
||||
index
|
||||
If provided, it should be the (relative) name of a file to
|
||||
serve for directory requests. For example, if the dir argument is
|
||||
'/home/me', the Request-URI is 'myapp', and the index arg is
|
||||
'index.html', the file '/home/me/myapp/index.html' will be sought.
|
||||
"""
|
||||
request = cherrypy.serving.request
|
||||
if request.method not in ('GET', 'HEAD'):
|
||||
if debug:
|
||||
cherrypy.log('request.method not GET or HEAD', 'TOOLS.STATICDIR')
|
||||
return False
|
||||
|
||||
if match and not re.search(match, request.path_info):
|
||||
if debug:
|
||||
cherrypy.log('request.path_info %r does not match pattern %r' %
|
||||
(request.path_info, match), 'TOOLS.STATICDIR')
|
||||
return False
|
||||
|
||||
# Allow the use of '~' to refer to a user's home directory.
|
||||
dir = os.path.expanduser(dir)
|
||||
|
||||
# If dir is relative, make absolute using "root".
|
||||
if not os.path.isabs(dir):
|
||||
if not root:
|
||||
msg = "Static dir requires an absolute dir (or root)."
|
||||
if debug:
|
||||
cherrypy.log(msg, 'TOOLS.STATICDIR')
|
||||
raise ValueError(msg)
|
||||
dir = os.path.join(root, dir)
|
||||
|
||||
# Determine where we are in the object tree relative to 'section'
|
||||
# (where the static tool was defined).
|
||||
if section == 'global':
|
||||
section = "/"
|
||||
section = section.rstrip(r"\/")
|
||||
branch = request.path_info[len(section) + 1:]
|
||||
branch = unquote(branch.lstrip(r"\/"))
|
||||
|
||||
# If branch is "", filename will end in a slash
|
||||
filename = os.path.join(dir, branch)
|
||||
if debug:
|
||||
cherrypy.log('Checking file %r to fulfill %r' %
|
||||
(filename, request.path_info), 'TOOLS.STATICDIR')
|
||||
|
||||
# There's a chance that the branch pulled from the URL might
|
||||
# have ".." or similar uplevel attacks in it. Check that the final
|
||||
# filename is a child of dir.
|
||||
if not os.path.normpath(filename).startswith(os.path.normpath(dir)):
|
||||
raise cherrypy.HTTPError(403) # Forbidden
|
||||
|
||||
handled = _attempt(filename, content_types)
|
||||
if not handled:
|
||||
# Check for an index file if a folder was requested.
|
||||
if index:
|
||||
handled = _attempt(os.path.join(filename, index), content_types)
|
||||
if handled:
|
||||
request.is_index = filename[-1] in (r"\/")
|
||||
return handled
|
||||
|
||||
|
||||
def staticfile(filename, root=None, match="", content_types=None, debug=False):
|
||||
"""Serve a static resource from the given (root +) filename.
|
||||
|
||||
match
|
||||
If given, request.path_info will be searched for the given
|
||||
regular expression before attempting to serve static content.
|
||||
|
||||
content_types
|
||||
If given, it should be a Python dictionary of
|
||||
{file-extension: content-type} pairs, where 'file-extension' is
|
||||
a string (e.g. "gif") and 'content-type' is the value to write
|
||||
out in the Content-Type response header (e.g. "image/gif").
|
||||
|
||||
"""
|
||||
request = cherrypy.serving.request
|
||||
if request.method not in ('GET', 'HEAD'):
|
||||
if debug:
|
||||
cherrypy.log('request.method not GET or HEAD', 'TOOLS.STATICFILE')
|
||||
return False
|
||||
|
||||
if match and not re.search(match, request.path_info):
|
||||
if debug:
|
||||
cherrypy.log('request.path_info %r does not match pattern %r' %
|
||||
(request.path_info, match), 'TOOLS.STATICFILE')
|
||||
return False
|
||||
|
||||
# If filename is relative, make absolute using "root".
|
||||
if not os.path.isabs(filename):
|
||||
if not root:
|
||||
msg = "Static tool requires an absolute filename (got '%s')." % (
|
||||
filename,)
|
||||
if debug:
|
||||
cherrypy.log(msg, 'TOOLS.STATICFILE')
|
||||
raise ValueError(msg)
|
||||
filename = os.path.join(root, filename)
|
||||
|
||||
return _attempt(filename, content_types, debug=debug)
|
||||
57
lib/cherrypy/lib/xmlrpcutil.py
Normal file
57
lib/cherrypy/lib/xmlrpcutil.py
Normal file
@@ -0,0 +1,57 @@
|
||||
import sys
|
||||
|
||||
import cherrypy
|
||||
from cherrypy._cpcompat import ntob
|
||||
|
||||
|
||||
def get_xmlrpclib():
|
||||
try:
|
||||
import xmlrpc.client as x
|
||||
except ImportError:
|
||||
import xmlrpclib as x
|
||||
return x
|
||||
|
||||
|
||||
def process_body():
|
||||
"""Return (params, method) from request body."""
|
||||
try:
|
||||
return get_xmlrpclib().loads(cherrypy.request.body.read())
|
||||
except Exception:
|
||||
return ('ERROR PARAMS', ), 'ERRORMETHOD'
|
||||
|
||||
|
||||
def patched_path(path):
|
||||
"""Return 'path', doctored for RPC."""
|
||||
if not path.endswith('/'):
|
||||
path += '/'
|
||||
if path.startswith('/RPC2/'):
|
||||
# strip the first /rpc2
|
||||
path = path[5:]
|
||||
return path
|
||||
|
||||
|
||||
def _set_response(body):
|
||||
# The XML-RPC spec (http://www.xmlrpc.com/spec) says:
|
||||
# "Unless there's a lower-level error, always return 200 OK."
|
||||
# Since Python's xmlrpclib interprets a non-200 response
|
||||
# as a "Protocol Error", we'll just return 200 every time.
|
||||
response = cherrypy.response
|
||||
response.status = '200 OK'
|
||||
response.body = ntob(body, 'utf-8')
|
||||
response.headers['Content-Type'] = 'text/xml'
|
||||
response.headers['Content-Length'] = len(body)
|
||||
|
||||
|
||||
def respond(body, encoding='utf-8', allow_none=0):
|
||||
xmlrpclib = get_xmlrpclib()
|
||||
if not isinstance(body, xmlrpclib.Fault):
|
||||
body = (body,)
|
||||
_set_response(xmlrpclib.dumps(body, methodresponse=1,
|
||||
encoding=encoding,
|
||||
allow_none=allow_none))
|
||||
|
||||
|
||||
def on_error(*args, **kwargs):
|
||||
body = str(sys.exc_info()[1])
|
||||
xmlrpclib = get_xmlrpclib()
|
||||
_set_response(xmlrpclib.dumps(xmlrpclib.Fault(1, body)))
|
||||
14
lib/cherrypy/process/__init__.py
Normal file
14
lib/cherrypy/process/__init__.py
Normal file
@@ -0,0 +1,14 @@
|
||||
"""Site container for an HTTP server.
|
||||
|
||||
A Web Site Process Bus object is used to connect applications, servers,
|
||||
and frameworks with site-wide services such as daemonization, process
|
||||
reload, signal handling, drop privileges, PID file management, logging
|
||||
for all of these, and many more.
|
||||
|
||||
The 'plugins' module defines a few abstract and concrete services for
|
||||
use with the bus. Some use tool-specific channels; see the documentation
|
||||
for each class.
|
||||
"""
|
||||
|
||||
from cherrypy.process.wspbus import bus
|
||||
from cherrypy.process import plugins, servers
|
||||
717
lib/cherrypy/process/plugins.py
Normal file
717
lib/cherrypy/process/plugins.py
Normal file
@@ -0,0 +1,717 @@
|
||||
"""Site services for use with a Web Site Process Bus."""
|
||||
|
||||
import os
|
||||
import re
|
||||
import signal as _signal
|
||||
import sys
|
||||
import time
|
||||
import threading
|
||||
|
||||
from cherrypy._cpcompat import basestring, get_daemon, get_thread_ident
|
||||
from cherrypy._cpcompat import ntob, set, Timer, SetDaemonProperty
|
||||
|
||||
# _module__file__base is used by Autoreload to make
|
||||
# absolute any filenames retrieved from sys.modules which are not
|
||||
# already absolute paths. This is to work around Python's quirk
|
||||
# of importing the startup script and using a relative filename
|
||||
# for it in sys.modules.
|
||||
#
|
||||
# Autoreload examines sys.modules afresh every time it runs. If an application
|
||||
# changes the current directory by executing os.chdir(), then the next time
|
||||
# Autoreload runs, it will not be able to find any filenames which are
|
||||
# not absolute paths, because the current directory is not the same as when the
|
||||
# module was first imported. Autoreload will then wrongly conclude the file
|
||||
# has "changed", and initiate the shutdown/re-exec sequence.
|
||||
# See ticket #917.
|
||||
# For this workaround to have a decent probability of success, this module
|
||||
# needs to be imported as early as possible, before the app has much chance
|
||||
# to change the working directory.
|
||||
_module__file__base = os.getcwd()
|
||||
|
||||
|
||||
class SimplePlugin(object):
|
||||
|
||||
"""Plugin base class which auto-subscribes methods for known channels."""
|
||||
|
||||
bus = None
|
||||
"""A :class:`Bus <cherrypy.process.wspbus.Bus>`, usually cherrypy.engine.
|
||||
"""
|
||||
|
||||
def __init__(self, bus):
|
||||
self.bus = bus
|
||||
|
||||
def subscribe(self):
|
||||
"""Register this object as a (multi-channel) listener on the bus."""
|
||||
for channel in self.bus.listeners:
|
||||
# Subscribe self.start, self.exit, etc. if present.
|
||||
method = getattr(self, channel, None)
|
||||
if method is not None:
|
||||
self.bus.subscribe(channel, method)
|
||||
|
||||
def unsubscribe(self):
|
||||
"""Unregister this object as a listener on the bus."""
|
||||
for channel in self.bus.listeners:
|
||||
# Unsubscribe self.start, self.exit, etc. if present.
|
||||
method = getattr(self, channel, None)
|
||||
if method is not None:
|
||||
self.bus.unsubscribe(channel, method)
|
||||
|
||||
|
||||
class SignalHandler(object):
|
||||
|
||||
"""Register bus channels (and listeners) for system signals.
|
||||
|
||||
You can modify what signals your application listens for, and what it does
|
||||
when it receives signals, by modifying :attr:`SignalHandler.handlers`,
|
||||
a dict of {signal name: callback} pairs. The default set is::
|
||||
|
||||
handlers = {'SIGTERM': self.bus.exit,
|
||||
'SIGHUP': self.handle_SIGHUP,
|
||||
'SIGUSR1': self.bus.graceful,
|
||||
}
|
||||
|
||||
The :func:`SignalHandler.handle_SIGHUP`` method calls
|
||||
:func:`bus.restart()<cherrypy.process.wspbus.Bus.restart>`
|
||||
if the process is daemonized, but
|
||||
:func:`bus.exit()<cherrypy.process.wspbus.Bus.exit>`
|
||||
if the process is attached to a TTY. This is because Unix window
|
||||
managers tend to send SIGHUP to terminal windows when the user closes them.
|
||||
|
||||
Feel free to add signals which are not available on every platform.
|
||||
The :class:`SignalHandler` will ignore errors raised from attempting
|
||||
to register handlers for unknown signals.
|
||||
"""
|
||||
|
||||
handlers = {}
|
||||
"""A map from signal names (e.g. 'SIGTERM') to handlers (e.g. bus.exit)."""
|
||||
|
||||
signals = {}
|
||||
"""A map from signal numbers to names."""
|
||||
|
||||
for k, v in vars(_signal).items():
|
||||
if k.startswith('SIG') and not k.startswith('SIG_'):
|
||||
signals[v] = k
|
||||
del k, v
|
||||
|
||||
def __init__(self, bus):
|
||||
self.bus = bus
|
||||
# Set default handlers
|
||||
self.handlers = {'SIGTERM': self.bus.exit,
|
||||
'SIGHUP': self.handle_SIGHUP,
|
||||
'SIGUSR1': self.bus.graceful,
|
||||
}
|
||||
|
||||
if sys.platform[:4] == 'java':
|
||||
del self.handlers['SIGUSR1']
|
||||
self.handlers['SIGUSR2'] = self.bus.graceful
|
||||
self.bus.log("SIGUSR1 cannot be set on the JVM platform. "
|
||||
"Using SIGUSR2 instead.")
|
||||
self.handlers['SIGINT'] = self._jython_SIGINT_handler
|
||||
|
||||
self._previous_handlers = {}
|
||||
|
||||
def _jython_SIGINT_handler(self, signum=None, frame=None):
|
||||
# See http://bugs.jython.org/issue1313
|
||||
self.bus.log('Keyboard Interrupt: shutting down bus')
|
||||
self.bus.exit()
|
||||
|
||||
def subscribe(self):
|
||||
"""Subscribe self.handlers to signals."""
|
||||
for sig, func in self.handlers.items():
|
||||
try:
|
||||
self.set_handler(sig, func)
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
def unsubscribe(self):
|
||||
"""Unsubscribe self.handlers from signals."""
|
||||
for signum, handler in self._previous_handlers.items():
|
||||
signame = self.signals[signum]
|
||||
|
||||
if handler is None:
|
||||
self.bus.log("Restoring %s handler to SIG_DFL." % signame)
|
||||
handler = _signal.SIG_DFL
|
||||
else:
|
||||
self.bus.log("Restoring %s handler %r." % (signame, handler))
|
||||
|
||||
try:
|
||||
our_handler = _signal.signal(signum, handler)
|
||||
if our_handler is None:
|
||||
self.bus.log("Restored old %s handler %r, but our "
|
||||
"handler was not registered." %
|
||||
(signame, handler), level=30)
|
||||
except ValueError:
|
||||
self.bus.log("Unable to restore %s handler %r." %
|
||||
(signame, handler), level=40, traceback=True)
|
||||
|
||||
def set_handler(self, signal, listener=None):
|
||||
"""Subscribe a handler for the given signal (number or name).
|
||||
|
||||
If the optional 'listener' argument is provided, it will be
|
||||
subscribed as a listener for the given signal's channel.
|
||||
|
||||
If the given signal name or number is not available on the current
|
||||
platform, ValueError is raised.
|
||||
"""
|
||||
if isinstance(signal, basestring):
|
||||
signum = getattr(_signal, signal, None)
|
||||
if signum is None:
|
||||
raise ValueError("No such signal: %r" % signal)
|
||||
signame = signal
|
||||
else:
|
||||
try:
|
||||
signame = self.signals[signal]
|
||||
except KeyError:
|
||||
raise ValueError("No such signal: %r" % signal)
|
||||
signum = signal
|
||||
|
||||
prev = _signal.signal(signum, self._handle_signal)
|
||||
self._previous_handlers[signum] = prev
|
||||
|
||||
if listener is not None:
|
||||
self.bus.log("Listening for %s." % signame)
|
||||
self.bus.subscribe(signame, listener)
|
||||
|
||||
def _handle_signal(self, signum=None, frame=None):
|
||||
"""Python signal handler (self.set_handler subscribes it for you)."""
|
||||
signame = self.signals[signum]
|
||||
self.bus.log("Caught signal %s." % signame)
|
||||
self.bus.publish(signame)
|
||||
|
||||
def handle_SIGHUP(self):
|
||||
"""Restart if daemonized, else exit."""
|
||||
if os.isatty(sys.stdin.fileno()):
|
||||
# not daemonized (may be foreground or background)
|
||||
self.bus.log("SIGHUP caught but not daemonized. Exiting.")
|
||||
self.bus.exit()
|
||||
else:
|
||||
self.bus.log("SIGHUP caught while daemonized. Restarting.")
|
||||
self.bus.restart()
|
||||
|
||||
|
||||
try:
|
||||
import pwd
|
||||
import grp
|
||||
except ImportError:
|
||||
pwd, grp = None, None
|
||||
|
||||
|
||||
class DropPrivileges(SimplePlugin):
|
||||
|
||||
"""Drop privileges. uid/gid arguments not available on Windows.
|
||||
|
||||
Special thanks to `Gavin Baker <http://antonym.org/2005/12/dropping-privileges-in-python.html>`_
|
||||
"""
|
||||
|
||||
def __init__(self, bus, umask=None, uid=None, gid=None):
|
||||
SimplePlugin.__init__(self, bus)
|
||||
self.finalized = False
|
||||
self.uid = uid
|
||||
self.gid = gid
|
||||
self.umask = umask
|
||||
|
||||
def _get_uid(self):
|
||||
return self._uid
|
||||
|
||||
def _set_uid(self, val):
|
||||
if val is not None:
|
||||
if pwd is None:
|
||||
self.bus.log("pwd module not available; ignoring uid.",
|
||||
level=30)
|
||||
val = None
|
||||
elif isinstance(val, basestring):
|
||||
val = pwd.getpwnam(val)[2]
|
||||
self._uid = val
|
||||
uid = property(_get_uid, _set_uid,
|
||||
doc="The uid under which to run. Availability: Unix.")
|
||||
|
||||
def _get_gid(self):
|
||||
return self._gid
|
||||
|
||||
def _set_gid(self, val):
|
||||
if val is not None:
|
||||
if grp is None:
|
||||
self.bus.log("grp module not available; ignoring gid.",
|
||||
level=30)
|
||||
val = None
|
||||
elif isinstance(val, basestring):
|
||||
val = grp.getgrnam(val)[2]
|
||||
self._gid = val
|
||||
gid = property(_get_gid, _set_gid,
|
||||
doc="The gid under which to run. Availability: Unix.")
|
||||
|
||||
def _get_umask(self):
|
||||
return self._umask
|
||||
|
||||
def _set_umask(self, val):
|
||||
if val is not None:
|
||||
try:
|
||||
os.umask
|
||||
except AttributeError:
|
||||
self.bus.log("umask function not available; ignoring umask.",
|
||||
level=30)
|
||||
val = None
|
||||
self._umask = val
|
||||
umask = property(
|
||||
_get_umask,
|
||||
_set_umask,
|
||||
doc="""The default permission mode for newly created files and
|
||||
directories.
|
||||
|
||||
Usually expressed in octal format, for example, ``0644``.
|
||||
Availability: Unix, Windows.
|
||||
""")
|
||||
|
||||
def start(self):
|
||||
# uid/gid
|
||||
def current_ids():
|
||||
"""Return the current (uid, gid) if available."""
|
||||
name, group = None, None
|
||||
if pwd:
|
||||
name = pwd.getpwuid(os.getuid())[0]
|
||||
if grp:
|
||||
group = grp.getgrgid(os.getgid())[0]
|
||||
return name, group
|
||||
|
||||
if self.finalized:
|
||||
if not (self.uid is None and self.gid is None):
|
||||
self.bus.log('Already running as uid: %r gid: %r' %
|
||||
current_ids())
|
||||
else:
|
||||
if self.uid is None and self.gid is None:
|
||||
if pwd or grp:
|
||||
self.bus.log('uid/gid not set', level=30)
|
||||
else:
|
||||
self.bus.log('Started as uid: %r gid: %r' % current_ids())
|
||||
if self.gid is not None:
|
||||
os.setgid(self.gid)
|
||||
os.setgroups([])
|
||||
if self.uid is not None:
|
||||
os.setuid(self.uid)
|
||||
self.bus.log('Running as uid: %r gid: %r' % current_ids())
|
||||
|
||||
# umask
|
||||
if self.finalized:
|
||||
if self.umask is not None:
|
||||
self.bus.log('umask already set to: %03o' % self.umask)
|
||||
else:
|
||||
if self.umask is None:
|
||||
self.bus.log('umask not set', level=30)
|
||||
else:
|
||||
old_umask = os.umask(self.umask)
|
||||
self.bus.log('umask old: %03o, new: %03o' %
|
||||
(old_umask, self.umask))
|
||||
|
||||
self.finalized = True
|
||||
# This is slightly higher than the priority for server.start
|
||||
# in order to facilitate the most common use: starting on a low
|
||||
# port (which requires root) and then dropping to another user.
|
||||
start.priority = 77
|
||||
|
||||
|
||||
class Daemonizer(SimplePlugin):
|
||||
|
||||
"""Daemonize the running script.
|
||||
|
||||
Use this with a Web Site Process Bus via::
|
||||
|
||||
Daemonizer(bus).subscribe()
|
||||
|
||||
When this component finishes, the process is completely decoupled from
|
||||
the parent environment. Please note that when this component is used,
|
||||
the return code from the parent process will still be 0 if a startup
|
||||
error occurs in the forked children. Errors in the initial daemonizing
|
||||
process still return proper exit codes. Therefore, if you use this
|
||||
plugin to daemonize, don't use the return code as an accurate indicator
|
||||
of whether the process fully started. In fact, that return code only
|
||||
indicates if the process succesfully finished the first fork.
|
||||
"""
|
||||
|
||||
def __init__(self, bus, stdin='/dev/null', stdout='/dev/null',
|
||||
stderr='/dev/null'):
|
||||
SimplePlugin.__init__(self, bus)
|
||||
self.stdin = stdin
|
||||
self.stdout = stdout
|
||||
self.stderr = stderr
|
||||
self.finalized = False
|
||||
|
||||
def start(self):
|
||||
if self.finalized:
|
||||
self.bus.log('Already deamonized.')
|
||||
|
||||
# forking has issues with threads:
|
||||
# http://www.opengroup.org/onlinepubs/000095399/functions/fork.html
|
||||
# "The general problem with making fork() work in a multi-threaded
|
||||
# world is what to do with all of the threads..."
|
||||
# So we check for active threads:
|
||||
if threading.activeCount() != 1:
|
||||
self.bus.log('There are %r active threads. '
|
||||
'Daemonizing now may cause strange failures.' %
|
||||
threading.enumerate(), level=30)
|
||||
|
||||
# See http://www.erlenstar.demon.co.uk/unix/faq_2.html#SEC16
|
||||
# (or http://www.faqs.org/faqs/unix-faq/programmer/faq/ section 1.7)
|
||||
# and http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66012
|
||||
|
||||
# Finish up with the current stdout/stderr
|
||||
sys.stdout.flush()
|
||||
sys.stderr.flush()
|
||||
|
||||
# Do first fork.
|
||||
try:
|
||||
pid = os.fork()
|
||||
if pid == 0:
|
||||
# This is the child process. Continue.
|
||||
pass
|
||||
else:
|
||||
# This is the first parent. Exit, now that we've forked.
|
||||
self.bus.log('Forking once.')
|
||||
os._exit(0)
|
||||
except OSError:
|
||||
# Python raises OSError rather than returning negative numbers.
|
||||
exc = sys.exc_info()[1]
|
||||
sys.exit("%s: fork #1 failed: (%d) %s\n"
|
||||
% (sys.argv[0], exc.errno, exc.strerror))
|
||||
|
||||
os.setsid()
|
||||
|
||||
# Do second fork
|
||||
try:
|
||||
pid = os.fork()
|
||||
if pid > 0:
|
||||
self.bus.log('Forking twice.')
|
||||
os._exit(0) # Exit second parent
|
||||
except OSError:
|
||||
exc = sys.exc_info()[1]
|
||||
sys.exit("%s: fork #2 failed: (%d) %s\n"
|
||||
% (sys.argv[0], exc.errno, exc.strerror))
|
||||
|
||||
os.chdir("/")
|
||||
os.umask(0)
|
||||
|
||||
si = open(self.stdin, "r")
|
||||
so = open(self.stdout, "a+")
|
||||
se = open(self.stderr, "a+")
|
||||
|
||||
# os.dup2(fd, fd2) will close fd2 if necessary,
|
||||
# so we don't explicitly close stdin/out/err.
|
||||
# See http://docs.python.org/lib/os-fd-ops.html
|
||||
os.dup2(si.fileno(), sys.stdin.fileno())
|
||||
os.dup2(so.fileno(), sys.stdout.fileno())
|
||||
os.dup2(se.fileno(), sys.stderr.fileno())
|
||||
|
||||
self.bus.log('Daemonized to PID: %s' % os.getpid())
|
||||
self.finalized = True
|
||||
start.priority = 65
|
||||
|
||||
|
||||
class PIDFile(SimplePlugin):
|
||||
|
||||
"""Maintain a PID file via a WSPBus."""
|
||||
|
||||
def __init__(self, bus, pidfile):
|
||||
SimplePlugin.__init__(self, bus)
|
||||
self.pidfile = pidfile
|
||||
self.finalized = False
|
||||
|
||||
def start(self):
|
||||
pid = os.getpid()
|
||||
if self.finalized:
|
||||
self.bus.log('PID %r already written to %r.' % (pid, self.pidfile))
|
||||
else:
|
||||
open(self.pidfile, "wb").write(ntob("%s\n" % pid, 'utf8'))
|
||||
self.bus.log('PID %r written to %r.' % (pid, self.pidfile))
|
||||
self.finalized = True
|
||||
start.priority = 70
|
||||
|
||||
def exit(self):
|
||||
try:
|
||||
os.remove(self.pidfile)
|
||||
self.bus.log('PID file removed: %r.' % self.pidfile)
|
||||
except (KeyboardInterrupt, SystemExit):
|
||||
raise
|
||||
except:
|
||||
pass
|
||||
|
||||
|
||||
class PerpetualTimer(Timer):
|
||||
|
||||
"""A responsive subclass of threading.Timer whose run() method repeats.
|
||||
|
||||
Use this timer only when you really need a very interruptible timer;
|
||||
this checks its 'finished' condition up to 20 times a second, which can
|
||||
results in pretty high CPU usage
|
||||
"""
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
"Override parent constructor to allow 'bus' to be provided."
|
||||
self.bus = kwargs.pop('bus', None)
|
||||
super(PerpetualTimer, self).__init__(*args, **kwargs)
|
||||
|
||||
def run(self):
|
||||
while True:
|
||||
self.finished.wait(self.interval)
|
||||
if self.finished.isSet():
|
||||
return
|
||||
try:
|
||||
self.function(*self.args, **self.kwargs)
|
||||
except Exception:
|
||||
if self.bus:
|
||||
self.bus.log(
|
||||
"Error in perpetual timer thread function %r." %
|
||||
self.function, level=40, traceback=True)
|
||||
# Quit on first error to avoid massive logs.
|
||||
raise
|
||||
|
||||
|
||||
class BackgroundTask(SetDaemonProperty, threading.Thread):
|
||||
|
||||
"""A subclass of threading.Thread whose run() method repeats.
|
||||
|
||||
Use this class for most repeating tasks. It uses time.sleep() to wait
|
||||
for each interval, which isn't very responsive; that is, even if you call
|
||||
self.cancel(), you'll have to wait until the sleep() call finishes before
|
||||
the thread stops. To compensate, it defaults to being daemonic, which means
|
||||
it won't delay stopping the whole process.
|
||||
"""
|
||||
|
||||
def __init__(self, interval, function, args=[], kwargs={}, bus=None):
|
||||
threading.Thread.__init__(self)
|
||||
self.interval = interval
|
||||
self.function = function
|
||||
self.args = args
|
||||
self.kwargs = kwargs
|
||||
self.running = False
|
||||
self.bus = bus
|
||||
|
||||
# default to daemonic
|
||||
self.daemon = True
|
||||
|
||||
def cancel(self):
|
||||
self.running = False
|
||||
|
||||
def run(self):
|
||||
self.running = True
|
||||
while self.running:
|
||||
time.sleep(self.interval)
|
||||
if not self.running:
|
||||
return
|
||||
try:
|
||||
self.function(*self.args, **self.kwargs)
|
||||
except Exception:
|
||||
if self.bus:
|
||||
self.bus.log("Error in background task thread function %r."
|
||||
% self.function, level=40, traceback=True)
|
||||
# Quit on first error to avoid massive logs.
|
||||
raise
|
||||
|
||||
|
||||
class Monitor(SimplePlugin):
|
||||
|
||||
"""WSPBus listener to periodically run a callback in its own thread."""
|
||||
|
||||
callback = None
|
||||
"""The function to call at intervals."""
|
||||
|
||||
frequency = 60
|
||||
"""The time in seconds between callback runs."""
|
||||
|
||||
thread = None
|
||||
"""A :class:`BackgroundTask<cherrypy.process.plugins.BackgroundTask>`
|
||||
thread.
|
||||
"""
|
||||
|
||||
def __init__(self, bus, callback, frequency=60, name=None):
|
||||
SimplePlugin.__init__(self, bus)
|
||||
self.callback = callback
|
||||
self.frequency = frequency
|
||||
self.thread = None
|
||||
self.name = name
|
||||
|
||||
def start(self):
|
||||
"""Start our callback in its own background thread."""
|
||||
if self.frequency > 0:
|
||||
threadname = self.name or self.__class__.__name__
|
||||
if self.thread is None:
|
||||
self.thread = BackgroundTask(self.frequency, self.callback,
|
||||
bus=self.bus)
|
||||
self.thread.setName(threadname)
|
||||
self.thread.start()
|
||||
self.bus.log("Started monitor thread %r." % threadname)
|
||||
else:
|
||||
self.bus.log("Monitor thread %r already started." % threadname)
|
||||
start.priority = 70
|
||||
|
||||
def stop(self):
|
||||
"""Stop our callback's background task thread."""
|
||||
if self.thread is None:
|
||||
self.bus.log("No thread running for %s." %
|
||||
self.name or self.__class__.__name__)
|
||||
else:
|
||||
if self.thread is not threading.currentThread():
|
||||
name = self.thread.getName()
|
||||
self.thread.cancel()
|
||||
if not get_daemon(self.thread):
|
||||
self.bus.log("Joining %r" % name)
|
||||
self.thread.join()
|
||||
self.bus.log("Stopped thread %r." % name)
|
||||
self.thread = None
|
||||
|
||||
def graceful(self):
|
||||
"""Stop the callback's background task thread and restart it."""
|
||||
self.stop()
|
||||
self.start()
|
||||
|
||||
|
||||
class Autoreloader(Monitor):
|
||||
|
||||
"""Monitor which re-executes the process when files change.
|
||||
|
||||
This :ref:`plugin<plugins>` restarts the process (via :func:`os.execv`)
|
||||
if any of the files it monitors change (or is deleted). By default, the
|
||||
autoreloader monitors all imported modules; you can add to the
|
||||
set by adding to ``autoreload.files``::
|
||||
|
||||
cherrypy.engine.autoreload.files.add(myFile)
|
||||
|
||||
If there are imported files you do *not* wish to monitor, you can
|
||||
adjust the ``match`` attribute, a regular expression. For example,
|
||||
to stop monitoring cherrypy itself::
|
||||
|
||||
cherrypy.engine.autoreload.match = r'^(?!cherrypy).+'
|
||||
|
||||
Like all :class:`Monitor<cherrypy.process.plugins.Monitor>` plugins,
|
||||
the autoreload plugin takes a ``frequency`` argument. The default is
|
||||
1 second; that is, the autoreloader will examine files once each second.
|
||||
"""
|
||||
|
||||
files = None
|
||||
"""The set of files to poll for modifications."""
|
||||
|
||||
frequency = 1
|
||||
"""The interval in seconds at which to poll for modified files."""
|
||||
|
||||
match = '.*'
|
||||
"""A regular expression by which to match filenames."""
|
||||
|
||||
def __init__(self, bus, frequency=1, match='.*'):
|
||||
self.mtimes = {}
|
||||
self.files = set()
|
||||
self.match = match
|
||||
Monitor.__init__(self, bus, self.run, frequency)
|
||||
|
||||
def start(self):
|
||||
"""Start our own background task thread for self.run."""
|
||||
if self.thread is None:
|
||||
self.mtimes = {}
|
||||
Monitor.start(self)
|
||||
start.priority = 70
|
||||
|
||||
def sysfiles(self):
|
||||
"""Return a Set of sys.modules filenames to monitor."""
|
||||
files = set()
|
||||
for k, m in list(sys.modules.items()):
|
||||
if re.match(self.match, k):
|
||||
if (
|
||||
hasattr(m, '__loader__') and
|
||||
hasattr(m.__loader__, 'archive')
|
||||
):
|
||||
f = m.__loader__.archive
|
||||
else:
|
||||
f = getattr(m, '__file__', None)
|
||||
if f is not None and not os.path.isabs(f):
|
||||
# ensure absolute paths so a os.chdir() in the app
|
||||
# doesn't break me
|
||||
f = os.path.normpath(
|
||||
os.path.join(_module__file__base, f))
|
||||
files.add(f)
|
||||
return files
|
||||
|
||||
def run(self):
|
||||
"""Reload the process if registered files have been modified."""
|
||||
for filename in self.sysfiles() | self.files:
|
||||
if filename:
|
||||
if filename.endswith('.pyc'):
|
||||
filename = filename[:-1]
|
||||
|
||||
oldtime = self.mtimes.get(filename, 0)
|
||||
if oldtime is None:
|
||||
# Module with no .py file. Skip it.
|
||||
continue
|
||||
|
||||
try:
|
||||
mtime = os.stat(filename).st_mtime
|
||||
except OSError:
|
||||
# Either a module with no .py file, or it's been deleted.
|
||||
mtime = None
|
||||
|
||||
if filename not in self.mtimes:
|
||||
# If a module has no .py file, this will be None.
|
||||
self.mtimes[filename] = mtime
|
||||
else:
|
||||
if mtime is None or mtime > oldtime:
|
||||
# The file has been deleted or modified.
|
||||
self.bus.log("Restarting because %s changed." %
|
||||
filename)
|
||||
self.thread.cancel()
|
||||
self.bus.log("Stopped thread %r." %
|
||||
self.thread.getName())
|
||||
self.bus.restart()
|
||||
return
|
||||
|
||||
|
||||
class ThreadManager(SimplePlugin):
|
||||
|
||||
"""Manager for HTTP request threads.
|
||||
|
||||
If you have control over thread creation and destruction, publish to
|
||||
the 'acquire_thread' and 'release_thread' channels (for each thread).
|
||||
This will register/unregister the current thread and publish to
|
||||
'start_thread' and 'stop_thread' listeners in the bus as needed.
|
||||
|
||||
If threads are created and destroyed by code you do not control
|
||||
(e.g., Apache), then, at the beginning of every HTTP request,
|
||||
publish to 'acquire_thread' only. You should not publish to
|
||||
'release_thread' in this case, since you do not know whether
|
||||
the thread will be re-used or not. The bus will call
|
||||
'stop_thread' listeners for you when it stops.
|
||||
"""
|
||||
|
||||
threads = None
|
||||
"""A map of {thread ident: index number} pairs."""
|
||||
|
||||
def __init__(self, bus):
|
||||
self.threads = {}
|
||||
SimplePlugin.__init__(self, bus)
|
||||
self.bus.listeners.setdefault('acquire_thread', set())
|
||||
self.bus.listeners.setdefault('start_thread', set())
|
||||
self.bus.listeners.setdefault('release_thread', set())
|
||||
self.bus.listeners.setdefault('stop_thread', set())
|
||||
|
||||
def acquire_thread(self):
|
||||
"""Run 'start_thread' listeners for the current thread.
|
||||
|
||||
If the current thread has already been seen, any 'start_thread'
|
||||
listeners will not be run again.
|
||||
"""
|
||||
thread_ident = get_thread_ident()
|
||||
if thread_ident not in self.threads:
|
||||
# We can't just use get_ident as the thread ID
|
||||
# because some platforms reuse thread ID's.
|
||||
i = len(self.threads) + 1
|
||||
self.threads[thread_ident] = i
|
||||
self.bus.publish('start_thread', i)
|
||||
|
||||
def release_thread(self):
|
||||
"""Release the current thread and run 'stop_thread' listeners."""
|
||||
thread_ident = get_thread_ident()
|
||||
i = self.threads.pop(thread_ident, None)
|
||||
if i is not None:
|
||||
self.bus.publish('stop_thread', i)
|
||||
|
||||
def stop(self):
|
||||
"""Release all threads and run all 'stop_thread' listeners."""
|
||||
for thread_ident, i in self.threads.items():
|
||||
self.bus.publish('stop_thread', i)
|
||||
self.threads.clear()
|
||||
graceful = stop
|
||||
465
lib/cherrypy/process/servers.py
Normal file
465
lib/cherrypy/process/servers.py
Normal file
@@ -0,0 +1,465 @@
|
||||
"""
|
||||
Starting in CherryPy 3.1, cherrypy.server is implemented as an
|
||||
:ref:`Engine Plugin<plugins>`. It's an instance of
|
||||
:class:`cherrypy._cpserver.Server`, which is a subclass of
|
||||
:class:`cherrypy.process.servers.ServerAdapter`. The ``ServerAdapter`` class
|
||||
is designed to control other servers, as well.
|
||||
|
||||
Multiple servers/ports
|
||||
======================
|
||||
|
||||
If you need to start more than one HTTP server (to serve on multiple ports, or
|
||||
protocols, etc.), you can manually register each one and then start them all
|
||||
with engine.start::
|
||||
|
||||
s1 = ServerAdapter(cherrypy.engine, MyWSGIServer(host='0.0.0.0', port=80))
|
||||
s2 = ServerAdapter(cherrypy.engine,
|
||||
another.HTTPServer(host='127.0.0.1',
|
||||
SSL=True))
|
||||
s1.subscribe()
|
||||
s2.subscribe()
|
||||
cherrypy.engine.start()
|
||||
|
||||
.. index:: SCGI
|
||||
|
||||
FastCGI/SCGI
|
||||
============
|
||||
|
||||
There are also Flup\ **F**\ CGIServer and Flup\ **S**\ CGIServer classes in
|
||||
:mod:`cherrypy.process.servers`. To start an fcgi server, for example,
|
||||
wrap an instance of it in a ServerAdapter::
|
||||
|
||||
addr = ('0.0.0.0', 4000)
|
||||
f = servers.FlupFCGIServer(application=cherrypy.tree, bindAddress=addr)
|
||||
s = servers.ServerAdapter(cherrypy.engine, httpserver=f, bind_addr=addr)
|
||||
s.subscribe()
|
||||
|
||||
The :doc:`cherryd</deployguide/cherryd>` startup script will do the above for
|
||||
you via its `-f` flag.
|
||||
Note that you need to download and install `flup <http://trac.saddi.com/flup>`_
|
||||
yourself, whether you use ``cherryd`` or not.
|
||||
|
||||
.. _fastcgi:
|
||||
.. index:: FastCGI
|
||||
|
||||
FastCGI
|
||||
-------
|
||||
|
||||
A very simple setup lets your cherry run with FastCGI.
|
||||
You just need the flup library,
|
||||
plus a running Apache server (with ``mod_fastcgi``) or lighttpd server.
|
||||
|
||||
CherryPy code
|
||||
^^^^^^^^^^^^^
|
||||
|
||||
hello.py::
|
||||
|
||||
#!/usr/bin/python
|
||||
import cherrypy
|
||||
|
||||
class HelloWorld:
|
||||
\"""Sample request handler class.\"""
|
||||
def index(self):
|
||||
return "Hello world!"
|
||||
index.exposed = True
|
||||
|
||||
cherrypy.tree.mount(HelloWorld())
|
||||
# CherryPy autoreload must be disabled for the flup server to work
|
||||
cherrypy.config.update({'engine.autoreload.on':False})
|
||||
|
||||
Then run :doc:`/deployguide/cherryd` with the '-f' arg::
|
||||
|
||||
cherryd -c <myconfig> -d -f -i hello.py
|
||||
|
||||
Apache
|
||||
^^^^^^
|
||||
|
||||
At the top level in httpd.conf::
|
||||
|
||||
FastCgiIpcDir /tmp
|
||||
FastCgiServer /path/to/cherry.fcgi -idle-timeout 120 -processes 4
|
||||
|
||||
And inside the relevant VirtualHost section::
|
||||
|
||||
# FastCGI config
|
||||
AddHandler fastcgi-script .fcgi
|
||||
ScriptAliasMatch (.*$) /path/to/cherry.fcgi$1
|
||||
|
||||
Lighttpd
|
||||
^^^^^^^^
|
||||
|
||||
For `Lighttpd <http://www.lighttpd.net/>`_ you can follow these
|
||||
instructions. Within ``lighttpd.conf`` make sure ``mod_fastcgi`` is
|
||||
active within ``server.modules``. Then, within your ``$HTTP["host"]``
|
||||
directive, configure your fastcgi script like the following::
|
||||
|
||||
$HTTP["url"] =~ "" {
|
||||
fastcgi.server = (
|
||||
"/" => (
|
||||
"script.fcgi" => (
|
||||
"bin-path" => "/path/to/your/script.fcgi",
|
||||
"socket" => "/tmp/script.sock",
|
||||
"check-local" => "disable",
|
||||
"disable-time" => 1,
|
||||
"min-procs" => 1,
|
||||
"max-procs" => 1, # adjust as needed
|
||||
),
|
||||
),
|
||||
)
|
||||
} # end of $HTTP["url"] =~ "^/"
|
||||
|
||||
Please see `Lighttpd FastCGI Docs
|
||||
<http://redmine.lighttpd.net/wiki/lighttpd/Docs:ModFastCGI>`_ for
|
||||
an explanation of the possible configuration options.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import time
|
||||
import warnings
|
||||
|
||||
|
||||
class ServerAdapter(object):
|
||||
|
||||
"""Adapter for an HTTP server.
|
||||
|
||||
If you need to start more than one HTTP server (to serve on multiple
|
||||
ports, or protocols, etc.), you can manually register each one and then
|
||||
start them all with bus.start:
|
||||
|
||||
s1 = ServerAdapter(bus, MyWSGIServer(host='0.0.0.0', port=80))
|
||||
s2 = ServerAdapter(bus, another.HTTPServer(host='127.0.0.1', SSL=True))
|
||||
s1.subscribe()
|
||||
s2.subscribe()
|
||||
bus.start()
|
||||
"""
|
||||
|
||||
def __init__(self, bus, httpserver=None, bind_addr=None):
|
||||
self.bus = bus
|
||||
self.httpserver = httpserver
|
||||
self.bind_addr = bind_addr
|
||||
self.interrupt = None
|
||||
self.running = False
|
||||
|
||||
def subscribe(self):
|
||||
self.bus.subscribe('start', self.start)
|
||||
self.bus.subscribe('stop', self.stop)
|
||||
|
||||
def unsubscribe(self):
|
||||
self.bus.unsubscribe('start', self.start)
|
||||
self.bus.unsubscribe('stop', self.stop)
|
||||
|
||||
def start(self):
|
||||
"""Start the HTTP server."""
|
||||
if self.bind_addr is None:
|
||||
on_what = "unknown interface (dynamic?)"
|
||||
elif isinstance(self.bind_addr, tuple):
|
||||
on_what = self._get_base()
|
||||
else:
|
||||
on_what = "socket file: %s" % self.bind_addr
|
||||
|
||||
if self.running:
|
||||
self.bus.log("Already serving on %s" % on_what)
|
||||
return
|
||||
|
||||
self.interrupt = None
|
||||
if not self.httpserver:
|
||||
raise ValueError("No HTTP server has been created.")
|
||||
|
||||
# Start the httpserver in a new thread.
|
||||
if isinstance(self.bind_addr, tuple):
|
||||
wait_for_free_port(*self.bind_addr)
|
||||
|
||||
import threading
|
||||
t = threading.Thread(target=self._start_http_thread)
|
||||
t.setName("HTTPServer " + t.getName())
|
||||
t.start()
|
||||
|
||||
self.wait()
|
||||
self.running = True
|
||||
self.bus.log("Serving on %s" % on_what)
|
||||
start.priority = 75
|
||||
|
||||
def _get_base(self):
|
||||
if not self.httpserver:
|
||||
return ''
|
||||
host, port = self.bind_addr
|
||||
if getattr(self.httpserver, 'ssl_certificate', None):
|
||||
scheme = "https"
|
||||
if port != 443:
|
||||
host += ":%s" % port
|
||||
else:
|
||||
scheme = "http"
|
||||
if port != 80:
|
||||
host += ":%s" % port
|
||||
|
||||
return "%s://%s" % (scheme, host)
|
||||
|
||||
def _start_http_thread(self):
|
||||
"""HTTP servers MUST be running in new threads, so that the
|
||||
main thread persists to receive KeyboardInterrupt's. If an
|
||||
exception is raised in the httpserver's thread then it's
|
||||
trapped here, and the bus (and therefore our httpserver)
|
||||
are shut down.
|
||||
"""
|
||||
try:
|
||||
self.httpserver.start()
|
||||
except KeyboardInterrupt:
|
||||
self.bus.log("<Ctrl-C> hit: shutting down HTTP server")
|
||||
self.interrupt = sys.exc_info()[1]
|
||||
self.bus.exit()
|
||||
except SystemExit:
|
||||
self.bus.log("SystemExit raised: shutting down HTTP server")
|
||||
self.interrupt = sys.exc_info()[1]
|
||||
self.bus.exit()
|
||||
raise
|
||||
except:
|
||||
self.interrupt = sys.exc_info()[1]
|
||||
self.bus.log("Error in HTTP server: shutting down",
|
||||
traceback=True, level=40)
|
||||
self.bus.exit()
|
||||
raise
|
||||
|
||||
def wait(self):
|
||||
"""Wait until the HTTP server is ready to receive requests."""
|
||||
while not getattr(self.httpserver, "ready", False):
|
||||
if self.interrupt:
|
||||
raise self.interrupt
|
||||
time.sleep(.1)
|
||||
|
||||
# Wait for port to be occupied
|
||||
if isinstance(self.bind_addr, tuple):
|
||||
host, port = self.bind_addr
|
||||
wait_for_occupied_port(host, port)
|
||||
|
||||
def stop(self):
|
||||
"""Stop the HTTP server."""
|
||||
if self.running:
|
||||
# stop() MUST block until the server is *truly* stopped.
|
||||
self.httpserver.stop()
|
||||
# Wait for the socket to be truly freed.
|
||||
if isinstance(self.bind_addr, tuple):
|
||||
wait_for_free_port(*self.bind_addr)
|
||||
self.running = False
|
||||
self.bus.log("HTTP Server %s shut down" % self.httpserver)
|
||||
else:
|
||||
self.bus.log("HTTP Server %s already shut down" % self.httpserver)
|
||||
stop.priority = 25
|
||||
|
||||
def restart(self):
|
||||
"""Restart the HTTP server."""
|
||||
self.stop()
|
||||
self.start()
|
||||
|
||||
|
||||
class FlupCGIServer(object):
|
||||
|
||||
"""Adapter for a flup.server.cgi.WSGIServer."""
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
self.args = args
|
||||
self.kwargs = kwargs
|
||||
self.ready = False
|
||||
|
||||
def start(self):
|
||||
"""Start the CGI server."""
|
||||
# We have to instantiate the server class here because its __init__
|
||||
# starts a threadpool. If we do it too early, daemonize won't work.
|
||||
from flup.server.cgi import WSGIServer
|
||||
|
||||
self.cgiserver = WSGIServer(*self.args, **self.kwargs)
|
||||
self.ready = True
|
||||
self.cgiserver.run()
|
||||
|
||||
def stop(self):
|
||||
"""Stop the HTTP server."""
|
||||
self.ready = False
|
||||
|
||||
|
||||
class FlupFCGIServer(object):
|
||||
|
||||
"""Adapter for a flup.server.fcgi.WSGIServer."""
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
if kwargs.get('bindAddress', None) is None:
|
||||
import socket
|
||||
if not hasattr(socket, 'fromfd'):
|
||||
raise ValueError(
|
||||
'Dynamic FCGI server not available on this platform. '
|
||||
'You must use a static or external one by providing a '
|
||||
'legal bindAddress.')
|
||||
self.args = args
|
||||
self.kwargs = kwargs
|
||||
self.ready = False
|
||||
|
||||
def start(self):
|
||||
"""Start the FCGI server."""
|
||||
# We have to instantiate the server class here because its __init__
|
||||
# starts a threadpool. If we do it too early, daemonize won't work.
|
||||
from flup.server.fcgi import WSGIServer
|
||||
self.fcgiserver = WSGIServer(*self.args, **self.kwargs)
|
||||
# TODO: report this bug upstream to flup.
|
||||
# If we don't set _oldSIGs on Windows, we get:
|
||||
# File "C:\Python24\Lib\site-packages\flup\server\threadedserver.py",
|
||||
# line 108, in run
|
||||
# self._restoreSignalHandlers()
|
||||
# File "C:\Python24\Lib\site-packages\flup\server\threadedserver.py",
|
||||
# line 156, in _restoreSignalHandlers
|
||||
# for signum,handler in self._oldSIGs:
|
||||
# AttributeError: 'WSGIServer' object has no attribute '_oldSIGs'
|
||||
self.fcgiserver._installSignalHandlers = lambda: None
|
||||
self.fcgiserver._oldSIGs = []
|
||||
self.ready = True
|
||||
self.fcgiserver.run()
|
||||
|
||||
def stop(self):
|
||||
"""Stop the HTTP server."""
|
||||
# Forcibly stop the fcgi server main event loop.
|
||||
self.fcgiserver._keepGoing = False
|
||||
# Force all worker threads to die off.
|
||||
self.fcgiserver._threadPool.maxSpare = (
|
||||
self.fcgiserver._threadPool._idleCount)
|
||||
self.ready = False
|
||||
|
||||
|
||||
class FlupSCGIServer(object):
|
||||
|
||||
"""Adapter for a flup.server.scgi.WSGIServer."""
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
self.args = args
|
||||
self.kwargs = kwargs
|
||||
self.ready = False
|
||||
|
||||
def start(self):
|
||||
"""Start the SCGI server."""
|
||||
# We have to instantiate the server class here because its __init__
|
||||
# starts a threadpool. If we do it too early, daemonize won't work.
|
||||
from flup.server.scgi import WSGIServer
|
||||
self.scgiserver = WSGIServer(*self.args, **self.kwargs)
|
||||
# TODO: report this bug upstream to flup.
|
||||
# If we don't set _oldSIGs on Windows, we get:
|
||||
# File "C:\Python24\Lib\site-packages\flup\server\threadedserver.py",
|
||||
# line 108, in run
|
||||
# self._restoreSignalHandlers()
|
||||
# File "C:\Python24\Lib\site-packages\flup\server\threadedserver.py",
|
||||
# line 156, in _restoreSignalHandlers
|
||||
# for signum,handler in self._oldSIGs:
|
||||
# AttributeError: 'WSGIServer' object has no attribute '_oldSIGs'
|
||||
self.scgiserver._installSignalHandlers = lambda: None
|
||||
self.scgiserver._oldSIGs = []
|
||||
self.ready = True
|
||||
self.scgiserver.run()
|
||||
|
||||
def stop(self):
|
||||
"""Stop the HTTP server."""
|
||||
self.ready = False
|
||||
# Forcibly stop the scgi server main event loop.
|
||||
self.scgiserver._keepGoing = False
|
||||
# Force all worker threads to die off.
|
||||
self.scgiserver._threadPool.maxSpare = 0
|
||||
|
||||
|
||||
def client_host(server_host):
|
||||
"""Return the host on which a client can connect to the given listener."""
|
||||
if server_host == '0.0.0.0':
|
||||
# 0.0.0.0 is INADDR_ANY, which should answer on localhost.
|
||||
return '127.0.0.1'
|
||||
if server_host in ('::', '::0', '::0.0.0.0'):
|
||||
# :: is IN6ADDR_ANY, which should answer on localhost.
|
||||
# ::0 and ::0.0.0.0 are non-canonical but common
|
||||
# ways to write IN6ADDR_ANY.
|
||||
return '::1'
|
||||
return server_host
|
||||
|
||||
|
||||
def check_port(host, port, timeout=1.0):
|
||||
"""Raise an error if the given port is not free on the given host."""
|
||||
if not host:
|
||||
raise ValueError("Host values of '' or None are not allowed.")
|
||||
host = client_host(host)
|
||||
port = int(port)
|
||||
|
||||
import socket
|
||||
|
||||
# AF_INET or AF_INET6 socket
|
||||
# Get the correct address family for our host (allows IPv6 addresses)
|
||||
try:
|
||||
info = socket.getaddrinfo(host, port, socket.AF_UNSPEC,
|
||||
socket.SOCK_STREAM)
|
||||
except socket.gaierror:
|
||||
if ':' in host:
|
||||
info = [(
|
||||
socket.AF_INET6, socket.SOCK_STREAM, 0, "", (host, port, 0, 0)
|
||||
)]
|
||||
else:
|
||||
info = [(socket.AF_INET, socket.SOCK_STREAM, 0, "", (host, port))]
|
||||
|
||||
for res in info:
|
||||
af, socktype, proto, canonname, sa = res
|
||||
s = None
|
||||
try:
|
||||
s = socket.socket(af, socktype, proto)
|
||||
# See http://groups.google.com/group/cherrypy-users/
|
||||
# browse_frm/thread/bbfe5eb39c904fe0
|
||||
s.settimeout(timeout)
|
||||
s.connect((host, port))
|
||||
s.close()
|
||||
except socket.error:
|
||||
if s:
|
||||
s.close()
|
||||
else:
|
||||
raise IOError("Port %s is in use on %s; perhaps the previous "
|
||||
"httpserver did not shut down properly." %
|
||||
(repr(port), repr(host)))
|
||||
|
||||
|
||||
# Feel free to increase these defaults on slow systems:
|
||||
free_port_timeout = 0.1
|
||||
occupied_port_timeout = 1.0
|
||||
|
||||
|
||||
def wait_for_free_port(host, port, timeout=None):
|
||||
"""Wait for the specified port to become free (drop requests)."""
|
||||
if not host:
|
||||
raise ValueError("Host values of '' or None are not allowed.")
|
||||
if timeout is None:
|
||||
timeout = free_port_timeout
|
||||
|
||||
for trial in range(50):
|
||||
try:
|
||||
# we are expecting a free port, so reduce the timeout
|
||||
check_port(host, port, timeout=timeout)
|
||||
except IOError:
|
||||
# Give the old server thread time to free the port.
|
||||
time.sleep(timeout)
|
||||
else:
|
||||
return
|
||||
|
||||
raise IOError("Port %r not free on %r" % (port, host))
|
||||
|
||||
|
||||
def wait_for_occupied_port(host, port, timeout=None):
|
||||
"""Wait for the specified port to become active (receive requests)."""
|
||||
if not host:
|
||||
raise ValueError("Host values of '' or None are not allowed.")
|
||||
if timeout is None:
|
||||
timeout = occupied_port_timeout
|
||||
|
||||
for trial in range(50):
|
||||
try:
|
||||
check_port(host, port, timeout=timeout)
|
||||
except IOError:
|
||||
# port is occupied
|
||||
return
|
||||
else:
|
||||
time.sleep(timeout)
|
||||
|
||||
if host == client_host(host):
|
||||
raise IOError("Port %r not bound on %r" % (port, host))
|
||||
|
||||
# On systems where a loopback interface is not available and the
|
||||
# server is bound to all interfaces, it's difficult to determine
|
||||
# whether the server is in fact occupying the port. In this case,
|
||||
# just issue a warning and move on. See issue #1100.
|
||||
msg = "Unable to verify that the server is bound on %r" % port
|
||||
warnings.warn(msg)
|
||||
180
lib/cherrypy/process/win32.py
Normal file
180
lib/cherrypy/process/win32.py
Normal file
@@ -0,0 +1,180 @@
|
||||
"""Windows service. Requires pywin32."""
|
||||
|
||||
import os
|
||||
import win32api
|
||||
import win32con
|
||||
import win32event
|
||||
import win32service
|
||||
import win32serviceutil
|
||||
|
||||
from cherrypy.process import wspbus, plugins
|
||||
|
||||
|
||||
class ConsoleCtrlHandler(plugins.SimplePlugin):
|
||||
|
||||
"""A WSPBus plugin for handling Win32 console events (like Ctrl-C)."""
|
||||
|
||||
def __init__(self, bus):
|
||||
self.is_set = False
|
||||
plugins.SimplePlugin.__init__(self, bus)
|
||||
|
||||
def start(self):
|
||||
if self.is_set:
|
||||
self.bus.log('Handler for console events already set.', level=40)
|
||||
return
|
||||
|
||||
result = win32api.SetConsoleCtrlHandler(self.handle, 1)
|
||||
if result == 0:
|
||||
self.bus.log('Could not SetConsoleCtrlHandler (error %r)' %
|
||||
win32api.GetLastError(), level=40)
|
||||
else:
|
||||
self.bus.log('Set handler for console events.', level=40)
|
||||
self.is_set = True
|
||||
|
||||
def stop(self):
|
||||
if not self.is_set:
|
||||
self.bus.log('Handler for console events already off.', level=40)
|
||||
return
|
||||
|
||||
try:
|
||||
result = win32api.SetConsoleCtrlHandler(self.handle, 0)
|
||||
except ValueError:
|
||||
# "ValueError: The object has not been registered"
|
||||
result = 1
|
||||
|
||||
if result == 0:
|
||||
self.bus.log('Could not remove SetConsoleCtrlHandler (error %r)' %
|
||||
win32api.GetLastError(), level=40)
|
||||
else:
|
||||
self.bus.log('Removed handler for console events.', level=40)
|
||||
self.is_set = False
|
||||
|
||||
def handle(self, event):
|
||||
"""Handle console control events (like Ctrl-C)."""
|
||||
if event in (win32con.CTRL_C_EVENT, win32con.CTRL_LOGOFF_EVENT,
|
||||
win32con.CTRL_BREAK_EVENT, win32con.CTRL_SHUTDOWN_EVENT,
|
||||
win32con.CTRL_CLOSE_EVENT):
|
||||
self.bus.log('Console event %s: shutting down bus' % event)
|
||||
|
||||
# Remove self immediately so repeated Ctrl-C doesn't re-call it.
|
||||
try:
|
||||
self.stop()
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
self.bus.exit()
|
||||
# 'First to return True stops the calls'
|
||||
return 1
|
||||
return 0
|
||||
|
||||
|
||||
class Win32Bus(wspbus.Bus):
|
||||
|
||||
"""A Web Site Process Bus implementation for Win32.
|
||||
|
||||
Instead of time.sleep, this bus blocks using native win32event objects.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self.events = {}
|
||||
wspbus.Bus.__init__(self)
|
||||
|
||||
def _get_state_event(self, state):
|
||||
"""Return a win32event for the given state (creating it if needed)."""
|
||||
try:
|
||||
return self.events[state]
|
||||
except KeyError:
|
||||
event = win32event.CreateEvent(None, 0, 0,
|
||||
"WSPBus %s Event (pid=%r)" %
|
||||
(state.name, os.getpid()))
|
||||
self.events[state] = event
|
||||
return event
|
||||
|
||||
def _get_state(self):
|
||||
return self._state
|
||||
|
||||
def _set_state(self, value):
|
||||
self._state = value
|
||||
event = self._get_state_event(value)
|
||||
win32event.PulseEvent(event)
|
||||
state = property(_get_state, _set_state)
|
||||
|
||||
def wait(self, state, interval=0.1, channel=None):
|
||||
"""Wait for the given state(s), KeyboardInterrupt or SystemExit.
|
||||
|
||||
Since this class uses native win32event objects, the interval
|
||||
argument is ignored.
|
||||
"""
|
||||
if isinstance(state, (tuple, list)):
|
||||
# Don't wait for an event that beat us to the punch ;)
|
||||
if self.state not in state:
|
||||
events = tuple([self._get_state_event(s) for s in state])
|
||||
win32event.WaitForMultipleObjects(
|
||||
events, 0, win32event.INFINITE)
|
||||
else:
|
||||
# Don't wait for an event that beat us to the punch ;)
|
||||
if self.state != state:
|
||||
event = self._get_state_event(state)
|
||||
win32event.WaitForSingleObject(event, win32event.INFINITE)
|
||||
|
||||
|
||||
class _ControlCodes(dict):
|
||||
|
||||
"""Control codes used to "signal" a service via ControlService.
|
||||
|
||||
User-defined control codes are in the range 128-255. We generally use
|
||||
the standard Python value for the Linux signal and add 128. Example:
|
||||
|
||||
>>> signal.SIGUSR1
|
||||
10
|
||||
control_codes['graceful'] = 128 + 10
|
||||
"""
|
||||
|
||||
def key_for(self, obj):
|
||||
"""For the given value, return its corresponding key."""
|
||||
for key, val in self.items():
|
||||
if val is obj:
|
||||
return key
|
||||
raise ValueError("The given object could not be found: %r" % obj)
|
||||
|
||||
control_codes = _ControlCodes({'graceful': 138})
|
||||
|
||||
|
||||
def signal_child(service, command):
|
||||
if command == 'stop':
|
||||
win32serviceutil.StopService(service)
|
||||
elif command == 'restart':
|
||||
win32serviceutil.RestartService(service)
|
||||
else:
|
||||
win32serviceutil.ControlService(service, control_codes[command])
|
||||
|
||||
|
||||
class PyWebService(win32serviceutil.ServiceFramework):
|
||||
|
||||
"""Python Web Service."""
|
||||
|
||||
_svc_name_ = "Python Web Service"
|
||||
_svc_display_name_ = "Python Web Service"
|
||||
_svc_deps_ = None # sequence of service names on which this depends
|
||||
_exe_name_ = "pywebsvc"
|
||||
_exe_args_ = None # Default to no arguments
|
||||
|
||||
# Only exists on Windows 2000 or later, ignored on windows NT
|
||||
_svc_description_ = "Python Web Service"
|
||||
|
||||
def SvcDoRun(self):
|
||||
from cherrypy import process
|
||||
process.bus.start()
|
||||
process.bus.block()
|
||||
|
||||
def SvcStop(self):
|
||||
from cherrypy import process
|
||||
self.ReportServiceStatus(win32service.SERVICE_STOP_PENDING)
|
||||
process.bus.exit()
|
||||
|
||||
def SvcOther(self, control):
|
||||
process.bus.publish(control_codes.key_for(control))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
win32serviceutil.HandleCommandLine(PyWebService)
|
||||
448
lib/cherrypy/process/wspbus.py
Normal file
448
lib/cherrypy/process/wspbus.py
Normal file
@@ -0,0 +1,448 @@
|
||||
"""An implementation of the Web Site Process Bus.
|
||||
|
||||
This module is completely standalone, depending only on the stdlib.
|
||||
|
||||
Web Site Process Bus
|
||||
--------------------
|
||||
|
||||
A Bus object is used to contain and manage site-wide behavior:
|
||||
daemonization, HTTP server start/stop, process reload, signal handling,
|
||||
drop privileges, PID file management, logging for all of these,
|
||||
and many more.
|
||||
|
||||
In addition, a Bus object provides a place for each web framework
|
||||
to register code that runs in response to site-wide events (like
|
||||
process start and stop), or which controls or otherwise interacts with
|
||||
the site-wide components mentioned above. For example, a framework which
|
||||
uses file-based templates would add known template filenames to an
|
||||
autoreload component.
|
||||
|
||||
Ideally, a Bus object will be flexible enough to be useful in a variety
|
||||
of invocation scenarios:
|
||||
|
||||
1. The deployer starts a site from the command line via a
|
||||
framework-neutral deployment script; applications from multiple frameworks
|
||||
are mixed in a single site. Command-line arguments and configuration
|
||||
files are used to define site-wide components such as the HTTP server,
|
||||
WSGI component graph, autoreload behavior, signal handling, etc.
|
||||
2. The deployer starts a site via some other process, such as Apache;
|
||||
applications from multiple frameworks are mixed in a single site.
|
||||
Autoreload and signal handling (from Python at least) are disabled.
|
||||
3. The deployer starts a site via a framework-specific mechanism;
|
||||
for example, when running tests, exploring tutorials, or deploying
|
||||
single applications from a single framework. The framework controls
|
||||
which site-wide components are enabled as it sees fit.
|
||||
|
||||
The Bus object in this package uses topic-based publish-subscribe
|
||||
messaging to accomplish all this. A few topic channels are built in
|
||||
('start', 'stop', 'exit', 'graceful', 'log', and 'main'). Frameworks and
|
||||
site containers are free to define their own. If a message is sent to a
|
||||
channel that has not been defined or has no listeners, there is no effect.
|
||||
|
||||
In general, there should only ever be a single Bus object per process.
|
||||
Frameworks and site containers share a single Bus object by publishing
|
||||
messages and subscribing listeners.
|
||||
|
||||
The Bus object works as a finite state machine which models the current
|
||||
state of the process. Bus methods move it from one state to another;
|
||||
those methods then publish to subscribed listeners on the channel for
|
||||
the new state.::
|
||||
|
||||
O
|
||||
|
|
||||
V
|
||||
STOPPING --> STOPPED --> EXITING -> X
|
||||
A A |
|
||||
| \___ |
|
||||
| \ |
|
||||
| V V
|
||||
STARTED <-- STARTING
|
||||
|
||||
"""
|
||||
|
||||
import atexit
|
||||
import os
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
import traceback as _traceback
|
||||
import warnings
|
||||
|
||||
from cherrypy._cpcompat import set
|
||||
|
||||
# Here I save the value of os.getcwd(), which, if I am imported early enough,
|
||||
# will be the directory from which the startup script was run. This is needed
|
||||
# by _do_execv(), to change back to the original directory before execv()ing a
|
||||
# new process. This is a defense against the application having changed the
|
||||
# current working directory (which could make sys.executable "not found" if
|
||||
# sys.executable is a relative-path, and/or cause other problems).
|
||||
_startup_cwd = os.getcwd()
|
||||
|
||||
|
||||
class ChannelFailures(Exception):
|
||||
|
||||
"""Exception raised when errors occur in a listener during Bus.publish().
|
||||
"""
|
||||
delimiter = '\n'
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
# Don't use 'super' here; Exceptions are old-style in Py2.4
|
||||
# See https://bitbucket.org/cherrypy/cherrypy/issue/959
|
||||
Exception.__init__(self, *args, **kwargs)
|
||||
self._exceptions = list()
|
||||
|
||||
def handle_exception(self):
|
||||
"""Append the current exception to self."""
|
||||
self._exceptions.append(sys.exc_info()[1])
|
||||
|
||||
def get_instances(self):
|
||||
"""Return a list of seen exception instances."""
|
||||
return self._exceptions[:]
|
||||
|
||||
def __str__(self):
|
||||
exception_strings = map(repr, self.get_instances())
|
||||
return self.delimiter.join(exception_strings)
|
||||
|
||||
__repr__ = __str__
|
||||
|
||||
def __bool__(self):
|
||||
return bool(self._exceptions)
|
||||
__nonzero__ = __bool__
|
||||
|
||||
# Use a flag to indicate the state of the bus.
|
||||
|
||||
|
||||
class _StateEnum(object):
|
||||
|
||||
class State(object):
|
||||
name = None
|
||||
|
||||
def __repr__(self):
|
||||
return "states.%s" % self.name
|
||||
|
||||
def __setattr__(self, key, value):
|
||||
if isinstance(value, self.State):
|
||||
value.name = key
|
||||
object.__setattr__(self, key, value)
|
||||
states = _StateEnum()
|
||||
states.STOPPED = states.State()
|
||||
states.STARTING = states.State()
|
||||
states.STARTED = states.State()
|
||||
states.STOPPING = states.State()
|
||||
states.EXITING = states.State()
|
||||
|
||||
|
||||
try:
|
||||
import fcntl
|
||||
except ImportError:
|
||||
max_files = 0
|
||||
else:
|
||||
try:
|
||||
max_files = os.sysconf('SC_OPEN_MAX')
|
||||
except AttributeError:
|
||||
max_files = 1024
|
||||
|
||||
|
||||
class Bus(object):
|
||||
|
||||
"""Process state-machine and messenger for HTTP site deployment.
|
||||
|
||||
All listeners for a given channel are guaranteed to be called even
|
||||
if others at the same channel fail. Each failure is logged, but
|
||||
execution proceeds on to the next listener. The only way to stop all
|
||||
processing from inside a listener is to raise SystemExit and stop the
|
||||
whole server.
|
||||
"""
|
||||
|
||||
states = states
|
||||
state = states.STOPPED
|
||||
execv = False
|
||||
max_cloexec_files = max_files
|
||||
|
||||
def __init__(self):
|
||||
self.execv = False
|
||||
self.state = states.STOPPED
|
||||
self.listeners = dict(
|
||||
[(channel, set()) for channel
|
||||
in ('start', 'stop', 'exit', 'graceful', 'log', 'main')])
|
||||
self._priorities = {}
|
||||
|
||||
def subscribe(self, channel, callback, priority=None):
|
||||
"""Add the given callback at the given channel (if not present)."""
|
||||
if channel not in self.listeners:
|
||||
self.listeners[channel] = set()
|
||||
self.listeners[channel].add(callback)
|
||||
|
||||
if priority is None:
|
||||
priority = getattr(callback, 'priority', 50)
|
||||
self._priorities[(channel, callback)] = priority
|
||||
|
||||
def unsubscribe(self, channel, callback):
|
||||
"""Discard the given callback (if present)."""
|
||||
listeners = self.listeners.get(channel)
|
||||
if listeners and callback in listeners:
|
||||
listeners.discard(callback)
|
||||
del self._priorities[(channel, callback)]
|
||||
|
||||
def publish(self, channel, *args, **kwargs):
|
||||
"""Return output of all subscribers for the given channel."""
|
||||
if channel not in self.listeners:
|
||||
return []
|
||||
|
||||
exc = ChannelFailures()
|
||||
output = []
|
||||
|
||||
items = [(self._priorities[(channel, listener)], listener)
|
||||
for listener in self.listeners[channel]]
|
||||
try:
|
||||
items.sort(key=lambda item: item[0])
|
||||
except TypeError:
|
||||
# Python 2.3 had no 'key' arg, but that doesn't matter
|
||||
# since it could sort dissimilar types just fine.
|
||||
items.sort()
|
||||
for priority, listener in items:
|
||||
try:
|
||||
output.append(listener(*args, **kwargs))
|
||||
except KeyboardInterrupt:
|
||||
raise
|
||||
except SystemExit:
|
||||
e = sys.exc_info()[1]
|
||||
# If we have previous errors ensure the exit code is non-zero
|
||||
if exc and e.code == 0:
|
||||
e.code = 1
|
||||
raise
|
||||
except:
|
||||
exc.handle_exception()
|
||||
if channel == 'log':
|
||||
# Assume any further messages to 'log' will fail.
|
||||
pass
|
||||
else:
|
||||
self.log("Error in %r listener %r" % (channel, listener),
|
||||
level=40, traceback=True)
|
||||
if exc:
|
||||
raise exc
|
||||
return output
|
||||
|
||||
def _clean_exit(self):
|
||||
"""An atexit handler which asserts the Bus is not running."""
|
||||
if self.state != states.EXITING:
|
||||
warnings.warn(
|
||||
"The main thread is exiting, but the Bus is in the %r state; "
|
||||
"shutting it down automatically now. You must either call "
|
||||
"bus.block() after start(), or call bus.exit() before the "
|
||||
"main thread exits." % self.state, RuntimeWarning)
|
||||
self.exit()
|
||||
|
||||
def start(self):
|
||||
"""Start all services."""
|
||||
atexit.register(self._clean_exit)
|
||||
|
||||
self.state = states.STARTING
|
||||
self.log('Bus STARTING')
|
||||
try:
|
||||
self.publish('start')
|
||||
self.state = states.STARTED
|
||||
self.log('Bus STARTED')
|
||||
except (KeyboardInterrupt, SystemExit):
|
||||
raise
|
||||
except:
|
||||
self.log("Shutting down due to error in start listener:",
|
||||
level=40, traceback=True)
|
||||
e_info = sys.exc_info()[1]
|
||||
try:
|
||||
self.exit()
|
||||
except:
|
||||
# Any stop/exit errors will be logged inside publish().
|
||||
pass
|
||||
# Re-raise the original error
|
||||
raise e_info
|
||||
|
||||
def exit(self):
|
||||
"""Stop all services and prepare to exit the process."""
|
||||
exitstate = self.state
|
||||
try:
|
||||
self.stop()
|
||||
|
||||
self.state = states.EXITING
|
||||
self.log('Bus EXITING')
|
||||
self.publish('exit')
|
||||
# This isn't strictly necessary, but it's better than seeing
|
||||
# "Waiting for child threads to terminate..." and then nothing.
|
||||
self.log('Bus EXITED')
|
||||
except:
|
||||
# This method is often called asynchronously (whether thread,
|
||||
# signal handler, console handler, or atexit handler), so we
|
||||
# can't just let exceptions propagate out unhandled.
|
||||
# Assume it's been logged and just die.
|
||||
os._exit(70) # EX_SOFTWARE
|
||||
|
||||
if exitstate == states.STARTING:
|
||||
# exit() was called before start() finished, possibly due to
|
||||
# Ctrl-C because a start listener got stuck. In this case,
|
||||
# we could get stuck in a loop where Ctrl-C never exits the
|
||||
# process, so we just call os.exit here.
|
||||
os._exit(70) # EX_SOFTWARE
|
||||
|
||||
def restart(self):
|
||||
"""Restart the process (may close connections).
|
||||
|
||||
This method does not restart the process from the calling thread;
|
||||
instead, it stops the bus and asks the main thread to call execv.
|
||||
"""
|
||||
self.execv = True
|
||||
self.exit()
|
||||
|
||||
def graceful(self):
|
||||
"""Advise all services to reload."""
|
||||
self.log('Bus graceful')
|
||||
self.publish('graceful')
|
||||
|
||||
def block(self, interval=0.1):
|
||||
"""Wait for the EXITING state, KeyboardInterrupt or SystemExit.
|
||||
|
||||
This function is intended to be called only by the main thread.
|
||||
After waiting for the EXITING state, it also waits for all threads
|
||||
to terminate, and then calls os.execv if self.execv is True. This
|
||||
design allows another thread to call bus.restart, yet have the main
|
||||
thread perform the actual execv call (required on some platforms).
|
||||
"""
|
||||
try:
|
||||
self.wait(states.EXITING, interval=interval, channel='main')
|
||||
except (KeyboardInterrupt, IOError):
|
||||
# The time.sleep call might raise
|
||||
# "IOError: [Errno 4] Interrupted function call" on KBInt.
|
||||
self.log('Keyboard Interrupt: shutting down bus')
|
||||
self.exit()
|
||||
except SystemExit:
|
||||
self.log('SystemExit raised: shutting down bus')
|
||||
self.exit()
|
||||
raise
|
||||
|
||||
# Waiting for ALL child threads to finish is necessary on OS X.
|
||||
# See https://bitbucket.org/cherrypy/cherrypy/issue/581.
|
||||
# It's also good to let them all shut down before allowing
|
||||
# the main thread to call atexit handlers.
|
||||
# See https://bitbucket.org/cherrypy/cherrypy/issue/751.
|
||||
self.log("Waiting for child threads to terminate...")
|
||||
for t in threading.enumerate():
|
||||
# Validate the we're not trying to join the MainThread
|
||||
# that will cause a deadlock and the case exist when
|
||||
# implemented as a windows service and in any other case
|
||||
# that another thread executes cherrypy.engine.exit()
|
||||
if (
|
||||
t != threading.currentThread() and
|
||||
t.isAlive() and
|
||||
not isinstance(t, threading._MainThread)
|
||||
):
|
||||
# Note that any dummy (external) threads are always daemonic.
|
||||
if hasattr(threading.Thread, "daemon"):
|
||||
# Python 2.6+
|
||||
d = t.daemon
|
||||
else:
|
||||
d = t.isDaemon()
|
||||
if not d:
|
||||
self.log("Waiting for thread %s." % t.getName())
|
||||
t.join()
|
||||
|
||||
if self.execv:
|
||||
self._do_execv()
|
||||
|
||||
def wait(self, state, interval=0.1, channel=None):
|
||||
"""Poll for the given state(s) at intervals; publish to channel."""
|
||||
if isinstance(state, (tuple, list)):
|
||||
states = state
|
||||
else:
|
||||
states = [state]
|
||||
|
||||
def _wait():
|
||||
while self.state not in states:
|
||||
time.sleep(interval)
|
||||
self.publish(channel)
|
||||
|
||||
# From http://psyco.sourceforge.net/psycoguide/bugs.html:
|
||||
# "The compiled machine code does not include the regular polling
|
||||
# done by Python, meaning that a KeyboardInterrupt will not be
|
||||
# detected before execution comes back to the regular Python
|
||||
# interpreter. Your program cannot be interrupted if caught
|
||||
# into an infinite Psyco-compiled loop."
|
||||
try:
|
||||
sys.modules['psyco'].cannotcompile(_wait)
|
||||
except (KeyError, AttributeError):
|
||||
pass
|
||||
|
||||
_wait()
|
||||
|
||||
def _do_execv(self):
|
||||
"""Re-execute the current process.
|
||||
|
||||
This must be called from the main thread, because certain platforms
|
||||
(OS X) don't allow execv to be called in a child thread very well.
|
||||
"""
|
||||
args = sys.argv[:]
|
||||
self.log('Re-spawning %s' % ' '.join(args))
|
||||
|
||||
if sys.platform[:4] == 'java':
|
||||
from _systemrestart import SystemRestart
|
||||
raise SystemRestart
|
||||
else:
|
||||
args.insert(0, sys.executable)
|
||||
if sys.platform == 'win32':
|
||||
args = ['"%s"' % arg for arg in args]
|
||||
|
||||
os.chdir(_startup_cwd)
|
||||
if self.max_cloexec_files:
|
||||
self._set_cloexec()
|
||||
os.execv(sys.executable, args)
|
||||
|
||||
def _set_cloexec(self):
|
||||
"""Set the CLOEXEC flag on all open files (except stdin/out/err).
|
||||
|
||||
If self.max_cloexec_files is an integer (the default), then on
|
||||
platforms which support it, it represents the max open files setting
|
||||
for the operating system. This function will be called just before
|
||||
the process is restarted via os.execv() to prevent open files
|
||||
from persisting into the new process.
|
||||
|
||||
Set self.max_cloexec_files to 0 to disable this behavior.
|
||||
"""
|
||||
for fd in range(3, self.max_cloexec_files): # skip stdin/out/err
|
||||
try:
|
||||
flags = fcntl.fcntl(fd, fcntl.F_GETFD)
|
||||
except IOError:
|
||||
continue
|
||||
fcntl.fcntl(fd, fcntl.F_SETFD, flags | fcntl.FD_CLOEXEC)
|
||||
|
||||
def stop(self):
|
||||
"""Stop all services."""
|
||||
self.state = states.STOPPING
|
||||
self.log('Bus STOPPING')
|
||||
self.publish('stop')
|
||||
self.state = states.STOPPED
|
||||
self.log('Bus STOPPED')
|
||||
|
||||
def start_with_callback(self, func, args=None, kwargs=None):
|
||||
"""Start 'func' in a new thread T, then start self (and return T)."""
|
||||
if args is None:
|
||||
args = ()
|
||||
if kwargs is None:
|
||||
kwargs = {}
|
||||
args = (func,) + args
|
||||
|
||||
def _callback(func, *a, **kw):
|
||||
self.wait(states.STARTED)
|
||||
func(*a, **kw)
|
||||
t = threading.Thread(target=_callback, args=args, kwargs=kwargs)
|
||||
t.setName('Bus Callback ' + t.getName())
|
||||
t.start()
|
||||
|
||||
self.start()
|
||||
|
||||
return t
|
||||
|
||||
def log(self, msg="", level=20, traceback=False):
|
||||
"""Log the given message. Append the last traceback if requested."""
|
||||
if traceback:
|
||||
msg += "\n" + "".join(_traceback.format_exception(*sys.exc_info()))
|
||||
self.publish('log', msg, level)
|
||||
|
||||
bus = Bus()
|
||||
61
lib/cherrypy/scaffold/__init__.py
Normal file
61
lib/cherrypy/scaffold/__init__.py
Normal file
@@ -0,0 +1,61 @@
|
||||
"""<MyProject>, a CherryPy application.
|
||||
|
||||
Use this as a base for creating new CherryPy applications. When you want
|
||||
to make a new app, copy and paste this folder to some other location
|
||||
(maybe site-packages) and rename it to the name of your project,
|
||||
then tweak as desired.
|
||||
|
||||
Even before any tweaking, this should serve a few demonstration pages.
|
||||
Change to this directory and run:
|
||||
|
||||
../cherryd -c site.conf
|
||||
|
||||
"""
|
||||
|
||||
import cherrypy
|
||||
from cherrypy import tools, url
|
||||
|
||||
import os
|
||||
local_dir = os.path.join(os.getcwd(), os.path.dirname(__file__))
|
||||
|
||||
|
||||
class Root:
|
||||
|
||||
_cp_config = {'tools.log_tracebacks.on': True,
|
||||
}
|
||||
|
||||
def index(self):
|
||||
return """<html>
|
||||
<body>Try some <a href='%s?a=7'>other</a> path,
|
||||
or a <a href='%s?n=14'>default</a> path.<br />
|
||||
Or, just look at the pretty picture:<br />
|
||||
<img src='%s' />
|
||||
</body></html>""" % (url("other"), url("else"),
|
||||
url("files/made_with_cherrypy_small.png"))
|
||||
index.exposed = True
|
||||
|
||||
def default(self, *args, **kwargs):
|
||||
return "args: %s kwargs: %s" % (args, kwargs)
|
||||
default.exposed = True
|
||||
|
||||
def other(self, a=2, b='bananas', c=None):
|
||||
cherrypy.response.headers['Content-Type'] = 'text/plain'
|
||||
if c is None:
|
||||
return "Have %d %s." % (int(a), b)
|
||||
else:
|
||||
return "Have %d %s, %s." % (int(a), b, c)
|
||||
other.exposed = True
|
||||
|
||||
files = cherrypy.tools.staticdir.handler(
|
||||
section="/files",
|
||||
dir=os.path.join(local_dir, "static"),
|
||||
# Ignore .php files, etc.
|
||||
match=r'\.(css|gif|html?|ico|jpe?g|js|png|swf|xml)$',
|
||||
)
|
||||
|
||||
|
||||
root = Root()
|
||||
|
||||
# Uncomment the following to use your own favicon instead of CP's default.
|
||||
#favicon_path = os.path.join(local_dir, "favicon.ico")
|
||||
#root.favicon_ico = tools.staticfile.handler(filename=favicon_path)
|
||||
22
lib/cherrypy/scaffold/apache-fcgi.conf
Normal file
22
lib/cherrypy/scaffold/apache-fcgi.conf
Normal file
@@ -0,0 +1,22 @@
|
||||
# Apache2 server conf file for using CherryPy with mod_fcgid.
|
||||
|
||||
# This doesn't have to be "C:/", but it has to be a directory somewhere, and
|
||||
# MUST match the directory used in the FastCgiExternalServer directive, below.
|
||||
DocumentRoot "C:/"
|
||||
|
||||
ServerName 127.0.0.1
|
||||
Listen 80
|
||||
LoadModule fastcgi_module modules/mod_fastcgi.dll
|
||||
LoadModule rewrite_module modules/mod_rewrite.so
|
||||
|
||||
Options ExecCGI
|
||||
SetHandler fastcgi-script
|
||||
RewriteEngine On
|
||||
# Send requests for any URI to our fastcgi handler.
|
||||
RewriteRule ^(.*)$ /fastcgi.pyc [L]
|
||||
|
||||
# The FastCgiExternalServer directive defines filename as an external FastCGI application.
|
||||
# If filename does not begin with a slash (/) then it is assumed to be relative to the ServerRoot.
|
||||
# The filename does not have to exist in the local filesystem. URIs that Apache resolves to this
|
||||
# filename will be handled by this external FastCGI application.
|
||||
FastCgiExternalServer "C:/fastcgi.pyc" -host 127.0.0.1:8088
|
||||
3
lib/cherrypy/scaffold/example.conf
Normal file
3
lib/cherrypy/scaffold/example.conf
Normal file
@@ -0,0 +1,3 @@
|
||||
[/]
|
||||
log.error_file: "error.log"
|
||||
log.access_file: "access.log"
|
||||
14
lib/cherrypy/scaffold/site.conf
Normal file
14
lib/cherrypy/scaffold/site.conf
Normal file
@@ -0,0 +1,14 @@
|
||||
[global]
|
||||
# Uncomment this when you're done developing
|
||||
#environment: "production"
|
||||
|
||||
server.socket_host: "0.0.0.0"
|
||||
server.socket_port: 8088
|
||||
|
||||
# Uncomment the following lines to run on HTTPS at the same time
|
||||
#server.2.socket_host: "0.0.0.0"
|
||||
#server.2.socket_port: 8433
|
||||
#server.2.ssl_certificate: '../test/test.pem'
|
||||
#server.2.ssl_private_key: '../test/test.pem'
|
||||
|
||||
tree.myapp: cherrypy.Application(scaffold.root, "/", "example.conf")
|
||||
BIN
lib/cherrypy/scaffold/static/made_with_cherrypy_small.png
Normal file
BIN
lib/cherrypy/scaffold/static/made_with_cherrypy_small.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 7.3 KiB |
14
lib/cherrypy/wsgiserver/__init__.py
Normal file
14
lib/cherrypy/wsgiserver/__init__.py
Normal file
@@ -0,0 +1,14 @@
|
||||
__all__ = ['HTTPRequest', 'HTTPConnection', 'HTTPServer',
|
||||
'SizeCheckWrapper', 'KnownLengthRFile', 'ChunkedRFile',
|
||||
'MaxSizeExceeded', 'NoSSLError', 'FatalSSLAlert',
|
||||
'WorkerThread', 'ThreadPool', 'SSLAdapter',
|
||||
'CherryPyWSGIServer',
|
||||
'Gateway', 'WSGIGateway', 'WSGIGateway_10', 'WSGIGateway_u0',
|
||||
'WSGIPathInfoDispatcher', 'get_ssl_adapter_class']
|
||||
|
||||
import sys
|
||||
if sys.version_info < (3, 0):
|
||||
from wsgiserver2 import *
|
||||
else:
|
||||
# Le sigh. Boo for backward-incompatible syntax.
|
||||
exec('from .wsgiserver3 import *')
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user