Compare commits

258 commits · `v2.2.3` … `v2.5.0-beta`

Commit SHA1s (the author, date, and message columns of the commit table are not shown):

43e40e99f1, d95afa990d, e14457da58, 9613934ae5, 07a48c04d7, fbcf59abf0, 2ef40a6a1c, 5b5c4d1a8b,
5f2a74893a, 0741b4021c, f2323b0dff, 0462121f69, fe4ddaeb52, bdbfafabbd, 42c6340c06, 39e1caec0f,
ef72832e5a, 39eb657012, b8f8d45807, b01fefc235, 09f6eb8e19, e5d4969917, 53aa740305, 9a00350ffc,
98ffa3735b, 9073568c0f, 17a01d65aa, 5089575aac, 7f178e0913, dcad3017d3, ae88489e55, b57065d6ee,
71551d3f6d, cbcad30a6c, e2c2f66e97, eeff665680, 6ef9d187ba, 6d23ef9105, 6c8b425fb3, d4b46a5721,
9d2be4b939, bc017fb010, bfabbe3cdb, 8a8d47f8e7, b01fac9641, 25c850e243, 8c7476a670, 12effd643f,
209008e50d, b336f07ff9, 73f6012507, b73564d2e0, 00adb45086, d604d40e91, ba3f6935db, 980c4f7618,
a869859491, 15a638b86e, e999000102, 95bdc000ca, 1d46efe037, 5499e89058, a5653e365e, e698bcb375,
33d5aca6d4, 058bd32329, 52d38883dc, c1d98ab901, e555b7e456, 031bef8c02, 3bf138e2ad, 4e0563bbf9,
a8783ac351, fb51894fad, 25d65e8d65, 3e10e0e511, c3245c1f03, 5b022599b4, d5d219d46f, e546689e01,
cac9e0b164, 4bb5920c04, 3ea257f8f3, 347db6b770, fafe28a6d6, eb6cb60ee3, 8226a14b00, c6bd1b06f2,
be38028244, b8ea04f5a4, cd5ed1d748, 00c9fc79f9, d5373c3992, 3001ff8c53, 0571a091f7, 1bca410bcb,
463ed2f46a, f8f0717913, 53cd759422, 7047ac8007, 2efd81dc6a, 773ee8664c, d779e72bcd, 2e101dcf7d,
e6befab6bb, 9ee2c1f7a6, 5b82a86fa8, 922bb2760c, 315be9f3eb, 7bb9c6c915, 4b5f880ccb, 5db309d142,
5d8a7d80eb, 1394339df6, 801510c61e, 6c8d6ed2ca, d8f223327e, 0343d47a9d, e527a88a2e, d6b619934a,
93f070f0ac, 3ca4351aeb, 0ed4b69b8f, 94f929743c, 9e9ad72dc2, 422a89c26c, 798c17706c, 0886d133a8,
435230711e, d75744bb4a, 86d737dcf6, 9e0153e962, fb395fc2e9, 573ff3f2a6, b9f614c66f, e26182c96e,
f4eff8a8c5, b3f8341e0c, 47db4e0559, b8179678c6, c1a7b3753c, 8b312c8d2d, ab36041fef, 3f87996bfc,
4edd2001b3, 155b98bb0c, f72d93216c, c6cf293b12, 0f13329ddd, 1dfbef89ff, c55c00a19e, 2fa62f71e1,
846a8cac98, 9ee7918e59, faf5cb0f8d, bde6309277, cc05552685, 465f50666f, e6d0212604, 2eebacc3a6,
f362880eb6, 68a06d1bbc, 82c09570c4, 58eb426eea, 1c932057b8, 4564623884, 843a400b2d, 5b067bd17d,
ed07bd374c, 078685a2a3, 2ce5194156, fa97d3f88d, 08c8ee0774, 9725c82187, 24277f1e3c, b58fb1da33,
bed1cd8fb5, c2d17c285a, 42262b0bb6, 510dddf724, 702b2fe167, f24c2a8b77, a675c2c4f2, 2984629b39,
1c56d9c513, e06210f21c, ad112e0a44, 2b0e7daf7c, 060dff0162, 4ae09774f7, 033a364699, 56a66976e6,
0f02fab259, 2917b609c3, b9a80d06e4, af46a02146, 19d8c1be5a, f63c1c2f7f, 5045e406a1, 8d5bc88fd9,
b39ac866f2, 4c211342a2, 6b7cd38d71, 485609fbb9, a44709a43d, 65e9e2b680, d84dc23b46, e333940826,
70f7fd2de9, 411d88d798, dce8248eb8, 3b8234ce67, ac63d3c3ce, 197c3a327b, 0bb97fee31, 1bdf6bbb66,
077dfe7164, 169f83ac4a, 121dad588e, bb3a11ad00, 64d3bd9c4f, e6be03a770, 5f722570d2, dcbeca5f7f,
16742d4705, d21a03905d, 0608b2a1df, 5f237c7c71, 4c98b0a43d, 05afa0859c, 597cc9fe29, ab6196589b,
221be380ee, a68e5f6519, bc81f19715, ceeeea94ba, 31ab5daa91, 8f6639028f, a2b686f6df, 2dcc74d82d,
d460263b97, b8cfa343ae, 8d391f125c, 1532bb731a, 357ba9ec59, 183c810c76, f2d7beec90, 84ce4758d1,
4d6279a626, f28e741ad7, 23c4e5b09d, cd6057e1ca, 1771674b53, 2a9d0ea7d2, e19938b05e, 244a3e5be3,
e5a3d534b2, c279057f91

#### (file name not shown)

```diff
@@ -3,6 +3,7 @@
 .gitignore
 contrib
 init-scripts
 package
 pylintrc
 *.md
 !CHANGELOG*.md
```

#### .github/workflows/publish-docker.yml (vendored, 8 changed lines)

```diff
@@ -1,7 +1,7 @@
 name: Publish Docker
 on:
   push:
-    branches: [master, beta, nightly]
+    branches: [master, beta, nightly, python3]
     tags: [v*]
 jobs:
   build:
@@ -25,7 +25,7 @@ jobs:
           echo ::set-output name=commit::${GITHUB_SHA}
           echo ::set-output name=build_date::$(date -u +'%Y-%m-%dT%H:%M:%SZ')
           echo ::set-output name=docker_platforms::linux/amd64,linux/arm64,linux/arm
-          echo ::set-output name=docker_image::tautulli/tautulli
+          echo ::set-output name=docker_image::${{ secrets.DOCKER_REPO }}/tautulli
 
       - name: Set up Docker Buildx
         id: buildx
@@ -33,8 +33,8 @@ jobs:
         with:
           version: latest
 
-      - name: Checkout
-        uses: actions/checkout@v2
+      - name: Checkout Code
+        uses: actions/checkout@v2.1.0
 
       - name: Docker Buildx (no push)
         run: |
```
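
For context, the `echo ::set-output name=…::…` lines above use the GitHub Actions workflow-command protocol (since deprecated in favor of the `$GITHUB_OUTPUT` file): anything a step prints in that form becomes available to later steps as `steps.<step_id>.outputs.<name>`. A minimal sketch of emitting the same output from Python instead of `echo`:

```python
# Prints a workflow command on stdout; inside a GitHub Actions step this
# registers an output named "docker_image" on the step that ran it.
print("::set-output name=docker_image::tautulli/tautulli")
```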

#### .github/workflows/publish-release.yml (vendored, 195 changed lines)

```diff
@@ -1,28 +1,203 @@
 name: Publish Release
 on:
   push:
     branches: [master, beta, nightly, python3]
     tags: [v*]
 
 jobs:
-  build:
-    runs-on: ubuntu-latest
+  build-windows:
+    runs-on: windows-latest
     steps:
       - name: Checkout Code
-        uses: actions/checkout@master
-      - name: Get Release Version
-        run: echo ::set-env name=RELEASE_VERSION::${GITHUB_REF#refs/tags/}
+        uses: actions/checkout@v2.1.0
+
+      - name: Set Release Version
+        id: get_version
+        shell: bash
+        run: |
+          if [[ $GITHUB_REF == refs/tags/* ]]; then
+            VERSION_NSIS=${GITHUB_REF#refs/tags/v}.1
+            echo ::set-output name=VERSION_NSIS::${VERSION_NSIS/%-beta.1/.0}
+            echo ::set-output name=VERSION::${GITHUB_REF#refs/tags/v}
+            echo ::set-output name=RELEASE_VERSION::${GITHUB_REF#refs/tags/}
+          else
+            echo ::set-output name=VERSION_NSIS::0.0.0.0
+            echo ::set-output name=VERSION::0.0.0
+            echo ::set-output name=RELEASE_VERSION::${GITHUB_SHA::7}
+          fi
+          echo $GITHUB_SHA > version.txt
+
+      - name: Set Up Python
+        uses: actions/setup-python@v1.2.0
+        with:
+          python-version: 3.8
+
+      - name: Cache Dependencies
+        id: cache_dependencies
+        uses: actions/cache@v1
+        with:
+          path: ~\AppData\Local\pip\Cache
+          key: ${{ runner.os }}-pip-${{ hashFiles('package/requirements-windows.txt') }}
+          restore-keys: ${{ runner.os }}-pip-
+
+      - name: Install Dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r package/requirements-windows.txt
+
+      - name: Build Package
+        run: |
+          pyinstaller -y ./package/Tautulli-windows.spec
+
+      - name: Create Installer
+        uses: joncloud/makensis-action@v1
+        with:
+          script-file: ./package/Tautulli.nsi
+          arguments: /DVERSION=${{ steps.get_version.outputs.VERSION_NSIS }} /DINSTALLER_NAME=..\Tautulli-windows-${{ steps.get_version.outputs.RELEASE_VERSION }}.exe
+          includeMorePlugins: package/nsis-plugins
+
+      - name: Upload Installer
+        uses: actions/upload-artifact@v1
+        with:
+          name: Tautulli-windows-installer
+          path: Tautulli-windows-${{ steps.get_version.outputs.RELEASE_VERSION }}.exe
+
+      - name: Post Status to Discord
+        uses: sarisia/actions-status-discord@v1
+        if: always()
+        with:
+          webhook: ${{ secrets.DISCORD_WEBHOOK }}
+          status: ${{ job.status }}
+          job: Build Windows Installer
+          nofail: true
+
+  build-macos:
+    runs-on: macos-latest
+    steps:
+      - name: Checkout Code
+        uses: actions/checkout@v2.1.0
+
+      - name: Set Release Version
+        id: get_version
+        shell: bash
+        run: |
+          if [[ $GITHUB_REF == refs/tags/* ]]; then
+            echo ::set-env name=VERSION::${GITHUB_REF#refs/tags/v}
+            echo ::set-output name=VERSION::${GITHUB_REF#refs/tags/v}
+            echo ::set-output name=RELEASE_VERSION::${GITHUB_REF#refs/tags/}
+          else
+            echo ::set-env name=VERSION::0.0.0
+            echo ::set-output name=VERSION::0.0.0
+            echo ::set-output name=RELEASE_VERSION::${GITHUB_SHA::7}
+          fi
+          echo $GITHUB_SHA > version.txt
+
+      - name: Set Up Python
+        uses: actions/setup-python@v1.2.0
+        with:
+          python-version: 3.8
+
+      - name: Cache Dependencies
+        id: cache_dependencies
+        uses: actions/cache@v1
+        with:
+          path: ~/Library/Caches/pip
+          key: ${{ runner.os }}-pip-${{ hashFiles('package/requirements-macos.txt') }}
+          restore-keys: ${{ runner.os }}-pip-
+
+      - name: Install Dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r package/requirements-macos.txt
+
+      - name: Build Package
+        run: |
+          pyinstaller -y ./package/Tautulli-macos.spec
+
+      - name: Create Installer
+        run: |
+          sudo pkgbuild --install-location /Applications --version ${{ steps.get_version.outputs.VERSION }} --component ./dist/Tautulli.app --scripts ./package/macos-scripts Tautulli-macos-${{ steps.get_version.outputs.RELEASE_VERSION }}.pkg
+
+      - name: Upload Installer
+        uses: actions/upload-artifact@v1
+        with:
+          name: Tautulli-macos-installer
+          path: Tautulli-macos-${{ steps.get_version.outputs.RELEASE_VERSION }}.pkg
+
+      - name: Post Status to Discord
+        uses: sarisia/actions-status-discord@v1
+        if: always()
+        with:
+          webhook: ${{ secrets.DISCORD_WEBHOOK }}
+          status: ${{ job.status }}
+          job: Build MacOS Installer
+          nofail: true
+
+  release:
+    needs: [build-windows, build-macos]
+    if: startsWith(github.ref, 'refs/tags/') && always()
+    runs-on: ubuntu-latest
+    steps:
+      - name: Get Build Job Status
+        uses: technote-space/workflow-conclusion-action@v1
+
+      - name: Checkout Code
+        uses: actions/checkout@v2.1.0
+
+      - name: Set Release Version
+        id: get_version
+        run: |
+          echo ::set-output name=RELEASE_VERSION::${GITHUB_REF#refs/tags/}
+
+      - name: Download Windows Installer
+        if: env.WORKFLOW_CONCLUSION == 'success'
+        uses: actions/download-artifact@v1
+        with:
+          name: Tautulli-windows-installer
+
+      - name: Download MacOS Installer
+        if: env.WORKFLOW_CONCLUSION == 'success'
+        uses: actions/download-artifact@v1
+        with:
+          name: Tautulli-macos-installer
 
       - name: Get Changelog
-        run: echo ::set-env name=CHANGELOG::"$( sed -n '/^## /{p; :loop n; p; /^## /q; b loop}' CHANGELOG.md | sed '$d' | sed '$d' | sed '$d' | sed ':a;N;$!ba;s/\n/%0A/g' )"
+        id: get_changelog
+        run: echo ::set-output name=CHANGELOG::"$( sed -n '/^## /{p; :loop n; p; /^## /q; b loop}' CHANGELOG.md | sed '$d' | sed '$d' | sed '$d' | sed ':a;N;$!ba;s/\n/%0A/g' )"
 
       - name: Create Release
         id: create_release
         uses: actions/create-release@v1
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
         with:
-          tag_name: ${{ env.RELEASE_VERSION }}
-          release_name: Tautulli ${{ env.RELEASE_VERSION }}
+          tag_name: ${{ steps.get_version.outputs.RELEASE_VERSION }}
+          release_name: Tautulli ${{ steps.get_version.outputs.RELEASE_VERSION }}
           body: |
             ## Changelog
 
-            ##${{ env.CHANGELOG }}
+            ##${{ steps.get_changelog.outputs.CHANGELOG }}
           draft: false
-          prerelease: ${{ endsWith(env.RELEASE_VERSION, '-beta') }}
+          prerelease: ${{ endsWith(steps.get_version.outputs.RELEASE_VERSION, '-beta') }}
 
+      - name: Upload Windows Installer
+        if: env.WORKFLOW_CONCLUSION == 'success'
+        uses: actions/upload-release-asset@v1
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        with:
+          upload_url: ${{ steps.create_release.outputs.upload_url }}
+          asset_path: Tautulli-windows-installer/Tautulli-windows-${{ steps.get_version.outputs.RELEASE_VERSION }}.exe
+          asset_name: Tautulli-windows-${{ steps.get_version.outputs.RELEASE_VERSION }}.exe
+          asset_content_type: application/vnd.microsoft.portable-executable
+
+      - name: Upload MacOS Installer
+        if: env.WORKFLOW_CONCLUSION == 'success'
+        uses: actions/upload-release-asset@v1
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        with:
+          upload_url: ${{ steps.create_release.outputs.upload_url }}
+          asset_path: Tautulli-macos-installer/Tautulli-macos-${{ steps.get_version.outputs.RELEASE_VERSION }}.pkg
+          asset_name: Tautulli-macos-${{ steps.get_version.outputs.RELEASE_VERSION }}.pkg
+          asset_content_type: application/vnd.apple.installer+xml
```
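
Two pieces of shell in this workflow are dense enough to deserve a gloss. First, the `Set Release Version` step derives an NSIS-friendly four-part version from the Git ref with bash parameter expansion; a rough Python re-implementation of that logic (illustrative only, not repository code):

```python
import re

def release_versions(github_ref, github_sha):
    # Mirrors the bash: ${GITHUB_REF#refs/tags/v}.1 then ${VAR/%-beta.1/.0}
    if github_ref.startswith('refs/tags/'):
        release_version = github_ref[len('refs/tags/'):]           # e.g. v2.5.0-beta
        version = github_ref[len('refs/tags/v'):]                  # e.g. 2.5.0-beta
        version_nsis = re.sub(r'-beta\.1$', '.0', version + '.1')  # 2.5.0-beta.1 -> 2.5.0.0
    else:
        release_version = github_sha[:7]   # ${GITHUB_SHA::7}
        version = '0.0.0'
        version_nsis = '0.0.0.0'
    return version_nsis, version, release_version

assert release_versions('refs/tags/v2.5.0-beta', 40 * '0') == ('2.5.0.0', '2.5.0-beta', 'v2.5.0-beta')
assert release_versions('refs/tags/v2.5.0', 40 * '0') == ('2.5.0.1', '2.5.0', 'v2.5.0')
```

Second, the `Get Changelog` step's sed pipeline prints from the first `## ` heading through the next one, drops the trailing three lines, and joins what remains with `%0A` (the escaped newline the workflow-command protocol expected). Roughly, in Python:

```python
import re

def latest_changelog_entry(changelog_text):
    # Grab the first "## " section, stopping before the next entry's heading.
    match = re.search(r'^## .*?(?=^## )', changelog_text, re.M | re.S)
    entry = match.group(0).rstrip() if match else ''
    return entry.replace('\n', '%0A')  # single line, as ::set-output requires
```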

#### .gitignore (vendored, 6 changed lines)

```diff
@@ -19,6 +19,8 @@ backups/*
 cache/*
 newsletters/*
 *.mmdb
+version.txt
+branch.txt
 
 # HTTPS Cert/Key #
 ##################
@@ -74,3 +76,7 @@ _ReSharper*/
 /logs
 .project
 .pydevproject
+
+#Ignore files generated by pyinstaller
+/build
+/dist
```

#### API.md (13 changed lines)

````diff
@@ -2555,16 +2555,21 @@ Returns:
 
 
 ### import_database
-Import a PlexWatch or Plexivity database into Tautulli.
+Import a Tautulli, PlexWatch, or Plexivity database into Tautulli.
 
 ```
 Required parameters:
-    app (str):                      "plexwatch" or "plexivity"
+    app (str):                      "tautulli" or "plexwatch" or "plexivity"
     database_path (str):            The full path to the plexwatch database file
-    table_name (str):               "processed" or "grouped"
+    method (str):                   For Tautulli only, "merge" or "overwrite"
+    table_name (str):               For PlexWatch or Plexivity only, "processed" or "grouped"
 
 Optional parameters:
-    import_ignore_interval (int):   The minimum number of seconds for a stream to import
+    backup (bool):                  For Tautulli only, true or false whether to backup
+                                    the current database before importing
+    import_ignore_interval (int):   For PlexWatch or Plexivity only, the minimum number
+                                    of seconds for a stream to import
 
 Returns:
     None
```
````
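
As a usage illustration (not part of API.md itself), this command is invoked through Tautulli's `/api/v2` endpoint like any other API command; the host, port, and key below are placeholders:

```python
import requests

# Placeholders: point these at your own Tautulli instance.
TAUTULLI_URL = "http://localhost:8181/api/v2"
API_KEY = "your_api_key"

# Import a Tautulli database, merging history and backing up the
# current database first (see the parameter list above).
params = {
    "apikey": API_KEY,
    "cmd": "import_database",
    "app": "tautulli",
    "database_path": "/path/to/tautulli.db",
    "method": "merge",
    "backup": True,
}
response = requests.get(TAUTULLI_URL, params=params)
print(response.json())
```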

#### CHANGELOG.md (27 changed lines)

```diff
@@ -1,5 +1,32 @@
 # Changelog
 
+## v2.5.0-beta (2020-05-31)
+
+* Announcements:
+  * Tautulli now supports Python 3!
+  * Python 2 is still supported for the time being, but it is recommended to upgrade to Python 3.
+* UI:
+  * New: Windows and MacOS setting to enable Tautulli to start automatically when you login.
+  * New: Added system tray icon for MacOS.
+  * New: Ability to import a Tautulli database in the settings.
+  * New: Added Tautulli news area on the settings page.
+* Other:
+  * New: Windows and MacOS app installers to install Tautulli without needing Python installed.
+
+
+## v2.2.4 (2020-05-16)
+
+* Monitoring:
+  * Fix: Show "None" as the subtitle source on the activity card for user selected subtitles.
+* UI:
+  * Fix: Deleted libraries were showing up on the homepage library cards.
+  * Fix: Libraries could get stuck as inactive in the database in some instances.
+* API:
+  * Fix: Incorrect title was being returned for the get_history API command.
+* Other:
+  * Fix: Plex remote access check was not being rescheduled after changing the settings.
+
+
 ## v2.2.3 (2020-05-01)
 
 * Notifications:
```

#### (file name not shown)

```diff
@@ -9,7 +9,7 @@ All pull requests should be based on the `nightly` branch, to minimize cross merge
 
 ### Python Code
 
 #### Compatibility
-The code should work with Python 2.7. Note that Tautulli runs on many different platforms.
+The code should work with Python 2.7.17 or Python 3.6+. Note that Tautulli runs on many different platforms.
 
 Re-use existing code. Do not hesitate to add logging in your code. You can the logger module `plexpy.logger.*` for this. Web requests are invoked via `plexpy.request.*` and derived ones. Use these methods to automatically add proper and meaningful error handling.
 
@@ -38,4 +38,4 @@ HTML5 compatible browsers are targeted.
 * 4 space indentation
 * `methodName`
 * `variableName`
-* `ClassName`
+* `ClassName`
```

#### (file name not shown)

```diff
@@ -1,4 +1,4 @@
-FROM tautulli/tautulli-baseimage:latest
+FROM tautulli/tautulli-baseimage:python3
 
 LABEL maintainer="Tautulli"
 
```

#### (file name not shown)

```diff
@@ -1,8 +1,4 @@
-#!/bin/sh
-''''which python >/dev/null 2>&1 && exec python "$0" "$@" # '''
-''''which python2 >/dev/null 2>&1 && exec python2 "$0" "$@" # '''
-''''which python2.7 >/dev/null 2>&1 && exec python2.7 "$0" "$@" # '''
-''''exec echo "Error: Python not found!" # '''
+#!/usr/bin/env python
 
 # -*- coding: utf-8 -*-
 
```
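
The removed header above is a sh/Python polyglot: sh reads `''''` as two empty strings followed by a command, so it probes for an interpreter and `exec`s it, while Python reads each of those lines as a bare triple-quoted string literal (the trailing `# '''` closes it). A minimal sketch of the same trick, targeting python3 purely for illustration:

```python
#!/bin/sh
''''which python3 >/dev/null 2>&1 && exec python3 "$0" "$@" # '''
''''exec echo "Error: Python not found!" # '''
# When run by sh, the lines above locate python3 and re-exec this file with it.
# When run by Python, they are just string expressions with no effect.
print("now running under Python")
```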

#### (file name not shown)

```diff
@@ -29,14 +29,15 @@ This project is based on code from [Headphones](https://github.com/rembo10/headp
 
 ## Installation & Support
 
-[](https://python.org/downloads/release/python-2717/)
+[](https://python.org/downloads)
-[](https://hub.docker.com/r/tautulli/tautulli)
+[](https://hub.docker.com/r/tautulli/tautulli)
 
 | Status | Branch: `master` | Branch: `beta` | Branch: `nightly` |
 | --- | --- | --- | --- |
-| Release | [](https://github.com/Tautulli/Tautulli/releases/latest) <br> [](https://github.com/Tautulli/Tautulli/releases/latest) | [](https://github.com/Tautulli/Tautulli/releases) <br> [](https://github.com/Tautulli/Tautulli/commits/beta) | [](https://github.com/Tautulli/Tautulli/commits/nightly) <br> [](https://github.com/Tautulli/Tautulli/commits/nightly) |
-| Docker | [](https://hub.docker.com/r/tautulli/tautulli) <br> [](https://github.com/Tautulli/Tautulli/actions?query=workflow%3A"Publish+Docker"+branch%3Amaster) | [](https://hub.docker.com/r/tautulli/tautulli) <br> [](https://github.com/Tautulli/Tautulli/actions?query=workflow%3A"Publish+Docker"+branch%3Abeta) | [](https://hub.docker.com/r/tautulli/tautulli) <br> [](https://github.com/Tautulli/Tautulli/actions?query=workflow%3A"Publish+Docker"+branch%3Anightly) |
+| Release | [](https://github.com/Tautulli/Tautulli/releases/latest) <br> [](https://github.com/Tautulli/Tautulli/releases/latest) | [](https://github.com/Tautulli/Tautulli/releases) <br> [](https://github.com/Tautulli/Tautulli/commits/beta) | [](https://github.com/Tautulli/Tautulli/commits/nightly) <br> [](https://github.com/Tautulli/Tautulli/commits/nightly) |
+| Docker | [](https://hub.docker.com/r/tautulli/tautulli) <br> [](https://github.com/Tautulli/Tautulli/actions?query=workflow%3A"Publish+Docker"+branch%3Amaster) | [](https://hub.docker.com/r/tautulli/tautulli) <br> [](https://github.com/Tautulli/Tautulli/actions?query=workflow%3A"Publish+Docker"+branch%3Abeta) | [](https://hub.docker.com/r/tautulli/tautulli) <br> [](https://github.com/Tautulli/Tautulli/actions?query=workflow%3A"Publish+Docker"+branch%3Anightly) |
+| Installer | [](https://github.com/Tautulli/Tautulli/releases/latest) <br> [](https://github.com/Tautulli/Tautulli/releases/latest) <br> [](https://github.com/Tautulli/Tautulli/actions?query=workflow%3A"Publish+Release"+branch%3Amaster) | [](https://github.com/Tautulli/Tautulli/releases) <br> [](https://github.com/Tautulli/Tautulli/releases) <br> [](https://github.com/Tautulli/Tautulli/actions?query=workflow%3A"Publish+Release"+branch%3Abeta) | [](https://github.com/Tautulli/Tautulli/actions?query=workflow%3A"Publish+Release"+branch%3Anightly) |
 
 [](https://github.com/Tautulli/Tautulli-Wiki/wiki)
 [](https://tautulli.com/discord)
```

#### Tautulli.py (73 changed lines)
@@ -1,8 +1,4 @@
|
||||
#!/bin/sh
|
||||
''''which python >/dev/null 2>&1 && exec python "$0" "$@" # '''
|
||||
''''which python2 >/dev/null 2>&1 && exec python2 "$0" "$@" # '''
|
||||
''''which python2.7 >/dev/null 2>&1 && exec python2.7 "$0" "$@" # '''
|
||||
''''exec echo "Error: Python not found!" # '''
|
||||
#!/usr/bin/env python
|
||||
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
@@ -27,17 +23,24 @@ import sys
|
||||
# Ensure lib added to path, before any other imports
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), 'lib'))
|
||||
|
||||
from future.builtins import str
|
||||
|
||||
import appdirs
|
||||
import argparse
|
||||
import datetime
|
||||
import locale
|
||||
import pytz
|
||||
import signal
|
||||
import time
|
||||
import threading
|
||||
import tzlocal
|
||||
|
||||
import plexpy
|
||||
from plexpy import config, database, helpers, logger, webstart
|
||||
|
||||
from plexpy import common, config, database, helpers, logger, webstart
|
||||
if common.PLATFORM == 'Windows':
|
||||
from plexpy import windows
|
||||
elif common.PLATFORM == 'Darwin':
|
||||
from plexpy import macos
|
||||
|
||||
# Register signals, such as CTRL + C
|
||||
signal.signal(signal.SIGINT, plexpy.sig_handler)
|
||||
@@ -51,12 +54,14 @@ def main():
|
||||
"""
|
||||
|
||||
# Fixed paths to Tautulli
|
||||
if hasattr(sys, 'frozen'):
|
||||
if hasattr(sys, 'frozen') and hasattr(sys, '_MEIPASS'):
|
||||
plexpy.FROZEN = True
|
||||
plexpy.FULL_PATH = os.path.abspath(sys.executable)
|
||||
plexpy.PROG_DIR = sys._MEIPASS
|
||||
else:
|
||||
plexpy.FULL_PATH = os.path.abspath(__file__)
|
||||
plexpy.PROG_DIR = os.path.dirname(plexpy.FULL_PATH)
|
||||
|
||||
plexpy.PROG_DIR = os.path.dirname(plexpy.FULL_PATH)
|
||||
plexpy.ARGS = sys.argv[1:]
|
||||
|
||||
# From sickbeard
|
||||
@@ -122,7 +127,7 @@ def main():
|
||||
|
||||
if args.dev:
|
||||
plexpy.DEV = True
|
||||
logger.debug(u"Tautulli is running in the dev environment.")
|
||||
logger.debug("Tautulli is running in the dev environment.")
|
||||
|
||||
if args.daemon:
|
||||
if sys.platform == 'win32':
|
||||
@@ -175,6 +180,8 @@ def main():
|
||||
# Determine which data directory and config file to use
|
||||
if args.datadir:
|
||||
plexpy.DATA_DIR = args.datadir
|
||||
elif plexpy.FROZEN:
|
||||
plexpy.DATA_DIR = appdirs.user_data_dir("Tautulli", False)
|
||||
else:
|
||||
plexpy.DATA_DIR = plexpy.PROG_DIR
|
||||
|
||||
@@ -229,25 +236,50 @@ def main():
|
||||
try:
|
||||
import OpenSSL
|
||||
except ImportError:
|
||||
logger.warn("The pyOpenSSL module is missing. Install this " \
|
||||
logger.warn("The pyOpenSSL module is missing. Install this "
|
||||
"module to enable HTTPS. HTTPS will be disabled.")
|
||||
plexpy.CONFIG.ENABLE_HTTPS = False
|
||||
|
||||
# Try to start the server. Will exit here is address is already in use.
|
||||
webstart.start()
|
||||
|
||||
# Windows system tray icon
|
||||
if os.name == 'nt' and plexpy.CONFIG.WIN_SYS_TRAY:
|
||||
plexpy.win_system_tray()
|
||||
|
||||
logger.info("Tautulli is ready!")
|
||||
if common.PLATFORM == 'Windows':
|
||||
if plexpy.CONFIG.SYS_TRAY_ICON:
|
||||
plexpy.WIN_SYS_TRAY_ICON = windows.WindowsSystemTray()
|
||||
plexpy.WIN_SYS_TRAY_ICON.start()
|
||||
windows.set_startup()
|
||||
elif common.PLATFORM == 'Darwin':
|
||||
macos.set_startup()
|
||||
|
||||
# Open webbrowser
|
||||
if plexpy.CONFIG.LAUNCH_BROWSER and not args.nolaunch and not plexpy.DEV:
|
||||
plexpy.launch_browser(plexpy.CONFIG.HTTP_HOST, plexpy.HTTP_PORT,
|
||||
plexpy.HTTP_ROOT)
|
||||
|
||||
# Wait endlessy for a signal to happen
|
||||
if common.PLATFORM == 'Darwin' and plexpy.CONFIG.SYS_TRAY_ICON:
|
||||
try:
|
||||
import AppKit
|
||||
except ImportError:
|
||||
logger.warn("The pyobjc module is missing. Install this "
|
||||
"module to enable the system tray icon.")
|
||||
plexpy.CONFIG.SYS_TRAY_ICON = False
|
||||
|
||||
if plexpy.CONFIG.SYS_TRAY_ICON:
|
||||
# MacOS system tray icon must be run on the main thread and is blocking
|
||||
# Start the rest of Tautulli on a new thread
|
||||
threading.Thread(target=wait).start()
|
||||
plexpy.MAC_SYS_TRAY_ICON = macos.MacOSSystemTray()
|
||||
plexpy.MAC_SYS_TRAY_ICON.start()
|
||||
else:
|
||||
wait()
|
||||
else:
|
||||
wait()
|
||||
|
||||
|
||||
def wait():
|
||||
logger.info("Tautulli is ready!")
|
||||
|
||||
# Wait endlessly for a signal to happen
|
||||
while True:
|
||||
if not plexpy.SIGNAL:
|
||||
try:
|
||||
@@ -265,11 +297,14 @@ def main():
|
||||
plexpy.shutdown(restart=True, checkout=True)
|
||||
elif plexpy.SIGNAL == 'reset':
|
||||
plexpy.shutdown(restart=True, reset=True)
|
||||
else:
|
||||
elif plexpy.SIGNAL == 'update':
|
||||
plexpy.shutdown(restart=True, update=True)
|
||||
else:
|
||||
logger.error('Unknown signal. Shutting down...')
|
||||
plexpy.shutdown()
|
||||
|
||||
plexpy.SIGNAL = None
|
||||
|
||||
# Call main()
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
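
The `sys.frozen`/`sys._MEIPASS` check above is the standard PyInstaller-bundle probe; a condensed sketch of the path logic it implements (names are illustrative, not Tautulli's):

```python
import os
import sys

def resolve_paths():
    # PyInstaller sets sys.frozen and unpacks bundled resources into the
    # temporary directory exposed as sys._MEIPASS; requiring both avoids
    # matching other freezers that only set sys.frozen.
    if hasattr(sys, 'frozen') and hasattr(sys, '_MEIPASS'):
        full_path = os.path.abspath(sys.executable)  # the bundled executable
        prog_dir = sys._MEIPASS                      # unpacked resources
        frozen = True
    else:
        full_path = os.path.abspath(__file__)
        prog_dir = os.path.dirname(full_path)
        frozen = False
    return frozen, full_path, prog_dir
```

This is also why the data directory moves to `appdirs.user_data_dir(...)` when frozen: `sys._MEIPASS` is a throwaway temp directory, so nothing persistent can live next to the program files.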

#### (file name not shown)

```diff
@@ -5,46 +5,97 @@
         <h4 class="modal-title">Import ${app} Database</h4>
       </div>
       <div class="modal-body" id="modal-text">
-        <p class="help-block">
-          <%
-            v = ''
-            if app == 'PlexWatch':
-                v = '0.3.2'
-            elif app == 'Plexivity':
-                v = '0.9.8'
-          %>
-          <strong>Please ensure your ${app} database is at version ${v} or higher.</strong>
-        </p>
-        <div class="form-group">
-          <label for="db_location">Database Location</label>
-          <div class="row">
-            <div class="col-xs-8">
-              <input type="text" class="form-control" id="db_location" name="db_location" value="" required>
+        <form id="import_database" enctype="multipart/form-data" method="post" name="import_database">
+          <input type="hidden" id="import_app" name="import_app" value="${app.lower()}" />
+          % if app in ('PlexWatch', 'Plexivity'):
+          <p class="help-block">
+            <%
+              v = ''
+              if app == 'PlexWatch':
+                  v = '0.3.2'
+              elif app == 'Plexivity':
+                  v = '0.9.8'
+            %>
+            <strong>Please ensure your ${app} database is at version ${v} or higher.</strong>
+          </p>
+          % endif
+          <div class="form-group">
+            <label for="import_database_file">Option 1: Upload a Database File</label>
+            <div class="row">
+              <div class="col-xs-12">
+                <div class="input-group">
+                  <label for="import_database_file" class="input-group-btn">
+                    <span class="btn btn-form">Upload</span>
+                    <input type="file" style="display: none;" id="import_database_file" name="import_database_file" required>
+                  </label>
+                  <input id="import_database_file_name" type="text" class="form-control" disabled>
+                </div>
+              </div>
+            </div>
+            <p class="help-block">Upload the ${app} database you wish to import.</p>
+          </div>
-          <p class="help-block">Enter the path and file name for the ${app} database you wish to import.</p>
-        </div>
-        <div class="form-group">
-          <label for="table_name">Table Name</label>
-          <div class="row">
-            <div class="col-xs-4">
-              <select id="table_name" class="form-control" name="table_name">
-                <option value="processed">processed</option>
-                <option value="grouped">grouped</option>
-              </select>
+          <div class="form-group">
+            <label for="import_database_path">Option 2: Browse for a Database File</label>
+            <div class="row">
+              <div class="col-xs-12">
+                <div class="input-group">
+                  <span class="input-group-btn">
+                    <button class="btn btn-form" type="button" id="import_database_path_browse">Browse</button>
+                  </span>
+                  <input type="text" class="form-control" id="import_database_path" name="import_database_path" value="" required disabled>
+                </div>
+              </div>
+            </div>
+            <p class="help-block">Browse for the ${app} database you wish to import.</p>
+          </div>
-          <p class="help-block">The table name from which you wish to import. Only import one of these, importing both will result in duplicated data.</p>
-        </div>
-        <div class="form-group">
-          <label for="import_ignore_interval">Ignore Interval</label>
-          <div class="row">
-            <div class="col-xs-2">
-              <input type="text" class="form-control" id="import_ignore_interval" name="import_ignore_interval" value="120" required>
+          % if app == 'Tautulli':
+          <div class="form-group">
+            <label for="table_name">Import Method</label>
+            <div class="row">
+              <div class="col-xs-4">
+                <select class="form-control" id="import_method" name="import_method">
+                  <option value="merge">Merge</option>
+                  <option value="overwrite">Overwrite</option>
+                </select>
+              </div>
+            </div>
+            <p class="help-block">Select how you would like to import the Tautulli history.</p>
+            <ul class="help-block" style="padding-inline-start: 15px;">
+              <li><strong>Merge</strong> will add all history and remove any duplicates from the imported database into the current database.</li>
+              <li><strong>Overwrite</strong> will replace all history in the current database with the imported database.</li>
+            </ul>
+            <p class="help-block">Note: Libraries, users, notification agents, newsletter agents, and registered mobile devices will also be imported</p>
+          </div>
-          <p class="help-block">Enter the minimum duration (in seconds) an item must have been active for. Set to 0 to import all.</p>
-        </div>
+          <div class="checkbox">
+            <label>
+              <input type="checkbox" name="import_backup_db" id="import_backup_db" value="1" checked> Backup Current Database
+            </label>
+            <p class="help-block">Automatically create a backup of the current database before importing.</p>
+          </div>
+          % else:
+          <div class="form-group">
+            <label for="import_table_name">Table Name</label>
+            <div class="row">
+              <div class="col-xs-4">
+                <select class="form-control" id="import_table_name" name="import_table_name">
+                  <option value="processed">Processed</option>
+                  <option value="grouped">Grouped</option>
+                </select>
+              </div>
+            </div>
+            <p class="help-block">Select the table name from which you wish to import. Only import one of these, importing both will result in duplicated data.</p>
+          </div>
+          <div class="form-group">
+            <label for="import_ignore_interval">Ignore Interval</label>
+            <div class="row">
+              <div class="col-xs-2">
+                <input type="text" class="form-control" id="import_ignore_interval" name="import_ignore_interval" value="120" required>
+              </div>
+            </div>
+            <p class="help-block">Enter the minimum duration (in seconds) an item must have been active for. Set to 0 to import all.</p>
+          </div>
+          % endif
+        </form>
       </div>
       <div class="modal-footer">
         <div>
@@ -55,24 +106,97 @@
     </div>
   </div>
   <script>
-    // Send database path to import script
+    $('#import_database_path_browse').click(function () {
+      $('#browse-path-type').text('Databse File');
+      $('#browse-path-modal').modal('show');
+      browsePath(null, null, '.db');
+    });
+    $('#select-browse-file').click(function () {
+      $('#browse-path-modal').modal('hide');
+      $("#import_database_path").val($('#browse-path').val());
+    });
+
+    $('#import_database_file').change(function() {
+      if ($(this)[0].files[0]) {
+        $('#import_database_file_name').val($(this)[0].files[0].name);
+      }
+    });
+
     $("#import_db").click(function() {
-      var database_path = $("#db_location").val();
-      var table_name = $("#table_name").val();
-      var import_ignore_interval = $("#import_ignore_interval").val();
+      $(this).prop('disabled', true);
+
+      var app = $("#import_app").val();
+      var database_file = $('#import_database_file')[0].files[0];
+      var database_path = $("#import_database_path").val();
+      var method = $("#import_method").val();
+      var backup = $("#import_backup_db").is(':checked');
+      var table_name = $("#import_table_name").val();
+      var ignore_interval = $("#import_ignore_interval").val();
+
+      var content_type;
+      var process_data;
+      var data;
+
+      if (database_file) {
+        content_type = false;
+        process_data = false;
+        data = new FormData();
+        data.append('app', app);
+        data.append('database_file', database_file);
+        data.append('method', method);
+        data.append('backup', backup);
+        data.append('table_name', table_name);
+        data.append('ignore_interval', ignore_interval);
+      } else {
+        content_type = 'application/x-www-form-urlencoded; charset=UTF-8';
+        process_data = true;
+        data = {
+          app: app,
+          database_path: database_path,
+          method: method,
+          backup: backup,
+          table_name: table_name,
+          ignore_interval: ignore_interval
+        }
+      }
+
+      if (database_file) {
+        $("#status-message").html('<i class="fa fa-fw fa-spin fa-refresh"></i> Uploading database file...');
+      } else {
+        $("#status-message").html('<i class="fa fa-fw fa-spin fa-refresh"></i>');
+      }
+
       $.ajax({
         url: 'import_database',
-        data: {
-          app: "${app}",
-          database_path: database_path,
-          table_name: table_name,
-          import_ignore_interval: import_ignore_interval
-        },
         type: 'POST',
+        data: data,
+        cache: false,
+        async: true,
+        contentType: content_type,
+        processData: process_data,
         success: function(data) {
-          $("#status-message").html(data);
-          $("#db_location").val('')
+          var msg;
+          if (data.result === 'success') {
+            msg = "<i class='fa fa-check'></i> " + data.message;
+          } else {
+            msg = "<i class='fa fa-exclamation-triangle'></i> " + data.message;
+          }
+          $("#status-message").html(msg);
+          $("#import_database_file").val(null);
+          $("#import_database_file_name").val('');
+          $("#import_database_path").val('');
+        },
+        error: function (xhr) {
+          var msg = "<i class='fa fa-exclamation-triangle'></i> Error (" + xhr.status + "): ";
+          if (xhr.status === 413) {
+            msg += "file is too large to upload"
+          } else {
+            msg += 'try again'
+          }
+          $("#status-message").html(msg);
+        },
+        complete: function(xhr) {
+          $("#import_db").prop('disabled', false);
+        }
       });
     });
```
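
For context, the two branches of this upload script correspond to two ways of posting to the same `import_database` handler; a rough equivalent with Python's requests, assuming a reachable instance and omitting session authentication:

```python
import requests

BASE = "http://localhost:8181"  # placeholder Tautulli address
fields = {"app": "tautulli", "method": "merge", "backup": "true"}

# Mode 1 (FormData branch): multipart upload of the database file itself.
with open("tautulli.db", "rb") as f:
    requests.post(BASE + "/import_database", data=fields,
                  files={"database_file": f})

# Mode 2 (urlencoded branch): send a path that already exists on the server.
requests.post(BASE + "/import_database",
              data=dict(fields, database_path="/path/to/tautulli.db"))
```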

#### (file name not shown)

```diff
@@ -55,8 +55,10 @@
               newer version</a> of Tautulli is available!<br />
               You are ${plexpy.COMMITS_BEHIND} commit${'s' if plexpy.COMMITS_BEHIND > 1 else ''} behind.<br />
             % endif
-            % if plexpy.DOCKER:
+            % if plexpy.INSTALL_TYPE == 'docker':
               Update your Docker container or <a href="#" id="updateDismiss">Dismiss</a>
+            % elif plexpy.INSTALL_TYPE in ('windows', 'macos'):
+              <a href="${anon_url('https://github.com/%s/%s/releases/tag/%s' % (plexpy.CONFIG.GIT_USER, plexpy.CONFIG.GIT_REPO, plexpy.LATEST_RELEASE))}" target="_blank">Download</a> and install the latest version or <a href="#" id="updateDismiss">Dismiss</a>
             % else:
               <a href="update">Update</a> or <a href="#" id="updateDismiss">Dismiss</a>
             % endif
@@ -330,8 +332,10 @@ ${next.modalIncludes()}
         msg = 'A <a href="' + result.compare_url + '" target="_blank">newer version</a> of Tautulli is available!<br />' +
               'You are '+ result.commits_behind + ' commit' + (result.commits_behind > 1 ? 's' : '') + ' behind.<br />';
       }
-      if (result.docker) {
+      if (result.install_type === 'docker') {
         msg += 'Update your Docker container or <a href="#" id="updateDismiss">Dismiss</a>';
+      } else if (result.install_type === 'windows' || result.install_type === 'macos') {
+        msg += '<a href="' + result.release_url + '" target="_blank">Download</a> and install the latest version or <a href="#" id="updateDismiss">Dismiss</a>'
       } else {
         msg += '<a href="update">Update</a> or <a href="#" id="updateDismiss">Dismiss</a>';
       }
```

#### (file name not shown)

```diff
@@ -2994,6 +2994,9 @@ a .home-platforms-list-cover-face:hover
 .accordion li .link i.fa {
     color: #999;
 }
+.accordion li .link span.toggle-left {
+    padding-right: 5px;
+}
 .accordion li .link span.toggle-right {
     float: right;
     padding-left: 10px;
@@ -4296,4 +4299,41 @@ a[data-tab-destination] {
     .help-block li {
         margin-top: 0;
         color: #737373;
     }
 }
+
+#browse-path-list > li > span > i.fa {
+    color: #999;
+}
+
+#tautulli-news .open .news-title,
+#tautulli-news .open .news-date,
+#tautulli-news .accordion li.open .link i.fa {
+    color: #eee;
+}
+.news-title,
+.news-date {
+    color: #999;
+    padding-left: 5px;
+}
+.news-subtitle {
+    display: block;
+    color: #aaa;
+    font-weight: bold;
+    margin-bottom: 10px;
+}
+.news-body {
+    display: block;
+    color: #aaa;
+}
+.news-body p:last-of-type {
+    margin-bottom: 0;
+}
+.news-body a {
+    display: inline !important;
+    background: none !important;
+    padding: 0 !important;
+    color: #fff;
+}
+.news-body a:hover {
+    color: #f9be03;
+}
```

#### (file name not shown)

```diff
@@ -276,14 +276,17 @@ DOCUMENTATION :: END
                 <div class="sub-heading">Subtitle</div>
                 <div class="sub-value" id="subtitle_decision-${sk}">
                 % if data['subtitles'] == 1:
+                  <%
+                    subtitle_codec = 'None' if data['stream_subtitle_codec'] and data['stream_subtitle_transient'] else data['subtitle_codec'].upper()
+                  %>
                   % if data['stream_subtitle_decision'] == 'transcode':
-                    Transcode (${data['subtitle_codec'].upper()} <i class="fa fa-long-arrow-right"></i> ${data['stream_subtitle_codec'].upper()})
+                    Transcode (${subtitle_codec} <i class="fa fa-long-arrow-right"></i> ${data['stream_subtitle_codec'].upper()})
                   % elif data['stream_subtitle_decision'] == 'copy':
-                    Direct Stream (${data['subtitle_codec'].upper()})
+                    Direct Stream (${subtitle_codec})
                   % elif data['stream_subtitle_decision'] == 'burn':
-                    Burn (${data['subtitle_codec'].upper()})
+                    Burn (${subtitle_codec})
                   % else:
-                    Direct Play (${data['subtitle_codec'].upper() if data['synced_version'] else data['stream_subtitle_codec'].upper()})
+                    Direct Play (${subtitle_codec if data['synced_version'] else data['stream_subtitle_codec'].upper()})
                   % endif
                 % else:
                   None
```

#### (file name not shown)

```diff
@@ -8,6 +8,13 @@
 
 <%def name="body()">
 <div class='container-fluid'>
+  % if config['database_is_importing']:
+  <div style="text-align: center; margin-top: 20px;">
+    <i class="fa fa-refresh fa-spin"></i> Tautulli is importing history from another database. This could take a few minutes depending on the size of your database.
+    <br />
+    You may leave this page and check back later.
+  </div>
+  % endif
   <div class='table-card-header'>
     <div class="header-bar">
       <span><i class="fa fa-history"></i> History</span>
```

Binary files changed (contents not shown):

- `data/interfaces/default/images/check-solid.ico` — new file (99 KiB)
- `data/interfaces/default/images/logo-circle-update.ico` — new file (123 KiB)
- `data/interfaces/default/images/logo-circle.icns` — new file
- `data/interfaces/default/images/logo-circle.ico` — new file (118 KiB)
- two deleted image files (112 KiB and 107 KiB; file names not shown)

#### (file name not shown)

```diff
@@ -493,14 +493,15 @@
 
                 var subtitle_decision = 'None';
                 if (['movie', 'episode', 'clip'].indexOf(s.media_type) > -1 && s.subtitles === 1) {
+                    var subtitle_codec = (s.stream_subtitle_codec && s.stream_subtitle_transient) ? 'None' : s.subtitle_codec.toUpperCase();
                     if (s.stream_subtitle_decision === 'transcode') {
-                        subtitle_decision = 'Transcode (' + s.subtitle_codec.toUpperCase() + ' <i class="fa fa-long-arrow-right"></i> ' + s.stream_subtitle_codec.toUpperCase() + ')';
+                        subtitle_decision = 'Transcode (' + subtitle_codec + ' <i class="fa fa-long-arrow-right"></i> ' + s.stream_subtitle_codec.toUpperCase() + ')';
                     } else if (s.stream_subtitle_decision === 'copy') {
-                        subtitle_decision = 'Direct Stream (' + s.subtitle_codec.toUpperCase() + ')';
+                        subtitle_decision = 'Direct Stream (' + subtitle_codec + ')';
                     } else if (s.stream_subtitle_decision === 'burn') {
-                        subtitle_decision = 'Burn (' + s.subtitle_codec.toUpperCase() + ')';
+                        subtitle_decision = 'Burn (' + subtitle_codec + ')';
                     } else {
-                        subtitle_decision = 'Direct Play (' + ((s.synced_version === '1') ? s.subtitle_codec.toUpperCase() : s.stream_subtitle_codec.toUpperCase()) + ')';
+                        subtitle_decision = 'Direct Play (' + ((s.synced_version === '1') ? subtitle_codec : s.stream_subtitle_codec.toUpperCase()) + ')';
                     }
                 }
                 $('#subtitle_decision-' + key).html(subtitle_decision);
```

#### (file name not shown)

```diff
@@ -45,14 +45,14 @@ DOCUMENTATION :: END
 
 # Get audio codec file
 def af(codec):
-    for pattern, file_type in MEDIA_FLAGS_AUDIO.iteritems():
+    for pattern, file_type in MEDIA_FLAGS_AUDIO.items():
         if re.match(pattern, codec):
             return file_type
     return codec
 
 # Get video codec file
 def vf(codec):
-    for pattern, file_type in MEDIA_FLAGS_VIDEO.iteritems():
+    for pattern, file_type in MEDIA_FLAGS_VIDEO.items():
         if re.match(pattern, codec):
             return file_type
     return codec
```
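
This `iteritems()` → `items()` swap repeats across the templates that follow; the reason is a Python 2/3 difference (a quick illustration, not repository code):

```python
d = {'a': 1, 'b': 2}

# Python 2: d.iteritems() was a lazy iterator and d.items() built a list.
# Python 3: iteritems() is gone entirely; items() returns a cheap view,
# so .items() is the spelling that works on both interpreters.
for key, value in sorted(d.items()):
    print(key, value)

# The same migration drives the urllib change later in this diff:
# Py2's urllib.urlencode moved to Py3's urllib.parse.urlencode.
from six.moves.urllib.parse import urlencode
print(urlencode({'q': 'tautulli'}))  # -> q=tautulli
```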

#### (file name not shown)

```diff
@@ -237,6 +237,27 @@ function doAjaxCall(url, elem, reload, form, showMsg, callback) {
     });
 }
 
+getBrowsePath = function (key, path, filter_ext) {
+    var deferred = $.Deferred();
+
+    $.ajax({
+        url: 'browse_path',
+        type: 'GET',
+        data: {
+            key: key,
+            path: path,
+            filter_ext: filter_ext
+        },
+        success: function(data) {
+            deferred.resolve(data);
+        },
+        error: function() {
+            deferred.reject();
+        }
+    });
+    return deferred;
+};
+
 function doSimpleAjaxCall(url) {
     $.ajax(url);
 }
```

#### (file name not shown)

```diff
@@ -251,9 +251,9 @@ DOCUMENTATION :: END
 % else:
 <div id="get_file_sizes_message" style="text-align: center; margin-top: 20px; display: none;">
 % endif
-    <i class="fa fa-refresh fa-spin"></i> Tautulli is calculating the file sizes for the library's media info. This could take a few minutes depending on the size of your library.
+    <i class="fa fa-refresh fa-spin"></i> Tautulli is calculating the file sizes for the library's media info. This could take a few minutes depending on the size of your library.
     <br />
-    You may leave this page and come back later.
+    You may leave this page and check back later.
 </div>
 <div class='table-card-header'>
     <div class="header-bar">
```

#### (file name not shown)

```diff
@@ -123,7 +123,7 @@
             <div class="row">
                 <div class="col-md-12">
                     <select class="form-control" id="${item['name']}" name="${item['name']}">
-                    % for key, value in sorted(item['select_options'].iteritems()):
+                    % for key, value in sorted(item['select_options'].items()):
                         % if key == item['value']:
                         <option value="${key}" selected>${value}</option>
                         % else:
@@ -144,7 +144,7 @@
                         <option value="select-all">Select All</option>
                         <option value="remove-all">Remove All</option>
                         % if isinstance(item['select_options'], dict):
-                            % for section, options in item['select_options'].iteritems():
+                            % for section, options in item['select_options'].items():
                             <optgroup label="${section}">
                                 % for option in sorted(options, key=lambda x: x['text'].lower()):
                                 <option value="${option['value']}">${option['text']}</option>
@@ -325,7 +325,7 @@
             <div class="row">
                 <div class="col-md-12">
                     <select class="form-control" id="${item['name']}" name="${item['name']}">
-                    % for key, value in sorted(item['select_options'].iteritems()):
+                    % for key, value in sorted(item['select_options'].items()):
                         % if key == item['value']:
                         <option value="${key}" selected>${value}</option>
                         % else:
@@ -346,7 +346,7 @@
                         <option value="select-all">Select All</option>
                         <option value="remove-all">Remove All</option>
                         % if isinstance(item['select_options'], dict):
-                            % for section, options in item['select_options'].iteritems():
+                            % for section, options in item['select_options'].items():
                             <optgroup label="${section}">
                                 % for option in sorted(options, key=lambda x: x['text'].lower()):
                                 <option value="${option['value']}">${option['text']}</option>
```

#### (file name not shown)

```diff
@@ -1,5 +1,5 @@
 <%
-  import urllib
+  from six.moves.urllib.parse import urlencode
 %>
 <!doctype html>
 
@@ -35,7 +35,7 @@
   <script>
     $(document).ready(function () {
       var frame = $('<iframe></iframe>', {
-        src: 'real_newsletter?${urllib.urlencode(kwargs) | n}',
+        src: 'real_newsletter?${urlencode(kwargs) | n}',
         frameborder: '0',
         style: 'display: none; height: 100vh; width: 100vw;'
       });
```

#### (file name not shown)

```diff
@@ -9,7 +9,7 @@ Version: 0.1
 DOCUMENTATION :: END
 </%doc>
 
-<% from plexpy.newsletter_handler import NEWSLETTER_SCHED %>
+<% from plexpy import newsletter_handler %>
 <ul class="stacked-configs list-unstyled">
 % for newsletter in sorted(newsletters_list, key=lambda k: (k['agent_label'], k['friendly_name'], k['id'])):
     <li class="newsletter-agent pointer" data-id="${newsletter['id']}">
@@ -22,8 +22,8 @@ DOCUMENTATION :: END
         % endif
         <span class="toggle-right"><i class="fa fa-lg fa-fw fa-cog"></i></span>
         <span class="toggle-right friendly_name" id="newsletter-next_run-${newsletter['id']}">
-        % if NEWSLETTER_SCHED.get_job('newsletter-{}'.format(newsletter['id'])):
-            <% job = NEWSLETTER_SCHED.get_job('newsletter-{}'.format(newsletter['id'])) %>
+        % if newsletter_handler.NEWSLETTER_SCHED.get_job('newsletter-{}'.format(newsletter['id'])):
+            <% job = newsletter_handler.NEWSLETTER_SCHED.get_job('newsletter-{}'.format(newsletter['id'])) %>
         <script>
             $("#newsletter-next_run-${newsletter['id']}").text(moment("${job.next_run_time}", "YYYY-MM-DD HH:mm:ssZ").fromNow())
         </script>
```

#### (file name not shown)

```diff
@@ -88,7 +88,7 @@
             <div class="row">
                 <div class="col-md-12">
                     <select class="form-control" id="${item['name']}" name="${item['name']}">
-                    % for key, value in sorted(item['select_options'].iteritems()):
+                    % for key, value in sorted(item['select_options'].items()):
                         % if key == item['value']:
                         <option value="${key}" selected>${value}</option>
                         % else:
@@ -109,7 +109,7 @@
                         <option value="select-all">Select All</option>
                         <option value="remove-all">Remove All</option>
                         % if isinstance(item['select_options'], dict):
-                            % for section, options in item['select_options'].iteritems():
+                            % for section, options in item['select_options'].items():
                             <optgroup label="${section}">
                                 % for option in sorted(options, key=lambda x: x['text'].lower()):
                                 <option value="${option['value']}">${option['text']}</option>
@@ -211,7 +211,7 @@
         % for action in available_notification_actions:
         <li>
             <div class="link">
-                <span class="toggle-left"><i class="fa ${action['icon']} fa-fw"></i></span>
+                <span class="toggle-left"><i class="fa ${action['icon']} fa-fw"></i></span>
                 ${action['label']}
                 <span class="toggle-right"><i class="fa fa-chevron-down"></i></span>
             </div>
@@ -237,7 +237,7 @@
         % for action in available_notification_actions:
         <li>
             <div class="link">
-                <span class="toggle-left"><i class="fa ${action['icon']} fa-fw"></i></span>
+                <span class="toggle-left"><i class="fa ${action['icon']} fa-fw"></i></span>
                 ${action['label']}
                 <span class="toggle-right"><i class="fa fa-chevron-down"></i></span>
             </div>
@@ -268,7 +268,7 @@
         % for action in available_notification_actions:
         <li>
             <div class="link">
-                <span class="toggle-left"><i class="fa ${action['icon']} fa-fw"></i></span>
+                <span class="toggle-left"><i class="fa ${action['icon']} fa-fw"></i></span>
                 ${action['label']}
                 <span class="toggle-right"><i class="fa fa-chevron-down"></i></span>
             </div>
@@ -313,7 +313,7 @@
             <div class="row">
                 <div class="col-md-12">
                     <select class="form-control" id="test_script" name="test_script">
-                    % for key, value in sorted(notifier['config_options'][2]['select_options'].iteritems()):
+                    % for key, value in sorted(notifier['config_options'][2]['select_options'].items()):
                         <option value="${key}">${value}</option>
                     % endfor
                     </select>
@@ -857,6 +857,7 @@
                 title: $('#test_subject').val(),
                 text: $('#test_body').val()
             });
+            showMsg('<i class="fa fa-check"></i> Notification sent.', false, true, 5000);
         }
     }
```
@@ -71,6 +71,13 @@
|
||||
<h3>Version ${common.RELEASE} <small><a id="changelog-modal-link" href="#"><i class="fa fa-info-circle"></i> Changelog</a></small></h3>
|
||||
</div>
|
||||
% endif
|
||||
<div class="padded-header">
|
||||
<h3>Tautulli News</h3>
|
||||
</div>
|
||||
<div id="tautulli-news">
|
||||
<div class='text-muted'><i class="fa fa-refresh fa-spin"></i> Loading news...</div>
|
||||
<br>
|
||||
</div>
|
||||
<div class="padded-header">
|
||||
<h3>Tautulli Configuration</h3>
|
||||
</div>
|
||||
@@ -215,12 +222,14 @@
|
||||
<p class="help-block">Check for Tautulli updates periodically.</p>
|
||||
</div>
|
||||
<div id="git_update_options">
|
||||
% if not plexpy.FROZEN:
|
||||
<div class="checkbox">
|
||||
<label>
|
||||
<input type="checkbox" id="plexpy_auto_update" name="plexpy_auto_update" value="1" ${config['plexpy_auto_update']} ${docker_setting}> Update Automatically ${docker_msg | n}
|
||||
</label>
|
||||
<p class="help-block">Update Tautulli automatically if an update is available.</p>
|
||||
</div>
|
||||
% endif
|
||||
<div class="form-group advanced-setting">
|
||||
<label for="git_token">GitHub API Token</label>
|
||||
<div class="row">
|
||||
@@ -448,13 +457,19 @@
|
||||
</div>
|
||||
|
||||
<p class="help-block">Note: Web interface changes require a restart.</p>
|
||||
% if os.name == 'nt':
|
||||
% if common.PLATFORM in ('Windows', 'Darwin'):
|
||||
<div class="checkbox">
|
||||
<label>
|
||||
<input type="checkbox" class="http-settings" name="win_sys_tray" id="win_sys_tray" value="1" ${config['win_sys_tray']}> Enable System Tray Icon
|
||||
<input type="checkbox" class="http-settings" name="sys_tray_icon" id="sys_tray_icon" value="1" ${config['sys_tray_icon']}> Enable System Tray Icon
|
||||
</label>
|
||||
<p class="help-block">Show Tautulli shortcut in the system tray.</p>
|
||||
</div>
|
||||
<div class="checkbox">
|
||||
<label>
|
||||
<input type="checkbox" name="launch_startup" id="launch_startup" value="1" ${config['launch_startup']}> Launch at System Startup
|
||||
</label>
|
||||
<p class="help-block">Start Tautulli automatically after Login.</p>
|
||||
</div>
|
||||
% endif
|
||||
<div class="checkbox">
|
||||
<label>
|
||||
@@ -1313,8 +1328,9 @@
|
||||
<h3>Database Import</h3>
|
||||
</div>
|
||||
|
||||
<p class="help-block">Click a button below to import an existing database from another app.</p>
|
||||
<p class="help-block">Click a button below to import an existing database from the selected app.</p>
|
||||
<div class="btn-group">
|
||||
<button class="btn btn-form toggle-app-import-modal" type="button" data-target="#app-import-modal" data-toggle="modal" data-app="tautulli">Tautulli</button>
|
||||
<button class="btn btn-form toggle-app-import-modal" type="button" data-target="#app-import-modal" data-toggle="modal" data-app="plexwatch">PlexWatch</button>
|
||||
<button class="btn btn-form toggle-app-import-modal" type="button" data-target="#app-import-modal" data-toggle="modal" data-app="plexivity">Plexivity</button>
|
||||
</div>
|
||||
@@ -1873,6 +1889,38 @@ Rating: {rating}/10 --> Rating: /10
        </div>
    </div>
    <div id="mobile-device-config-modal" class="modal fade wide" tabindex="-1" role="dialog" aria-labelledby="mobile-device-config-modal"></div>
+    <div id="browse-path-modal" class="modal fade" tabindex="-1" role="dialog" aria-labelledby="browse-path-modal">
+        <div class="modal-dialog" role="document">
+            <div class="modal-content">
+                <div class="modal-header">
+                    <button type="button" class="close" data-dismiss="modal" aria-hidden="true"><i class="fa fa-remove"></i></button>
+                    <h4 class="modal-title">File Browser</h4>
+                </div>
+                <div class="modal-body">
+                    <div class="row">
+                        <div class="col-md-12">
+                            <div class="form-group">
+                                <label for="browse-path">Select a <span id="browse-path-type"></span> Below</label>
+                                <div class="row">
+                                    <div class="col-md-12">
+                                        <input type="text" class="form-control" id="browse-path" name="browse-path" value="" size="30" disabled>
+                                    </div>
+                                </div>
+                            </div>
+                        </div>
+                        <div class="col-md-12" style="height: 400px; overflow: auto;">
+                            <ul id="browse-path-list" class="stacked-configs list-unstyled">
+                            </ul>
+                        </div>
+                    </div>
+                </div>
+                <div class="modal-footer">
+                    <span id="browse-path-status-message" style="padding-right: 25px;"></span>
+                    <input type="button" id="select-browse-file" class="btn btn-bright" value="Select">
+                </div>
+            </div>
+        </div>
+    </div>
</%def>

<%def name="javascriptIncludes()">
@@ -1979,6 +2027,38 @@ Rating: {rating}/10 --> Rating: /10
            });
        }

+        function browsePath(key, path, filter_ext) {
+            $("#browse-path-status-message").html('<i class="fa fa-fw fa-spin fa-refresh"></i>');
+            getBrowsePath(key, path, filter_ext).then(function (data) {
+                if (data.result === 'error') {
+                    $("#browse-path-status-message").html("<i class='fa fa-exclamation-triangle'></i> " + data.message);
+                } else {
+                    $("#browse-path-status-message").html("");
+
+                    $('#browse-path').val(data.path);
+                    var browse_list = $('#browse-path-list');
+                    browse_list.parent().animate({ scrollTop: 0 }, 0);
+                    browse_list.empty();
+
+                    $.each(data.data, function(i, item) {
+                        var browse_item = $('<li/>')
+                            .html("<span><i class='fa fa-fw fa-" + item.icon + "'></i> " + item.title + "</span>")
+                            .addClass(item.type + ' pointer')
+                            .data('key', item.key)
+                            .data('path', item.path)
+                            .appendTo(browse_list);
+                    });
+
+                    $('#browse-path-list li').click(function (){
+                        $('#browse-path').val($(this).data('path'));
+                        if ($(this).hasClass('folder')) {
+                            browsePath($(this).data('key'), null, filter_ext);
+                        }
+                    });
+                }
+            });
+        }
+
        $(document).ready(function() {

            // Javascript to enable link to tab
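
The browsePath() helper above implies a JSON contract for getBrowsePath(). A minimal server-side sketch of that contract (function name and behavior are illustrative, inferred from the JavaScript rather than taken from Tautulli's code):

import os

def get_browse_path(path, filter_ext=None):
    # Returns the JSON shape the JavaScript expects:
    # {'result', 'message'?, 'path', 'data': [{'key','path','title','type','icon'}]}
    if not os.path.isdir(path):
        return {'result': 'error', 'message': 'Invalid path.'}
    entries = []
    for name in sorted(os.listdir(path)):
        full = os.path.join(path, name)
        is_dir = os.path.isdir(full)
        if not is_dir and filter_ext and not name.endswith(filter_ext):
            continue  # the filter only applies to files, not folders
        entries.append({'key': full, 'path': full, 'title': name,
                        'type': 'folder' if is_dir else 'file',
                        'icon': 'folder-open' if is_dir else 'file'})
    return {'result': 'success', 'path': path, 'data': entries}
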
@@ -2917,6 +2997,48 @@ $(document).ready(function() {
    $('#resources-xml').on('tripleclick', function () {
        openPlexXML('/api/resources', true, {includeHttps: 1});
    });

+    var tautulli_news = $('#tautulli-news');
+    $.ajax({
+        url: 'https://tautulli.com/news/tautulli-news.json',
+        type: 'GET',
+        dataType: 'json',
+        cache: false,
+        async: true,
+        success: function (data) {
+            if (data) {
+                var now = moment().endOf('day');
+                var news = $('<ul/>').addClass('accordion list-unstyled');
+                $.each(data, function (index, news_item) {
+                    var date = moment(news_item.date, "YYYY-MM-DD");
+                    if (index >= 5) { return false; }
+                    var header = $('<div/>').addClass('link').html(
+                        '<span class="toggle-left"><i class="fa fa-newspaper fa-fw"></i></span>' +
+                        '<span class="news-title">' + news_item.title + '</span>' +
+                        '<span class="toggle-right"><i class="fa fa-chevron-down fa-fw"></i></span>' +
+                        '<span class="news-date toggle-right">' + date.format($('#date_format').val()) + '</span>');
+                    var subtitle = $('<span/>').addClass('news-subtitle').html(news_item.subtitle);
+                    var body = $('<span/>').addClass('news-body').html(news_item.body);
+                    var content = $('<div/>').addClass('submenu');
+                    if (news_item.subtitle) { content.append(subtitle); }
+                    content.append(body);
+                    var li = $('<li/>').append(header).append(content);
+                    if (index === 0 && Math.abs(now.diff(date, 'days')) < 7) {
+                        li.addClass('open');
+                        content.css('display', 'block');
+                    }
+                    news.append(li);
+                });
+                tautulli_news.html(news);
+                var accordion_news = new Accordion(news, false);
+            } else {
+                tautulli_news.html('<p class="help-block"><i class="fa fa-check"></i> No news available.</p>');
+            }
+        },
+        error: function () {
+            tautulli_news.html('<p class="help-block"><i class="fa fa-exclamation-triangle"></i> Failed to retrieve news.</p>');
+        }
+    });
});
</script>
</%def>
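
For reference, the script above implies this shape for the tautulli-news.json feed (field names taken from the JavaScript; the values here are invented):

news_feed = [
    {
        'date': '2018-03-01',               # parsed with moment(date, "YYYY-MM-DD")
        'title': 'Tautulli v2 Released',
        'subtitle': 'Now with more stats',  # optional; skipped when empty
        'body': '<p>Release notes go here.</p>',
    },
]
# Only the first five items are rendered, and the newest item is expanded
# automatically when it is less than a week old.
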
@@ -203,7 +203,7 @@
|
||||
<h3>Database Import</h3>
|
||||
<div class="wizard-input-section">
|
||||
<p class="help-block">
|
||||
-                If you have an existing PlexWatch/Plexivity database, you can import the data into Tautulli.
+                If you have an existing Tautulli, PlexWatch, or Plexivity database, you can import the data into Tautulli.
|
||||
</p>
|
||||
<p class="help-block">
|
||||
To import a database, navigate to the <strong>Settings</strong> page
|
||||
@@ -216,7 +216,8 @@
|
||||
<input type="checkbox" name="first_run" id="first_run" value="1" checked>
|
||||
<input type="checkbox" name="group_history_tables" id="group_history_tables" value="1" checked>
|
||||
<input type="checkbox" name="history_table_activity" id="history_table_activity" value="1" checked>
|
||||
-                <input type="checkbox" name="win_sys_tray" id="win_sys_tray" value="1" checked>
+                <input type="checkbox" name="sys_tray_icon" id="sys_tray_icon" value="1" checked>
+                <input type="checkbox" name="launch_startup" id="launch_startup" value="1" checked>
|
||||
<input type="checkbox" name="launch_browser" id="launch_browser" value="1" checked>
|
||||
<input type="checkbox" name="api_enabled" id="api_enabled" value="1" checked>
|
||||
<input type="checkbox" name="refresh_users_on_startup" id="refresh_users_on_startup" value="1" checked>
|
||||
@@ -494,7 +495,7 @@ $(document).ready(function() {
|
||||
var pms_ssl = $("#pms_ssl").val();
|
||||
var pms_is_remote = $("#pms_is_remote").val();
|
||||
if ((pms_ip !== '') || (pms_port !== '')) {
|
||||
-            $("#pms-verify-status").html('<i class="fa fa-refresh fa-spin"></i> Validating server...');
+            $("#pms-verify-status").html('<i class="fa fa-refresh fa-spin"></i> Verifying server...');
|
||||
$('#pms-verify-status').fadeIn('fast');
|
||||
$.ajax({
|
||||
url: 'get_server_id',
|
||||
@@ -509,7 +510,7 @@ $(document).ready(function() {
|
||||
async: true,
|
||||
timeout: 5000,
|
||||
error: function (jqXHR, textStatus, errorThrown) {
|
||||
-                    $("#pms-verify-status").html('<i class="fa fa-exclamation-circle"></i> This is not a Plex Server!');
+                    $("#pms-verify-status").html('<i class="fa fa-exclamation-circle"></i> Error verifying server: ' + textStatus);
|
||||
$('#pms-verify-status').fadeIn('fast');
|
||||
},
|
||||
success: function(xhr, status) {
|
||||
|
@@ -1,88 +0,0 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# PROVIDE: tautulli
|
||||
# REQUIRE: tautulli
|
||||
# KEYWORD: shutdown
|
||||
#
|
||||
# Add the following lines to /etc/rc.conf.local or /etc/rc.conf
|
||||
# to enable this service:
|
||||
#
|
||||
# tautulli_enable (bool): Set to NO by default.
|
||||
# Set it to YES to enable it.
|
||||
# tautulli_user:  The user account the Tautulli daemon runs as.
#                 It uses the 'tautulli' user by default. Do not
#                 set it empty or it will run as root.
|
||||
# tautulli_dir: Directory where Tautulli lives.
|
||||
# Default: /usr/local/share/Tautulli
|
||||
# tautulli_chdir: Change to this directory before running Tautulli.
|
||||
# Default is same as tautulli_dir.
|
||||
# tautulli_pid: The name of the pidfile to create.
|
||||
# Default is tautulli.pid in tautulli_dir.
|
||||
|
||||
. /etc/rc.subr
|
||||
|
||||
name="tautulli"
|
||||
rcvar=${name}_enable
|
||||
|
||||
load_rc_config ${name}
|
||||
|
||||
: ${tautulli_enable:="NO"}
|
||||
: ${tautulli_user:="tautulli"}
|
||||
: ${tautulli_dir:="/usr/local/share/Tautulli"}
|
||||
: ${tautulli_chdir:="${tautulli_dir}"}
|
||||
: ${tautulli_pid:="${tautulli_dir}/tautulli.pid"}
|
||||
: ${tautulli_conf:="${tautulli_dir}/config.ini"}
|
||||
|
||||
WGET="/usr/local/bin/wget" # You need wget for this script to safely shutdown Tautulli.
|
||||
if [ -e "${tautulli_conf}" ]; then
|
||||
HOST=`grep -A64 "\[General\]" "${tautulli_conf}"|egrep "^http_host"|perl -wple 's/^http_host = (.*)$/$1/'`
|
||||
PORT=`grep -A64 "\[General\]" "${tautulli_conf}"|egrep "^http_port"|perl -wple 's/^http_port = (.*)$/$1/'`
|
||||
fi
|
||||
|
||||
status_cmd="${name}_status"
|
||||
stop_cmd="${name}_stop"
|
||||
|
||||
command="${tautulli_dir}/Tautulli.py"
|
||||
command_args="--daemon --quiet --nolaunch --port ${PORT} --pidfile ${tautulli_pid} --config ${tautulli_conf}"
|
||||
|
||||
# Check for wget and refuse to start without it.
|
||||
if [ ! -x "${WGET}" ]; then
|
||||
warn "Tautulli not started: You need wget to safely shut down Tautulli."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Ensure user is root when running this script.
|
||||
if [ `id -u` != "0" ]; then
|
||||
echo "Oops, you should be root before running this!"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
verify_tautulli_pid() {
|
||||
# Make sure the pid corresponds to the Tautulli process.
|
||||
pid=`cat ${tautulli_pid} 2>/dev/null`
|
||||
ps -p ${pid} | grep -q "python ${tautulli_dir}/Tautulli.py"
|
||||
return $?
|
||||
}
|
||||
|
||||
# Try to stop Tautulli cleanly by calling shutdown over http.
|
||||
tautulli_stop() {
|
||||
if [ ! -e "${tautulli_conf}" ]; then
|
||||
        echo "Tautulli's settings file does not exist. Try starting Tautulli, as this should create the file."
|
||||
exit 1
|
||||
fi
|
||||
echo "Stopping $name"
|
||||
verify_tautulli_pid
|
||||
${WGET} -O - -q --user=${SBUSR} --password=${SBPWD} "http://${HOST}:${PORT}/shutdown/" >/dev/null
|
||||
|
||||
if [ -n "${pid}" ]; then
|
||||
wait_for_pids ${pid}
|
||||
echo "Stopped $name"
|
||||
fi
|
||||
}
|
||||
|
||||
tautulli_status() {
|
||||
verify_tautulli_pid && echo "$name is running as ${pid}" || echo "$name is not running"
|
||||
}
|
||||
|
||||
run_rc_command "$1"
|
@@ -1,76 +0,0 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
### BEGIN INIT INFO
|
||||
# Provides: Tautulli
|
||||
# Required-Start: $all
|
||||
# Required-Stop: $all
|
||||
# Default-Start: 2 3 4 5
|
||||
# Default-Stop: 0 1 6
|
||||
# Short-Description: starts Tautulli
|
||||
# Description: starts Tautulli
|
||||
### END INIT INFO
|
||||
|
||||
# Source function library.
|
||||
. /etc/init.d/functions
|
||||
|
||||
## Variables
|
||||
prog=tautulli
|
||||
lockfile=/var/lock/subsys/$prog
|
||||
homedir=/opt/Tautulli
|
||||
datadir=/opt/Tautulli
|
||||
configfile=/opt/Tautulli/config.ini
|
||||
pidfile=/var/run/tautulli.pid
|
||||
nice=
|
||||
# The following line must point to your Python 2.7 install
|
||||
python27=/usr/src/Python-2.7.11/python
|
||||
##
|
||||
|
||||
options=" --daemon --config $configfile --pidfile $pidfile --datadir $datadir --nolaunch --quiet"
|
||||
|
||||
start() {
|
||||
# Start daemon.
|
||||
echo -n $"Starting $prog: "
|
||||
daemon --pidfile=$pidfile $nice $python27 $homedir/Tautulli.py $options
|
||||
RETVAL=$?
|
||||
echo
|
||||
[ $RETVAL -eq 0 ] && touch $lockfile
|
||||
return $RETVAL
|
||||
}
|
||||
|
||||
stop() {
|
||||
echo -n $"Shutting down $prog: "
|
||||
killproc -p $pidfile $python27
|
||||
RETVAL=$?
|
||||
echo
|
||||
[ $RETVAL -eq 0 ] && rm -f $lockfile
|
||||
return $RETVAL
|
||||
}
|
||||
|
||||
# See how we were called.
|
||||
case "$1" in
|
||||
start)
|
||||
start
|
||||
;;
|
||||
stop)
|
||||
stop
|
||||
;;
|
||||
status)
|
||||
status $prog
|
||||
;;
|
||||
restart|force-reload)
|
||||
stop
|
||||
start
|
||||
;;
|
||||
try-restart|condrestart)
|
||||
if status $prog > /dev/null; then
|
||||
stop
|
||||
start
|
||||
fi
|
||||
;;
|
||||
reload)
|
||||
exit 3
|
||||
;;
|
||||
*)
|
||||
echo $"Usage: $0 {start|stop|status|restart|try-restart|force-reload}"
|
||||
exit 2
|
||||
esac
|
@@ -51,7 +51,7 @@ verify_tautulli_pid() {
|
||||
# Make sure the pid corresponds to the Tautulli process.
|
||||
if [ -f ${tautulli_pid} ]; then
|
||||
pid=`cat ${tautulli_pid} 2>/dev/null`
|
||||
-        ps -p ${pid} | grep -q "python2 ${tautulli_dir}/Tautulli.py"
+        ps -p ${pid} | grep -q "python ${tautulli_dir}/Tautulli.py"
|
||||
return $?
|
||||
else
|
||||
return 0
|
||||
@@ -60,7 +60,7 @@ verify_tautulli_pid() {
|
||||
|
||||
# Try to stop Tautulli cleanly by sending SIGTERM
|
||||
tautulli_stop() {
|
||||
-    echo "Stopping $name"
+    echo "Stopping $name."
|
||||
verify_tautulli_pid
|
||||
if [ -n "${pid}" ]; then
|
||||
kill ${pid}
|
||||
|
@@ -1,81 +0,0 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# PROVIDE: tautulli
|
||||
# REQUIRE: DAEMON tautulli
|
||||
# KEYWORD: shutdown
|
||||
#
|
||||
# Add the following lines to /etc/rc.conf.local or /etc/rc.conf
|
||||
# to enable this service:
|
||||
#
|
||||
# tautulli_enable (bool): Set to NO by default.
|
||||
# Set it to YES to enable it.
|
||||
# tautulli_user:  The user account the Tautulli daemon runs as.
#                 It uses the 'tautulli' user by default. Do not
#                 set it empty or it will run as root.
|
||||
# tautulli_dir: Directory where Tautulli lives.
|
||||
# Default: /usr/local/share/Tautulli
|
||||
# tautulli_chdir: Change to this directory before running Tautulli.
|
||||
# Default is same as tautulli_dir.
|
||||
# tautulli_pid: The name of the pidfile to create.
|
||||
# Default is tautulli.pid in tautulli_dir.
|
||||
PATH="/sbin:/bin:/usr/sbin:/usr/bin:/usr/local/sbin:/usr/local/bin"
|
||||
|
||||
. /etc/rc.subr
|
||||
|
||||
name="tautulli"
|
||||
rcvar=${name}_enable
|
||||
|
||||
load_rc_config ${name}
|
||||
|
||||
: ${tautulli_enable:="NO"}
|
||||
: ${tautulli_user:="tautulli"}
|
||||
: ${tautulli_dir:="/usr/local/share/Tautulli"}
|
||||
: ${tautulli_chdir:="${tautulli_dir}"}
|
||||
: ${tautulli_pid:="${tautulli_dir}/tautulli.pid"}
|
||||
: ${tautulli_flags:=""}
|
||||
|
||||
status_cmd="${name}_status"
|
||||
stop_cmd="${name}_stop"
|
||||
|
||||
command="${tautulli_dir}/Tautulli.py"
|
||||
command_args="--daemon --pidfile ${tautulli_pid} --quiet --nolaunch ${tautulli_flags}"
|
||||
|
||||
# Ensure user is root when running this script.
|
||||
if [ `id -u` != "0" ]; then
|
||||
echo "Oops, you should be root before running this!"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
verify_tautulli_pid() {
|
||||
# Make sure the pid corresponds to the Tautulli process.
|
||||
if [ -f ${tautulli_pid} ]; then
|
||||
pid=`cat ${tautulli_pid} 2>/dev/null`
|
||||
ps -p ${pid} | grep -q "python2 ${tautulli_dir}/Tautulli.py"
|
||||
return $?
|
||||
else
|
||||
return 0
|
||||
fi
|
||||
}
|
||||
|
||||
# Try to stop Tautulli cleanly by sending SIGTERM
|
||||
tautulli_stop() {
|
||||
echo "Stopping $name."
|
||||
verify_tautulli_pid
|
||||
if [ -n "${pid}" ]; then
|
||||
kill ${pid}
|
||||
wait_for_pids ${pid}
|
||||
echo "Stopped."
|
||||
fi
|
||||
}
|
||||
|
||||
tautulli_status() {
|
||||
verify_tautulli_pid
|
||||
if [ -n "${pid}" ]; then
|
||||
echo "$name is running as ${pid}."
|
||||
else
|
||||
echo "$name is not running."
|
||||
fi
|
||||
}
|
||||
|
||||
run_rc_command "$1"
|
1	init-scripts/init.freenas	Symbolic link
@@ -0,0 +1 @@
|
||||
init.freebsd
|
@@ -1,47 +0,0 @@
|
||||
<?xml version="1.0"?>
|
||||
<!DOCTYPE service_bundle SYSTEM "/usr/share/lib/xml/dtd/service_bundle.dtd.1">
|
||||
<!--
|
||||
Created by Manifold
|
||||
--><service_bundle type="manifest" name="tautulli">
|
||||
|
||||
<service name="application/tautulli" type="service" version="1">
|
||||
|
||||
<create_default_instance enabled="true"/>
|
||||
|
||||
<single_instance/>
|
||||
|
||||
<dependency name="network" grouping="require_all" restart_on="error" type="service">
|
||||
<service_fmri value="svc:/milestone/network:default"/>
|
||||
</dependency>
|
||||
|
||||
<dependency name="filesystem" grouping="require_all" restart_on="error" type="service">
|
||||
<service_fmri value="svc:/system/filesystem/local"/>
|
||||
</dependency>
|
||||
|
||||
<method_context>
|
||||
<method_credential user="tautulli" group="nogroup"/>
|
||||
</method_context>
|
||||
|
||||
<exec_method type="method" name="start" exec="python /opt/Tautulli/Tautulli.py --daemon --quiet --nolaunch" timeout_seconds="60"/>
|
||||
|
||||
<exec_method type="method" name="stop" exec=":kill" timeout_seconds="60"/>
|
||||
|
||||
<property_group name="startd" type="framework">
|
||||
<propval name="duration" type="astring" value="contract"/>
|
||||
<propval name="ignore_error" type="astring" value="core,signal"/>
|
||||
</property_group>
|
||||
|
||||
|
||||
<stability value="Evolving"/>
|
||||
|
||||
<template>
|
||||
<common_name>
|
||||
<loctext xml:lang="C">
|
||||
Tautulli
|
||||
</loctext>
|
||||
</common_name>
|
||||
</template>
|
||||
|
||||
</service>
|
||||
|
||||
</service_bundle>
|
@@ -1,209 +0,0 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
## Don't edit this file
|
||||
## Edit the user configuration in /etc/default/tautulli to make changes
|
||||
##
|
||||
## Make sure init script is executable
|
||||
## sudo chmod +x /path/to/init.ubuntu
|
||||
##
|
||||
## Install the init script
|
||||
## sudo ln -s /path/to/init.ubuntu /etc/init.d/tautulli
|
||||
##
|
||||
## Create the tautulli daemon user:
|
||||
## sudo adduser --system --no-create-home tautulli
|
||||
##
|
||||
## Make sure /opt/Tautulli is owned by the tautulli user
|
||||
## sudo chown tautulli:nogroup -R /opt/Tautulli
|
||||
##
|
||||
## Touch the default file to stop the warning message when starting
|
||||
## sudo touch /etc/default/tautulli
|
||||
##
|
||||
## To start Tautulli automatically
|
||||
## sudo update-rc.d tautulli defaults
|
||||
##
|
||||
## To start/stop/restart Tautulli
|
||||
## sudo service tautulli start
|
||||
## sudo service tautulli stop
|
||||
## sudo service tautulli restart
|
||||
##
|
||||
## TAUTULLI_USER= #$RUN_AS, username to run Tautulli under, the default is tautulli
|
||||
## TAUTULLI_HOME= #$APP_PATH, the location of Tautulli.py, the default is /opt/Tautulli
|
||||
## TAUTULLI_DATA= #$DATA_DIR, the location of plexpy.db, cache, logs, the default is /opt/Tautulli
|
||||
## TAUTULLI_PIDFILE= #$PID_FILE, the location of tautulli.pid, the default is /var/run/tautulli/tautulli.pid
|
||||
## PYTHON_BIN= #$DAEMON, the location of the python binary, the default is /usr/bin/python
|
||||
## TAUTULLI_OPTS= #$EXTRA_DAEMON_OPTS, extra cli option for Tautulli, i.e. " --config=/home/Tautulli/config.ini"
|
||||
## SSD_OPTS= #$EXTRA_SSD_OPTS, extra start-stop-daemon option like " --group=users"
|
||||
## TAUTULLI_PORT= #$PORT_OPTS, hardcoded port for the webserver, overrides value in config.ini
|
||||
##
|
||||
## EXAMPLE if want to run as different user
|
||||
## add TAUTULLI_USER=username to /etc/default/tautulli
|
||||
## otherwise default tautulli is used
|
||||
#
|
||||
### BEGIN INIT INFO
|
||||
# Provides: tautulli
|
||||
# Required-Start: $local_fs $network $remote_fs
|
||||
# Required-Stop: $local_fs $network $remote_fs
|
||||
# Should-Start: $NetworkManager
|
||||
# Should-Stop: $NetworkManager
|
||||
# Default-Start: 2 3 4 5
|
||||
# Default-Stop: 0 1 6
|
||||
# Short-Description: starts instance of Tautulli
|
||||
# Description: starts instance of Tautulli using start-stop-daemon
|
||||
### END INIT INFO
|
||||
|
||||
# Script name
|
||||
NAME=tautulli
|
||||
|
||||
# App name
|
||||
DESC=Tautulli
|
||||
|
||||
SETTINGS_LOADED=FALSE
|
||||
|
||||
. /lib/lsb/init-functions
|
||||
|
||||
# Source Tautulli configuration
|
||||
if [ -f /etc/default/tautulli ]; then
|
||||
SETTINGS=/etc/default/tautulli
|
||||
else
|
||||
    log_warning_msg "/etc/default/tautulli not found, using default settings.";
|
||||
fi
|
||||
|
||||
check_retval() {
|
||||
if [ $? -eq 0 ]; then
|
||||
log_end_msg 0
|
||||
return 0
|
||||
else
|
||||
log_end_msg 1
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
load_settings() {
|
||||
if [ $SETTINGS_LOADED != "TRUE" ]; then
|
||||
. $SETTINGS
|
||||
|
||||
## The defaults
|
||||
# Run as username
|
||||
RUN_AS=${TAUTULLI_USER-tautulli}
|
||||
|
||||
# Path to app TAUTULLI_HOME=path_to_app_Tautulli.py
|
||||
APP_PATH=${TAUTULLI_HOME-/opt/Tautulli}
|
||||
|
||||
# Data directory where plexpy.db, cache and logs are stored
|
||||
DATA_DIR=${TAUTULLI_DATA-/opt/Tautulli}
|
||||
|
||||
# Path to store PID file
|
||||
PID_FILE=${TAUTULLI_PIDFILE-/var/run/tautulli/tautulli.pid}
|
||||
|
||||
# Path to python bin
|
||||
DAEMON=${PYTHON_BIN-/usr/bin/python}
|
||||
|
||||
# Extra daemon option like: TAUTULLI_OPTS=" --config=/home/Tautulli/config.ini"
|
||||
EXTRA_DAEMON_OPTS=${TAUTULLI_OPTS-}
|
||||
|
||||
# Extra start-stop-daemon option like START_OPTS=" --group=users"
|
||||
EXTRA_SSD_OPTS=${SSD_OPTS-}
|
||||
|
||||
# Hardcoded port to run on, overrides config.ini settings
|
||||
[ -n "$TAUTULLI_PORT" ] && {
|
||||
PORT_OPTS=" --port=${TAUTULLI_PORT} "
|
||||
}
|
||||
|
||||
DAEMON_OPTS=" Tautulli.py --quiet --daemon --nolaunch --pidfile=${PID_FILE} --datadir=${DATA_DIR} ${PORT_OPTS}${EXTRA_DAEMON_OPTS}"
|
||||
|
||||
SETTINGS_LOADED=TRUE
|
||||
fi
|
||||
|
||||
[ -x $DAEMON ] || {
|
||||
log_warning_msg "$DESC: Can't execute daemon, aborting. See $DAEMON";
|
||||
return 1;}
|
||||
|
||||
return 0
|
||||
}
|
||||
|
||||
load_settings || exit 0
|
||||
|
||||
is_running () {
|
||||
    # Returns 0 when the process is running, 1 otherwise.
|
||||
if [ -e $PID_FILE ]; then
|
||||
PID=`cat $PID_FILE`
|
||||
|
||||
RET=$?
|
||||
[ $RET -gt 1 ] && exit 1 || return $RET
|
||||
else
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
handle_pid () {
|
||||
PID_PATH=`dirname $PID_FILE`
|
||||
[ -d $PID_PATH ] || mkdir -p $PID_PATH && chown -R $RUN_AS $PID_PATH > /dev/null || {
|
||||
log_warning_msg "$DESC: Could not create $PID_FILE, See $SETTINGS, aborting.";
|
||||
return 1;}
|
||||
|
||||
if [ -e $PID_FILE ]; then
|
||||
PID=`cat $PID_FILE`
|
||||
if ! kill -0 $PID > /dev/null 2>&1; then
|
||||
log_warning_msg "Removing stale $PID_FILE"
|
||||
rm $PID_FILE
|
||||
fi
|
||||
fi
|
||||
}
|
||||
|
||||
handle_datadir () {
|
||||
[ -d $DATA_DIR ] || mkdir -p $DATA_DIR && chown -R $RUN_AS $DATA_DIR > /dev/null || {
|
||||
log_warning_msg "$DESC: Could not create $DATA_DIR, See $SETTINGS, aborting.";
|
||||
return 1;}
|
||||
}
|
||||
|
||||
handle_updates () {
|
||||
chown -R $RUN_AS $APP_PATH > /dev/null || {
|
||||
log_warning_msg "$DESC: $APP_PATH not writable by $RUN_AS for web-updates";
|
||||
return 0; }
|
||||
}
|
||||
|
||||
start_tautulli () {
|
||||
handle_pid
|
||||
handle_datadir
|
||||
handle_updates
|
||||
if ! is_running; then
|
||||
log_daemon_msg "Starting $DESC"
|
||||
start-stop-daemon -o -d $APP_PATH -c $RUN_AS --start $EXTRA_SSD_OPTS --pidfile $PID_FILE --exec $DAEMON -- $DAEMON_OPTS
|
||||
check_retval
|
||||
else
|
||||
log_success_msg "$DESC: already running (pid $PID)"
|
||||
fi
|
||||
}
|
||||
|
||||
stop_tautulli () {
|
||||
if is_running; then
|
||||
log_daemon_msg "Stopping $DESC"
|
||||
start-stop-daemon -o --stop --pidfile $PID_FILE --retry 15
|
||||
check_retval
|
||||
else
|
||||
log_success_msg "$DESC: not running"
|
||||
fi
|
||||
}
|
||||
|
||||
case "$1" in
|
||||
start)
|
||||
start_tautulli
|
||||
;;
|
||||
stop)
|
||||
stop_tautulli
|
||||
;;
|
||||
restart|force-reload)
|
||||
stop_tautulli
|
||||
start_tautulli
|
||||
;;
|
||||
status)
|
||||
status_of_proc -p "$PID_FILE" "$DAEMON" "$DESC"
|
||||
;;
|
||||
*)
|
||||
N=/etc/init.d/$NAME
|
||||
echo "Usage: $N {start|stop|restart|force-reload|status}" >&2
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
exit 0
|
@@ -1,18 +0,0 @@
|
||||
# tautulli
|
||||
#
|
||||
# This is a session/user job. Install this file into /usr/share/upstart/sessions
|
||||
# if Tautulli is installed system wide, and into $XDG_CONFIG_HOME/upstart if
|
||||
# Tautulli is installed per user. Change the executable path appropriately.
|
||||
|
||||
start on desktop-start
|
||||
stop on desktop-end
|
||||
|
||||
env CONFIG=""$XDG_CONFIG_HOME"/Tautulli"
|
||||
env DATA=""$XDG_DATA_HOME"/Tautulli"
|
||||
|
||||
pre-start script
|
||||
[ -d "$CONFIG" ] || mkdir -p "$CONFIG"
|
||||
[ -d "$DATA" ] || mkdir -p "$DATA"
|
||||
end script
|
||||
|
||||
exec Tautulli.py --nolaunch --config "$CONFIG"/config.ini --datadir "$DATA"
|
@@ -10,7 +10,7 @@
|
||||
|
||||
|
||||
import sys, re, os
|
||||
-from cStringIO import StringIO
+from io import StringIO
|
||||
|
||||
|
||||
|
||||
@@ -116,6 +116,6 @@ def consume(outbuffer = None): # Capture standard output
|
||||
|
||||
if __name__ == '__main__':
|
||||
consume(sys.stdout).write(sys.stdin.read())
|
||||
-    print '\n'
+    print('\n')
|
||||
|
||||
# vim: set nowrap tabstop=4 shiftwidth=4 softtabstop=0 expandtab textwidth=0 filetype=python foldmethod=indent foldcolumn=4
|
||||
|
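
These two fixes follow the standard Python 2/3 compatibility pattern; a minimal self-contained sketch of it (not from the repo):

from __future__ import print_function  # makes print() a function on Python 2
from io import StringIO                # exists on both 2 and 3; cStringIO does not

buf = StringIO()
buf.write(u'captured output\n')  # io.StringIO requires text, not bytes, on Python 2
print(buf.getvalue(), end='')
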
@@ -1,16 +1,7 @@
|
||||
-###############################################################################
-# Universal Analytics for Python
-# Copyright (c) 2013, Analytics Pros
-#
-# This project is free software, distributed under the BSD license.
-# Analytics Pros offers consulting and integration services if your firm needs
-# assistance in strategy, implementation, or auditing existing work.
-###############################################################################
-
-from urllib2 import urlopen, build_opener, install_opener
-from urllib2 import Request, HTTPSHandler
-from urllib2 import URLError, HTTPError
-from urllib import urlencode
+from future.moves.urllib.request import urlopen, build_opener, install_opener
+from future.moves.urllib.request import Request, HTTPSHandler
+from future.moves.urllib.error import URLError, HTTPError
+from future.moves.urllib.parse import urlencode
|
||||
|
||||
import random
|
||||
import datetime
|
||||
@@ -24,8 +15,8 @@ def generate_uuid(basedata=None):
|
||||
""" Provides a _random_ UUID with no input, or a UUID4-format MD5 checksum of any input data provided """
|
||||
if basedata is None:
|
||||
return str(uuid.uuid4())
|
||||
-    elif isinstance(basedata, basestring):
-        checksum = hashlib.md5(basedata).hexdigest()
+    elif isinstance(basedata, str):
+        checksum = hashlib.md5(str(basedata).encode('utf-8')).hexdigest()
|
||||
return '%8s-%4s-%4s-%4s-%12s' % (
|
||||
checksum[0:8], checksum[8:12], checksum[12:16], checksum[16:20], checksum[20:32])
|
||||
|
||||
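
The generate_uuid() change above is the usual Python 3 hashing fix: hashlib.md5() no longer accepts text. A small self-contained illustration (the input value is invented):

import hashlib

# On Python 3, md5() only accepts bytes, hence the .encode('utf-8') above:
hashlib.md5('plexpy'.encode('utf-8')).hexdigest()   # ok
# hashlib.md5('plexpy').hexdigest()                 # TypeError on Python 3

# The same input therefore always yields the same UUID-formatted checksum:
checksum = hashlib.md5('plexpy'.encode('utf-8')).hexdigest()
print('%8s-%4s-%4s-%4s-%12s' % (checksum[:8], checksum[8:12],
                                checksum[12:16], checksum[16:20],
                                checksum[20:32]))
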
@@ -44,7 +35,7 @@ class Time(datetime.datetime):
|
||||
def to_unix(cls, timestamp):
|
||||
""" Wrapper over time module to produce Unix epoch time as a float """
|
||||
if not isinstance(timestamp, datetime.datetime):
|
||||
-            raise TypeError, 'Time.milliseconds expects a datetime object'
+            raise TypeError('Time.milliseconds expects a datetime object')
|
||||
base = time.mktime(timestamp.timetuple())
|
||||
return base
|
||||
|
||||
@@ -86,14 +77,14 @@ class HTTPRequest(object):
|
||||
def fixUTF8(cls, data): # Ensure proper encoding for UA's servers...
|
||||
""" Convert all strings to UTF-8 """
|
||||
for key in data:
|
||||
-            if isinstance(data[key], basestring):
+            if isinstance(data[key], str):
|
||||
data[key] = data[key].encode('utf-8')
|
||||
return data
|
||||
|
||||
# Apply stored properties to the given dataset & POST to the configured endpoint
|
||||
def send(self, data):
|
||||
request = Request(
|
||||
-            self.endpoint + '?' + urlencode(self.fixUTF8(data)),
+            self.endpoint + '?' + urlencode(self.fixUTF8(data)).encode('utf-8'),
|
||||
headers={
|
||||
'User-Agent': self.user_agent
|
||||
}
|
||||
@@ -121,7 +112,7 @@ class HTTPPost(HTTPRequest):
|
||||
def send(self, data):
|
||||
request = Request(
|
||||
self.endpoint,
|
||||
-            data=urlencode(self.fixUTF8(data)),
+            data=urlencode(self.fixUTF8(data)).encode('utf-8'),
|
||||
headers={
|
||||
'User-Agent': self.user_agent
|
||||
}
|
||||
@@ -144,26 +135,26 @@ class Tracker(object):
|
||||
|
||||
@classmethod
|
||||
def coerceParameter(cls, name, value=None):
|
||||
-        if isinstance(name, basestring) and name[0] == '&':
+        if isinstance(name, str) and name[0] == '&':
|
||||
return name[1:], str(value)
|
||||
elif name in cls.parameter_alias:
|
||||
typecast, param_name = cls.parameter_alias.get(name)
|
||||
return param_name, typecast(value)
|
||||
else:
|
||||
-            raise KeyError, 'Parameter "{0}" is not recognized'.format(name)
+            raise KeyError('Parameter "{0}" is not recognized'.format(name))
|
||||
|
||||
def payload(self, data):
|
||||
-        for key, value in data.iteritems():
+        for key, value in data.items():
|
||||
try:
|
||||
yield self.coerceParameter(key, value)
|
||||
except KeyError:
|
||||
continue
|
||||
|
||||
option_sequence = {
|
||||
-        'pageview': [(basestring, 'dp')],
-        'event': [(basestring, 'ec'), (basestring, 'ea'), (basestring, 'el'), (int, 'ev')],
-        'social': [(basestring, 'sn'), (basestring, 'sa'), (basestring, 'st')],
-        'timing': [(basestring, 'utc'), (basestring, 'utv'), (basestring, 'utt'), (basestring, 'utl')]
+        'pageview': [(str, 'dp')],
+        'event': [(str, 'ec'), (str, 'ea'), (str, 'el'), (int, 'ev')],
+        'social': [(str, 'sn'), (str, 'sa'), (str, 'st')],
+        'timing': [(str, 'utc'), (str, 'utv'), (str, 'utt'), (str, 'utl')]
|
||||
}
|
||||
|
||||
@classmethod
|
||||
@@ -232,7 +223,7 @@ class Tracker(object):
|
||||
for key, val in self.payload(item):
|
||||
data[key] = val
|
||||
|
||||
-        for k, v in self.params.iteritems(): # update only absent parameters
+        for k, v in self.params.items(): # update only absent parameters
|
||||
if k not in data:
|
||||
data[k] = v
|
||||
|
||||
@@ -247,13 +238,13 @@ class Tracker(object):
|
||||
    # Setting persistent attributes of the session/hit/etc (inc. custom dimensions/metrics)
|
||||
def set(self, name, value=None):
|
||||
if isinstance(name, dict):
|
||||
-            for key, value in name.iteritems():
+            for key, value in name.items():
|
||||
try:
|
||||
param, value = self.coerceParameter(key, value)
|
||||
self.params[param] = value
|
||||
except KeyError:
|
||||
pass
|
||||
-        elif isinstance(name, basestring):
+        elif isinstance(name, str):
|
||||
try:
|
||||
param, value = self.coerceParameter(name, value)
|
||||
self.params[param] = value
|
||||
@@ -277,7 +268,7 @@ class Tracker(object):
|
||||
def safe_unicode(obj):
    """ Safe conversion to the Unicode string version of the object """
|
||||
try:
|
||||
-        return unicode(obj)
+        return str(obj)
|
||||
except UnicodeDecodeError:
|
||||
return obj.decode('utf-8')
|
||||
|
||||
@@ -380,7 +371,7 @@ for i in range(0, 5):
|
||||
# Enhanced Ecommerce
|
||||
Tracker.alias(str, 'pa') # Product action
|
||||
Tracker.alias(str, 'tcc') # Coupon code
|
||||
-Tracker.alias(unicode, 'pal')  # Product action list
+Tracker.alias(str, 'pal')  # Product action list
|
||||
Tracker.alias(int, 'cos') # Checkout step
|
||||
Tracker.alias(str, 'col') # Checkout step option
|
||||
|
||||
@@ -388,10 +379,10 @@ Tracker.alias(str, 'promoa') # Promotion action
|
||||
|
||||
for product_index in range(1, MAX_EC_PRODUCTS):
|
||||
Tracker.alias(str, 'pr{0}id'.format(product_index)) # Product SKU
|
||||
-    Tracker.alias(unicode, 'pr{0}nm'.format(product_index))  # Product name
-    Tracker.alias(unicode, 'pr{0}br'.format(product_index))  # Product brand
-    Tracker.alias(unicode, 'pr{0}ca'.format(product_index))  # Product category
-    Tracker.alias(unicode, 'pr{0}va'.format(product_index))  # Product variant
+    Tracker.alias(str, 'pr{0}nm'.format(product_index))  # Product name
+    Tracker.alias(str, 'pr{0}br'.format(product_index))  # Product brand
+    Tracker.alias(str, 'pr{0}ca'.format(product_index))  # Product category
+    Tracker.alias(str, 'pr{0}va'.format(product_index))  # Product variant
|
||||
Tracker.alias(str, 'pr{0}pr'.format(product_index)) # Product price
|
||||
Tracker.alias(int, 'pr{0}qt'.format(product_index)) # Product quantity
|
||||
Tracker.alias(str, 'pr{0}cc'.format(product_index)) # Product coupon code
|
||||
@@ -403,10 +394,10 @@ for product_index in range(1, MAX_EC_PRODUCTS):
|
||||
|
||||
for list_index in range(1, MAX_EC_LISTS):
|
||||
Tracker.alias(str, 'il{0}pi{1}id'.format(list_index, product_index)) # Product impression SKU
|
||||
-        Tracker.alias(unicode, 'il{0}pi{1}nm'.format(list_index, product_index))  # Product impression name
-        Tracker.alias(unicode, 'il{0}pi{1}br'.format(list_index, product_index))  # Product impression brand
-        Tracker.alias(unicode, 'il{0}pi{1}ca'.format(list_index, product_index))  # Product impression category
-        Tracker.alias(unicode, 'il{0}pi{1}va'.format(list_index, product_index))  # Product impression variant
+        Tracker.alias(str, 'il{0}pi{1}nm'.format(list_index, product_index))  # Product impression name
+        Tracker.alias(str, 'il{0}pi{1}br'.format(list_index, product_index))  # Product impression brand
+        Tracker.alias(str, 'il{0}pi{1}ca'.format(list_index, product_index))  # Product impression category
+        Tracker.alias(str, 'il{0}pi{1}va'.format(list_index, product_index))  # Product impression variant
|
||||
Tracker.alias(int, 'il{0}pi{1}ps'.format(list_index, product_index)) # Product impression position
|
||||
Tracker.alias(int, 'il{0}pi{1}pr'.format(list_index, product_index)) # Product impression price
|
||||
|
||||
@@ -417,11 +408,11 @@ for product_index in range(1, MAX_EC_PRODUCTS):
|
||||
custom_index)) # Product impression custom metric
|
||||
|
||||
for list_index in range(1, MAX_EC_LISTS):
|
||||
-    Tracker.alias(unicode, 'il{0}nm'.format(list_index))  # Product impression list name
+    Tracker.alias(str, 'il{0}nm'.format(list_index))  # Product impression list name
|
||||
|
||||
for promotion_index in range(1, MAX_EC_PROMOTIONS):
|
||||
Tracker.alias(str, 'promo{0}id'.format(promotion_index)) # Promotion ID
|
||||
-    Tracker.alias(unicode, 'promo{0}nm'.format(promotion_index))  # Promotion name
+    Tracker.alias(str, 'promo{0}nm'.format(promotion_index))  # Promotion name
|
||||
Tracker.alias(str, 'promo{0}cr'.format(promotion_index)) # Promotion creative
|
||||
Tracker.alias(str, 'promo{0}ps'.format(promotion_index)) # Promotion position
|
||||
|
||||
|
@@ -1 +1 @@
|
||||
-import Tracker
+from . import Tracker
|
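
The one-line change above is the Python 3 implicit-relative-import fix; a sketch with an assumed package layout (for illustration only):

# Inside a package's __init__.py, with a layout like:
#
#   UniversalAnalytics/
#       __init__.py
#       Tracker.py
#
from . import Tracker   # explicit relative import; valid on Python 2 and 3
# import Tracker        # implicit relative import; Python 2 only
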
608	lib/appdirs.py	Normal file
@@ -0,0 +1,608 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (c) 2005-2010 ActiveState Software Inc.
|
||||
# Copyright (c) 2013 Eddy Petrișor
|
||||
|
||||
"""Utilities for determining application-specific dirs.
|
||||
|
||||
See <http://github.com/ActiveState/appdirs> for details and usage.
|
||||
"""
|
||||
# Dev Notes:
|
||||
# - MSDN on where to store app data files:
|
||||
# http://support.microsoft.com/default.aspx?scid=kb;en-us;310294#XSLTH3194121123120121120120
|
||||
# - Mac OS X: http://developer.apple.com/documentation/MacOSX/Conceptual/BPFileSystem/index.html
|
||||
# - XDG spec for Un*x: http://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html
|
||||
|
||||
__version_info__ = (1, 4, 3)
|
||||
__version__ = '.'.join(map(str, __version_info__))
|
||||
|
||||
|
||||
import sys
|
||||
import os
|
||||
|
||||
PY3 = sys.version_info[0] == 3
|
||||
|
||||
if PY3:
|
||||
unicode = str
|
||||
|
||||
if sys.platform.startswith('java'):
|
||||
import platform
|
||||
os_name = platform.java_ver()[3][0]
|
||||
if os_name.startswith('Windows'): # "Windows XP", "Windows 7", etc.
|
||||
system = 'win32'
|
||||
elif os_name.startswith('Mac'): # "Mac OS X", etc.
|
||||
system = 'darwin'
|
||||
else: # "Linux", "SunOS", "FreeBSD", etc.
|
||||
# Setting this to "linux2" is not ideal, but only Windows or Mac
|
||||
# are actually checked for and the rest of the module expects
|
||||
# *sys.platform* style strings.
|
||||
system = 'linux2'
|
||||
else:
|
||||
system = sys.platform
|
||||
|
||||
|
||||
|
||||
def user_data_dir(appname=None, appauthor=None, version=None, roaming=False):
|
||||
r"""Return full path to the user-specific data dir for this application.
|
||||
|
||||
"appname" is the name of application.
|
||||
If None, just the system directory is returned.
|
||||
"appauthor" (only used on Windows) is the name of the
|
||||
appauthor or distributing body for this application. Typically
|
||||
it is the owning company name. This falls back to appname. You may
|
||||
pass False to disable it.
|
||||
"version" is an optional version path element to append to the
|
||||
path. You might want to use this if you want multiple versions
|
||||
of your app to be able to run independently. If used, this
|
||||
would typically be "<major>.<minor>".
|
||||
Only applied when appname is present.
|
||||
"roaming" (boolean, default False) can be set True to use the Windows
|
||||
roaming appdata directory. That means that for users on a Windows
|
||||
network setup for roaming profiles, this user data will be
|
||||
sync'd on login. See
|
||||
<http://technet.microsoft.com/en-us/library/cc766489(WS.10).aspx>
|
||||
for a discussion of issues.
|
||||
|
||||
Typical user data directories are:
|
||||
Mac OS X: ~/Library/Application Support/<AppName>
|
||||
Unix: ~/.local/share/<AppName> # or in $XDG_DATA_HOME, if defined
|
||||
Win XP (not roaming): C:\Documents and Settings\<username>\Application Data\<AppAuthor>\<AppName>
|
||||
Win XP (roaming): C:\Documents and Settings\<username>\Local Settings\Application Data\<AppAuthor>\<AppName>
|
||||
Win 7 (not roaming): C:\Users\<username>\AppData\Local\<AppAuthor>\<AppName>
|
||||
Win 7 (roaming): C:\Users\<username>\AppData\Roaming\<AppAuthor>\<AppName>
|
||||
|
||||
For Unix, we follow the XDG spec and support $XDG_DATA_HOME.
|
||||
That means, by default "~/.local/share/<AppName>".
|
||||
"""
|
||||
if system == "win32":
|
||||
if appauthor is None:
|
||||
appauthor = appname
|
||||
const = roaming and "CSIDL_APPDATA" or "CSIDL_LOCAL_APPDATA"
|
||||
path = os.path.normpath(_get_win_folder(const))
|
||||
if appname:
|
||||
if appauthor is not False:
|
||||
path = os.path.join(path, appauthor, appname)
|
||||
else:
|
||||
path = os.path.join(path, appname)
|
||||
elif system == 'darwin':
|
||||
path = os.path.expanduser('~/Library/Application Support/')
|
||||
if appname:
|
||||
path = os.path.join(path, appname)
|
||||
else:
|
||||
path = os.getenv('XDG_DATA_HOME', os.path.expanduser("~/.local/share"))
|
||||
if appname:
|
||||
path = os.path.join(path, appname)
|
||||
if appname and version:
|
||||
path = os.path.join(path, version)
|
||||
return path
|
||||
|
||||
|
||||
def site_data_dir(appname=None, appauthor=None, version=None, multipath=False):
|
||||
r"""Return full path to the user-shared data dir for this application.
|
||||
|
||||
"appname" is the name of application.
|
||||
If None, just the system directory is returned.
|
||||
"appauthor" (only used on Windows) is the name of the
|
||||
appauthor or distributing body for this application. Typically
|
||||
it is the owning company name. This falls back to appname. You may
|
||||
pass False to disable it.
|
||||
"version" is an optional version path element to append to the
|
||||
path. You might want to use this if you want multiple versions
|
||||
of your app to be able to run independently. If used, this
|
||||
would typically be "<major>.<minor>".
|
||||
Only applied when appname is present.
|
||||
"multipath" is an optional parameter only applicable to *nix
|
||||
which indicates that the entire list of data dirs should be
|
||||
returned. By default, the first item from XDG_DATA_DIRS is
|
||||
returned, or '/usr/local/share/<AppName>',
|
||||
if XDG_DATA_DIRS is not set
|
||||
|
||||
Typical site data directories are:
|
||||
Mac OS X: /Library/Application Support/<AppName>
|
||||
Unix: /usr/local/share/<AppName> or /usr/share/<AppName>
|
||||
Win XP: C:\Documents and Settings\All Users\Application Data\<AppAuthor>\<AppName>
|
||||
Vista: (Fail! "C:\ProgramData" is a hidden *system* directory on Vista.)
|
||||
Win 7: C:\ProgramData\<AppAuthor>\<AppName> # Hidden, but writeable on Win 7.
|
||||
|
||||
For Unix, this is using the $XDG_DATA_DIRS[0] default.
|
||||
|
||||
WARNING: Do not use this on Windows. See the Vista-Fail note above for why.
|
||||
"""
|
||||
if system == "win32":
|
||||
if appauthor is None:
|
||||
appauthor = appname
|
||||
path = os.path.normpath(_get_win_folder("CSIDL_COMMON_APPDATA"))
|
||||
if appname:
|
||||
if appauthor is not False:
|
||||
path = os.path.join(path, appauthor, appname)
|
||||
else:
|
||||
path = os.path.join(path, appname)
|
||||
elif system == 'darwin':
|
||||
path = os.path.expanduser('/Library/Application Support')
|
||||
if appname:
|
||||
path = os.path.join(path, appname)
|
||||
else:
|
||||
# XDG default for $XDG_DATA_DIRS
|
||||
# only first, if multipath is False
|
||||
path = os.getenv('XDG_DATA_DIRS',
|
||||
os.pathsep.join(['/usr/local/share', '/usr/share']))
|
||||
pathlist = [os.path.expanduser(x.rstrip(os.sep)) for x in path.split(os.pathsep)]
|
||||
if appname:
|
||||
if version:
|
||||
appname = os.path.join(appname, version)
|
||||
pathlist = [os.sep.join([x, appname]) for x in pathlist]
|
||||
|
||||
if multipath:
|
||||
path = os.pathsep.join(pathlist)
|
||||
else:
|
||||
path = pathlist[0]
|
||||
return path
|
||||
|
||||
if appname and version:
|
||||
path = os.path.join(path, version)
|
||||
return path
|
||||
|
||||
|
||||
def user_config_dir(appname=None, appauthor=None, version=None, roaming=False):
|
||||
r"""Return full path to the user-specific config dir for this application.
|
||||
|
||||
"appname" is the name of application.
|
||||
If None, just the system directory is returned.
|
||||
"appauthor" (only used on Windows) is the name of the
|
||||
appauthor or distributing body for this application. Typically
|
||||
it is the owning company name. This falls back to appname. You may
|
||||
pass False to disable it.
|
||||
"version" is an optional version path element to append to the
|
||||
path. You might want to use this if you want multiple versions
|
||||
of your app to be able to run independently. If used, this
|
||||
would typically be "<major>.<minor>".
|
||||
Only applied when appname is present.
|
||||
"roaming" (boolean, default False) can be set True to use the Windows
|
||||
roaming appdata directory. That means that for users on a Windows
|
||||
network setup for roaming profiles, this user data will be
|
||||
sync'd on login. See
|
||||
<http://technet.microsoft.com/en-us/library/cc766489(WS.10).aspx>
|
||||
for a discussion of issues.
|
||||
|
||||
Typical user config directories are:
|
||||
Mac OS X: same as user_data_dir
|
||||
Unix: ~/.config/<AppName> # or in $XDG_CONFIG_HOME, if defined
|
||||
Win *: same as user_data_dir
|
||||
|
||||
For Unix, we follow the XDG spec and support $XDG_CONFIG_HOME.
|
||||
That means, by default "~/.config/<AppName>".
|
||||
"""
|
||||
if system in ["win32", "darwin"]:
|
||||
path = user_data_dir(appname, appauthor, None, roaming)
|
||||
else:
|
||||
path = os.getenv('XDG_CONFIG_HOME', os.path.expanduser("~/.config"))
|
||||
if appname:
|
||||
path = os.path.join(path, appname)
|
||||
if appname and version:
|
||||
path = os.path.join(path, version)
|
||||
return path
|
||||
|
||||
|
||||
def site_config_dir(appname=None, appauthor=None, version=None, multipath=False):
|
||||
r"""Return full path to the user-shared data dir for this application.
|
||||
|
||||
"appname" is the name of application.
|
||||
If None, just the system directory is returned.
|
||||
"appauthor" (only used on Windows) is the name of the
|
||||
appauthor or distributing body for this application. Typically
|
||||
it is the owning company name. This falls back to appname. You may
|
||||
pass False to disable it.
|
||||
"version" is an optional version path element to append to the
|
||||
path. You might want to use this if you want multiple versions
|
||||
of your app to be able to run independently. If used, this
|
||||
would typically be "<major>.<minor>".
|
||||
Only applied when appname is present.
|
||||
"multipath" is an optional parameter only applicable to *nix
|
||||
which indicates that the entire list of config dirs should be
|
||||
returned. By default, the first item from XDG_CONFIG_DIRS is
|
||||
returned, or '/etc/xdg/<AppName>', if XDG_CONFIG_DIRS is not set
|
||||
|
||||
Typical site config directories are:
|
||||
Mac OS X: same as site_data_dir
|
||||
Unix: /etc/xdg/<AppName> or $XDG_CONFIG_DIRS[i]/<AppName> for each value in
|
||||
$XDG_CONFIG_DIRS
|
||||
Win *: same as site_data_dir
|
||||
Vista: (Fail! "C:\ProgramData" is a hidden *system* directory on Vista.)
|
||||
|
||||
For Unix, this is using the $XDG_CONFIG_DIRS[0] default, if multipath=False
|
||||
|
||||
WARNING: Do not use this on Windows. See the Vista-Fail note above for why.
|
||||
"""
|
||||
if system in ["win32", "darwin"]:
|
||||
path = site_data_dir(appname, appauthor)
|
||||
if appname and version:
|
||||
path = os.path.join(path, version)
|
||||
else:
|
||||
# XDG default for $XDG_CONFIG_DIRS
|
||||
# only first, if multipath is False
|
||||
path = os.getenv('XDG_CONFIG_DIRS', '/etc/xdg')
|
||||
pathlist = [os.path.expanduser(x.rstrip(os.sep)) for x in path.split(os.pathsep)]
|
||||
if appname:
|
||||
if version:
|
||||
appname = os.path.join(appname, version)
|
||||
pathlist = [os.sep.join([x, appname]) for x in pathlist]
|
||||
|
||||
if multipath:
|
||||
path = os.pathsep.join(pathlist)
|
||||
else:
|
||||
path = pathlist[0]
|
||||
return path
|
||||
|
||||
|
||||
def user_cache_dir(appname=None, appauthor=None, version=None, opinion=True):
|
||||
r"""Return full path to the user-specific cache dir for this application.
|
||||
|
||||
"appname" is the name of application.
|
||||
If None, just the system directory is returned.
|
||||
"appauthor" (only used on Windows) is the name of the
|
||||
appauthor or distributing body for this application. Typically
|
||||
it is the owning company name. This falls back to appname. You may
|
||||
pass False to disable it.
|
||||
"version" is an optional version path element to append to the
|
||||
path. You might want to use this if you want multiple versions
|
||||
of your app to be able to run independently. If used, this
|
||||
would typically be "<major>.<minor>".
|
||||
Only applied when appname is present.
|
||||
"opinion" (boolean) can be False to disable the appending of
|
||||
"Cache" to the base app data dir for Windows. See
|
||||
discussion below.
|
||||
|
||||
Typical user cache directories are:
|
||||
Mac OS X: ~/Library/Caches/<AppName>
|
||||
Unix: ~/.cache/<AppName> (XDG default)
|
||||
Win XP: C:\Documents and Settings\<username>\Local Settings\Application Data\<AppAuthor>\<AppName>\Cache
|
||||
Vista: C:\Users\<username>\AppData\Local\<AppAuthor>\<AppName>\Cache
|
||||
|
||||
On Windows the only suggestion in the MSDN docs is that local settings go in
|
||||
the `CSIDL_LOCAL_APPDATA` directory. This is identical to the non-roaming
|
||||
app data dir (the default returned by `user_data_dir` above). Apps typically
|
||||
put cache data somewhere *under* the given dir here. Some examples:
|
||||
...\Mozilla\Firefox\Profiles\<ProfileName>\Cache
|
||||
...\Acme\SuperApp\Cache\1.0
|
||||
OPINION: This function appends "Cache" to the `CSIDL_LOCAL_APPDATA` value.
|
||||
This can be disabled with the `opinion=False` option.
|
||||
"""
|
||||
if system == "win32":
|
||||
if appauthor is None:
|
||||
appauthor = appname
|
||||
path = os.path.normpath(_get_win_folder("CSIDL_LOCAL_APPDATA"))
|
||||
if appname:
|
||||
if appauthor is not False:
|
||||
path = os.path.join(path, appauthor, appname)
|
||||
else:
|
||||
path = os.path.join(path, appname)
|
||||
if opinion:
|
||||
path = os.path.join(path, "Cache")
|
||||
elif system == 'darwin':
|
||||
path = os.path.expanduser('~/Library/Caches')
|
||||
if appname:
|
||||
path = os.path.join(path, appname)
|
||||
else:
|
||||
path = os.getenv('XDG_CACHE_HOME', os.path.expanduser('~/.cache'))
|
||||
if appname:
|
||||
path = os.path.join(path, appname)
|
||||
if appname and version:
|
||||
path = os.path.join(path, version)
|
||||
return path
|
||||
|
||||
|
||||
def user_state_dir(appname=None, appauthor=None, version=None, roaming=False):
|
||||
r"""Return full path to the user-specific state dir for this application.
|
||||
|
||||
"appname" is the name of application.
|
||||
If None, just the system directory is returned.
|
||||
"appauthor" (only used on Windows) is the name of the
|
||||
appauthor or distributing body for this application. Typically
|
||||
it is the owning company name. This falls back to appname. You may
|
||||
pass False to disable it.
|
||||
"version" is an optional version path element to append to the
|
||||
path. You might want to use this if you want multiple versions
|
||||
of your app to be able to run independently. If used, this
|
||||
would typically be "<major>.<minor>".
|
||||
Only applied when appname is present.
|
||||
"roaming" (boolean, default False) can be set True to use the Windows
|
||||
roaming appdata directory. That means that for users on a Windows
|
||||
network setup for roaming profiles, this user data will be
|
||||
sync'd on login. See
|
||||
<http://technet.microsoft.com/en-us/library/cc766489(WS.10).aspx>
|
||||
for a discussion of issues.
|
||||
|
||||
Typical user state directories are:
|
||||
Mac OS X: same as user_data_dir
|
||||
Unix: ~/.local/state/<AppName> # or in $XDG_STATE_HOME, if defined
|
||||
Win *: same as user_data_dir
|
||||
|
||||
For Unix, we follow this Debian proposal <https://wiki.debian.org/XDGBaseDirectorySpecification#state>
|
||||
to extend the XDG spec and support $XDG_STATE_HOME.
|
||||
|
||||
That means, by default "~/.local/state/<AppName>".
|
||||
"""
|
||||
if system in ["win32", "darwin"]:
|
||||
path = user_data_dir(appname, appauthor, None, roaming)
|
||||
else:
|
||||
path = os.getenv('XDG_STATE_HOME', os.path.expanduser("~/.local/state"))
|
||||
if appname:
|
||||
path = os.path.join(path, appname)
|
||||
if appname and version:
|
||||
path = os.path.join(path, version)
|
||||
return path
|
||||
|
||||
|
||||
def user_log_dir(appname=None, appauthor=None, version=None, opinion=True):
|
||||
r"""Return full path to the user-specific log dir for this application.
|
||||
|
||||
"appname" is the name of application.
|
||||
If None, just the system directory is returned.
|
||||
"appauthor" (only used on Windows) is the name of the
|
||||
appauthor or distributing body for this application. Typically
|
||||
it is the owning company name. This falls back to appname. You may
|
||||
pass False to disable it.
|
||||
"version" is an optional version path element to append to the
|
||||
path. You might want to use this if you want multiple versions
|
||||
of your app to be able to run independently. If used, this
|
||||
would typically be "<major>.<minor>".
|
||||
Only applied when appname is present.
|
||||
"opinion" (boolean) can be False to disable the appending of
|
||||
"Logs" to the base app data dir for Windows, and "log" to the
|
||||
base cache dir for Unix. See discussion below.
|
||||
|
||||
Typical user log directories are:
|
||||
Mac OS X: ~/Library/Logs/<AppName>
|
||||
Unix: ~/.cache/<AppName>/log # or under $XDG_CACHE_HOME if defined
|
||||
Win XP: C:\Documents and Settings\<username>\Local Settings\Application Data\<AppAuthor>\<AppName>\Logs
|
||||
Vista: C:\Users\<username>\AppData\Local\<AppAuthor>\<AppName>\Logs
|
||||
|
||||
On Windows the only suggestion in the MSDN docs is that local settings
|
||||
go in the `CSIDL_LOCAL_APPDATA` directory. (Note: I'm interested in
|
||||
examples of what some windows apps use for a logs dir.)
|
||||
|
||||
OPINION: This function appends "Logs" to the `CSIDL_LOCAL_APPDATA`
|
||||
value for Windows and appends "log" to the user cache dir for Unix.
|
||||
This can be disabled with the `opinion=False` option.
|
||||
"""
|
||||
if system == "darwin":
|
||||
path = os.path.join(
|
||||
os.path.expanduser('~/Library/Logs'),
|
||||
appname)
|
||||
elif system == "win32":
|
||||
path = user_data_dir(appname, appauthor, version)
|
||||
version = False
|
||||
if opinion:
|
||||
path = os.path.join(path, "Logs")
|
||||
else:
|
||||
path = user_cache_dir(appname, appauthor, version)
|
||||
version = False
|
||||
if opinion:
|
||||
path = os.path.join(path, "log")
|
||||
if appname and version:
|
||||
path = os.path.join(path, version)
|
||||
return path
|
||||
|
||||
|
||||
class AppDirs(object):
|
||||
"""Convenience wrapper for getting application dirs."""
|
||||
def __init__(self, appname=None, appauthor=None, version=None,
|
||||
roaming=False, multipath=False):
|
||||
self.appname = appname
|
||||
self.appauthor = appauthor
|
||||
self.version = version
|
||||
self.roaming = roaming
|
||||
self.multipath = multipath
|
||||
|
||||
@property
|
||||
def user_data_dir(self):
|
||||
return user_data_dir(self.appname, self.appauthor,
|
||||
version=self.version, roaming=self.roaming)
|
||||
|
||||
@property
|
||||
def site_data_dir(self):
|
||||
return site_data_dir(self.appname, self.appauthor,
|
||||
version=self.version, multipath=self.multipath)
|
||||
|
||||
@property
|
||||
def user_config_dir(self):
|
||||
return user_config_dir(self.appname, self.appauthor,
|
||||
version=self.version, roaming=self.roaming)
|
||||
|
||||
@property
|
||||
def site_config_dir(self):
|
||||
return site_config_dir(self.appname, self.appauthor,
|
||||
version=self.version, multipath=self.multipath)
|
||||
|
||||
@property
|
||||
def user_cache_dir(self):
|
||||
return user_cache_dir(self.appname, self.appauthor,
|
||||
version=self.version)
|
||||
|
||||
@property
|
||||
def user_state_dir(self):
|
||||
return user_state_dir(self.appname, self.appauthor,
|
||||
version=self.version)
|
||||
|
||||
@property
|
||||
def user_log_dir(self):
|
||||
return user_log_dir(self.appname, self.appauthor,
|
||||
version=self.version)
|
||||
|
||||
|
||||
#---- internal support stuff
|
||||
|
||||
def _get_win_folder_from_registry(csidl_name):
|
||||
"""This is a fallback technique at best. I'm not sure if using the
|
||||
registry for this guarantees us the correct answer for all CSIDL_*
|
||||
names.
|
||||
"""
|
||||
if PY3:
|
||||
import winreg as _winreg
|
||||
else:
|
||||
import _winreg
|
||||
|
||||
shell_folder_name = {
|
||||
"CSIDL_APPDATA": "AppData",
|
||||
"CSIDL_COMMON_APPDATA": "Common AppData",
|
||||
"CSIDL_LOCAL_APPDATA": "Local AppData",
|
||||
}[csidl_name]
|
||||
|
||||
key = _winreg.OpenKey(
|
||||
_winreg.HKEY_CURRENT_USER,
|
||||
r"Software\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders"
|
||||
)
|
||||
dir, type = _winreg.QueryValueEx(key, shell_folder_name)
|
||||
return dir
|
||||
|
||||
|
||||
def _get_win_folder_with_pywin32(csidl_name):
|
||||
from win32com.shell import shellcon, shell
|
||||
dir = shell.SHGetFolderPath(0, getattr(shellcon, csidl_name), 0, 0)
|
||||
# Try to make this a unicode path because SHGetFolderPath does
|
||||
# not return unicode strings when there is unicode data in the
|
||||
# path.
|
||||
try:
|
||||
dir = unicode(dir)
|
||||
|
||||
# Downgrade to short path name if have highbit chars. See
|
||||
# <http://bugs.activestate.com/show_bug.cgi?id=85099>.
|
||||
has_high_char = False
|
||||
for c in dir:
|
||||
if ord(c) > 255:
|
||||
has_high_char = True
|
||||
break
|
||||
if has_high_char:
|
||||
try:
|
||||
import win32api
|
||||
dir = win32api.GetShortPathName(dir)
|
||||
except ImportError:
|
||||
pass
|
||||
except UnicodeError:
|
||||
pass
|
||||
return dir
|
||||
|
||||
|
||||
def _get_win_folder_with_ctypes(csidl_name):
|
||||
import ctypes
|
||||
|
||||
csidl_const = {
|
||||
"CSIDL_APPDATA": 26,
|
||||
"CSIDL_COMMON_APPDATA": 35,
|
||||
"CSIDL_LOCAL_APPDATA": 28,
|
||||
}[csidl_name]
|
||||
|
||||
buf = ctypes.create_unicode_buffer(1024)
|
||||
ctypes.windll.shell32.SHGetFolderPathW(None, csidl_const, None, 0, buf)
|
||||
|
||||
# Downgrade to short path name if have highbit chars. See
|
||||
# <http://bugs.activestate.com/show_bug.cgi?id=85099>.
|
||||
has_high_char = False
|
||||
for c in buf:
|
||||
if ord(c) > 255:
|
||||
has_high_char = True
|
||||
break
|
||||
if has_high_char:
|
||||
buf2 = ctypes.create_unicode_buffer(1024)
|
||||
if ctypes.windll.kernel32.GetShortPathNameW(buf.value, buf2, 1024):
|
||||
buf = buf2
|
||||
|
||||
return buf.value
|
||||
|
||||
def _get_win_folder_with_jna(csidl_name):
|
||||
import array
|
||||
from com.sun import jna
|
||||
from com.sun.jna.platform import win32
|
||||
|
||||
buf_size = win32.WinDef.MAX_PATH * 2
|
||||
buf = array.zeros('c', buf_size)
|
||||
shell = win32.Shell32.INSTANCE
|
||||
shell.SHGetFolderPath(None, getattr(win32.ShlObj, csidl_name), None, win32.ShlObj.SHGFP_TYPE_CURRENT, buf)
|
||||
dir = jna.Native.toString(buf.tostring()).rstrip("\0")
|
||||
|
||||
# Downgrade to short path name if have highbit chars. See
|
||||
# <http://bugs.activestate.com/show_bug.cgi?id=85099>.
|
||||
has_high_char = False
|
||||
for c in dir:
|
||||
if ord(c) > 255:
|
||||
has_high_char = True
|
||||
break
|
||||
if has_high_char:
|
||||
buf = array.zeros('c', buf_size)
|
||||
kernel = win32.Kernel32.INSTANCE
|
||||
if kernel.GetShortPathName(dir, buf, buf_size):
|
||||
dir = jna.Native.toString(buf.tostring()).rstrip("\0")
|
||||
|
||||
return dir
|
||||
|
||||
if system == "win32":
|
||||
try:
|
||||
import win32com.shell
|
||||
_get_win_folder = _get_win_folder_with_pywin32
|
||||
except ImportError:
|
||||
try:
|
||||
from ctypes import windll
|
||||
_get_win_folder = _get_win_folder_with_ctypes
|
||||
except ImportError:
|
||||
try:
|
||||
import com.sun.jna
|
||||
_get_win_folder = _get_win_folder_with_jna
|
||||
except ImportError:
|
||||
_get_win_folder = _get_win_folder_from_registry
|
||||
|
||||
|
||||
#---- self test code
|
||||
|
||||
if __name__ == "__main__":
|
||||
appname = "MyApp"
|
||||
appauthor = "MyCompany"
|
||||
|
||||
props = ("user_data_dir",
|
||||
"user_config_dir",
|
||||
"user_cache_dir",
|
||||
"user_state_dir",
|
||||
"user_log_dir",
|
||||
"site_data_dir",
|
||||
"site_config_dir")
|
||||
|
||||
print("-- app dirs %s --" % __version__)
|
||||
|
||||
print("-- app dirs (with optional 'version')")
|
||||
dirs = AppDirs(appname, appauthor, version="1.0")
|
||||
for prop in props:
|
||||
print("%s: %s" % (prop, getattr(dirs, prop)))
|
||||
|
||||
print("\n-- app dirs (without optional 'version')")
|
||||
dirs = AppDirs(appname, appauthor)
|
||||
for prop in props:
|
||||
print("%s: %s" % (prop, getattr(dirs, prop)))
|
||||
|
||||
print("\n-- app dirs (without optional 'appauthor')")
|
||||
dirs = AppDirs(appname)
|
||||
for prop in props:
|
||||
print("%s: %s" % (prop, getattr(dirs, prop)))
|
||||
|
||||
print("\n-- app dirs (with disabled 'appauthor')")
|
||||
dirs = AppDirs(appname, appauthor=False)
|
||||
for prop in props:
|
||||
print("%s: %s" % (prop, getattr(dirs, prop)))
|
lib/apscheduler/events.py

@@ -3,7 +3,7 @@ __all__ = ('EVENT_SCHEDULER_STARTED', 'EVENT_SCHEDULER_SHUTDOWN', 'EVENT_SCHEDUL
            'EVENT_JOBSTORE_ADDED', 'EVENT_JOBSTORE_REMOVED', 'EVENT_ALL_JOBS_REMOVED',
            'EVENT_JOB_ADDED', 'EVENT_JOB_REMOVED', 'EVENT_JOB_MODIFIED', 'EVENT_JOB_EXECUTED',
            'EVENT_JOB_ERROR', 'EVENT_JOB_MISSED', 'EVENT_JOB_SUBMITTED', 'EVENT_JOB_MAX_INSTANCES',
-           'SchedulerEvent', 'JobEvent', 'JobExecutionEvent')
+           'SchedulerEvent', 'JobEvent', 'JobExecutionEvent', 'JobSubmissionEvent')


 EVENT_SCHEDULER_STARTED = EVENT_SCHEDULER_START = 2 ** 0
lib/apscheduler/executors/asyncio.py

@@ -3,12 +3,11 @@ from __future__ import absolute_import
 import sys

 from apscheduler.executors.base import BaseExecutor, run_job
+from apscheduler.util import iscoroutinefunction_partial

 try:
-    from asyncio import iscoroutinefunction
     from apscheduler.executors.base_py3 import run_coroutine_job
 except ImportError:
-    from trollius import iscoroutinefunction
     run_coroutine_job = None


@@ -46,7 +45,7 @@ class AsyncIOExecutor(BaseExecutor):
             else:
                 self._run_job_success(job.id, events)

-        if iscoroutinefunction(job.func):
+        if iscoroutinefunction_partial(job.func):
             if run_coroutine_job is not None:
                 coro = run_coroutine_job(job, job._jobstore_alias, run_times, self._logger.name)
                 f = self._eventloop.create_task(coro)
lib/apscheduler/executors/tornado.py

@@ -8,10 +8,10 @@ from tornado.gen import convert_yielded
 from apscheduler.executors.base import BaseExecutor, run_job

 try:
-    from inspect import iscoroutinefunction
     from apscheduler.executors.base_py3 import run_coroutine_job
+    from apscheduler.util import iscoroutinefunction_partial
 except ImportError:
-    def iscoroutinefunction(func):
+    def iscoroutinefunction_partial(func):
         return False


@@ -44,7 +44,7 @@ class TornadoExecutor(BaseExecutor):
             else:
                 self._run_job_success(job.id, events)

-        if iscoroutinefunction(job.func):
+        if iscoroutinefunction_partial(job.func):
             f = run_coroutine_job(job, job._jobstore_alias, run_times, self._logger.name)
         else:
             f = self.executor.submit(run_job, job, job._jobstore_alias, run_times,
lib/apscheduler/job.py

@@ -1,4 +1,4 @@
-from collections import Iterable, Mapping
+from inspect import ismethod, isclass
 from uuid import uuid4

 import six
@@ -8,6 +8,11 @@ from apscheduler.util import (
     ref_to_obj, obj_to_ref, datetime_repr, repr_escape, get_callable_name, check_callable_args,
     convert_to_datetime)

+try:
+    from collections.abc import Iterable, Mapping
+except ImportError:
+    from collections import Iterable, Mapping
+

 class Job(object):
     """
@@ -235,13 +240,20 @@ class Job(object):
                 'be determined. Consider giving a textual reference (module:function name) '
                 'instead.' % (self.func,))

+        # Instance methods cannot survive serialization as-is, so store the "self" argument
+        # explicitly
+        if ismethod(self.func) and not isclass(self.func.__self__):
+            args = (self.func.__self__,) + tuple(self.args)
+        else:
+            args = self.args
+
         return {
             'version': 1,
             'id': self.id,
             'func': self.func_ref,
             'trigger': self.trigger,
             'executor': self.executor,
-            'args': self.args,
+            'args': args,
             'kwargs': self.kwargs,
             'name': self.name,
             'misfire_grace_time': self.misfire_grace_time,
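The __getstate__ change above is what lets a job whose func is a bound instance method survive pickling: the owning instance is stored as an explicit first positional argument. A hedged sketch of that branch in isolation (the Reporter class and its method are illustrative, not part of apscheduler):

    from inspect import isclass, ismethod

    class Reporter:
        def send(self, channel):
            print('reporting to', channel)

    r = Reporter()
    func, args = r.send, ('email',)
    if ismethod(func) and not isclass(func.__self__):
        # Store the underlying instance as the first positional argument so
        # the serialized job can rebind it after a restart.
        args = (func.__self__,) + tuple(args)
    print(args)  # (<__main__.Reporter object at 0x...>, 'email')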
lib/apscheduler/jobstores/redis.py

@@ -14,7 +14,7 @@ except ImportError:  # pragma: nocover
     import pickle

 try:
-    from redis import StrictRedis
+    from redis import Redis
 except ImportError:  # pragma: nocover
     raise ImportError('RedisJobStore requires redis installed')

@@ -47,7 +47,7 @@ class RedisJobStore(BaseJobStore):
         self.pickle_protocol = pickle_protocol
         self.jobs_key = jobs_key
         self.run_times_key = run_times_key
-        self.redis = StrictRedis(db=int(db), **connect_args)
+        self.redis = Redis(db=int(db), **connect_args)

     def lookup_job(self, job_id):
         job_state = self.redis.hget(self.jobs_key, job_id)
@@ -81,7 +81,9 @@ class RedisJobStore(BaseJobStore):
             pipe.hset(self.jobs_key, job.id, pickle.dumps(job.__getstate__(),
                                                           self.pickle_protocol))
             if job.next_run_time:
-                pipe.zadd(self.run_times_key, datetime_to_utc_timestamp(job.next_run_time), job.id)
+                pipe.zadd(self.run_times_key,
+                          {job.id: datetime_to_utc_timestamp(job.next_run_time)})

             pipe.execute()

     def update_job(self, job):
@@ -92,9 +94,11 @@ class RedisJobStore(BaseJobStore):
             pipe.hset(self.jobs_key, job.id, pickle.dumps(job.__getstate__(),
                                                           self.pickle_protocol))
             if job.next_run_time:
-                pipe.zadd(self.run_times_key, datetime_to_utc_timestamp(job.next_run_time), job.id)
+                pipe.zadd(self.run_times_key,
+                          {job.id: datetime_to_utc_timestamp(job.next_run_time)})
             else:
                 pipe.zrem(self.run_times_key, job.id)

             pipe.execute()

     def remove_job(self, job_id):
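Context for the changes above: redis-py 3.x renamed StrictRedis to Redis (the old name survives only as an alias) and dropped the 2.x positional score/member form of zadd. A minimal sketch of the signature difference (the key and member names are illustrative):

    from redis import Redis

    r = Redis()
    # redis-py 2.x accepted score/member pairs as extra positional arguments:
    #     r.zadd('run_times', 1566214800.0, 'job-1')
    # redis-py 3.x accepts only a {member: score} mapping:
    r.zadd('run_times', {'job-1': 1566214800.0})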
lib/apscheduler/jobstores/rethinkdb.py

@@ -10,7 +10,7 @@ except ImportError:  # pragma: nocover
     import pickle

 try:
-    import rethinkdb as r
+    from rethinkdb import RethinkDB
 except ImportError:  # pragma: nocover
     raise ImportError('RethinkDBJobStore requires rethinkdb installed')

@@ -40,10 +40,12 @@ class RethinkDBJobStore(BaseJobStore):
             raise ValueError('The "table" parameter must not be empty')

         self.database = database
-        self.table = table
+        self.table_name = table
+        self.table = None
         self.client = client
         self.pickle_protocol = pickle_protocol
         self.connect_args = connect_args
+        self.r = RethinkDB()
         self.conn = None

     def start(self, scheduler, alias):
@@ -52,31 +54,31 @@ class RethinkDBJobStore(BaseJobStore):
         if self.client:
             self.conn = maybe_ref(self.client)
         else:
-            self.conn = r.connect(db=self.database, **self.connect_args)
+            self.conn = self.r.connect(db=self.database, **self.connect_args)

-        if self.database not in r.db_list().run(self.conn):
-            r.db_create(self.database).run(self.conn)
+        if self.database not in self.r.db_list().run(self.conn):
+            self.r.db_create(self.database).run(self.conn)

-        if self.table not in r.table_list().run(self.conn):
-            r.table_create(self.table).run(self.conn)
+        if self.table_name not in self.r.table_list().run(self.conn):
+            self.r.table_create(self.table_name).run(self.conn)

-        if 'next_run_time' not in r.table(self.table).index_list().run(self.conn):
-            r.table(self.table).index_create('next_run_time').run(self.conn)
+        if 'next_run_time' not in self.r.table(self.table_name).index_list().run(self.conn):
+            self.r.table(self.table_name).index_create('next_run_time').run(self.conn)

-        self.table = r.db(self.database).table(self.table)
+        self.table = self.r.db(self.database).table(self.table_name)

     def lookup_job(self, job_id):
         results = list(self.table.get_all(job_id).pluck('job_state').run(self.conn))
         return self._reconstitute_job(results[0]['job_state']) if results else None

     def get_due_jobs(self, now):
-        return self._get_jobs(r.row['next_run_time'] <= datetime_to_utc_timestamp(now))
+        return self._get_jobs(self.r.row['next_run_time'] <= datetime_to_utc_timestamp(now))

     def get_next_run_time(self):
         results = list(
             self.table
-            .filter(r.row['next_run_time'] != None)  # flake8: noqa
-            .order_by(r.asc('next_run_time'))
+            .filter(self.r.row['next_run_time'] != None)  # noqa
+            .order_by(self.r.asc('next_run_time'))
             .map(lambda x: x['next_run_time'])
             .limit(1)
             .run(self.conn)
@@ -92,7 +94,7 @@ class RethinkDBJobStore(BaseJobStore):
         job_dict = {
             'id': job.id,
             'next_run_time': datetime_to_utc_timestamp(job.next_run_time),
-            'job_state': r.binary(pickle.dumps(job.__getstate__(), self.pickle_protocol))
+            'job_state': self.r.binary(pickle.dumps(job.__getstate__(), self.pickle_protocol))
         }
         results = self.table.insert(job_dict).run(self.conn)
         if results['errors'] > 0:
@@ -101,7 +103,7 @@ class RethinkDBJobStore(BaseJobStore):
     def update_job(self, job):
         changes = {
             'next_run_time': datetime_to_utc_timestamp(job.next_run_time),
-            'job_state': r.binary(pickle.dumps(job.__getstate__(), self.pickle_protocol))
+            'job_state': self.r.binary(pickle.dumps(job.__getstate__(), self.pickle_protocol))
         }
         results = self.table.get_all(job.id).update(changes).run(self.conn)
         skipped = False in map(lambda x: results[x] == 0, results.keys())
@@ -130,20 +132,20 @@ class RethinkDBJobStore(BaseJobStore):
     def _get_jobs(self, predicate=None):
         jobs = []
         failed_job_ids = []
-        query = (self.table.filter(r.row['next_run_time'] != None).filter(predicate) if
-                 predicate else self.table)
+        query = (self.table.filter(self.r.row['next_run_time'] != None).filter(predicate)  # noqa
+                 if predicate else self.table)
         query = query.order_by('next_run_time', 'id').pluck('id', 'job_state')

         for document in query.run(self.conn):
             try:
                 jobs.append(self._reconstitute_job(document['job_state']))
-            except:
+            except Exception:
                 self._logger.exception('Unable to restore job "%s" -- removing it', document['id'])
                 failed_job_ids.append(document['id'])

         # Remove all the jobs we failed to restore
         if failed_job_ids:
-            r.expr(failed_job_ids).for_each(
+            self.r.expr(failed_job_ids).for_each(
                 lambda job_id: self.table.get_all(job_id).delete()).run(self.conn)

         return jobs
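The mechanical r → self.r rewrite above follows newer rethinkdb drivers, which replaced the import-the-module-as-r style with an instantiable RethinkDB class. A minimal sketch, assuming a reachable server and an existing apscheduler database:

    from rethinkdb import RethinkDB

    r = RethinkDB()                     # newer drivers: instantiate instead of `import rethinkdb as r`
    conn = r.connect(db='apscheduler')  # query builders now hang off the instance
    print(r.table_list().run(conn))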
lib/apscheduler/jobstores/sqlalchemy.py

@@ -106,7 +106,7 @@ class SQLAlchemyJobStore(BaseJobStore):
         }).where(self.jobs_t.c.id == job.id)
         result = self.engine.execute(update)
         if result.rowcount == 0:
-            raise JobLookupError(id)
+            raise JobLookupError(job.id)

     def remove_job(self, job_id):
         delete = self.jobs_t.delete().where(self.jobs_t.c.id == job_id)
lib/apscheduler/schedulers/base.py

@@ -1,7 +1,6 @@
 from __future__ import print_function

 from abc import ABCMeta, abstractmethod
-from collections import MutableMapping
 from threading import RLock
 from datetime import datetime, timedelta
 from logging import getLogger
@@ -19,13 +18,19 @@ from apscheduler.jobstores.base import ConflictingIdError, JobLookupError, BaseJ
 from apscheduler.jobstores.memory import MemoryJobStore
 from apscheduler.job import Job
 from apscheduler.triggers.base import BaseTrigger
-from apscheduler.util import asbool, asint, astimezone, maybe_ref, timedelta_seconds, undefined
+from apscheduler.util import (
+    asbool, asint, astimezone, maybe_ref, timedelta_seconds, undefined, TIMEOUT_MAX)
 from apscheduler.events import (
     SchedulerEvent, JobEvent, JobSubmissionEvent, EVENT_SCHEDULER_START, EVENT_SCHEDULER_SHUTDOWN,
     EVENT_JOBSTORE_ADDED, EVENT_JOBSTORE_REMOVED, EVENT_ALL, EVENT_JOB_MODIFIED, EVENT_JOB_REMOVED,
     EVENT_JOB_ADDED, EVENT_EXECUTOR_ADDED, EVENT_EXECUTOR_REMOVED, EVENT_ALL_JOBS_REMOVED,
     EVENT_JOB_SUBMITTED, EVENT_JOB_MAX_INSTANCES, EVENT_SCHEDULER_RESUMED, EVENT_SCHEDULER_PAUSED)

+try:
+    from collections.abc import MutableMapping
+except ImportError:
+    from collections import MutableMapping
+
 #: constant indicating a scheduler's stopped state
 STATE_STOPPED = 0
 #: constant indicating a scheduler's running state (started and processing jobs)
@@ -126,11 +131,14 @@ class BaseScheduler(six.with_metaclass(ABCMeta)):

         :param bool paused: if ``True``, don't start job processing until :meth:`resume` is called
         :raises SchedulerAlreadyRunningError: if the scheduler is already running
+        :raises RuntimeError: if running under uWSGI with threads disabled

         """
         if self.state != STATE_STOPPED:
             raise SchedulerAlreadyRunningError

+        self._check_uwsgi()
+
         with self._executors_lock:
             # Create a default executor if nothing else is configured
             if 'default' not in self._executors:
@@ -177,12 +185,13 @@ class BaseScheduler(six.with_metaclass(ABCMeta)):

         self.state = STATE_STOPPED

-        with self._jobstores_lock, self._executors_lock:
-            # Shut down all executors
+        # Shut down all executors
+        with self._executors_lock:
             for executor in six.itervalues(self._executors):
                 executor.shutdown(wait)

-            # Shut down all job stores
+        # Shut down all job stores
+        with self._jobstores_lock:
             for jobstore in six.itervalues(self._jobstores):
                 jobstore.shutdown()

@@ -546,7 +555,7 @@ class BaseScheduler(six.with_metaclass(ABCMeta)):
         """
         if pending is not None:
             warnings.warn('The "pending" option is deprecated -- get_jobs() always returns '
-                          'pending jobs if the scheduler has been started and scheduled jobs '
+                          'scheduled jobs if the scheduler has been started and pending jobs '
                          'otherwise', DeprecationWarning)

         with self._jobstores_lock:
@@ -589,14 +598,13 @@ class BaseScheduler(six.with_metaclass(ABCMeta)):
         """
         jobstore_alias = None
         with self._jobstores_lock:
-            # Check if the job is among the pending jobs
-            if self.state == STATE_STOPPED:
-                for i, (job, alias, replace_existing) in enumerate(self._pending_jobs):
-                    if job.id == job_id and jobstore in (None, alias):
-                        del self._pending_jobs[i]
-                        jobstore_alias = alias
-                        break
+            # Check if the job is among the pending jobs
+            if self.state == STATE_STOPPED:
+                for i, (job, alias, replace_existing) in enumerate(self._pending_jobs):
+                    if job.id == job_id and jobstore in (None, alias):
+                        del self._pending_jobs[i]
+                        jobstore_alias = alias
+                        break
             else:
                 # Otherwise, try to remove it from each store until it succeeds or we run out of
                 # stores to check
@@ -824,6 +832,14 @@ class BaseScheduler(six.with_metaclass(ABCMeta)):
             except BaseException:
                 self._logger.exception('Error notifying listener')

+    def _check_uwsgi(self):
+        """Check if we're running under uWSGI with threads disabled."""
+        uwsgi_module = sys.modules.get('uwsgi')
+        if not getattr(uwsgi_module, 'has_threads', True):
+            raise RuntimeError('The scheduler seems to be running under uWSGI, but threads have '
+                               'been disabled. You must run uWSGI with the --enable-threads '
+                               'option for the scheduler to work.')
+
     def _real_add_job(self, job, jobstore_alias, replace_existing):
         """
         :param Job job: the job to add
@@ -999,7 +1015,7 @@ class BaseScheduler(six.with_metaclass(ABCMeta)):
                 wait_seconds = None
                 self._logger.debug('No jobs; waiting until a job is added')
             else:
-                wait_seconds = max(timedelta_seconds(next_wakeup_time - now), 0)
+                wait_seconds = min(max(timedelta_seconds(next_wakeup_time - now), 0), TIMEOUT_MAX)
                 self._logger.debug('Next wakeup is due at %s (in %f seconds)', next_wakeup_time,
                                    wait_seconds)
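The TIMEOUT_MAX clamp in the last hunk guards the main loop's wait: a far-future (or absent) next run time used to produce a timeout value that Event.wait() cannot handle everywhere. A small sketch of the failure mode it avoids (the huge value is illustrative):

    try:
        from threading import TIMEOUT_MAX          # available on Python 3.2+
    except ImportError:
        TIMEOUT_MAX = 4294967                      # fallback used by apscheduler.util

    wait_seconds = 10 ** 12                        # "no job due for ages"
    safe_wait = min(max(wait_seconds, 0), TIMEOUT_MAX)
    # threading.Event().wait(wait_seconds) can raise OverflowError on some
    # platforms, while wait(safe_wait) merely wakes up early and re-checks.
    print(safe_wait)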
lib/apscheduler/schedulers/qt.py

@@ -9,7 +9,7 @@ except (ImportError, RuntimeError):  # pragma: nocover
         from PyQt4.QtCore import QObject, QTimer
     except ImportError:
         try:
-            from PySide.QtCore import QObject, QTimer  # flake8: noqa
+            from PySide.QtCore import QObject, QTimer  # noqa
         except ImportError:
             raise ImportError('QtScheduler requires either PyQt5, PyQt4 or PySide installed')

@@ -26,7 +26,8 @@ class QtScheduler(BaseScheduler):
     def _start_timer(self, wait_seconds):
         self._stop_timer()
         if wait_seconds is not None:
-            self._timer = QTimer.singleShot(wait_seconds * 1000, self._process_jobs)
+            wait_time = min(wait_seconds * 1000, 2147483647)
+            self._timer = QTimer.singleShot(wait_time, self._process_jobs)

     def _stop_timer(self):
         if self._timer:
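The 2147483647 cap above exists because QTimer takes its interval as a signed 32-bit count of milliseconds, so longer waits must be clamped and the timer simply re-armed when it fires. A quick check of what that cap means in wall-clock terms:

    QT_TIMER_MAX_MS = 2 ** 31 - 1                     # 2147483647, QTimer's signed 32-bit limit
    print(QT_TIMER_MAX_MS / (1000.0 * 60 * 60 * 24))  # ~24.86 days per single shot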
lib/apscheduler/triggers/cron/__init__.py

@@ -192,9 +192,8 @@ class CronTrigger(BaseTrigger):
                 return None

         if fieldnum >= 0:
-            if self.jitter is not None:
-                next_date = self._apply_jitter(next_date, self.jitter, now)
-            return next_date
+            next_date = self._apply_jitter(next_date, self.jitter, now)
+            return min(next_date, self.end_date) if self.end_date else next_date

     def __getstate__(self):
         return {
lib/apscheduler/triggers/cron/expressions.py

@@ -9,7 +9,7 @@ __all__ = ('AllExpression', 'RangeExpression', 'WeekdayRangeExpression',
            'WeekdayPositionExpression', 'LastDayOfMonthExpression')


-WEEKDAYS = ['sun', 'mon', 'tue', 'wed', 'thu', 'fri', 'sat']
+WEEKDAYS = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']
 MONTHS = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec']


lib/apscheduler/triggers/cron/fields.py

@@ -104,7 +104,7 @@ class DayOfWeekField(BaseField):
     COMPILERS = BaseField.COMPILERS + [WeekdayRangeExpression]

     def get_value(self, dateval):
-        return dateval.isoweekday() % 7
+        return dateval.weekday()


 class MonthField(BaseField):
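The two changes above have to move together: get_value() now uses datetime.weekday(), which numbers Monday as 0, so the WEEKDAYS name table must start with 'mon', just as the old sun-first table matched isoweekday() % 7 (where Sunday is 0). A small sanity check:

    from datetime import date

    d = date(2019, 8, 19)                     # a Monday
    old = ['sun', 'mon', 'tue', 'wed', 'thu', 'fri', 'sat'][d.isoweekday() % 7]
    new = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun'][d.weekday()]
    assert old == new == 'mon'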
lib/apscheduler/util.py

@@ -1,12 +1,14 @@
 """This module contains several handy functions primarily meant for internal use."""

 from __future__ import division

 from datetime import date, datetime, time, timedelta, tzinfo
 from calendar import timegm
-import re
+from functools import partial
+from inspect import isclass, ismethod
+import re

-from pytz import timezone, utc
+from pytz import timezone, utc, FixedOffset
 import six

 try:
@@ -19,9 +21,19 @@ try:
 except ImportError:
     TIMEOUT_MAX = 4294967  # Maximum value accepted by Event.wait() on Windows

+try:
+    from asyncio import iscoroutinefunction
+except ImportError:
+    try:
+        from trollius import iscoroutinefunction
+    except ImportError:
+        def iscoroutinefunction(func):
+            return False
+
 __all__ = ('asint', 'asbool', 'astimezone', 'convert_to_datetime', 'datetime_to_utc_timestamp',
            'utc_timestamp_to_datetime', 'timedelta_seconds', 'datetime_ceil', 'get_callable_name',
-           'obj_to_ref', 'ref_to_obj', 'maybe_ref', 'repr_escape', 'check_callable_args')
+           'obj_to_ref', 'ref_to_obj', 'maybe_ref', 'repr_escape', 'check_callable_args',
+           'TIMEOUT_MAX')


 class _Undefined(object):
@@ -92,8 +104,9 @@ def astimezone(obj):

 _DATE_REGEX = re.compile(
     r'(?P<year>\d{4})-(?P<month>\d{1,2})-(?P<day>\d{1,2})'
-    r'(?: (?P<hour>\d{1,2}):(?P<minute>\d{1,2}):(?P<second>\d{1,2})'
-    r'(?:\.(?P<microsecond>\d{1,6}))?)?')
+    r'(?:[ T](?P<hour>\d{1,2}):(?P<minute>\d{1,2}):(?P<second>\d{1,2})'
+    r'(?:\.(?P<microsecond>\d{1,6}))?'
+    r'(?P<timezone>Z|[+-]\d\d:\d\d)?)?$')


 def convert_to_datetime(input, tz, arg_name):
@@ -105,7 +118,9 @@ def convert_to_datetime(input, tz, arg_name):
     If the input is a string, it is parsed as a datetime with the given timezone.

     Date strings are accepted in three different forms: date only (Y-m-d), date with time
-    (Y-m-d H:M:S) or with date+time with microseconds (Y-m-d H:M:S.micro).
+    (Y-m-d H:M:S) or with date+time with microseconds (Y-m-d H:M:S.micro). Additionally you can
+    override the time zone by giving a specific offset in the format specified by ISO 8601:
+    Z (UTC), +HH:MM or -HH:MM.

     :param str|datetime input: the datetime or string to convert to a timezone aware datetime
     :param datetime.tzinfo tz: timezone to interpret ``input`` in
@@ -123,8 +138,17 @@ def convert_to_datetime(input, tz, arg_name):
         m = _DATE_REGEX.match(input)
         if not m:
             raise ValueError('Invalid date string')
-        values = [(k, int(v or 0)) for k, v in m.groupdict().items()]
-        values = dict(values)
+
+        values = m.groupdict()
+        tzname = values.pop('timezone')
+        if tzname == 'Z':
+            tz = utc
+        elif tzname:
+            hours, minutes = (int(x) for x in tzname[1:].split(':'))
+            sign = 1 if tzname[0] == '+' else -1
+            tz = FixedOffset(sign * (hours * 60 + minutes))
+
+        values = {k: int(v or 0) for k, v in values.items()}
         datetime_ = datetime(**values)
     else:
         raise TypeError('Unsupported type for %s: %s' % (arg_name, input.__class__.__name__))
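With the extended regex above, an ISO 8601 suffix in the date string overrides the tz argument. A standalone sketch of the same offset-parsing branch (the function name is illustrative):

    from pytz import FixedOffset, utc

    def parse_offset(tzname):
        # Mirrors the branch added above: 'Z' means UTC, otherwise +HH:MM / -HH:MM.
        if tzname == 'Z':
            return utc
        hours, minutes = (int(x) for x in tzname[1:].split(':'))
        sign = 1 if tzname[0] == '+' else -1
        return FixedOffset(sign * (hours * 60 + minutes))

    print(parse_offset('+05:30'))  # pytz.FixedOffset(330)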
@@ -210,7 +234,7 @@ def get_callable_name(func):
     # class methods, bound and unbound methods
     f_self = getattr(func, '__self__', None) or getattr(func, 'im_self', None)
     if f_self and hasattr(func, '__name__'):
-        f_class = f_self if isinstance(f_self, type) else f_self.__class__
+        f_class = f_self if isclass(f_self) else f_self.__class__
     else:
         f_class = getattr(func, 'im_class', None)

@@ -248,7 +272,18 @@ def obj_to_ref(obj):
     if '<locals>' in name:
         raise ValueError('Cannot create a reference to a nested function')

-    return '%s:%s' % (obj.__module__, name)
+    if ismethod(obj):
+        if hasattr(obj, 'im_self') and obj.im_self:
+            # bound method
+            module = obj.im_self.__module__
+        elif hasattr(obj, 'im_class') and obj.im_class:
+            # unbound method
+            module = obj.im_class.__module__
+        else:
+            module = obj.__module__
+    else:
+        module = obj.__module__
+    return '%s:%s' % (module, name)


 def ref_to_obj(ref):
@@ -383,3 +418,12 @@ def check_callable_args(func, args, kwargs):
         raise ValueError(
             'The target callable does not accept the following keyword arguments: %s' %
             ', '.join(unmatched_kwargs))
+
+
+def iscoroutinefunction_partial(f):
+    while isinstance(f, partial):
+        f = f.func
+
+    # The asyncio version of iscoroutinefunction includes testing for @coroutine
+    # decorations vs. the inspect version which does not.
+    return iscoroutinefunction(f)
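The new helper exists because wrapping a coroutine in functools.partial hides it from the plain check on older interpreters; the executors above now call it instead of the raw check. A hedged demonstration (fetch is an illustrative job function):

    import asyncio
    from functools import partial

    async def fetch(url, timeout):
        await asyncio.sleep(0)   # placeholder body

    job_func = partial(fetch, timeout=5)
    # On Pythons older than 3.8, iscoroutinefunction() does not look inside
    # functools.partial, so a partial-wrapped coroutine is misdetected:
    print(asyncio.iscoroutinefunction(job_func))       # False before Python 3.8
    print(asyncio.iscoroutinefunction(job_func.func))  # True: the unwrapped function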
lib/argparse.py (186 changes)
@@ -1,4 +1,5 @@
 # Author: Steven J. Bethard <steven.bethard@gmail.com>.
+# Maintainer: Thomas Waldmann <tw@waldmann-edv.de>

 """Command-line parsing library

@@ -61,7 +62,12 @@ considered public as object names -- the API of the formatter objects is
 still considered an implementation detail.)
 """

-__version__ = '1.1'
+__version__ = '1.4.0'  # we use our own version number independant of the
+                       # one in stdlib and we release this on pypi.
+
+__external_lib__ = True  # to make sure the tests really test THIS lib,
+                         # not the builtin one in Python stdlib
+
 __all__ = [
     'ArgumentParser',
     'ArgumentError',
@@ -71,7 +77,6 @@ __all__ = [
     'ArgumentDefaultsHelpFormatter',
     'RawDescriptionHelpFormatter',
     'RawTextHelpFormatter',
-    'MetavarTypeHelpFormatter',
     'Namespace',
     'Action',
     'ONE_OR_MORE',
@@ -83,14 +88,35 @@ __all__ = [
 ]


-import collections as _collections
 import copy as _copy
 import os as _os
 import re as _re
 import sys as _sys
 import textwrap as _textwrap

-from gettext import gettext as _, ngettext
+from gettext import gettext as _
+
+try:
+    set
+except NameError:
+    # for python < 2.4 compatibility (sets module is there since 2.3):
+    from sets import Set as set
+
+try:
+    basestring
+except NameError:
+    basestring = str
+
+try:
+    sorted
+except NameError:
+    # for python < 2.4 compatibility:
+    def sorted(iterable, reverse=False):
+        result = list(iterable)
+        result.sort()
+        if reverse:
+            result.reverse()
+        return result


 def _callable(obj):
@@ -424,8 +450,7 @@ class HelpFormatter(object):

             # produce all arg strings
             elif not action.option_strings:
-                default = self._get_default_metavar_for_positional(action)
-                part = self._format_args(action, default)
+                part = self._format_args(action, action.dest)

                 # if it's in a group, strip the outer []
                 if action in group_actions:
@@ -447,7 +472,7 @@ class HelpFormatter(object):
                 # if the Optional takes a value, format is:
                 #    -s ARGS or --long ARGS
                 else:
-                    default = self._get_default_metavar_for_optional(action)
+                    default = action.dest.upper()
                     args_string = self._format_args(action, default)
                     part = '%s %s' % (option_string, args_string)

@@ -533,8 +558,7 @@ class HelpFormatter(object):

     def _format_action_invocation(self, action):
         if not action.option_strings:
-            default = self._get_default_metavar_for_positional(action)
-            metavar, = self._metavar_formatter(action, default)(1)
+            metavar, = self._metavar_formatter(action, action.dest)(1)
             return metavar

         else:
@@ -548,7 +572,7 @@ class HelpFormatter(object):
             # if the Optional takes a value, format is:
             #    -s ARGS, --long ARGS
             else:
-                default = self._get_default_metavar_for_optional(action)
+                default = action.dest.upper()
                 args_string = self._format_args(action, default)
                 for option_string in action.option_strings:
                     parts.append('%s %s' % (option_string, args_string))
@@ -626,12 +650,6 @@ class HelpFormatter(object):
     def _get_help_string(self, action):
         return action.help

-    def _get_default_metavar_for_optional(self, action):
-        return action.dest.upper()
-
-    def _get_default_metavar_for_positional(self, action):
-        return action.dest
-

 class RawDescriptionHelpFormatter(HelpFormatter):
     """Help message formatter which retains any formatting in descriptions.
@@ -672,22 +690,6 @@ class ArgumentDefaultsHelpFormatter(HelpFormatter):
         return help


-class MetavarTypeHelpFormatter(HelpFormatter):
-    """Help message formatter which uses the argument 'type' as the default
-    metavar value (instead of the argument 'dest')
-
-    Only the name of this class is considered a public API. All the methods
-    provided by the class are considered an implementation detail.
-    """
-
-    def _get_default_metavar_for_optional(self, action):
-        return action.type.__name__
-
-    def _get_default_metavar_for_positional(self, action):
-        return action.type.__name__
-
-
 # =====================
 # Options and Arguments
 # =====================
@@ -1055,7 +1057,7 @@ class _SubParsersAction(Action):
                 metavar += ' (%s)' % ', '.join(aliases)
             sup = super(_SubParsersAction._ChoicesPseudoAction, self)
             sup.__init__(option_strings=[], dest=dest, help=help,
-                        metavar=metavar)
+                         metavar=metavar)

     def __init__(self,
                  option_strings,
@@ -1067,7 +1069,7 @@ class _SubParsersAction(Action):

         self._prog_prefix = prog
         self._parser_class = parser_class
-        self._name_parser_map = _collections.OrderedDict()
+        self._name_parser_map = {}
         self._choices_actions = []

         super(_SubParsersAction, self).__init__(
@@ -1116,9 +1118,8 @@ class _SubParsersAction(Action):
         try:
             parser = self._name_parser_map[parser_name]
         except KeyError:
-            args = {'parser_name': parser_name,
-                    'choices': ', '.join(self._name_parser_map)}
-            msg = _('unknown parser %(parser_name)r (choices: %(choices)s)') % args
+            tup = parser_name, ', '.join(self._name_parser_map)
+            msg = _('unknown parser %r (choices: %s)' % tup)
             raise ArgumentError(self, msg)

         # parse all the remaining options into the namespace
@@ -1147,7 +1148,7 @@ class FileType(object):
         the builtin open() function.
     """

-    def __init__(self, mode='r', bufsize=-1):
+    def __init__(self, mode='r', bufsize=None):
         self._mode = mode
         self._bufsize = bufsize

@@ -1159,19 +1160,23 @@ class FileType(object):
             elif 'w' in self._mode:
                 return _sys.stdout
             else:
-                msg = _('argument "-" with mode %r') % self._mode
+                msg = _('argument "-" with mode %r' % self._mode)
                 raise ValueError(msg)

-        # all other arguments are used as file names
         try:
-            return open(string, self._mode, self._bufsize)
-        except IOError as e:
+            # all other arguments are used as file names
+            if self._bufsize:
+                return open(string, self._mode, self._bufsize)
+            else:
+                return open(string, self._mode)
+        except IOError:
+            err = _sys.exc_info()[1]
             message = _("can't open '%s': %s")
-            raise ArgumentTypeError(message % (string, e))
+            raise ArgumentTypeError(message % (string, err))

     def __repr__(self):
-        args = self._mode, self._bufsize
-        args_str = ', '.join(repr(arg) for arg in args if arg != -1)
+        args = [self._mode, self._bufsize]
+        args_str = ', '.join([repr(arg) for arg in args if arg is not None])
         return '%s(%s)' % (type(self).__name__, args_str)

 # ===========================
@@ -1189,6 +1194,8 @@ class Namespace(_AttributeHolder):
         for name in kwargs:
             setattr(self, name, kwargs[name])

+    __hash__ = None
+
     def __eq__(self, other):
         return vars(self) == vars(other)

@@ -1312,20 +1319,13 @@ class _ActionsContainer(object):
         # create the action object, and add it to the parser
         action_class = self._pop_action_class(kwargs)
         if not _callable(action_class):
-            raise ValueError('unknown action "%s"' % (action_class,))
+            raise ValueError('unknown action "%s"' % action_class)
         action = action_class(**kwargs)

         # raise an error if the action type is not callable
         type_func = self._registry_get('type', action.type, action.type)
         if not _callable(type_func):
-            raise ValueError('%r is not callable' % (type_func,))
-
-        # raise an error if the metavar does not match the type
-        if hasattr(self, "_get_formatter"):
-            try:
-                self._get_formatter()._format_args(action, None)
-            except TypeError:
-                raise ValueError("length of metavar tuple does not match nargs")
+            raise ValueError('%r is not callable' % type_func)

         return self._add_action(action)

@@ -1426,11 +1426,10 @@ class _ActionsContainer(object):
         for option_string in args:
             # error on strings that don't start with an appropriate prefix
             if not option_string[0] in self.prefix_chars:
-                args = {'option': option_string,
-                        'prefix_chars': self.prefix_chars}
-                msg = _('invalid option string %(option)r: '
-                        'must start with a character %(prefix_chars)r')
-                raise ValueError(msg % args)
+                msg = _('invalid option string %r: '
+                        'must start with a character %r')
+                tup = option_string, self.prefix_chars
+                raise ValueError(msg % tup)

             # strings starting with two prefix characters are long options
             option_strings.append(option_string)
@@ -1483,9 +1482,7 @@ class _ActionsContainer(object):
                 conflict_handler(action, confl_optionals)

     def _handle_conflict_error(self, action, conflicting_actions):
-        message = ngettext('conflicting option string: %s',
-                           'conflicting option strings: %s',
-                           len(conflicting_actions))
+        message = _('conflicting option string(s): %s')
         conflict_string = ', '.join([option_string
                                      for option_string, action
                                      in conflicting_actions])
@@ -1528,7 +1525,6 @@ class _ArgumentGroup(_ActionsContainer):
         self._defaults = container._defaults
         self._has_negative_number_optionals = \
             container._has_negative_number_optionals
-        self._mutually_exclusive_groups = container._mutually_exclusive_groups

     def _add_action(self, action):
         action = super(_ArgumentGroup, self)._add_action(action)
@@ -1630,7 +1626,10 @@ class ArgumentParser(_AttributeHolder, _ActionsContainer):

         # add help and version arguments if necessary
         # (using explicit default to override global argument_default)
-        default_prefix = '-' if '-' in prefix_chars else prefix_chars[0]
+        if '-' in prefix_chars:
+            default_prefix = '-'
+        else:
+            default_prefix = prefix_chars[0]
         if self.add_help:
             self.add_argument(
                 default_prefix+'h', default_prefix*2+'help',
@@ -1743,10 +1742,7 @@ class ArgumentParser(_AttributeHolder, _ActionsContainer):
             if action.dest is not SUPPRESS:
                 if not hasattr(namespace, action.dest):
                     if action.default is not SUPPRESS:
-                        default = action.default
-                        if isinstance(action.default, str):
-                            default = self._get_value(action, default)
-                        setattr(namespace, action.dest, default)
+                        setattr(namespace, action.dest, action.default)

         # add any parser defaults that aren't present
         for dest in self._defaults:
@@ -1969,12 +1965,28 @@ class ArgumentParser(_AttributeHolder, _ActionsContainer):
         # if we didn't consume all the argument strings, there were extras
         extras.extend(arg_strings[stop_index:])

-        # make sure all required actions were present
-        required_actions = [_get_action_name(action) for action in self._actions
-                            if action.required and action not in seen_actions]
-        if required_actions:
-            self.error(_('the following arguments are required: %s') %
-                       ', '.join(required_actions))
+        # if we didn't use all the Positional objects, there were too few
+        # arg strings supplied.
+        if positionals:
+            self.error(_('too few arguments'))
+
+        # make sure all required actions were present, and convert defaults.
+        for action in self._actions:
+            if action not in seen_actions:
+                if action.required:
+                    name = _get_action_name(action)
+                    self.error(_('argument %s is required') % name)
+                else:
+                    # Convert action default now instead of doing it before
+                    # parsing arguments to avoid calling convert functions
+                    # twice (which may fail) if the argument was given, but
+                    # only if it was defined already in the namespace
+                    if (action.default is not None and
+                            isinstance(action.default, basestring) and
+                            hasattr(namespace, action.dest) and
+                            action.default is getattr(namespace, action.dest)):
+                        setattr(namespace, action.dest,
+                                self._get_value(action, action.default))

         # make sure all required groups had one option present
         for group in self._mutually_exclusive_groups:
@@ -2038,9 +2050,7 @@ class ArgumentParser(_AttributeHolder, _ActionsContainer):
             OPTIONAL: _('expected at most one argument'),
             ONE_OR_MORE: _('expected at least one argument'),
         }
-        default = ngettext('expected %s argument',
-                           'expected %s arguments',
-                           action.nargs) % action.nargs
+        default = _('expected %s argument(s)') % action.nargs
         msg = nargs_errors.get(action.nargs, default)
         raise ArgumentError(action, msg)

@@ -2096,9 +2106,8 @@ class ArgumentParser(_AttributeHolder, _ActionsContainer):
         if len(option_tuples) > 1:
             options = ', '.join([option_string
                                  for action, option_string, explicit_arg in option_tuples])
-            args = {'option': arg_string, 'matches': options}
-            msg = _('ambiguous option: %(option)s could match %(matches)s')
-            self.error(msg % args)
+            tup = arg_string, options
+            self.error(_('ambiguous option: %s could match %s') % tup)

         # if exactly one action matched, this segmentation is good,
         # so return the parsed action
@@ -2220,7 +2229,7 @@ class ArgumentParser(_AttributeHolder, _ActionsContainer):
                 value = action.const
             else:
                 value = action.default
-                if isinstance(value, str):
+                if isinstance(value, basestring):
                     value = self._get_value(action, value)
                 self._check_value(action, value)

@@ -2277,9 +2286,8 @@ class ArgumentParser(_AttributeHolder, _ActionsContainer):
         # TypeErrors or ValueErrors also indicate errors
         except (TypeError, ValueError):
             name = getattr(action.type, '__name__', repr(action.type))
-            args = {'type': name, 'value': arg_string}
-            msg = _('invalid %(type)s value: %(value)r')
-            raise ArgumentError(action, msg % args)
+            msg = _('invalid %s value: %r')
+            raise ArgumentError(action, msg % (name, arg_string))

         # return the converted value
         return result

@@ -2287,10 +2295,9 @@ class ArgumentParser(_AttributeHolder, _ActionsContainer):
     def _check_value(self, action, value):
         # converted value must be one of the choices (if specified)
         if action.choices is not None and value not in action.choices:
-            args = {'value': value,
-                    'choices': ', '.join(map(repr, action.choices))}
-            msg = _('invalid choice: %(value)r (choose from %(choices)s)')
-            raise ArgumentError(action, msg % args)
+            tup = value, ', '.join(map(repr, action.choices))
+            msg = _('invalid choice: %r (choose from %s)') % tup
+            raise ArgumentError(action, msg)

     # =======================
     # Help-formatting methods
@@ -2382,5 +2389,4 @@ class ArgumentParser(_AttributeHolder, _ActionsContainer):
         should either exit or raise an exception.
         """
         self.print_usage(_sys.stderr)
-        args = {'prog': self.prog, 'message': message}
-        self.exit(2, _('%(prog)s: error: %(message)s\n') % args)
+        self.exit(2, _('%s: error: %s\n') % (self.prog, message))
lib/backports/__init__.py (new file, 1 line)

@@ -0,0 +1 @@
+__path__ = __import__('pkgutil').extend_path(__path__, __name__)
lib/backports/functools_lru_cache.py (new file, 196 lines)

@@ -0,0 +1,196 @@
from __future__ import absolute_import

import functools
from collections import namedtuple
from threading import RLock

_CacheInfo = namedtuple("CacheInfo", ["hits", "misses", "maxsize", "currsize"])


@functools.wraps(functools.update_wrapper)
def update_wrapper(
    wrapper,
    wrapped,
    assigned=functools.WRAPPER_ASSIGNMENTS,
    updated=functools.WRAPPER_UPDATES,
):
    """
    Patch two bugs in functools.update_wrapper.
    """
    # workaround for http://bugs.python.org/issue3445
    assigned = tuple(attr for attr in assigned if hasattr(wrapped, attr))
    wrapper = functools.update_wrapper(wrapper, wrapped, assigned, updated)
    # workaround for https://bugs.python.org/issue17482
    wrapper.__wrapped__ = wrapped
    return wrapper


class _HashedSeq(list):
    __slots__ = 'hashvalue'

    def __init__(self, tup, hash=hash):
        self[:] = tup
        self.hashvalue = hash(tup)

    def __hash__(self):
        return self.hashvalue


def _make_key(
    args,
    kwds,
    typed,
    kwd_mark=(object(),),
    fasttypes=set([int, str, frozenset, type(None)]),
    sorted=sorted,
    tuple=tuple,
    type=type,
    len=len,
):
    'Make a cache key from optionally typed positional and keyword arguments'
    key = args
    if kwds:
        sorted_items = sorted(kwds.items())
        key += kwd_mark
        for item in sorted_items:
            key += item
    if typed:
        key += tuple(type(v) for v in args)
        if kwds:
            key += tuple(type(v) for k, v in sorted_items)
    elif len(key) == 1 and type(key[0]) in fasttypes:
        return key[0]
    return _HashedSeq(key)


def lru_cache(maxsize=100, typed=False):
    """Least-recently-used cache decorator.

    If *maxsize* is set to None, the LRU features are disabled and the cache
    can grow without bound.

    If *typed* is True, arguments of different types will be cached separately.
    For example, f(3.0) and f(3) will be treated as distinct calls with
    distinct results.

    Arguments to the cached function must be hashable.

    View the cache statistics named tuple (hits, misses, maxsize, currsize) with
    f.cache_info().  Clear the cache and statistics with f.cache_clear().
    Access the underlying function with f.__wrapped__.

    See:  http://en.wikipedia.org/wiki/Cache_algorithms#Least_Recently_Used

    """

    # Users should only access the lru_cache through its public API:
    #       cache_info, cache_clear, and f.__wrapped__
    # The internals of the lru_cache are encapsulated for thread safety and
    # to allow the implementation to change (including a possible C version).

    def decorating_function(user_function):

        cache = dict()
        stats = [0, 0]                  # make statistics updateable non-locally
        HITS, MISSES = 0, 1             # names for the stats fields
        make_key = _make_key
        cache_get = cache.get           # bound method to lookup key or return None
        _len = len                      # localize the global len() function
        lock = RLock()                  # because linkedlist updates aren't threadsafe
        root = []                       # root of the circular doubly linked list
        root[:] = [root, root, None, None]      # initialize by pointing to self
        nonlocal_root = [root]                  # make updateable non-locally
        PREV, NEXT, KEY, RESULT = 0, 1, 2, 3    # names for the link fields

        if maxsize == 0:

            def wrapper(*args, **kwds):
                # no caching, just do a statistics update after a successful call
                result = user_function(*args, **kwds)
                stats[MISSES] += 1
                return result

        elif maxsize is None:

            def wrapper(*args, **kwds):
                # simple caching without ordering or size limit
                key = make_key(args, kwds, typed)
                result = cache_get(
                    key, root
                )  # root used here as a unique not-found sentinel
                if result is not root:
                    stats[HITS] += 1
                    return result
                result = user_function(*args, **kwds)
                cache[key] = result
                stats[MISSES] += 1
                return result

        else:

            def wrapper(*args, **kwds):
                # size limited caching that tracks accesses by recency
                key = make_key(args, kwds, typed) if kwds or typed else args
                with lock:
                    link = cache_get(key)
                    if link is not None:
                        # record recent use of the key by moving it
                        # to the front of the list
                        root, = nonlocal_root
                        link_prev, link_next, key, result = link
                        link_prev[NEXT] = link_next
                        link_next[PREV] = link_prev
                        last = root[PREV]
                        last[NEXT] = root[PREV] = link
                        link[PREV] = last
                        link[NEXT] = root
                        stats[HITS] += 1
                        return result
                result = user_function(*args, **kwds)
                with lock:
                    root, = nonlocal_root
                    if key in cache:
                        # getting here means that this same key was added to the
                        # cache while the lock was released.  since the link
                        # update is already done, we need only return the
                        # computed result and update the count of misses.
                        pass
                    elif _len(cache) >= maxsize:
                        # use the old root to store the new key and result
                        oldroot = root
                        oldroot[KEY] = key
                        oldroot[RESULT] = result
                        # empty the oldest link and make it the new root
                        root = nonlocal_root[0] = oldroot[NEXT]
                        oldkey = root[KEY]
                        root[KEY] = root[RESULT] = None
                        # now update the cache dictionary for the new links
                        del cache[oldkey]
                        cache[key] = oldroot
                    else:
                        # put result in a new link at the front of the list
                        last = root[PREV]
                        link = [last, root, key, result]
                        last[NEXT] = root[PREV] = cache[key] = link
                stats[MISSES] += 1
                return result

        def cache_info():
            """Report cache statistics"""
            with lock:
                return _CacheInfo(stats[HITS], stats[MISSES], maxsize, len(cache))

        def cache_clear():
            """Clear the cache and cache statistics"""
            with lock:
                cache.clear()
                root = nonlocal_root[0]
                root[:] = [root, root, None, None]
                stats[:] = [0, 0]

        wrapper.__wrapped__ = user_function
        wrapper.cache_info = cache_info
        wrapper.cache_clear = cache_clear
        return update_wrapper(wrapper, user_function)

    return decorating_function
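The backport is used exactly like the Python 3 functools.lru_cache; a quick sketch (the fib function is illustrative):

    from backports.functools_lru_cache import lru_cache

    @lru_cache(maxsize=32)
    def fib(n):
        return n if n < 2 else fib(n - 1) + fib(n - 2)

    print(fib(30))           # each distinct n is computed once
    print(fib.cache_info())  # CacheInfo(hits=28, misses=31, maxsize=32, currsize=31)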
lib/bs4/__init__.py

@@ -5,26 +5,30 @@ http://www.crummy.com/software/BeautifulSoup/

 Beautiful Soup uses a pluggable XML or HTML parser to parse a
 (possibly invalid) document into a tree representation. Beautiful Soup
-provides provides methods and Pythonic idioms that make it easy to
-navigate, search, and modify the parse tree.
+provides methods and Pythonic idioms that make it easy to navigate,
+search, and modify the parse tree.

-Beautiful Soup works with Python 2.6 and up. It works better if lxml
+Beautiful Soup works with Python 2.7 and up. It works better if lxml
 and/or html5lib is installed.

 For more than you ever wanted to know about Beautiful Soup, see the
 documentation:
 http://www.crummy.com/software/BeautifulSoup/bs4/doc/

 """

 __author__ = "Leonard Richardson (leonardr@segfault.org)"
-__version__ = "4.3.2"
-__copyright__ = "Copyright (c) 2004-2013 Leonard Richardson"
+__version__ = "4.8.1"
+__copyright__ = "Copyright (c) 2004-2019 Leonard Richardson"
+# Use of this source code is governed by the MIT license.
 __license__ = "MIT"

 __all__ = ['BeautifulSoup']

 import os
 import re
 import sys
 import traceback
 import warnings

 from .builder import builder_registry, ParserRejectedMarkup
@@ -45,7 +49,7 @@ from .element import (

 # The very first thing we do is give a useful error if someone is
 # running this code under Python 3 without converting it.
-syntax_error = u'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work. You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).'
+'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work.'!='You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).'

 class BeautifulSoup(Tag):
     """
@@ -59,7 +63,7 @@ class BeautifulSoup(Tag):
       handle_starttag(name, attrs) # See note about return value
       handle_endtag(name)
      handle_data(data) # Appends to the current data node
-      endData(containerClass=NavigableString) # Ends the current data node
+      endData(containerClass) # Ends the current data node

     No matter how complicated the underlying parser is, you should be
     able to build a tree using 'start tag' events, 'end tag' events,
@@ -69,21 +73,70 @@ class BeautifulSoup(Tag):
     like HTML's <br> tag), call handle_starttag and then
     handle_endtag.
     """
-    ROOT_TAG_NAME = u'[document]'
+    ROOT_TAG_NAME = '[document]'

     # If the end-user gives no indication which tree builder they
     # want, look for one with these features.
     DEFAULT_BUILDER_FEATURES = ['html', 'fast']

+    ASCII_SPACES = '\x20\x0a\x09\x0c\x0d'
+
+    NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nThe code that caused this warning is on line %(line_number)s of the file %(filename)s. To get rid of this warning, pass the additional argument 'features=\"%(parser)s\"' to the BeautifulSoup constructor.\n"
+
     def __init__(self, markup="", features=None, builder=None,
-                 parse_only=None, from_encoding=None, **kwargs):
-        """The Soup object is initialized as the 'root tag', and the
-        provided markup (which can be a string or a file-like object)
-        is fed into the underlying parser."""
+                 parse_only=None, from_encoding=None, exclude_encodings=None,
+                 element_classes=None, **kwargs):
+        """Constructor.
+
+        :param markup: A string or a file-like object representing
+         markup to be parsed.
+
+        :param features: Desirable features of the parser to be used. This
+         may be the name of a specific parser ("lxml", "lxml-xml",
+         "html.parser", or "html5lib") or it may be the type of markup
+         to be used ("html", "html5", "xml"). It's recommended that you
+         name a specific parser, so that Beautiful Soup gives you the
+         same results across platforms and virtual environments.
+
+        :param builder: A TreeBuilder subclass to instantiate (or
+         instance to use) instead of looking one up based on
+         `features`. You only need to use this if you've implemented a
+         custom TreeBuilder.
+
+        :param parse_only: A SoupStrainer. Only parts of the document
+         matching the SoupStrainer will be considered. This is useful
+         when parsing part of a document that would otherwise be too
+         large to fit into memory.
+
+        :param from_encoding: A string indicating the encoding of the
+         document to be parsed. Pass this in if Beautiful Soup is
+         guessing wrongly about the document's encoding.
+
+        :param exclude_encodings: A list of strings indicating
+         encodings known to be wrong. Pass this in if you don't know
+         the document's encoding but you know Beautiful Soup's guess is
+         wrong.
+
+        :param element_classes: A dictionary mapping BeautifulSoup
+         classes like Tag and NavigableString to other classes you'd
+         like to be instantiated instead as the parse tree is
+         built. This is useful for using subclasses to modify the
+         default behavior of Tag or NavigableString.
+
+        :param kwargs: For backwards compatibility purposes, the
+         constructor accepts certain keyword arguments used in
+         Beautiful Soup 3. None of these arguments do anything in
+         Beautiful Soup 4; they will result in a warning and then be ignored.
+
+         Apart from this, any keyword arguments passed into the BeautifulSoup
+         constructor are propagated to the TreeBuilder constructor. This
+         makes it possible to configure a TreeBuilder beyond saying
+         which one to use.
+
+        """

         if 'convertEntities' in kwargs:
             del kwargs['convertEntities']
             warnings.warn(
                 "BS4 does not respect the convertEntities argument to the "
                 "BeautifulSoup constructor. Entities are always converted "
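The features documentation added above is why examples usually pin a parser explicitly; a minimal usage sketch:

    from bs4 import BeautifulSoup

    # Naming the parser avoids the NO_PARSER_SPECIFIED_WARNING above and keeps
    # results stable across machines; "html.parser" ships with Python itself.
    soup = BeautifulSoup("<p>Hello, <b>world</b></p>", "html.parser")
    print(soup.b.get_text())  # world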
@@ -114,9 +167,9 @@ class BeautifulSoup(Tag):
|
||||
del kwargs['isHTML']
|
||||
warnings.warn(
|
||||
"BS4 does not respect the isHTML argument to the "
|
||||
"BeautifulSoup constructor. You can pass in features='html' "
|
||||
"or features='xml' to get a builder capable of handling "
|
||||
"one or the other.")
|
||||
"BeautifulSoup constructor. Suggest you use "
|
||||
"features='lxml' for HTML and features='lxml-xml' for "
|
||||
"XML.")
|
||||
|
||||
def deprecated_argument(old_name, new_name):
|
||||
if old_name in kwargs:
|
||||
@@ -134,13 +187,24 @@ class BeautifulSoup(Tag):
|
||||
from_encoding = from_encoding or deprecated_argument(
|
||||
"fromEncoding", "from_encoding")
|
||||
|
||||
if len(kwargs) > 0:
|
||||
arg = kwargs.keys().pop()
|
||||
raise TypeError(
|
||||
"__init__() got an unexpected keyword argument '%s'" % arg)
|
||||
if from_encoding and isinstance(markup, str):
|
||||
warnings.warn("You provided Unicode markup but also provided a value for from_encoding. Your from_encoding will be ignored.")
|
||||
from_encoding = None
|
||||
|
||||
if builder is None:
|
||||
if isinstance(features, basestring):
|
||||
self.element_classes = element_classes or dict()
|
||||
|
||||
# We need this information to track whether or not the builder
|
||||
# was specified well enough that we can omit the 'you need to
|
||||
# specify a parser' warning.
|
||||
original_builder = builder
|
||||
original_features = features
|
||||
|
||||
if isinstance(builder, type):
|
||||
# A builder class was passed in; it needs to be instantiated.
|
||||
builder_class = builder
|
||||
builder = None
|
||||
elif builder is None:
|
||||
if isinstance(features, str):
|
||||
features = [features]
|
||||
if features is None or len(features) == 0:
|
||||
features = self.DEFAULT_BUILDER_FEATURES
|
||||
@@ -150,21 +214,73 @@ class BeautifulSoup(Tag):
|
||||
"Couldn't find a tree builder with the features you "
|
||||
"requested: %s. Do you need to install a parser library?"
|
||||
% ",".join(features))
|
||||
builder = builder_class()
|
||||
|
||||
# At this point either we have a TreeBuilder instance in
|
||||
# builder, or we have a builder_class that we can instantiate
|
||||
# with the remaining **kwargs.
|
||||
if builder is None:
|
||||
builder = builder_class(**kwargs)
|
||||
if not original_builder and not (
|
||||
original_features == builder.NAME or
|
||||
original_features in builder.ALTERNATE_NAMES
|
||||
):
|
||||
if builder.is_xml:
|
||||
markup_type = "XML"
|
||||
else:
|
||||
markup_type = "HTML"
|
||||
|
||||
# This code adapted from warnings.py so that we get the same line
|
||||
# of code as our warnings.warn() call gets, even if the answer is wrong
|
||||
# (as it may be in a multithreading situation).
|
||||
caller = None
|
||||
try:
|
||||
caller = sys._getframe(1)
|
||||
except ValueError:
|
||||
pass
|
||||
if caller:
|
||||
globals = caller.f_globals
|
||||
line_number = caller.f_lineno
|
||||
else:
|
||||
globals = sys.__dict__
|
||||
line_number= 1
|
||||
filename = globals.get('__file__')
|
||||
if filename:
|
||||
fnl = filename.lower()
|
||||
if fnl.endswith((".pyc", ".pyo")):
|
||||
filename = filename[:-1]
|
||||
if filename:
|
||||
# If there is no filename at all, the user is most likely in a REPL,
|
||||
# and the warning is not necessary.
|
||||
values = dict(
|
||||
filename=filename,
|
||||
line_number=line_number,
|
||||
parser=builder.NAME,
|
||||
markup_type=markup_type
|
||||
)
|
||||
warnings.warn(self.NO_PARSER_SPECIFIED_WARNING % values, stacklevel=2)
|
||||
else:
|
||||
if kwargs:
|
||||
warnings.warn("Keyword arguments to the BeautifulSoup constructor will be ignored. These would normally be passed into the TreeBuilder constructor, but a TreeBuilder instance was passed in as `builder`.")
|
||||
|
||||
self.builder = builder
|
||||
self.is_xml = builder.is_xml
|
||||
self.builder.soup = self
|
||||
|
||||
self.known_xml = self.is_xml
|
||||
self._namespaces = dict()
|
||||
self.parse_only = parse_only
|
||||
|
||||
self.builder.initialize_soup(self)
|
||||
|
||||
if hasattr(markup, 'read'): # It's a file-type object.
|
||||
markup = markup.read()
|
||||
elif len(markup) <= 256:
|
||||
elif len(markup) <= 256 and (
|
||||
(isinstance(markup, bytes) and not b'<' in markup)
|
||||
or (isinstance(markup, str) and not '<' in markup)
|
||||
):
|
||||
# Print out warnings for a couple beginner problems
|
||||
# involving passing non-markup to Beautiful Soup.
|
||||
# Beautiful Soup will still parse the input as markup,
|
||||
# just in case that's what the user really wants.
|
||||
if (isinstance(markup, unicode)
|
||||
if (isinstance(markup, str)
|
||||
and not os.path.supports_unicode_filenames):
|
||||
possible_filename = markup.encode("utf8")
|
||||
else:
|
||||
@@ -172,37 +288,93 @@ class BeautifulSoup(Tag):
|
||||
is_file = False
|
||||
try:
|
||||
is_file = os.path.exists(possible_filename)
|
||||
except Exception, e:
|
||||
except Exception as e:
|
||||
# This is almost certainly a problem involving
|
||||
# characters not valid in filenames on this
|
||||
# system. Just let it go.
|
||||
pass
|
||||
if is_file:
|
||||
if isinstance(markup, str):
|
||||
markup = markup.encode("utf8")
|
||||
warnings.warn(
|
||||
'"%s" looks like a filename, not markup. You should probably open this file and pass the filehandle into Beautiful Soup.' % markup)
|
||||
if markup[:5] == "http:" or markup[:6] == "https:":
|
||||
# TODO: This is ugly but I couldn't get it to work in
|
||||
# Python 3 otherwise.
|
||||
if ((isinstance(markup, bytes) and not b' ' in markup)
|
||||
or (isinstance(markup, unicode) and not u' ' in markup)):
|
||||
warnings.warn(
|
||||
'"%s" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client to get the document behind the URL, and feed that document to Beautiful Soup.' % markup)
|
||||
'"%s" looks like a filename, not markup. You should'
|
||||
' probably open this file and pass the filehandle into'
|
||||
' Beautiful Soup.' % markup)
|
||||
self._check_markup_is_url(markup)
|
||||
|
||||
rejections = []
|
||||
success = False
|
||||
for (self.markup, self.original_encoding, self.declared_html_encoding,
|
||||
self.contains_replacement_characters) in (
|
||||
self.builder.prepare_markup(markup, from_encoding)):
|
||||
self.builder.prepare_markup(
|
||||
markup, from_encoding, exclude_encodings=exclude_encodings)):
|
||||
self.reset()
|
||||
try:
|
||||
self._feed()
|
||||
success = True
|
||||
break
|
||||
except ParserRejectedMarkup:
|
||||
except ParserRejectedMarkup as e:
|
||||
rejections.append(e)
|
||||
pass
|
||||
|
||||
if not success:
|
||||
other_exceptions = [str(e) for e in rejections]
|
||||
raise ParserRejectedMarkup(
|
||||
"The markup you provided was rejected by the parser. Trying a different parser or a different encoding may help.\n\nOriginal exception(s) from parser:\n " + "\n ".join(other_exceptions)
|
||||
)
|
||||
|
||||
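Taken together, the changes above mean every encoding strategy is tried before giving up, and the failure report carries the per-parser errors. A minimal sketch of the new constructor keyword in use (the byte string and the excluded encoding are illustrative values only):

from bs4 import BeautifulSoup

# exclude_encodings is forwarded to prepare_markup(), so encoding
# detection skips the listed encodings entirely.
soup = BeautifulSoup(b"<p>caf\xe9</p>", "html.parser",
                     exclude_encodings=["iso-8859-1"])
print(soup.original_encoding)  # whatever detection settled on instead
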
        # Clear out the markup and remove the builder's circular
        # reference to this object.
        self.markup = None
        self.builder.soup = None

    def __copy__(self):
        copy = type(self)(
            self.encode('utf-8'), builder=self.builder, from_encoding='utf-8'
        )

        # Although we encoded the tree to UTF-8, that may not have
        # been the encoding of the original markup. Set the copy's
        # .original_encoding to reflect the original object's
        # .original_encoding.
        copy.original_encoding = self.original_encoding
        return copy

    def __getstate__(self):
        # Frequently a tree builder can't be pickled.
        d = dict(self.__dict__)
        if 'builder' in d and not self.builder.picklable:
            d['builder'] = None
        return d

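A short sketch of what __copy__() and __getstate__() make possible; html.parser is used here because its builder is picklable:

import copy
import pickle
from bs4 import BeautifulSoup

soup = BeautifulSoup("<p>caf\xe9</p>", "html.parser")
clone = copy.copy(soup)     # re-parses the UTF-8 encoding of the tree
print(clone.original_encoding == soup.original_encoding)  # True
restored = pickle.loads(pickle.dumps(soup))  # unpicklable builders are dropped
print(restored.p.text)
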
    @staticmethod
    def _check_markup_is_url(markup):
        """
        Check if markup looks like it's actually a url and raise a warning
        if so. Markup can be unicode or str (py2) / bytes (py3).
        """
        if isinstance(markup, bytes):
            space = b' '
            cant_start_with = (b"http:", b"https:")
        elif isinstance(markup, str):
            space = ' '
            cant_start_with = ("http:", "https:")
        else:
            return

        if any(markup.startswith(prefix) for prefix in cant_start_with):
            if not space in markup:
                if isinstance(markup, bytes):
                    decoded_markup = markup.decode('utf-8', 'replace')
                else:
                    decoded_markup = markup
                warnings.warn(
                    '"%s" looks like a URL. Beautiful Soup is not an'
                    ' HTTP client. You should probably use an HTTP client like'
                    ' requests to get the document behind the URL, and feed'
                    ' that document to Beautiful Soup.' % decoded_markup
                )

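A sketch of this guard in action: a short, space-free http(s) string triggers the warning but is still parsed as markup.

import warnings
from bs4 import BeautifulSoup

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    BeautifulSoup("http://example.com/", "html.parser")
print(any("looks like a URL" in str(w.message) for w in caught))  # True
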
    def _feed(self):
        # Convert the document to Unicode.
        self.builder.reset()
@@ -223,15 +395,21 @@ class BeautifulSoup(Tag):
        self.preserve_whitespace_tag_stack = []
        self.pushTag(self)

    def new_tag(self, name, namespace=None, nsprefix=None, **attrs):
    def new_tag(self, name, namespace=None, nsprefix=None, attrs={},
                sourceline=None, sourcepos=None, **kwattrs):
        """Create a new tag associated with this soup."""
        return Tag(None, self.builder, name, namespace, nsprefix, attrs)
        kwattrs.update(attrs)
        return self.element_classes.get(Tag, Tag)(
            None, self.builder, name, namespace, nsprefix, kwattrs,
            sourceline=sourceline, sourcepos=sourcepos
        )

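The new attrs dict matters for attribute names that aren't valid Python identifiers, such as "data-*" or "class"; a sketch:

from bs4 import BeautifulSoup

soup = BeautifulSoup("<div></div>", "html.parser")
link = soup.new_tag("a", attrs={"data-id": "7", "class": "external"})
link["href"] = "http://example.com/"
soup.div.append(link)
print(soup.div)  # the <a> carries all three attributes
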
    def new_string(self, s, subclass=NavigableString):
    def new_string(self, s, subclass=None):
        """Create a new NavigableString associated with this soup."""
        navigable = subclass(s)
        navigable.setup()
        return navigable
        subclass = subclass or self.element_classes.get(
            NavigableString, NavigableString
        )
        return subclass(s)

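new_string() now consults element_classes, so a soup can be told to build custom subclasses. A sketch with a hypothetical MyString class:

from bs4 import BeautifulSoup
from bs4.element import NavigableString

class MyString(NavigableString):
    pass

soup = BeautifulSoup("<p>x</p>", "html.parser",
                     element_classes={NavigableString: MyString})
print(type(soup.new_string("hello")))  # MyString, not NavigableString
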
    def insert_before(self, successor):
        raise NotImplementedError("BeautifulSoup objects don't support insert_before().")
@@ -250,16 +428,26 @@ class BeautifulSoup(Tag):

    def pushTag(self, tag):
        #print "Push", tag.name
        if self.currentTag:
        if self.currentTag is not None:
            self.currentTag.contents.append(tag)
        self.tagStack.append(tag)
        self.currentTag = self.tagStack[-1]
        if tag.name in self.builder.preserve_whitespace_tags:
            self.preserve_whitespace_tag_stack.append(tag)

    def endData(self, containerClass=NavigableString):
    def endData(self, containerClass=None):

        # Default container is NavigableString.
        containerClass = containerClass or NavigableString

        # The user may want us to instantiate some alias for the
        # container class.
        containerClass = self.element_classes.get(
            containerClass, containerClass
        )

        if self.current_data:
            current_data = u''.join(self.current_data)
            current_data = ''.join(self.current_data)
            # If whitespace is not preserved, and this string contains
            # nothing but ASCII spaces, replace it with a single space
            # or newline.
@@ -289,15 +477,72 @@ class BeautifulSoup(Tag):

    def object_was_parsed(self, o, parent=None, most_recent_element=None):
        """Add an object to the parse tree."""
        parent = parent or self.currentTag
        most_recent_element = most_recent_element or self._most_recent_element
        o.setup(parent, most_recent_element)

        if parent is None:
            parent = self.currentTag
        if most_recent_element is not None:
            most_recent_element.next_element = o
            previous_element = most_recent_element
        else:
            previous_element = self._most_recent_element

        next_element = previous_sibling = next_sibling = None
        if isinstance(o, Tag):
            next_element = o.next_element
            next_sibling = o.next_sibling
            previous_sibling = o.previous_sibling
            if previous_element is None:
                previous_element = o.previous_element

        fix = parent.next_element is not None

        o.setup(parent, previous_element, next_element, previous_sibling, next_sibling)

        self._most_recent_element = o
        parent.contents.append(o)

        # Check if we are inserting into an already parsed node.
        if fix:
            self._linkage_fixer(parent)

    def _linkage_fixer(self, el):
        """Make sure linkage of this fragment is sound."""

        first = el.contents[0]
        child = el.contents[-1]
        descendant = child

        if child is first and el.parent is not None:
            # Parent should be linked to first child
            el.next_element = child
            # We are no longer linked to whatever this element is
            prev_el = child.previous_element
            if prev_el is not None and prev_el is not el:
                prev_el.next_element = None
            # First child should be linked to the parent, and no previous siblings.
            child.previous_element = el
            child.previous_sibling = None

        # We have no sibling as we've been appended as the last.
        child.next_sibling = None

        # This index is a tag, dig deeper for a "last descendant"
        if isinstance(child, Tag) and child.contents:
            descendant = child._last_descendant(False)

        # As the final step, link last descendant. It should be linked
        # to the parent's next sibling (if found), else walk up the chain
        # and find a parent with a sibling. It should have no next sibling.
        descendant.next_element = None
        descendant.next_sibling = None
        target = el
        while True:
            if target is None:
                break
            elif target.next_sibling is not None:
                descendant.next_element = target.next_sibling
                target.next_sibling.previous_element = child
                break
            target = target.parent

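A sketch of the situation _linkage_fixer() repairs: inserting into a tree that has already been parsed must keep the .next_element chain walkable out of the modified subtree.

from bs4 import BeautifulSoup

soup = BeautifulSoup("<div><a>one</a></div><p>two</p>", "html.parser")
tag = soup.new_tag("b")
tag.string = "three"
soup.div.append(tag)  # insertion into an already-parsed node triggers the fixer
# The chain from the new text should lead out of <div> and on into <p>.
print([getattr(e, "name", str(e)) for e in soup.div.b.next_elements])
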
    def _popToTag(self, name, nsprefix=None, inclusivePop=True):
        """Pops the tag stack up to and including the most recent
        instance of the given tag. If inclusivePop is false, pops the tag
@@ -321,11 +566,12 @@ class BeautifulSoup(Tag):

        return most_recently_popped

    def handle_starttag(self, name, namespace, nsprefix, attrs):
    def handle_starttag(self, name, namespace, nsprefix, attrs, sourceline=None,
                        sourcepos=None):
        """Push a start tag on to the stack.

        If this method returns None, the tag was rejected by the
        SoupStrainer. You should proceed as if the tag had not occured
        SoupStrainer. You should proceed as if the tag had not occurred
        in the document. For instance, if this was a self-closing tag,
        don't call handle_endtag.
        """
@@ -338,11 +584,14 @@ class BeautifulSoup(Tag):
                or not self.parse_only.search_tag(name, attrs))):
            return None

        tag = Tag(self, self.builder, name, namespace, nsprefix, attrs,
                  self.currentTag, self._most_recent_element)
        tag = self.element_classes.get(Tag, Tag)(
            self, self.builder, name, namespace, nsprefix, attrs,
            self.currentTag, self._most_recent_element,
            sourceline=sourceline, sourcepos=sourcepos
        )
        if tag is None:
            return tag
        if self._most_recent_element:
        if self._most_recent_element is not None:
            self._most_recent_element.next_element = tag
        self._most_recent_element = tag
        self.pushTag(tag)
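Because handle_starttag() now threads sourceline/sourcepos through to the Tag, parsers that track positions expose them on every element; a sketch with html.parser:

from bs4 import BeautifulSoup

soup = BeautifulSoup("<html>\n<p>hi</p></html>", "html.parser")
print(soup.p.sourceline, soup.p.sourcepos)  # 2 0 -- line and offset in the markup
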
@@ -367,9 +616,9 @@ class BeautifulSoup(Tag):
            encoding_part = ''
            if eventual_encoding != None:
                encoding_part = ' encoding="%s"' % eventual_encoding
            prefix = u'<?xml version="1.0"%s?>\n' % encoding_part
            prefix = '<?xml version="1.0"%s?>\n' % encoding_part
        else:
            prefix = u''
            prefix = ''
        if not pretty_print:
            indent_level = None
        else:
@@ -403,4 +652,4 @@ class FeatureNotFound(ValueError):
if __name__ == '__main__':
    import sys
    soup = BeautifulSoup(sys.stdin)
    print soup.prettify()
    print(soup.prettify())
lib/bs4/builder/__init__.py
@@ -1,10 +1,13 @@
# Use of this source code is governed by the MIT license.
__license__ = "MIT"

from collections import defaultdict
import itertools
import sys
from bs4.element import (
    CharsetMetaAttributeValue,
    ContentMetaAttributeValue,
    whitespace_re
    nonwhitespace_re
)

__all__ = [
@@ -80,21 +83,70 @@ builder_registry = TreeBuilderRegistry()
class TreeBuilder(object):
    """Turn a document into a Beautiful Soup object tree."""

    NAME = "[Unknown tree builder]"
    ALTERNATE_NAMES = []
    features = []

    is_xml = False
    preserve_whitespace_tags = set()
    picklable = False
    empty_element_tags = None  # A tag will be considered an empty-element
                               # tag when and only when it has no contents.


    # A value for these tag/attribute combinations is a space- or
    # comma-separated list of CDATA, rather than a single CDATA.
    cdata_list_attributes = {}
    DEFAULT_CDATA_LIST_ATTRIBUTES = {}

    DEFAULT_PRESERVE_WHITESPACE_TAGS = set()

    USE_DEFAULT = object()

    def __init__(self):
    # Most parsers don't keep track of line numbers.
    TRACKS_LINE_NUMBERS = False

    def __init__(self, multi_valued_attributes=USE_DEFAULT,
                 preserve_whitespace_tags=USE_DEFAULT,
                 store_line_numbers=USE_DEFAULT):
        """Constructor.

        :param multi_valued_attributes: If this is set to None, the
         TreeBuilder will not turn any values for attributes like
         'class' into lists. Setting this to a dictionary will
         customize this behavior; look at DEFAULT_CDATA_LIST_ATTRIBUTES
         for an example.

         Internally, these are called "CDATA list attributes", but that
         probably doesn't make sense to an end-user, so the argument name
         is `multi_valued_attributes`.

        :param preserve_whitespace_tags: A list of tags to treat
         the way <pre> tags are treated in HTML. Tags in this list
         will have whitespace preserved.

        :param store_line_numbers: If the parser keeps track of the
         line numbers and positions of the original markup, that
         information will, by default, be stored in each corresponding
         `Tag` object. You can turn this off by passing
         store_line_numbers=False. If the parser you're using doesn't
         keep track of this information, then setting store_line_numbers=True
         will do nothing.
        """
        self.soup = None

        if multi_valued_attributes is self.USE_DEFAULT:
            multi_valued_attributes = self.DEFAULT_CDATA_LIST_ATTRIBUTES
        self.cdata_list_attributes = multi_valued_attributes
        if preserve_whitespace_tags is self.USE_DEFAULT:
            preserve_whitespace_tags = self.DEFAULT_PRESERVE_WHITESPACE_TAGS
        self.preserve_whitespace_tags = preserve_whitespace_tags
        if store_line_numbers == self.USE_DEFAULT:
            store_line_numbers = self.TRACKS_LINE_NUMBERS
        self.store_line_numbers = store_line_numbers

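These constructor arguments also arrive via the BeautifulSoup constructor, since leftover keyword arguments are now passed through to the builder class; a sketch:

from bs4 import BeautifulSoup

# With multi_valued_attributes=None, 'class' stays a plain string.
soup = BeautifulSoup('<p class="a b">x</p>', "html.parser",
                     multi_valued_attributes=None)
print(soup.p["class"])  # 'a b' instead of ['a', 'b']
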
    def initialize_soup(self, soup):
        """The BeautifulSoup object has been initialized and is now
        being associated with the TreeBuilder.
        """
        self.soup = soup

    def reset(self):
        pass

@@ -118,13 +170,13 @@ class TreeBuilder(object):
        if self.empty_element_tags is None:
            return True
        return tag_name in self.empty_element_tags


    def feed(self, markup):
        raise NotImplementedError()

    def prepare_markup(self, markup, user_specified_encoding=None,
                       document_declared_encoding=None):
        return markup, None, None, False
                       document_declared_encoding=None, exclude_encodings=None):
        yield markup, None, None, False

    def test_fragment_to_document(self, fragment):
        """Wrap an HTML fragment to make it look like a document.
@@ -153,14 +205,14 @@ class TreeBuilder(object):
        universal = self.cdata_list_attributes.get('*', [])
        tag_specific = self.cdata_list_attributes.get(
            tag_name.lower(), None)
        for attr in attrs.keys():
        for attr in list(attrs.keys()):
            if attr in universal or (tag_specific and attr in tag_specific):
                # We have a "class"-type attribute whose string
                # value is a whitespace-separated list of
                # values. Split it into a list.
                value = attrs[attr]
                if isinstance(value, basestring):
                    values = whitespace_re.split(value)
                if isinstance(value, str):
                    values = nonwhitespace_re.findall(value)
                else:
                    # html5lib sometimes calls setAttributes twice
                    # for the same tag when rearranging the parse
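One consequence of switching from whitespace_re.split() to nonwhitespace_re.findall(): leading and trailing whitespace can no longer produce empty strings in the value list. A sketch of the default behavior:

from bs4 import BeautifulSoup

soup = BeautifulSoup('<p class="  a   b ">x</p>', "html.parser")
print(soup.p["class"])  # ['a', 'b'] -- no empty strings from the padding
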
@@ -224,10 +276,20 @@ class HTMLTreeBuilder(TreeBuilder):
    Such as which tags are empty-element tags.
    """

    preserve_whitespace_tags = set(['pre', 'textarea'])
    empty_element_tags = set(['br' , 'hr', 'input', 'img', 'meta',
                              'spacer', 'link', 'frame', 'base'])
    empty_element_tags = set([
        # These are from HTML5.
        'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'menuitem', 'meta', 'param', 'source', 'track', 'wbr',

        # These are from earlier versions of HTML and are removed in HTML5.
        'basefont', 'bgsound', 'command', 'frame', 'image', 'isindex', 'nextid', 'spacer'
    ])

    # The HTML standard defines these as block-level elements. Beautiful
    # Soup does not treat these elements differently from other elements,
    # but it may do so eventually, and this information is available if
    # you need to use it.
    block_elements = set(["address", "article", "aside", "blockquote", "canvas", "dd", "div", "dl", "dt", "fieldset", "figcaption", "figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hr", "li", "main", "nav", "noscript", "ol", "output", "p", "pre", "section", "table", "tfoot", "ul", "video"])

    # The HTML standard defines these attributes as containing a
    # space-separated list of values, not a single value. That is,
    # class="foo bar" means that the 'class' attribute has two values,
@@ -235,7 +297,7 @@ class HTMLTreeBuilder(TreeBuilder):
    # encounter one of these attributes, we will parse its value into
    # a list of values if possible. Upon output, the list will be
    # converted back into a string.
    cdata_list_attributes = {
    DEFAULT_CDATA_LIST_ATTRIBUTES = {
        "*" : ['class', 'accesskey', 'dropzone'],
        "a" : ['rel', 'rev'],
        "link" : ['rel', 'rev'],
@@ -252,6 +314,8 @@ class HTMLTreeBuilder(TreeBuilder):
        "output" : ["for"],
        }

    DEFAULT_PRESERVE_WHITESPACE_TAGS = set(['pre', 'textarea'])

    def set_up_substitutions(self, tag):
        # We are only interested in <meta> tags
        if tag.name != 'meta':
@@ -299,8 +363,15 @@ def register_treebuilders_from(module):
        this_module.builder_registry.register(obj)

class ParserRejectedMarkup(Exception):
    pass

    def __init__(self, message_or_exception):
        """Explain why the parser rejected the given markup, either
        with a textual explanation or another exception.
        """
        if isinstance(message_or_exception, Exception):
            e = message_or_exception
            message_or_exception = "%s: %s" % (e.__class__.__name__, str(e))
        super(ParserRejectedMarkup, self).__init__(message_or_exception)

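A sketch of the new constructor: a rejection can now wrap the underlying exception, which is how the lxml feed() methods later in this changeset use it.

from bs4.builder import ParserRejectedMarkup

try:
    raise ValueError("Document is empty")
except ValueError as e:
    wrapped = ParserRejectedMarkup(e)
print(wrapped)  # "ValueError: Document is empty"
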
# Builders are registered in reverse order of priority, so that custom
# builder registrations will take precedence. In general, we want lxml
# to take precedence over html5lib, because it's faster. And we only

lib/bs4/builder/_html5lib.py
@@ -1,17 +1,27 @@
# Use of this source code is governed by the MIT license.
__license__ = "MIT"

__all__ = [
    'HTML5TreeBuilder',
    ]

import warnings
import re
from bs4.builder import (
    PERMISSIVE,
    HTML,
    HTML_5,
    HTMLTreeBuilder,
    )
from bs4.element import NamespacedAttribute
from bs4.element import (
    NamespacedAttribute,
    nonwhitespace_re,
)
import html5lib
from html5lib.constants import namespaces
from html5lib.constants import (
    namespaces,
    prefixes,
)
from bs4.element import (
    Comment,
    Doctype,
@@ -19,14 +29,36 @@ from bs4.element import (
    Tag,
    )

try:
    # Pre-0.99999999
    from html5lib.treebuilders import _base as treebuilder_base
    new_html5lib = False
except ImportError as e:
    # 0.99999999 and up
    from html5lib.treebuilders import base as treebuilder_base
    new_html5lib = True

class HTML5TreeBuilder(HTMLTreeBuilder):
    """Use html5lib to build a tree."""

    features = ['html5lib', PERMISSIVE, HTML_5, HTML]
    NAME = "html5lib"

    def prepare_markup(self, markup, user_specified_encoding):
    features = [NAME, PERMISSIVE, HTML_5, HTML]

    # html5lib can tell us which line number and position in the
    # original file is the source of an element.
    TRACKS_LINE_NUMBERS = True

    def prepare_markup(self, markup, user_specified_encoding,
                       document_declared_encoding=None, exclude_encodings=None):
        # Store the user-specified encoding for use later on.
        self.user_specified_encoding = user_specified_encoding

        # document_declared_encoding and exclude_encodings aren't used
        # ATM because the html5lib TreeBuilder doesn't use
        # UnicodeDammit.
        if exclude_encodings:
            warnings.warn("You provided a value for exclude_encoding, but the html5lib tree builder doesn't support exclude_encoding.")
        yield (markup, None, None, False)

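Since html5lib does its own encoding sniffing, exclude_encodings is accepted here but ignored, with a warning; a sketch (requires the html5lib package):

import warnings
from bs4 import BeautifulSoup

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    BeautifulSoup(b"<p>hi</p>", "html5lib", exclude_encodings=["utf-16"])
print(any("doesn't support exclude_encoding" in str(w.message) for w in caught))
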
    # These methods are defined by Beautiful Soup.
@@ -34,32 +66,63 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
        if self.soup.parse_only is not None:
            warnings.warn("You provided a value for parse_only, but the html5lib tree builder doesn't support parse_only. The entire document will be parsed.")
        parser = html5lib.HTMLParser(tree=self.create_treebuilder)
        doc = parser.parse(markup, encoding=self.user_specified_encoding)

        self.underlying_builder.parser = parser
        extra_kwargs = dict()
        if not isinstance(markup, str):
            if new_html5lib:
                extra_kwargs['override_encoding'] = self.user_specified_encoding
            else:
                extra_kwargs['encoding'] = self.user_specified_encoding
        doc = parser.parse(markup, **extra_kwargs)

        # Set the character encoding detected by the tokenizer.
        if isinstance(markup, unicode):
        if isinstance(markup, str):
            # We need to special-case this because html5lib sets
            # charEncoding to UTF-8 if it gets Unicode input.
            doc.original_encoding = None
        else:
            doc.original_encoding = parser.tokenizer.stream.charEncoding[0]

            original_encoding = parser.tokenizer.stream.charEncoding[0]
            if not isinstance(original_encoding, str):
                # In 0.99999999 and up, the encoding is an html5lib
                # Encoding object. We want to use a string for compatibility
                # with other tree builders.
                original_encoding = original_encoding.name
            doc.original_encoding = original_encoding
        self.underlying_builder.parser = None

    def create_treebuilder(self, namespaceHTMLElements):
        self.underlying_builder = TreeBuilderForHtml5lib(
            self.soup, namespaceHTMLElements)
            namespaceHTMLElements, self.soup,
            store_line_numbers=self.store_line_numbers
        )
        return self.underlying_builder

    def test_fragment_to_document(self, fragment):
        """See `TreeBuilder`."""
        return u'<html><head></head><body>%s</body></html>' % fragment
        return '<html><head></head><body>%s</body></html>' % fragment


class TreeBuilderForHtml5lib(html5lib.treebuilders._base.TreeBuilder):

    def __init__(self, soup, namespaceHTMLElements):
        self.soup = soup
class TreeBuilderForHtml5lib(treebuilder_base.TreeBuilder):

    def __init__(self, namespaceHTMLElements, soup=None,
                 store_line_numbers=True, **kwargs):
        if soup:
            self.soup = soup
        else:
            from bs4 import BeautifulSoup
            # TODO: Why is the parser 'html.parser' here? To avoid an
            # infinite loop?
            self.soup = BeautifulSoup(
                "", "html.parser", store_line_numbers=store_line_numbers,
                **kwargs
            )
        super(TreeBuilderForHtml5lib, self).__init__(namespaceHTMLElements)

        # This will be set later to an html5lib.html5parser.HTMLParser
        # object, which we can use to track the current line number.
        self.parser = None
        self.store_line_numbers = store_line_numbers

    def documentClass(self):
        self.soup.reset()
        return Element(self.soup, self.soup, None)
@@ -73,14 +136,26 @@ class TreeBuilderForHtml5lib(html5lib.treebuilders._base.TreeBuilder):
        self.soup.object_was_parsed(doctype)

    def elementClass(self, name, namespace):
        tag = self.soup.new_tag(name, namespace)
        kwargs = {}
        if self.parser and self.store_line_numbers:
            # This represents the point immediately after the end of the
            # tag. We don't know when the tag started, but we do know
            # where it ended -- the character just before this one.
            sourceline, sourcepos = self.parser.tokenizer.stream.position()
            kwargs['sourceline'] = sourceline
            kwargs['sourcepos'] = sourcepos-1
        tag = self.soup.new_tag(name, namespace, **kwargs)

        return Element(tag, self.soup, namespace)

    def commentClass(self, data):
        return TextNode(Comment(data), self.soup)

    def fragmentClass(self):
        self.soup = BeautifulSoup("")
        from bs4 import BeautifulSoup
        # TODO: Why is the parser 'html.parser' here? To avoid an
        # infinite loop?
        self.soup = BeautifulSoup("", "html.parser")
        self.soup.name = "[document_fragment]"
        return Element(self.soup, self.soup, None)

@@ -92,7 +167,57 @@ class TreeBuilderForHtml5lib(html5lib.treebuilders._base.TreeBuilder):
        return self.soup

    def getFragment(self):
        return html5lib.treebuilders._base.TreeBuilder.getFragment(self).element
        return treebuilder_base.TreeBuilder.getFragment(self).element

    def testSerializer(self, element):
        from bs4 import BeautifulSoup
        rv = []
        doctype_re = re.compile(r'^(.*?)(?: PUBLIC "(.*?)"(?: "(.*?)")?| SYSTEM "(.*?)")?$')

        def serializeElement(element, indent=0):
            if isinstance(element, BeautifulSoup):
                pass
            if isinstance(element, Doctype):
                m = doctype_re.match(element)
                if m:
                    name = m.group(1)
                    if m.lastindex > 1:
                        publicId = m.group(2) or ""
                        systemId = m.group(3) or m.group(4) or ""
                        rv.append("""|%s<!DOCTYPE %s "%s" "%s">""" %
                                  (' ' * indent, name, publicId, systemId))
                    else:
                        rv.append("|%s<!DOCTYPE %s>" % (' ' * indent, name))
                else:
                    rv.append("|%s<!DOCTYPE >" % (' ' * indent,))
            elif isinstance(element, Comment):
                rv.append("|%s<!-- %s -->" % (' ' * indent, element))
            elif isinstance(element, NavigableString):
                rv.append("|%s\"%s\"" % (' ' * indent, element))
            else:
                if element.namespace:
                    name = "%s %s" % (prefixes[element.namespace],
                                      element.name)
                else:
                    name = element.name
                rv.append("|%s<%s>" % (' ' * indent, name))
                if element.attrs:
                    attributes = []
                    for name, value in list(element.attrs.items()):
                        if isinstance(name, NamespacedAttribute):
                            name = "%s %s" % (prefixes[name.namespace], name.name)
                        if isinstance(value, list):
                            value = " ".join(value)
                        attributes.append((name, value))

                    for name, value in sorted(attributes):
                        rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value))
                indent += 2
                for child in element.children:
                    serializeElement(child, indent)
        serializeElement(element, 0)

        return "\n".join(rv)

class AttrList(object):
    def __init__(self, element):
@@ -101,7 +226,16 @@ class AttrList(object):
    def __iter__(self):
        return list(self.attrs.items()).__iter__()
    def __setitem__(self, name, value):
        "set attr", name, value
        # If this attribute is a multi-valued attribute for this element,
        # turn its value into a list.
        list_attr = self.element.cdata_list_attributes
        if (name in list_attr['*']
            or (self.element.name in list_attr
                and name in list_attr[self.element.name])):
            # A node that is being cloned may have already undergone
            # this procedure.
            if not isinstance(value, list):
                value = nonwhitespace_re.findall(value)
        self.element[name] = value
    def items(self):
        return list(self.attrs.items())
@@ -115,16 +249,16 @@ class AttrList(object):
        return name in list(self.attrs.keys())


class Element(html5lib.treebuilders._base.Node):
class Element(treebuilder_base.Node):
    def __init__(self, element, soup, namespace):
        html5lib.treebuilders._base.Node.__init__(self, element.name)
        treebuilder_base.Node.__init__(self, element.name)
        self.element = element
        self.soup = soup
        self.namespace = namespace

    def appendChild(self, node):
        string_child = child = None
        if isinstance(node, basestring):
        if isinstance(node, str):
            # Some other piece of code decided to pass in a string
            # instead of creating a TextElement object to contain the
            # string.
@@ -136,13 +270,15 @@ class Element(html5lib.treebuilders._base.Node):
            child = node
        elif node.element.__class__ == NavigableString:
            string_child = child = node.element
            node.parent = self
        else:
            child = node.element
            node.parent = self

        if not isinstance(child, basestring) and child.parent is not None:
        if not isinstance(child, str) and child.parent is not None:
            node.element.extract()

        if (string_child and self.element.contents
        if (string_child is not None and self.element.contents
            and self.element.contents[-1].__class__ == NavigableString):
            # We are appending a string onto another string.
            # TODO This has O(n^2) performance, for input like
@@ -152,7 +288,7 @@ class Element(html5lib.treebuilders._base.Node):
            old_element.replace_with(new_element)
            self.soup._most_recent_element = new_element
        else:
            if isinstance(node, basestring):
            if isinstance(node, str):
                # Create a brand new NavigableString from this string.
                child = self.soup.new_string(node)

@@ -161,6 +297,12 @@ class Element(html5lib.treebuilders._base.Node):
            # immediately after the parent, if it has no children.)
            if self.element.contents:
                most_recent_element = self.element._last_descendant(False)
            elif self.element.next_element is not None:
                # Something from further ahead in the parse tree is
                # being inserted into this earlier element. This is
                # very annoying because it means an expensive search
                # for the last element in the tree.
                most_recent_element = self.soup._last_descendant()
            else:
                most_recent_element = self.element

@@ -169,9 +311,12 @@ class Element(html5lib.treebuilders._base.Node):
            most_recent_element=most_recent_element)

    def getAttributes(self):
        if isinstance(self.element, Comment):
            return {}
        return AttrList(self.element)

    def setAttributes(self, attributes):

        if attributes is not None and len(attributes) > 0:

            converted_attributes = []
@@ -183,7 +328,7 @@ class Element(html5lib.treebuilders._base.Node):

            self.soup.builder._replace_cdata_list_attribute_values(
                self.name, attributes)
            for name, value in attributes.items():
            for name, value in list(attributes.items()):
                self.element[name] = value

            # The attributes may contain variables that need substitution.
@@ -195,11 +340,11 @@ class Element(html5lib.treebuilders._base.Node):
    attributes = property(getAttributes, setAttributes)

    def insertText(self, data, insertBefore=None):
        text = TextNode(self.soup.new_string(data), self.soup)
        if insertBefore:
            text = TextNode(self.soup.new_string(data), self.soup)
            self.insertBefore(data, insertBefore)
            self.insertBefore(text, insertBefore)
        else:
            self.appendChild(data)
            self.appendChild(text)

    def insertBefore(self, node, refNode):
        index = self.element.index(refNode.element)
@@ -218,6 +363,10 @@ class Element(html5lib.treebuilders._base.Node):

    def reparentChildren(self, new_parent):
        """Move all of this tag's children into another tag."""
        # print "MOVE", self.element.contents
        # print "FROM", self.element
        # print "TO", new_parent.element

        element = self.element
        new_parent_element = new_parent.element
        # Determine what this tag's next_element will be once all the children
@@ -236,18 +385,35 @@ class Element(html5lib.treebuilders._base.Node):
        new_parents_last_descendant_next_element = new_parent_element.next_element

        to_append = element.contents
        append_after = new_parent.element.contents
        if len(to_append) > 0:
            # Set the first child's previous_element and previous_sibling
            # to elements within the new parent
            first_child = to_append[0]
            first_child.previous_element = new_parents_last_descendant
            if new_parents_last_descendant is not None:
                first_child.previous_element = new_parents_last_descendant
            else:
                first_child.previous_element = new_parent_element
            first_child.previous_sibling = new_parents_last_child
            if new_parents_last_descendant is not None:
                new_parents_last_descendant.next_element = first_child
            else:
                new_parent_element.next_element = first_child
            if new_parents_last_child is not None:
                new_parents_last_child.next_sibling = first_child

            # Fix the last child's next_element and next_sibling
            last_child = to_append[-1]
            last_child.next_element = new_parents_last_descendant_next_element
            last_child.next_sibling = None
            # Find the very last element being moved. It is now the
            # parent's last descendant. It has no .next_sibling and
            # its .next_element is whatever the previous last
            # descendant had.
            last_childs_last_descendant = to_append[-1]._last_descendant(False, True)

            last_childs_last_descendant.next_element = new_parents_last_descendant_next_element
            if new_parents_last_descendant_next_element is not None:
                # TODO: This code has no test coverage and I'm not sure
                # how to get html5lib to go through this path, but it's
                # just the other side of the previous line.
                new_parents_last_descendant_next_element.previous_element = last_childs_last_descendant
            last_childs_last_descendant.next_sibling = None

        for child in to_append:
            child.parent = new_parent_element
@@ -257,6 +423,10 @@ class Element(html5lib.treebuilders._base.Node):
        element.contents = []
        element.next_element = final_next_element

        # print "DONE WITH MOVE"
        # print "FROM", self.element
        # print "TO", new_parent_element

    def cloneNode(self):
        tag = self.soup.new_tag(self.element.name, self.namespace)
        node = Element(tag, self.soup, self.namespace)
@@ -268,7 +438,7 @@ class Element(html5lib.treebuilders._base.Node):
        return self.element.contents

    def getNameTuple(self):
        if self.namespace is None:
        if self.namespace == None:
            return namespaces["html"], self.name
        else:
            return self.namespace, self.name
@@ -277,7 +447,7 @@ class Element(html5lib.treebuilders._base.Node):

class TextNode(Element):
    def __init__(self, element, soup):
        html5lib.treebuilders._base.Node.__init__(self, None)
        treebuilder_base.Node.__init__(self, None)
        self.element = element
        self.soup = soup

lib/bs4/builder/_htmlparser.py
@@ -1,13 +1,23 @@
# encoding: utf-8
"""Use the HTMLParser library to parse HTML files that aren't too bad."""

# Use of this source code is governed by the MIT license.
__license__ = "MIT"

__all__ = [
    'HTMLParserTreeBuilder',
    ]

from HTMLParser import (
    HTMLParser,
    HTMLParseError,
    )
from future.moves.html.parser import HTMLParser

try:
    from html.parser import HTMLParseError
except ImportError as e:
    # HTMLParseError is removed in Python 3.5. Since it can never be
    # thrown in 3.5, we can just define our own class as a placeholder.
    class HTMLParseError(Exception):
        pass

import sys
import warnings

@@ -19,10 +29,10 @@ import warnings
# At the end of this file, we monkeypatch HTMLParser so that
# strict=True works well on Python 3.2.2.
major, minor, release = sys.version_info[:3]
CONSTRUCTOR_TAKES_STRICT = (
    major > 3
    or (major == 3 and minor > 2)
    or (major == 3 and minor == 2 and release >= 3))
CONSTRUCTOR_TAKES_STRICT = major == 3 and minor == 2 and release >= 3
CONSTRUCTOR_STRICT_IS_DEPRECATED = major == 3 and minor == 3
CONSTRUCTOR_TAKES_CONVERT_CHARREFS = major == 3 and minor >= 4


from bs4.element import (
    CData,
@@ -43,7 +53,42 @@ from bs4.builder import (
HTMLPARSER = 'html.parser'

class BeautifulSoupHTMLParser(HTMLParser):
    def handle_starttag(self, name, attrs):

    def __init__(self, *args, **kwargs):
        HTMLParser.__init__(self, *args, **kwargs)

        # Keep a list of empty-element tags that were encountered
        # without an explicit closing tag. If we encounter a closing tag
        # of this type, we'll associate it with one of those entries.
        #
        # This isn't a stack because we don't care about the
        # order. It's a list of closing tags we've already handled and
        # will ignore, assuming they ever show up.
        self.already_closed_empty_element = []

    def error(self, msg):
        """In Python 3, HTMLParser subclasses must implement error(), although this
        requirement doesn't appear to be documented.

        In Python 2, HTMLParser implements error() as raising an exception.

        In any event, this method is called only on very strange markup and our best strategy
        is to pretend it didn't happen and keep going.
        """
        warnings.warn(msg)

    def handle_startendtag(self, name, attrs):
        # This is only called when the markup looks like
        # <tag/>.

        # is_startend() tells handle_starttag not to close the tag
        # just because its name matches a known empty-element tag. We
        # know that this is an empty-element tag and we want to call
        # handle_endtag ourselves.
        tag = self.handle_starttag(name, attrs, handle_empty_element=False)
        self.handle_endtag(name)

    def handle_starttag(self, name, attrs, handle_empty_element=True):
        # XXX namespace
        attr_dict = {}
        for key, value in attrs:
@@ -53,17 +98,46 @@ class BeautifulSoupHTMLParser(HTMLParser):
                value = ''
            attr_dict[key] = value
            attrvalue = '""'
        self.soup.handle_starttag(name, None, None, attr_dict)
        #print "START", name
        sourceline, sourcepos = self.getpos()
        tag = self.soup.handle_starttag(
            name, None, None, attr_dict, sourceline=sourceline,
            sourcepos=sourcepos
        )
        if tag and tag.is_empty_element and handle_empty_element:
            # Unlike other parsers, html.parser doesn't send separate end tag
            # events for empty-element tags. (It's handled in
            # handle_startendtag, but only if the original markup looked like
            # <tag/>.)
            #
            # So we need to call handle_endtag() ourselves. Since we
            # know the start event is identical to the end event, we
            # don't want handle_endtag() to cross off any previous end
            # events for tags of this name.
            self.handle_endtag(name, check_already_closed=False)

    def handle_endtag(self, name):
        self.soup.handle_endtag(name)
            # But we might encounter an explicit closing tag for this tag
            # later on. If so, we want to ignore it.
            self.already_closed_empty_element.append(name)

    def handle_endtag(self, name, check_already_closed=True):
        #print "END", name
        if check_already_closed and name in self.already_closed_empty_element:
            # This is a redundant end tag for an empty-element tag.
            # We've already called handle_endtag() for it, so just
            # check it off the list.
            # print "ALREADY CLOSED", name
            self.already_closed_empty_element.remove(name)
        else:
            self.soup.handle_endtag(name)

    def handle_data(self, data):
        self.soup.handle_data(data)

    def handle_charref(self, name):
        # XXX workaround for a bug in HTMLParser. Remove this once
        # it's fixed.
        # it's fixed in all supported versions.
        # http://bugs.python.org/issue13633
        if name.startswith('x'):
            real_name = int(name.lstrip('x'), 16)
        elif name.startswith('X'):
@@ -71,11 +145,26 @@ class BeautifulSoupHTMLParser(HTMLParser):
        else:
            real_name = int(name)

        try:
            data = unichr(real_name)
        except (ValueError, OverflowError), e:
            data = u"\N{REPLACEMENT CHARACTER}"

        data = None
        if real_name < 256:
            # HTML numeric entities are supposed to reference Unicode
            # code points, but sometimes they reference code points in
            # some other encoding (ahem, Windows-1252). E.g. &#147;
            # instead of &#8220; for LEFT DOUBLE QUOTATION MARK. This
            # code tries to detect this situation and compensate.
            for encoding in (self.soup.original_encoding, 'windows-1252'):
                if not encoding:
                    continue
                try:
                    data = bytearray([real_name]).decode(encoding)
                except UnicodeDecodeError as e:
                    pass
        if not data:
            try:
                data = chr(real_name)
            except (ValueError, OverflowError) as e:
                pass
        data = data or "\N{REPLACEMENT CHARACTER}"
        self.handle_data(data)

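A sketch of the compensation above: a Windows-1252 numeric reference like &#147; now decodes to the intended curly quote instead of a C1 control character.

from bs4 import BeautifulSoup

soup = BeautifulSoup("<p>&#147;hi&#148;</p>", "html.parser")
print(soup.p.string)  # '\u201chi\u201d' -- curly quotes, not control characters
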
    def handle_entityref(self, name):
@@ -83,7 +172,12 @@ class BeautifulSoupHTMLParser(HTMLParser):
        if character is not None:
            data = character
        else:
            data = "&%s;" % name
            # If this were XML, it would be ambiguous whether "&foo"
            # was a character entity reference with a missing
            # semicolon or the literal string "&foo". Since this is
            # HTML, we have a complete list of all character entity references,
            # and this one wasn't found, so assume it's the literal string "&foo".
            data = "&%s" % name
        self.handle_data(data)

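A sketch of the missing-semicolon rule: an unknown name without ';' is kept as literal text, while real entities still resolve.

from bs4 import BeautifulSoup

print(BeautifulSoup("&foo y", "html.parser").text)   # '&foo y' -- literal text
print(BeautifulSoup("&amp; y", "html.parser").text)  # '& y'    -- real entity
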
    def handle_comment(self, data):
@@ -113,14 +207,6 @@ class BeautifulSoupHTMLParser(HTMLParser):

    def handle_pi(self, data):
        self.soup.endData()
        if data.endswith("?") and data.lower().startswith("xml"):
            # "An XHTML processing instruction using the trailing '?'
            # will cause the '?' to be included in data." - HTMLParser
            # docs.
            #
            # Strip the question mark so we don't end up with two
            # question marks.
            data = data[:-1]
        self.soup.handle_data(data)
        self.soup.endData(ProcessingInstruction)

@@ -128,26 +214,38 @@ class BeautifulSoupHTMLParser(HTMLParser):
class HTMLParserTreeBuilder(HTMLTreeBuilder):

    is_xml = False
    features = [HTML, STRICT, HTMLPARSER]
    picklable = True
    NAME = HTMLPARSER
    features = [NAME, HTML, STRICT]

    def __init__(self, *args, **kwargs):
        if CONSTRUCTOR_TAKES_STRICT:
            kwargs['strict'] = False
        self.parser_args = (args, kwargs)
    # The html.parser knows which line number and position in the
    # original file is the source of an element.
    TRACKS_LINE_NUMBERS = True

    def __init__(self, parser_args=None, parser_kwargs=None, **kwargs):
        super(HTMLParserTreeBuilder, self).__init__(**kwargs)
        parser_args = parser_args or []
        parser_kwargs = parser_kwargs or {}
        if CONSTRUCTOR_TAKES_STRICT and not CONSTRUCTOR_STRICT_IS_DEPRECATED:
            parser_kwargs['strict'] = False
        if CONSTRUCTOR_TAKES_CONVERT_CHARREFS:
            parser_kwargs['convert_charrefs'] = False
        self.parser_args = (parser_args, parser_kwargs)

    def prepare_markup(self, markup, user_specified_encoding=None,
                       document_declared_encoding=None):
                       document_declared_encoding=None, exclude_encodings=None):
        """
        :return: A 4-tuple (markup, original encoding, encoding
        declared within markup, whether any characters had to be
        replaced with REPLACEMENT CHARACTER).
        """
        if isinstance(markup, unicode):
        if isinstance(markup, str):
            yield (markup, None, None, False)
            return

        try_encodings = [user_specified_encoding, document_declared_encoding]
        dammit = UnicodeDammit(markup, try_encodings, is_html=True)
        dammit = UnicodeDammit(markup, try_encodings, is_html=True,
                               exclude_encodings=exclude_encodings)
        yield (dammit.markup, dammit.original_encoding,
               dammit.declared_html_encoding,
               dammit.contains_replacement_characters)
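prepare_markup() hands exclude_encodings to UnicodeDammit, which can also be used directly; a sketch:

from bs4 import UnicodeDammit

dammit = UnicodeDammit(b"Sacr\xe9 bleu!", exclude_encodings=["iso-8859-1"])
print(dammit.original_encoding)  # detection proceeded without ISO-8859-1
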
@@ -158,10 +256,12 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
        parser.soup = self.soup
        try:
            parser.feed(markup)
        except HTMLParseError, e:
            parser.close()
        except HTMLParseError as e:
            warnings.warn(RuntimeWarning(
                "Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
            raise e
        parser.already_closed_empty_element = []

# Patch 3.2 versions of HTMLParser earlier than 3.2.3 to use some
# 3.2.3 code. This ensures they don't treat markup like <p></p> as a

lib/bs4/builder/_lxml.py
@@ -1,13 +1,26 @@
# Use of this source code is governed by the MIT license.
__license__ = "MIT"

__all__ = [
    'LXMLTreeBuilderForXML',
    'LXMLTreeBuilder',
    ]

try:
    from collections.abc import Callable # Python 3.6
except ImportError as e:
    from collections import Callable

from io import BytesIO
from StringIO import StringIO
import collections
from io import StringIO
from lxml import etree
from bs4.element import Comment, Doctype, NamespacedAttribute
from bs4.element import (
    Comment,
    Doctype,
    NamespacedAttribute,
    ProcessingInstruction,
    XMLProcessingInstruction,
)
from bs4.builder import (
    FAST,
    HTML,
@@ -20,19 +33,55 @@ from bs4.dammit import EncodingDetector

LXML = 'lxml'

def _invert(d):
    "Invert a dictionary."
    return dict((v,k) for k, v in list(d.items()))

class LXMLTreeBuilderForXML(TreeBuilder):
    DEFAULT_PARSER_CLASS = etree.XMLParser

    is_xml = True
    processing_instruction_class = XMLProcessingInstruction

    NAME = "lxml-xml"
    ALTERNATE_NAMES = ["xml"]

    # Well, it's permissive by XML parser standards.
    features = [LXML, XML, FAST, PERMISSIVE]
    features = [NAME, LXML, XML, FAST, PERMISSIVE]

    CHUNK_SIZE = 512

    # This namespace mapping is specified in the XML Namespace
    # standard.
    DEFAULT_NSMAPS = {'http://www.w3.org/XML/1998/namespace' : "xml"}
    DEFAULT_NSMAPS = dict(xml='http://www.w3.org/XML/1998/namespace')

    DEFAULT_NSMAPS_INVERTED = _invert(DEFAULT_NSMAPS)

    # NOTE: If we parsed Element objects and looked at .sourceline,
    # we'd be able to see the line numbers from the original document.
    # But instead we build an XMLParser or HTMLParser object to serve
    # as the target of parse messages, and those messages don't include
    # line numbers.

    def initialize_soup(self, soup):
        """Let the BeautifulSoup object know about the standard namespace
        mapping.
        """
        super(LXMLTreeBuilderForXML, self).initialize_soup(soup)
        self._register_namespaces(self.DEFAULT_NSMAPS)

    def _register_namespaces(self, mapping):
        """Let the BeautifulSoup object know about namespaces encountered
        while parsing the document.

        This might be useful later on when creating CSS selectors.
        """
        for key, value in list(mapping.items()):
            if key and key not in self.soup._namespaces:
                # Let the BeautifulSoup object know about a new namespace.
                # If there are multiple namespaces defined with the same
                # prefix, the first one in the document takes precedence.
                self.soup._namespaces[key] = value

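A sketch of the registry paying off later in CSS selection (requires lxml, and soupsieve for select()):

from bs4 import BeautifulSoup

doc = '<root xmlns:ns="http://example.com/ns"><ns:item>x</ns:item></root>'
soup = BeautifulSoup(doc, "lxml-xml")
# The "ns" prefix was registered while parsing the document.
print(soup.select("ns|item", namespaces=soup._namespaces))
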
def default_parser(self, encoding):
|
||||
# This can either return a parser object or a class, which
|
||||
@@ -46,12 +95,12 @@ class LXMLTreeBuilderForXML(TreeBuilder):
|
||||
# Use the default parser.
|
||||
parser = self.default_parser(encoding)
|
||||
|
||||
if isinstance(parser, collections.Callable):
|
||||
if isinstance(parser, Callable):
|
||||
# Instantiate the parser with default arguments
|
||||
parser = parser(target=self, strip_cdata=False, encoding=encoding)
|
||||
return parser
|
||||
|
||||
def __init__(self, parser=None, empty_element_tags=None):
|
||||
def __init__(self, parser=None, empty_element_tags=None, **kwargs):
|
||||
# TODO: Issue a warning if parser is present but not a
|
||||
# callable, since that means there's no way to create new
|
||||
# parsers for different encodings.
|
||||
@@ -59,8 +108,9 @@ class LXMLTreeBuilderForXML(TreeBuilder):
|
||||
if empty_element_tags is not None:
|
||||
self.empty_element_tags = set(empty_element_tags)
|
||||
self.soup = None
|
||||
self.nsmaps = [self.DEFAULT_NSMAPS]
|
||||
|
||||
self.nsmaps = [self.DEFAULT_NSMAPS_INVERTED]
|
||||
super(LXMLTreeBuilderForXML, self).__init__(**kwargs)
|
||||
|
||||
def _getNsTag(self, tag):
|
||||
# Split the namespace URL out of a fully-qualified lxml tag
|
||||
# name. Copied from lxml's src/lxml/sax.py.
|
||||
@@ -70,6 +120,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
|
||||
return (None, tag)
|
||||
|
||||
def prepare_markup(self, markup, user_specified_encoding=None,
|
||||
exclude_encodings=None,
|
||||
document_declared_encoding=None):
|
||||
"""
|
||||
:yield: A series of 4-tuples.
|
||||
@@ -78,31 +129,37 @@ class LXMLTreeBuilderForXML(TreeBuilder):
|
||||
|
||||
Each 4-tuple represents a strategy for parsing the document.
|
||||
"""
|
||||
if isinstance(markup, unicode):
|
||||
# We were given Unicode. Maybe lxml can parse Unicode on
|
||||
# this system?
|
||||
yield markup, None, document_declared_encoding, False
|
||||
|
||||
if isinstance(markup, unicode):
|
||||
# No, apparently not. Convert the Unicode to UTF-8 and
|
||||
# tell lxml to parse it as UTF-8.
|
||||
yield (markup.encode("utf8"), "utf8",
|
||||
document_declared_encoding, False)
|
||||
|
||||
# Instead of using UnicodeDammit to convert the bytestring to
|
||||
# Unicode using different encodings, use EncodingDetector to
|
||||
# iterate over the encodings, and tell lxml to try to parse
|
||||
# the document as each one in turn.
|
||||
is_html = not self.is_xml
|
||||
if is_html:
|
||||
self.processing_instruction_class = ProcessingInstruction
|
||||
else:
|
||||
self.processing_instruction_class = XMLProcessingInstruction
|
||||
|
||||
if isinstance(markup, str):
|
||||
# We were given Unicode. Maybe lxml can parse Unicode on
|
||||
# this system?
|
||||
yield markup, None, document_declared_encoding, False
|
||||
|
||||
if isinstance(markup, str):
|
||||
# No, apparently not. Convert the Unicode to UTF-8 and
|
||||
# tell lxml to parse it as UTF-8.
|
||||
yield (markup.encode("utf8"), "utf8",
|
||||
document_declared_encoding, False)
|
||||
|
||||
try_encodings = [user_specified_encoding, document_declared_encoding]
|
||||
detector = EncodingDetector(markup, try_encodings, is_html)
|
||||
detector = EncodingDetector(
|
||||
markup, try_encodings, is_html, exclude_encodings)
|
||||
for encoding in detector.encodings:
|
||||
yield (detector.markup, encoding, document_declared_encoding, False)
|
||||
|
||||
def feed(self, markup):
|
||||
if isinstance(markup, bytes):
|
||||
markup = BytesIO(markup)
|
||||
elif isinstance(markup, unicode):
|
||||
elif isinstance(markup, str):
|
||||
markup = StringIO(markup)
|
||||
|
||||
# Call feed() at least once, even if the markup is empty,
|
||||
@@ -117,30 +174,36 @@ class LXMLTreeBuilderForXML(TreeBuilder):
|
||||
if len(data) != 0:
|
||||
self.parser.feed(data)
|
||||
self.parser.close()
|
||||
except (UnicodeDecodeError, LookupError, etree.ParserError), e:
|
||||
raise ParserRejectedMarkup(str(e))
|
||||
except (UnicodeDecodeError, LookupError, etree.ParserError) as e:
|
||||
raise ParserRejectedMarkup(e)
|
||||
|
||||
def close(self):
|
||||
self.nsmaps = [self.DEFAULT_NSMAPS]
|
||||
self.nsmaps = [self.DEFAULT_NSMAPS_INVERTED]
|
||||
|
||||
def start(self, name, attrs, nsmap={}):
|
||||
# Make sure attrs is a mutable dict--lxml may send an immutable dictproxy.
|
||||
attrs = dict(attrs)
|
||||
nsprefix = None
|
||||
# Invert each namespace map as it comes in.
|
||||
if len(self.nsmaps) > 1:
|
||||
# There are no new namespaces for this tag, but
|
||||
# non-default namespaces are in play, so we need a
|
||||
# separate tag stack to know when they end.
|
||||
self.nsmaps.append(None)
|
||||
if len(nsmap) == 0 and len(self.nsmaps) > 1:
|
||||
# There are no new namespaces for this tag, but
|
||||
# non-default namespaces are in play, so we need a
|
||||
# separate tag stack to know when they end.
|
||||
self.nsmaps.append(None)
|
||||
elif len(nsmap) > 0:
|
||||
# A new namespace mapping has come into play.
|
||||
inverted_nsmap = dict((value, key) for key, value in nsmap.items())
|
||||
self.nsmaps.append(inverted_nsmap)
|
||||
|
||||
# First, Let the BeautifulSoup object know about it.
|
||||
self._register_namespaces(nsmap)
|
||||
|
||||
# Then, add it to our running list of inverted namespace
|
||||
# mappings.
|
||||
self.nsmaps.append(_invert(nsmap))
|
||||
|
||||
# Also treat the namespace mapping as a set of attributes on the
|
||||
# tag, so we can recreate it later.
|
||||
attrs = attrs.copy()
|
||||
for prefix, namespace in nsmap.items():
|
||||
for prefix, namespace in list(nsmap.items()):
|
||||
attribute = NamespacedAttribute(
|
||||
"xmlns", prefix, "http://www.w3.org/2000/xmlns/")
|
||||
attrs[attribute] = namespace
|
||||
@@ -149,7 +212,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
        # from lxml with namespaces attached to their names, and
        # turn them into NamespacedAttribute objects.
        new_attrs = {}
        for attr, value in attrs.items():
        for attr, value in list(attrs.items()):
            namespace, attr = self._getNsTag(attr)
            if namespace is None:
                new_attrs[attr] = value
@@ -189,7 +252,9 @@ class LXMLTreeBuilderForXML(TreeBuilder):
            self.nsmaps.pop()

    def pi(self, target, data):
        pass
        self.soup.endData()
        self.soup.handle_data(target + ' ' + data)
        self.soup.endData(self.processing_instruction_class)

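The rewritten pi() handler keeps processing instructions in the tree instead of
discarding them; the test_processing_instruction smoke tests added later in this
changeset assert exactly this round trip. A sketch of the observable behavior
(lxml builder assumed):

    from bs4 import BeautifulSoup

    soup = BeautifulSoup("<?PITarget PIContent?>", "lxml")
    # The processing instruction now survives parsing and serialization.
    print(soup.decode())  # <?PITarget PIContent?>
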
    def data(self, content):
        self.soup.handle_data(content)
@@ -207,13 +272,17 @@ class LXMLTreeBuilderForXML(TreeBuilder):

    def test_fragment_to_document(self, fragment):
        """See `TreeBuilder`."""
        return u'<?xml version="1.0" encoding="utf-8"?>\n%s' % fragment
        return '<?xml version="1.0" encoding="utf-8"?>\n%s' % fragment


class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):

    features = [LXML, HTML, FAST, PERMISSIVE]
    NAME = LXML
    ALTERNATE_NAMES = ["lxml-html"]

    features = ALTERNATE_NAMES + [NAME, HTML, FAST, PERMISSIVE]
    is_xml = False
    processing_instruction_class = ProcessingInstruction

    def default_parser(self, encoding):
        return etree.HTMLParser
@@ -224,10 +293,10 @@ class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):
            self.parser = self.parser_for(encoding)
            self.parser.feed(markup)
            self.parser.close()
        except (UnicodeDecodeError, LookupError, etree.ParserError), e:
            raise ParserRejectedMarkup(str(e))
        except (UnicodeDecodeError, LookupError, etree.ParserError) as e:
            raise ParserRejectedMarkup(e)


    def test_fragment_to_document(self, fragment):
        """See `TreeBuilder`."""
        return u'<html><body>%s</body></html>' % fragment
        return '<html><body>%s</body></html>' % fragment

4
lib/bs4/check_block.py
Normal file
@@ -0,0 +1,4 @@
import requests
data = requests.get("https://www.crummy.com/").content
from bs4 import _s
data = [x for x in _s(data).block_text()]
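check_block.py is a tiny live smoke check: _s is the short alias bs4 exports for
the BeautifulSoup class, so the script is equivalent to this sketch (block_text()
is taken as given from the script itself, and the fetch hits a live site):

    import requests
    from bs4 import BeautifulSoup

    data = requests.get("https://www.crummy.com/").content
    # Collect the text of block-level elements.
    blocks = list(BeautifulSoup(data).block_text())
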
@@ -3,12 +3,15 @@

This library converts a bytestream to Unicode through any means
necessary. It is heavily based on code from Mark Pilgrim's Universal
Feed Parser. It works best on XML and XML, but it does not rewrite the
Feed Parser. It works best on XML and HTML, but it does not rewrite the
XML or HTML to reflect a new encoding; that's the tree builder's job.
"""
# Use of this source code is governed by the MIT license.
__license__ = "MIT"

import codecs
from htmlentitydefs import codepoint2name
from future.moves.html.entities import codepoint2name
from future.builtins import chr
import re
import logging
import string
@@ -20,6 +23,8 @@ try:
    # PyPI package: cchardet
    import cchardet
    def chardet_dammit(s):
        if isinstance(s, str):
            return None
        return cchardet.detect(s)['encoding']
except ImportError:
    try:
@@ -28,6 +33,8 @@ except ImportError:
        # PyPI package: chardet
        import chardet
        def chardet_dammit(s):
            if isinstance(s, str):
                return None
            return chardet.detect(s)['encoding']
        #import chardet.constants
        #chardet.constants._debug = 1
@@ -42,10 +49,19 @@ try:
except ImportError:
    pass

xml_encoding_re = re.compile(
    '^<\?.*encoding=[\'"](.*?)[\'"].*\?>'.encode(), re.I)
html_meta_re = re.compile(
    '<\s*meta[^>]+charset\s*=\s*["\']?([^>]*?)[ /;\'">]'.encode(), re.I)
# Build bytestring and Unicode versions of regular expressions for finding
# a declared encoding inside an XML or HTML document.
xml_encoding = '^\s*<\\?.*encoding=[\'"](.*?)[\'"].*\\?>'
html_meta = '<\\s*meta[^>]+charset\\s*=\\s*["\']?([^>]*?)[ /;\'">]'
encoding_res = dict()
encoding_res[bytes] = {
    'html' : re.compile(html_meta.encode("ascii"), re.I),
    'xml' : re.compile(xml_encoding.encode("ascii"), re.I),
}
encoding_res[str] = {
    'html' : re.compile(html_meta, re.I),
    'xml' : re.compile(xml_encoding, re.I)
}

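A quick illustration of why the table is keyed by input type: the same
declared-encoding search works whether the markup arrives as bytes or as
already-decoded text (standalone snippet mirroring the pattern above):

    import re

    xml_encoding = '^\\s*<\\?.*encoding=[\'"](.*?)[\'"].*\\?>'
    encoding_res = {
        bytes: re.compile(xml_encoding.encode("ascii"), re.I),
        str: re.compile(xml_encoding, re.I),
    }

    for markup in (b'<?xml version="1.0" encoding="utf-8"?><a/>',
                   '<?xml version="1.0" encoding="utf-8"?><a/>'):
        match = encoding_res[type(markup)].search(markup)
        print(match.group(1))  # b'utf-8', then 'utf-8'
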
class EntitySubstitution(object):

@@ -55,15 +71,24 @@ class EntitySubstitution(object):
        lookup = {}
        reverse_lookup = {}
        characters_for_re = []
        for codepoint, name in list(codepoint2name.items()):
            character = unichr(codepoint)
            if codepoint != 34:

        # &apos is an XHTML entity and an HTML 5, but not an HTML 4
        # entity. We don't want to use it, but we want to recognize it on the way in.
        #
        # TODO: Ideally we would be able to recognize all HTML 5 named
        # entities, but that's a little tricky.
        extra = [(39, 'apos')]
        for codepoint, name in list(codepoint2name.items()) + extra:
            character = chr(codepoint)
            if codepoint not in (34, 39):
                # There's no point in turning the quotation mark into
                # &quot;, unless it happens within an attribute value, which
                # is handled elsewhere.
                # &quot; or the single quote into &apos;, unless it
                # happens within an attribute value, which is handled
                # elsewhere.
                characters_for_re.append(character)
                lookup[character] = name
            # But we do want to turn &quot; into the quotation mark.
            # But we do want to recognize those entities on the way in and
            # convert them to Unicode characters.
            reverse_lookup[name] = character
        re_definition = "[%s]" % "".join(characters_for_re)
        return lookup, reverse_lookup, re.compile(re_definition)
@@ -79,7 +104,7 @@ class EntitySubstitution(object):
        }

    BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|"
                                           "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)"
                                           "&(?!#\\d+;|#x[0-9a-fA-F]+;|\\w+;)"
                                           ")")

    AMPERSAND_OR_BRACKET = re.compile("([<>&])")
@@ -212,8 +237,11 @@ class EncodingDetector:

    5. Windows-1252.
    """
    def __init__(self, markup, override_encodings=None, is_html=False):
    def __init__(self, markup, override_encodings=None, is_html=False,
                 exclude_encodings=None):
        self.override_encodings = override_encodings or []
        exclude_encodings = exclude_encodings or []
        self.exclude_encodings = set([x.lower() for x in exclude_encodings])
        self.chardet_encoding = None
        self.is_html = is_html
        self.declared_encoding = None
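A short usage sketch of the new exclude_encodings hook (the constructor
signature is as added above; the encodings property yields candidates in
priority order):

    from bs4.dammit import EncodingDetector

    data = b'\xff\xfeh\x00i\x00'  # UTF-16LE "hi", led by a byte-order mark
    detector = EncodingDetector(data, exclude_encodings=["iso-8859-1"])
    for encoding in detector.encodings:
        print(encoding)  # every candidate except the excluded one
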
@@ -224,6 +252,8 @@ class EncodingDetector:
    def _usable(self, encoding, tried):
        if encoding is not None:
            encoding = encoding.lower()
            if encoding in self.exclude_encodings:
                return False
        if encoding not in tried:
            tried.add(encoding)
            return True
@@ -266,6 +296,9 @@ class EncodingDetector:
    def strip_byte_order_mark(cls, data):
        """If a byte-order mark is present, strip it and return the encoding it implies."""
        encoding = None
        if isinstance(data, str):
            # Unicode data cannot have a byte-order mark.
            return data, encoding
        if (len(data) >= 4) and (data[:2] == b'\xfe\xff') \
               and (data[2:4] != b'\x00\x00'):
            encoding = 'utf-16be'
@@ -300,14 +333,22 @@ class EncodingDetector:
        xml_endpos = 1024
        html_endpos = max(2048, int(len(markup) * 0.05))

        if isinstance(markup, bytes):
            res = encoding_res[bytes]
        else:
            res = encoding_res[str]

        xml_re = res['xml']
        html_re = res['html']
        declared_encoding = None
        declared_encoding_match = xml_encoding_re.search(markup, endpos=xml_endpos)
        declared_encoding_match = xml_re.search(markup, endpos=xml_endpos)
        if not declared_encoding_match and is_html:
            declared_encoding_match = html_meta_re.search(markup, endpos=html_endpos)
            declared_encoding_match = html_re.search(markup, endpos=html_endpos)
        if declared_encoding_match is not None:
            declared_encoding = declared_encoding_match.groups()[0].decode(
                'ascii')
            declared_encoding = declared_encoding_match.groups()[0]
        if declared_encoding:
            if isinstance(declared_encoding, bytes):
                declared_encoding = declared_encoding.decode('ascii', 'replace')
            return declared_encoding.lower()
        return None

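The classmethod this hunk modifies can be exercised on its own; both branches of
the new type dispatch are visible from the outside (a sketch):

    from bs4.dammit import EncodingDetector

    print(EncodingDetector.find_declared_encoding(
        b'<?xml version="1.0" encoding="ISO-8859-8"?><root/>'))
    # -> 'iso-8859-8'
    print(EncodingDetector.find_declared_encoding(
        '<meta charset="utf-8">', is_html=True))
    # -> 'utf-8'
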
@@ -331,18 +372,19 @@ class UnicodeDammit:
    ]

    def __init__(self, markup, override_encodings=[],
                 smart_quotes_to=None, is_html=False):
                 smart_quotes_to=None, is_html=False, exclude_encodings=[]):
        self.smart_quotes_to = smart_quotes_to
        self.tried_encodings = []
        self.contains_replacement_characters = False
        self.is_html = is_html

        self.detector = EncodingDetector(markup, override_encodings, is_html)
        self.log = logging.getLogger(__name__)
        self.detector = EncodingDetector(
            markup, override_encodings, is_html, exclude_encodings)

        # Short-circuit if the data is in Unicode to begin with.
        if isinstance(markup, unicode) or markup == '':
        if isinstance(markup, str) or markup == '':
            self.markup = markup
            self.unicode_markup = unicode(markup)
            self.unicode_markup = str(markup)
            self.original_encoding = None
            return

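UnicodeDammit simply forwards the new keyword to EncodingDetector; a hedged
usage sketch (the reported encoding depends on which chardet variant, if any,
is installed):

    from bs4 import UnicodeDammit

    dammit = UnicodeDammit(b"Sacr\xe9 bleu!", exclude_encodings=["utf-8"])
    print(dammit.unicode_markup)     # 'Sacré bleu!'
    print(dammit.original_encoding)  # e.g. 'windows-1252'
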
@@ -365,9 +407,10 @@ class UnicodeDammit:
                if encoding != "ascii":
                    u = self._convert_from(encoding, "replace")
                if u is not None:
                    logging.warning(
                    self.log.warning(
                        "Some characters could not be decoded, and were "
                        "replaced with REPLACEMENT CHARACTER.")
                        "replaced with REPLACEMENT CHARACTER."
                    )
                    self.contains_replacement_characters = True
                    break

@@ -425,7 +468,7 @@ class UnicodeDammit:
    def _to_unicode(self, data, encoding, errors="strict"):
        '''Given a string and its encoding, decodes the string into Unicode.
        %encoding is a string recognized by encodings.aliases'''
        return unicode(data, encoding, errors)
        return str(data, encoding, errors)

    @property
    def declared_html_encoding(self):
@@ -723,7 +766,7 @@ class UnicodeDammit:
        0xde : b'\xc3\x9e', # Þ
        0xdf : b'\xc3\x9f', # ß
        0xe0 : b'\xc3\xa0', # à
        0xe1 : b'\xa1',     # á
        0xe1 : b'\xc3\xa1', # á
        0xe2 : b'\xc3\xa2', # â
        0xe3 : b'\xc3\xa3', # ã
        0xe4 : b'\xc3\xa4', # ä

@@ -1,7 +1,11 @@
"""Diagnostic functions, mainly for use when doing tech support."""

# Use of this source code is governed by the MIT license.
__license__ = "MIT"

import cProfile
from StringIO import StringIO
from HTMLParser import HTMLParser
from io import StringIO
from html.parser import HTMLParser
import bs4
from bs4 import BeautifulSoup, __version__
from bs4.builder import builder_registry
@@ -17,8 +21,8 @@ import cProfile

def diagnose(data):
    """Diagnostic suite for isolating common problems."""
    print "Diagnostic running on Beautiful Soup %s" % __version__
    print "Python version %s" % sys.version
    print("Diagnostic running on Beautiful Soup %s" % __version__)
    print("Python version %s" % sys.version)

    basic_parsers = ["html.parser", "html5lib", "lxml"]
    for name in basic_parsers:
@@ -27,44 +31,60 @@ def diagnose(data):
            break
        else:
            basic_parsers.remove(name)
            print (
            print((
                "I noticed that %s is not installed. Installing it may help." %
                name)
                name))

    if 'lxml' in basic_parsers:
        basic_parsers.append(["lxml", "xml"])
        from lxml import etree
        print "Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION))
        basic_parsers.append("lxml-xml")
        try:
            from lxml import etree
            print("Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION)))
        except ImportError as e:
            print (
                "lxml is not installed or couldn't be imported.")


    if 'html5lib' in basic_parsers:
        import html5lib
        print "Found html5lib version %s" % html5lib.__version__
        try:
            import html5lib
            print("Found html5lib version %s" % html5lib.__version__)
        except ImportError as e:
            print (
                "html5lib is not installed or couldn't be imported.")

    if hasattr(data, 'read'):
        data = data.read()
    elif os.path.exists(data):
        print '"%s" looks like a filename. Reading data from the file.' % data
        data = open(data).read()
    elif data.startswith("http:") or data.startswith("https:"):
        print '"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data
        print "You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup."
        print('"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data)
        print("You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup.")
        return
    print
    else:
        try:
            if os.path.exists(data):
                print('"%s" looks like a filename. Reading data from the file.' % data)
                with open(data) as fp:
                    data = fp.read()
        except ValueError:
            # This can happen on some platforms when the 'filename' is
            # too long. Assume it's data and not a filename.
            pass
    print()

    for parser in basic_parsers:
        print "Trying to parse your markup with %s" % parser
        print("Trying to parse your markup with %s" % parser)
        success = False
        try:
            soup = BeautifulSoup(data, parser)
            soup = BeautifulSoup(data, features=parser)
            success = True
        except Exception, e:
            print "%s could not parse the markup." % parser
        except Exception as e:
            print("%s could not parse the markup." % parser)
            traceback.print_exc()
        if success:
            print "Here's what %s did with the markup:" % parser
            print soup.prettify()
            print("Here's what %s did with the markup:" % parser)
            print(soup.prettify())

        print "-" * 80
        print("-" * 80)

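For reference, diagnose() is the entry point users are asked to run when
reporting problems; a typical invocation looks like this (output abridged and
illustrative):

    from bs4.diagnose import diagnose

    diagnose("<p>Some <b>unbalanced<p> markup")
    # Diagnostic running on Beautiful Soup 4.x.x
    # Trying to parse your markup with html.parser
    # Here's what html.parser did with the markup:
    # ...
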
def lxml_trace(data, html=True, **kwargs):
    """Print out the lxml events that occur during parsing.
@@ -74,7 +94,7 @@ def lxml_trace(data, html=True, **kwargs):
    """
    from lxml import etree
    for event, element in etree.iterparse(StringIO(data), html=html, **kwargs):
        print("%s, %4s, %s" % (event, element.tag, element.text))
        print(("%s, %4s, %s" % (event, element.tag, element.text)))

class AnnouncingParser(HTMLParser):
    """Announces HTMLParser parse events, without doing anything else."""
@@ -135,7 +155,7 @@ def rword(length=5):
def rsentence(length=4):
    "Generate a random sentence-like string."
    return " ".join(rword(random.randint(4,9)) for i in range(length))


def rdoc(num_elements=1000):
    """Randomly generate an invalid HTML document."""
    tag_names = ['p', 'div', 'span', 'i', 'b', 'script', 'table']
@@ -156,10 +176,10 @@ def rdoc(num_elements=1000):

def benchmark_parsers(num_elements=100000):
    """Very basic head-to-head performance benchmark."""
    print "Comparative parser benchmark on Beautiful Soup %s" % __version__
    print("Comparative parser benchmark on Beautiful Soup %s" % __version__)
    data = rdoc(num_elements)
    print "Generated a large invalid HTML document (%d bytes)." % len(data)

    print("Generated a large invalid HTML document (%d bytes)." % len(data))

    for parser in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]:
        success = False
        try:
@@ -167,24 +187,24 @@ def benchmark_parsers(num_elements=100000):
            soup = BeautifulSoup(data, parser)
            b = time.time()
            success = True
        except Exception, e:
            print "%s could not parse the markup." % parser
        except Exception as e:
            print("%s could not parse the markup." % parser)
            traceback.print_exc()
        if success:
            print "BS4+%s parsed the markup in %.2fs." % (parser, b-a)
            print("BS4+%s parsed the markup in %.2fs." % (parser, b-a))

    from lxml import etree
    a = time.time()
    etree.HTML(data)
    b = time.time()
    print "Raw lxml parsed the markup in %.2fs." % (b-a)
    print("Raw lxml parsed the markup in %.2fs." % (b-a))

    import html5lib
    parser = html5lib.HTMLParser()
    a = time.time()
    parser.parse(data)
    b = time.time()
    print "Raw html5lib parsed the markup in %.2fs." % (b-a)
    print("Raw html5lib parsed the markup in %.2fs." % (b-a))

def profile(num_elements=100000, parser="lxml"):

1021
lib/bs4/element.py
File diff suppressed because it is too large
99
lib/bs4/formatter.py
Normal file
@@ -0,0 +1,99 @@
from bs4.dammit import EntitySubstitution

class Formatter(EntitySubstitution):
    """Describes a strategy to use when outputting a parse tree to a string.

    Some parts of this strategy come from the distinction between
    HTML4, HTML5, and XML. Others are configurable by the user.
    """
    # Registries of XML and HTML formatters.
    XML_FORMATTERS = {}
    HTML_FORMATTERS = {}

    HTML = 'html'
    XML = 'xml'

    HTML_DEFAULTS = dict(
        cdata_containing_tags=set(["script", "style"]),
    )

    def _default(self, language, value, kwarg):
        if value is not None:
            return value
        if language == self.XML:
            return set()
        return self.HTML_DEFAULTS[kwarg]

    def __init__(
            self, language=None, entity_substitution=None,
            void_element_close_prefix='/', cdata_containing_tags=None,
    ):
        """

        :param void_element_close_prefix: By default, represent void
        elements as <tag/> rather than <tag>
        """
        self.language = language
        self.entity_substitution = entity_substitution
        self.void_element_close_prefix = void_element_close_prefix
        self.cdata_containing_tags = self._default(
            language, cdata_containing_tags, 'cdata_containing_tags'
        )

    def substitute(self, ns):
        """Process a string that needs to undergo entity substitution."""
        if not self.entity_substitution:
            return ns
        from .element import NavigableString
        if (isinstance(ns, NavigableString)
            and ns.parent is not None
            and ns.parent.name in self.cdata_containing_tags):
            # Do nothing.
            return ns
        # Substitute.
        return self.entity_substitution(ns)

    def attribute_value(self, value):
        """Process the value of an attribute."""
        return self.substitute(value)

    def attributes(self, tag):
        """Reorder a tag's attributes however you want."""
        return sorted(tag.attrs.items())


class HTMLFormatter(Formatter):
    REGISTRY = {}
    def __init__(self, *args, **kwargs):
        return super(HTMLFormatter, self).__init__(self.HTML, *args, **kwargs)


class XMLFormatter(Formatter):
    REGISTRY = {}
    def __init__(self, *args, **kwargs):
        return super(XMLFormatter, self).__init__(self.XML, *args, **kwargs)


# Set up aliases for the default formatters.
HTMLFormatter.REGISTRY['html'] = HTMLFormatter(
    entity_substitution=EntitySubstitution.substitute_html
)
HTMLFormatter.REGISTRY["html5"] = HTMLFormatter(
    entity_substitution=EntitySubstitution.substitute_html,
    void_element_close_prefix = None
)
HTMLFormatter.REGISTRY["minimal"] = HTMLFormatter(
    entity_substitution=EntitySubstitution.substitute_xml
)
HTMLFormatter.REGISTRY[None] = HTMLFormatter(
    entity_substitution=None
)
XMLFormatter.REGISTRY["html"] = XMLFormatter(
    entity_substitution=EntitySubstitution.substitute_html
)
XMLFormatter.REGISTRY["minimal"] = XMLFormatter(
    entity_substitution=EntitySubstitution.substitute_xml
)
XMLFormatter.REGISTRY[None] = Formatter(
    Formatter.XML, entity_substitution=None
)
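A hedged example of selecting one of the registered formatters by name when
serializing (the registry keys are exactly the aliases set up above):

    from bs4 import BeautifulSoup

    soup = BeautifulSoup("<p>il a dit « bonjour »</p>", "html.parser")
    print(soup.decode(formatter="html"))     # non-ASCII becomes named entities
    print(soup.decode(formatter="minimal"))  # only &, <, and > are escaped
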
@@ -1,5 +1,10 @@
# encoding: utf-8
"""Helper classes for tests."""

# Use of this source code is governed by the MIT license.
__license__ = "MIT"

import pickle
import copy
import functools
import unittest
@@ -11,29 +16,66 @@ from bs4.element import (
    ContentMetaAttributeValue,
    Doctype,
    SoupStrainer,
    Tag
)

from bs4.builder import HTMLParserTreeBuilder
default_builder = HTMLParserTreeBuilder

BAD_DOCUMENT = """A bare string
<!DOCTYPE xsl:stylesheet SYSTEM "htmlent.dtd">
<!DOCTYPE xsl:stylesheet PUBLIC "htmlent.dtd">
<div><![CDATA[A CDATA section where it doesn't belong]]></div>
<div><svg><![CDATA[HTML5 does allow CDATA sections in SVG]]></svg></div>
<div>A <meta> tag</div>
<div>A <br> tag that supposedly has contents.</br></div>
<div>AT&T</div>
<div><textarea>Within a textarea, markup like <b> tags and <&<& should be treated as literal</textarea></div>
<div><script>if (i < 2) { alert("<b>Markup within script tags should be treated as literal.</b>"); }</script></div>
<div>This numeric entity is missing the final semicolon: <x t="pi&#241ata"></div>
<div><a href="http://example.com/</a> that attribute value never got closed</div>
<div><a href="foo</a>, </a><a href="bar">that attribute value was closed by the subsequent tag</a></div>
<! This document starts with a bogus declaration ><div>a</div>
<div>This document contains <!an incomplete declaration <div>(do you see it?)</div>
<div>This document ends with <!an incomplete declaration
<div><a style={height:21px;}>That attribute value was bogus</a></div>
<! DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN">The doctype is invalid because it contains extra whitespace
<div><table><td nowrap>That boolean attribute had no value</td></table></div>
<div>Here's a nonexistent entity: &#foo; (do you see it?)</div>
<div>This document ends before the entity finishes: &gt
<div><p>Paragraphs shouldn't contain block display elements, but this one does: <dl><dt>you see?</dt></p>
<b b="20" a="1" b="10" a="2" a="3" a="4">Multiple values for the same attribute.</b>
<div><table><tr><td>Here's a table</td></tr></table></div>
<div><table id="1"><tr><td>Here's a nested table:<table id="2"><tr><td>foo</td></tr></table></td></div>
<div>This tag contains nothing but whitespace: <b> </b></div>
<div><blockquote><p><b>This p tag is cut off by</blockquote></p>the end of the blockquote tag</div>
<div><table><div>This table contains bare markup</div></table></div>
<div><div id="1">\n <a href="link1">This link is never closed.\n</div>\n<div id="2">\n <div id="3">\n <a href="link2">This link is closed.</a>\n </div>\n</div></div>
<div>This document contains a <!DOCTYPE surprise>surprise doctype</div>
<div><a><B><Cd><EFG>Mixed case tags are folded to lowercase</efg></CD></b></A></div>
<div><our\u2603>Tag name contains Unicode characters</our\u2603></div>
<div><a \u2603="snowman">Attribute name contains Unicode characters</a></div>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
"""


class SoupTest(unittest.TestCase):

    @property
    def default_builder(self):
        return default_builder()
        return default_builder

    def soup(self, markup, **kwargs):
        """Build a Beautiful Soup object from markup."""
        builder = kwargs.pop('builder', self.default_builder)
        return BeautifulSoup(markup, builder=builder, **kwargs)

    def document_for(self, markup):
    def document_for(self, markup, **kwargs):
        """Turn an HTML fragment into a document.

        The details depend on the builder.
        """
        return self.default_builder.test_fragment_to_document(markup)
        return self.default_builder(**kwargs).test_fragment_to_document(markup)

    def assertSoupEquals(self, to_parse, compare_parsed_to=None):
        builder = self.default_builder
@@ -43,6 +85,131 @@ class SoupTest(unittest.TestCase):

        self.assertEqual(obj.decode(), self.document_for(compare_parsed_to))

    def assertConnectedness(self, element):
        """Ensure that next_element and previous_element are properly
        set for all descendants of the given element.
        """
        earlier = None
        for e in element.descendants:
            if earlier:
                self.assertEqual(e, earlier.next_element)
                self.assertEqual(earlier, e.previous_element)
            earlier = e

    def linkage_validator(self, el, _recursive_call=False):
        """Ensure proper linkage throughout the document."""
        descendant = None
        # Document element should have no previous element or previous sibling.
        # It also shouldn't have a next sibling.
        if el.parent is None:
            assert el.previous_element is None,\
                "Bad previous_element\nNODE: {}\nPREV: {}\nEXPECTED: {}".format(
                    el, el.previous_element, None
                )
            assert el.previous_sibling is None,\
                "Bad previous_sibling\nNODE: {}\nPREV: {}\nEXPECTED: {}".format(
                    el, el.previous_sibling, None
                )
            assert el.next_sibling is None,\
                "Bad next_sibling\nNODE: {}\nNEXT: {}\nEXPECTED: {}".format(
                    el, el.next_sibling, None
                )

        idx = 0
        child = None
        last_child = None
        last_idx = len(el.contents) - 1
        for child in el.contents:
            descendant = None

            # Parent should link next element to their first child
            # That child should have no previous sibling
            if idx == 0:
                if el.parent is not None:
                    assert el.next_element is child,\
                        "Bad next_element\nNODE: {}\nNEXT: {}\nEXPECTED: {}".format(
                            el, el.next_element, child
                        )
                    assert child.previous_element is el,\
                        "Bad previous_element\nNODE: {}\nPREV: {}\nEXPECTED: {}".format(
                            child, child.previous_element, el
                        )
                    assert child.previous_sibling is None,\
                        "Bad previous_sibling\nNODE: {}\nPREV {}\nEXPECTED: {}".format(
                            child, child.previous_sibling, None
                        )

            # If not the first child, previous index should link as sibling to this index
            # Previous element should match the last index or the last bubbled up descendant
            else:
                assert child.previous_sibling is el.contents[idx - 1],\
                    "Bad previous_sibling\nNODE: {}\nPREV {}\nEXPECTED {}".format(
                        child, child.previous_sibling, el.contents[idx - 1]
                    )
                assert el.contents[idx - 1].next_sibling is child,\
                    "Bad next_sibling\nNODE: {}\nNEXT {}\nEXPECTED {}".format(
                        el.contents[idx - 1], el.contents[idx - 1].next_sibling, child
                    )

                if last_child is not None:
                    assert child.previous_element is last_child,\
                        "Bad previous_element\nNODE: {}\nPREV {}\nEXPECTED {}\nCONTENTS {}".format(
                            child, child.previous_element, last_child, child.parent.contents
                        )
                    assert last_child.next_element is child,\
                        "Bad next_element\nNODE: {}\nNEXT {}\nEXPECTED {}".format(
                            last_child, last_child.next_element, child
                        )

            if isinstance(child, Tag) and child.contents:
                descendant = self.linkage_validator(child, True)
                # A bubbled up descendant should have no next siblings
                assert descendant.next_sibling is None,\
                    "Bad next_sibling\nNODE: {}\nNEXT {}\nEXPECTED {}".format(
                        descendant, descendant.next_sibling, None
                    )

            # Mark last child as either the bubbled up descendant or the current child
            if descendant is not None:
                last_child = descendant
            else:
                last_child = child

            # If last child, there are no next siblings
            if idx == last_idx:
                assert child.next_sibling is None,\
                    "Bad next_sibling\nNODE: {}\nNEXT {}\nEXPECTED {}".format(
                        child, child.next_sibling, None
                    )
            idx += 1

        child = descendant if descendant is not None else child
        if child is None:
            child = el

        if not _recursive_call and child is not None:
            target = el
            while True:
                if target is None:
                    assert child.next_element is None, \
                        "Bad next_element\nNODE: {}\nNEXT {}\nEXPECTED {}".format(
                            child, child.next_element, None
                        )
                    break
                elif target.next_sibling is not None:
                    assert child.next_element is target.next_sibling, \
                        "Bad next_element\nNODE: {}\nNEXT {}\nEXPECTED {}".format(
                            child, child.next_element, target.next_sibling
                        )
                    break
                target = target.parent

            # We are done, so nothing to return
            return None
        else:
            # Return the child to the recursive caller
            return child


class HTMLTreeBuilderSmokeTest(object):

@@ -54,6 +221,27 @@ class HTMLTreeBuilderSmokeTest(object):
    markup in these tests, there's not much room for interpretation.
    """

    def test_empty_element_tags(self):
        """Verify that all HTML4 and HTML5 empty element (aka void element) tags
        are handled correctly.
        """
        for name in [
            'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'menuitem', 'meta', 'param', 'source', 'track', 'wbr',
            'spacer', 'frame'
        ]:
            soup = self.soup("")
            new_tag = soup.new_tag(name)
            self.assertEqual(True, new_tag.is_empty_element)

    def test_pickle_and_unpickle_identity(self):
        # Pickling a tree, then unpickling it, yields a tree identical
        # to the original.
        tree = self.soup("<a><b>foo</a>")
        dumped = pickle.dumps(tree, 2)
        loaded = pickle.loads(dumped)
        self.assertEqual(loaded.__class__, BeautifulSoup)
        self.assertEqual(loaded.decode(), tree.decode())

    def assertDoctypeHandled(self, doctype_fragment):
        """Assert that a given doctype string is handled correctly."""
        doctype_str, soup = self._document_with_doctype(doctype_fragment)
@@ -114,6 +302,27 @@ class HTMLTreeBuilderSmokeTest(object):
            soup.encode("utf-8").replace(b"\n", b""),
            markup.replace(b"\n", b""))

    def test_namespaced_html(self):
        """When a namespaced XML document is parsed as HTML it should
        be treated as HTML with weird tag names.
        """
        markup = b"""<ns1:foo>content</ns1:foo><ns1:foo/><ns2:foo/>"""
        soup = self.soup(markup)
        self.assertEqual(2, len(soup.find_all("ns1:foo")))

    def test_processing_instruction(self):
        # We test both Unicode and bytestring to verify that
        # process_markup correctly sets processing_instruction_class
        # even when the markup is already Unicode and there is no
        # need to process anything.
        markup = """<?PITarget PIContent?>"""
        soup = self.soup(markup)
        self.assertEqual(markup, soup.decode())

        markup = b"""<?PITarget PIContent?>"""
        soup = self.soup(markup)
        self.assertEqual(markup, soup.encode("utf8"))

    def test_deepcopy(self):
        """Make sure you can copy the tree builder.

@@ -155,6 +364,23 @@ class HTMLTreeBuilderSmokeTest(object):
    def test_nested_formatting_elements(self):
        self.assertSoupEquals("<em><em></em></em>")

    def test_double_head(self):
        html = '''<!DOCTYPE html>
<html>
<head>
<title>Ordinary HEAD element test</title>
</head>
<script type="text/javascript">
alert("Help!");
</script>
<body>
Hello, world!
</body>
</html>
'''
        soup = self.soup(html)
        self.assertEqual("text/javascript", soup.find('script')['type'])

    def test_comment(self):
        # Comments are represented as Comment objects.
        markup = "<p>foo<!--foobar-->baz</p>"
@@ -171,9 +397,22 @@ class HTMLTreeBuilderSmokeTest(object):
        self.assertEqual(comment, baz.previous_element)

    def test_preserved_whitespace_in_pre_and_textarea(self):
        """Whitespace must be preserved in <pre> and <textarea> tags."""
        self.assertSoupEquals("<pre> </pre>")
        self.assertSoupEquals("<textarea> woo </textarea>")
        """Whitespace must be preserved in <pre> and <textarea> tags,
        even if that would mean not prettifying the markup.
        """
        pre_markup = "<pre> </pre>"
        textarea_markup = "<textarea> woo\nwoo </textarea>"
        self.assertSoupEquals(pre_markup)
        self.assertSoupEquals(textarea_markup)

        soup = self.soup(pre_markup)
        self.assertEqual(soup.pre.prettify(), pre_markup)

        soup = self.soup(textarea_markup)
        self.assertEqual(soup.textarea.prettify(), textarea_markup)

        soup = self.soup("<textarea></textarea>")
        self.assertEqual(soup.textarea.prettify(), "<textarea></textarea>")

    def test_nested_inline_elements(self):
        """Inline elements can be nested indefinitely."""
@@ -213,6 +452,18 @@ class HTMLTreeBuilderSmokeTest(object):
            "<tbody><tr><td>Bar</td></tr></tbody>"
            "<tfoot><tr><td>Baz</td></tr></tfoot></table>")

    def test_multivalued_attribute_with_whitespace(self):
        # Whitespace separating the values of a multi-valued attribute
        # should be ignored.

        markup = '<div class=" foo bar "></a>'
        soup = self.soup(markup)
        self.assertEqual(['foo', 'bar'], soup.div['class'])

        # If you search by the literal name of the class it's like the whitespace
        # wasn't there.
        self.assertEqual(soup.div, soup.find('div', class_="foo bar"))

    def test_deeply_nested_multivalued_attribute(self):
        # html5lib can set the attributes of the same tag many times
        # as it rearranges the tree. This has caused problems with
@@ -221,18 +472,52 @@ class HTMLTreeBuilderSmokeTest(object):
        soup = self.soup(markup)
        self.assertEqual(["css"], soup.div.div['class'])

    def test_multivalued_attribute_on_html(self):
        # html5lib uses a different API to set the attributes of the
        # <html> tag. This has caused problems with multivalued
        # attributes.
        markup = '<html class="a b"></html>'
        soup = self.soup(markup)
        self.assertEqual(["a", "b"], soup.html['class'])

    def test_angle_brackets_in_attribute_values_are_escaped(self):
        self.assertSoupEquals('<a b="<a>"></a>', '<a b="&lt;a&gt;"></a>')

    def test_strings_resembling_character_entity_references(self):
        # "&T" and "&p" look like incomplete character entities, but they are
        # not.
        self.assertSoupEquals(
            "<p>&bull; AT&T is in the s&p 500</p>",
            "<p>\u2022 AT&amp;T is in the s&p 500</p>"
        )

    def test_apos_entity(self):
        self.assertSoupEquals(
            "<p>Bob&apos;s Bar</p>",
            "<p>Bob's Bar</p>",
        )

    def test_entities_in_foreign_document_encoding(self):
        # &#147; and &#148; are invalid numeric entities referencing
        # Windows-1252 characters. &#45; references a character common
        # to Windows-1252 and Unicode, and &#9731; references a
        # character only found in Unicode.
        #
        # All of these entities should be converted to Unicode
        # characters.
        markup = "<p>&#147;Hello&#148; &#45;&#9731;</p>"
        soup = self.soup(markup)
        self.assertEqual("“Hello” -☃", soup.p.string)

    def test_entities_in_attributes_converted_to_unicode(self):
        expect = u'<p id="pi\N{LATIN SMALL LETTER N WITH TILDE}ata"></p>'
        expect = '<p id="pi\N{LATIN SMALL LETTER N WITH TILDE}ata"></p>'
        self.assertSoupEquals('<p id="pi&#241;ata"></p>', expect)
        self.assertSoupEquals('<p id="pi&#xf1;ata"></p>', expect)
        self.assertSoupEquals('<p id="pi&#Xf1;ata"></p>', expect)
        self.assertSoupEquals('<p id="pi&ntilde;ata"></p>', expect)

    def test_entities_in_text_converted_to_unicode(self):
        expect = u'<p>pi\N{LATIN SMALL LETTER N WITH TILDE}ata</p>'
        expect = '<p>pi\N{LATIN SMALL LETTER N WITH TILDE}ata</p>'
        self.assertSoupEquals("<p>pi&#241;ata</p>", expect)
        self.assertSoupEquals("<p>pi&#xf1;ata</p>", expect)
        self.assertSoupEquals("<p>pi&ntilde;ata</p>", expect)
@@ -243,16 +528,52 @@ class HTMLTreeBuilderSmokeTest(object):
            '<p>I said "good day!"</p>')

    def test_out_of_range_entity(self):
        expect = u"\N{REPLACEMENT CHARACTER}"
        expect = "\N{REPLACEMENT CHARACTER}"
        self.assertSoupEquals("&#10000000000000;", expect)
        self.assertSoupEquals("&#x10000000000000;", expect)
        self.assertSoupEquals("&#1000000000;", expect)


    def test_multipart_strings(self):
        "Mostly to prevent a recurrence of a bug in the html5lib treebuilder."
        soup = self.soup("<html><h2>\nfoo</h2><p></p></html>")
        self.assertEqual("p", soup.h2.string.next_element.name)
        self.assertEqual("p", soup.p.name)
        self.assertConnectedness(soup)

    def test_empty_element_tags(self):
        """Verify consistent handling of empty-element tags,
        no matter how they come in through the markup.
        """
        self.assertSoupEquals('<br/><br/><br/>', "<br/><br/><br/>")
        self.assertSoupEquals('<br /><br /><br />', "<br/><br/><br/>")

    def test_head_tag_between_head_and_body(self):
        "Prevent recurrence of a bug in the html5lib treebuilder."
        content = """<html><head></head>
<link></link>
<body>foo</body>
</html>
"""
        soup = self.soup(content)
        self.assertNotEqual(None, soup.html.body)
        self.assertConnectedness(soup)

    def test_multiple_copies_of_a_tag(self):
        "Prevent recurrence of a bug in the html5lib treebuilder."
        content = """<!DOCTYPE html>
<html>
<body>
<article id="a" >
<div><a href="1"></div>
<footer>
<a href="2"></a>
</footer>
</article>
</body>
</html>
"""
        soup = self.soup(content)
        self.assertConnectedness(soup.article)

    def test_basic_namespaces(self):
        """Parsers don't need to *understand* namespaces, but at the
@@ -285,9 +606,9 @@ class HTMLTreeBuilderSmokeTest(object):
        # A seemingly innocuous document... but it's in Unicode! And
        # it contains characters that can't be represented in the
        # encoding found in the declaration! The horror!
        markup = u'<html><head><meta encoding="euc-jp"></head><body>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</body>'
        markup = '<html><head><meta encoding="euc-jp"></head><body>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</body>'
        soup = self.soup(markup)
        self.assertEqual(u'Sacr\xe9 bleu!', soup.body.string)
        self.assertEqual('Sacr\xe9 bleu!', soup.body.string)

    def test_soupstrainer(self):
        """Parsers should be able to work with SoupStrainers."""
@@ -327,7 +648,7 @@ class HTMLTreeBuilderSmokeTest(object):
        # Both XML and HTML entities are converted to Unicode characters
        # during parsing.
        text = "<p>&lt;&lt;sacr&eacute;&#32;bleu!&gt;&gt;</p>"
        expected = u"<p>&lt;&lt;sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</p>"
        expected = "<p>&lt;&lt;sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</p>"
        self.assertSoupEquals(text, expected)

    def test_smart_quotes_converted_on_the_way_in(self):
@@ -337,15 +658,15 @@ class HTMLTreeBuilderSmokeTest(object):
        soup = self.soup(quote)
        self.assertEqual(
            soup.p.string,
            u"\N{LEFT SINGLE QUOTATION MARK}Foo\N{RIGHT SINGLE QUOTATION MARK}")
            "\N{LEFT SINGLE QUOTATION MARK}Foo\N{RIGHT SINGLE QUOTATION MARK}")

    def test_non_breaking_spaces_converted_on_the_way_in(self):
        soup = self.soup("<a>&nbsp;&nbsp;</a>")
        self.assertEqual(soup.a.string, u"\N{NO-BREAK SPACE}" * 2)
        self.assertEqual(soup.a.string, "\N{NO-BREAK SPACE}" * 2)

    def test_entities_converted_on_the_way_out(self):
        text = "<p>&lt;&lt;sacr&eacute;&#32;bleu!&gt;&gt;</p>"
        expected = u"<p>&lt;&lt;sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</p>".encode("utf-8")
        expected = "<p>&lt;&lt;sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</p>".encode("utf-8")
        soup = self.soup(text)
        self.assertEqual(soup.p.encode("utf-8"), expected)

@@ -354,7 +675,7 @@ class HTMLTreeBuilderSmokeTest(object):
        # easy-to-understand document.

        # Here it is in Unicode. Note that it claims to be in ISO-Latin-1.
        unicode_html = u'<html><head><meta content="text/html; charset=ISO-Latin-1" http-equiv="Content-type"/></head><body><p>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</p></body></html>'
        unicode_html = '<html><head><meta content="text/html; charset=ISO-Latin-1" http-equiv="Content-type"/></head><body><p>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</p></body></html>'

        # That's because we're going to encode it into ISO-Latin-1, and use
        # that to test.
@@ -399,7 +720,9 @@ class HTMLTreeBuilderSmokeTest(object):
        hebrew_document = b'<html><head><title>Hebrew (ISO 8859-8) in Visual Directionality</title></head><body><h1>Hebrew (ISO 8859-8) in Visual Directionality</h1>\xed\xe5\xec\xf9</body></html>'
        soup = self.soup(
            hebrew_document, from_encoding="iso8859-8")
        self.assertEqual(soup.original_encoding, 'iso8859-8')
        # Some tree builders call it iso8859-8, others call it iso-8859-8.
        # That's not a difference we really care about.
        assert soup.original_encoding in ('iso8859-8', 'iso-8859-8')
        self.assertEqual(
            soup.encode('utf-8'),
            hebrew_document.decode("iso8859-8").encode("utf-8"))
@@ -461,13 +784,39 @@ class HTMLTreeBuilderSmokeTest(object):
        data.a['foo'] = 'bar'
        self.assertEqual('<a foo="bar">text</a>', data.a.decode())

    def test_worst_case(self):
        """Test the worst case (currently) for linking issues."""

        soup = self.soup(BAD_DOCUMENT)
        self.linkage_validator(soup)


class XMLTreeBuilderSmokeTest(object):

    def test_pickle_and_unpickle_identity(self):
        # Pickling a tree, then unpickling it, yields a tree identical
        # to the original.
        tree = self.soup("<a><b>foo</a>")
        dumped = pickle.dumps(tree, 2)
        loaded = pickle.loads(dumped)
        self.assertEqual(loaded.__class__, BeautifulSoup)
        self.assertEqual(loaded.decode(), tree.decode())

    def test_docstring_generated(self):
        soup = self.soup("<root/>")
        self.assertEqual(
            soup.encode(), b'<?xml version="1.0" encoding="utf-8"?>\n<root/>')

    def test_xml_declaration(self):
        markup = b"""<?xml version="1.0" encoding="utf8"?>\n<foo/>"""
        soup = self.soup(markup)
        self.assertEqual(markup, soup.encode("utf8"))

    def test_processing_instruction(self):
        markup = b"""<?xml version="1.0" encoding="utf8"?>\n<?PITarget PIContent?>"""
        soup = self.soup(markup)
        self.assertEqual(markup, soup.encode("utf8"))

    def test_real_xhtml_document(self):
        """A real XHTML document should come out *exactly* the same as it went in."""
        markup = b"""<?xml version="1.0" encoding="utf-8"?>
@@ -480,12 +829,23 @@ class XMLTreeBuilderSmokeTest(object):
        self.assertEqual(
            soup.encode("utf-8"), markup)

    def test_nested_namespaces(self):
        doc = b"""<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<parent xmlns="http://ns1/">
<child xmlns="http://ns2/" xmlns:ns3="http://ns3/">
<grandchild ns3:attr="value" xmlns="http://ns4/"/>
</child>
</parent>"""
        soup = self.soup(doc)
        self.assertEqual(doc, soup.encode())

    def test_formatter_processes_script_tag_for_xml_documents(self):
        doc = """
<script type="text/javascript">
</script>
"""
        soup = BeautifulSoup(doc, "xml")
        soup = BeautifulSoup(doc, "lxml-xml")
        # lxml would have stripped this while parsing, but we can add
        # it later.
        soup.script.string = 'console.log("< < hey > > ");'
@@ -493,15 +853,15 @@ class XMLTreeBuilderSmokeTest(object):
        self.assertTrue(b"&lt; &lt; hey &gt; &gt;" in encoded)

    def test_can_parse_unicode_document(self):
        markup = u'<?xml version="1.0" encoding="euc-jp"><root>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</root>'
        markup = '<?xml version="1.0" encoding="euc-jp"><root>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</root>'
        soup = self.soup(markup)
        self.assertEqual(u'Sacr\xe9 bleu!', soup.root.string)
        self.assertEqual('Sacr\xe9 bleu!', soup.root.string)

    def test_popping_namespaced_tag(self):
        markup = '<rss xmlns:dc="foo"><dc:creator>b</dc:creator><dc:date>2012-07-02T20:33:42Z</dc:date><dc:rights>c</dc:rights><image>d</image></rss>'
        soup = self.soup(markup)
        self.assertEqual(
            unicode(soup.rss), markup)
            str(soup.rss), markup)

    def test_docstring_includes_correct_encoding(self):
        soup = self.soup("<root/>")
@@ -532,17 +892,57 @@ class XMLTreeBuilderSmokeTest(object):
    def test_closing_namespaced_tag(self):
        markup = '<p xmlns:dc="http://purl.org/dc/elements/1.1/"><dc:date>20010504</dc:date></p>'
        soup = self.soup(markup)
        self.assertEqual(unicode(soup.p), markup)
        self.assertEqual(str(soup.p), markup)

    def test_namespaced_attributes(self):
        markup = '<foo xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"><bar xsi:schemaLocation="http://www.example.com"/></foo>'
        soup = self.soup(markup)
        self.assertEqual(unicode(soup.foo), markup)
        self.assertEqual(str(soup.foo), markup)

    def test_namespaced_attributes_xml_namespace(self):
        markup = '<foo xml:lang="fr">bar</foo>'
        soup = self.soup(markup)
        self.assertEqual(unicode(soup.foo), markup)
        self.assertEqual(str(soup.foo), markup)

    def test_find_by_prefixed_name(self):
        doc = """<?xml version="1.0" encoding="utf-8"?>
<Document xmlns="http://example.com/ns0"
xmlns:ns1="http://example.com/ns1"
xmlns:ns2="http://example.com/ns2">
<ns1:tag>foo</ns1:tag>
<ns1:tag>bar</ns1:tag>
<ns2:tag key="value">baz</ns2:tag>
</Document>
"""
        soup = self.soup(doc)

        # There are three <tag> tags.
        self.assertEqual(3, len(soup.find_all('tag')))

        # But two of them are ns1:tag and one of them is ns2:tag.
        self.assertEqual(2, len(soup.find_all('ns1:tag')))
        self.assertEqual(1, len(soup.find_all('ns2:tag')))

        self.assertEqual(1, len(soup.find_all('ns2:tag', key='value')))
        self.assertEqual(3, len(soup.find_all(['ns1:tag', 'ns2:tag'])))

    def test_copy_tag_preserves_namespace(self):
        xml = """<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<w:document xmlns:w="http://example.com/ns0"/>"""

        soup = self.soup(xml)
        tag = soup.document
        duplicate = copy.copy(tag)

        # The two tags have the same namespace prefix.
        self.assertEqual(tag.prefix, duplicate.prefix)

    def test_worst_case(self):
        """Test the worst case (currently) for linking issues."""

        soup = self.soup(BAD_DOCUMENT)
        self.linkage_validator(soup)


class HTML5TreeBuilderSmokeTest(HTMLTreeBuilderSmokeTest):
    """Smoke test for a tree builder that supports HTML5."""

|
1
lib/bs4/tests/__init__.py
Normal file
@@ -0,0 +1 @@
"The beautifulsoup tests."
147
lib/bs4/tests/test_builder_registry.py
Normal file
@@ -0,0 +1,147 @@
"""Tests of the builder registry."""

import unittest
import warnings

from bs4 import BeautifulSoup
from bs4.builder import (
    builder_registry as registry,
    HTMLParserTreeBuilder,
    TreeBuilderRegistry,
)

try:
    from bs4.builder import HTML5TreeBuilder
    HTML5LIB_PRESENT = True
except ImportError:
    HTML5LIB_PRESENT = False

try:
    from bs4.builder import (
        LXMLTreeBuilderForXML,
        LXMLTreeBuilder,
    )
    LXML_PRESENT = True
except ImportError:
    LXML_PRESENT = False


class BuiltInRegistryTest(unittest.TestCase):
    """Test the built-in registry with the default builders registered."""

    def test_combination(self):
        if LXML_PRESENT:
            self.assertEqual(registry.lookup('fast', 'html'),
                             LXMLTreeBuilder)

        if LXML_PRESENT:
            self.assertEqual(registry.lookup('permissive', 'xml'),
                             LXMLTreeBuilderForXML)
        self.assertEqual(registry.lookup('strict', 'html'),
                         HTMLParserTreeBuilder)
        if HTML5LIB_PRESENT:
            self.assertEqual(registry.lookup('html5lib', 'html'),
                             HTML5TreeBuilder)

    def test_lookup_by_markup_type(self):
        if LXML_PRESENT:
            self.assertEqual(registry.lookup('html'), LXMLTreeBuilder)
            self.assertEqual(registry.lookup('xml'), LXMLTreeBuilderForXML)
        else:
            self.assertEqual(registry.lookup('xml'), None)
            if HTML5LIB_PRESENT:
                self.assertEqual(registry.lookup('html'), HTML5TreeBuilder)
            else:
                self.assertEqual(registry.lookup('html'), HTMLParserTreeBuilder)

    def test_named_library(self):
        if LXML_PRESENT:
            self.assertEqual(registry.lookup('lxml', 'xml'),
                             LXMLTreeBuilderForXML)
            self.assertEqual(registry.lookup('lxml', 'html'),
                             LXMLTreeBuilder)
        if HTML5LIB_PRESENT:
            self.assertEqual(registry.lookup('html5lib'),
                             HTML5TreeBuilder)

        self.assertEqual(registry.lookup('html.parser'),
                         HTMLParserTreeBuilder)

    def test_beautifulsoup_constructor_does_lookup(self):

        with warnings.catch_warnings(record=True) as w:
            # This will create a warning about not explicitly
            # specifying a parser, but we'll ignore it.

            # You can pass in a string.
            BeautifulSoup("", features="html")
            # Or a list of strings.
            BeautifulSoup("", features=["html", "fast"])

        # You'll get an exception if BS can't find an appropriate
        # builder.
        self.assertRaises(ValueError, BeautifulSoup,
                          "", features="no-such-feature")

class RegistryTest(unittest.TestCase):
    """Test the TreeBuilderRegistry class in general."""

    def setUp(self):
        self.registry = TreeBuilderRegistry()

    def builder_for_features(self, *feature_list):
        cls = type('Builder_' + '_'.join(feature_list),
                   (object,), {'features' : feature_list})

        self.registry.register(cls)
        return cls

    def test_register_with_no_features(self):
        builder = self.builder_for_features()

        # Since the builder advertises no features, you can't find it
        # by looking up features.
        self.assertEqual(self.registry.lookup('foo'), None)

        # But you can find it by doing a lookup with no features, if
        # this happens to be the only registered builder.
        self.assertEqual(self.registry.lookup(), builder)

    def test_register_with_features_makes_lookup_succeed(self):
        builder = self.builder_for_features('foo', 'bar')
        self.assertEqual(self.registry.lookup('foo'), builder)
        self.assertEqual(self.registry.lookup('bar'), builder)

    def test_lookup_fails_when_no_builder_implements_feature(self):
        builder = self.builder_for_features('foo', 'bar')
        self.assertEqual(self.registry.lookup('baz'), None)

    def test_lookup_gets_most_recent_registration_when_no_feature_specified(self):
        builder1 = self.builder_for_features('foo')
        builder2 = self.builder_for_features('bar')
        self.assertEqual(self.registry.lookup(), builder2)

    def test_lookup_fails_when_no_tree_builders_registered(self):
        self.assertEqual(self.registry.lookup(), None)

    def test_lookup_gets_most_recent_builder_supporting_all_features(self):
        has_one = self.builder_for_features('foo')
        has_the_other = self.builder_for_features('bar')
        has_both_early = self.builder_for_features('foo', 'bar', 'baz')
        has_both_late = self.builder_for_features('foo', 'bar', 'quux')
        lacks_one = self.builder_for_features('bar')
        has_the_other = self.builder_for_features('foo')

        # There are two builders featuring 'foo' and 'bar', but
        # the one that also features 'quux' was registered later.
        self.assertEqual(self.registry.lookup('foo', 'bar'),
                         has_both_late)

        # There is only one builder featuring 'foo', 'bar', and 'baz'.
        self.assertEqual(self.registry.lookup('foo', 'bar', 'baz'),
                         has_both_early)

    def test_lookup_fails_when_cannot_reconcile_requested_features(self):
        builder1 = self.builder_for_features('foo', 'bar')
        builder2 = self.builder_for_features('foo', 'baz')
        self.assertEqual(self.registry.lookup('bar', 'baz'), None)
36
lib/bs4/tests/test_docs.py
Normal file
@@ -0,0 +1,36 @@
"Test harness for doctests."
|
||||
|
||||
# pylint: disable-msg=E0611,W0142
|
||||
|
||||
__metaclass__ = type
|
||||
__all__ = [
|
||||
'additional_tests',
|
||||
]
|
||||
|
||||
import atexit
|
||||
import doctest
|
||||
import os
|
||||
#from pkg_resources import (
|
||||
# resource_filename, resource_exists, resource_listdir, cleanup_resources)
|
||||
import unittest
|
||||
|
||||
DOCTEST_FLAGS = (
|
||||
doctest.ELLIPSIS |
|
||||
doctest.NORMALIZE_WHITESPACE |
|
||||
doctest.REPORT_NDIFF)
|
||||
|
||||
|
||||
# def additional_tests():
|
||||
# "Run the doc tests (README.txt and docs/*, if any exist)"
|
||||
# doctest_files = [
|
||||
# os.path.abspath(resource_filename('bs4', 'README.txt'))]
|
||||
# if resource_exists('bs4', 'docs'):
|
||||
# for name in resource_listdir('bs4', 'docs'):
|
||||
# if name.endswith('.txt'):
|
||||
# doctest_files.append(
|
||||
# os.path.abspath(
|
||||
# resource_filename('bs4', 'docs/%s' % name)))
|
||||
# kwargs = dict(module_relative=False, optionflags=DOCTEST_FLAGS)
|
||||
# atexit.register(cleanup_resources)
|
||||
# return unittest.TestSuite((
|
||||
# doctest.DocFileSuite(*doctest_files, **kwargs)))
|
184
lib/bs4/tests/test_html5lib.py
Normal file
@@ -0,0 +1,184 @@
"""Tests to ensure that the html5lib tree builder generates good trees."""
|
||||
|
||||
import warnings
|
||||
|
||||
try:
|
||||
from bs4.builder import HTML5TreeBuilder
|
||||
HTML5LIB_PRESENT = True
|
||||
except ImportError as e:
|
||||
HTML5LIB_PRESENT = False
|
||||
from bs4.element import SoupStrainer
|
||||
from bs4.testing import (
|
||||
HTML5TreeBuilderSmokeTest,
|
||||
SoupTest,
|
||||
skipIf,
|
||||
)
|
||||
|
||||
@skipIf(
|
||||
not HTML5LIB_PRESENT,
|
||||
"html5lib seems not to be present, not testing its tree builder.")
|
||||
class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest):
|
||||
"""See ``HTML5TreeBuilderSmokeTest``."""
|
||||
|
||||
@property
|
||||
def default_builder(self):
|
||||
return HTML5TreeBuilder
|
||||
|
||||
def test_soupstrainer(self):
|
||||
# The html5lib tree builder does not support SoupStrainers.
|
||||
strainer = SoupStrainer("b")
|
||||
markup = "<p>A <b>bold</b> statement.</p>"
|
||||
with warnings.catch_warnings(record=True) as w:
|
||||
soup = self.soup(markup, parse_only=strainer)
|
||||
self.assertEqual(
|
||||
soup.decode(), self.document_for(markup))
|
||||
|
||||
self.assertTrue(
|
||||
"the html5lib tree builder doesn't support parse_only" in
|
||||
str(w[0].message))
|
||||
|
||||
def test_correctly_nested_tables(self):
|
||||
"""html5lib inserts <tbody> tags where other parsers don't."""
|
||||
markup = ('<table id="1">'
|
||||
'<tr>'
|
||||
"<td>Here's another table:"
|
||||
'<table id="2">'
|
||||
'<tr><td>foo</td></tr>'
|
||||
'</table></td>')
|
||||
|
||||
self.assertSoupEquals(
|
||||
markup,
|
||||
'<table id="1"><tbody><tr><td>Here\'s another table:'
|
||||
'<table id="2"><tbody><tr><td>foo</td></tr></tbody></table>'
|
||||
'</td></tr></tbody></table>')
|
||||
|
||||
self.assertSoupEquals(
|
||||
"<table><thead><tr><td>Foo</td></tr></thead>"
|
||||
"<tbody><tr><td>Bar</td></tr></tbody>"
|
||||
"<tfoot><tr><td>Baz</td></tr></tfoot></table>")
|
||||
|
||||
def test_xml_declaration_followed_by_doctype(self):
|
||||
markup = '''<?xml version="1.0" encoding="utf-8"?>
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
</head>
|
||||
<body>
|
||||
<p>foo</p>
|
||||
</body>
|
||||
</html>'''
|
||||
soup = self.soup(markup)
|
||||
# Verify that we can reach the <p> tag; this means the tree is connected.
|
||||
self.assertEqual(b"<p>foo</p>", soup.p.encode())
|
||||
|
||||
def test_reparented_markup(self):
|
||||
markup = '<p><em>foo</p>\n<p>bar<a></a></em></p>'
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual("<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p></body>", soup.body.decode())
|
||||
self.assertEqual(2, len(soup.find_all('p')))
|
||||
|
||||
|
||||
def test_reparented_markup_ends_with_whitespace(self):
|
||||
markup = '<p><em>foo</p>\n<p>bar<a></a></em></p>\n'
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual("<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p>\n</body>", soup.body.decode())
|
||||
self.assertEqual(2, len(soup.find_all('p')))
|
||||
|
||||
def test_reparented_markup_containing_identical_whitespace_nodes(self):
|
||||
"""Verify that we keep the two whitespace nodes in this
|
||||
document distinct when reparenting the adjacent <tbody> tags.
|
||||
"""
|
||||
markup = '<table> <tbody><tbody><ims></tbody> </table>'
|
||||
soup = self.soup(markup)
|
||||
space1, space2 = soup.find_all(string=' ')
|
||||
tbody1, tbody2 = soup.find_all('tbody')
|
||||
assert space1.next_element is tbody1
|
||||
assert tbody2.next_element is space2
|
||||
|
||||
def test_reparented_markup_containing_children(self):
|
||||
markup = '<div><a>aftermath<p><noscript>target</noscript>aftermath</a></p></div>'
|
||||
soup = self.soup(markup)
|
||||
noscript = soup.noscript
|
||||
self.assertEqual("target", noscript.next_element)
|
||||
target = soup.find(string='target')
|
||||
|
||||
# The 'aftermath' string was duplicated; we want the second one.
|
||||
final_aftermath = soup.find_all(string='aftermath')[-1]
|
||||
|
||||
# The <noscript> tag was moved beneath a copy of the <a> tag,
|
||||
# but the 'target' string within is still connected to the
|
||||
# (second) 'aftermath' string.
|
||||
self.assertEqual(final_aftermath, target.next_element)
|
||||
self.assertEqual(target, final_aftermath.previous_element)
|
||||
|
||||
def test_processing_instruction(self):
|
||||
"""Processing instructions become comments."""
|
||||
markup = b"""<?PITarget PIContent?>"""
|
||||
soup = self.soup(markup)
|
||||
assert str(soup).startswith("<!--?PITarget PIContent?-->")
|
||||
|
||||
def test_cloned_multivalue_node(self):
|
||||
markup = b"""<a class="my_class"><p></a>"""
|
||||
soup = self.soup(markup)
|
||||
a1, a2 = soup.find_all('a')
|
||||
self.assertEqual(a1, a2)
|
||||
assert a1 is not a2
|
||||
|
||||
def test_foster_parenting(self):
|
||||
markup = b"""<table><td></tbody>A"""
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual("<body>A<table><tbody><tr><td></td></tr></tbody></table></body>", soup.body.decode())
|
||||
|
||||
def test_extraction(self):
|
||||
"""
|
||||
Test that extraction does not destroy the tree.
|
||||
|
||||
https://bugs.launchpad.net/beautifulsoup/+bug/1782928
|
||||
"""
|
||||
|
||||
markup = """
|
||||
<html><head></head>
|
||||
<style>
|
||||
</style><script></script><body><p>hello</p></body></html>
|
||||
"""
|
||||
soup = self.soup(markup)
|
||||
[s.extract() for s in soup('script')]
|
||||
[s.extract() for s in soup('style')]
|
||||
|
||||
self.assertEqual(len(soup.find_all("p")), 1)
|
||||
|
||||
def test_empty_comment(self):
|
||||
"""
|
||||
Test that empty comment does not break structure.
|
||||
|
||||
https://bugs.launchpad.net/beautifulsoup/+bug/1806598
|
||||
"""
|
||||
|
||||
markup = """
|
||||
<html>
|
||||
<body>
|
||||
<form>
|
||||
<!----><input type="text">
|
||||
</form>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
soup = self.soup(markup)
|
||||
inputs = []
|
||||
for form in soup.find_all('form'):
|
||||
inputs.extend(form.find_all('input'))
|
||||
self.assertEqual(len(inputs), 1)
|
||||
|
||||
def test_tracking_line_numbers(self):
|
||||
# The html.parser TreeBuilder keeps track of line number and
|
||||
# position of each element.
|
||||
markup = "\n <p>\n\n<sourceline>\n<b>text</b></sourceline><sourcepos></p>"
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(2, soup.p.sourceline)
|
||||
self.assertEqual(5, soup.p.sourcepos)
|
||||
self.assertEqual("sourceline", soup.p.find('sourceline').name)
|
||||
|
||||
# You can deactivate this behavior.
|
||||
soup = self.soup(markup, store_line_numbers=False)
|
||||
self.assertEqual("sourceline", soup.p.sourceline.name)
|
||||
self.assertEqual("sourcepos", soup.p.sourcepos.name)
|
lib/bs4/tests/test_htmlparser.py (new file, 61 lines)
@@ -0,0 +1,61 @@
"""Tests to ensure that the html.parser tree builder generates good
trees."""

from pdb import set_trace
import pickle
from bs4.testing import SoupTest, HTMLTreeBuilderSmokeTest
from bs4.builder import HTMLParserTreeBuilder
from bs4.builder._htmlparser import BeautifulSoupHTMLParser

class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):

    default_builder = HTMLParserTreeBuilder

    def test_namespaced_system_doctype(self):
        # html.parser can't handle namespaced doctypes, so skip this one.
        pass

    def test_namespaced_public_doctype(self):
        # html.parser can't handle namespaced doctypes, so skip this one.
        pass

    def test_builder_is_pickled(self):
        """Unlike most tree builders, HTMLParserTreeBuilder is pickled
        along with the tree and will be restored after pickling.
        """
        tree = self.soup("<a><b>foo</a>")
        dumped = pickle.dumps(tree, 2)
        loaded = pickle.loads(dumped)
        self.assertTrue(isinstance(loaded.builder, type(tree.builder)))

    def test_redundant_empty_element_closing_tags(self):
        self.assertSoupEquals('<br></br><br></br><br></br>', "<br/><br/><br/>")
        self.assertSoupEquals('</br></br></br>', "")

    def test_empty_element(self):
        # This verifies that any buffered data present when the parser
        # finishes working is handled.
        self.assertSoupEquals("foo &# bar", "foo &amp;# bar")

    def test_tracking_line_numbers(self):
        # The html.parser TreeBuilder keeps track of line number and
        # position of each element.
        markup = "\n <p>\n\n<sourceline>\n<b>text</b></sourceline><sourcepos></p>"
        soup = self.soup(markup)
        self.assertEqual(2, soup.p.sourceline)
        self.assertEqual(3, soup.p.sourcepos)
        self.assertEqual("sourceline", soup.p.find('sourceline').name)

        # You can deactivate this behavior.
        soup = self.soup(markup, store_line_numbers=False)
        self.assertEqual("sourceline", soup.p.sourceline.name)
        self.assertEqual("sourcepos", soup.p.sourcepos.name)


class TestHTMLParserSubclass(SoupTest):
    def test_error(self):
        """Verify that our HTMLParser subclass implements error() in a way
        that doesn't cause a crash.
        """
        parser = BeautifulSoupHTMLParser()
        parser.error("don't crash")
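# Editor's sketch (not part of the diff): the pickle round trip the test
# above exercises, assuming beautifulsoup4 is installed.
import pickle
from bs4 import BeautifulSoup

soup = BeautifulSoup("<a><b>foo</a>", "html.parser")
restored = pickle.loads(pickle.dumps(soup, 2))
print(type(restored.builder).__name__)  # HTMLParserTreeBuilder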
lib/bs4/tests/test_lxml.py (new file, 115 lines)
@@ -0,0 +1,115 @@
"""Tests to ensure that the lxml tree builder generates good trees."""

import re
import warnings

try:
    import lxml.etree
    LXML_PRESENT = True
    LXML_VERSION = lxml.etree.LXML_VERSION
except ImportError as e:
    LXML_PRESENT = False
    LXML_VERSION = (0,)

if LXML_PRESENT:
    from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML

from bs4 import (
    BeautifulSoup,
    BeautifulStoneSoup,
    )
from bs4.element import Comment, Doctype, SoupStrainer
from bs4.testing import skipIf
from bs4.tests import test_htmlparser
from bs4.testing import (
    HTMLTreeBuilderSmokeTest,
    XMLTreeBuilderSmokeTest,
    SoupTest,
    skipIf,
)

@skipIf(
    not LXML_PRESENT,
    "lxml seems not to be present, not testing its tree builder.")
class LXMLTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
    """See ``HTMLTreeBuilderSmokeTest``."""

    @property
    def default_builder(self):
        return LXMLTreeBuilder

    def test_out_of_range_entity(self):
        self.assertSoupEquals(
            "<p>foo&#10000000000000;bar</p>", "<p>foobar</p>")
        self.assertSoupEquals(
            "<p>foo&#x10000000000000;bar</p>", "<p>foobar</p>")
        self.assertSoupEquals(
            "<p>foo&#1000000000;bar</p>", "<p>foobar</p>")

    def test_entities_in_foreign_document_encoding(self):
        # We can't implement this case correctly because by the time we
        # hear about markup like "&#147;", it's been (incorrectly) converted into
        # a string like u'\x93'
        pass

    # In lxml < 2.3.5, an empty doctype causes a segfault. Skip this
    # test if an old version of lxml is installed.

    @skipIf(
        not LXML_PRESENT or LXML_VERSION < (2,3,5,0),
        "Skipping doctype test for old version of lxml to avoid segfault.")
    def test_empty_doctype(self):
        soup = self.soup("<!DOCTYPE>")
        doctype = soup.contents[0]
        self.assertEqual("", doctype.strip())

    def test_beautifulstonesoup_is_xml_parser(self):
        # Make sure that the deprecated BSS class uses an xml builder
        # if one is installed.
        with warnings.catch_warnings(record=True) as w:
            soup = BeautifulStoneSoup("<b />")
        self.assertEqual("<b/>", str(soup.b))
        self.assertTrue("BeautifulStoneSoup class is deprecated" in str(w[0].message))

    def test_tracking_line_numbers(self):
        # The lxml TreeBuilder cannot keep track of line numbers from
        # the original markup. Even if you ask for line numbers, we
        # don't have 'em.
        #
        # This means that if you have a tag like <sourceline> or
        # <sourcepos>, attribute access will find it rather than
        # giving you a numeric answer.
        soup = self.soup(
            "\n <p>\n\n<sourceline>\n<b>text</b></sourceline><sourcepos></p>",
            store_line_numbers=True
        )
        self.assertEqual("sourceline", soup.p.sourceline.name)
        self.assertEqual("sourcepos", soup.p.sourcepos.name)

@skipIf(
    not LXML_PRESENT,
    "lxml seems not to be present, not testing its XML tree builder.")
class LXMLXMLTreeBuilderSmokeTest(SoupTest, XMLTreeBuilderSmokeTest):
    """See ``HTMLTreeBuilderSmokeTest``."""

    @property
    def default_builder(self):
        return LXMLTreeBuilderForXML

    def test_namespace_indexing(self):
        # We should not track un-prefixed namespaces as we can only hold one
        # and it will be recognized as the default namespace by soupsieve,
        # which may be confusing in some situations. When no namespace is provided
        # for a selector, the default namespace (if defined) is assumed.

        soup = self.soup(
            '<?xml version="1.1"?>\n'
            '<root>'
            '<tag xmlns="http://unprefixed-namespace.com">content</tag>'
            '<prefix:tag xmlns:prefix="http://prefixed-namespace.com">content</prefix:tag>'
            '</root>'
        )
        self.assertEqual(
            soup._namespaces,
            {'xml': 'http://www.w3.org/XML/1998/namespace', 'prefix': 'http://prefixed-namespace.com'}
        )
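# Editor's sketch (not part of the diff): what line tracking looks like with
# a parser that supports it, for contrast with the lxml case above.
from bs4 import BeautifulSoup

soup = BeautifulSoup("\n<p>hello</p>", "html.parser")
print(soup.p.sourceline, soup.p.sourcepos)  # 2 0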
lib/bs4/tests/test_soup.py (new file, 682 lines)
@@ -0,0 +1,682 @@
# -*- coding: utf-8 -*-
"""Tests of Beautiful Soup as a whole."""

from pdb import set_trace
import logging
import unittest
import sys
import tempfile

from bs4 import (
    BeautifulSoup,
    BeautifulStoneSoup,
)
from bs4.builder import (
    TreeBuilder,
    ParserRejectedMarkup,
)
from bs4.element import (
    CharsetMetaAttributeValue,
    Comment,
    ContentMetaAttributeValue,
    SoupStrainer,
    NamespacedAttribute,
    Tag,
    NavigableString,
    )

import bs4.dammit
from bs4.dammit import (
    EntitySubstitution,
    UnicodeDammit,
    EncodingDetector,
)
from bs4.testing import (
    default_builder,
    SoupTest,
    skipIf,
)
import warnings

try:
    from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
    LXML_PRESENT = True
except ImportError as e:
    LXML_PRESENT = False

PYTHON_3_PRE_3_2 = (sys.version_info[0] == 3 and sys.version_info < (3,2))

class TestConstructor(SoupTest):

    def test_short_unicode_input(self):
        data = "<h1>éé</h1>"
        soup = self.soup(data)
        self.assertEqual("éé", soup.h1.string)

    def test_embedded_null(self):
        data = "<h1>foo\0bar</h1>"
        soup = self.soup(data)
        self.assertEqual("foo\0bar", soup.h1.string)

    def test_exclude_encodings(self):
        utf8_data = "Räksmörgås".encode("utf-8")
        soup = self.soup(utf8_data, exclude_encodings=["utf-8"])
        self.assertEqual("windows-1252", soup.original_encoding)

    def test_custom_builder_class(self):
        # Verify that you can pass in a custom Builder class and
        # it'll be instantiated with the appropriate keyword arguments.
        class Mock(object):
            def __init__(self, **kwargs):
                self.called_with = kwargs
                self.is_xml = True
                self.store_line_numbers = False
                self.cdata_list_attributes = []
                self.preserve_whitespace_tags = []
            def initialize_soup(self, soup):
                pass
            def feed(self, markup):
                self.fed = markup
            def reset(self):
                pass
            def ignore(self, ignore):
                pass
            set_up_substitutions = can_be_empty_element = ignore
            def prepare_markup(self, *args, **kwargs):
                yield "prepared markup", "original encoding", "declared encoding", "contains replacement characters"

        kwargs = dict(
            var="value",
            # This is a deprecated BS3-era keyword argument, which
            # will be stripped out.
            convertEntities=True,
        )
        with warnings.catch_warnings(record=True):
            soup = BeautifulSoup('', builder=Mock, **kwargs)
        assert isinstance(soup.builder, Mock)
        self.assertEqual(dict(var="value"), soup.builder.called_with)
        self.assertEqual("prepared markup", soup.builder.fed)

        # You can also instantiate the TreeBuilder yourself. In this
        # case, that specific object is used and any keyword arguments
        # to the BeautifulSoup constructor are ignored.
        builder = Mock(**kwargs)
        with warnings.catch_warnings(record=True) as w:
            soup = BeautifulSoup(
                '', builder=builder, ignored_value=True,
            )
        msg = str(w[0].message)
        assert msg.startswith("Keyword arguments to the BeautifulSoup constructor will be ignored.")
        self.assertEqual(builder, soup.builder)
        self.assertEqual(kwargs, builder.called_with)

    def test_parser_markup_rejection(self):
        # If markup is completely rejected by the parser, an
        # explanatory ParserRejectedMarkup exception is raised.
        class Mock(TreeBuilder):
            def feed(self, *args, **kwargs):
                raise ParserRejectedMarkup("Nope.")

            def prepare_markup(self, markup, *args, **kwargs):
                # We're going to try two different ways of preparing this markup,
                # but feed() will reject both of them.
                yield markup, None, None, False
                yield markup, None, None, False

        import re
        self.assertRaisesRegex(
            ParserRejectedMarkup,
            "The markup you provided was rejected by the parser. Trying a different parser or a different encoding may help.",
            BeautifulSoup, '', builder=Mock,
        )

    def test_cdata_list_attributes(self):
        # Most attribute values are represented as scalars, but the
        # HTML standard says that some attributes, like 'class', have
        # space-separated lists as values.
        markup = '<a id=" an id " class=" a class "></a>'
        soup = self.soup(markup)

        # Note that the spaces are stripped for 'class' but not for 'id'.
        a = soup.a
        self.assertEqual(" an id ", a['id'])
        self.assertEqual(["a", "class"], a['class'])

        # TreeBuilder takes an argument called 'multi_valued_attributes' which lets
        # you customize or disable this. As always, you can customize the TreeBuilder
        # by passing in a keyword argument to the BeautifulSoup constructor.
        soup = self.soup(markup, builder=default_builder, multi_valued_attributes=None)
        self.assertEqual(" a class ", soup.a['class'])

        # Here are two ways of saying that `id` is a multi-valued
        # attribute in this context, but 'class' is not.
        for switcheroo in ({'*': 'id'}, {'a': 'id'}):
            with warnings.catch_warnings(record=True) as w:
                # This will create a warning about not explicitly
                # specifying a parser, but we'll ignore it.
                soup = self.soup(markup, builder=None, multi_valued_attributes=switcheroo)
            a = soup.a
            self.assertEqual(["an", "id"], a['id'])
            self.assertEqual(" a class ", a['class'])

    def test_replacement_classes(self):
        # Test the ability to pass in replacements for element classes
        # which will be used when building the tree.
        class TagPlus(Tag):
            pass

        class StringPlus(NavigableString):
            pass

        class CommentPlus(Comment):
            pass

        soup = self.soup(
            "<a><b>foo</b>bar</a><!--whee-->",
            element_classes = {
                Tag: TagPlus,
                NavigableString: StringPlus,
                Comment: CommentPlus,
            }
        )

        # The tree was built with TagPlus, StringPlus, and CommentPlus objects,
        # rather than Tag, String, and Comment objects.
        assert all(
            isinstance(x, (TagPlus, StringPlus, CommentPlus))
            for x in soup.recursiveChildGenerator()
        )
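# Editor's sketch (not part of the diff): the element_classes hook tested
# above, assuming beautifulsoup4 >= 4.8 is installed.
from bs4 import BeautifulSoup
from bs4.element import Tag

class TagPlus(Tag):
    pass

soup = BeautifulSoup("<a><b>foo</b></a>", "html.parser",
                     element_classes={Tag: TagPlus})
print(type(soup.a).__name__)  # TagPlus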
class TestWarnings(SoupTest):

    def _assert_no_parser_specified(self, s, is_there=True):
        v = s.startswith(BeautifulSoup.NO_PARSER_SPECIFIED_WARNING[:80])
        self.assertTrue(v)

    def test_warning_if_no_parser_specified(self):
        with warnings.catch_warnings(record=True) as w:
            soup = self.soup("<a><b></b></a>")
        msg = str(w[0].message)
        self._assert_no_parser_specified(msg)

    def test_warning_if_parser_specified_too_vague(self):
        with warnings.catch_warnings(record=True) as w:
            soup = self.soup("<a><b></b></a>", "html")
        msg = str(w[0].message)
        self._assert_no_parser_specified(msg)

    def test_no_warning_if_explicit_parser_specified(self):
        with warnings.catch_warnings(record=True) as w:
            soup = self.soup("<a><b></b></a>", "html.parser")
        self.assertEqual([], w)

    def test_parseOnlyThese_renamed_to_parse_only(self):
        with warnings.catch_warnings(record=True) as w:
            soup = self.soup("<a><b></b></a>", parseOnlyThese=SoupStrainer("b"))
        msg = str(w[0].message)
        self.assertTrue("parseOnlyThese" in msg)
        self.assertTrue("parse_only" in msg)
        self.assertEqual(b"<b></b>", soup.encode())

    def test_fromEncoding_renamed_to_from_encoding(self):
        with warnings.catch_warnings(record=True) as w:
            utf8 = b"\xc3\xa9"
            soup = self.soup(utf8, fromEncoding="utf8")
        msg = str(w[0].message)
        self.assertTrue("fromEncoding" in msg)
        self.assertTrue("from_encoding" in msg)
        self.assertEqual("utf8", soup.original_encoding)

    def test_unrecognized_keyword_argument(self):
        self.assertRaises(
            TypeError, self.soup, "<a>", no_such_argument=True)

    def test_disk_file_warning(self):
        filehandle = tempfile.NamedTemporaryFile()
        filename = filehandle.name
        try:
            with warnings.catch_warnings(record=True) as w:
                soup = self.soup(filename)
            msg = str(w[0].message)
            self.assertTrue("looks like a filename" in msg)
        finally:
            filehandle.close()

        # The file no longer exists, so Beautiful Soup will no longer issue the warning.
        with warnings.catch_warnings(record=True) as w:
            soup = self.soup(filename)
        self.assertEqual(0, len(w))

    def test_url_warning_with_bytes_url(self):
        with warnings.catch_warnings(record=True) as warning_list:
            soup = self.soup(b"http://www.crummybytes.com/")
        # Be aware this isn't the only warning that can be raised during
        # execution.
        self.assertTrue(any("looks like a URL" in str(w.message)
                            for w in warning_list))

    def test_url_warning_with_unicode_url(self):
        with warnings.catch_warnings(record=True) as warning_list:
            # note - this url must differ from the bytes one otherwise
            # python's warnings system swallows the second warning
            soup = self.soup("http://www.crummyunicode.com/")
        self.assertTrue(any("looks like a URL" in str(w.message)
                            for w in warning_list))

    def test_url_warning_with_bytes_and_space(self):
        with warnings.catch_warnings(record=True) as warning_list:
            soup = self.soup(b"http://www.crummybytes.com/ is great")
        self.assertFalse(any("looks like a URL" in str(w.message)
                             for w in warning_list))

    def test_url_warning_with_unicode_and_space(self):
        with warnings.catch_warnings(record=True) as warning_list:
            soup = self.soup("http://www.crummyuncode.com/ is great")
        self.assertFalse(any("looks like a URL" in str(w.message)
                             for w in warning_list))
class TestSelectiveParsing(SoupTest):

    def test_parse_with_soupstrainer(self):
        markup = "No<b>Yes</b><a>No<b>Yes <c>Yes</c></b>"
        strainer = SoupStrainer("b")
        soup = self.soup(markup, parse_only=strainer)
        self.assertEqual(soup.encode(), b"<b>Yes</b><b>Yes <c>Yes</c></b>")
class TestEntitySubstitution(unittest.TestCase):
    """Standalone tests of the EntitySubstitution class."""
    def setUp(self):
        self.sub = EntitySubstitution

    def test_simple_html_substitution(self):
        # Unicode characters corresponding to named HTML entities
        # are substituted, and no others.
        s = "foo\u2200\N{SNOWMAN}\u00f5bar"
        self.assertEqual(self.sub.substitute_html(s),
                         "foo&forall;\N{SNOWMAN}&otilde;bar")

    def test_smart_quote_substitution(self):
        # MS smart quotes are a common source of frustration, so we
        # give them a special test.
        quotes = b"\x91\x92foo\x93\x94"
        dammit = UnicodeDammit(quotes)
        self.assertEqual(self.sub.substitute_html(dammit.markup),
                         "&lsquo;&rsquo;foo&ldquo;&rdquo;")

    def test_xml_conversion_includes_no_quotes_if_make_quoted_attribute_is_false(self):
        s = 'Welcome to "my bar"'
        self.assertEqual(self.sub.substitute_xml(s, False), s)

    def test_xml_attribute_quoting_normally_uses_double_quotes(self):
        self.assertEqual(self.sub.substitute_xml("Welcome", True),
                         '"Welcome"')
        self.assertEqual(self.sub.substitute_xml("Bob's Bar", True),
                         '"Bob\'s Bar"')

    def test_xml_attribute_quoting_uses_single_quotes_when_value_contains_double_quotes(self):
        s = 'Welcome to "my bar"'
        self.assertEqual(self.sub.substitute_xml(s, True),
                         "'Welcome to \"my bar\"'")

    def test_xml_attribute_quoting_escapes_single_quotes_when_value_contains_both_single_and_double_quotes(self):
        s = 'Welcome to "Bob\'s Bar"'
        self.assertEqual(
            self.sub.substitute_xml(s, True),
            '"Welcome to &quot;Bob\'s Bar&quot;"')

    def test_xml_quotes_arent_escaped_when_value_is_not_being_quoted(self):
        quoted = 'Welcome to "Bob\'s Bar"'
        self.assertEqual(self.sub.substitute_xml(quoted), quoted)

    def test_xml_quoting_handles_angle_brackets(self):
        self.assertEqual(
            self.sub.substitute_xml("foo<bar>"),
            "foo&lt;bar&gt;")

    def test_xml_quoting_handles_ampersands(self):
        self.assertEqual(self.sub.substitute_xml("AT&T"), "AT&amp;T")

    def test_xml_quoting_including_ampersands_when_they_are_part_of_an_entity(self):
        self.assertEqual(
            self.sub.substitute_xml("&Aacute;T&T"),
            "&amp;Aacute;T&amp;T")

    def test_xml_quoting_ignoring_ampersands_when_they_are_part_of_an_entity(self):
        self.assertEqual(
            self.sub.substitute_xml_containing_entities("&Aacute;T&T"),
            "&Aacute;T&amp;T")

    def test_quotes_not_html_substituted(self):
        """There's no need to do this except inside attribute values."""
        text = 'Bob\'s "bar"'
        self.assertEqual(self.sub.substitute_html(text), text)
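# Editor's sketch (not part of the diff): the two quoting modes exercised by
# the tests above.
from bs4.dammit import EntitySubstitution

print(EntitySubstitution.substitute_xml('AT&T <telecom>'))
# AT&amp;T &lt;telecom&gt;
print(EntitySubstitution.substitute_xml('Say "hi"', make_quoted_attribute=True))
# 'Say "hi"'  (single-quoted because the value contains double quotes)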
class TestEncodingConversion(SoupTest):
    # Test Beautiful Soup's ability to decode and encode from various
    # encodings.

    def setUp(self):
        super(TestEncodingConversion, self).setUp()
        self.unicode_data = '<html><head><meta charset="utf-8"/></head><body><foo>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</foo></body></html>'
        self.utf8_data = self.unicode_data.encode("utf-8")
        # Just so you know what it looks like.
        self.assertEqual(
            self.utf8_data,
            b'<html><head><meta charset="utf-8"/></head><body><foo>Sacr\xc3\xa9 bleu!</foo></body></html>')

    def test_ascii_in_unicode_out(self):
        # ASCII input is converted to Unicode. The original_encoding
        # attribute is set to 'utf-8', a superset of ASCII.
        chardet = bs4.dammit.chardet_dammit
        logging.disable(logging.WARNING)
        try:
            def noop(str):
                return None
            # Disable chardet, which will realize that the ASCII is ASCII.
            bs4.dammit.chardet_dammit = noop
            ascii = b"<foo>a</foo>"
            soup_from_ascii = self.soup(ascii)
            unicode_output = soup_from_ascii.decode()
            self.assertTrue(isinstance(unicode_output, str))
            self.assertEqual(unicode_output, self.document_for(ascii.decode()))
            self.assertEqual(soup_from_ascii.original_encoding.lower(), "utf-8")
        finally:
            logging.disable(logging.NOTSET)
            bs4.dammit.chardet_dammit = chardet

    def test_unicode_in_unicode_out(self):
        # Unicode input is left alone. The original_encoding attribute
        # is not set.
        soup_from_unicode = self.soup(self.unicode_data)
        self.assertEqual(soup_from_unicode.decode(), self.unicode_data)
        self.assertEqual(soup_from_unicode.foo.string, 'Sacr\xe9 bleu!')
        self.assertEqual(soup_from_unicode.original_encoding, None)

    def test_utf8_in_unicode_out(self):
        # UTF-8 input is converted to Unicode. The original_encoding
        # attribute is set.
        soup_from_utf8 = self.soup(self.utf8_data)
        self.assertEqual(soup_from_utf8.decode(), self.unicode_data)
        self.assertEqual(soup_from_utf8.foo.string, 'Sacr\xe9 bleu!')

    def test_utf8_out(self):
        # The internal data structures can be encoded as UTF-8.
        soup_from_unicode = self.soup(self.unicode_data)
        self.assertEqual(soup_from_unicode.encode('utf-8'), self.utf8_data)

    @skipIf(
        PYTHON_3_PRE_3_2,
        "Bad HTMLParser detected; skipping test of non-ASCII characters in attribute name.")
    def test_attribute_name_containing_unicode_characters(self):
        markup = '<div><a \N{SNOWMAN}="snowman"></a></div>'
        self.assertEqual(self.soup(markup).div.encode("utf8"), markup.encode("utf8"))
class TestUnicodeDammit(unittest.TestCase):
    """Standalone tests of UnicodeDammit."""

    def test_unicode_input(self):
        markup = "I'm already Unicode! \N{SNOWMAN}"
        dammit = UnicodeDammit(markup)
        self.assertEqual(dammit.unicode_markup, markup)

    def test_smart_quotes_to_unicode(self):
        markup = b"<foo>\x91\x92\x93\x94</foo>"
        dammit = UnicodeDammit(markup)
        self.assertEqual(
            dammit.unicode_markup, "<foo>\u2018\u2019\u201c\u201d</foo>")

    def test_smart_quotes_to_xml_entities(self):
        markup = b"<foo>\x91\x92\x93\x94</foo>"
        dammit = UnicodeDammit(markup, smart_quotes_to="xml")
        self.assertEqual(
            dammit.unicode_markup, "<foo>&#x2018;&#x2019;&#x201C;&#x201D;</foo>")

    def test_smart_quotes_to_html_entities(self):
        markup = b"<foo>\x91\x92\x93\x94</foo>"
        dammit = UnicodeDammit(markup, smart_quotes_to="html")
        self.assertEqual(
            dammit.unicode_markup, "<foo>&lsquo;&rsquo;&ldquo;&rdquo;</foo>")

    def test_smart_quotes_to_ascii(self):
        markup = b"<foo>\x91\x92\x93\x94</foo>"
        dammit = UnicodeDammit(markup, smart_quotes_to="ascii")
        self.assertEqual(
            dammit.unicode_markup, """<foo>''""</foo>""")

    def test_detect_utf8(self):
        utf8 = b"Sacr\xc3\xa9 bleu! \xe2\x98\x83"
        dammit = UnicodeDammit(utf8)
        self.assertEqual(dammit.original_encoding.lower(), 'utf-8')
        self.assertEqual(dammit.unicode_markup, 'Sacr\xe9 bleu! \N{SNOWMAN}')

    def test_convert_hebrew(self):
        hebrew = b"\xed\xe5\xec\xf9"
        dammit = UnicodeDammit(hebrew, ["iso-8859-8"])
        self.assertEqual(dammit.original_encoding.lower(), 'iso-8859-8')
        self.assertEqual(dammit.unicode_markup, '\u05dd\u05d5\u05dc\u05e9')

    def test_dont_see_smart_quotes_where_there_are_none(self):
        utf_8 = b"\343\202\261\343\203\274\343\202\277\343\202\244 Watch"
        dammit = UnicodeDammit(utf_8)
        self.assertEqual(dammit.original_encoding.lower(), 'utf-8')
        self.assertEqual(dammit.unicode_markup.encode("utf-8"), utf_8)

    def test_ignore_inappropriate_codecs(self):
        utf8_data = "Räksmörgås".encode("utf-8")
        dammit = UnicodeDammit(utf8_data, ["iso-8859-8"])
        self.assertEqual(dammit.original_encoding.lower(), 'utf-8')

    def test_ignore_invalid_codecs(self):
        utf8_data = "Räksmörgås".encode("utf-8")
        for bad_encoding in ['.utf8', '...', 'utF---16.!']:
            dammit = UnicodeDammit(utf8_data, [bad_encoding])
            self.assertEqual(dammit.original_encoding.lower(), 'utf-8')

    def test_exclude_encodings(self):
        # This is UTF-8.
        utf8_data = "Räksmörgås".encode("utf-8")

        # But if we exclude UTF-8 from consideration, the guess is
        # Windows-1252.
        dammit = UnicodeDammit(utf8_data, exclude_encodings=["utf-8"])
        self.assertEqual(dammit.original_encoding.lower(), 'windows-1252')

        # And if we exclude that, there is no valid guess at all.
        dammit = UnicodeDammit(
            utf8_data, exclude_encodings=["utf-8", "windows-1252"])
        self.assertEqual(dammit.original_encoding, None)

    def test_encoding_detector_replaces_junk_in_encoding_name_with_replacement_character(self):
        detected = EncodingDetector(
            b'<?xml version="1.0" encoding="UTF-\xdb" ?>')
        encodings = list(detected.encodings)
        assert 'utf-\N{REPLACEMENT CHARACTER}' in encodings

    def test_detect_html5_style_meta_tag(self):

        for data in (
            b'<html><meta charset="euc-jp" /></html>',
            b"<html><meta charset='euc-jp' /></html>",
            b"<html><meta charset=euc-jp /></html>",
            b"<html><meta charset=euc-jp/></html>"):
            dammit = UnicodeDammit(data, is_html=True)
            self.assertEqual(
                "euc-jp", dammit.original_encoding)

    def test_last_ditch_entity_replacement(self):
        # This is a UTF-8 document that contains bytestrings
        # completely incompatible with UTF-8 (ie. encoded with some other
        # encoding).
        #
        # Since there is no consistent encoding for the document,
        # Unicode, Dammit will eventually encode the document as UTF-8
        # and encode the incompatible characters as REPLACEMENT
        # CHARACTER.
        #
        # If chardet is installed, it will detect that the document
        # can be converted into ISO-8859-1 without errors. This happens
        # to be the wrong encoding, but it is a consistent encoding, so the
        # code we're testing here won't run.
        #
        # So we temporarily disable chardet if it's present.
        doc = b"""\357\273\277<?xml version="1.0" encoding="UTF-8"?>
<html><b>\330\250\330\252\330\261</b>
<i>\310\322\321\220\312\321\355\344</i></html>"""
        chardet = bs4.dammit.chardet_dammit
        logging.disable(logging.WARNING)
        try:
            def noop(str):
                return None
            bs4.dammit.chardet_dammit = noop
            dammit = UnicodeDammit(doc)
            self.assertEqual(True, dammit.contains_replacement_characters)
            self.assertTrue("\ufffd" in dammit.unicode_markup)

            soup = BeautifulSoup(doc, "html.parser")
            self.assertTrue(soup.contains_replacement_characters)
        finally:
            logging.disable(logging.NOTSET)
            bs4.dammit.chardet_dammit = chardet

    def test_byte_order_mark_removed(self):
        # A document written in UTF-16LE will have its byte order marker stripped.
        data = b'\xff\xfe<\x00a\x00>\x00\xe1\x00\xe9\x00<\x00/\x00a\x00>\x00'
        dammit = UnicodeDammit(data)
        self.assertEqual("<a>áé</a>", dammit.unicode_markup)
        self.assertEqual("utf-16le", dammit.original_encoding)

    def test_detwingle(self):
        # Here's a UTF8 document.
        utf8 = ("\N{SNOWMAN}" * 3).encode("utf8")

        # Here's a Windows-1252 document.
        windows_1252 = (
            "\N{LEFT DOUBLE QUOTATION MARK}Hi, I like Windows!"
            "\N{RIGHT DOUBLE QUOTATION MARK}").encode("windows_1252")

        # Through some unholy alchemy, they've been stuck together.
        doc = utf8 + windows_1252 + utf8

        # The document can't be turned into UTF-8:
        self.assertRaises(UnicodeDecodeError, doc.decode, "utf8")

        # Unicode, Dammit thinks the whole document is Windows-1252,
        # and decodes it into "☃☃☃“Hi, I like Windows!”☃☃☃"

        # But if we run it through detwingle, it's fixed:
        fixed = UnicodeDammit.detwingle(doc)
        self.assertEqual(
            "☃☃☃“Hi, I like Windows!”☃☃☃", fixed.decode("utf8"))

    def test_detwingle_ignores_multibyte_characters(self):
        # Each of these characters has a UTF-8 representation ending
        # in \x93. \x93 is a smart quote if interpreted as
        # Windows-1252. But our code knows to skip over multibyte
        # UTF-8 characters, so they'll survive the process unscathed.
        for tricky_unicode_char in (
            "\N{LATIN SMALL LIGATURE OE}", # 2-byte char '\xc5\x93'
            "\N{LATIN SUBSCRIPT SMALL LETTER X}", # 3-byte char '\xe2\x82\x93'
            "\xf0\x90\x90\x93", # This is a CJK character, not sure which one.
            ):
            input = tricky_unicode_char.encode("utf8")
            self.assertTrue(input.endswith(b'\x93'))
            output = UnicodeDammit.detwingle(input)
            self.assertEqual(output, input)

    def test_find_declared_encoding(self):
        # Test our ability to find a declared encoding inside an
        # XML or HTML document.
        #
        # Even if the document comes in as Unicode, it may be
        # interesting to know what encoding was claimed
        # originally.

        html_unicode = '<html><head><meta charset="utf-8"></head></html>'
        html_bytes = html_unicode.encode("ascii")

        xml_unicode = '<?xml version="1.0" encoding="ISO-8859-1" ?>'
        xml_bytes = xml_unicode.encode("ascii")

        m = EncodingDetector.find_declared_encoding
        self.assertEqual(None, m(html_unicode, is_html=False))
        self.assertEqual("utf-8", m(html_unicode, is_html=True))
        self.assertEqual("utf-8", m(html_bytes, is_html=True))

        self.assertEqual("iso-8859-1", m(xml_unicode))
        self.assertEqual("iso-8859-1", m(xml_bytes))

        # Normally, only the first few kilobytes of a document are checked for
        # an encoding.
        spacer = b' ' * 5000
        self.assertEqual(None, m(spacer + html_bytes))
        self.assertEqual(None, m(spacer + xml_bytes))

        # But you can tell find_declared_encoding to search an entire
        # HTML document.
        self.assertEqual(
            "utf-8",
            m(spacer + html_bytes, is_html=True, search_entire_document=True)
        )

        # The XML encoding declaration has to be the very first thing
        # in the document. We'll allow whitespace before the document
        # starts, but nothing else.
        self.assertEqual(
            "iso-8859-1",
            m(xml_bytes, search_entire_document=True)
        )
        self.assertEqual(
            None, m(b'a' + xml_bytes, search_entire_document=True)
        )
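# Editor's sketch (not part of the diff): detwingle() repairs byte strings
# that mix UTF-8 with Windows-1252 smart quotes, as tested above.
from bs4 import UnicodeDammit

mixed = "\N{SNOWMAN}".encode("utf8") + b"\x93mixed\x94" + "\N{SNOWMAN}".encode("utf8")
print(UnicodeDammit.detwingle(mixed).decode("utf8"))  # ☃“mixed”☃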
class TestNamespacedAttribute(SoupTest):

    def test_name_may_be_none_or_missing(self):
        a = NamespacedAttribute("xmlns", None)
        self.assertEqual(a, "xmlns")

        a = NamespacedAttribute("xmlns")
        self.assertEqual(a, "xmlns")

    def test_attribute_is_equivalent_to_colon_separated_string(self):
        a = NamespacedAttribute("a", "b")
        self.assertEqual("a:b", a)

    def test_attributes_are_equivalent_if_prefix_and_name_identical(self):
        a = NamespacedAttribute("a", "b", "c")
        b = NamespacedAttribute("a", "b", "c")
        self.assertEqual(a, b)

        # The actual namespace is not considered.
        c = NamespacedAttribute("a", "b", None)
        self.assertEqual(a, c)

        # But name and prefix are important.
        d = NamespacedAttribute("a", "z", "c")
        self.assertNotEqual(a, d)

        e = NamespacedAttribute("z", "b", "c")
        self.assertNotEqual(a, e)
class TestAttributeValueWithCharsetSubstitution(unittest.TestCase):

    def test_charset_meta_attribute_value(self):
        value = CharsetMetaAttributeValue("euc-jp")
        self.assertEqual("euc-jp", value)
        self.assertEqual("euc-jp", value.original_value)
        self.assertEqual("utf8", value.encode("utf8"))


    def test_content_meta_attribute_value(self):
        value = ContentMetaAttributeValue("text/html; charset=euc-jp")
        self.assertEqual("text/html; charset=euc-jp", value)
        self.assertEqual("text/html; charset=euc-jp", value.original_value)
        self.assertEqual("text/html; charset=utf8", value.encode("utf8"))
lib/bs4/tests/test_tree.py (new file, 2254 lines)
(file diff suppressed because it is too large)
@@ -76,7 +76,7 @@ def createCertificate(req, issuerCertKey, serial, validityPeriod, digest="sha256
     cert.sign(issuerKey, digest)
     return cert

-def createSelfSignedCertificate((issuerName, issuerKey), serial, (notBefore, notAfter), altNames, digest="sha256"):
+def createSelfSignedCertificate(issuerName, issuerKey, serial, notBefore, notAfter, altNames, digest="sha256"):
     """
     Generate a certificate given a certificate request.
     Arguments: issuerName - The name of the issuer
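# Editor's note (not part of the diff): the signature rewrite above is the
# Python 3 port of tuple parameters, which were removed by PEP 3113. A
# hypothetical sketch of the equivalent explicit unpacking:
def create_cert(issuer, validity):  # illustrative names only
    issuer_name, issuer_key = issuer      # was ((issuerName, issuerKey), ...)
    not_before, not_after = validity      # was (notBefore, notAfter)
    return issuer_name, issuer_key, not_before, not_after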
lib/cheroot/__init__.py (new file, 15 lines)
@@ -0,0 +1,15 @@
"""High-performance, pure-Python HTTP server used by CherryPy."""

from __future__ import absolute_import, division, print_function
__metaclass__ = type

try:
    import pkg_resources
except ImportError:
    pass


try:
    __version__ = pkg_resources.get_distribution('cheroot').version
except Exception:
    __version__ = 'unknown'
lib/cheroot/__main__.py (new file, 6 lines)
@@ -0,0 +1,6 @@
"""Stub for accessing the Cheroot CLI tool."""

from .cli import main

if __name__ == '__main__':
    main()
lib/cheroot/_compat.py (new file, 110 lines)
@@ -0,0 +1,110 @@
"""Compatibility code for using Cheroot with various versions of Python."""

from __future__ import absolute_import, division, print_function
__metaclass__ = type

import platform
import re

import six

try:
    import ssl
    IS_ABOVE_OPENSSL10 = ssl.OPENSSL_VERSION_INFO >= (1, 1)
    del ssl
except ImportError:
    IS_ABOVE_OPENSSL10 = None


IS_PYPY = platform.python_implementation() == 'PyPy'


SYS_PLATFORM = platform.system()
IS_WINDOWS = SYS_PLATFORM == 'Windows'
IS_LINUX = SYS_PLATFORM == 'Linux'
IS_MACOS = SYS_PLATFORM == 'Darwin'

PLATFORM_ARCH = platform.machine()
IS_PPC = PLATFORM_ARCH.startswith('ppc')


if not six.PY2:
    def ntob(n, encoding='ISO-8859-1'):
        """Return the native string as bytes in the given encoding."""
        assert_native(n)
        # In Python 3, the native string type is unicode
        return n.encode(encoding)

    def ntou(n, encoding='ISO-8859-1'):
        """Return the native string as unicode with the given encoding."""
        assert_native(n)
        # In Python 3, the native string type is unicode
        return n

    def bton(b, encoding='ISO-8859-1'):
        """Return the byte string as native string in the given encoding."""
        return b.decode(encoding)
else:
    # Python 2
    def ntob(n, encoding='ISO-8859-1'):
        """Return the native string as bytes in the given encoding."""
        assert_native(n)
        # In Python 2, the native string type is bytes. Assume it's already
        # in the given encoding, which for ISO-8859-1 is almost always what
        # was intended.
        return n

    def ntou(n, encoding='ISO-8859-1'):
        """Return the native string as unicode with the given encoding."""
        assert_native(n)
        # In Python 2, the native string type is bytes.
        # First, check for the special encoding 'escape'. The test suite uses
        # this to signal that it wants to pass a string with embedded \uXXXX
        # escapes, but without having to prefix it with u'' for Python 2,
        # but no prefix for Python 3.
        if encoding == 'escape':
            return re.sub(
                r'\\u([0-9a-zA-Z]{4})',
                lambda m: six.unichr(int(m.group(1), 16)),
                n.decode('ISO-8859-1'),
            )
        # Assume it's already in the given encoding, which for ISO-8859-1
        # is almost always what was intended.
        return n.decode(encoding)

    def bton(b, encoding='ISO-8859-1'):
        """Return the byte string as native string in the given encoding."""
        return b
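# Editor's sketch (not part of the diff): round-tripping a native string
# through the helpers above on Python 3.
assert bton(ntob('caf\xe9')) == 'caf\xe9'  # encode to ISO-8859-1, then back
assert ntou('caf\xe9') == 'caf\xe9'        # identity on Python 3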
def assert_native(n):
    """Check whether the input is of native ``str`` type.

    Raises:
        TypeError: in case of failed check

    """
    if not isinstance(n, str):
        raise TypeError('n must be a native str (got %s)' % type(n).__name__)


if not six.PY2:
    """Python 3 has memoryview builtin."""
    # Python 2.7 has it backported, but socket.write() does
    # str(memoryview(b'0' * 100)) -> <memory at 0x7fb6913a5588>
    # instead of accessing it correctly.
    memoryview = memoryview
else:
    """Link memoryview to buffer under Python 2."""
    memoryview = buffer  # noqa: F821


def extract_bytes(mv):
    """Retrieve bytes out of memoryview/buffer or bytes."""
    if isinstance(mv, memoryview):
        return bytes(mv) if six.PY2 else mv.tobytes()

    if isinstance(mv, bytes):
        return mv

    raise ValueError
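# Editor's sketch (not part of the diff): extract_bytes() accepts either
# input type and always hands back bytes.
assert extract_bytes(memoryview(b'abc')) == b'abc'  # copied out of the view
assert extract_bytes(b'abc') == b'abc'              # bytes pass through as-is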
lib/cheroot/cli.py (new file, 234 lines)
@@ -0,0 +1,234 @@
"""Command line tool for starting a Cheroot WSGI/HTTP server instance.

Basic usage::

    # Start a server on 127.0.0.1:8000 with the default settings
    # for the WSGI app myapp/wsgi.py:application()
    cheroot myapp.wsgi

    # Start a server on 0.0.0.0:9000 with 8 threads
    # for the WSGI app myapp/wsgi.py:main_app()
    cheroot myapp.wsgi:main_app --bind 0.0.0.0:9000 --threads 8

    # Start a server for the cheroot.server.Gateway subclass
    # myapp/gateway.py:HTTPGateway
    cheroot myapp.gateway:HTTPGateway

    # Start a server on the UNIX socket /var/spool/myapp.sock
    cheroot myapp.wsgi --bind /var/spool/myapp.sock

    # Start a server on the abstract UNIX socket CherootServer
    cheroot myapp.wsgi --bind @CherootServer
"""

import argparse
from importlib import import_module
import os
import sys
import contextlib

import six

from . import server
from . import wsgi


__metaclass__ = type


class BindLocation:
    """A class for storing the bind location for a Cheroot instance."""


class TCPSocket(BindLocation):
    """TCPSocket."""

    def __init__(self, address, port):
        """Initialize.

        Args:
            address (str): Host name or IP address
            port (int): TCP port number
        """
        self.bind_addr = address, port


class UnixSocket(BindLocation):
    """UnixSocket."""

    def __init__(self, path):
        """Initialize."""
        self.bind_addr = path


class AbstractSocket(BindLocation):
    """AbstractSocket."""

    def __init__(self, addr):
        """Initialize."""
        self.bind_addr = '\0{}'.format(addr)
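# Editor's sketch (not part of the diff): each BindLocation subclass just
# normalizes its input into a bind_addr value.
print(TCPSocket('::1', 8000).bind_addr)           # ('::1', 8000)
print(UnixSocket('/tmp/app.sock').bind_addr)      # '/tmp/app.sock'
print(AbstractSocket('CherootServer').bind_addr)  # '\x00CherootServer'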
class Application:
    """Application."""

    @classmethod
    def resolve(cls, full_path):
        """Read WSGI app/Gateway path string and import application module."""
        mod_path, _, app_path = full_path.partition(':')
        app = getattr(import_module(mod_path), app_path or 'application')

        with contextlib.suppress(TypeError):
            if issubclass(app, server.Gateway):
                return GatewayYo(app)

        return cls(app)

    def __init__(self, wsgi_app):
        """Initialize."""
        if not callable(wsgi_app):
            raise TypeError(
                'Application must be a callable object or '
                'cheroot.server.Gateway subclass',
            )
        self.wsgi_app = wsgi_app

    def server_args(self, parsed_args):
        """Return keyword args for Server class."""
        args = {
            arg: value
            for arg, value in vars(parsed_args).items()
            if not arg.startswith('_') and value is not None
        }
        args.update(vars(self))
        return args

    def server(self, parsed_args):
        """Server."""
        return wsgi.Server(**self.server_args(parsed_args))


class GatewayYo:
    """Gateway."""

    def __init__(self, gateway):
        """Init."""
        self.gateway = gateway

    def server(self, parsed_args):
        """Server."""
        server_args = vars(self)
        server_args['bind_addr'] = parsed_args.bind_addr
        if parsed_args.max is not None:
            server_args['maxthreads'] = parsed_args.max
        if parsed_args.numthreads is not None:
            server_args['minthreads'] = parsed_args.numthreads
        return server.HTTPServer(**server_args)


def parse_wsgi_bind_location(bind_addr_string):
    """Convert bind address string to a BindLocation."""
    # try and match for an IP/hostname and port
    match = six.moves.urllib.parse.urlparse('//{}'.format(bind_addr_string))
    try:
        addr = match.hostname
        port = match.port
        if addr is not None or port is not None:
            return TCPSocket(addr, port)
    except ValueError:
        pass

    # else, assume a UNIX socket path
    # if the string begins with an @ symbol, use an abstract socket
    if bind_addr_string.startswith('@'):
        return AbstractSocket(bind_addr_string[1:])
    return UnixSocket(path=bind_addr_string)
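# Editor's sketch (not part of the diff): how the parser above maps bind
# strings to bind_addr values.
print(parse_wsgi_bind_location('[::1]:8000').bind_addr)         # ('::1', 8000)
print(parse_wsgi_bind_location('/var/run/app.sock').bind_addr)  # '/var/run/app.sock'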
def parse_wsgi_bind_addr(bind_addr_string):
    """Convert bind address string to bind address parameter."""
    return parse_wsgi_bind_location(bind_addr_string).bind_addr


_arg_spec = {
    '_wsgi_app': dict(
        metavar='APP_MODULE',
        type=Application.resolve,
        help='WSGI application callable or cheroot.server.Gateway subclass',
    ),
    '--bind': dict(
        metavar='ADDRESS',
        dest='bind_addr',
        type=parse_wsgi_bind_addr,
        default='[::1]:8000',
        help='Network interface to listen on (default: [::1]:8000)',
    ),
    '--chdir': dict(
        metavar='PATH',
        type=os.chdir,
        help='Set the working directory',
    ),
    '--server-name': dict(
        dest='server_name',
        type=str,
        help='Web server name to be advertised via Server HTTP header',
    ),
    '--threads': dict(
        metavar='INT',
        dest='numthreads',
        type=int,
        help='Minimum number of worker threads',
    ),
    '--max-threads': dict(
        metavar='INT',
        dest='max',
        type=int,
        help='Maximum number of worker threads',
    ),
    '--timeout': dict(
        metavar='INT',
        dest='timeout',
        type=int,
        help='Timeout in seconds for accepted connections',
    ),
    '--shutdown-timeout': dict(
        metavar='INT',
        dest='shutdown_timeout',
        type=int,
        help='Time in seconds to wait for worker threads to cleanly exit',
    ),
    '--request-queue-size': dict(
        metavar='INT',
        dest='request_queue_size',
        type=int,
        help='Maximum number of queued connections',
    ),
    '--accepted-queue-size': dict(
        metavar='INT',
        dest='accepted_queue_size',
        type=int,
        help='Maximum number of active requests in queue',
    ),
    '--accepted-queue-timeout': dict(
        metavar='INT',
        dest='accepted_queue_timeout',
        type=int,
        help='Timeout in seconds for putting requests into queue',
    ),
}


def main():
    """Create a new Cheroot instance with arguments from the command line."""
    parser = argparse.ArgumentParser(
        description='Start an instance of the Cheroot WSGI/HTTP server.',
    )
    for arg, spec in _arg_spec.items():
        parser.add_argument(arg, **spec)
    raw_args = parser.parse_args()

    # ensure cwd in sys.path
    '' in sys.path or sys.path.insert(0, '')

    # create a server based on the arguments provided
    raw_args._wsgi_app.server(raw_args).safe_start()
lib/cheroot/connections.py (new file, 279 lines)
@@ -0,0 +1,279 @@
"""Utilities to manage open connections."""

from __future__ import absolute_import, division, print_function
__metaclass__ = type

import io
import os
import select
import socket
import time

from . import errors
from .makefile import MakeFile

import six

try:
    import fcntl
except ImportError:
    try:
        from ctypes import windll, WinError
        import ctypes.wintypes
        _SetHandleInformation = windll.kernel32.SetHandleInformation
        _SetHandleInformation.argtypes = [
            ctypes.wintypes.HANDLE,
            ctypes.wintypes.DWORD,
            ctypes.wintypes.DWORD,
        ]
        _SetHandleInformation.restype = ctypes.wintypes.BOOL
    except ImportError:
        def prevent_socket_inheritance(sock):
            """Stub inheritance prevention.

            Dummy function, since neither fcntl nor ctypes are available.
            """
            pass
    else:
        def prevent_socket_inheritance(sock):
            """Mark the given socket fd as non-inheritable (Windows)."""
            if not _SetHandleInformation(sock.fileno(), 1, 0):
                raise WinError()
else:
    def prevent_socket_inheritance(sock):
        """Mark the given socket fd as non-inheritable (POSIX)."""
        fd = sock.fileno()
        old_flags = fcntl.fcntl(fd, fcntl.F_GETFD)
        fcntl.fcntl(fd, fcntl.F_SETFD, old_flags | fcntl.FD_CLOEXEC)
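# Editor's sketch (not part of the diff): verifying the close-on-exec flag
# set by the POSIX branch above.
import fcntl
import socket

s = socket.socket()
prevent_socket_inheritance(s)
assert fcntl.fcntl(s.fileno(), fcntl.F_GETFD) & fcntl.FD_CLOEXEC
s.close()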
class ConnectionManager:
    """Class which manages HTTPConnection objects.

    This is for connections which are being kept-alive for follow-up requests.
    """

    def __init__(self, server):
        """Initialize ConnectionManager object.

        Args:
            server (cheroot.server.HTTPServer): web server object
                that uses this ConnectionManager instance.
        """
        self.server = server
        self.connections = []

    def put(self, conn):
        """Put idle connection into the ConnectionManager to be managed.

        Args:
            conn (cheroot.server.HTTPConnection): HTTP connection
                to be managed.
        """
        conn.last_used = time.time()
        conn.ready_with_data = conn.rfile.has_data()
        self.connections.append(conn)

    def expire(self):
        """Expire least recently used connections.

        This happens if there are either too many open connections, or if the
        connections have been timed out.

        This should be called periodically.
        """
        if not self.connections:
            return

        # Look at the first connection - if it can be closed, then do
        # that, and wait for get_conn to return it.
        conn = self.connections[0]
        if conn.closeable:
            return

        # Too many connections?
        ka_limit = self.server.keep_alive_conn_limit
        if ka_limit is not None and len(self.connections) > ka_limit:
            conn.closeable = True
            return

        # Connection too old?
        if (conn.last_used + self.server.timeout) < time.time():
            conn.closeable = True
            return

    def get_conn(self, server_socket):
        """Return a HTTPConnection object which is ready to be handled.

        A connection returned by this method should be ready for a worker
        to handle it. If there are no connections ready, None will be
        returned.

        Any connection returned by this method will need to be `put`
        back if it should be examined again for another request.

        Args:
            server_socket (socket.socket): Socket to listen to for new
                connections.
        Returns:
            cheroot.server.HTTPConnection instance, or None.

        """
        # Grab file descriptors from sockets, but stop if we find a
        # connection which is already marked as ready.
        socket_dict = {}
        for conn in self.connections:
            if conn.closeable or conn.ready_with_data:
                break
            socket_dict[conn.socket.fileno()] = conn
        else:
            # No ready connection.
            conn = None

        # We have a connection ready for use.
        if conn:
            self.connections.remove(conn)
            return conn

        # Will require a select call.
        ss_fileno = server_socket.fileno()
        socket_dict[ss_fileno] = server_socket
        try:
            rlist, _, _ = select.select(list(socket_dict), [], [], 0.1)
            # No available socket.
            if not rlist:
                return None
        except OSError:
            # Mark any connection which no longer appears valid.
            for fno, conn in list(socket_dict.items()):
                # If the server socket is invalid, we'll just ignore it and
                # wait to be shutdown.
                if fno == ss_fileno:
                    continue
                try:
                    os.fstat(fno)
                except OSError:
                    # Socket is invalid, close the connection, insert at
                    # the front.
                    self.connections.remove(conn)
                    self.connections.insert(0, conn)
                    conn.closeable = True

            # Wait for the next tick to occur.
            return None

        try:
            # See if we have a new connection coming in.
            rlist.remove(ss_fileno)
        except ValueError:
            # No new connection, but reuse existing socket.
            conn = socket_dict[rlist.pop()]
        else:
            conn = server_socket

        # All remaining connections in rlist should be marked as ready.
        for fno in rlist:
            socket_dict[fno].ready_with_data = True

        # New connection.
        if conn is server_socket:
            return self._from_server_socket(server_socket)

        self.connections.remove(conn)
        return conn

    def _from_server_socket(self, server_socket):
        try:
            s, addr = server_socket.accept()
            if self.server.stats['Enabled']:
                self.server.stats['Accepts'] += 1
            prevent_socket_inheritance(s)
            if hasattr(s, 'settimeout'):
                s.settimeout(self.server.timeout)

            mf = MakeFile
            ssl_env = {}
            # if ssl cert and key are set, we try to be a secure HTTP server
            if self.server.ssl_adapter is not None:
                try:
                    s, ssl_env = self.server.ssl_adapter.wrap(s)
                except errors.NoSSLError:
                    msg = (
                        'The client sent a plain HTTP request, but '
                        'this server only speaks HTTPS on this port.'
                    )
                    buf = [
                        '%s 400 Bad Request\r\n' % self.server.protocol,
                        'Content-Length: %s\r\n' % len(msg),
                        'Content-Type: text/plain\r\n\r\n',
                        msg,
                    ]

                    sock_to_make = s if not six.PY2 else s._sock
                    wfile = mf(sock_to_make, 'wb', io.DEFAULT_BUFFER_SIZE)
                    try:
                        wfile.write(''.join(buf).encode('ISO-8859-1'))
                    except socket.error as ex:
                        if ex.args[0] not in errors.socket_errors_to_ignore:
                            raise
                    return
                if not s:
                    return
                mf = self.server.ssl_adapter.makefile
                # Re-apply our timeout since we may have a new socket object
                if hasattr(s, 'settimeout'):
                    s.settimeout(self.server.timeout)

            conn = self.server.ConnectionClass(self.server, s, mf)

            if not isinstance(
                    self.server.bind_addr,
                    (six.text_type, six.binary_type),
            ):
                # optional values
                # Until we do DNS lookups, omit REMOTE_HOST
                if addr is None:  # sometimes this can happen
                    # figure out if AF_INET or AF_INET6.
                    if len(s.getsockname()) == 2:
                        # AF_INET
                        addr = ('0.0.0.0', 0)
                    else:
                        # AF_INET6
                        addr = ('::', 0)
                conn.remote_addr = addr[0]
                conn.remote_port = addr[1]

            conn.ssl_env = ssl_env
            return conn

        except socket.timeout:
            # The only reason for the timeout in start() is so we can
|
||||
# notice keyboard interrupts on Win32, which don't interrupt
|
||||
# accept() by default
|
||||
return
|
||||
except socket.error as ex:
|
||||
if self.server.stats['Enabled']:
|
||||
self.server.stats['Socket Errors'] += 1
|
||||
if ex.args[0] in errors.socket_error_eintr:
|
||||
# I *think* this is right. EINTR should occur when a signal
|
||||
# is received during the accept() call; all docs say retry
|
||||
# the call, and I *think* I'm reading it right that Python
|
||||
# will then go ahead and poll for and handle the signal
|
||||
# elsewhere. See
|
||||
# https://github.com/cherrypy/cherrypy/issues/707.
|
||||
return
|
||||
if ex.args[0] in errors.socket_errors_nonblocking:
|
||||
# Just try again. See
|
||||
# https://github.com/cherrypy/cherrypy/issues/479.
|
||||
return
|
||||
if ex.args[0] in errors.socket_errors_to_ignore:
|
||||
# Our socket was closed.
|
||||
# See https://github.com/cherrypy/cherrypy/issues/686.
|
||||
return
|
||||
raise
|
||||
|
||||
def close(self):
|
||||
"""Close all monitored connections."""
|
||||
for conn in self.connections[:]:
|
||||
conn.close()
|
||||
self.connections = []
|
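How these pieces fit together: a worker that finishes a request hands its connection back via put(), and the server's accept loop alternates get_conn() with periodic expire() calls. A minimal, hypothetical driver sketch follows; handle() and the configured server object are placeholders and are not part of this changeset:

    manager = ConnectionManager(server)

    while not server.interrupt:
        conn = manager.get_conn(server.socket)  # may also accept a new client
        if conn is None:
            # select() timed out after 0.1s with nothing readable; try again.
            continue
        if conn.closeable:
            conn.close()  # expired or invalidated by expire()/get_conn()
            continue
        keep_alive = handle(conn)  # hypothetical single-request worker
        if keep_alive:
            manager.put(conn)  # re-managed until the next request arrives
        else:
            conn.close()
        manager.expire()  # enforce keep_alive_conn_limit and timeout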
58 lib/cheroot/errors.py Normal file
@@ -0,0 +1,58 @@
"""Collection of exceptions raised and/or processed by Cheroot."""

from __future__ import absolute_import, division, print_function
__metaclass__ = type

import errno
import sys


class MaxSizeExceeded(Exception):
    """Exception raised when a client sends more data than acceptable within limit.

    Depends on ``request.body.maxbytes`` config option if used within CherryPy.
    """


class NoSSLError(Exception):
    """Exception raised when a client speaks HTTP to an HTTPS socket."""


class FatalSSLAlert(Exception):
    """Exception raised when the SSL implementation signals a fatal alert."""


def plat_specific_errors(*errnames):
    """Return error numbers for all errors in errnames on this platform.

    The 'errno' module contains different global constants depending on
    the specific platform (OS). This function will return the list of
    numeric values for a given list of potential names.
    """
    missing_attr = set([None, ])
    unique_nums = set(getattr(errno, k, None) for k in errnames)
    return list(unique_nums - missing_attr)


socket_error_eintr = plat_specific_errors('EINTR', 'WSAEINTR')

socket_errors_to_ignore = plat_specific_errors(
    'EPIPE',
    'EBADF', 'WSAEBADF',
    'ENOTSOCK', 'WSAENOTSOCK',
    'ETIMEDOUT', 'WSAETIMEDOUT',
    'ECONNREFUSED', 'WSAECONNREFUSED',
    'ECONNRESET', 'WSAECONNRESET',
    'ECONNABORTED', 'WSAECONNABORTED',
    'ENETRESET', 'WSAENETRESET',
    'EHOSTDOWN', 'EHOSTUNREACH',
)
socket_errors_to_ignore.append('timed out')
socket_errors_to_ignore.append('The read operation timed out')
socket_errors_nonblocking = plat_specific_errors(
    'EAGAIN', 'EWOULDBLOCK', 'WSAEWOULDBLOCK',
)

if sys.platform == 'darwin':
    socket_errors_to_ignore.extend(plat_specific_errors('EPROTOTYPE'))
    socket_errors_nonblocking.extend(plat_specific_errors('EPROTOTYPE'))
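Because the errno module only defines the names that exist on the current platform, plat_specific_errors() silently drops unknown names instead of failing. For example, on a typical Linux box (results vary by OS):

    import errno

    # 'WSAEINTR' is Windows-only, so it is filtered out here and the
    # result is just the numeric value of EINTR.
    assert plat_specific_errors('EINTR', 'WSAEINTR') == [errno.EINTR]

    # A name unknown on every platform yields an empty list.
    assert plat_specific_errors('ENOSUCHERROR') == []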
447 lib/cheroot/makefile.py Normal file
@@ -0,0 +1,447 @@
"""Socket file object."""

from __future__ import absolute_import, division, print_function
__metaclass__ = type

import socket

try:
    # prefer slower Python-based io module
    import _pyio as io
except ImportError:
    # Python 2.6
    import io

import six

from . import errors
from ._compat import extract_bytes, memoryview


# Write only 16K at a time to sockets
SOCK_WRITE_BLOCKSIZE = 16384


class BufferedWriter(io.BufferedWriter):
    """Faux file object attached to a socket object."""

    def write(self, b):
        """Write bytes to buffer."""
        self._checkClosed()
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")

        with self._write_lock:
            self._write_buf.extend(b)
            self._flush_unlocked()
            return len(b)

    def _flush_unlocked(self):
        self._checkClosed('flush of closed file')
        while self._write_buf:
            try:
                # ssl sockets only accept 'bytes', not bytearrays,
                # so perhaps we should conditionally wrap this for perf?
                n = self.raw.write(bytes(self._write_buf))
            except io.BlockingIOError as e:
                n = e.characters_written
            del self._write_buf[:n]


class MakeFile_PY2(getattr(socket, '_fileobject', object)):
    """Faux file object attached to a socket object."""

    def __init__(self, *args, **kwargs):
        """Initialize faux file object."""
        self.bytes_read = 0
        self.bytes_written = 0
        socket._fileobject.__init__(self, *args, **kwargs)
        self._refcount = 0

    def _reuse(self):
        self._refcount += 1

    def _drop(self):
        if self._refcount < 0:
            self.close()
        else:
            self._refcount -= 1

    def write(self, data):
        """Sendall for non-blocking sockets."""
        bytes_sent = 0
        data_mv = memoryview(data)
        payload_size = len(data_mv)
        while bytes_sent < payload_size:
            try:
                bytes_sent += self.send(
                    data_mv[bytes_sent:bytes_sent + SOCK_WRITE_BLOCKSIZE],
                )
            except socket.error as e:
                if e.args[0] not in errors.socket_errors_nonblocking:
                    raise

    def send(self, data):
        """Send some part of message to the socket."""
        bytes_sent = self._sock.send(extract_bytes(data))
        self.bytes_written += bytes_sent
        return bytes_sent

    def flush(self):
        """Write all data from buffer to socket and reset write buffer."""
        if self._wbuf:
            buffer = ''.join(self._wbuf)
            self._wbuf = []
            self.write(buffer)

    def recv(self, size):
        """Receive message of a size from the socket."""
        while True:
            try:
                data = self._sock.recv(size)
                self.bytes_read += len(data)
                return data
            except socket.error as e:
                what = (
                    e.args[0] not in errors.socket_errors_nonblocking
                    and e.args[0] not in errors.socket_error_eintr
                )
                if what:
                    raise

    class FauxSocket:
        """Faux socket with the minimal interface required by pypy."""

        def _reuse(self):
            pass

    _fileobject_uses_str_type = six.PY2 and isinstance(
        socket._fileobject(FauxSocket())._rbuf, six.string_types,
    )

    # FauxSocket is no longer needed
    del FauxSocket

    if not _fileobject_uses_str_type:
        def read(self, size=-1):
            """Read data from the socket to buffer."""
            # Use max, disallow tiny reads in a loop as they are very
            # inefficient.
            # We never leave read() with any leftover data from a new recv()
            # call in our internal buffer.
            rbufsize = max(self._rbufsize, self.default_bufsize)
            # Our use of StringIO rather than lists of string objects returned
            # by recv() minimizes memory usage and fragmentation that occurs
            # when rbufsize is large compared to the typical return value of
            # recv().
            buf = self._rbuf
            buf.seek(0, 2)  # seek end
            if size < 0:
                # Read until EOF
                # reset _rbuf. we consume it via buf.
                self._rbuf = io.BytesIO()
                while True:
                    data = self.recv(rbufsize)
                    if not data:
                        break
                    buf.write(data)
                return buf.getvalue()
            else:
                # Read until size bytes or EOF seen, whichever comes first
                buf_len = buf.tell()
                if buf_len >= size:
                    # Already have size bytes in our buffer? Extract and
                    # return.
                    buf.seek(0)
                    rv = buf.read(size)
                    self._rbuf = io.BytesIO()
                    self._rbuf.write(buf.read())
                    return rv

                # reset _rbuf. we consume it via buf.
                self._rbuf = io.BytesIO()
                while True:
                    left = size - buf_len
                    # recv() will malloc the amount of memory given as its
                    # parameter even though it often returns much less data
                    # than that. The returned data string is short lived
                    # as we copy it into a StringIO and free it. This avoids
                    # fragmentation issues on many platforms.
                    data = self.recv(left)
                    if not data:
                        break
                    n = len(data)
                    if n == size and not buf_len:
                        # Shortcut. Avoid buffer data copies when:
                        # - We have no data in our buffer.
                        # AND
                        # - Our call to recv returned exactly the
                        #   number of bytes we were asked to read.
                        return data
                    if n == left:
                        buf.write(data)
                        del data  # explicit free
                        break
                    assert n <= left, 'recv(%d) returned %d bytes' % (left, n)
                    buf.write(data)
                    buf_len += n
                    del data  # explicit free
                    # assert buf_len == buf.tell()
                return buf.getvalue()

        def readline(self, size=-1):
            """Read line from the socket to buffer."""
            buf = self._rbuf
            buf.seek(0, 2)  # seek end
            if buf.tell() > 0:
                # check if we already have it in our buffer
                buf.seek(0)
                bline = buf.readline(size)
                if bline.endswith('\n') or len(bline) == size:
                    self._rbuf = io.BytesIO()
                    self._rbuf.write(buf.read())
                    return bline
                del bline
            if size < 0:
                # Read until \n or EOF, whichever comes first
                if self._rbufsize <= 1:
                    # Speed up unbuffered case
                    buf.seek(0)
                    buffers = [buf.read()]
                    # reset _rbuf. we consume it via buf.
                    self._rbuf = io.BytesIO()
                    data = None
                    recv = self.recv
                    while data != '\n':
                        data = recv(1)
                        if not data:
                            break
                        buffers.append(data)
                    return ''.join(buffers)

                buf.seek(0, 2)  # seek end
                # reset _rbuf. we consume it via buf.
                self._rbuf = io.BytesIO()
                while True:
                    data = self.recv(self._rbufsize)
                    if not data:
                        break
                    nl = data.find('\n')
                    if nl >= 0:
                        nl += 1
                        buf.write(data[:nl])
                        self._rbuf.write(data[nl:])
                        del data
                        break
                    buf.write(data)
                return buf.getvalue()

            else:
                # Read until size bytes or \n or EOF seen, whichever comes
                # first
                buf.seek(0, 2)  # seek end
                buf_len = buf.tell()
                if buf_len >= size:
                    buf.seek(0)
                    rv = buf.read(size)
                    self._rbuf = io.BytesIO()
                    self._rbuf.write(buf.read())
                    return rv
                # reset _rbuf. we consume it via buf.
                self._rbuf = io.BytesIO()
                while True:
                    data = self.recv(self._rbufsize)
                    if not data:
                        break
                    left = size - buf_len
                    # did we just receive a newline?
                    nl = data.find('\n', 0, left)
                    if nl >= 0:
                        nl += 1
                        # save the excess data to _rbuf
                        self._rbuf.write(data[nl:])
                        if buf_len:
                            buf.write(data[:nl])
                            break
                        else:
                            # Shortcut. Avoid data copy through buf when
                            # returning a substring of our first recv().
                            return data[:nl]
                    n = len(data)
                    if n == size and not buf_len:
                        # Shortcut. Avoid data copy through buf when
                        # returning exactly all of our first recv().
                        return data
                    if n >= left:
                        buf.write(data[:left])
                        self._rbuf.write(data[left:])
                        break
                    buf.write(data)
                    buf_len += n
                    # assert buf_len == buf.tell()
                return buf.getvalue()

        def has_data(self):
            """Return true if there is buffered data to read."""
            return bool(self._rbuf.getvalue())

    else:
        def read(self, size=-1):
            """Read data from the socket to buffer."""
            if size < 0:
                # Read until EOF
                buffers = [self._rbuf]
                self._rbuf = ''
                if self._rbufsize <= 1:
                    recv_size = self.default_bufsize
                else:
                    recv_size = self._rbufsize

                while True:
                    data = self.recv(recv_size)
                    if not data:
                        break
                    buffers.append(data)
                return ''.join(buffers)
            else:
                # Read until size bytes or EOF seen, whichever comes first
                data = self._rbuf
                buf_len = len(data)
                if buf_len >= size:
                    self._rbuf = data[size:]
                    return data[:size]
                buffers = []
                if data:
                    buffers.append(data)
                self._rbuf = ''
                while True:
                    left = size - buf_len
                    recv_size = max(self._rbufsize, left)
                    data = self.recv(recv_size)
                    if not data:
                        break
                    buffers.append(data)
                    n = len(data)
                    if n >= left:
                        self._rbuf = data[left:]
                        buffers[-1] = data[:left]
                        break
                    buf_len += n
                return ''.join(buffers)

        def readline(self, size=-1):
            """Read line from the socket to buffer."""
            data = self._rbuf
            if size < 0:
                # Read until \n or EOF, whichever comes first
                if self._rbufsize <= 1:
                    # Speed up unbuffered case
                    assert data == ''
                    buffers = []
                    while data != '\n':
                        data = self.recv(1)
                        if not data:
                            break
                        buffers.append(data)
                    return ''.join(buffers)
                nl = data.find('\n')
                if nl >= 0:
                    nl += 1
                    self._rbuf = data[nl:]
                    return data[:nl]
                buffers = []
                if data:
                    buffers.append(data)
                self._rbuf = ''
                while True:
                    data = self.recv(self._rbufsize)
                    if not data:
                        break
                    buffers.append(data)
                    nl = data.find('\n')
                    if nl >= 0:
                        nl += 1
                        self._rbuf = data[nl:]
                        buffers[-1] = data[:nl]
                        break
                return ''.join(buffers)
            else:
                # Read until size bytes or \n or EOF seen, whichever comes
                # first
                nl = data.find('\n', 0, size)
                if nl >= 0:
                    nl += 1
                    self._rbuf = data[nl:]
                    return data[:nl]
                buf_len = len(data)
                if buf_len >= size:
                    self._rbuf = data[size:]
                    return data[:size]
                buffers = []
                if data:
                    buffers.append(data)
                self._rbuf = ''
                while True:
                    data = self.recv(self._rbufsize)
                    if not data:
                        break
                    buffers.append(data)
                    left = size - buf_len
                    nl = data.find('\n', 0, left)
                    if nl >= 0:
                        nl += 1
                        self._rbuf = data[nl:]
                        buffers[-1] = data[:nl]
                        break
                    n = len(data)
                    if n >= left:
                        self._rbuf = data[left:]
                        buffers[-1] = data[:left]
                        break
                    buf_len += n
                return ''.join(buffers)

        def has_data(self):
            """Return true if there is buffered data to read."""
            return bool(self._rbuf)


if not six.PY2:
    class StreamReader(io.BufferedReader):
        """Socket stream reader."""

        def __init__(self, sock, mode='r', bufsize=io.DEFAULT_BUFFER_SIZE):
            """Initialize socket stream reader."""
            super().__init__(socket.SocketIO(sock, mode), bufsize)
            self.bytes_read = 0

        def read(self, *args, **kwargs):
            """Capture bytes read."""
            val = super().read(*args, **kwargs)
            self.bytes_read += len(val)
            return val

        def has_data(self):
            """Return true if there is buffered data to read."""
            return len(self._read_buf) > self._read_pos

    class StreamWriter(BufferedWriter):
        """Socket stream writer."""

        def __init__(self, sock, mode='w', bufsize=io.DEFAULT_BUFFER_SIZE):
            """Initialize socket stream writer."""
            super().__init__(socket.SocketIO(sock, mode), bufsize)
            self.bytes_written = 0

        def write(self, val, *args, **kwargs):
            """Capture bytes written."""
            res = super().write(val, *args, **kwargs)
            self.bytes_written += len(val)
            return res

    def MakeFile(sock, mode='r', bufsize=io.DEFAULT_BUFFER_SIZE):
        """File object attached to a socket object."""
        cls = StreamReader if 'r' in mode else StreamWriter
        return cls(sock, mode, bufsize)
else:
    StreamReader = StreamWriter = MakeFile = MakeFile_PY2
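A quick way to see the reader/writer split and the byte counters in action is to wrap both ends of a socket pair. A sketch assuming Python 3 (where MakeFile maps to StreamReader/StreamWriter) and a platform providing socket.socketpair():

    import socket

    a, b = socket.socketpair()
    wfile = MakeFile(a, 'wb')  # no 'r' in mode, so this is a StreamWriter
    rfile = MakeFile(b, 'rb')  # 'r' in mode, so this is a StreamReader

    wfile.write(b'ping')       # this BufferedWriter flushes eagerly
    assert rfile.read(4) == b'ping'
    assert wfile.bytes_written == 4  # counters maintained by the wrappers
    assert rfile.bytes_read == 4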
2114 lib/cheroot/server.py Normal file
File diff suppressed because it is too large
52 lib/cheroot/ssl/__init__.py Normal file
@@ -0,0 +1,52 @@
"""Implementation of the SSL adapter base interface."""

from __future__ import absolute_import, division, print_function
__metaclass__ = type

from abc import ABCMeta, abstractmethod

from six import add_metaclass


@add_metaclass(ABCMeta)
class Adapter:
    """Base class for SSL driver library adapters.

    Required methods:

    * ``wrap(sock) -> (wrapped socket, ssl environ dict)``
    * ``makefile(sock, mode='r', bufsize=DEFAULT_BUFFER_SIZE) ->
      socket file object``
    """

    @abstractmethod
    def __init__(
        self, certificate, private_key, certificate_chain=None,
        ciphers=None,
    ):
        """Set up certificates, private key, ciphers and reset context."""
        self.certificate = certificate
        self.private_key = private_key
        self.certificate_chain = certificate_chain
        self.ciphers = ciphers
        self.context = None

    @abstractmethod
    def bind(self, sock):
        """Wrap and return the given socket."""
        return sock

    @abstractmethod
    def wrap(self, sock):
        """Wrap and return the given socket, plus WSGI environ entries."""
        raise NotImplementedError  # pragma: no cover

    @abstractmethod
    def get_environ(self):
        """Return WSGI environ entries to be merged into each request."""
        raise NotImplementedError  # pragma: no cover

    @abstractmethod
    def makefile(self, sock, mode='r', bufsize=-1):
        """Return socket file object."""
        raise NotImplementedError  # pragma: no cover
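Concrete drivers (the builtin and pyOpenSSL adapters in this changeset) fill in these hooks. As a purely illustrative skeleton, a hypothetical do-nothing adapter satisfying the abstract interface could look like:

    class NullAdapter(Adapter):
        # Hypothetical example, not shipped by Cheroot.

        def __init__(self, certificate, private_key,
                     certificate_chain=None, ciphers=None):
            super(NullAdapter, self).__init__(
                certificate, private_key, certificate_chain, ciphers,
            )

        def bind(self, sock):
            return super(NullAdapter, self).bind(sock)

        def wrap(self, sock):
            # A real adapter performs the TLS handshake here.
            return sock, self.get_environ()

        def get_environ(self):
            return {'wsgi.url_scheme': 'https', 'HTTPS': 'on'}

        def makefile(self, sock, mode='r', bufsize=-1):
            return sock.makefile(mode, bufsize)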
210 lib/cheroot/ssl/builtin.py Normal file
@@ -0,0 +1,210 @@
"""
A library for integrating Python's builtin ``ssl`` library with Cheroot.

The ssl module must be importable for SSL functionality.

To use this module, set ``HTTPServer.ssl_adapter`` to an instance of
``BuiltinSSLAdapter``.
"""

from __future__ import absolute_import, division, print_function
__metaclass__ = type

try:
    import ssl
except ImportError:
    ssl = None

try:
    from _pyio import DEFAULT_BUFFER_SIZE
except ImportError:
    try:
        from io import DEFAULT_BUFFER_SIZE
    except ImportError:
        DEFAULT_BUFFER_SIZE = -1

import six

from . import Adapter
from .. import errors
from .._compat import IS_ABOVE_OPENSSL10
from ..makefile import StreamReader, StreamWriter

if six.PY2:
    import socket
    generic_socket_error = socket.error
    del socket
else:
    generic_socket_error = OSError


def _assert_ssl_exc_contains(exc, *msgs):
    """Check whether SSL exception contains either of messages provided."""
    if len(msgs) < 1:
        raise TypeError(
            '_assert_ssl_exc_contains() requires '
            'at least one message to be passed.',
        )
    err_msg_lower = str(exc).lower()
    return any(m.lower() in err_msg_lower for m in msgs)


class BuiltinSSLAdapter(Adapter):
    """A wrapper for integrating Python's builtin ssl module with Cheroot."""

    certificate = None
    """The filename of the server SSL certificate."""

    private_key = None
    """The filename of the server's private key file."""

    certificate_chain = None
    """The filename of the certificate chain file."""

    context = None
    """The ssl.SSLContext that will be used to wrap sockets."""

    ciphers = None
    """The ciphers list of SSL."""

    CERT_KEY_TO_ENV = {
        'subject': 'SSL_CLIENT_S_DN',
        'issuer': 'SSL_CLIENT_I_DN',
    }

    CERT_KEY_TO_LDAP_CODE = {
        'countryName': 'C',
        'stateOrProvinceName': 'ST',
        'localityName': 'L',
        'organizationName': 'O',
        'organizationalUnitName': 'OU',
        'commonName': 'CN',
        'emailAddress': 'Email',
    }

    def __init__(
        self, certificate, private_key, certificate_chain=None,
        ciphers=None,
    ):
        """Set up context in addition to base class properties if available."""
        if ssl is None:
            raise ImportError('You must install the ssl module to use HTTPS.')

        super(BuiltinSSLAdapter, self).__init__(
            certificate, private_key, certificate_chain, ciphers,
        )

        self.context = ssl.create_default_context(
            purpose=ssl.Purpose.CLIENT_AUTH,
            cafile=certificate_chain,
        )
        self.context.load_cert_chain(certificate, private_key)
        if self.ciphers is not None:
            self.context.set_ciphers(ciphers)

    def bind(self, sock):
        """Wrap and return the given socket."""
        return super(BuiltinSSLAdapter, self).bind(sock)

    def wrap(self, sock):
        """Wrap and return the given socket, plus WSGI environ entries."""
        EMPTY_RESULT = None, {}
        try:
            s = self.context.wrap_socket(
                sock, do_handshake_on_connect=True, server_side=True,
            )
        except ssl.SSLError as ex:
            if ex.errno == ssl.SSL_ERROR_EOF:
                # This is almost certainly due to the cherrypy engine
                # 'pinging' the socket to assert it's connectable;
                # the 'ping' isn't SSL.
                return EMPTY_RESULT
            elif ex.errno == ssl.SSL_ERROR_SSL:
                if _assert_ssl_exc_contains(ex, 'http request'):
                    # The client is speaking HTTP to an HTTPS server.
                    raise errors.NoSSLError

                # Check if it's one of the known errors
                # Errors that are caught by PyOpenSSL, but thrown by
                # built-in ssl
                _block_errors = (
                    'unknown protocol', 'unknown ca', 'unknown_ca',
                    'unknown error',
                    'https proxy request', 'inappropriate fallback',
                    'wrong version number',
                    'no shared cipher', 'certificate unknown',
                    'ccs received early',
                    'certificate verify failed',  # client cert w/o trusted CA
                )
                if _assert_ssl_exc_contains(ex, *_block_errors):
                    # Accepted error, let's pass
                    return EMPTY_RESULT
            elif _assert_ssl_exc_contains(ex, 'handshake operation timed out'):
                # This error is thrown by builtin SSL after a timeout
                # when client is speaking HTTP to an HTTPS server.
                # The connection can safely be dropped.
                return EMPTY_RESULT
            raise
        except generic_socket_error as exc:
            """It is unclear why exactly this happens.

            It's reproducible only with openssl>1.0 and stdlib ``ssl`` wrapper.
            In CherryPy it's triggered by Checker plugin, which connects
            to the app listening to the socket port in TLS mode via plain
            HTTP during startup (from the same process).

            Ref: https://github.com/cherrypy/cherrypy/issues/1618
            """
            is_error0 = exc.args == (0, 'Error')

            if is_error0 and IS_ABOVE_OPENSSL10:
                return EMPTY_RESULT
            raise
        return s, self.get_environ(s)

    # TODO: fill this out more with mod ssl env
    def get_environ(self, sock):
        """Create WSGI environ entries to be merged into each request."""
        cipher = sock.cipher()
        ssl_environ = {
            'wsgi.url_scheme': 'https',
            'HTTPS': 'on',
            'SSL_PROTOCOL': cipher[1],
            'SSL_CIPHER': cipher[0],
            # SSL_VERSION_INTERFACE string The mod_ssl program version
            # SSL_VERSION_LIBRARY string The OpenSSL program version
        }

        if self.context and self.context.verify_mode != ssl.CERT_NONE:
            client_cert = sock.getpeercert()
            if client_cert:
                for cert_key, env_var in self.CERT_KEY_TO_ENV.items():
                    ssl_environ.update(
                        self.env_dn_dict(env_var, client_cert.get(cert_key)),
                    )

        return ssl_environ

    def env_dn_dict(self, env_prefix, cert_value):
        """Return a dict of WSGI environment variables for a client cert DN.

        E.g. SSL_CLIENT_S_DN_CN, SSL_CLIENT_S_DN_C, etc.
        See SSL_CLIENT_S_DN_x509 at
        https://httpd.apache.org/docs/2.4/mod/mod_ssl.html#envvars.
        """
        if not cert_value:
            return {}

        env = {}
        for rdn in cert_value:
            for attr_name, val in rdn:
                attr_code = self.CERT_KEY_TO_LDAP_CODE.get(attr_name)
                if attr_code:
                    env['%s_%s' % (env_prefix, attr_code)] = val
        return env

    def makefile(self, sock, mode='r', bufsize=DEFAULT_BUFFER_SIZE):
        """Return socket file object."""
        cls = StreamReader if 'r' in mode else StreamWriter
        return cls(sock, mode, bufsize)
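Wiring the adapter into a server follows the module docstring above. A minimal sketch (certificate paths are placeholders; wsgi.Server shares the ssl_adapter attribute with HTTPServer):

    from cheroot import wsgi
    from cheroot.ssl.builtin import BuiltinSSLAdapter

    def app(environ, start_response):
        start_response('200 OK', [('Content-Type', 'text/plain')])
        return [b'Hello, TLS!']

    server = wsgi.Server(('0.0.0.0', 8443), app)
    server.ssl_adapter = BuiltinSSLAdapter(
        certificate='/path/to/cert.pem',  # placeholder
        private_key='/path/to/key.pem',   # placeholder
    )
    server.start()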
@@ -1,10 +1,11 @@
-"""A library for integrating pyOpenSSL with CherryPy.
+"""
+A library for integrating pyOpenSSL with Cheroot.
 
 The OpenSSL module must be importable for SSL functionality.
 You can obtain it from `here <https://launchpad.net/pyopenssl>`_.
 
-To use this module, set CherryPyWSGIServer.ssl_adapter to an instance of
-SSLAdapter. There are two ways to use SSL:
+To use this module, set HTTPServer.ssl_adapter to an instance of
+ssl.Adapter. There are two ways to use SSL:
 
 Method One
 ----------
@@ -30,26 +31,33 @@ and .certificate are both given and valid, they will be read, and the
 context will be automatically created from them.
 """
 
+from __future__ import absolute_import, division, print_function
+__metaclass__ = type
+
 import socket
 import threading
 import time
 
-from cherrypy import wsgiserver
+import six
 
 try:
     from OpenSSL import SSL
     from OpenSSL import crypto
-    if hasattr(SSL, 'Connection'):
-        SSLConnectionType = SSL.Connection
-    else:
-        SSLConnectionType = SSL.ConnectionType
+
+    try:
+        ssl_conn_type = SSL.Connection
+    except AttributeError:
+        ssl_conn_type = SSL.ConnectionType
 except ImportError:
     SSL = None
 
+from . import Adapter
+from .. import errors, server as cheroot_server
+from ..makefile import StreamReader, StreamWriter
+
 
-class SSL_fileobject(wsgiserver.CP_fileobject):
-
-    """SSL file object attached to a socket object."""
+class SSLFileobjectMixin:
+    """Base mixin for an SSL socket stream."""
 
     ssl_timeout = 3
     ssl_retry = .01
@@ -69,20 +77,21 @@ class SSL_fileobject(wsgiserver.CP_fileobject):
                 # the rest of the stack has no way of differentiating
                 # between a "new handshake" error and "client dropped".
                 # Note this isn't an endless loop: there's a timeout below.
+                # Ref: https://stackoverflow.com/a/5133568/595220
                 time.sleep(self.ssl_retry)
             except SSL.WantWriteError:
                 time.sleep(self.ssl_retry)
             except SSL.SysCallError as e:
                 if is_reader and e.args == (-1, 'Unexpected EOF'):
-                    return ""
+                    return b''
 
                 errnum = e.args[0]
-                if is_reader and errnum in wsgiserver.socket_errors_to_ignore:
-                    return ""
+                if is_reader and errnum in errors.socket_errors_to_ignore:
+                    return b''
                 raise socket.error(errnum)
             except SSL.Error as e:
                 if is_reader and e.args == (-1, 'Unexpected EOF'):
-                    return ""
+                    return b''
 
                 thirdarg = None
                 try:
@@ -92,69 +101,127 @@ class SSL_fileobject(wsgiserver.CP_fileobject):
 
                 if thirdarg == 'http request':
                     # The client is talking HTTP to an HTTPS server.
-                    raise wsgiserver.NoSSLError()
+                    raise errors.NoSSLError()
 
-                raise wsgiserver.FatalSSLAlert(*e.args)
-        except:
-            raise
+                raise errors.FatalSSLAlert(*e.args)
 
             if time.time() - start > self.ssl_timeout:
-                raise socket.timeout("timed out")
+                raise socket.timeout('timed out')
 
     def recv(self, size):
-        return self._safe_call(True, super(SSL_fileobject, self).recv, size)
+        """Receive message of a size from the socket."""
+        return self._safe_call(
+            True,
+            super(SSLFileobjectMixin, self).recv,
+            size,
+        )
+
+    def readline(self, size=-1):
+        """Receive message of a size from the socket.
+
+        Matches the following interface:
+        https://docs.python.org/3/library/io.html#io.IOBase.readline
+        """
+        return self._safe_call(
+            True,
+            super(SSLFileobjectMixin, self).readline,
+            size,
+        )
 
     def sendall(self, *args, **kwargs):
-        return self._safe_call(False, super(SSL_fileobject, self).sendall,
-                               *args, **kwargs)
+        """Send whole message to the socket."""
+        return self._safe_call(
+            False,
+            super(SSLFileobjectMixin, self).sendall,
+            *args, **kwargs
+        )
 
     def send(self, *args, **kwargs):
-        return self._safe_call(False, super(SSL_fileobject, self).send,
-                               *args, **kwargs)
+        """Send some part of message to the socket."""
+        return self._safe_call(
+            False,
+            super(SSLFileobjectMixin, self).send,
+            *args, **kwargs
+        )
+
+
+class SSLFileobjectStreamReader(SSLFileobjectMixin, StreamReader):
+    """SSL file object attached to a socket object."""
+
+
+class SSLFileobjectStreamWriter(SSLFileobjectMixin, StreamWriter):
+    """SSL file object attached to a socket object."""
+
+
+class SSLConnectionProxyMeta:
+    """Metaclass for generating a bunch of proxy methods."""
+
+    def __new__(mcl, name, bases, nmspc):
+        """Attach a list of proxy methods to a new class."""
+        proxy_methods = (
+            'get_context', 'pending', 'send', 'write', 'recv', 'read',
+            'renegotiate', 'bind', 'listen', 'connect', 'accept',
+            'setblocking', 'fileno', 'close', 'get_cipher_list',
+            'getpeername', 'getsockname', 'getsockopt', 'setsockopt',
+            'makefile', 'get_app_data', 'set_app_data', 'state_string',
+            'sock_shutdown', 'get_peer_certificate', 'want_read',
+            'want_write', 'set_connect_state', 'set_accept_state',
+            'connect_ex', 'sendall', 'settimeout', 'gettimeout',
+            'shutdown',
+        )
+        proxy_methods_no_args = (
+            'shutdown',
+        )
+
+        proxy_props = (
+            'family',
+        )
+
+        def lock_decorator(method):
+            """Create a proxy method for a new class."""
+            def proxy_wrapper(self, *args):
+                self._lock.acquire()
+                try:
+                    new_args = (
+                        args[:] if method not in proxy_methods_no_args else []
+                    )
+                    return getattr(self._ssl_conn, method)(*new_args)
+                finally:
+                    self._lock.release()
+            return proxy_wrapper
+        for m in proxy_methods:
+            nmspc[m] = lock_decorator(m)
+            nmspc[m].__name__ = m
+
+        def make_property(property_):
+            """Create a proxy method for a new class."""
+            def proxy_prop_wrapper(self):
+                return getattr(self._ssl_conn, property_)
+            proxy_prop_wrapper.__name__ = property_
+            return property(proxy_prop_wrapper)
+        for p in proxy_props:
+            nmspc[p] = make_property(p)
+
+        # Doesn't work via super() for some reason.
+        # Falling back to type() instead:
+        return type(name, bases, nmspc)
 
 
+@six.add_metaclass(SSLConnectionProxyMeta)
 class SSLConnection:
-
     """A thread-safe wrapper for an SSL.Connection.
 
     ``*args``: the arguments to create the wrapped ``SSL.Connection(*args)``.
     """
 
     def __init__(self, *args):
+        """Initialize SSLConnection instance."""
         self._ssl_conn = SSL.Connection(*args)
         self._lock = threading.RLock()
 
-    for f in ('get_context', 'pending', 'send', 'write', 'recv', 'read',
-              'renegotiate', 'bind', 'listen', 'connect', 'accept',
-              'setblocking', 'fileno', 'close', 'get_cipher_list',
-              'getpeername', 'getsockname', 'getsockopt', 'setsockopt',
-              'makefile', 'get_app_data', 'set_app_data', 'state_string',
-              'sock_shutdown', 'get_peer_certificate', 'want_read',
-              'want_write', 'set_connect_state', 'set_accept_state',
-              'connect_ex', 'sendall', 'settimeout', 'gettimeout'):
-        exec("""def %s(self, *args):
-    self._lock.acquire()
-    try:
-        return self._ssl_conn.%s(*args)
-    finally:
-        self._lock.release()
-""" % (f, f))
-
-    def shutdown(self, *args):
-        self._lock.acquire()
-        try:
-            # pyOpenSSL.socket.shutdown takes no args
-            return self._ssl_conn.shutdown()
-        finally:
-            self._lock.release()
-
 
-class pyOpenSSLAdapter(wsgiserver.SSLAdapter):
-
-    """A wrapper for integrating pyOpenSSL with CherryPy."""
-
-    context = None
-    """An instance of SSL.Context."""
+class pyOpenSSLAdapter(Adapter):
+    """A wrapper for integrating pyOpenSSL with Cheroot."""
 
     certificate = None
     """The filename of the server SSL certificate."""
@@ -168,14 +235,24 @@ class pyOpenSSLAdapter(wsgiserver.SSLAdapter):
     This is needed for cheaper "chained root" SSL certificates, and should be
     left as None if not required."""
 
-    def __init__(self, certificate, private_key, certificate_chain=None):
-        if SSL is None:
-            raise ImportError("You must install pyOpenSSL to use HTTPS.")
+    context = None
+    """An instance of SSL.Context."""
+
+    ciphers = None
+    """The ciphers list of SSL."""
+
+    def __init__(
+        self, certificate, private_key, certificate_chain=None,
+        ciphers=None,
+    ):
+        """Initialize OpenSSL Adapter instance."""
+        if SSL is None:
+            raise ImportError('You must install pyOpenSSL to use HTTPS.')
+
+        super(pyOpenSSLAdapter, self).__init__(
+            certificate, private_key, certificate_chain, ciphers,
+        )
 
         self.context = None
-        self.certificate = certificate
-        self.private_key = private_key
-        self.certificate_chain = certificate_chain
         self._environ = None
 
     def bind(self, sock):
@@ -192,7 +269,7 @@ class pyOpenSSLAdapter(wsgiserver.SSLAdapter):
 
     def get_context(self):
         """Return an SSL.Context from self attributes."""
-        # See http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/442473
+        # See https://code.activestate.com/recipes/442473/
         c = SSL.Context(SSL.SSLv23_METHOD)
         c.use_privatekey_file(self.private_key)
         if self.certificate_chain:
@@ -203,12 +280,12 @@ class pyOpenSSLAdapter(wsgiserver.SSLAdapter):
     def get_environ(self):
         """Return WSGI environ entries to be merged into each request."""
         ssl_environ = {
-            "HTTPS": "on",
+            'HTTPS': 'on',
             # pyOpenSSL doesn't provide access to any of these AFAICT
             # 'SSL_PROTOCOL': 'SSLv2',
-            # SSL_CIPHER string The cipher specification name
-            # SSL_VERSION_INTERFACE string The mod_ssl program version
-            # SSL_VERSION_LIBRARY string The OpenSSL program version
+            # SSL_CIPHER string The cipher specification name
+            # SSL_VERSION_INTERFACE string The mod_ssl program version
+            # SSL_VERSION_LIBRARY string The OpenSSL program version
         }
 
         if self.certificate:
@@ -224,8 +301,10 @@ class pyOpenSSLAdapter(wsgiserver.SSLAdapter):
                 # Validity of server's certificate (end time),
             })
 
-            for prefix, dn in [("I", cert.get_issuer()),
-                               ("S", cert.get_subject())]:
+            for prefix, dn in [
+                ('I', cert.get_issuer()),
+                ('S', cert.get_subject()),
+            ]:
                 # X509Name objects don't seem to have a way to get the
                 # complete DN string. Use str() and slice it instead,
                 # because str(dn) == "<X509Name object '/C=US/ST=...'>"
@@ -237,9 +316,9 @@ class pyOpenSSLAdapter(wsgiserver.SSLAdapter):
                 # The DN should be of the form: /k1=v1/k2=v2, but we must allow
                 # for any value to contain slashes itself (in a URL).
                 while dnstr:
-                    pos = dnstr.rfind("=")
+                    pos = dnstr.rfind('=')
                     dnstr, value = dnstr[:pos], dnstr[pos + 1:]
-                    pos = dnstr.rfind("/")
+                    pos = dnstr.rfind('/')
                     dnstr, key = dnstr[:pos], dnstr[pos + 1:]
                     if key and value:
                         wsgikey = 'SSL_SERVER_%s_DN_%s' % (prefix, key)
@@ -248,10 +327,17 @@ class pyOpenSSLAdapter(wsgiserver.SSLAdapter):
         return ssl_environ
 
     def makefile(self, sock, mode='r', bufsize=-1):
-        if SSL and isinstance(sock, SSLConnectionType):
-            timeout = sock.gettimeout()
-            f = SSL_fileobject(sock, mode, bufsize)
-            f.ssl_timeout = timeout
-            return f
+        """Return socket file object."""
+        cls = (
+            SSLFileobjectStreamReader
+            if 'r' in mode else
+            SSLFileobjectStreamWriter
+        )
+        if SSL and isinstance(sock, ssl_conn_type):
+            wrapped_socket = cls(sock, mode, bufsize)
+            wrapped_socket.ssl_timeout = sock.gettimeout()
+            return wrapped_socket
+        # This is from past:
+        # TODO: figure out what it's meant for
         else:
-            return wsgiserver.CP_fileobject(sock, mode, bufsize)
+            return cheroot_server.CP_fileobject(sock, mode, bufsize)
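The SSLConnectionProxyMeta trick above (generating lock-guarded pass-through methods at class-creation time) is a general pattern. A tiny standalone illustration of the same technique, independent of pyOpenSSL:

    import threading

    import six


    class LockedProxyMeta(type):
        """Generate methods that forward to self._target under self._lock."""

        def __new__(mcl, name, bases, nmspc):
            def make_proxy(method):
                def proxy(self, *args):
                    with self._lock:
                        return getattr(self._target, method)(*args)
                proxy.__name__ = method
                return proxy
            for m in nmspc.pop('proxied_methods', ()):
                nmspc[m] = make_proxy(m)
            return super(LockedProxyMeta, mcl).__new__(mcl, name, bases, nmspc)


    @six.add_metaclass(LockedProxyMeta)
    class LockedList:
        proxied_methods = ('append', 'pop')

        def __init__(self):
            self._target = []
            self._lock = threading.RLock()


    ll = LockedList()
    ll.append(1)  # list.append runs while holding the RLock
    assert ll.pop() == 1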
1 lib/cheroot/test/__init__.py Normal file
@@ -0,0 +1 @@
"""Cheroot test suite."""
69 lib/cheroot/test/conftest.py Normal file
@@ -0,0 +1,69 @@
"""Pytest configuration module.

Contains fixtures, which are tightly bound to the Cheroot framework
itself, useless for end-users' app testing.
"""

from __future__ import absolute_import, division, print_function
__metaclass__ = type

import threading
import time

import pytest

from ..server import Gateway, HTTPServer
from ..testing import (  # noqa: F401
    native_server, wsgi_server,
)
from ..testing import get_server_client


@pytest.fixture
def wsgi_server_client(wsgi_server):  # noqa: F811
    """Create a test client out of given WSGI server."""
    return get_server_client(wsgi_server)


@pytest.fixture
def native_server_client(native_server):  # noqa: F811
    """Create a test client out of given HTTP server."""
    return get_server_client(native_server)


@pytest.fixture
def http_server():
    """Provision a server creator as a fixture."""
    def start_srv():
        bind_addr = yield
        if bind_addr is None:
            return
        httpserver = make_http_server(bind_addr)
        yield httpserver
        yield httpserver

    srv_creator = iter(start_srv())
    next(srv_creator)
    yield srv_creator
    try:
        while True:
            httpserver = next(srv_creator)
            if httpserver is not None:
                httpserver.stop()
    except StopIteration:
        pass


def make_http_server(bind_addr):
    """Create and start an HTTP server bound to bind_addr."""
    httpserver = HTTPServer(
        bind_addr=bind_addr,
        gateway=Gateway,
    )

    threading.Thread(target=httpserver.safe_start).start()

    while not httpserver.ready:
        time.sleep(0.1)

    return httpserver
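The http_server fixture hands the test a primed generator: the test send()s a bind address in and gets a running server back, while the fixture's drain loop guarantees the server is stopped afterwards. A usage sketch:

    def test_binds_loopback(http_server):
        # send() resumes the generator at 'bind_addr = yield', so the
        # value passed here becomes the server's bind address.
        httpserver = http_server.send(('127.0.0.1', 0))
        assert httpserver.ready
        # No explicit stop(): the fixture teardown drains the generator
        # and calls httpserver.stop() on our behalf.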
168 lib/cheroot/test/helper.py Normal file
@@ -0,0 +1,168 @@
"""A library of helper functions for the Cheroot test suite."""

from __future__ import absolute_import, division, print_function
__metaclass__ = type

import datetime
import logging
import os
import sys
import time
import threading
import types

from six.moves import http_client

import six

import cheroot.server
import cheroot.wsgi

from cheroot.test import webtest

log = logging.getLogger(__name__)
thisdir = os.path.abspath(os.path.dirname(__file__))


config = {
    'bind_addr': ('127.0.0.1', 54583),
    'server': 'wsgi',
    'wsgi_app': None,
}


class CherootWebCase(webtest.WebCase):
    """Helper class for a web app test suite."""

    script_name = ''
    scheme = 'http'

    available_servers = {
        'wsgi': cheroot.wsgi.Server,
        'native': cheroot.server.HTTPServer,
    }

    @classmethod
    def setup_class(cls):
        """Create and run one HTTP server per class."""
        conf = config.copy()
        conf.update(getattr(cls, 'config', {}))

        s_class = conf.pop('server', 'wsgi')
        server_factory = cls.available_servers.get(s_class)
        if server_factory is None:
            raise RuntimeError('Unknown server in config: %s' % s_class)
        cls.httpserver = server_factory(**conf)

        cls.HOST, cls.PORT = cls.httpserver.bind_addr
        if cls.httpserver.ssl_adapter is None:
            ssl = ''
            cls.scheme = 'http'
        else:
            ssl = ' (ssl)'
            cls.HTTP_CONN = http_client.HTTPSConnection
            cls.scheme = 'https'

        v = sys.version.split()[0]
        log.info('Python version used to run this test script: %s' % v)
        log.info('Cheroot version: %s' % cheroot.__version__)
        log.info('HTTP server version: %s%s' % (cls.httpserver.protocol, ssl))
        log.info('PID: %s' % os.getpid())

        if hasattr(cls, 'setup_server'):
            # Clear the wsgi server so that
            # it can be updated with the new root
            cls.setup_server()
            cls.start()

    @classmethod
    def teardown_class(cls):
        """Cleanup HTTP server."""
        if hasattr(cls, 'setup_server'):
            cls.stop()

    @classmethod
    def start(cls):
        """Load and start the HTTP server."""
        threading.Thread(target=cls.httpserver.safe_start).start()
        while not cls.httpserver.ready:
            time.sleep(0.1)

    @classmethod
    def stop(cls):
        """Terminate HTTP server."""
        cls.httpserver.stop()
        td = getattr(cls, 'teardown', None)
        if td:
            td()

    date_tolerance = 2

    def assertEqualDates(self, dt1, dt2, seconds=None):
        """Assert abs(dt1 - dt2) is within Y seconds."""
        if seconds is None:
            seconds = self.date_tolerance

        if dt1 > dt2:
            diff = dt1 - dt2
        else:
            diff = dt2 - dt1
        if not diff < datetime.timedelta(seconds=seconds):
            raise AssertionError(
                '%r and %r are not within %r seconds.' % (dt1, dt2, seconds),
            )


class Request:
    """HTTP request container."""

    def __init__(self, environ):
        """Initialize HTTP request."""
        self.environ = environ


class Response:
    """HTTP response container."""

    def __init__(self):
        """Initialize HTTP response."""
        self.status = '200 OK'
        self.headers = {'Content-Type': 'text/html'}
        self.body = None

    def output(self):
        """Generate iterable response body object."""
        if self.body is None:
            return []
        elif isinstance(self.body, six.text_type):
            return [self.body.encode('iso-8859-1')]
        elif isinstance(self.body, six.binary_type):
            return [self.body]
        else:
            return [x.encode('iso-8859-1') for x in self.body]


class Controller:
    """WSGI app for tests."""

    def __call__(self, environ, start_response):
        """WSGI request handler."""
        req, resp = Request(environ), Response()
        try:
            # Python 3 supports unicode attribute names
            # Python 2 encodes them
            handler = self.handlers[environ['PATH_INFO']]
        except KeyError:
            resp.status = '404 Not Found'
        else:
            output = handler(req, resp)
            if (output is not None
                    and not any(resp.status.startswith(status_code)
                                for status_code in ('204', '304'))):
                resp.body = output
                try:
                    resp.headers.setdefault('Content-Length', str(len(output)))
                except TypeError:
                    if not isinstance(output, types.GeneratorType):
                        raise
        start_response(resp.status, resp.headers.items())
        return resp.output()
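A test module opts into this helper by subclassing CherootWebCase, defining setup_server (its mere presence is what makes setup_class start the server), and pointing config at a WSGI app. A hedged sketch, assuming webtest.WebCase supplies the getPage/assertStatus helpers as CherryPy's webtest does:

    class HelloController(Controller):
        """Hypothetical app with a single handler."""

        def hello(req, resp):
            return 'Hello, world!'

        handlers = {'/hello': hello}


    class HelloWebCase(CherootWebCase):
        config = {'wsgi_app': HelloController()}

        @classmethod
        def setup_server(cls):
            # Presence of this hook triggers cls.start() in setup_class.
            pass

        def test_hello(self):
            self.getPage('/hello')
            self.assertStatus(200)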
62 lib/cheroot/test/test__compat.py Normal file
@@ -0,0 +1,62 @@
# -*- coding: utf-8 -*-
"""Test suite for cross-python compatibility helpers."""

from __future__ import absolute_import, division, print_function
__metaclass__ = type

import pytest
import six

from cheroot._compat import extract_bytes, memoryview, ntob, ntou, bton


@pytest.mark.parametrize(
    'func,inp,out',
    [
        (ntob, 'bar', b'bar'),
        (ntou, 'bar', u'bar'),
        (bton, b'bar', 'bar'),
    ],
)
def test_compat_functions_positive(func, inp, out):
    """Check that compat functions work with correct input."""
    assert func(inp, encoding='utf-8') == out


@pytest.mark.parametrize(
    'func',
    [
        ntob,
        ntou,
    ],
)
def test_compat_functions_negative_nonnative(func):
    """Check that compat functions fail loudly for incorrect input."""
    non_native_test_str = u'bar' if six.PY2 else b'bar'
    with pytest.raises(TypeError):
        func(non_native_test_str, encoding='utf-8')


def test_ntou_escape():
    """Check that ntou supports escape-encoding under Python 2."""
    expected = u'hišřії'
    actual = ntou('hi\u0161\u0159\u0456\u0457', encoding='escape')
    assert actual == expected


@pytest.mark.parametrize(
    'input_argument,expected_result',
    [
        (b'qwerty', b'qwerty'),
        (memoryview(b'asdfgh'), b'asdfgh'),
    ],
)
def test_extract_bytes(input_argument, expected_result):
    """Check that legitimate inputs produce bytes."""
    assert extract_bytes(input_argument) == expected_result


def test_extract_bytes_invalid():
    """Ensure that invalid input causes exception to be raised."""
    with pytest.raises(ValueError):
        extract_bytes(u'some юнікод їїї')
980 lib/cheroot/test/test_conn.py Normal file
@@ -0,0 +1,980 @@
|
||||
"""Tests for TCP connection handling, including proper and timely close."""
|
||||
|
||||
from __future__ import absolute_import, division, print_function
|
||||
__metaclass__ = type
|
||||
|
||||
import socket
|
||||
import time
|
||||
|
||||
from six.moves import range, http_client, urllib
|
||||
|
||||
import six
|
||||
import pytest
|
||||
|
||||
from cheroot.test import helper, webtest
|
||||
|
||||
|
||||
timeout = 1
|
||||
pov = 'pPeErRsSiIsStTeEnNcCeE oOfF vViIsSiIoOnN'
|
||||
|
||||
|
||||
class Controller(helper.Controller):
|
||||
"""Controller for serving WSGI apps."""
|
||||
|
||||
def hello(req, resp):
|
||||
"""Render Hello world."""
|
||||
return 'Hello, world!'
|
||||
|
||||
def pov(req, resp):
|
||||
"""Render pov value."""
|
||||
return pov
|
||||
|
||||
def stream(req, resp):
|
||||
"""Render streaming response."""
|
||||
if 'set_cl' in req.environ['QUERY_STRING']:
|
||||
resp.headers['Content-Length'] = str(10)
|
||||
|
||||
def content():
|
||||
for x in range(10):
|
||||
yield str(x)
|
||||
|
||||
return content()
|
||||
|
||||
def upload(req, resp):
|
||||
"""Process file upload and render thank."""
|
||||
if not req.environ['REQUEST_METHOD'] == 'POST':
|
||||
raise AssertionError("'POST' != request.method %r" %
|
||||
req.environ['REQUEST_METHOD'])
|
||||
return "thanks for '%s'" % req.environ['wsgi.input'].read()
|
||||
|
||||
def custom_204(req, resp):
|
||||
"""Render response with status 204."""
|
||||
resp.status = '204'
|
||||
return 'Code = 204'
|
||||
|
||||
def custom_304(req, resp):
|
||||
"""Render response with status 304."""
|
||||
resp.status = '304'
|
||||
return 'Code = 304'
|
||||
|
||||
def err_before_read(req, resp):
|
||||
"""Render response with status 500."""
|
||||
resp.status = '500 Internal Server Error'
|
||||
return 'ok'
|
||||
|
||||
def one_megabyte_of_a(req, resp):
|
||||
"""Render 1MB response."""
|
||||
return ['a' * 1024] * 1024
|
||||
|
||||
def wrong_cl_buffered(req, resp):
|
||||
"""Render buffered response with invalid length value."""
|
||||
resp.headers['Content-Length'] = '5'
|
||||
return 'I have too many bytes'
|
||||
|
||||
def wrong_cl_unbuffered(req, resp):
|
||||
"""Render unbuffered response with invalid length value."""
|
||||
resp.headers['Content-Length'] = '5'
|
||||
return ['I too', ' have too many bytes']
|
||||
|
||||
def _munge(string):
|
||||
"""Encode PATH_INFO correctly depending on Python version.
|
||||
|
||||
WSGI 1.0 is a mess around unicode. Create endpoints
|
||||
that match the PATH_INFO that it produces.
|
||||
"""
|
||||
if six.PY2:
|
||||
return string
|
||||
return string.encode('utf-8').decode('latin-1')
|
||||
|
||||
handlers = {
|
||||
'/hello': hello,
|
||||
'/pov': pov,
|
||||
'/page1': pov,
|
||||
'/page2': pov,
|
||||
'/page3': pov,
|
||||
'/stream': stream,
|
||||
'/upload': upload,
|
||||
'/custom/204': custom_204,
|
||||
'/custom/304': custom_304,
|
||||
'/err_before_read': err_before_read,
|
||||
'/one_megabyte_of_a': one_megabyte_of_a,
|
||||
'/wrong_cl_buffered': wrong_cl_buffered,
|
||||
'/wrong_cl_unbuffered': wrong_cl_unbuffered,
|
||||
}
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def testing_server(wsgi_server_client):
|
||||
"""Attach a WSGI app to the given server and pre-configure it."""
|
||||
app = Controller()
|
||||
|
||||
def _timeout(req, resp):
|
||||
return str(wsgi_server.timeout)
|
||||
app.handlers['/timeout'] = _timeout
|
||||
wsgi_server = wsgi_server_client.server_instance
|
||||
wsgi_server.wsgi_app = app
|
||||
wsgi_server.max_request_body_size = 1001
|
||||
wsgi_server.timeout = timeout
|
||||
wsgi_server.server_client = wsgi_server_client
|
||||
wsgi_server.keep_alive_conn_limit = 2
|
||||
return wsgi_server
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def test_client(testing_server):
|
||||
"""Get and return a test client out of the given server."""
|
||||
return testing_server.server_client
|
||||
|
||||
|
||||
def header_exists(header_name, headers):
|
||||
"""Check that a header is present."""
|
||||
return header_name.lower() in (k.lower() for (k, _) in headers)
|
||||
|
||||
|
||||
def header_has_value(header_name, header_value, headers):
|
||||
"""Check that a header with a given value is present."""
|
||||
return header_name.lower() in (
|
||||
k.lower() for (k, v) in headers
|
||||
if v == header_value
|
||||
)


def test_HTTP11_persistent_connections(test_client):
    """Test persistent HTTP/1.1 connections."""
    # Initialize a persistent HTTP connection
    http_connection = test_client.get_connection()
    http_connection.auto_open = False
    http_connection.connect()

    # Make the first request and assert there's no "Connection: close".
    status_line, actual_headers, actual_resp_body = test_client.get(
        '/pov', http_conn=http_connection,
    )
    actual_status = int(status_line[:3])
    assert actual_status == 200
    assert status_line[4:] == 'OK'
    assert actual_resp_body == pov.encode()
    assert not header_exists('Connection', actual_headers)

    # Make another request on the same connection.
    status_line, actual_headers, actual_resp_body = test_client.get(
        '/page1', http_conn=http_connection,
    )
    actual_status = int(status_line[:3])
    assert actual_status == 200
    assert status_line[4:] == 'OK'
    assert actual_resp_body == pov.encode()
    assert not header_exists('Connection', actual_headers)

    # Test client-side close.
    status_line, actual_headers, actual_resp_body = test_client.get(
        '/page2', http_conn=http_connection,
        headers=[('Connection', 'close')],
    )
    actual_status = int(status_line[:3])
    assert actual_status == 200
    assert status_line[4:] == 'OK'
    assert actual_resp_body == pov.encode()
    assert header_has_value('Connection', 'close', actual_headers)

    # Make another request on the same connection, which should error.
    with pytest.raises(http_client.NotConnected):
        test_client.get('/pov', http_conn=http_connection)


@pytest.mark.parametrize(
    'set_cl',
    (
        False,  # Without Content-Length
        True,  # With Content-Length
    ),
)
def test_streaming_11(test_client, set_cl):
    """Test serving of streaming responses with HTTP/1.1 protocol."""
    # Initialize a persistent HTTP connection
    http_connection = test_client.get_connection()
    http_connection.auto_open = False
    http_connection.connect()

    # Make the first request and assert there's no "Connection: close".
    status_line, actual_headers, actual_resp_body = test_client.get(
        '/pov', http_conn=http_connection,
    )
    actual_status = int(status_line[:3])
    assert actual_status == 200
    assert status_line[4:] == 'OK'
    assert actual_resp_body == pov.encode()
    assert not header_exists('Connection', actual_headers)

    # Make another, streamed request on the same connection.
    if set_cl:
        # When a Content-Length is provided, the content should stream
        # without closing the connection.
        status_line, actual_headers, actual_resp_body = test_client.get(
            '/stream?set_cl=Yes', http_conn=http_connection,
        )
        assert header_exists('Content-Length', actual_headers)
        assert not header_has_value('Connection', 'close', actual_headers)
        assert not header_exists('Transfer-Encoding', actual_headers)

        assert actual_status == 200
        assert status_line[4:] == 'OK'
        assert actual_resp_body == b'0123456789'
    else:
        # When no Content-Length response header is provided,
        # streamed output will either close the connection or use
        # chunked encoding to determine the transfer-length.
        status_line, actual_headers, actual_resp_body = test_client.get(
            '/stream', http_conn=http_connection,
        )
        assert not header_exists('Content-Length', actual_headers)
        assert actual_status == 200
        assert status_line[4:] == 'OK'
        assert actual_resp_body == b'0123456789'

        chunked_response = False
        for k, v in actual_headers:
            if k.lower() == 'transfer-encoding':
                if str(v) == 'chunked':
                    chunked_response = True

        if chunked_response:
            assert not header_has_value('Connection', 'close', actual_headers)
        else:
            assert header_has_value('Connection', 'close', actual_headers)

            # Make another request on the same connection, which should
            # error.
            with pytest.raises(http_client.NotConnected):
                test_client.get('/pov', http_conn=http_connection)

    # Try HEAD.
    # See https://www.bitbucket.org/cherrypy/cherrypy/issue/864.
    # TODO: figure out how this can be possible on a closed connection
    # (the chunked_response case).
    status_line, actual_headers, actual_resp_body = test_client.head(
        '/stream', http_conn=http_connection,
    )
    assert actual_status == 200
    assert status_line[4:] == 'OK'
    assert actual_resp_body == b''
    assert not header_exists('Transfer-Encoding', actual_headers)
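
To make the chunked fallback above concrete, here is a minimal sketch of the framing described by RFC 7230 section 4.1 (illustrative only, not cheroot's implementation):

    def encode_chunked(chunks):
        """Frame an iterable of byte chunks as a chunked message body."""
        out = b''
        for chunk in chunks:
            out += hex(len(chunk))[2:].encode('ascii') + b'\r\n' + chunk + b'\r\n'
        return out + b'0\r\n\r\n'  # a zero-length chunk terminates the body

    assert encode_chunked([b'0123456789']) == b'a\r\n0123456789\r\n0\r\n\r\n'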


@pytest.mark.parametrize(
    'set_cl',
    (
        False,  # Without Content-Length
        True,  # With Content-Length
    ),
)
def test_streaming_10(test_client, set_cl):
    """Test serving of streaming responses with HTTP/1.0 protocol."""
    original_server_protocol = test_client.server_instance.protocol
    test_client.server_instance.protocol = 'HTTP/1.0'

    # Initialize a persistent HTTP connection
    http_connection = test_client.get_connection()
    http_connection.auto_open = False
    http_connection.connect()

    # Make the first request and assert Keep-Alive.
    status_line, actual_headers, actual_resp_body = test_client.get(
        '/pov', http_conn=http_connection,
        headers=[('Connection', 'Keep-Alive')],
        protocol='HTTP/1.0',
    )
    actual_status = int(status_line[:3])
    assert actual_status == 200
    assert status_line[4:] == 'OK'
    assert actual_resp_body == pov.encode()
    assert header_has_value('Connection', 'Keep-Alive', actual_headers)

    # Make another, streamed request on the same connection.
    if set_cl:
        # When a Content-Length is provided, the content should
        # stream without closing the connection.
        status_line, actual_headers, actual_resp_body = test_client.get(
            '/stream?set_cl=Yes', http_conn=http_connection,
            headers=[('Connection', 'Keep-Alive')],
            protocol='HTTP/1.0',
        )
        actual_status = int(status_line[:3])
        assert actual_status == 200
        assert status_line[4:] == 'OK'
        assert actual_resp_body == b'0123456789'

        assert header_exists('Content-Length', actual_headers)
        assert header_has_value('Connection', 'Keep-Alive', actual_headers)
        assert not header_exists('Transfer-Encoding', actual_headers)
    else:
        # When a Content-Length is not provided,
        # the server should close the connection.
        status_line, actual_headers, actual_resp_body = test_client.get(
            '/stream', http_conn=http_connection,
            headers=[('Connection', 'Keep-Alive')],
            protocol='HTTP/1.0',
        )
        actual_status = int(status_line[:3])
        assert actual_status == 200
        assert status_line[4:] == 'OK'
        assert actual_resp_body == b'0123456789'

        assert not header_exists('Content-Length', actual_headers)
        assert not header_has_value('Connection', 'Keep-Alive', actual_headers)
        assert not header_exists('Transfer-Encoding', actual_headers)

        # Make another request on the same connection, which should error.
        with pytest.raises(http_client.NotConnected):
            test_client.get(
                '/pov', http_conn=http_connection,
                protocol='HTTP/1.0',
            )

    test_client.server_instance.protocol = original_server_protocol


@pytest.mark.parametrize(
    'http_server_protocol',
    (
        'HTTP/1.0',
        'HTTP/1.1',
    ),
)
def test_keepalive(test_client, http_server_protocol):
    """Test Keep-Alive enabled connections."""
    original_server_protocol = test_client.server_instance.protocol
    test_client.server_instance.protocol = http_server_protocol

    http_client_protocol = 'HTTP/1.0'

    # Initialize a persistent HTTP connection
    http_connection = test_client.get_connection()
    http_connection.auto_open = False
    http_connection.connect()

    # Test a normal HTTP/1.0 request.
    status_line, actual_headers, actual_resp_body = test_client.get(
        '/page2',
        protocol=http_client_protocol,
    )
    actual_status = int(status_line[:3])
    assert actual_status == 200
    assert status_line[4:] == 'OK'
    assert actual_resp_body == pov.encode()
    assert not header_exists('Connection', actual_headers)

    # Test a keep-alive HTTP/1.0 request.
    status_line, actual_headers, actual_resp_body = test_client.get(
        '/page3', headers=[('Connection', 'Keep-Alive')],
        http_conn=http_connection, protocol=http_client_protocol,
    )
    actual_status = int(status_line[:3])
    assert actual_status == 200
    assert status_line[4:] == 'OK'
    assert actual_resp_body == pov.encode()
    assert header_has_value('Connection', 'Keep-Alive', actual_headers)

    # Remove the keep-alive header again.
    status_line, actual_headers, actual_resp_body = test_client.get(
        '/page3', http_conn=http_connection,
        protocol=http_client_protocol,
    )
    actual_status = int(status_line[:3])
    assert actual_status == 200
    assert status_line[4:] == 'OK'
    assert actual_resp_body == pov.encode()
    assert not header_exists('Connection', actual_headers)

    test_client.server_instance.protocol = original_server_protocol


def test_keepalive_conn_management(test_client):
    """Test management of Keep-Alive connections."""
    test_client.server_instance.timeout = 2

    def connection():
        # Initialize a persistent HTTP connection
        http_connection = test_client.get_connection()
        http_connection.auto_open = False
        http_connection.connect()
        return http_connection

    def request(conn):
        status_line, actual_headers, actual_resp_body = test_client.get(
            '/page3', headers=[('Connection', 'Keep-Alive')],
            http_conn=conn, protocol='HTTP/1.0',
        )
        actual_status = int(status_line[:3])
        assert actual_status == 200
        assert status_line[4:] == 'OK'
        assert actual_resp_body == pov.encode()
        assert header_has_value('Connection', 'Keep-Alive', actual_headers)

    disconnect_errors = (
        http_client.BadStatusLine,
        http_client.CannotSendRequest,
        http_client.NotConnected,
    )

    # Make a new connection.
    c1 = connection()
    request(c1)

    # Make a second one.
    c2 = connection()
    request(c2)

    # Reusing the first connection should still work.
    request(c1)

    # Creating a new connection should still work.
    c3 = connection()
    request(c3)

    # Allow a tick.
    time.sleep(0.2)

    # That's three connections; we should expect the one used least
    # recently to be expired.
    with pytest.raises(disconnect_errors):
        request(c2)

    # But the oldest created one should still be valid.
    # (As well as the newest one.)
    request(c1)
    request(c3)

    # Wait for some of our timeout.
    time.sleep(1.0)

    # Refresh the third connection.
    request(c3)

    # Wait for the remainder of our timeout, plus one tick.
    time.sleep(1.2)

    # First connection should now be expired.
    with pytest.raises(disconnect_errors):
        request(c1)

    # But the third one should still be valid.
    request(c3)

    test_client.server_instance.timeout = timeout
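
The timeline above exercises ``keep_alive_conn_limit = 2`` together with a 2-second idle timeout. A rough sketch of the bookkeeping such a policy implies (an assumption for illustration only, not cheroot's actual implementation):

    import time

    class IdleConnTracker:
        """Track last-use times; expire over-limit or timed-out conns."""

        def __init__(self, limit=2, idle_timeout=2.0):
            self.limit = limit
            self.idle_timeout = idle_timeout
            self.last_used = {}

        def touch(self, conn_id, now=None):
            self.last_used[conn_id] = time.time() if now is None else now

        def expired(self, now):
            by_age = sorted(self.last_used, key=self.last_used.get)
            over_limit = by_age[:max(0, len(by_age) - self.limit)]
            timed_out = [
                c for c in by_age
                if now - self.last_used[c] > self.idle_timeout
            ]
            return set(over_limit) | set(timed_out)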


@pytest.mark.parametrize(
    'timeout_before_headers',
    (
        True,
        False,
    ),
)
def test_HTTP11_Timeout(test_client, timeout_before_headers):
    """Check timeout without sending any data.

    The server will close the connection with a 408.
    """
    conn = test_client.get_connection()
    conn.auto_open = False
    conn.connect()

    if not timeout_before_headers:
        # Connect but send half the headers only.
        conn.send(b'GET /hello HTTP/1.1')
        conn.send(('Host: %s' % conn.host).encode('ascii'))
    # else: Connect but send nothing.

    # Wait for our socket timeout
    time.sleep(timeout * 2)

    # The request should have returned 408 already.
    response = conn.response_class(conn.sock, method='GET')
    response.begin()
    assert response.status == 408
    conn.close()


def test_HTTP11_Timeout_after_request(test_client):
    """Check timeout after at least one request has succeeded.

    The server should close the connection without a 408.
    """
    fail_msg = "Writing to timed out socket didn't fail as it should have: %s"

    # Make an initial request
    conn = test_client.get_connection()
    conn.putrequest('GET', '/timeout?t=%s' % timeout, skip_host=True)
    conn.putheader('Host', conn.host)
    conn.endheaders()
    response = conn.response_class(conn.sock, method='GET')
    response.begin()
    assert response.status == 200
    actual_body = response.read()
    expected_body = str(timeout).encode()
    assert actual_body == expected_body

    # Make a second request on the same socket
    conn._output(b'GET /hello HTTP/1.1')
    conn._output(('Host: %s' % conn.host).encode('ascii'))
    conn._send_output()
    response = conn.response_class(conn.sock, method='GET')
    response.begin()
    assert response.status == 200
    actual_body = response.read()
    expected_body = b'Hello, world!'
    assert actual_body == expected_body

    # Wait for our socket timeout
    time.sleep(timeout * 2)

    # Make another request on the same socket, which should error
    conn._output(b'GET /hello HTTP/1.1')
    conn._output(('Host: %s' % conn.host).encode('ascii'))
    conn._send_output()
    response = conn.response_class(conn.sock, method='GET')
    try:
        response.begin()
    except (socket.error, http_client.BadStatusLine):
        pass
    except Exception as ex:
        pytest.fail(fail_msg % ex)
    else:
        if response.status != 408:
            pytest.fail(fail_msg % response.read())

    conn.close()

    # Make another request on a new socket, which should work
    conn = test_client.get_connection()
    conn.putrequest('GET', '/pov', skip_host=True)
    conn.putheader('Host', conn.host)
    conn.endheaders()
    response = conn.response_class(conn.sock, method='GET')
    response.begin()
    assert response.status == 200
    actual_body = response.read()
    expected_body = pov.encode()
    assert actual_body == expected_body

    # Make another request on the same socket,
    # but timeout on the headers
    conn.send(b'GET /hello HTTP/1.1')
    # Wait for our socket timeout
    time.sleep(timeout * 2)
    response = conn.response_class(conn.sock, method='GET')
    try:
        response.begin()
    except (socket.error, http_client.BadStatusLine):
        pass
    except Exception as ex:
        pytest.fail(fail_msg % ex)
    else:
        if response.status != 408:
            pytest.fail(fail_msg % response.read())

    conn.close()

    # Retry the request on a new connection, which should work
    conn = test_client.get_connection()
    conn.putrequest('GET', '/pov', skip_host=True)
    conn.putheader('Host', conn.host)
    conn.endheaders()
    response = conn.response_class(conn.sock, method='GET')
    response.begin()
    assert response.status == 200
    actual_body = response.read()
    expected_body = pov.encode()
    assert actual_body == expected_body
    conn.close()


def test_HTTP11_pipelining(test_client):
    """Test HTTP/1.1 pipelining.

    httplib doesn't support this directly.
    """
    conn = test_client.get_connection()

    # Put request 1
    conn.putrequest('GET', '/hello', skip_host=True)
    conn.putheader('Host', conn.host)
    conn.endheaders()

    for trial in range(5):
        # Put next request
        conn._output(
            ('GET /hello?%s HTTP/1.1' % trial).encode('iso-8859-1'),
        )
        conn._output(('Host: %s' % conn.host).encode('ascii'))
        conn._send_output()

        # Retrieve previous response
        response = conn.response_class(conn.sock, method='GET')
        # There is a bug in Python 3 regarding the buffering of
        # ``conn.sock``. Until that bug gets fixed, we monkey-patch
        # the ``response`` instance.
        # https://bugs.python.org/issue23377
        if not six.PY2:
            response.fp = conn.sock.makefile('rb', 0)
        response.begin()
        body = response.read(13)
        assert response.status == 200
        assert body == b'Hello, world!'

    # Retrieve final response
    response = conn.response_class(conn.sock, method='GET')
    response.begin()
    body = response.read()
    assert response.status == 200
    assert body == b'Hello, world!'

    conn.close()


def test_100_Continue(test_client):
    """Test 100-continue header processing."""
    conn = test_client.get_connection()

    # Try a page without an Expect request header first.
    # Note that httplib's response.begin automatically ignores
    # 100 Continue responses, so we must manually check for it.
    conn.putrequest('POST', '/upload', skip_host=True)
    conn.putheader('Host', conn.host)
    conn.putheader('Content-Type', 'text/plain')
    conn.putheader('Content-Length', '4')
    conn.endheaders()
    conn.send(b"d'oh")
    response = conn.response_class(conn.sock, method='POST')
    version, status, reason = response._read_status()
    assert status != 100
    conn.close()

    # Now try a page with an Expect header...
    conn.connect()
    conn.putrequest('POST', '/upload', skip_host=True)
    conn.putheader('Host', conn.host)
    conn.putheader('Content-Type', 'text/plain')
    conn.putheader('Content-Length', '17')
    conn.putheader('Expect', '100-continue')
    conn.endheaders()
    response = conn.response_class(conn.sock, method='POST')

    # ...assert and then skip the 100 response
    version, status, reason = response._read_status()
    assert status == 100
    while True:
        line = response.fp.readline().strip()
        if line:
            pytest.fail(
                '100 Continue should not output any headers. Got %r' %
                line,
            )
        else:
            break

    # ...send the body
    body = b'I am a small file'
    conn.send(body)

    # ...get the final response
    response.begin()
    status_line, actual_headers, actual_resp_body = webtest.shb(response)
    actual_status = int(status_line[:3])
    assert actual_status == 200
    expected_resp_body = ("thanks for '%s'" % body).encode()
    assert actual_resp_body == expected_resp_body
    conn.close()
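
On the wire, the successful ``Expect: 100-continue`` exchange above proceeds roughly as follows (a sketch of the RFC 7231 section 5.1.1 flow, not captured output):

    # C: POST /upload HTTP/1.1
    # C: Host: <host>
    # C: Content-Type: text/plain
    # C: Content-Length: 17
    # C: Expect: 100-continue
    # C: <blank line; the body is withheld>
    # S: HTTP/1.1 100 Continue
    # S: <blank line; no headers>
    # C: I am a small file
    # S: HTTP/1.1 200 OK ... "thanks for 'I am a small file'"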


@pytest.mark.parametrize(
    'max_request_body_size',
    (
        0,
        1001,
    ),
)
def test_readall_or_close(test_client, max_request_body_size):
    """Test a max_request_body_size of 0 (the default) and 1001."""
    old_max = test_client.server_instance.max_request_body_size

    test_client.server_instance.max_request_body_size = max_request_body_size

    conn = test_client.get_connection()

    # Get a POST page with an error
    conn.putrequest('POST', '/err_before_read', skip_host=True)
    conn.putheader('Host', conn.host)
    conn.putheader('Content-Type', 'text/plain')
    conn.putheader('Content-Length', '1000')
    conn.putheader('Expect', '100-continue')
    conn.endheaders()
    response = conn.response_class(conn.sock, method='POST')

    # ...assert and then skip the 100 response
    version, status, reason = response._read_status()
    assert status == 100
    skip = True
    while skip:
        skip = response.fp.readline().strip()

    # ...send the body
    conn.send(b'x' * 1000)

    # ...get the final response
    response.begin()
    status_line, actual_headers, actual_resp_body = webtest.shb(response)
    actual_status = int(status_line[:3])
    assert actual_status == 500

    # Now try a working page with an Expect header...
    conn._output(b'POST /upload HTTP/1.1')
    conn._output(('Host: %s' % conn.host).encode('ascii'))
    conn._output(b'Content-Type: text/plain')
    conn._output(b'Content-Length: 17')
    conn._output(b'Expect: 100-continue')
    conn._send_output()
    response = conn.response_class(conn.sock, method='POST')

    # ...assert and then skip the 100 response
    version, status, reason = response._read_status()
    assert status == 100
    skip = True
    while skip:
        skip = response.fp.readline().strip()

    # ...send the body
    body = b'I am a small file'
    conn.send(body)

    # ...get the final response
    response.begin()
    status_line, actual_headers, actual_resp_body = webtest.shb(response)
    actual_status = int(status_line[:3])
    assert actual_status == 200
    expected_resp_body = ("thanks for '%s'" % body).encode()
    assert actual_resp_body == expected_resp_body
    conn.close()

    test_client.server_instance.max_request_body_size = old_max


def test_No_Message_Body(test_client):
    """Test HTTP queries with an empty response body."""
    # Initialize a persistent HTTP connection
    http_connection = test_client.get_connection()
    http_connection.auto_open = False
    http_connection.connect()

    # Make the first request and assert there's no "Connection: close".
    status_line, actual_headers, actual_resp_body = test_client.get(
        '/pov', http_conn=http_connection,
    )
    actual_status = int(status_line[:3])
    assert actual_status == 200
    assert status_line[4:] == 'OK'
    assert actual_resp_body == pov.encode()
    assert not header_exists('Connection', actual_headers)

    # Make a 204 request on the same connection.
    status_line, actual_headers, actual_resp_body = test_client.get(
        '/custom/204', http_conn=http_connection,
    )
    actual_status = int(status_line[:3])
    assert actual_status == 204
    assert not header_exists('Content-Length', actual_headers)
    assert actual_resp_body == b''
    assert not header_exists('Connection', actual_headers)

    # Make a 304 request on the same connection.
    status_line, actual_headers, actual_resp_body = test_client.get(
        '/custom/304', http_conn=http_connection,
    )
    actual_status = int(status_line[:3])
    assert actual_status == 304
    assert not header_exists('Content-Length', actual_headers)
    assert actual_resp_body == b''
    assert not header_exists('Connection', actual_headers)


@pytest.mark.xfail(
    reason='Server does not correctly read trailers/ending of the previous '
           'HTTP request, thus the second request fails as the server tries '
           r"to parse b'Content-Type: application/json\r\n' as a "
           'Request-Line. This results in HTTP status code 400, instead of '
           '413. Ref: https://github.com/cherrypy/cheroot/issues/69',
)
def test_Chunked_Encoding(test_client):
    """Test HTTP uploads with chunked transfer-encoding."""
    # Initialize a persistent HTTP connection
    conn = test_client.get_connection()

    # Try a normal chunked request (with extensions)
    body = (
        b'8;key=value\r\nxx\r\nxxxx\r\n5\r\nyyyyy\r\n0\r\n'
        b'Content-Type: application/json\r\n'
        b'\r\n'
    )
    conn.putrequest('POST', '/upload', skip_host=True)
    conn.putheader('Host', conn.host)
    conn.putheader('Transfer-Encoding', 'chunked')
    conn.putheader('Trailer', 'Content-Type')
    # Note that this is somewhat malformed:
    # we shouldn't be sending Content-Length.
    # RFC 2616 says the server should ignore it.
    conn.putheader('Content-Length', '3')
    conn.endheaders()
    conn.send(body)
    response = conn.getresponse()
    status_line, actual_headers, actual_resp_body = webtest.shb(response)
    actual_status = int(status_line[:3])
    assert actual_status == 200
    assert status_line[4:] == 'OK'
    expected_resp_body = ("thanks for '%s'" % b'xx\r\nxxxxyyyyy').encode()
    assert actual_resp_body == expected_resp_body

    # Try a chunked request that exceeds server.max_request_body_size.
    # Note that the delimiters and trailer are included.
    body = b'3e3\r\n' + (b'x' * 995) + b'\r\n0\r\n\r\n'
    conn.putrequest('POST', '/upload', skip_host=True)
    conn.putheader('Host', conn.host)
    conn.putheader('Transfer-Encoding', 'chunked')
    conn.putheader('Content-Type', 'text/plain')
    # Chunked requests don't need a Content-Length
    # conn.putheader('Content-Length', len(body))
    conn.endheaders()
    conn.send(body)
    response = conn.getresponse()
    status_line, actual_headers, actual_resp_body = webtest.shb(response)
    actual_status = int(status_line[:3])
    assert actual_status == 413
    conn.close()
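
Decoding the first chunked body above by hand (an explanatory breakdown, not part of the diff):

    # b'8;key=value\r\n'  chunk-size 8 (hex) plus a chunk extension 'key=value'
    # b'xx\r\nxxxx\r\n'   8 bytes of data ('xx\r\nxxxx'; the inner CRLF counts),
    #                     then the CRLF that closes the chunk
    # b'5\r\nyyyyy\r\n'   a 5-byte chunk 'yyyyy'
    # b'0\r\n'            the last-chunk (size zero)
    # b'Content-Type: application/json\r\n\r\n'  trailer header and final CRLF
    # Reassembled payload: b'xx\r\nxxxxyyyyy', exactly what the 200 response echoes.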


def test_Content_Length_in(test_client):
    """Try a non-chunked request where Content-Length exceeds the limit
    (server.max_request_body_size).

    Assert that an error is returned before the body is sent.
    """
    # Initialize a persistent HTTP connection
    conn = test_client.get_connection()

    conn.putrequest('POST', '/upload', skip_host=True)
    conn.putheader('Host', conn.host)
    conn.putheader('Content-Type', 'text/plain')
    conn.putheader('Content-Length', '9999')
    conn.endheaders()
    response = conn.getresponse()
    status_line, actual_headers, actual_resp_body = webtest.shb(response)
    actual_status = int(status_line[:3])
    assert actual_status == 413
    expected_resp_body = (
        b'The entity sent with the request exceeds '
        b'the maximum allowed bytes.'
    )
    assert actual_resp_body == expected_resp_body
    conn.close()


def test_Content_Length_not_int(test_client):
    """Test that a malicious Content-Length header returns 400."""
    status_line, actual_headers, actual_resp_body = test_client.post(
        '/upload',
        headers=[
            ('Content-Type', 'text/plain'),
            ('Content-Length', 'not-an-integer'),
        ],
    )
    actual_status = int(status_line[:3])

    assert actual_status == 400
    assert actual_resp_body == b'Malformed Content-Length Header.'


@pytest.mark.parametrize(
    'uri,expected_resp_status,expected_resp_body',
    (
        (
            '/wrong_cl_buffered', 500,
            (
                b'The requested resource returned more bytes than the '
                b'declared Content-Length.'
            ),
        ),
        ('/wrong_cl_unbuffered', 200, b'I too'),
    ),
)
def test_Content_Length_out(
    test_client,
    uri, expected_resp_status, expected_resp_body,
):
    """Test response with Content-Length less than the response body.

    (non-chunked response)
    """
    conn = test_client.get_connection()
    conn.putrequest('GET', uri, skip_host=True)
    conn.putheader('Host', conn.host)
    conn.endheaders()

    response = conn.getresponse()
    status_line, actual_headers, actual_resp_body = webtest.shb(response)
    actual_status = int(status_line[:3])

    assert actual_status == expected_resp_status
    assert actual_resp_body == expected_resp_body

    conn.close()


@pytest.mark.xfail(
    reason='Sometimes this test fails due to low timeout. '
           'Ref: https://github.com/cherrypy/cherrypy/issues/598',
)
def test_598(test_client):
    """Test serving a large file with a read timeout in place."""
    # Initialize a persistent HTTP connection
    conn = test_client.get_connection()
    remote_data_conn = urllib.request.urlopen(
        '%s://%s:%s/one_megabyte_of_a'
        % ('http', conn.host, conn.port),
    )
    buf = remote_data_conn.read(512)
    time.sleep(timeout * 0.6)
    remaining = (1024 * 1024) - 512
    while remaining:
        data = remote_data_conn.read(remaining)
        if not data:
            break
        buf += data
        remaining -= len(data)

    assert len(buf) == 1024 * 1024
    assert buf == b'a' * 1024 * 1024
    assert remaining == 0
    remote_data_conn.close()


@pytest.mark.parametrize(
    'invalid_terminator',
    (
        b'\n\n',
        b'\r\n\n',
    ),
)
def test_No_CRLF(test_client, invalid_terminator):
    """Test HTTP queries with no valid CRLF terminators."""
    # Initialize a persistent HTTP connection
    conn = test_client.get_connection()

    # (b'%s' % b'') is not supported in Python 3.4, so just use +
    conn.send(b'GET /hello HTTP/1.1' + invalid_terminator)
    response = conn.response_class(conn.sock, method='GET')
    response.begin()
    actual_resp_body = response.read()
    expected_resp_body = b'HTTP requires CRLF terminators'
    assert actual_resp_body == expected_resp_body
    conn.close()

lib/cheroot/test/test_core.py (new file, 415 lines)
@@ -0,0 +1,415 @@
"""Tests for managing HTTP issues (malformed requests, etc)."""
# -*- coding: utf-8 -*-
# vim: set fileencoding=utf-8 :

from __future__ import absolute_import, division, print_function
__metaclass__ = type

import errno
import socket

import pytest
import six
from six.moves import urllib

from cheroot.test import helper


HTTP_BAD_REQUEST = 400
HTTP_LENGTH_REQUIRED = 411
HTTP_NOT_FOUND = 404
HTTP_OK = 200
HTTP_VERSION_NOT_SUPPORTED = 505


class HelloController(helper.Controller):
    """Controller for serving WSGI apps."""

    def hello(req, resp):
        """Render Hello world."""
        return 'Hello world!'

    def body_required(req, resp):
        """Render Hello world or set 411."""
        if req.environ.get('Content-Length', None) is None:
            resp.status = '411 Length Required'
            return
        return 'Hello world!'

    def query_string(req, resp):
        """Render QUERY_STRING value."""
        return req.environ.get('QUERY_STRING', '')

    def asterisk(req, resp):
        """Render request method value."""
        method = req.environ.get('REQUEST_METHOD', 'NO METHOD FOUND')
        tmpl = 'Got asterisk URI path with {method} method'
        return tmpl.format(**locals())

    def _munge(string):
        """Encode PATH_INFO correctly depending on Python version.

        WSGI 1.0 is a mess around unicode. Create endpoints
        that match the PATH_INFO that it produces.
        """
        if six.PY2:
            return string
        return string.encode('utf-8').decode('latin-1')

    handlers = {
        '/hello': hello,
        '/no_body': hello,
        '/body_required': body_required,
        '/query_string': query_string,
        _munge('/привіт'): hello,
        _munge('/Юххууу'): hello,
        '/\xa0Ðblah key 0 900 4 data': hello,
        '/*': asterisk,
    }


def _get_http_response(connection, method='GET'):
    """Build a response object bound to the connection's socket."""
    c = connection
    kwargs = {'strict': c.strict} if hasattr(c, 'strict') else {}
    # Python 3.2 removed the 'strict' feature, saying:
    # "http.client now always assumes HTTP/1.x compliant servers."
    return c.response_class(c.sock, method=method, **kwargs)


@pytest.fixture
def testing_server(wsgi_server_client):
    """Attach a WSGI app to the given server and pre-configure it."""
    wsgi_server = wsgi_server_client.server_instance
    wsgi_server.wsgi_app = HelloController()
    wsgi_server.max_request_body_size = 30000000
    wsgi_server.server_client = wsgi_server_client
    return wsgi_server


@pytest.fixture
def test_client(testing_server):
    """Get and return a test client out of the given server."""
    return testing_server.server_client


def test_http_connect_request(test_client):
    """Check that a CONNECT query results in Method Not Allowed status."""
    status_line = test_client.connect('/anything')[0]
    actual_status = int(status_line[:3])
    assert actual_status == 405


def test_normal_request(test_client):
    """Check that a normal GET query succeeds."""
    status_line, _, actual_resp_body = test_client.get('/hello')
    actual_status = int(status_line[:3])
    assert actual_status == HTTP_OK
    assert actual_resp_body == b'Hello world!'


def test_query_string_request(test_client):
    """Check that a GET param is parsed well."""
    status_line, _, actual_resp_body = test_client.get(
        '/query_string?test=True',
    )
    actual_status = int(status_line[:3])
    assert actual_status == HTTP_OK
    assert actual_resp_body == b'test=True'


@pytest.mark.parametrize(
    'uri',
    (
        '/hello',  # plain
        '/query_string?test=True',  # query
        '/{0}?{1}={2}'.format(  # quoted unicode
            *map(urllib.parse.quote, ('Юххууу', 'ї', 'йо'))
        ),
    ),
)
def test_parse_acceptable_uri(test_client, uri):
    """Check that the server responds with OK to valid GET queries."""
    status_line = test_client.get(uri)[0]
    actual_status = int(status_line[:3])
    assert actual_status == HTTP_OK


@pytest.mark.xfail(six.PY2, reason='Fails on Python 2')
def test_parse_uri_unsafe_uri(test_client):
    """Test that a malicious URI does not allow HTTP injection.

    This effectively checks that sending a GET request with the URL

        /%A0%D0blah%20key%200%20900%204%20data

    is not converted into

        GET /
        blah key 0 900 4 data
        HTTP/1.1

    which would otherwise be a security issue.
    """
    c = test_client.get_connection()
    resource = '/\xa0Ðblah key 0 900 4 data'.encode('latin-1')
    quoted = urllib.parse.quote(resource)
    assert quoted == '/%A0%D0blah%20key%200%20900%204%20data'
    request = 'GET {quoted} HTTP/1.1'.format(**locals())
    c._output(request.encode('utf-8'))
    c._send_output()
    response = _get_http_response(c, method='GET')
    response.begin()
    assert response.status == HTTP_OK
    assert response.read(12) == b'Hello world!'
    c.close()


def test_parse_uri_invalid_uri(test_client):
    """Check that the server responds with Bad Request to invalid GET queries.

    Invalid request line test case: it should only contain US-ASCII.
    """
    c = test_client.get_connection()
    c._output(u'GET /йопта! HTTP/1.1'.encode('utf-8'))
    c._send_output()
    response = _get_http_response(c, method='GET')
    response.begin()
    assert response.status == HTTP_BAD_REQUEST
    assert response.read(21) == b'Malformed Request-URI'
    c.close()


@pytest.mark.parametrize(
    'uri',
    (
        'hello',  # ascii
        'привіт',  # non-ascii
    ),
)
def test_parse_no_leading_slash_invalid(test_client, uri):
    """Check that the server responds with Bad Request to invalid GET queries.

    Invalid request line test case: it should have a leading slash
    (i.e. be absolute).
    """
    status_line, _, actual_resp_body = test_client.get(
        urllib.parse.quote(uri),
    )
    actual_status = int(status_line[:3])
    assert actual_status == HTTP_BAD_REQUEST
    assert b'starting with a slash' in actual_resp_body


def test_parse_uri_absolute_uri(test_client):
    """Check that the server responds with Bad Request to an Absolute URI.

    Only proxy servers should allow this.
    """
    status_line, _, actual_resp_body = test_client.get('http://google.com/')
    actual_status = int(status_line[:3])
    assert actual_status == HTTP_BAD_REQUEST
    expected_body = b'Absolute URI not allowed if server is not a proxy.'
    assert actual_resp_body == expected_body


def test_parse_uri_asterisk_uri(test_client):
    """Check that the server responds with OK to OPTIONS with "*" URI."""
    status_line, _, actual_resp_body = test_client.options('*')
    actual_status = int(status_line[:3])
    assert actual_status == HTTP_OK
    expected_body = b'Got asterisk URI path with OPTIONS method'
    assert actual_resp_body == expected_body


def test_parse_uri_fragment_uri(test_client):
    """Check that the server responds with Bad Request to a URI with a fragment."""
    status_line, _, actual_resp_body = test_client.get(
        '/hello?test=something#fake',
    )
    actual_status = int(status_line[:3])
    assert actual_status == HTTP_BAD_REQUEST
    expected_body = b'Illegal #fragment in Request-URI.'
    assert actual_resp_body == expected_body


def test_no_content_length(test_client):
    """Test that a POST query with an empty body succeeds."""
    # "The presence of a message-body in a request is signaled by the
    # inclusion of a Content-Length or Transfer-Encoding header field in
    # the request's message-headers."
    #
    # Send a message with neither header and no body.
    c = test_client.get_connection()
    c.request('POST', '/no_body')
    response = c.getresponse()
    actual_resp_body = response.read()
    actual_status = response.status
    assert actual_status == HTTP_OK
    assert actual_resp_body == b'Hello world!'


def test_content_length_required(test_client):
    """Test that a POST query with a body fails without a Content-Length."""
    # Now send a message that has no Content-Length, but does send a body.
    # Verify that CP times out the socket and responds
    # with 411 Length Required.
    c = test_client.get_connection()
    c.request('POST', '/body_required')
    response = c.getresponse()
    response.read()

    actual_status = response.status
    assert actual_status == HTTP_LENGTH_REQUIRED


@pytest.mark.parametrize(
    'request_line,status_code,expected_body',
    (
        (
            b'GET /',  # missing protocol
            HTTP_BAD_REQUEST, b'Malformed Request-Line',
        ),
        (
            b'GET / HTTPS/1.1',  # invalid protocol
            HTTP_BAD_REQUEST, b'Malformed Request-Line: bad protocol',
        ),
        (
            b'GET / HTTP/1',  # invalid version
            HTTP_BAD_REQUEST, b'Malformed Request-Line: bad version',
        ),
        (
            b'GET / HTTP/2.15',  # unsupported version
            HTTP_VERSION_NOT_SUPPORTED, b'Cannot fulfill request',
        ),
    ),
)
def test_malformed_request_line(
    test_client, request_line,
    status_code, expected_body,
):
    """Test missing or invalid HTTP version in Request-Line."""
    c = test_client.get_connection()
    c._output(request_line)
    c._send_output()
    response = _get_http_response(c, method='GET')
    response.begin()
    assert response.status == status_code
    assert response.read(len(expected_body)) == expected_body
    c.close()


def test_malformed_http_method(test_client):
    """Test a non-uppercase HTTP method."""
    c = test_client.get_connection()
    c.putrequest('GeT', '/malformed_method_case')
    c.putheader('Content-Type', 'text/plain')
    c.endheaders()

    response = c.getresponse()
    actual_status = response.status
    assert actual_status == HTTP_BAD_REQUEST
    actual_resp_body = response.read(21)
    assert actual_resp_body == b'Malformed method name'


def test_malformed_header(test_client):
    """Check that a broken HTTP header results in Bad Request."""
    c = test_client.get_connection()
    c.putrequest('GET', '/')
    c.putheader('Content-Type', 'text/plain')
    # See https://www.bitbucket.org/cherrypy/cherrypy/issue/941
    c._output(b'Re, 1.2.3.4#015#012')
    c.endheaders()

    response = c.getresponse()
    actual_status = response.status
    assert actual_status == HTTP_BAD_REQUEST
    actual_resp_body = response.read(20)
    assert actual_resp_body == b'Illegal header line.'


def test_request_line_split_issue_1220(test_client):
    """Check that an HTTP request line of exactly 256 chars length is OK."""
    Request_URI = (
        '/hello?'
        'intervenant-entreprise-evenement_classaction='
        'evenement-mailremerciements'
        '&_path=intervenant-entreprise-evenement'
        '&intervenant-entreprise-evenement_action-id=19404'
        '&intervenant-entreprise-evenement_id=19404'
        '&intervenant-entreprise_id=28092'
    )
    assert len('GET %s HTTP/1.1\r\n' % Request_URI) == 256

    actual_resp_body = test_client.get(Request_URI)[2]
    assert actual_resp_body == b'Hello world!'


def test_garbage_in(test_client):
    """Test that the server sends an error for garbage received over TCP."""
    # Connect without SSL regardless of server.scheme
    c = test_client.get_connection()
    c._output(b'gjkgjklsgjklsgjkljklsg')
    c._send_output()
    response = c.response_class(c.sock, method='GET')
    try:
        response.begin()
        actual_status = response.status
        assert actual_status == HTTP_BAD_REQUEST
        actual_resp_body = response.read(22)
        assert actual_resp_body == b'Malformed Request-Line'
        c.close()
    except socket.error as ex:
        # "Connection reset by peer" is also acceptable.
        if ex.errno != errno.ECONNRESET:
            raise


class CloseController:
    """Controller for testing the close callback."""

    def __call__(self, environ, start_response):
        """Get the req to know header-sent status."""
        self.req = start_response.__self__.req
        resp = CloseResponse(self.close)
        start_response(resp.status, resp.headers.items())
        return resp

    def close(self):
        """Close, writing hello."""
        self.req.write(b'hello')


class CloseResponse:
    """Dummy empty response to trigger the no-body status."""

    def __init__(self, close):
        """Use some defaults to ensure we have a header."""
        self.status = '200 OK'
        self.headers = {'Content-Type': 'text/html'}
        self.close = close

    def __getitem__(self, index):
        """Ensure we don't have a body."""
        raise IndexError()

    def output(self):
        """Return self to hook the close method."""
        return self


@pytest.fixture
def testing_server_close(wsgi_server_client):
    """Attach a WSGI app to the given server and pre-configure it."""
    wsgi_server = wsgi_server_client.server_instance
    wsgi_server.wsgi_app = CloseController()
    wsgi_server.max_request_body_size = 30000000
    wsgi_server.server_client = wsgi_server_client
    return wsgi_server


def test_send_header_before_closing(testing_server_close):
    """Test we are actually sending the headers before calling 'close'."""
    _, _, resp_body = testing_server_close.server_client.get('/')
    assert resp_body == b'hello'

lib/cheroot/test/test_dispatch.py (new file, 55 lines)
@@ -0,0 +1,55 @@
"""Tests for the HTTP server."""
# -*- coding: utf-8 -*-
# vim: set fileencoding=utf-8 :

from __future__ import absolute_import, division, print_function

from cheroot.wsgi import PathInfoDispatcher


def wsgi_invoke(app, environ):
    """Serve 1 request from a WSGI application."""
    response = {}

    def start_response(status, headers):
        response.update({
            'status': status,
            'headers': headers,
        })

    response['body'] = b''.join(
        app(environ, start_response),
    )

    return response
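
As a quick illustration of this harness (using a hypothetical PEP 3333 app defined inline, not part of the diff):

    def trivial_app(environ, start_response):
        start_response('200 OK', [('Content-Type', 'text/plain')])
        return [b'pong']

    assert wsgi_invoke(trivial_app, {'PATH_INFO': '/'})['body'] == b'pong'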


def test_dispatch_no_script_name():
    """Dispatch despite the lack of SCRIPT_NAME in environ."""
    # Bare-bones WSGI hello world app (from PEP 333).
    def app(environ, start_response):
        start_response(
            '200 OK', [
                ('Content-Type', 'text/plain; charset=utf-8'),
            ],
        )
        return [u'Hello, world!'.encode('utf-8')]

    # Build a dispatch table.
    d = PathInfoDispatcher([
        ('/', app),
    ])

    # Dispatch a request without `SCRIPT_NAME`.
    response = wsgi_invoke(
        d, {
            'PATH_INFO': '/foo',
        },
    )
    assert response == {
        'status': '200 OK',
        'headers': [
            ('Content-Type', 'text/plain; charset=utf-8'),
        ],
        'body': b'Hello, world!',
    }

lib/cheroot/test/test_errors.py (new file, 30 lines)
@@ -0,0 +1,30 @@
"""Test suite for ``cheroot.errors``."""

import pytest

from cheroot import errors

from .._compat import IS_LINUX, IS_MACOS, IS_WINDOWS


@pytest.mark.parametrize(
    'err_names,err_nums',
    (
        (('', 'some-nonsense-name'), []),
        (
            (
                'EPROTOTYPE', 'EAGAIN', 'EWOULDBLOCK',
                'WSAEWOULDBLOCK', 'EPIPE',
            ),
            (91, 11, 32) if IS_LINUX else
            (32, 35, 41) if IS_MACOS else
            (32, 10041, 11, 10035) if IS_WINDOWS else
            (),
        ),
    ),
)
def test_plat_specific_errors(err_names, err_nums):
    """Test that plat_specific_errors retrieves the correct err num list."""
    actual_err_nums = errors.plat_specific_errors(*err_names)
    assert len(actual_err_nums) == len(err_nums)
    assert sorted(actual_err_nums) == sorted(err_nums)
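
A rough sketch of the name-to-number lookup semantics under test (an illustration only, not cheroot's actual code); de-duplication matters because Linux aliases EAGAIN and EWOULDBLOCK to the same number, which is why five names map to only three numbers there:

    import errno

    def plat_specific_errors_sketch(*names):
        # Resolve only the names this platform's errno module defines.
        nums = {getattr(errno, name) for name in names if hasattr(errno, name)}
        return sorted(nums)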

lib/cheroot/test/test_makefile.py (new file, 52 lines)
@@ -0,0 +1,52 @@
"""Tests for ``cheroot.makefile``."""

from cheroot import makefile


__metaclass__ = type


class MockSocket:
    """Mocks a socket."""

    def __init__(self):
        """Initialize."""
        self.messages = []

    def recv_into(self, buf):
        """Simulate recv_into for Python 3."""
        if not self.messages:
            return 0
        msg = self.messages.pop(0)
        for index, byte in enumerate(msg):
            buf[index] = byte
        return len(msg)

    def recv(self, size):
        """Simulate recv for Python 2."""
        try:
            return self.messages.pop(0)
        except IndexError:
            return ''

    def send(self, val):
        """Simulate a send."""
        return len(val)


def test_bytes_read():
    """Reader should capture bytes read."""
    sock = MockSocket()
    sock.messages.append(b'foo')
    rfile = makefile.MakeFile(sock, 'r')
    rfile.read()
    assert rfile.bytes_read == 3


def test_bytes_written():
    """Writer should capture bytes written."""
    sock = MockSocket()
    sock.messages.append(b'foo')
    wfile = makefile.MakeFile(sock, 'w')
    wfile.write(b'bar')
    assert wfile.bytes_written == 3
Some files were not shown because too many files have changed in this diff.