Rework PDF script (#3654)

This commit is contained in:
Zlatan Vasović
2019-12-17 21:25:52 +01:00
committed by GitHub
parent d77f8bc18a
commit e2ae506613
8 changed files with 126 additions and 177 deletions

4
.gitignore vendored
View File

@@ -14,3 +14,7 @@ npm-debug.log
# hosted at https://tldr-pages.github.io/assets/index.json # hosted at https://tldr-pages.github.io/assets/index.json
pages/index.json pages/index.json
index.json index.json
# Generated PDF pages
scripts/pdf/*.html
scripts/pdf/tldr.pdf

View File

@@ -11,22 +11,14 @@ This directory contains the script and related resources to generate a PDF copy
## Highlights ## Highlights
- No LaTeX dependencies for generating the PDF. - No LaTeX dependencies for generating the PDF.
- 3 available color-schemes: *Basic*, *Solarized Light* and *Solarized Dark*. More can be added easily through CSS.
- 3 available color-schemes- *Basic*, *Solarized Light* and *Solarized Dark*. More can be added easily through CSS.
## Requirements ## Requirements
The PDF is generated by first converting the markdown files to HTML, and then rendering those HTML files as PDF. It depends on the following libraries: The PDF is generated by first converting the markdown files to HTML, and then rendering those HTML files as PDF. It depends on the `markdown` and `weasyprint` libraries. To install the dependencies, run:
#### Python-Markdown pip3 install -r requirements.txt
pip3 install markdown
#### WeasyPrint
pip3 install WeasyPrint
Make sure OS specific dependencies for WeasyPrint are installed by following the instructions [here](http://weasyprint.readthedocs.io/en/latest/install.html). Make sure OS specific dependencies for WeasyPrint are installed by following the instructions [here](http://weasyprint.readthedocs.io/en/latest/install.html).
## Usage ## Usage
@@ -34,8 +26,8 @@ Make sure OS specific dependencies for WeasyPrint are installed by following the
Generating the PDF is as simple as running Generating the PDF is as simple as running
python3 render.py <path-to-pages-directory> -c <color-scheme> python3 render.py <path-to-pages-directory> -c <color-scheme>
Complete information about the arguments can be viewed by running Complete information about the arguments can be viewed by running
python3 render.py --help python3 render.py --help
@@ -43,6 +35,3 @@ The color-schemes that can be specified are
* `solarized-light` * `solarized-light`
* `solarized-dark` * `solarized-dark`

View File

@@ -1,34 +1,34 @@
@font-face { @font-face {
font-family: 'PT_Serif-Web-Regular'; font-family: "PT Serif";
src: url('PT_Serif-Web-Regular.ttf') format('truetype'); src: url("pt-serif-web-regular.ttf") format("truetype");
} }
p { p {
margin-left: 2.5em; margin-left: 2.5em;
} }
code { code {
color: darkslategrey; color: darkslategrey;
} }
h1, h4, h2, ul { h1, h2, h4, ul {
font-family: "PT_Serif-Web-Regular"; font-family: "PT Serif";
} }
h1.titlemain { .title-main {
text-align: center; text-align: center;
margin-top: 6em; margin-top: 6em;
font-size: 350%; font-size: 350%;
} }
h4.titlesub { .title-sub {
text-align: center; text-align: center;
font-size: 120%; font-size: 120%;
color: darkslategrey; color: darkslategrey;
} }
h2.titledir { .title-dir {
text-align: center; text-align: center;
margin-top: 8.2em; margin-top: 8.2em;
font-size: 300%; font-size: 300%;
} }

View File

@@ -1,8 +1,8 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
#A Python script to generate a single PDF document with all the tldr pages. It works by generating # A Python script to generate a single PDF document with all the tldr pages. It works by generating
#intermediate HTML files from existing md files using Python-markdown, applying desired formatting # intermediate HTML files from existing md files using Python-markdown, applying desired formatting
#through CSS, and finally rendering them as PDF. There is no LaTeX dependency for generating the PDF. # through CSS, and finally rendering them as PDF. There is no LaTeX dependency for generating the PDF.
import os import os
import sys import sys
@@ -15,108 +15,111 @@ from weasyprint import HTML
def main(loc, colorscheme): def main(loc, colorscheme):
oslist = [] oslist = []
allmd = [] allmd = []
group = [] group = []
ap = [] ap = []
#Checking correctness of path # Checking correctness of path
if not os.path.isdir(loc): if not os.path.isdir(loc):
print("Invalid directory. Please try again!", file = sys.stderr) print("Invalid directory. Please try again!", file = sys.stderr)
sys.exit(1) sys.exit(1)
#Writing names of all directories inside 'pages' to a list # Writing names of all directories inside 'pages' to a list
for os_dir in os.listdir(loc): for os_dir in os.listdir(loc):
oslist.append(os_dir) oslist.append(os_dir)
oslist.sort() oslist.sort()
#Required strings to create intermediate HTML files # Required strings to create intermediate HTML files
header = "<html><head><link rel=stylesheet type=text/css href=" + colorscheme + ".css></head><body>\n" header = '<!doctype html><html><head><meta charset="utf-8"><link rel="stylesheet" href="basic.css">'
footer = "</body></html>" if colorscheme != "basic":
title_content = "<h1 class=titlemain>tldr pages</h1><h4 class=titlesub>Simplified and community driven man pages</h4></body></html>" header += '<link rel="stylesheet" href="' + colorscheme + '.css"></head><body>\n'
header += "</head><body>\n"
footer = "</body></html>"
title_content = "<h1 class=title-main>tldr pages</h1><h4 class=title-sub>Simplified and community-driven man pages</h4></body></html>"
#Creating title page # Creating title page
with open("title.html", 'w') as f: with open("title.html", "w") as f:
f.write(header + title_content) f.write(header + title_content)
group.append(HTML('title.html').render()) group.append(HTML("title.html").render())
for operating_sys in oslist: for operating_sys in oslist:
i = 1 i = 1
#Required string to create directory title pages # Required string to create directory title pages
dir_title = "<h2 class=titledir>" + operating_sys.capitalize() + "</h2></body></html>" dir_title = "<h2 class=title-dir>" + operating_sys.capitalize() + "</h2></body></html>"
#Creating directory title page for current directory # Creating directory title page for current directory
with open("dir_title.html", 'w') as os_html: with open("dir_title.html", "w") as os_html:
os_html.write(header + dir_title) os_html.write(header + dir_title)
group.append(HTML('dir_title.html').render())
#Creating a list of all md files in the current directory group.append(HTML("dir_title.html").render())
for temp in glob.glob(os.path.join(loc, operating_sys, '*.md')):
allmd.append(temp)
#Sorting all filenames in the directory, to maintain the order of the PDF # Creating a list of all md files in the current directory
allmd.sort() for temp in glob.glob(os.path.join(loc, operating_sys, "*.md")):
allmd.append(temp)
#Conversion of md to HTML # Sorting all filenames in the directory, to maintain the order of the PDF
for md in allmd: allmd.sort()
with open(md, "r") as inp: # Conversion of Markdown to HTML
text = inp.readlines() for md in allmd:
with open("htmlout.html", "w") as out: with open(md, "r") as inp:
out.write(header) text = inp.readlines()
for line in text: with open("htmlout.html", "w") as out:
if re.match(r'^>', line): out.write(header)
line = line[:0] + '####' + line[1:]
html = markdown.markdown(line)
out.write(html)
out.write(footer)
group.append(HTML('htmlout.html').render()) for line in text:
print("Rendered page {} of the directory {}".format(str(i), operating_sys)) if re.match(r'^>', line):
i += 1 line = line[:0] + '####' + line[1:]
html = markdown.markdown(line)
allmd.clear() out.write(html)
out.write(footer)
#Merging all the documents into a single PDF group.append(HTML("htmlout.html").render())
for doc in group: print("Rendered page {} of the directory {}".format(str(i), operating_sys))
for p in doc.pages: i += 1
ap.append(p)
#Writing the PDF to disk, preserving metadata of first tldr page allmd.clear()
group[2].copy(ap).write_pdf('tldr.pdf')
if os.path.exists("tldr.pdf"): # Merging all the documents into a single PDF
print("\nCreated tldr.pdf in the current directory!\n") for doc in group:
for p in doc.pages:
ap.append(p)
#Removing unnecessary intermediate files # Writing the PDF to disk, preserving metadata of first tldr page
try: group[2].copy(ap).write_pdf('tldr.pdf')
os.remove("htmlout.html")
os.remove("title.html") if os.path.exists("tldr.pdf"):
os.remove("dir_title.html") print("\nCreated tldr.pdf in the current directory!\n")
except OSError:
print("Error removing temporary file(s)") # Removing unnecessary intermediate files
try:
os.remove("htmlout.html")
os.remove("title.html")
os.remove("dir_title.html")
except OSError:
print("Error removing temporary file(s)")
if __name__ == '__main__': if __name__ == "__main__":
#Unless specified otherwise by the user, this is the default colorscheme # Unless specified otherwise by the user, this is the default colorscheme
colorscheme = "basic" colorscheme = "basic"
#Parsing the arguments # Parsing the arguments
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument("dir_path", help = "Path to tldr 'pages' directory") parser.add_argument("dir_path", help = "Path to tldr 'pages' directory")
parser.add_argument("-c", choices=["solarized-light", "solarized-dark"], help="Color scheme of the PDF") parser.add_argument("-c", choices=["solarized-light", "solarized-dark"], help="Color scheme of the PDF")
args = parser.parse_args() args = parser.parse_args()
loc = args.dir_path loc = args.dir_path
if args.c == "solarized-light" or args.c == "solarized-dark": if args.c == "solarized-light" or args.c == "solarized-dark":
colorscheme = args.c colorscheme = args.c
main(loc, colorscheme) main(loc, colorscheme)

View File

@@ -0,0 +1,2 @@
markdown
weasyprint

View File

@@ -1,40 +1,15 @@
@font-face {
font-family: 'PT_Serif-Web-Regular';
src: url('PT_Serif-Web-Regular.ttf') format('truetype');
}
p {
margin-left: 2.5em;
}
code { code {
color: #b58900; color: #b58900;
} }
h1, h2, h4, ul { h1, h2, h4, ul {
font-family: "PT_Serif-Web-Regular"; color: #93a1a1;
color: #93a1a1;
} }
body { body {
background-color: #002b36; background-color: #002b36;
} }
.title-sub {
h1.titlemain { color: #b58900;
text-align: center;
margin-top: 6em;
font-size: 350%;
} }
h4.titlesub {
text-align: center;
font-size: 120%;
color: #b58900;
}
h2.titledir {
text-align: center;
margin-top: 8.2em;
font-size: 300%;
}

View File

@@ -1,39 +1,15 @@
@font-face {
font-family: 'PT_Serif-Web-Regular';
src: url('PT_Serif-Web-Regular.ttf') format('truetype');
}
p {
margin-left: 2.5em;
}
code { code {
color: #dc322f color: #dc322f
} }
h1, h2, h4, ul { h1, h2, h4, ul {
font-family: "PT_Serif-Web-Regular"; color: #586e75;
color: #586e75;
} }
body { body {
background-color: #fdf6e3; background-color: #fdf6e3;
} }
h1.titlemain { .title-sub {
text-align: center; color: #dc322f
margin-top: 6em;
font-size: 350%;
} }
h4.titlesub {
text-align: center;
font-size: 120%;
color: #dc322f
}
h2.titledir {
text-align: center;
margin-top: 8.2em;
font-size: 300%;
}