mirror of
https://github.com/zint/zint
synced 2024-11-16 20:57:25 +13:00
407 lines
12 KiB
Python
407 lines
12 KiB
Python
# This script takes the output from pandoc and converts it into the format needed by
|
|
# the website at Zint.org.uk
|
|
#
|
|
# Warning: This code is ugly... but it saves days of manual effort updating the website.
|
|
#
|
|
# Copyright (C) 2022 <rstuart114@gmail.com>
|
|
|
|
# Works out which tags should influence indentation and puts them on their own line
|
|
def isolate_tag(tag):
|
|
global stage
|
|
|
|
indentable_tag = True
|
|
for keyword in indent_skip:
|
|
if keyword in tag:
|
|
indentable_tag = False
|
|
|
|
if '</' in tag:
|
|
# Close tag
|
|
if (indentable_tag):
|
|
stage += "\n"
|
|
stage += tag
|
|
stage += "\n"
|
|
else:
|
|
stage += tag
|
|
else:
|
|
# Open tag
|
|
if (indentable_tag):
|
|
stage += "\n"
|
|
stage += tag
|
|
stage += "\n"
|
|
else:
|
|
stage += tag
|
|
|
|
# Add the right amount of indendation (indentation X 4 spaces)
|
|
def add_indent():
|
|
global indentation
|
|
retval = ""
|
|
|
|
for i in range(0,indentation):
|
|
retval += " "
|
|
|
|
return retval
|
|
|
|
# Apply indentation to text
|
|
def with_indent(text):
|
|
global indentation
|
|
retval = ""
|
|
d = ''
|
|
|
|
for c in text:
|
|
if d == '\n':
|
|
retval += d
|
|
retval += add_indent()
|
|
else:
|
|
retval += d
|
|
d = c
|
|
|
|
retval += d
|
|
|
|
return retval
|
|
|
|
# Read file and pull some tags onto their own lines for later processing
|
|
manual = ""
|
|
tag = False
|
|
tag_buffer = ""
|
|
text_buffer = ""
|
|
stage = ""
|
|
indent_skip = ['img', 'code', 'pre', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'span', '<a', '</a', 'sup', '<col', '</col', '<hr', 'div']
|
|
|
|
print("Reading... manual.html")
|
|
with open('manual.html') as f:
|
|
manual = f.read()
|
|
|
|
for c in manual:
|
|
if c == '<':
|
|
stage += text_buffer
|
|
tag = True
|
|
tag_buffer = ""
|
|
|
|
if (tag):
|
|
tag_buffer += c
|
|
else:
|
|
text_buffer += c
|
|
|
|
if c == '>':
|
|
tag_buffer = tag_buffer.replace("\n", " ")
|
|
isolate_tag(tag_buffer)
|
|
tag = False
|
|
text_buffer = ""
|
|
|
|
f.close()
|
|
manual = stage
|
|
stage = ""
|
|
|
|
print("Adjusting HTML")
|
|
# Change the guts of the HTML tags
|
|
in_dd = False
|
|
to_remove = False
|
|
remove_next = False
|
|
span_literal = False
|
|
for c in manual:
|
|
if c == '<':
|
|
# Remove "{#tbl:" table identifiers
|
|
if '{#tbl:' in text_buffer:
|
|
text_buffer = text_buffer[text_buffer.index('tag=') + 7:-3]
|
|
text_buffer = text_buffer.replace('\n', ' ')
|
|
text_buffer = '\n' + text_buffer + '\n'
|
|
|
|
# Remove "{@tabl:" table references
|
|
if 'tbl:' in text_buffer:
|
|
text_buffer = ''
|
|
|
|
stage += text_buffer
|
|
tag = True
|
|
tag_buffer = ""
|
|
to_remove = False
|
|
|
|
if (tag):
|
|
tag_buffer += c
|
|
else:
|
|
text_buffer += c
|
|
|
|
if c == '>':
|
|
# Remove some tags which aren't needed on website
|
|
if 'span' in tag_buffer:
|
|
to_remove = True
|
|
|
|
if 'div' in tag_buffer:
|
|
to_remove = True
|
|
|
|
if '<col' in tag_buffer:
|
|
to_remove = True
|
|
|
|
if '</col' in tag_buffer:
|
|
to_remove = True
|
|
|
|
if (remove_next):
|
|
to_remove = True
|
|
remove_next = False
|
|
|
|
if ('a href' in tag_buffer) and ('aria-hidden="true"' in tag_buffer):
|
|
to_remove = True
|
|
remove_next = True
|
|
|
|
if '<a href="#' in tag_buffer:
|
|
to_remove = True
|
|
remove_next = True
|
|
|
|
# Don't allow <p> and </p> between <dd> and </dd>
|
|
if (tag_buffer == "<dd>"):
|
|
in_dd = True
|
|
if (tag_buffer == "</dd>"):
|
|
in_dd = False
|
|
|
|
if (in_dd and tag_buffer == '<p>'):
|
|
to_remove = True
|
|
|
|
if (in_dd and tag_buffer == '</p>'):
|
|
to_remove = True
|
|
|
|
# Remove attributes for some tags
|
|
if '<pre' in tag_buffer:
|
|
tag_buffer = '<pre>'
|
|
|
|
if '<table' in tag_buffer:
|
|
tag_buffer = '<table>'
|
|
|
|
if '<tr' in tag_buffer:
|
|
tag_buffer = '<tr>'
|
|
|
|
if '<td' in tag_buffer:
|
|
tag_buffer = '<td>'
|
|
|
|
if '<th ' in tag_buffer:
|
|
tag_buffer = '<th>'
|
|
|
|
# Bump all headers up one level
|
|
tag_buffer = tag_buffer.replace('<h6', '<h7')
|
|
tag_buffer = tag_buffer.replace('</h6', '</h7')
|
|
tag_buffer = tag_buffer.replace('<h5', '<h6')
|
|
tag_buffer = tag_buffer.replace('</h5', '</h6')
|
|
tag_buffer = tag_buffer.replace('<h4', '<h5')
|
|
tag_buffer = tag_buffer.replace('</h4', '</h5')
|
|
tag_buffer = tag_buffer.replace('<h3', '<h4')
|
|
tag_buffer = tag_buffer.replace('</h3', '</h4')
|
|
tag_buffer = tag_buffer.replace('<h2', '<h3')
|
|
tag_buffer = tag_buffer.replace('</h2', '</h3')
|
|
tag_buffer = tag_buffer.replace('<h1', '<h2')
|
|
tag_buffer = tag_buffer.replace('</h1', '</h2')
|
|
|
|
# Change class names for code snippets
|
|
tag_buffer = tag_buffer.replace('class="sourceCode bash"', 'class="language-bash"')
|
|
tag_buffer = tag_buffer.replace('class="sourceCode c"', 'class="language-cpp"')
|
|
|
|
# Change location of images
|
|
tag_buffer = tag_buffer.replace('src="images/', 'src="/images/manual/')
|
|
|
|
# Change <code> without language to <span>
|
|
if tag_buffer == '<code>':
|
|
tag_buffer = '<span class="literal">'
|
|
span_literal = True
|
|
|
|
if tag_buffer == '</code>' and span_literal:
|
|
tag_buffer = '</span>'
|
|
span_literal = False
|
|
|
|
if not to_remove:
|
|
stage += tag_buffer
|
|
tag = False
|
|
text_buffer = ""
|
|
|
|
manual = stage
|
|
stage = ""
|
|
|
|
print("Removing empty lines")
|
|
# Remove blank lines unless in between <pre> and </pre>
|
|
last_char = ''
|
|
in_pre = False
|
|
for c in manual:
|
|
if c == '<':
|
|
tag = True
|
|
tag_buffer = ""
|
|
|
|
if (tag):
|
|
tag_buffer += c
|
|
else:
|
|
text_buffer += c
|
|
|
|
if c == '>':
|
|
if ("<pre" in tag_buffer):
|
|
in_pre = True
|
|
if ("</pre" in tag_buffer):
|
|
in_pre = False
|
|
tag = False
|
|
text_buffer = ""
|
|
|
|
if c == '\n':
|
|
if (last_char != '\n') or (in_pre == True):
|
|
stage += c
|
|
else:
|
|
stage += c
|
|
last_char = c
|
|
|
|
manual = stage
|
|
stage = ""
|
|
|
|
print("Applying indentation")
|
|
# Indent the code to make it easier to read
|
|
indentation = 1
|
|
in_pre = False
|
|
paragraph_block = False
|
|
document_start = True
|
|
chapter_six = False
|
|
last_char = ''
|
|
for c in manual:
|
|
if c == '<':
|
|
#Fix 'floating' full stops
|
|
text_buffer = text_buffer.replace(' . ', '. ')
|
|
|
|
# Apply indentation to text
|
|
if in_pre:
|
|
stage += text_buffer
|
|
else:
|
|
stage += with_indent(text_buffer)
|
|
tag = True
|
|
tag_buffer = ""
|
|
|
|
if (tag):
|
|
tag_buffer += c
|
|
else:
|
|
# Strip '{}' from already removed table references
|
|
if c == '}' and last_char == '{':
|
|
text_buffer = text_buffer[:-1]
|
|
else:
|
|
text_buffer += c
|
|
last_char = c
|
|
|
|
if c == '>':
|
|
indentable_tag = True
|
|
for keyword in indent_skip:
|
|
if keyword in tag_buffer:
|
|
indentable_tag = False
|
|
|
|
# Protect the indentation in <pre> segments
|
|
if ('<pre' in tag_buffer):
|
|
in_pre = True
|
|
if ('</pre' in tag_buffer):
|
|
in_pre = False
|
|
|
|
# Chapter 6 requires special treatment - detect beginning and end
|
|
if ('id="types-of-symbology"' in tag_buffer):
|
|
chapter_six = True
|
|
if ('id="legal-and-version-information"' in tag_buffer):
|
|
chapter_six = False
|
|
|
|
if '</' in tag_buffer:
|
|
# Close tag
|
|
if (indentable_tag):
|
|
indentation -= 1
|
|
stage += add_indent()
|
|
stage += tag_buffer
|
|
else:
|
|
if text_buffer.endswith('\n'):
|
|
stage += add_indent()
|
|
stage += tag_buffer
|
|
else:
|
|
# Split into sections
|
|
if (indentation == 1) and ('<p' in tag_buffer):
|
|
if not paragraph_block:
|
|
if document_start:
|
|
document_start = False
|
|
else:
|
|
stage += '</section>\n'
|
|
stage += '<section class="container">\n'
|
|
paragraph_block = True
|
|
|
|
# Handle headers but also decide where to split into multiple HTML files and mark with <page>
|
|
if (indentation == 1):
|
|
if ('<h2' in tag_buffer):
|
|
if document_start:
|
|
document_start = False
|
|
stage += '<section class="container">\n'
|
|
paragraph_block = True
|
|
else:
|
|
stage += '</section>\n'
|
|
stage += '<page>\n'
|
|
stage += '<section class="container">\n'
|
|
paragraph_block = True
|
|
elif ('<h3' in tag_buffer) and chapter_six:
|
|
stage += '</section>\n'
|
|
stage += '<page>\n'
|
|
stage += '<section class="container">\n'
|
|
paragraph_block = True
|
|
elif ('<h' in tag_buffer):
|
|
if not paragraph_block:
|
|
stage += '</section>\n'
|
|
stage += '<section class="container">\n'
|
|
paragraph_block = True
|
|
|
|
# <dl> section has it's own class
|
|
if (indentation == 1) and ('<dl' in tag_buffer):
|
|
stage += '</section>\n'
|
|
stage += '<section class="definition-list container">\n'
|
|
paragraph_block = False
|
|
|
|
# <table> section has it's own class
|
|
if (indentation == 1) and ('<table' in tag_buffer):
|
|
stage += '</section>\n'
|
|
stage += '<section class="table">\n'
|
|
paragraph_block = False
|
|
|
|
# Open tag
|
|
if (indentable_tag):
|
|
stage += add_indent()
|
|
stage += tag_buffer
|
|
indentation += 1
|
|
else:
|
|
if text_buffer.endswith('\n'):
|
|
stage += add_indent()
|
|
stage += tag_buffer
|
|
tag = False
|
|
text_buffer = ""
|
|
|
|
stage += '\n</section>\n'
|
|
manual = stage
|
|
stage = ""
|
|
|
|
# Remove <h2> data and split into output files
|
|
out_filenames = ['chapter1.html', 'chapter2.html', 'chapter3.html', 'chapter4.html', 'chapter5.html',
|
|
'chapter6.0.html', 'chapter6.1.html', 'chapter6.2.html', 'chapter6.3.html', 'chapter6.4.html',
|
|
'chapter6.5.html', 'chapter6.6.html', 'chapter6.7.html', 'chapter7.html', 'appendixa.html', 'appendixb.html']
|
|
page = 0
|
|
print("Writing... ", out_filenames[page])
|
|
f = open(out_filenames[page], "w")
|
|
h2_tag = False
|
|
for c in manual:
|
|
if c == '<':
|
|
if h2_tag == False:
|
|
stage += text_buffer
|
|
tag = True
|
|
tag_buffer = ""
|
|
|
|
if (tag):
|
|
tag_buffer += c
|
|
else:
|
|
text_buffer += c
|
|
|
|
if c == '>':
|
|
if '<h2' in tag_buffer:
|
|
h2_tag = True
|
|
elif '</h2' in tag_buffer:
|
|
h2_tag = False
|
|
elif tag_buffer == '<page>':
|
|
f.write(stage)
|
|
f.close()
|
|
stage = ""
|
|
page += 1
|
|
print("Writing... ", out_filenames[page])
|
|
f = open(out_filenames[page], "w")
|
|
else:
|
|
stage += tag_buffer
|
|
tag = False
|
|
text_buffer = ""
|
|
|
|
f.write(stage)
|
|
f.close()
|