|
module Docs
  class Bash
    # Tidies a scraped page of the Bash reference manual: drops the navigation
    # header, strips section numbering from the page title, removes spacer
    # cells and separator rows from tables, and adds/fixes the ids and
    # fragments that index entries link to.
    #
    # NOTE(review): `css`, `at_css`, `xpath`, `at_xpath` and `doc` are not
    # defined in this file; they are presumably provided by `Filter` and
    # operate on the document being filtered -- confirm against Filter.
    class CleanHtmlFilter < Filter
      # Numbering prefix at the very start of a heading, optionally led by an
      # appendix letter: "3.5.1 ", "10 ", "1. ", "B.1 ".
      # Anchored on purpose so digits that belong to the title itself
      # (e.g. "SVR4.2") are left alone.
      SECTION_NUMBER_PREFIX = /\A\s*(?:[A-Z]\.)?\d+(?:\.\d+)*\.?\s*/

      # Runs every cleanup step in order and returns the document.
      def call
        remove_navigation
        clean_title
        remove_spacer_cells
        add_entry_ids
        fix_index_links
        add_index_letter_ids
        remove_separator_rows
        simplify_link_cells

        doc
      end

      private

      # Removes the navigation header(s) plus the horizontal rules directly
      # below and above the header. Either rule may be absent on a page.
      def remove_navigation
        line_below = at_css('.header + hr')
        line_below.remove if line_below

        line_above = at_xpath('//div[@class="header"]/preceding::hr[1]')
        line_above.remove if line_above

        css('.header').remove
      end

      # Strips chapter/section numbers from the first heading on the page,
      # as well as the "D. " prefix of names like "D. Concept Index" and
      # "D. Function Index". Pages without any heading are left untouched.
      def clean_title
        title_node = at_css('h1, h2, h3, h4, h5, h6')
        return if title_node.nil?

        title = title_node.content.sub(SECTION_NUMBER_PREFIX, '').strip
        title = title[3..-1] if title.start_with?('D. ')
        title_node.content = title
      end

      # Removes table cells containing only a single space.
      # In the original reference they are used to add width between two columns.
      def remove_spacer_cells
        xpath('//td[text()=" " and not(descendant::*)]').remove
      end

      # Gives additional entry nodes an id so they can be linked to.
      def add_entry_ids
        css('dl > dt > code').each do |node|
          # Only take the direct text
          # (i.e. "<div>Hello <span>World</span></div>" becomes "Hello").
          # `text` returns the unescaped content, which is what the index
          # links built in fix_index_links use as their fragment; serializing
          # the text nodes instead would turn "&" into "&amp;" inside the id.
          id = node.xpath('text()').text.strip
          node['id'] = id unless id.empty?
        end
      end

      # Rewrites the fragment of index entries so they link to the id of the
      # entry on the linked page (the link text is used as the fragment).
      def fix_index_links
        css('table[class^=index-] td[valign=top] > a').each do |node|
          href = node['href']
          next if href.nil? # a bare anchor has nothing to rewrite

          # `first.to_s` covers an empty href, for which split returns [].
          path = href.split('#', 2).first.to_s
          fragment = node.content

          # Fix the index entries linking to the Special Parameters page.
          # There are multiple index entries that should link to the same
          # paragraph on that page.
          # Example: the documentation for "$!" is equal to the documentation for "!"
          if path.downcase.include?('special-parameters') &&
             fragment.size > 1 && fragment.start_with?('$')
            fragment = fragment[1..-1]
          end

          node['href'] = path + '#' + fragment
        end
      end

      # Fixes index table letter hashes (the "Jump to" hashes) by mirroring
      # the anchor's name attribute into an id attribute.
      def add_index_letter_ids
        css('table[class^=index-] th > a').each do |node|
          name = node['name']
          node['id'] = name unless name.nil?
        end
      end

      # Removes the cells spanning four columns; in the index tables these
      # hold a horizontal line.
      def remove_separator_rows
        css('td[colspan="4"]').remove
      end

      # Removes additional text from menu entry and index entry cells,
      # keeping only the first link of each cell. Cells without a link are
      # left as they are.
      def simplify_link_cells
        css('td[valign=top]').each do |node|
          link = node.at_css('a')
          node.children = link unless link.nil?
        end
      end
    end
  end
end
0 commit comments