Skip to content

Commit 7b7aa34

Browse files
committed
Improve Rust scraper
1 parent 3dbc605 commit 7b7aa34

3 files changed

Lines changed: 43 additions & 6 deletions

File tree

assets/stylesheets/pages/_rust.scss

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,9 +3,18 @@
33

44
h4 { @extend %block-label; }
55
.docblock { margin-left: 1em; }
6+
div.information, div.important-traits {
7+
@extend %note;
8+
9+
> pre { margin: .5rem 0; }
10+
}
611

712
div.stability { margin-bottom: 1em; }
813
em.stab, span.stab { @extend %label; }
914
em.stab.unstable, span.stab.unstable { @extend %label-orange; }
10-
.since, .out-of-band { float: right; }
15+
.out-of-band { float: right; }
16+
.since, .srclink {
17+
float: right;
18+
margin-left: .5rem;
19+
}
1120
}

lib/docs/filters/rust/clean_html.rb

Lines changed: 32 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -2,8 +2,6 @@ module Docs
22
class Rust
33
class CleanHtmlFilter < Filter
44
def call
5-
puts subpath if at_css('#versioninfo')
6-
75
if slug.start_with?('book') || slug.start_with?('reference')
86
@doc = at_css('#content main')
97
elsif slug == 'error-index'
@@ -29,12 +27,16 @@ def call
2927

3028
css('.rusttest', '.test-arrow', 'hr').remove
3129

30+
css('.docblock.attributes').each do |node|
31+
node.remove if node.content.include?('#[must_use]')
32+
end
33+
3234
css('a.header').each do |node|
3335
node.first_element_child['id'] = node['name'] || node['id']
3436
node.before(node.children).remove
3537
end
3638

37-
css('.docblock > h1').each { |node| node.name = 'h4' }
39+
css('.docblock > h1:not(.section-header)').each { |node| node.name = 'h4' }
3840
css('h2.section-header').each { |node| node.name = 'h3' }
3941
css('h1.section-header').each { |node| node.name = 'h2' }
4042

@@ -44,7 +46,7 @@ def call
4446
end
4547
end
4648

47-
css('> .impl-items', '> .docblock', 'pre > pre').each do |node|
49+
css('> .impl-items', '> .docblock', 'pre > pre', '.tooltiptext', '.tooltip').each do |node|
4850
node.before(node.children).remove
4951
end
5052

@@ -65,6 +67,32 @@ def call
6567
doc.first_element_child.name = 'h1' if doc.first_element_child.name = 'h2'
6668
at_css('h1').content = 'Rust Documentation' if root_page?
6769

70+
css('.table-display').each do |node|
71+
node.css('td').each do |td|
72+
node.before(td.children)
73+
end
74+
node.remove
75+
end
76+
77+
css('h2 .important-traits', 'h3 .important-traits', 'h4 .important-traits').each do |node|
78+
content = node.at_css('.content.hidden .content')
79+
node.at_css('.content.hidden').replace(content) if content
80+
node.parent.after(node)
81+
end
82+
83+
css('code.content').each do |node|
84+
node.name = 'pre'
85+
node.css('.fmt-newline').each do |line|
86+
line.inner_html = line.inner_html + "\n"
87+
end
88+
node.inner_html = node.inner_html.gsub('<br>', "\n")
89+
node.content = node.content
90+
end
91+
92+
css('.since + .srclink').each do |node|
93+
node.previous_element.before(node)
94+
end
95+
6896
doc
6997
end
7098
end

lib/docs/scrapers/rust.rb

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
module Docs
22
class Rust < UrlScraper
33
self.type = 'rust'
4-
self.release = '1.28.0'
4+
self.release = '1.29.1'
55
self.base_url = 'https://doc.rust-lang.org/'
66
self.root_path = 'book/second-edition/index.html'
77
self.initial_paths = %w(

0 commit comments

Comments
 (0)