|
3 | 3 | import json |
4 | 4 | import os |
5 | 5 | from collections import deque, defaultdict |
6 | | -from html.parser import HTMLParser |
7 | 6 | from pathlib import Path |
8 | | -from xml.dom import SyntaxErr |
9 | 7 |
|
10 | | -import cssutils |
11 | 8 | from arca.exceptions import PullError, BuildError, RequirementsMismatch |
12 | 9 | from arca.utils import get_hash_for_file |
13 | 10 |
|
@@ -104,131 +101,6 @@ def get_lesson_tree_hash(repo, lesson_slug): |
104 | 101 | return commit |
105 | 102 |
|
106 | 103 |
|
class DisallowedElement(Exception):
    """Raised when rendered HTML uses an element that is not allowed."""
109 | | - |
110 | | - |
class InvalidHTML(DisallowedElement):
    """Raised when the HTML parser reports an error for the markup it was fed."""
113 | | - |
114 | | - |
class DisallowedAttribute(DisallowedElement):
    """Raised when an element carries an attribute that is not allowed."""
117 | | - |
118 | | - |
class DisallowedStyle(Exception):
    """Raised when CSS in a rendered page cannot be verified or is out of scope.

    The class attributes below are the ready-made messages to raise with.
    """

    # Shared prefix of both messages.
    _BASE = "Style element or page css are only allowed when they modify .dataframe elements."
    # The stylesheet could not be parsed, so it could not be checked.
    COULD_NOT_PARSE = _BASE + " Could not parse the styles and verify."
    # The stylesheet parsed, but contains a selector outside ``.dataframe``.
    OUT_OF_SCOPE = _BASE + " Rendered page contains a style that modifies something else."
124 | | - |
125 | | - |
class AllowedElementsParser(HTMLParser):
    """
    This parser is used on all HTML returned from forked repositories.

    It raises exceptions in these cases:

    * :class:`DisallowedElement` - if an element not defined in :attr:`allowed_elements` is used
    * :class:`DisallowedAttribute` - if an attribute not defined in :attr:`allowed_attributes` is used
    * :class:`InvalidHTML` - if :meth:`error` is invoked
    * :class:`DisallowedStyle` - if a <style> element contains unparsable css or if it modifies something
      different than ``.dataframe`` elements.
    """

    def __init__(self, **kwargs):
        """Create the parser; ``kwargs`` are passed through to :class:`HTMLParser`."""
        super().__init__(**kwargs)
        # raiseExceptions=True makes unparsable css surface as an exception in validate_css
        self.css_parser = cssutils.CSSParser(raiseExceptions=True)

        #: Set of allowed HTML elements
        #: It has been compiled out of elements currently used in canonical lessons
        self.allowed_elements = {
            # functional:
            'a', 'abbr', 'audio', 'img', 'source',

            # styling:
            'big', 'blockquote', 'code', 'font', 'i', 'tt', 'kbd', 'u', 'var', 'small', 'em', 'strong', 'sub',

            # formatting:
            'br', 'div', 'hr', 'p', 'pre', 'span',

            # lists:
            'dd', 'dl', 'dt', 'li', 'ul', 'ol',

            # headers:
            'h1', 'h2', 'h3', 'h4', 'h5', 'h6',

            # tables:
            'table', 'tbody', 'td', 'th', 'thead', 'tr',

            # icons:
            'svg', 'circle', 'path',

            # A special check is applied in :meth:`handle_data` method
            # (only ``.dataframe`` styles allowed, generated from notebook converter)
            'style',
        }

        #: Set of allowed HTML attributes
        #: Compiled out of attributes currently used in canonical lessons
        self.allowed_attributes = {
            'alt', 'aria-hidden', 'border', 'class', 'color', 'colspan', 'controls', 'cx', 'cy', 'd', 'halign', 'href',
            'id', 'r', 'rowspan', 'src', 'start', 'title', 'type', 'valign', 'viewbox',

            # inline styles generated from notebook converter
            'style',
        }

        self.attrs = set()

    def error(self, message):
        """Report a parser error as :class:`InvalidHTML`."""
        raise InvalidHTML(message)

    def _disallowed_attributes(self, attrs):
        """Return the sorted names from ``attrs`` that are not in :attr:`allowed_attributes`.

        ``attrs`` is a sequence whose items have the attribute name at index 0.
        """
        used_names = {attr[0] for attr in attrs}
        return sorted(used_names - self.allowed_attributes)

    def check_attributes(self, attrs):
        """Raise :class:`DisallowedAttribute` if ``attrs`` contains a name that is not allowed.

        Only the offending attribute names are listed in the message.
        """
        disallowed = self._disallowed_attributes(attrs)

        if disallowed:
            raise DisallowedAttribute("Attributes '{}' are not allowed".format(", ".join(disallowed)))

    def handle_starttag(self, tag, attrs):
        """Reject elements outside :attr:`allowed_elements`, then check the attributes."""
        if tag not in self.allowed_elements:
            raise DisallowedElement(f"Element {tag} is not allowed.")

        self.check_attributes(attrs)

    def handle_startendtag(self, tag, attrs):
        """Apply the same checks to self-closing tags as to start tags."""
        self.handle_starttag(tag, attrs)

    def handle_data(self, data):
        """Validate text as css when the most recently seen tag is ``style``."""
        # NOTE(review): ``lasttag`` appears to be updated only when a start tag is parsed, so text
        # following ``</style>`` may also be validated as css - confirm before relying on it.
        if self.lasttag == "style":
            self.validate_css(data)

    def reset_and_feed(self, data):
        """Reset the parser state and parse ``data`` from scratch."""
        self.reset()
        self.feed(data)

    def allow_selector(self, selector: str):
        """Return ``True`` only for selectors that start with ``".dataframe "`` (note the trailing space)."""
        return selector.startswith(".dataframe ")

    def validate_css(self, data):
        """Parse ``data`` as css and make sure every selector is allowed by :meth:`allow_selector`.

        An empty stylesheet is accepted.

        :raise DisallowedStyle: if the css can't be parsed, if a rule has no selector list to verify,
                                or if any selector is rejected by :meth:`allow_selector`
        """
        try:
            parsed_css = self.css_parser.parseString(data)
        except SyntaxErr as err:
            raise DisallowedStyle(DisallowedStyle.COULD_NOT_PARSE) from err

        for rule in parsed_css.cssRules:
            # Rules that expose no ``selectorList`` (presumably at-rules and similar) can't be
            # verified against ``.dataframe``, so they are rejected instead of crashing with
            # an AttributeError.
            selectors = getattr(rule, "selectorList", None)
            if selectors is None:
                raise DisallowedStyle(DisallowedStyle.OUT_OF_SCOPE)

            if not all(self.allow_selector(selector.selectorText) for selector in selectors):
                raise DisallowedStyle(DisallowedStyle.OUT_OF_SCOPE)
230 | | - |
231 | | - |
232 | 104 | def forks_enabled(): |
233 | 105 | """ Returns if forks are enabled. By default they're not (for the purposes of local development). |
234 | 106 |
|
|
0 commit comments