Skip to content

Commit 66a1978

Browse files
committed
pre-commit
1 parent 9aa646f commit 66a1978

1 file changed

Lines changed: 32 additions & 13 deletions

File tree

ac_dc/visualization/visualization.py

Lines changed: 32 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -82,11 +82,12 @@ def get_binary_file_downloader_html(bin_file, file_label="File"):
8282
return href
8383

8484
st.markdown(
85-
"Before diving into this demo, you might want to take a look at how the filtering pipeline looks like in more detail in this " +
86-
get_binary_file_downloader_html(
85+
"Before diving into this demo, you might want to take a look at how the filtering pipeline looks like in more detail in this "
86+
+ get_binary_file_downloader_html(
8787
self.path_instructions,
8888
"pdf",
89-
) + ".",
89+
)
90+
+ ".",
9091
unsafe_allow_html=True,
9192
)
9293

@@ -202,7 +203,9 @@ def print_discared_by_cond(cond):
202203
"few or no repetitions, simply because their length gives them more diversity, and we do "
203204
"not want to discard such documents."
204205
)
205-
self.docs["repetitions_ratio"] = self.docs_checkpoint["repetitions_ratio"]
206+
self.docs["repetitions_ratio"] = self.docs_checkpoint[
207+
"repetitions_ratio"
208+
]
206209
for i in range(len(self.docs["repetitions_ratio"])):
207210
self.docs["repetitions_ratio"].iloc[i] = self.docs[
208211
"repetitions_ratio"
@@ -243,13 +246,21 @@ def print_discared_by_cond(cond):
243246

244247
if "stopwords_ratio" in columns:
245248
with st.sidebar.expander("Stop words ratio"):
246-
stopwords_file = st.file_uploader("Upload your own list of stop words (one per line). If there is none, the default one is used.")
249+
stopwords_file = st.file_uploader(
250+
"Upload your own list of stop words (one per line). If there is none, the default one is used."
251+
)
247252
if stopwords_file:
248-
new_stopwords = StringIO(stopwords_file.getvalue().decode("utf-8")).read()
253+
new_stopwords = StringIO(
254+
stopwords_file.getvalue().decode("utf-8")
255+
).read()
249256
new_stopwords = set(new_stopwords.split("\n"))
250-
self.docs["stopwords_ratio"] = self.docs_checkpoint["stopwords_ratio"]
257+
self.docs["stopwords_ratio"] = self.docs_checkpoint[
258+
"stopwords_ratio"
259+
]
251260
for i in range(len(self.docs["stopwords_ratio"])):
252-
self.docs["stopwords_ratio"].iloc[i] = Filtering.compute_stopwords_ratio(
261+
self.docs["stopwords_ratio"].iloc[
262+
i
263+
] = Filtering.compute_stopwords_ratio(
253264
self.docs["text"].iloc[i],
254265
self.sentencepiece_model_tok,
255266
self.param["strip_characters"],
@@ -271,13 +282,21 @@ def print_discared_by_cond(cond):
271282

272283
if "flagged_words_ratio" in columns:
273284
with st.sidebar.expander("Flagged words ratio"):
274-
flagged_words_file = st.file_uploader("Upload your own list of flagged words (one per line). If there is none, the default one is used.")
285+
flagged_words_file = st.file_uploader(
286+
"Upload your own list of flagged words (one per line). If there is none, the default one is used."
287+
)
275288
if flagged_words_file:
276-
new_flagged_words = StringIO(flagged_words_file.getvalue().decode("utf-8")).read()
289+
new_flagged_words = StringIO(
290+
flagged_words_file.getvalue().decode("utf-8")
291+
).read()
277292
new_flagged_words = set(new_flagged_words.split("\n"))
278-
self.docs["flagged_words_ratio"] = self.docs_checkpoint["flagged_words_ratio"]
293+
self.docs["flagged_words_ratio"] = self.docs_checkpoint[
294+
"flagged_words_ratio"
295+
]
279296
for i in range(len(self.docs["flagged_words_ratio"])):
280-
self.docs["flagged_words_ratio"].iloc[i] = Filtering.compute_flagged_words_ratio(
297+
self.docs["flagged_words_ratio"].iloc[
298+
i
299+
] = Filtering.compute_flagged_words_ratio(
281300
self.docs["text"].iloc[i],
282301
self.sentencepiece_model_tok,
283302
self.param["strip_characters"],
@@ -626,7 +645,7 @@ def is_doc_discarded(key, score):
626645
)
627646

628647
def visualization(self):
629-
self.warning_preamble()
648+
# self.warning_preamble()
630649
self.preamble()
631650
self.open_data()
632651
self.set_title()

0 commit comments

Comments
 (0)