Skip to content

Commit 682841b

Browse files
committed
Automatically Delete Long Code Snippets and Ask for Pastebin
1 parent bae8d28 commit 682841b

2 files changed

Lines changed: 56 additions & 1 deletion

File tree

components/callbacks.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import asyncio
22
import logging
33
import random
4+
import re
45
import time
56
from collections import deque
67
from copy import deepcopy
@@ -522,3 +523,50 @@ async def compat_warning(update: Update, _: ContextTypes.DEFAULT_TYPE) -> None:
522523
hint.html_markup(),
523524
reply_markup=hint.inline_keyboard,
524525
)
526+
527+
528+
async def long_code_handling(update: Update, _: ContextTypes.DEFAULT_TYPE) -> None:
    """Reply with the /pastebin tag hint and delete messages that contain long code.

    A message is treated as a long code snippet if any of these heuristics match:

    * a ``code``/``pre`` entity spans 15 or more lines,
    * the text contains 5 or more ``import`` / ``from ... import`` lines,
    * the text contains 3 or more ``class``/``def`` definitions followed by ``(``.

    Because we do the regexing in here rather than in the filter, the corresponding handler
    will have to be in a lower group.

    Raises:
        ApplicationHandlerStop: after a long code snippet has been handled, so that the
            message is not processed any further.
    """
    message = cast(Message, update.effective_message)
    text = cast(str, message.text)

    # We make some educated guesses about the message's content. This is nothing more than
    # a few simple heuristics, but it should catch the most common cases.

    # If we have a code block of 15 lines or more, we assume it's a long code snippet
    parsed_entities = message.parse_entities(types=[MessageEntity.CODE, MessageEntity.PRE])
    has_long_code = any(
        len(snippet.split("\n")) >= 15 for snippet in parsed_entities.values()
    )

    # if the text contains 5 or more import lines, we assume it's a long code snippet
    # regex from https://stackoverflow.com/a/44988666/10606962
    if not has_long_code:
        import_pattern = re.compile(r"(?m)^(?:from +(\S+) +)?import +(\S+)(?: +as +\S+)? *$")
        has_long_code = len(import_pattern.findall(text)) >= 5

    # if the text contains 3 or more class or function definitions, we assume the same.
    # Identifiers may start with an underscore (e.g. `def __init__(` or `def _helper(`),
    # so the first character class has to allow it.
    if not has_long_code:
        definition_pattern = re.compile(r"(class|def) [a-zA-Z_][a-zA-Z0-9_]*\(")
        has_long_code = len(definition_pattern.findall(text)) >= 3

    if not has_long_code:
        return

    # Get the long_code hint
    hint = TAG_HINTS["pastebin"]

    # the leading ". " is important here since html_markup() splits on whitespaces!
    mention = f". {update.effective_user.mention_html()}" if update.effective_user else None

    # Reply first, then remove the offending message
    await message.reply_text(
        hint.html_markup(mention),
        reply_markup=hint.inline_keyboard,
    )
    await try_to_delete(message)

    # We don't want this message to be processed any further
    raise ApplicationHandlerStop

rules_bot.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
compat_warning,
3232
delete_message,
3333
leave_chat,
34+
long_code_handling,
3435
off_on_topic,
3536
raise_app_handler_stop,
3637
regex_token_warning,
@@ -137,7 +138,13 @@ def main() -> None:
137138
group=-2,
138139
)
139140

140-
application.add_handler(MessageHandler(~filters.COMMAND, rate_limit_tracker), group=-1)
141+
application.add_handler(MessageHandler(~filters.COMMAND, rate_limit_tracker), group=-2)
142+
143+
# We need several different patterns, so filters.REGEX doesn't do the trick
144+
# therefore we catch everything and do regex ourselves. In case the message contains a
145+
# long code block, we'll raise AppHandlerStop to prevent further processing.
146+
application.add_handler(MessageHandler(filters.TEXT, long_code_handling), group=-1)
147+
141148
application.add_handler(
142149
MessageHandler(
143150
filters.SenderChat.CHANNEL

0 commit comments

Comments
 (0)