Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions apps/common/handle/impl/text/pdf_split_handle.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ def collect_toc(doc, outline, level, toc):
title = item.get("/Title")
if title is None:
title = str(item)
toc.append((level, str(title), page_number))
toc.append((level, str(title).replace("\0", ""), page_number))

@staticmethod
def handle_toc(doc, limit):
Expand Down Expand Up @@ -522,7 +522,7 @@ def visitor_text(text, cm, tm, font_dict, font_size):
except BaseException:
return ""

return "".join(text_parts).strip().split("\n")[0].replace(".", "").strip()
return "".join(text_parts).replace("\0", "").strip().split("\n")[0].replace(".", "").strip()

@staticmethod
def extract_first_line(page):
Expand All @@ -531,6 +531,7 @@ def extract_first_line(page):

@staticmethod
def handle_chapter_title(title):
    """Return *title* with NUL characters and chapter numbering removed.

    Two kinds of numbering are stripped wherever they occur in the title:
    a single Chinese numeral, whitespace character or ``*`` followed by the
    enumeration comma ``、`` (for example ``一、``), and a
    ``第<numeral>章`` chapter marker (for example ``第一章``).  Whitespace
    directly after either marker is removed as well.

    NOTE(review): both patterns accept only one numeral character, so
    multi-character numerals such as ``十一`` are not fully removed --
    confirm whether that is intended.
    """
    # NUL characters are dropped first so they never reach the regex passes.
    cleaned = title.replace("\0", "")
    numbering_patterns = (
        r"[一二三四五六七八九十\s*]、\s*",
        r"第[一二三四五六七八九十]章\s*",
    )
    for numbering in numbering_patterns:
        cleaned = re.sub(numbering, "", cleaned)
    return cleaned
Expand Down
19 changes: 13 additions & 6 deletions apps/knowledge/serializers/paragraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,13 @@
from knowledge.task.generate import generate_related_by_paragraph_id_list


class NullCharacterStrippedCharField(serializers.CharField):
    """CharField that silently drops NUL (U+0000) characters from input.

    Only ``str`` input is altered; ``None`` and non-string values are passed
    to the base class unchanged so its usual type and null handling applies.
    """

    @staticmethod
    def _strip_null_characters(data):
        """Return *data* without NUL characters when it is a ``str``."""
        if isinstance(data, str):
            return data.replace("\x00", "")
        return data

    def run_validation(self, data=serializers.empty):
        """Strip NUL characters before the base class validates *data*.

        The base ``CharField`` performs its blank-string check here, before
        ``to_internal_value`` is called.  Stripping at this point makes a
        value that consists only of NUL characters be treated exactly like
        an empty string, instead of turning into ``""`` after the
        ``allow_blank`` check has already passed.
        """
        return super().run_validation(self._strip_null_characters(data))

    def to_internal_value(self, data):
        """Convert *data* to the internal string value, without NULs.

        Kept in addition to ``run_validation`` so that code calling
        ``to_internal_value`` directly still receives sanitised text.
        """
        return super().to_internal_value(self._strip_null_characters(data))


class ParagraphSerializer(serializers.ModelSerializer):
class Meta:
model = Paragraph
Expand All @@ -59,21 +66,21 @@ class ParagraphInstanceSerializer(serializers.Serializer):
段落实例对象
"""

content = serializers.CharField(
content = NullCharacterStrippedCharField(
required=True, label=_("content"), max_length=102400, min_length=1, allow_null=True, allow_blank=True
)
title = serializers.CharField(
title = NullCharacterStrippedCharField(
required=False, max_length=256, label=_("section title"), allow_null=True, allow_blank=True
)
problem_list = ProblemInstanceSerializer(required=False, many=True)
is_active = serializers.BooleanField(required=False, label=_("Is active"))


class EditParagraphSerializers(serializers.Serializer):
title = serializers.CharField(
title = NullCharacterStrippedCharField(
required=False, max_length=256, label=_("section title"), allow_null=True, allow_blank=True
)
content = serializers.CharField(
content = NullCharacterStrippedCharField(
required=False, max_length=102400, allow_null=True, allow_blank=True, label=_("section title")
)
problem_list = ProblemInstanceSerializer(required=False, many=True)
Expand All @@ -91,10 +98,10 @@ class ParagraphBatchGenerateRelatedSerializer(serializers.Serializer):


class ParagraphSerializers(serializers.Serializer):
title = serializers.CharField(
title = NullCharacterStrippedCharField(
required=False, max_length=256, label=_("section title"), allow_null=True, allow_blank=True
)
content = serializers.CharField(required=True, max_length=102400, label=_("section title"))
content = NullCharacterStrippedCharField(required=True, max_length=102400, label=_("section title"))

class Problem(serializers.Serializer):
workspace_id = serializers.CharField(required=True, label=_("workspace id"))
Expand Down