Skip to content

Commit f93de77

Browse files
committed
Add fuzzer for io module
1 parent 71ede86 commit f93de77

3 files changed

Lines changed: 213 additions & 2 deletions

File tree

Makefile

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
all : fuzzer-html fuzzer-email fuzzer-httpclient fuzzer-json fuzzer-difflib fuzzer-csv fuzzer-decode fuzzer-ast fuzzer-tarfile fuzzer-tarfile-hypothesis fuzzer-zipfile fuzzer-zipfile-hypothesis fuzzer-re fuzzer-configparser fuzzer-tomllib fuzzer-plistlib fuzzer-xml fuzzer-zoneinfo
1+
all : fuzzer-html fuzzer-email fuzzer-httpclient fuzzer-json fuzzer-difflib fuzzer-csv fuzzer-decode fuzzer-ast fuzzer-tarfile fuzzer-tarfile-hypothesis fuzzer-zipfile fuzzer-zipfile-hypothesis fuzzer-re fuzzer-configparser fuzzer-tomllib fuzzer-plistlib fuzzer-xml fuzzer-zoneinfo fuzzer-io
22

33
PYTHON_CONFIG_PATH=$(CPYTHON_INSTALL_PATH)/bin/python3-config
44
CXXFLAGS += $(shell $(PYTHON_CONFIG_PATH) --cflags)
5-
LDFLAGS += -rdynamic $(shell $(PYTHON_CONFIG_PATH) --ldflags --embed)
5+
LDFLAGS += -rdynamic $(shell $(PYTHON_CONFIG_PATH) --ldflags --embed) $(CPYTHON_MODLIBS) -Wl,--allow-multiple-definition
66

77
fuzzer-html:
88
clang++ $(CXXFLAGS) $(LIB_FUZZING_ENGINE) -std=c++17 fuzzer.cpp -DPYTHON_HARNESS_PATH="\"html.py\"" -ldl $(LDFLAGS) -o fuzzer-html
@@ -40,3 +40,6 @@ fuzzer-xml:
4040
clang++ $(CXXFLAGS) $(LIB_FUZZING_ENGINE) -std=c++17 fuzzer.cpp -DPYTHON_HARNESS_PATH="\"xml.py\"" -ldl $(LDFLAGS) -o fuzzer-xml
4141
fuzzer-zoneinfo:
4242
clang++ $(CXXFLAGS) $(LIB_FUZZING_ENGINE) -std=c++17 fuzzer.cpp -DPYTHON_HARNESS_PATH="\"zoneinfo.py\"" -ldl $(LDFLAGS) -o fuzzer-zoneinfo
43+
44+
fuzzer-io:
45+
clang++ $(CXXFLAGS) $(LIB_FUZZING_ENGINE) -std=c++17 fuzzer.cpp -DPYTHON_HARNESS_PATH="\"io.py\"" -ldl $(LDFLAGS) -o fuzzer-io

fuzz_targets.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ difflib difflib.py
66
email email.py
77
html html.py
88
httpclient httpclient.py
9+
io io.py
910
json json.py
1011
plistlib plist.py
1112
re re.py

io.py

Lines changed: 207 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,207 @@
1+
from fuzzeddataprovider import FuzzedDataProvider
2+
import os
3+
import io
4+
import tempfile
5+
6+
# Operation selectors consumed by FuzzerRunOne. A name's position in the
# tuple is its numeric value (0 through 6), so reordering the names changes
# which harness a given fuzz input reaches.
(
    OP_BYTESIO,
    OP_TEXTIOWRAPPER,
    OP_BUFFERED_IO,
    OP_FILEIO,
    OP_IO_OPEN,
    OP_NEWLINE_DECODER,
    OP_STRINGIO,
) = range(7)

# Target selectors consumed by op_buffered_io, numbered 0 through 2 in the
# order listed.
BUFFERED_READER, BUFFERED_WRITER, BUFFERED_RANDOM = range(3)
19+
20+
21+
# BytesIO harness (Modules/_io/bytesio.c). Draws a truncation offset and
# then every remaining fuzz byte, writes the bytes into an empty BytesIO,
# performs three reads that each start from offset 0 (read, readline,
# readinto a scratch buffer of at most 100 bytes), requests a buffer
# export, truncates at the drawn offset and finally calls getvalue.
def op_bytesio(fdp):
    # The offset is drawn first, while the provider still holds input.
    cut_at = fdp.ConsumeIntInRange(0, fdp.remaining_bytes())
    payload = fdp.ConsumeBytes(fdp.remaining_bytes())

    stream = io.BytesIO()
    stream.write(payload)

    scratch = bytearray(min(len(payload), 100))
    rewound_reads = (
        stream.read,
        stream.readline,
        lambda: stream.readinto(scratch),
    )
    for do_read in rewound_reads:
        stream.seek(0)
        do_read()

    # The memoryview returned here is not kept.
    stream.getbuffer()
    stream.truncate(cut_at)
    stream.getvalue()
39+
40+
41+
# TextIOWrapper harness (Modules/_io/textio.c). Picks an encoding
# (utf-8, latin-1, ascii or utf-16) and a newline mode (None, '', \n, \r
# or \r\n), wraps a BytesIO holding the remaining fuzz bytes with
# errors="replace", then calls read, seek(0), readline and detach.
def op_textiowrapper(fdp):
    codec = fdp.PickValueInList(["utf-8", "latin-1", "ascii", "utf-16"])
    newline_mode = fdp.PickValueInList([None, "", "\n", "\r", "\r\n"])
    payload = fdp.ConsumeBytes(fdp.remaining_bytes())

    text = io.TextIOWrapper(
        io.BytesIO(payload),
        encoding=codec,
        errors="replace",
        newline=newline_mode,
    )
    text.read()
    text.seek(0)
    text.readline()
    text.detach()
59+
60+
61+
# Tests BufferedReader/Writer/Random (Modules/_io/bufferedio.c): picks one
# of the three buffered I/O types and drives it over a BytesIO raw stream.
#   BUFFERED_READER: read() everything.
#   BUFFERED_WRITER: write() the fuzz bytes, then flush().
#   BUFFERED_RANDOM: read(read_size), write(write_data), seek(0), read().
#
# For BUFFERED_RANDOM the bytes to write are drawn BEFORE the initial stream
# contents. Drawing them after a "consume everything" call leaves the
# provider empty, so the write would always receive b"" and the
# read-then-write path would never see real data.
def op_buffered_io(fdp):
    target = fdp.ConsumeIntInRange(BUFFERED_READER, BUFFERED_RANDOM)
    # Drawn for every target so the input layout does not depend on the
    # branch taken; only the BUFFERED_RANDOM branch uses it.
    read_size = fdp.ConsumeIntInRange(0, 10000)
    if target == BUFFERED_READER:
        data = fdp.ConsumeBytes(fdp.remaining_bytes())
        br = io.BufferedReader(io.BytesIO(data))
        br.read()
    elif target == BUFFERED_WRITER:
        data = fdp.ConsumeBytes(fdp.remaining_bytes())
        bw = io.BufferedWriter(io.BytesIO())
        bw.write(data)
        bw.flush()
    else:
        # Bound the length by what is actually left, as the other harnesses
        # in this file do, then hand the rest to the raw stream.
        write_len = fdp.ConsumeIntInRange(0, min(fdp.remaining_bytes(), 10000))
        write_data = fdp.ConsumeBytes(write_len)
        data = fdp.ConsumeBytes(fdp.remaining_bytes())
        brw = io.BufferedRandom(io.BytesIO(data))
        brw.read(read_size)
        brw.write(write_data)
        brw.seek(0)
        brw.read()
85+
86+
87+
# Tests FileIO (Modules/_io/fileio.c) against a real temporary file.
# When the drawn flag is true the fuzz bytes are written through
# io.FileIO(..., "w"); otherwise they are written through the temp-file
# handle and flushed. In both cases the file is then read back through
# io.FileIO(..., "r").
#
# Every FileIO handle is managed by a `with` block so it is closed even when
# write() or read() raises. The temporary file is always unlinked in the
# `finally` clause; a failure to unlink is ignored as best-effort cleanup.
def op_fileio(fdp):
    do_write = fdp.ConsumeBool()
    data = fdp.ConsumeBytes(fdp.remaining_bytes())
    tmpname = None
    try:
        # delete=False: the file is reopened by name below and removed
        # explicitly in the finally clause.
        with tempfile.NamedTemporaryFile(delete=False) as tmp:
            tmpname = tmp.name
            if do_write:
                with io.FileIO(tmpname, "w") as f:
                    f.write(data)
            else:
                tmp.write(data)
                # Flush so the bytes are visible to the reader opened below.
                tmp.flush()
            with io.FileIO(tmpname, "r") as f:
                f.read()
    finally:
        if tmpname:
            try:
                os.unlink(tmpname)
            except OSError:
                pass
116+
117+
118+
# io.open() harness (Modules/_io/_iomodule.c). Picks a mode from
# ["rb", "r", "rb"], stores the remaining fuzz bytes in a temporary file
# and reads that file back through io.open. Text mode is opened with
# errors="replace"; binary mode passes errors=None. The temporary file is
# unlinked afterwards and any failure to unlink is ignored.
def op_io_open(fdp):
    mode = fdp.PickValueInList(["rb", "r", "rb"])
    payload = fdp.ConsumeBytes(fdp.remaining_bytes())

    # Only text mode gets an error handler.
    if "b" in mode:
        error_policy = None
    else:
        error_policy = "replace"

    path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False) as scratch:
            path = scratch.name
            scratch.write(payload)
            scratch.flush()
            with io.open(path, mode, errors=error_policy) as handle:
                handle.read()
    finally:
        if path:
            try:
                os.unlink(path)
            except Exception:
                pass
139+
140+
141+
# IncrementalNewlineDecoder harness (Modules/_io/textio.c). Draws the
# translate flag, then up to 10000 fuzz bytes which are turned into text
# via latin-1. The text is fed to a decoder constructed with no underlying
# decoder (None), after which getstate and reset are called. Returns early
# when no input is left after the flag.
def op_newline_decoder(fdp):
    translate = fdp.ConsumeBool()

    if fdp.remaining_bytes() <= 0:
        return
    length = fdp.ConsumeIntInRange(1, min(fdp.remaining_bytes(), 10000))
    if length == 0:
        return

    text = fdp.ConsumeBytes(length).decode("latin-1")
    newline_decoder = io.IncrementalNewlineDecoder(None, translate)
    newline_decoder.decode(text)
    newline_decoder.getstate()
    newline_decoder.reset()
158+
159+
160+
# StringIO harness (Modules/_io/stringio.c). Draws up to 10000 fuzz bytes,
# turns them into text via latin-1 and uses that text as the initial
# contents of a StringIO. It then calls read, readline from offset 0,
# write of the same text from offset 0, and getvalue. Returns early when
# the provider has no input left.
def op_stringio(fdp):
    if fdp.remaining_bytes() <= 0:
        return
    length = fdp.ConsumeIntInRange(1, min(fdp.remaining_bytes(), 10000))
    if length == 0:
        return

    text = fdp.ConsumeBytes(length).decode("latin-1")
    buffer = io.StringIO(text)
    buffer.read()
    # Both remaining operations start from the beginning of the stream.
    for action in (buffer.readline, lambda: buffer.write(text)):
        buffer.seek(0)
        action()
    buffer.getvalue()
179+
180+
181+
# Fuzzer entry point for CPython's I/O C modules (Modules/_io/). Inputs
# that are empty or longer than 0x100000 bytes are ignored. Otherwise one
# selector in the range OP_BYTESIO..OP_STRINGIO is drawn and the matching
# harness runs on the rest of the input: BytesIO, TextIOWrapper,
# BufferedReader/Writer/Random, FileIO, io.open(),
# IncrementalNewlineDecoder or StringIO. Any Exception raised by a harness
# is swallowed.
def FuzzerRunOne(FuzzerInput):
    if not 1 <= len(FuzzerInput) <= 0x100000:
        return

    fdp = FuzzedDataProvider(FuzzerInput)
    op = fdp.ConsumeIntInRange(OP_BYTESIO, OP_STRINGIO)

    # Every selector other than the ones listed falls through to StringIO.
    handlers = {
        OP_BYTESIO: op_bytesio,
        OP_TEXTIOWRAPPER: op_textiowrapper,
        OP_BUFFERED_IO: op_buffered_io,
        OP_FILEIO: op_fileio,
        OP_IO_OPEN: op_io_open,
        OP_NEWLINE_DECODER: op_newline_decoder,
    }
    try:
        handlers.get(op, op_stringio)(fdp)
    except Exception:
        pass

0 commit comments

Comments
 (0)