From 636152af27314505bc825260f2bb200aac17862b Mon Sep 17 00:00:00 2001 From: Dustin <6962246+djdarcy@users.noreply.github.com> Date: Fri, 15 May 2026 15:41:06 -0400 Subject: [PATCH] messages: Fix msg_headers deser to consume trailing tx-count varint The P2P 'headers' message wire format is a varint N followed by N entries, where each entry is an 80-byte CBlockHeader plus the standard CBlock framing's transaction-count varint. The varint is always 0 (the whole point of a 'headers' message is blocks-without-transactions) but it is still part of the on-wire bytes, so a deserializer that reads only the 80-byte header gets misaligned for header[1] onwards. The bug doesn't show up in self-round-trip tests because msg_ser also skipped the trailing byte, so serialize-then-deserialize was symmetric. It only surfaces against on-wire data from a real peer, where header[1].hashPrevBlock ends up shifted one byte left of header[0].GetHash(). This fix consumes the trailing varint in msg_deser and writes one back out in msg_ser, keeping the symmetric round-trip property intact while matching the actual on-wire format. The existing Test_msg_headers.test_serialization round-trip test continues to pass. A regression test (genesis + block 1 chain continuity, exact byte consumption, round-trip identity against a wire-formatted body) is available as a gist for review and can be added to bitcoin/tests/test_messages.py if desired: https://gist.github.com/djdarcy/dc8843b6183f65165621c8fc09fcedc4 Closes #320. 
--- bitcoin/messages.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/bitcoin/messages.py b/bitcoin/messages.py index b4d7604f..910646b4 100644 --- a/bitcoin/messages.py +++ b/bitcoin/messages.py @@ -340,12 +340,24 @@ def __init__(self, protover=PROTO_VERSION): @classmethod def msg_deser(cls, f, protover=PROTO_VERSION): + # Each entry in a P2P 'headers' message is a CBlockHeader followed by + # the block's transaction-count varint (always 0, since blocks-without- + # transactions is the whole point of the 'headers' message). The + # trailing varint is part of the on-wire format and must be consumed, + # or subsequent headers in the same message will be misaligned. c = cls() - c.headers = VectorSerializer.stream_deserialize(CBlockHeader, f) + n = VarIntSerializer.stream_deserialize(f) + for _ in range(n): + header = CBlockHeader.stream_deserialize(f) + VarIntSerializer.stream_deserialize(f) # discard tx-count varint + c.headers.append(header) return c def msg_ser(self, f): - VectorSerializer.stream_serialize(CBlockHeader, self.headers, f) + VarIntSerializer.stream_serialize(len(self.headers), f) + for header in self.headers: + header.stream_serialize(f) + VarIntSerializer.stream_serialize(0, f) # tx-count varint def __repr__(self): return "msg_headers(headers=%s)" % (repr(self.headers))