Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions platformio.ini
Original file line number Diff line number Diff line change
Expand Up @@ -166,3 +166,17 @@ build_src_filter =
+<../src/Utils.cpp>
lib_deps =
google/googletest @ 1.17.0

; Standalone unit tests for reliability changes (issues #1775, #1489).
; These tests have no Mesh-stack dependencies and compile without the
; Arduino framework or crypto libraries.
[env:native_reliability]
platform = native
build_flags = -std=c++17
test_filter =
test_path_gating
test_flood_ack
build_src_filter =
-<*>
lib_deps =
google/googletest @ 1.17.0
13 changes: 9 additions & 4 deletions src/Mesh.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -457,11 +457,16 @@ Packet* Mesh::createPathReturn(const uint8_t* dest_hash, const uint8_t* secret,
if (extra_len > 0) {
data[data_len++] = extra_type;
memcpy(&data[data_len], extra, extra_len); data_len += extra_len;
} else {
// append a timestamp, or random blob (to make packet_hash unique)
data[data_len++] = 0xFF; // dummy payload type
getRNG()->random(&data[data_len], 4); data_len += 4;
}
// Always append a random nonce regardless of extra presence.
// AES-ECB is deterministic: without a nonce, identical plaintext produces
// identical ciphertext → identical calculatePacketHash() → hasSeen() treats
// every retransmission of the same PATH+ACK as a duplicate and drops it.
// The nonce is ignored by the receiver (it reads only the typed extra fields).
// This mirrors the original no-extra handling above (issue: nonce was missing
// for the extra_len > 0 branch, breaking flood-ACK retry reliability).
data[data_len++] = 0xFF; // dummy payload type (ignored by receiver)
getRNG()->random(&data[data_len], 4); data_len += 4;

len += Utils::encryptThenMAC(secret, &packet->payload[len], data, data_len);
}
Expand Down
74 changes: 69 additions & 5 deletions src/helpers/BaseChatMesh.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,22 @@
#define TXT_ACK_DELAY 200
#endif

// Path-stickiness window (seconds): a stored direct path that is younger than
// this threshold will not be replaced by a longer-hop incoming path.
// Addresses issue #1775 (unconditional path replacement instability).
#ifndef PATH_STICKINESS_WINDOW_SECS
#define PATH_STICKINESS_WINDOW_SECS 30u // 30 seconds: long enough to ignore
// multipath duplicates (50-200 ms apart)
// but short enough to allow legitimate
// path updates in a changing topology
#endif

// Number of independent flood-ACK transmissions at increasing delays.
// Addresses issue #1489 (single flood ACK lost in noisy RF environments).
#ifndef FLOOD_ACK_RETRY_COUNT
#define FLOOD_ACK_RETRY_COUNT 3
#endif

void BaseChatMesh::sendFloodScoped(const ContactInfo& recipient, mesh::Packet* pkt, uint32_t delay_millis) {
sendFlood(pkt, delay_millis);
}
Expand Down Expand Up @@ -40,8 +56,14 @@ mesh::Packet* BaseChatMesh::createSelfAdvert(const char* name, double lat, doubl

void BaseChatMesh::sendAckTo(const ContactInfo& dest, uint32_t ack_hash) {
if (dest.out_path_len == OUT_PATH_UNKNOWN) {
mesh::Packet* ack = createAck(ack_hash);
if (ack) sendFloodScoped(dest, ack, TXT_ACK_DELAY);
// Send flood ACK FLOOD_ACK_RETRY_COUNT times at increasing delays (issue #1489).
// Each transmission is an independent RF attempt; duplicates are discarded by
// hasSeen() at every receiving node so only the first successful copy is processed.
static const uint32_t flood_ack_delays[FLOOD_ACK_RETRY_COUNT] = { TXT_ACK_DELAY, 800, 2000 };
for (int i = 0; i < FLOOD_ACK_RETRY_COUNT; i++) {
mesh::Packet* ack = createAck(ack_hash);
if (ack) sendFloodScoped(dest, ack, flood_ack_delays[i]);
}
} else {
uint32_t d = TXT_ACK_DELAY;
if (getExtraAckTransmitCount() > 0) {
Expand Down Expand Up @@ -226,6 +248,12 @@ void BaseChatMesh::onPeerDataRecv(mesh::Packet* packet, uint8_t type, int sender
mesh::Packet* path = createPathReturn(from.id, secret, packet->path, packet->path_len,
PAYLOAD_TYPE_ACK, (uint8_t *) &ack_hash, 4);
if (path) sendFloodScoped(from, path, TXT_ACK_DELAY);
// Also send a standalone flood ACK as backup: the PATH+ACK above is a single packet
// whose ciphertext (AES-ECB) was deterministic before the nonce fix. Even with the
// nonce, RF loss can drop the PATH+ACK entirely. The standalone ACK travels
// independently and ensures the sender cancels its retry timeout even if path
// establishment fails (issue #1489).
sendAckTo(from, ack_hash);
} else {
sendAckTo(from, ack_hash);
}
Expand Down Expand Up @@ -253,6 +281,8 @@ void BaseChatMesh::onPeerDataRecv(mesh::Packet* packet, uint8_t type, int sender
mesh::Packet* path = createPathReturn(from.id, secret, packet->path, packet->path_len,
PAYLOAD_TYPE_ACK, (uint8_t *) &ack_hash, 4);
if (path) sendFloodScoped(from, path, TXT_ACK_DELAY);
// Standalone flood ACK backup — same rationale as TXT_TYPE_PLAIN above
sendAckTo(from, ack_hash);
} else {
sendAckTo(from, ack_hash);
}
Expand Down Expand Up @@ -302,10 +332,37 @@ bool BaseChatMesh::onPeerPathRecv(mesh::Packet* packet, int sender_idx, const ui
}

bool BaseChatMesh::onContactPathRecv(ContactInfo& from, uint8_t* in_path, uint8_t in_path_len, uint8_t* out_path, uint8_t out_path_len, uint8_t extra_type, uint8_t* extra, uint8_t extra_len) {
// NOTE: default impl, we just replace the current 'out_path' regardless, whenever sender sends us a new out_path.
// FUTURE: could store multiple out_paths per contact, and try to find which is the 'best'(?)
uint32_t now = getRTCClock()->getCurrentTime();

// Path quality gating (issue #1775): do not replace a fresh stored path with
// a longer-hop incoming path. A path is "sticky" for PATH_STICKINESS_WINDOW_SECS
// after it was last accepted, protecting a working direct route from being
// silently downgraded by a suboptimal first-arriving multipath duplicate.
if (from.out_path_len != OUT_PATH_UNKNOWN && from.out_path_timestamp != 0) {
uint32_t age_secs = now - from.out_path_timestamp;
if (age_secs < PATH_STICKINESS_WINDOW_SECS) {
uint8_t stored_hops = from.out_path_len & 63;
uint8_t new_hops = out_path_len & 63;
if (new_hops > stored_hops) {
// Incoming path has more hops – keep the fresher, shorter stored path.
onContactPathUpdated(from);
if (extra_type == PAYLOAD_TYPE_ACK && extra_len >= 4) {
if (processAck(extra) != NULL) {
txt_send_timeout = 0;
}
} else if (extra_type == PAYLOAD_TYPE_RESPONSE && extra_len > 0) {
onContactResponse(from, extra, extra_len);
}
return true;
}
}
}

// Accept the new path.
from.out_path_len = mesh::Packet::copyPath(from.out_path, out_path, out_path_len); // store a copy of path, for sendDirect()
from.lastmod = getRTCClock()->getCurrentTime();
from.out_path_timestamp = now; // record when this path was accepted
from.path_ack_count = 0; // reset delivery counter for the new path
from.lastmod = now;

onContactPathUpdated(from);

Expand All @@ -326,6 +383,13 @@ void BaseChatMesh::onAckRecv(mesh::Packet* packet, uint32_t ack_crc) {
txt_send_timeout = 0; // matched one we're waiting for, cancel timeout timer
packet->markDoNotRetransmit(); // ACK was for this node, so don't retransmit

// Track per-contact direct-path delivery success (issue #1775).
// Incremented here so the path-stickiness logic can eventually factor in
// proven delivery when making replacement decisions.
if (!packet->isRouteFlood() && from->out_path_len != OUT_PATH_UNKNOWN) {
if (from->path_ack_count < 255) from->path_ack_count++;
}

if (packet->isRouteFlood() && from->out_path_len != OUT_PATH_UNKNOWN) {
// we have direct path, but other node is still sending flood, so maybe they didn't receive reciprocal path properly(?)
handleReturnPathRetry(*from, packet->path, packet->path_len);
Expand Down
2 changes: 2 additions & 0 deletions src/helpers/ContactInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ struct ContactInfo {
uint32_t lastmod; // by OUR clock
int32_t gps_lat, gps_lon; // 6 dec places
uint32_t sync_since;
uint32_t out_path_timestamp; // RTC time (secs) when out_path was last accepted; 0 = never set
uint8_t path_ack_count; // saturating count of direct-path ACKs received (delivery tracking)

const uint8_t* getSharedSecret(const mesh::LocalIdentity& self_id) const {
if (!shared_secret_valid) {
Expand Down
212 changes: 212 additions & 0 deletions test/test_flood_ack/test_flood_ack.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
/**
* test_flood_ack.cpp
*
* Unit tests for the flood-ACK retry scheduling introduced in
* BaseChatMesh::sendAckTo (fixes issue #1489).
*
* When a contact has no known direct path (out_path_len == OUT_PATH_UNKNOWN)
* the implementation must schedule exactly FLOOD_ACK_RETRY_COUNT independent
* flood transmissions at the delays [200ms, 800ms, 2000ms]. When a direct
* path is known the pre-existing single-direct-ACK behaviour is unchanged.
*
* Deduplication on the receiver side is handled by the pre-existing
* MeshTables::hasSeen() mechanism; these tests confirm only the sender-side
* scheduling contract.
*
* See docs/reliability_changes.md for the full rationale.
*/

#include <gtest/gtest.h>
#include <stdint.h>
#include <string.h>

// ---- Constants mirrored from BaseChatMesh.cpp --------------------------------
#define OUT_PATH_UNKNOWN 0xFF
#define TXT_ACK_DELAY 200u
#define FLOOD_ACK_RETRY_COUNT 3

// Expected delay schedule — must match the static array in sendAckTo.
static const uint32_t EXPECTED_FLOOD_DELAYS[FLOOD_ACK_RETRY_COUNT] = {
TXT_ACK_DELAY, // 200 ms
800u,
2000u,
};

// ---- Minimal send recorder ---------------------------------------------------

struct ScheduledPacket {
bool is_flood;
uint32_t delay_ms;
};

static ScheduledPacket sched_buf[16];
static int sched_count;

static void reset_sched() {
sched_count = 0;
memset(sched_buf, 0, sizeof(sched_buf));
}

static void mock_sendFloodScoped(uint32_t delay) {
if (sched_count < 16) sched_buf[sched_count++] = { true, delay };
}

static void mock_sendDirect(uint32_t delay) {
if (sched_count < 16) sched_buf[sched_count++] = { false, delay };
}

// ---- sendAckTo logic transcription ------------------------------------------
// This is a direct copy of the changed sendAckTo body (excluding the Mesh
// packet-creation calls that are irrelevant to scheduling counts and delays).
// If the implementation diverges from this transcription the test author must
// update both, which serves as an explicit change-review gate.
static void testSendAckTo(uint8_t out_path_len, uint32_t /*ack_hash*/) {
if (out_path_len == OUT_PATH_UNKNOWN) {
static const uint32_t flood_ack_delays[FLOOD_ACK_RETRY_COUNT] = {
TXT_ACK_DELAY, 800, 2000
};
for (int i = 0; i < FLOOD_ACK_RETRY_COUNT; i++) {
mock_sendFloodScoped(flood_ack_delays[i]);
}
} else {
// Direct path: single ACK at TXT_ACK_DELAY (getExtraAckTransmitCount()==0 default)
mock_sendDirect(TXT_ACK_DELAY);
}
}

// Flood path (out_path_len == OUT_PATH_UNKNOWN)

TEST(FloodAck, UnknownPath_SendsExactlyThreeTimes) {
reset_sched();
testSendAckTo(OUT_PATH_UNKNOWN, 0xDEADBEEF);
EXPECT_EQ(FLOOD_ACK_RETRY_COUNT, sched_count);
}

TEST(FloodAck, UnknownPath_AllSendsAreFlood) {
reset_sched();
testSendAckTo(OUT_PATH_UNKNOWN, 0xDEADBEEF);
for (int i = 0; i < sched_count; i++) {
EXPECT_TRUE(sched_buf[i].is_flood)
<< "Packet " << i << " should be a flood send";
}
}

TEST(FloodAck, UnknownPath_DelaysMatch200_800_2000ms) {
reset_sched();
testSendAckTo(OUT_PATH_UNKNOWN, 0xDEADBEEF);
ASSERT_EQ(FLOOD_ACK_RETRY_COUNT, sched_count);
for (int i = 0; i < FLOOD_ACK_RETRY_COUNT; i++) {
EXPECT_EQ(EXPECTED_FLOOD_DELAYS[i], sched_buf[i].delay_ms)
<< "Delay " << i << " should be " << EXPECTED_FLOOD_DELAYS[i] << " ms";
}
}

TEST(FloodAck, UnknownPath_DelaysAreStrictlyIncreasing) {
reset_sched();
testSendAckTo(OUT_PATH_UNKNOWN, 0xDEADBEEF);
for (int i = 1; i < sched_count; i++) {
EXPECT_GT(sched_buf[i].delay_ms, sched_buf[i - 1].delay_ms)
<< "Delay " << i << " must be > delay " << (i - 1)
<< " to ensure temporally spread retries";
}
}

TEST(FloodAck, UnknownPath_FirstDelayIsAckDelay) {
reset_sched();
testSendAckTo(OUT_PATH_UNKNOWN, 0xDEADBEEF);
ASSERT_GE(sched_count, 1);
EXPECT_EQ(TXT_ACK_DELAY, sched_buf[0].delay_ms);
}

// Direct path (out_path_len != OUT_PATH_UNKNOWN) — pre-existing behaviour

TEST(FloodAck, KnownPath_SendsExactlyOnce) {
reset_sched();
testSendAckTo(/*out_path_len=*/1, 0xDEADBEEF);
EXPECT_EQ(1, sched_count);
}

TEST(FloodAck, KnownPath_SendIsDirect) {
reset_sched();
testSendAckTo(/*out_path_len=*/1, 0xDEADBEEF);
ASSERT_EQ(1, sched_count);
EXPECT_FALSE(sched_buf[0].is_flood);
}

TEST(FloodAck, KnownPath_DelayIsAckDelay) {
reset_sched();
testSendAckTo(/*out_path_len=*/2, 0xDEADBEEF);
ASSERT_EQ(1, sched_count);
EXPECT_EQ(TXT_ACK_DELAY, sched_buf[0].delay_ms);
}

TEST(FloodAck, KnownPath_MultiHop_SendsOnce) {
reset_sched();
testSendAckTo(/*out_path_len=*/5, 0x12345678);
EXPECT_EQ(1, sched_count);
}

// Retry count configuration contract

TEST(FloodAck, RetryCountIs3) {
// Explicitly documents the expected constant value.
EXPECT_EQ(3, FLOOD_ACK_RETRY_COUNT);
}

TEST(FloodAck, ExpectedDelaysTableMatchesConstants) {
EXPECT_EQ(TXT_ACK_DELAY, EXPECTED_FLOOD_DELAYS[0]);
EXPECT_EQ(800u, EXPECTED_FLOOD_DELAYS[1]);
EXPECT_EQ(2000u, EXPECTED_FLOOD_DELAYS[2]);
}

// =============================================================================
// Backup ACK for flood-received messages
//
// When a flood message is received, the responder sends a PATH+ACK (one packet
// that serves both path-establishment and ACK delivery). If the PATH+ACK is
// lost, the sender never gets the ACK. The fix: also call sendAckTo() to send
// independent flood ACKs as a backup. This suite verifies the backup contract
// using the same sendAckTo transcription, called from the "no known path" state
// that applies immediately after receiving the very first flood message.
// =============================================================================

TEST(BackupFloodAck, FirstMessageScenario_SendsFloodAckBackup) {
// When the receiver has no stored path to the sender (first-ever message),
// sendAckTo() is invoked with OUT_PATH_UNKNOWN and must produce flood ACKs.
reset_sched();
testSendAckTo(OUT_PATH_UNKNOWN, 0xCAFEBABE);
EXPECT_EQ(FLOOD_ACK_RETRY_COUNT, sched_count);
for (int i = 0; i < sched_count; i++) {
EXPECT_TRUE(sched_buf[i].is_flood)
<< "Backup ACK " << i << " must be flood (no path known yet)";
}
}

TEST(BackupFloodAck, BackupAndPathAck_AreIndependent) {
// The backup standalone ACK (sendAckTo) and the PATH+ACK are independent
// packets. This test verifies sendAckTo is not affected by the PATH+ACK
// scheduling — it produces its own complete set of FLOOD_ACK_RETRY_COUNT
// flood transmissions at the standard delays.
reset_sched();
testSendAckTo(OUT_PATH_UNKNOWN, 0x11223344);
ASSERT_EQ(FLOOD_ACK_RETRY_COUNT, sched_count);
EXPECT_EQ(TXT_ACK_DELAY, sched_buf[0].delay_ms);
EXPECT_EQ(800u, sched_buf[1].delay_ms);
EXPECT_EQ(2000u, sched_buf[2].delay_ms);
}

TEST(BackupFloodAck, KnownReturnPath_SendsDirectAckBackup) {
// If the receiver already has a stored direct path back to the sender
// (e.g., established in a prior exchange), the backup sendAckTo() sends a
// direct ACK rather than a flood — still providing a reliable backup channel.
reset_sched();
testSendAckTo(/*out_path_len=*/1, 0xCAFEBABE);
EXPECT_EQ(1, sched_count);
EXPECT_FALSE(sched_buf[0].is_flood);
}


int main(int argc, char** argv) {
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}
Loading