@@ -587,7 +587,7 @@ auto decongestionCallbackLate = [](AsyncTask& task, size_t aid) -> void {
587587// the inputs which are shared between this device and others
588588// to the next one in the daisy chain.
589589// FIXME: do it in a smarter way than O(N^2)
590- static auto forwardInputs = [](ServiceRegistryRef registry, TimesliceSlot slot, std::vector<std::vector <fair::mq::MessagePtr>>& currentSetOfInputs,
590+ static auto forwardInputs = [](ServiceRegistryRef registry, TimesliceSlot slot, std::vector<std::span <fair::mq::MessagePtr>>& currentSetOfInputs,
591591 TimesliceIndex::OldestOutputInfo oldestTimeslice, bool copy, bool consume = true ) {
592592 auto & proxy = registry.get <FairMQDeviceProxy>();
593593
@@ -619,7 +619,7 @@ static auto forwardInputs = [](ServiceRegistryRef registry, TimesliceSlot slot,
619619 O2_SIGNPOST_END (forwarding, sid, " forwardInputs" , " Forwarding done" );
620620};
621621
622- static auto cleanEarlyForward = [](ServiceRegistryRef registry, TimesliceSlot slot, std::vector<std::vector <fair::mq::MessagePtr>>& currentSetOfInputs,
622+ static auto cleanEarlyForward = [](ServiceRegistryRef registry, TimesliceSlot slot, std::vector<std::span <fair::mq::MessagePtr>>& currentSetOfInputs,
623623 TimesliceIndex::OldestOutputInfo oldestTimeslice, bool copy, bool consume = true ) {
624624 auto & proxy = registry.get <FairMQDeviceProxy>();
625625
@@ -629,8 +629,7 @@ static auto cleanEarlyForward = [](ServiceRegistryRef registry, TimesliceSlot sl
629629 // Always copy them, because we do not want to actually send them.
630630 // We merely need the side effect of the consume, if applicable.
631631 for (size_t ii = 0 , ie = currentSetOfInputs.size (); ii < ie; ++ii) {
632- auto span = std::span<fair::mq::MessagePtr>(currentSetOfInputs[ii]);
633- DataProcessingHelpers::cleanForwardedMessages (span, consume);
632+ DataProcessingHelpers::cleanForwardedMessages (currentSetOfInputs[ii], consume);
634633 }
635634
636635 O2_SIGNPOST_END (forwarding, sid, " forwardInputs" , " Cleaning done" );
@@ -1278,7 +1277,7 @@ void DataProcessingDevice::Run()
12781277 // - we can trigger further events from the queue
12791278 // - we can guarantee this is the last thing we do in the loop (
12801279 // assuming no one else is adding to the queue before this point).
1281- auto onDrop = [&registry = mServiceRegistry , lid](TimesliceSlot slot, std::vector<std::vector <fair::mq::MessagePtr>>& dropped, TimesliceIndex::OldestOutputInfo oldestOutputInfo) {
1280+ auto onDrop = [&registry = mServiceRegistry , lid](TimesliceSlot slot, std::vector<std::span <fair::mq::MessagePtr>>& dropped, TimesliceIndex::OldestOutputInfo oldestOutputInfo) {
12821281 O2_SIGNPOST_START (device, lid, " run_loop" , " Dropping message from slot %" PRIu64 " . Forwarding as needed." , (uint64_t )slot.index );
12831282 ServiceRegistryRef ref{registry};
12841283 ref.get <AsyncQueue>();
@@ -1985,7 +1984,7 @@ void DataProcessingDevice::handleData(ServiceRegistryRef ref, InputChannelInfo&
19851984 nPayloadsPerHeader = 1 ;
19861985 ii += (nMessages / 2 ) - 1 ;
19871986 }
1988- auto onDrop = [ref](TimesliceSlot slot, std::vector<std::vector <fair::mq::MessagePtr>>& dropped, TimesliceIndex::OldestOutputInfo oldestOutputInfo) {
1987+ auto onDrop = [ref](TimesliceSlot slot, std::vector<std::span <fair::mq::MessagePtr>>& dropped, TimesliceIndex::OldestOutputInfo oldestOutputInfo) {
19891988 O2_SIGNPOST_ID_GENERATE (cid, async_queue);
19901989 O2_SIGNPOST_EVENT_EMIT (async_queue, cid, " onDrop" , " Dropping message from slot %zu. Forwarding as needed. Timeslice %zu" ,
19911990 slot.index , oldestOutputInfo.timeslice .value );
@@ -2163,15 +2162,20 @@ bool DataProcessingDevice::tryDispatchComputation(ServiceRegistryRef ref, std::v
21632162 // want to support multithreaded dispatching of operations, I can simply
21642163 // move these to some thread local store and the rest of the lambdas
21652164 // should work just fine.
2166- std::vector<std::vector<fair::mq::MessagePtr>> currentSetOfInputs;
2165+ std::vector<std::span<fair::mq::MessagePtr>> currentSetOfInputs;
2166+ std::vector<std::vector<fair::mq::MessagePtr>> ownedInputs;
21672167
21682168 //
2169- auto getInputSpan = [ref, &currentSetOfInputs](TimesliceSlot slot, bool consume = true ) {
2169+ auto getInputSpan = [ref, &currentSetOfInputs, &ownedInputs ](TimesliceSlot slot, bool consume = true ) {
21702170 auto & relayer = ref.get <DataRelayer>();
21712171 if (consume) {
21722172 currentSetOfInputs = relayer.consumeAllInputsForTimeslice (slot);
21732173 } else {
2174- currentSetOfInputs = relayer.consumeExistingInputsForTimeslice (slot);
2174+ ownedInputs = relayer.consumeExistingInputsForTimeslice (slot);
2175+ currentSetOfInputs.resize (ownedInputs.size ());
2176+ for (size_t i = 0 ; i < ownedInputs.size (); ++i) {
2177+ currentSetOfInputs[i] = std::span (ownedInputs[i]);
2178+ }
21752179 }
21762180 // Convert raw message indices directly to a DataRef in O(1).
21772181 // Used both by the sequential PartIterator and as the fallback for positional access.
@@ -2245,7 +2249,7 @@ bool DataProcessingDevice::tryDispatchComputation(ServiceRegistryRef ref, std::v
22452249 // to avoid double counting them.
22462250 // This was actually the easiest solution we could find for
22472251 // O2-646.
2248- auto cleanTimers = [&currentSetOfInputs](TimesliceSlot slot, InputRecord& record) {
2252+ auto cleanTimers = [&currentSetOfInputs, &ownedInputs ](TimesliceSlot slot, InputRecord& record) {
22492253 assert (record.size () == currentSetOfInputs.size ());
22502254 for (size_t ii = 0 , ie = record.size (); ii < ie; ++ii) {
22512255 // assuming that for timer inputs we do have exactly one PartRef object
@@ -2258,8 +2262,10 @@ bool DataProcessingDevice::tryDispatchComputation(ServiceRegistryRef ref, std::v
22582262 if (input.header == nullptr ) {
22592263 continue ;
22602264 }
2261- // This will hopefully delete the message.
2262- currentSetOfInputs[ii].clear ();
2265+ // For the consume=false (Process) path, ownedInputs holds the actual
2266+ // message vectors and the span points into them.
2267+ ownedInputs[ii].clear ();
2268+ currentSetOfInputs[ii] = {};
22632269 }
22642270 };
22652271
@@ -2412,9 +2418,11 @@ bool DataProcessingDevice::tryDispatchComputation(ServiceRegistryRef ref, std::v
24122418 if (spec.forwards .empty () == false ) {
24132419 auto & timesliceIndex = ref.get <TimesliceIndex>();
24142420 forwardInputs (ref, action.slot , currentSetOfInputs, timesliceIndex.getOldestPossibleOutput (), false );
2421+ ref.get <DataRelayer>().releaseSlot (action.slot );
24152422 O2_SIGNPOST_END (device, aid, " device" , " Forwarding inputs consume: %d." , false );
24162423 continue ;
24172424 }
2425+ ref.get <DataRelayer>().releaseSlot (action.slot );
24182426 }
24192427 // If there is no optional inputs we canForwardEarly
24202428 // the messages to that parallel processing can happen.
@@ -2567,6 +2575,9 @@ bool DataProcessingDevice::tryDispatchComputation(ServiceRegistryRef ref, std::v
25672575 if (action.op == CompletionPolicy::CompletionOp::Process) {
25682576 cleanTimers (action.slot , record);
25692577 }
2578+ if (shouldConsume) {
2579+ ref.get <DataRelayer>().releaseSlot (action.slot );
2580+ }
25702581 O2_SIGNPOST_END (device, aid, " device" , " Done processing action on slot %lu for action %{public}s" , action.slot .index , fmt::format (" {}" , action.op ).c_str ());
25712582 }
25722583 O2_SIGNPOST_END (device, sid, " device" , " Start processing ready actions" );
0 commit comments