diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx index a19759d0577ec..c31e5c274235d 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx @@ -52,8 +52,6 @@ void TrackerTraitsGPU::initialiseTimeFrame(const int iteration) if (this->mTrkParams[iteration].PassFlags[IterationStep::FirstPass] || this->mTrkParams[iteration].PassFlags[IterationStep::UseUPCMask]) { mTimeFrameGPU->loadROFCutMask(iteration); } - // push every create artefact on the stack - mTimeFrameGPU->pushMemoryStack(iteration); } template @@ -68,8 +66,9 @@ void TrackerTraitsGPU::computeLayerTracklets(const int iteration, int i { const auto topology = mTimeFrameGPU->getDeviceTrackingTopologyView(); const auto hostTopology = mTimeFrameGPU->getTrackingTopologyView(); + const bool loadFirstPassData = this->mTrkParams[iteration].PassFlags[IterationStep::FirstPass] && iVertex <= 0; // load data only on first pass and first vertex for (int iLayer{0}; iLayer < this->mTrkParams[iteration].NLayers; ++iLayer) { - if (this->mTrkParams[iteration].PassFlags[IterationStep::FirstPass]) { + if (loadFirstPassData) { mTimeFrameGPU->createUsedClustersDevice(iLayer); mTimeFrameGPU->loadClustersDevice(iLayer); mTimeFrameGPU->loadClustersIndexTables(iLayer); @@ -78,9 +77,16 @@ void TrackerTraitsGPU::computeLayerTracklets(const int iteration, int i mTimeFrameGPU->recordEvent(iLayer); } + for (int linkId{0}; linkId < hostTopology.nLinks; ++linkId) { + mTimeFrameGPU->createTrackletsLUTDevice(loadFirstPassData, linkId); // on first pass allocates, then only clears memory + } + + // Stack allocations created from trackleting through road finding are scoped to one tracker pass. + // With per-primary-vertex processing, the chain is called once per vertex while initialisation is only done once. + mTimeFrameGPU->pushMemoryStack(iteration); + for (int linkId{0}; linkId < hostTopology.nLinks; ++linkId) { const auto link = hostTopology.getLink(linkId); - mTimeFrameGPU->createTrackletsLUTDevice(this->mTrkParams[iteration].PassFlags[IterationStep::FirstPass], linkId); mTimeFrameGPU->waitEvent(linkId, link.fromLayer); mTimeFrameGPU->waitEvent(linkId, link.toLayer); countTrackletsInROFsHandler(mTimeFrameGPU->getDeviceIndexTableUtils(),