AliceO2Group
diff --git a/Common/ML/src/OrtInterface.cxx b/Common/ML/src/OrtInterface.cxx
Lines changed: 10 additions & 0 deletions
diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h
Lines changed: 1 addition & 0 deletions
diff --git a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx
Lines changed: 48 additions & 12 deletions
@@ -354,6 +354,11 @@ template void OrtModel::inference<OrtDataType::Float16_t, OrtDataType::Float16_t
 template void OrtModel::inference<OrtDataType::Float16_t, float>(OrtDataType::Float16_t*, int64_t, float*);
 template void OrtModel::inference<float, OrtDataType::Float16_t>(float*, int64_t, OrtDataType::Float16_t*);
 template void OrtModel::inference<float, float>(float*, int64_t, float*);
+template void OrtModel::inference<int8_t, int8_t>(int8_t*, int64_t, int8_t*);
+template void OrtModel::inference<int8_t, float>(int8_t*, int64_t, float*);
+template void OrtModel::inference<float, int8_t>(float*, int64_t, int8_t*);
+template void OrtModel::inference<int8_t, OrtDataType::Float16_t>(int8_t*, int64_t, OrtDataType::Float16_t*);
+template void OrtModel::inference<OrtDataType::Float16_t, int8_t>(OrtDataType::Float16_t*, int64_t, int8_t*);
 
 template <class I, class O>
 void OrtModel::inference(I** input, int64_t input_size, O* output)
@@ -414,6 +419,11 @@ template void OrtModel::inference<OrtDataType::Float16_t, OrtDataType::Float16_t
 template void OrtModel::inference<OrtDataType::Float16_t, float>(OrtDataType::Float16_t**, int64_t, float*);
 template void OrtModel::inference<float, OrtDataType::Float16_t>(float**, int64_t, OrtDataType::Float16_t*);
 template void OrtModel::inference<float, float>(float**, int64_t, float*);
+template void OrtModel::inference<int8_t, int8_t>(int8_t**, int64_t, int8_t*);
+template void OrtModel::inference<int8_t, float>(int8_t**, int64_t, float*);
+template void OrtModel::inference<float, int8_t>(float**, int64_t, int8_t*);
+template void OrtModel::inference<int8_t, OrtDataType::Float16_t>(int8_t**, int64_t, OrtDataType::Float16_t*);
+template void OrtModel::inference<OrtDataType::Float16_t, int8_t>(OrtDataType::Float16_t**, int64_t, int8_t*);
 
 template <class I, class O>
 std::vector<O> OrtModel::inference(std::vector<std::vector<I>>& inputs)
 
@@ -300,6 +300,7 @@ AddOption(nnCCDBClassificationLayerType, std::string, "FC", "", 0, "Distinguishe
 AddOption(nnCCDBRegressionLayerType, std::string, "FC", "", 0, "Distinguishes between network with different layer types. Options: FC, CNN")
 AddOption(nnCCDBBeamType, std::string, "pp", "", 0, "Distinguishes between networks trained for different beam types. Options: pp, pPb, PbPb")
 AddOption(nnCCDBInteractionRate, std::string, "500", "", 0, "Distinguishes between networks for different interaction rates [kHz].")
+AddOption(nnCCDBExtraMetadata, std::string, "", "", 0, "Extra metadata to distinguish between networks, e.g. for different internal datatypes, etc.")
 AddHelp("help", 'h')
 EndConfig()
 
 
@@ -1269,15 +1269,27 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
                 if(GetProcessingSettings().debugLevel >= 1 && (doGPU || lane < 4)) { nnTimers[3*lane]->Start(); }
                 if (clustererNNShadow.mNnInferenceInputDType == 0) {
                   if (clustererNNShadow.mNnInferenceOutputDType == 0) {
-                    (nnApplication.mModelClass).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mModelProbabilities_16);
+                    (nnApplication.mModelClass).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mModelProbabilities_32);
                   } else if (clustererNNShadow.mNnInferenceOutputDType == 1) {
-                    (nnApplication.mModelClass).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mModelProbabilities_32);
+                    (nnApplication.mModelClass).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mModelProbabilities_16);
+                  } else if (clustererNNShadow.mNnInferenceOutputDType == 2) {
+                    (nnApplication.mModelClass).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mModelProbabilities_8);
                   }
                 } else if (clustererNNShadow.mNnInferenceInputDType == 1) {
                   if (clustererNNShadow.mNnInferenceOutputDType == 0) {
-                    (nnApplication.mModelClass).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mModelProbabilities_16);
+                    (nnApplication.mModelClass).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mModelProbabilities_32);
                   } else if (clustererNNShadow.mNnInferenceOutputDType == 1) {
-                    (nnApplication.mModelClass).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mModelProbabilities_32);
+                    (nnApplication.mModelClass).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mModelProbabilities_16);
+                  } else if (clustererNNShadow.mNnInferenceOutputDType == 2) {
+                    (nnApplication.mModelClass).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mModelProbabilities_8);
+                  }
+                } else if (clustererNNShadow.mNnInferenceInputDType == 2) {
+                  if (clustererNNShadow.mNnInferenceOutputDType == 0) {
+                    (nnApplication.mModelClass).inference(clustererNNShadow.mInputData_8, iSize, clustererNNShadow.mModelProbabilities_32);
+                  } else if (clustererNNShadow.mNnInferenceOutputDType == 1) {
+                    (nnApplication.mModelClass).inference(clustererNNShadow.mInputData_8, iSize, clustererNNShadow.mModelProbabilities_16);
+                  } else if (clustererNNShadow.mNnInferenceOutputDType == 2) {
+                    (nnApplication.mModelClass).inference(clustererNNShadow.mInputData_8, iSize, clustererNNShadow.mModelProbabilities_8);
                   }
                 }
                 if(GetProcessingSettings().debugLevel >= 1 && (doGPU || lane < 4)) { nnTimers[3*lane]->Stop(); } // doGPU || lane<4 -> only for GPU or first 4 CPU lanes (to limit number of concurrent timers). At least gives some statistics for CPU time...
@@ -1289,31 +1301,55 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
                 if(GetProcessingSettings().debugLevel >= 1 && (doGPU || lane < 4)) { nnTimers[3*lane + 1]->Start(); }
                 if (clustererNNShadow.mNnInferenceInputDType == 0) {
                   if (clustererNNShadow.mNnInferenceOutputDType == 0) {
-                    (nnApplication.mModelReg1).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mOutputDataReg1_16);
+                    (nnApplication.mModelReg1).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mOutputDataReg1_32);
                   } else if (clustererNNShadow.mNnInferenceOutputDType == 1) {
-                    (nnApplication.mModelReg1).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mOutputDataReg1_32);
+                    (nnApplication.mModelReg1).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mOutputDataReg1_16);
+                  } else if (clustererNNShadow.mNnInferenceOutputDType == 2) {
+                    (nnApplication.mModelReg1).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mOutputDataReg1_8);
                   }
                 } else if (clustererNNShadow.mNnInferenceInputDType == 1) {
                   if (clustererNNShadow.mNnInferenceOutputDType == 0) {
-                    (nnApplication.mModelReg1).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mOutputDataReg1_16);
+                    (nnApplication.mModelReg1).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mOutputDataReg1_32);
                   } else if (clustererNNShadow.mNnInferenceOutputDType == 1) {
-                    (nnApplication.mModelReg1).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mOutputDataReg1_32);
+                    (nnApplication.mModelReg1).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mOutputDataReg1_16);
+                  } else if (clustererNNShadow.mNnInferenceOutputDType == 2) {
+                    (nnApplication.mModelReg1).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mOutputDataReg1_8);
+                  }
+                } else if (clustererNNShadow.mNnInferenceInputDType == 2) {
+                  if (clustererNNShadow.mNnInferenceOutputDType == 0) {
+                    (nnApplication.mModelReg1).inference(clustererNNShadow.mInputData_8, iSize, clustererNNShadow.mOutputDataReg1_32);
+                  } else if (clustererNNShadow.mNnInferenceOutputDType == 1) {
+                    (nnApplication.mModelReg1).inference(clustererNNShadow.mInputData_8, iSize, clustererNNShadow.mOutputDataReg1_16);
+                  } else if (clustererNNShadow.mNnInferenceOutputDType == 2) {
+                    (nnApplication.mModelReg1).inference(clustererNNShadow.mInputData_8, iSize, clustererNNShadow.mOutputDataReg1_8);
                   }
                 }
                 if(GetProcessingSettings().debugLevel >= 1 && (doGPU || lane < 4)) { nnTimers[3*lane + 1]->Stop(); }
                 if (nnApplication.mModelClass.getNumOutputNodes()[0][1] > 1 && nnApplication.mModelReg2.isInitialized()) {
                   if(GetProcessingSettings().debugLevel >= 1 && (doGPU || lane < 4)) { nnTimers[3*lane + 2]->Start(); }
                   if (clustererNNShadow.mNnInferenceInputDType == 0) {
                     if (clustererNNShadow.mNnInferenceOutputDType == 0) {
-                      (nnApplication.mModelReg2).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mOutputDataReg2_16);
+                      (nnApplication.mModelReg2).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mOutputDataReg2_32);
                     } else if (clustererNNShadow.mNnInferenceOutputDType == 1) {
-                      (nnApplication.mModelReg2).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mOutputDataReg2_32);
+                      (nnApplication.mModelReg2).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mOutputDataReg2_16);
+                    } else if (clustererNNShadow.mNnInferenceOutputDType == 2) {
+                      (nnApplication.mModelReg2).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mOutputDataReg2_8);
                     }
                   } else if (clustererNNShadow.mNnInferenceInputDType == 1) {
                     if (clustererNNShadow.mNnInferenceOutputDType == 0) {
-                      (nnApplication.mModelReg2).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mOutputDataReg2_16);
+                      (nnApplication.mModelReg2).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mOutputDataReg2_32);
                     } else if (clustererNNShadow.mNnInferenceOutputDType == 1) {
-                      (nnApplication.mModelReg2).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mOutputDataReg2_32);
+                      (nnApplication.mModelReg2).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mOutputDataReg2_16);
+                    } else if (clustererNNShadow.mNnInferenceOutputDType == 2) {
+                      (nnApplication.mModelReg2).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mOutputDataReg2_8);
+                    }
+                  } else if (clustererNNShadow.mNnInferenceInputDType == 2) {
+                    if (clustererNNShadow.mNnInferenceOutputDType == 0) {
+                      (nnApplication.mModelReg2).inference(clustererNNShadow.mInputData_8, iSize, clustererNNShadow.mOutputDataReg2_32);
+                    } else if (clustererNNShadow.mNnInferenceOutputDType == 1) {
+                      (nnApplication.mModelReg2).inference(clustererNNShadow.mInputData_8, iSize, clustererNNShadow.mOutputDataReg2_16);
+                    } else if (clustererNNShadow.mNnInferenceOutputDType == 2) {
+                      (nnApplication.mModelReg2).inference(clustererNNShadow.mInputData_8, iSize, clustererNNShadow.mOutputDataReg2_8);
                     }
                   }
                   if(GetProcessingSettings().debugLevel >= 1 && (doGPU || lane < 4)) { nnTimers[3*lane + 2]->Stop(); }