Also, running YOLOX on Gaudi1 does not seem to work: it produces the following errors and then hangs.
Docker image: vault.habana.ai/gaudi-docker/1.9.0/ubuntu20.04/habanalabs/pytorch-installer-1.13.1
2023-06-06 00:24:57 | INFO | yolox.core.trainer:237 - Loading dataset into memory...
2023-06-06 00:25:05 | INFO | yolox.core.trainer:239 - Done
/home/jenkins/workspace/cdsoftwarebuilder/create-binaries-from-sw-sources---bp-dt/repos/hcl/src/infra/hcl_event.cpp::41(waitForList): The condition [ count++ < GCFG_MAX_WAIT_ATTEMPTS.value() ] failed. waitForList:
waitForHandle timed out. maxWaitAttempts: 100, count: 101
/home/jenkins/workspace/cdsoftwarebuilder/create-binaries-from-sw-sources---bp-dt/repos/hcl/src/infra/hcl_event.cpp::41(waitForList): The condition [ count++ < GCFG_MAX_WAIT_ATTEMPTS.value() ] failed. waitForList:
waitForHandle timed out. maxWaitAttempts: 100, count: 101
/home/jenkins/workspace/cdsoftwarebuilder/create-binaries-from-sw-sources---bp-dt/repos/hcl/src/infra/hcl_event.cpp::41(waitForList): The condition [ count++ < GCFG_MAX_WAIT_ATTEMPTS.value() ] failed. waitForList:
waitForHandle timed out. maxWaitAttempts: 100, count: 101
/home/jenkins/workspace/cdsoftwarebuilder/create-binaries-from-sw-sources---bp-dt/repos/hcl/src/infra/hcl_event.cpp::41(waitForList): The condition [ count++ < GCFG_MAX_WAIT_ATTEMPTS.value() ] failed. waitForList:
waitForHandle timed out. maxWaitAttempts: 100, count: 101
/home/jenkins/workspace/cdsoftwarebuilder/create-binaries-from-sw-sources---bp-dt/repos/hcl/src/infra/hcl_event.cpp::41(waitForList): The condition [ count++ < GCFG_MAX_WAIT_ATTEMPTS.value() ] failed. waitForList:
waitForHandle timed out. maxWaitAttempts: 100, count: 101
/home/jenkins/workspace/cdsoftwarebuilder/create-binaries-from-sw-sources---bp-dt/repos/hcl/src/infra/hcl_event.cpp::41(waitForList): The condition [ count++ < GCFG_MAX_WAIT_ATTEMPTS.value() ] failed. waitForList:
waitForHandle timed out. maxWaitAttempts: 100, count: 101
/home/jenkins/workspace/cdsoftwarebuilder/create-binaries-from-sw-sources---bp-dt/repos/hcl/src/infra/hcl_event.cpp::41(waitForList): The condition [ count++ < GCFG_MAX_WAIT_ATTEMPTS.value() ] failed. waitForList:
waitForHandle timed out. maxWaitAttempts: 100, count: 101
/home/jenkins/workspace/cdsoftwarebuilder/create-binaries-from-sw-sources---bp-dt/repos/hcl/src/infra/hcl_event.cpp::41(waitForList): The condition [ count++ < GCFG_MAX_WAIT_ATTEMPTS.value() ] failed. waitForList:
waitForHandle timed out. maxWaitAttempts: 100, count: 101
dmesg: read kernel buffer failed: Operation not permitted
dmesg: read kernel buffer failed: Operation not permitted
dmesg: read kernel buffer failed: Operation not permitted
src/tcmalloc.cc:332] Attempt to free invalid pointer 0x2020202010676e18
dmesg: read kernel buffer failed: Operation not permitted
backtrace (up to 30)
/usr/lib/habanalabs/libhl_logger.so(hl_logger::v1_0::logStackTrace(std::ostream&)+0x50) [0x7f8063e38240]
/usr/lib/habanalabs/libSynapse.so(+0x12676ec) [0x7f8065bf96ec]
/usr/lib/habanalabs/libSynapse.so(std::_Function_handler<void (int, char const*, bool), hl_logger::v1_2_fmt_compile::ModuleLoggerData<synapse::LogManager::LogType>::ModuleLoggerData(char const*)::{lambda(int, char
const*, bool)#1}>::_M_invoke(std::_Any_data const&, int&&, char const*&&, bool&&)+0x1f) [0x7f8065bfa24f]
/usr/lib/habanalabs/libhl_logger.so(+0xfb47) [0x7f8063e37b47]
/usr/lib/habanalabs/libhl_logger.so(signalHandler(int, siginfo_t*, void*)+0x29) [0x7f8063e41a09]
===============================================================================
====================== USER CODE STACK TRACE START POINT ======================
===============================================================================
/lib/x86_64-linux-gnu/libc.so.6(+0x43090) [0x7f81c60bc090]
/lib/x86_64-linux-gnu/libc.so.6(gsignal+0xcb) [0x7f81c60bc00b]
/usr/lib/habanalabs/libTPCFuser.so(+0x17e7058) [0x7f80624c4058]
/lib/x86_64-linux-gnu/libc.so.6(+0x43090) [0x7f81c60bc090]
/usr/lib/habanalabs/libSynapse.so(DfaBase::dumpEngStatus()+0x85) [0x7f80657255f5]
/usr/lib/habanalabs/libSynapse.so(DfaBase::checkFailure(DfaStatus)+0x255) [0x7f8065734b75]
/usr/lib/habanalabs/libSynapse.so(DfaBase::notifyHlthunkFailure(DfaErrorCode)+0x75) [0x7f80657242b5]
/usr/lib/habanalabs/libSynapse.so(synSingleton::notifyHlthunkFailure(DfaErrorCode)+0x42) [0x7f806591abb2]
/usr/lib/habanalabs/libSynapse.so(hclNotifyFailure(DfaErrorCode, unsigned long)+0x44) [0x7f806575de84]
/usr/lib/habanalabs/libhcl.so(waitForList(std::__cxx11::list<HCL_Request, std::allocator<HCL_Request> >&, unsigned short)+0x1140) [0x7f8064431d20]
/usr/lib/habanalabs/libhcl.so(HclDevice::sync(unsigned int, unsigned short)+0x49b) [0x7f806465fa7b]
/usr/lib/habanalabs/libhcl.so(HclDevice::onNewCommEnd(unsigned int, HclConfig&)+0x9a) [0x7f80646699da]
/usr/lib/habanalabs/libhcl.so(hccl::hccl_communicator::initialize(hccl::internal_unique_id_t const*)+0x2569) [0x7f806425eed9]
/usr/lib/habanalabs/libhcl.so(hccl::hccl_context::comm_init_rank(void**, int, hcclUniqueId&, int)+0x121) [0x7f8064277031]
/usr/lib/habanalabs/libhcl.so(hcclCommInitRank_Original(void**, int, hcclUniqueId&, int)+0xcb) [0x7f80641ed98b]
/opt/habanalabs/habana_plugins/libhost_profiler.so(HcclSingletonHostProfiler::hcclCommInitRank(void**, int, hcclUniqueId&, int)+0x59) [0x7f8060c3ca59]
/usr/lib/habanalabs/libhcl.so(hcclCommInitRank_impl(void**, int, hcclUniqueId, int)+0x37) [0x7f80641da447]
/usr/local/lib/python3.8/dist-packages/habana_frameworks/torch/lib/libhabana_pytorch_backend.so(std::_Function_handler<hcclResult_t (void**, int, hcclUniqueId, int), hcclResult_t (*)(void**, int, hcclUniqueId, int)
>::_M_invoke(std::_Any_data const&, void**&&, int&&, hcclUniqueId&&, int&&)+0x2e) [0x7f80692dcc7e]
/usr/local/lib/python3.8/dist-packages/habana_frameworks/torch/lib/libhabana_pytorch_backend.so(hcclCommInitRank+0x6a) [0x7f80692c926a]
/usr/local/lib/python3.8/dist-packages/habana_frameworks/torch/distributed/_hccl_C.so(c10d::ProcessGroupHCCL::getComm(int)+0x4e4) [0x7f8068902e84]
/usr/local/lib/python3.8/dist-packages/habana_frameworks/torch/distributed/_hccl_C.so(c10d::ProcessGroupHCCL::getCommList(std::vector<int, std::allocator<int> > const&)+0x112) [0x7f80689030f2]
/usr/local/lib/python3.8/dist-packages/habana_frameworks/torch/distributed/_hccl_C.so(c10d::ProcessGroupHCCL::allgather(std::vector<std::vector<at::Tensor, std::allocator<at::Tensor> >, std::allocator<std::vector<a
t::Tensor, std::allocator<at::Tensor> > > >&, std::vector<at::Tensor, std::allocator<at::Tensor> >&, c10d::AllgatherOptions const&)+0x74a) [0x7f806891665a]
/usr/local/lib/python3.8/dist-packages/torch/lib/libtorch_cpu.so(+0x4f91139) [0x7f81bd974139]
/usr/local/lib/python3.8/dist-packages/torch/lib/libtorch_cpu.so(+0x4f96c32) [0x7f81bd979c32]
/usr/local/lib/python3.8/dist-packages/torch/lib/libtorch_cpu.so(c10d::ops::allgather(c10::intrusive_ptr<c10d::ProcessGroup, c10::detail::intrusive_target_default_null_type<c10d::ProcessGroup> > const&, std::vector
<std::vector<at::Tensor, std::allocator<at::Tensor> >, std::allocator<std::vector<at::Tensor, std::allocator<at::Tensor> > > > const&, std::vector<at::Tensor, std::allocator<at::Tensor> > const&, c10d::AllgatherOpt
ions const&)+0x157) [0x7f81bd977467]
backtrace (up to 30)
/usr/lib/habanalabs/libhl_logger.so(hl_logger::v1_0::logStackTrace(std::ostream&)+0x50) [0x7fd5d40e1240]
/usr/lib/habanalabs/libSynapse.so(+0x12676ec) [0x7fd5d5ea26ec]
/usr/lib/habanalabs/libSynapse.so(std::_Function_handler<void (int, char const*, bool), hl_logger::v1_2_fmt_compile::ModuleLoggerData<synapse::LogManager::LogType>::ModuleLoggerData(char const*)::{lambda(int, char
const*, bool)#1}>::_M_invoke(std::_Any_data const&, int&&, char const*&&, bool&&)+0x1f) [0x7fd5d5ea324f]
/usr/lib/habanalabs/libhl_logger.so(+0xfb47) [0x7fd5d40e0b47]
/usr/lib/habanalabs/libhl_logger.so(signalHandler(int, siginfo_t*, void*)+0x29) [0x7fd5d40eaa09]
===============================================================================
====================== USER CODE STACK TRACE START POINT ======================
===============================================================================
/lib/x86_64-linux-gnu/libc.so.6(+0x43090) [0x7fd735b24090]
/lib/x86_64-linux-gnu/libc.so.6(gsignal+0xcb) [0x7fd735b2400b]
/usr/lib/habanalabs/libTPCFuser.so(+0x17e7058) [0x7fd5d276d058]
/lib/x86_64-linux-gnu/libc.so.6(+0x43090) [0x7fd735b24090]
/lib/x86_64-linux-gnu/libc.so.6(__read+0x4c) [0x7fd735bef00c]
/lib/x86_64-linux-gnu/libc.so.6(_IO_file_underflow+0x17f) [0x7fd735b71b9f]
/lib/x86_64-linux-gnu/libc.so.6(_IO_default_uflow+0x36) [0x7fd735b72f86]
/lib/x86_64-linux-gnu/libc.so.6(_IO_getline_info+0xac) [0x7fd735b6486c]
/lib/x86_64-linux-gnu/libc.so.6(fgets+0x9a) [0x7fd735b636ca]
/usr/lib/habanalabs/libSynapse.so(exec(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >)+0x129) [0x7fd5d59ce9e9]
/usr/lib/habanalabs/libSynapse.so(DfaBase::logHlSmi()+0x12a) [0x7fd5d59cee3a]
/usr/lib/habanalabs/libSynapse.so(DfaBase::checkFailure(DfaStatus)+0x370) [0x7fd5d59ddc90]
/usr/lib/habanalabs/libSynapse.so(DfaBase::notifyHlthunkFailure(DfaErrorCode)+0x75) [0x7fd5d59cd2b5]
/usr/lib/habanalabs/libSynapse.so(synSingleton::notifyHlthunkFailure(DfaErrorCode)+0x42) [0x7fd5d5bc3bb2]
/usr/lib/habanalabs/libSynapse.so(hclNotifyFailure(DfaErrorCode, unsigned long)+0x44) [0x7fd5d5a06e84]
/usr/lib/habanalabs/libhcl.so(waitForList(std::__cxx11::list<HCL_Request, std::allocator<HCL_Request> >&, unsigned short)+0x1140) [0x7fd5d46dad20]
/usr/lib/habanalabs/libhcl.so(HclDevice::sync(unsigned int, unsigned short)+0x49b) [0x7fd5d4908a7b]
/usr/lib/habanalabs/libhcl.so(HclDevice::onNewCommEnd(unsigned int, HclConfig&)+0x9a) [0x7fd5d49129da]
/usr/lib/habanalabs/libhcl.so(hccl::hccl_communicator::initialize(hccl::internal_unique_id_t const*)+0x2569) [0x7fd5d4507ed9]
/usr/lib/habanalabs/libhcl.so(hccl::hccl_context::comm_init_rank(void**, int, hcclUniqueId&, int)+0x121) [0x7fd5d4520031]
/usr/lib/habanalabs/libhcl.so(hcclCommInitRank_Original(void**, int, hcclUniqueId&, int)+0xcb) [0x7fd5d449698b]
/opt/habanalabs/habana_plugins/libhost_profiler.so(HcclSingletonHostProfiler::hcclCommInitRank(void**, int, hcclUniqueId&, int)+0x59) [0x7fd5d0ef0a59]
/usr/lib/habanalabs/libhcl.so(hcclCommInitRank_impl(void**, int, hcclUniqueId, int)+0x37) [0x7fd5d4483447]
/usr/local/lib/python3.8/dist-packages/habana_frameworks/torch/lib/libhabana_pytorch_backend.so(std::_Function_handler<hcclResult_t (void**, int, hcclUniqueId, int), hcclResult_t (*)(void**, int, hcclUniqueId, int)
>::_M_invoke(std::_Any_data const&, void**&&, int&&, hcclUniqueId&&, int&&)+0x2e) [0x7fd5d8d44c7e]
/usr/local/lib/python3.8/dist-packages/habana_frameworks/torch/lib/libhabana_pytorch_backend.so(hcclCommInitRank+0x6a) [0x7fd5d8d3126a]
backtrace (up to 30)
/usr/lib/habanalabs/libhl_logger.so(hl_logger::v1_0::logStackTrace(std::ostream&)+0x50) [0x7f8af0aaf240]
/usr/lib/habanalabs/libSynapse.so(+0x12676ec) [0x7f8af28706ec]
/usr/lib/habanalabs/libSynapse.so(std::_Function_handler<void (int, char const*, bool), hl_logger::v1_2_fmt_compile::ModuleLoggerData<synapse::LogManager::LogType>::ModuleLoggerData(char const*)::{lambda(int, char
const*, bool)#1}>::_M_invoke(std::_Any_data const&, int&&, char const*&&, bool&&)+0x1f) [0x7f8af287124f]
/usr/lib/habanalabs/libhl_logger.so(+0xfb47) [0x7f8af0aaeb47]
/usr/lib/habanalabs/libhl_logger.so(signalHandler(int, siginfo_t*, void*)+0x29) [0x7f8af0ab8a09]
===============================================================================
====================== USER CODE STACK TRACE START POINT ======================
===============================================================================
/lib/x86_64-linux-gnu/libc.so.6(+0x43090) [0x7f8c524f2090]
/lib/x86_64-linux-gnu/libc.so.6(gsignal+0xcb) [0x7f8c524f200b]
/usr/lib/habanalabs/libTPCFuser.so(+0x17e7058) [0x7f8aef13b058]
/lib/x86_64-linux-gnu/libc.so.6(+0x43090) [0x7f8c524f2090]
/lib/x86_64-linux-gnu/libc.so.6(__read+0x4c) [0x7f8c525bd00c]
/lib/x86_64-linux-gnu/libc.so.6(_IO_file_underflow+0x17f) [0x7f8c5253fb9f]
/lib/x86_64-linux-gnu/libc.so.6(_IO_default_uflow+0x36) [0x7f8c52540f86]
/lib/x86_64-linux-gnu/libc.so.6(_IO_getline_info+0xac) [0x7f8c5253286c]
/lib/x86_64-linux-gnu/libc.so.6(fgets+0x9a) [0x7f8c525316ca]
/usr/lib/habanalabs/libSynapse.so(exec(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >)+0x129) [0x7f8af239c9e9]
/usr/lib/habanalabs/libSynapse.so(DfaBase::logHlSmi()+0x12a) [0x7f8af239ce3a]
/usr/lib/habanalabs/libSynapse.so(DfaBase::checkFailure(DfaStatus)+0x370) [0x7f8af23abc90]
/usr/lib/habanalabs/libSynapse.so(DfaBase::notifyHlthunkFailure(DfaErrorCode)+0x75) [0x7f8af239b2b5]
/usr/lib/habanalabs/libSynapse.so(synSingleton::notifyHlthunkFailure(DfaErrorCode)+0x42) [0x7f8af2591bb2]
/usr/lib/habanalabs/libSynapse.so(hclNotifyFailure(DfaErrorCode, unsigned long)+0x44) [0x7f8af23d4e84]
/usr/lib/habanalabs/libhcl.so(waitForList(std::__cxx11::list<HCL_Request, std::allocator<HCL_Request> >&, unsigned short)+0x1140) [0x7f8af10a8d20]
/usr/lib/habanalabs/libhcl.so(HclDevice::sync(unsigned int, unsigned short)+0x49b) [0x7f8af12d6a7b]
/usr/lib/habanalabs/libhcl.so(HclDevice::onNewCommEnd(unsigned int, HclConfig&)+0x9a) [0x7f8af12e09da]
/usr/lib/habanalabs/libhcl.so(hccl::hccl_communicator::initialize(hccl::internal_unique_id_t const*)+0x2569) [0x7f8af0ed5ed9]
/usr/lib/habanalabs/libhcl.so(hccl::hccl_context::comm_init_rank(void**, int, hcclUniqueId&, int)+0x121) [0x7f8af0eee031]
/usr/lib/habanalabs/libhcl.so(hcclCommInitRank_Original(void**, int, hcclUniqueId&, int)+0xcb) [0x7f8af0e6498b]
/opt/habanalabs/habana_plugins/libhost_profiler.so(HcclSingletonHostProfiler::hcclCommInitRank(void**, int, hcclUniqueId&, int)+0x59) [0x7f8aed8bea59]
/usr/lib/habanalabs/libhcl.so(hcclCommInitRank_impl(void**, int, hcclUniqueId, int)+0x37) [0x7f8af0e51447]
/usr/local/lib/python3.8/dist-packages/habana_frameworks/torch/lib/libhabana_pytorch_backend.so(std::_Function_handler<hcclResult_t (void**, int, hcclUniqueId, int), hcclResult_t (*)(void**, int, hcclUniqueId, int)
>::_M_invoke(std::_Any_data const&, void**&&, int&&, hcclUniqueId&&, int&&)+0x2e) [0x7f8af5712c7e]
/usr/local/lib/python3.8/dist-packages/habana_frameworks/torch/lib/libhabana_pytorch_backend.so(hcclCommInitRank+0x6a) [0x7f8af56ff26a]
backtrace (up to 30)
/usr/lib/habanalabs/libhl_logger.so(hl_logger::v1_0::logStackTrace(std::ostream&)+0x50) [0x7f8d6effe240]
/usr/lib/habanalabs/libSynapse.so(+0x12676ec) [0x7f8d70dbf6ec]
/usr/lib/habanalabs/libSynapse.so(std::_Function_handler<void (int, char const*, bool), hl_logger::v1_2_fmt_compile::ModuleLoggerData<synapse::LogManager::LogType>::ModuleLoggerData(char const*)::{lambda(int, char
const*, bool)#1}>::_M_invoke(std::_Any_data const&, int&&, char const*&&, bool&&)+0x1f) [0x7f8d70dc024f]
/usr/lib/habanalabs/libhl_logger.so(+0xfb47) [0x7f8d6effdb47]
/usr/lib/habanalabs/libhl_logger.so(signalHandler(int, siginfo_t*, void*)+0x29) [0x7f8d6f007a09]
===============================================================================
====================== USER CODE STACK TRACE START POINT ======================
===============================================================================
/lib/x86_64-linux-gnu/libc.so.6(+0x43090) [0x7f8ed0a41090]
/lib/x86_64-linux-gnu/libc.so.6(gsignal+0xcb) [0x7f8ed0a4100b]
/usr/lib/habanalabs/libTPCFuser.so(+0x17e7058) [0x7f8d6d68a058]
/lib/x86_64-linux-gnu/libc.so.6(+0x43090) [0x7f8ed0a41090]
/lib/x86_64-linux-gnu/libc.so.6(__read+0x4c) [0x7f8ed0b0c00c]
/lib/x86_64-linux-gnu/libc.so.6(_IO_file_underflow+0x17f) [0x7f8ed0a8eb9f]
/lib/x86_64-linux-gnu/libc.so.6(_IO_default_uflow+0x36) [0x7f8ed0a8ff86]
/usr/lib/habanalabs/libSynapse.so(exec(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >)+0x129) [0x7f8d708eb9e9]
/usr/lib/habanalabs/libSynapse.so(DfaBase::logHlSmi()+0x12a) [0x7f8d708ebe3a]
/usr/lib/habanalabs/libSynapse.so(DfaBase::checkFailure(DfaStatus)+0x370) [0x7f8d708fac90]
/usr/lib/habanalabs/libSynapse.so(DfaBase::notifyHlthunkFailure(DfaErrorCode)+0x75) [0x7f8d708ea2b5]
/usr/lib/habanalabs/libSynapse.so(synSingleton::notifyHlthunkFailure(DfaErrorCode)+0x42) [0x7f8d70ae0bb2]
/usr/lib/habanalabs/libSynapse.so(hclNotifyFailure(DfaErrorCode, unsigned long)+0x44) [0x7f8d70923e84]
/usr/lib/habanalabs/libhcl.so(waitForList(std::__cxx11::list<HCL_Request, std::allocator<HCL_Request> >&, unsigned short)+0x1140) [0x7f8d6f5f7d20]
/usr/lib/habanalabs/libhcl.so(HclDevice::sync(unsigned int, unsigned short)+0x49b) [0x7f8d6f825a7b]
/usr/lib/habanalabs/libhcl.so(HclDevice::onNewCommEnd(unsigned int, HclConfig&)+0x9a) [0x7f8d6f82f9da]
/usr/lib/habanalabs/libhcl.so(hccl::hccl_communicator::initialize(hccl::internal_unique_id_t const*)+0x2569) [0x7f8d6f424ed9]
/usr/lib/habanalabs/libhcl.so(hccl::hccl_context::comm_init_rank(void**, int, hcclUniqueId&, int)+0x121) [0x7f8d6f43d031]
/usr/lib/habanalabs/libhcl.so(hcclCommInitRank_Original(void**, int, hcclUniqueId&, int)+0xcb) [0x7f8d6f3b398b]
/opt/habanalabs/habana_plugins/libhost_profiler.so(HcclSingletonHostProfiler::hcclCommInitRank(void**, int, hcclUniqueId&, int)+0x59) [0x7f8d6be0da59]
/usr/lib/habanalabs/libhcl.so(hcclCommInitRank_impl(void**, int, hcclUniqueId, int)+0x37) [0x7f8d6f3a0447]
/usr/local/lib/python3.8/dist-packages/habana_frameworks/torch/lib/libhabana_pytorch_backend.so(std::_Function_handler<hcclResult_t (void**, int, hcclUniqueId, int), hcclResult_t (*)(void**, int, hcclUniqueId, int)
>::_M_invoke(std::_Any_data const&, void**&&, int&&, hcclUniqueId&&, int&&)+0x2e) [0x7f8d73c61c7e]
/usr/local/lib/python3.8/dist-packages/habana_frameworks/torch/lib/libhabana_pytorch_backend.so(hcclCommInitRank+0x6a) [0x7f8d73c4e26a]
backtrace (up to 30)
/usr/lib/habanalabs/libhl_logger.so(hl_logger::v1_0::logStackTrace(std::ostream&)+0x50) [0x7f1883987240]
/usr/lib/habanalabs/libSynapse.so(+0x12676ec) [0x7f18857486ec]
/usr/lib/habanalabs/libSynapse.so(std::_Function_handler<void (int, char const*, bool), hl_logger::v1_2_fmt_compile::ModuleLoggerData<synapse::LogManager::LogType>::ModuleLoggerData(char const*)::{lambda(int, char
const*, bool)#1}>::_M_invoke(std::_Any_data const&, int&&, char const*&&, bool&&)+0x1f) [0x7f188574924f]
/usr/lib/habanalabs/libhl_logger.so(+0xfb47) [0x7f1883986b47]
/usr/lib/habanalabs/libhl_logger.so(signalHandler(int, siginfo_t*, void*)+0x29) [0x7f1883990a09]
===============================================================================
====================== USER CODE STACK TRACE START POINT ======================
===============================================================================
/lib/x86_64-linux-gnu/libc.so.6(+0x43090) [0x7f19e53ca090]
/lib/x86_64-linux-gnu/libc.so.6(gsignal+0xcb) [0x7f19e53ca00b]
/usr/lib/habanalabs/libTPCFuser.so(+0x17e7058) [0x7f1882013058]
/lib/x86_64-linux-gnu/libc.so.6(+0x43090) [0x7f19e53ca090]
/lib/x86_64-linux-gnu/libc.so.6(__read+0x4c) [0x7f19e549500c]
/lib/x86_64-linux-gnu/libc.so.6(_IO_file_underflow+0x17f) [0x7f19e5417b9f]
/lib/x86_64-linux-gnu/libc.so.6(_IO_default_uflow+0x36) [0x7f19e5418f86]
/lib/x86_64-linux-gnu/libc.so.6(_IO_getline_info+0xac) [0x7f19e540a86c]
/lib/x86_64-linux-gnu/libc.so.6(fgets+0x9a) [0x7f19e54096ca]
/usr/lib/habanalabs/libSynapse.so(exec(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >)+0x129) [0x7f18852749e9]
/usr/lib/habanalabs/libSynapse.so(DfaBase::logHlSmi()+0x12a) [0x7f1885274e3a]
/usr/lib/habanalabs/libSynapse.so(DfaBase::checkFailure(DfaStatus)+0x370) [0x7f1885283c90]
/usr/lib/habanalabs/libSynapse.so(DfaBase::notifyHlthunkFailure(DfaErrorCode)+0x75) [0x7f18852732b5]
/usr/lib/habanalabs/libSynapse.so(synSingleton::notifyHlthunkFailure(DfaErrorCode)+0x42) [0x7f1885469bb2]
/usr/lib/habanalabs/libSynapse.so(hclNotifyFailure(DfaErrorCode, unsigned long)+0x44) [0x7f18852ace84]
/usr/lib/habanalabs/libhcl.so(waitForList(std::__cxx11::list<HCL_Request, std::allocator<HCL_Request> >&, unsigned short)+0x1140) [0x7f1883f80d20]
/usr/lib/habanalabs/libhcl.so(HclDevice::sync(unsigned int, unsigned short)+0x49b) [0x7f18841aea7b]
/usr/lib/habanalabs/libhcl.so(HclDevice::onNewCommEnd(unsigned int, HclConfig&)+0x9a) [0x7f18841b89da]
/usr/lib/habanalabs/libhcl.so(hccl::hccl_communicator::initialize(hccl::internal_unique_id_t const*)+0x2569) [0x7f1883daded9]
/usr/lib/habanalabs/libhcl.so(hccl::hccl_context::comm_init_rank(void**, int, hcclUniqueId&, int)+0x121) [0x7f1883dc6031]
/usr/lib/habanalabs/libhcl.so(hcclCommInitRank_Original(void**, int, hcclUniqueId&, int)+0xcb) [0x7f1883d3c98b]
/opt/habanalabs/habana_plugins/libhost_profiler.so(HcclSingletonHostProfiler::hcclCommInitRank(void**, int, hcclUniqueId&, int)+0x59) [0x7f1880796a59]
/usr/lib/habanalabs/libhcl.so(hcclCommInitRank_impl(void**, int, hcclUniqueId, int)+0x37) [0x7f1883d29447]
/usr/local/lib/python3.8/dist-packages/habana_frameworks/torch/lib/libhabana_pytorch_backend.so(std::_Function_handler<hcclResult_t (void**, int, hcclUniqueId, int), hcclResult_t (*)(void**, int, hcclUniqueId, int)
>::_M_invoke(std::_Any_data const&, void**&&, int&&, hcclUniqueId&&, int&&)+0x2e) [0x7f18885eac7e]