A customer is reporting:

Last crash: abort() was called on core 1
Backtrace: 0x4008e627 0x4008e8c1 0x400f2be4 0x400f3145 0x4014a7d1 0x401538b0 0x401082be 0x401092ed 0x401082be 0x40108430 0x4010a3bb 0x4010647a 0x400fb63f 0x400fb6ed
Event: ticker.1@EventScript 0 secs
Version: 3.2.014/ota_0/main (build idf v3.3.2-881-g22d636b Aug 4 2020 16:26:20)

He is running a Nissan Leaf with 3.2.014 firmware. Both the v2 and v3 servers are enabled, as well as ABRP and Pushover.

The trace is:

Using elf file: 3.2.014.ovms3.elf

0x4008e627 is in invoke_abort (/home/openvehicles/build/esp-idf/components/esp32/panic.c:156).
151  #endif
152    while (1) {
153      if (esp_cpu_in_ocd_debug_mode()) {
154        __asm__ ("break 0,0");
155      }
156      *((int *) 0) = 0;
157    }
158  }
159
160  void abort()

0x4008e8c1 is in abort (/home/openvehicles/build/esp-idf/components/esp32/panic.c:171).
166     * don't overwrite that.
167     */
168    if (esp_reset_reason_get_hint() == ESP_RST_UNKNOWN) {
169      esp_reset_reason_set_hint(ESP_RST_PANIC);
170    }
171    invoke_abort();
172  }
173
174
175  static const char *edesc[] = {

0x400f2be4 is in CheckQueueOverflow(char const*, char*) (/home/openvehicles/build/Open-Vehicle-Monitoring-System-3.1/vehicle/OVMS.V3/main/ovms_events.cpp:339).
334    // We've dropped a potentially important event, system is instable now.
335    // As the event queue is full, a normal reboot is no option, so…
336    ESP_LOGE(TAG, "%s: lost important event => aborting", from);
337    MyCommandApp.CloseLogfile();
338    vTaskDelay(pdMS_TO_TICKS(100));
339    abort();
340    }
341  }
342
343  static void SignalScheduledEvent(TimerHandle_t timer)

0x400f3145 is in OvmsEvents::SignalEvent(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, void*, void (*)(char const*, void*), unsigned int) (/home/openvehicles/build/Open-Vehicle-Monitoring-System-3.1/vehicle/OVMS.V3/main/ovms_events.cpp:417).
412
413    if (delay_ms == 0)
414      {
415      if (xQueueSend(m_taskqueue, &msg, 0) != pdTRUE)
416        {
417        CheckQueueOverflow("SignalEvent", msg.body.signal.event);
418        FreeQueueSignalEvent(&msg);
419        }
420      }
421    else

0x4014a7d1 is in OvmsWebServer::DestroyWebSocketHandler(WebSocketHandler*) (/home/openvehicles/build/Open-Vehicle-Monitoring-System-3.1/vehicle/OVMS.V3/components/ovms_webserver/src/ovms_websockethandler.cpp:502).
497
498    // clear unqueued notifications if any:
499    MyNotify.ClearReader(m_client_slots[i].reader);
500
501    ESP_LOGD(TAG, "WebSocket[%p] handler %p closed; %d clients active", nc, handler, m_client_cnt);
502    MyEvents.SignalEvent("server.web.socket.closed", (void*)m_client_cnt);
503
504    break;
505    }
506  }

0x401538b0 is in OvmsWebServer::EventHandler(mg_connection*, int, void*) (/home/openvehicles/build/Open-Vehicle-Monitoring-System-3.1/vehicle/OVMS.V3/components/ovms_webserver/src/ovms_webserver.cpp:534).
529
530    case MG_EV_CLOSE: // connection has been closed
531      {
532      if (handler) {
533        if (nc->flags & MG_F_IS_WEBSOCKET)
534          MyWebServer.DestroyWebSocketHandler((WebSocketHandler*)handler);
535        else
536          delete handler;
537      }
538      }

0x401082be is in mg_call (/home/openvehicles/build/Open-Vehicle-Monitoring-System-3.1/vehicle/OVMS.V3/components/mongoose/mongoose/mongoose.c:2368).
2363  }
2364  #endif
2365    if (ev_handler != NULL) {
2366      unsigned long flags_before = nc->flags;
2367      size_t recv_mbuf_before = nc->recv_mbuf.len, recved;
2368      ev_handler(nc, ev, ev_data MG_UD_ARG(user_data));
2369      recved = (recv_mbuf_before - nc->recv_mbuf.len);
2370      /* Prevent user handler from fiddling with system flags. */
2371      if (ev_handler == nc->handler && nc->flags != flags_before) {
2372        nc->flags = (flags_before & ~_MG_CALLBACK_MODIFIABLE_FLAGS_MASK) |

0x401092ed is in mg_ws_handler (/home/openvehicles/build/Open-Vehicle-Monitoring-System-3.1/vehicle/OVMS.V3/components/mongoose/mongoose/mongoose.c:9950).
9945    }
9946  }
9947
9948  MG_INTERNAL void mg_ws_handler(struct mg_connection *nc, int ev,
9949                                 void *ev_data MG_UD_ARG(void *user_data)) {
9950    mg_call(nc, nc->handler, nc->user_data, ev, ev_data);
9951
9952    switch (ev) {
9953      case MG_EV_RECV:
9954        do {

0x401082be is in mg_call (/home/openvehicles/build/Open-Vehicle-Monitoring-System-3.1/vehicle/OVMS.V3/components/mongoose/mongoose/mongoose.c:2368), same excerpt as above.

0x40108430 is in mg_close_conn (/home/openvehicles/build/Open-Vehicle-Monitoring-System-3.1/vehicle/OVMS.V3/components/mongoose/mongoose/mongoose.c:2435).
2430      mg_ssl_if_conn_close_notify(conn);
2431    }
2432  #endif
2433    mg_remove_conn(conn);
2434    conn->iface->vtable->destroy_conn(conn);
2435    mg_call(conn, NULL, conn->user_data, MG_EV_CLOSE, NULL);
2436    mg_destroy_conn(conn, 0 /* destroy_if */);
2437  }
2438
2439  void mg_mgr_init(struct mg_mgr *m, void *user_data) {

0x4010a3bb is in mg_socket_if_poll (/home/openvehicles/build/Open-Vehicle-Monitoring-System-3.1/vehicle/OVMS.V3/components/mongoose/mongoose/mongoose.c:4049).
4044
4045    for (nc = mgr->active_connections; nc != NULL; nc = tmp) {
4046      tmp = nc->next;
4047      if ((nc->flags & MG_F_CLOSE_IMMEDIATELY) ||
4048          (nc->send_mbuf.len == 0 && (nc->flags & MG_F_SEND_AND_CLOSE))) {
4049        mg_close_conn(nc);
4050      }
4051    }
4052
4053    return (time_t) now;

0x4010647a is in mg_mgr_poll (/home/openvehicles/build/Open-Vehicle-Monitoring-System-3.1/vehicle/OVMS.V3/components/mongoose/mongoose/mongoose.c:2538).
2533      LOG(LL_ERROR, ("cannot poll: no interfaces"));
2534      return 0;
2535    }
2536
2537    for (i = 0; i < m->num_ifaces; i++) {
2538      now = m->ifaces[i]->vtable->poll(m->ifaces[i], timeout_ms);
2539    }
2540    return now;
2541  }
2542

0x400fb63f is in OvmsNetManager::MongooseTask() (/home/openvehicles/build/Open-Vehicle-Monitoring-System-3.1/vehicle/OVMS.V3/main/ovms_netmanager.cpp:733).
728
729    // Main event loop
730    while (m_mongoose_running)
731      {
732      // poll interfaces:
733      if (mg_mgr_poll(&m_mongoose_mgr, 250) == 0)
734        {
735        ESP_LOGD(TAG, "MongooseTask: no interfaces available => exit");
736        break;
737        }

0x400fb6ed is in MongooseRawTask(void*) (/home/openvehicles/build/Open-Vehicle-Monitoring-System-3.1/vehicle/OVMS.V3/main/ovms_netmanager.cpp:715).
710  #ifdef CONFIG_OVMS_SC_GPL_MONGOOSE
711
712  static void MongooseRawTask(void *pvParameters)
713  {
714    OvmsNetManager* me = (OvmsNetManager*)pvParameters;
715    me->MongooseTask();
716  }
717
718  void OvmsNetManager::MongooseTask()
719  {

It seems a web socket (web UI?) connection went away, a logging message was produced, and that overflowed the event queue. Perhaps the user is just logging too much? Can you log at debug level to the web UI (or does that cause the problem I mentioned earlier about debug logging the debug logs)?

Any ideas?

Regards,
Mark.
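The failing path visible in frames 3 and 4 of the trace reduces to the pattern below: an event signalled with zero block time from the network task, and a deliberate abort() when an important event would otherwise be silently dropped. This is a minimal self-contained sketch of that pattern, not the actual firmware code; the real SignalEvent() uses an event_queue_msg_t message struct and an importance filter, both simplified away here.

  #include <cstdio>
  #include <cstdlib>
  #include "freertos/FreeRTOS.h"
  #include "freertos/queue.h"
  #include "freertos/task.h"

  // Created elsewhere, e.g. xQueueCreate(40, sizeof(const char*)).
  static QueueHandle_t m_taskqueue;

  // Sketch of CheckQueueOverflow(): dropping an "important" event (ticker.*,
  // system.*, ...) leaves the system in an undefined state, and with the queue
  // already full a clean reboot can't be signalled either, hence the abort().
  static void CheckQueueOverflow(const char* from, const char* event)
    {
    printf("%s: lost important event %s => aborting\n", from, event);
    vTaskDelay(pdMS_TO_TICKS(100));  // give the log output a chance to drain
    abort();
    }

  // Sketch of SignalEvent(): the zero block time means the caller (here the
  // mongoose/network task) never waits. If the events task has been starved
  // long enough for all queue slots to fill, the send fails immediately.
  void SignalEvent(const char* event)
    {
    if (xQueueSend(m_taskqueue, &event, 0) != pdTRUE)
      CheckQueueOverflow("SignalEvent", event);
    }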
On 26.08.20 at 06:17, Mark Webb-Johnson wrote:
It seems a web socket (web UI?) connection went away, a logging message was produced, and that overflowed the queue. Perhaps the user is just logging too much? Can you log at debug level to the web UI (or does that cause the problem I mentioned earlier about debug logging the debug logs)? Any ideas?
I haven't yet been able to correlate these event queue overflows with any specific configuration or system situation. The event queue has room for 40 entries by default, and it fills at roughly one event per second in normal operation (ticker.1 alone fires every second), so the system has normally been in an overload situation (starved events task) for at least 30-40 seconds when the abort is triggered.
Event: ticker.1@EventScript 0 secs
…tells us the events task wasn't completely stuck before the abort, it just didn't get much CPU share. That's why I thought raising the task priority could have an effect.

I really need user data on this, as the crash doesn't happen at all on my module. So it's also possible the issue is caused by a vehicle module bug, or by the poller receive bug we discovered a few days ago:

https://github.com/openvehicles/Open-Vehicle-Monitoring-System-3/pull/405#is...

That one is solved by the merge of the fast polling branch, which includes the fixes for it, so we may already see an improvement with the next build. But there are at least three more bugs/shortcomings in the current poller that can potentially also cause this issue (and others) through high vehicle task load, wrong memory allocations, or out-of-bounds accesses. That's why I'm currently working on a rewrite of the receiver.

Regards,
Michael

--
Michael Balzer * Helkenberger Weg 9 * D-58256 Ennepetal
Phone 02333 / 833 5735 * Mobile 0176 / 206 989 26
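One way to collect that user data would be an early-warning check on the queue load in the signalling path, so starvation shows up in the logs well before the fatal overflow. The following is a hypothetical diagnostic sketch, not existing firmware code; CheckEventQueueLoad is a made-up name, while uxQueueMessagesWaiting() and uxQueueSpacesAvailable() are standard FreeRTOS API.

  #include "freertos/FreeRTOS.h"
  #include "freertos/queue.h"
  #include "esp_log.h"

  static const char* TAG = "events";

  // Log a warning as soon as the event queue runs low on free slots, so a
  // starved events task becomes visible long before the abort() fires.
  void CheckEventQueueLoad(QueueHandle_t taskqueue)
    {
    UBaseType_t used = uxQueueMessagesWaiting(taskqueue);
    UBaseType_t free_slots = uxQueueSpacesAvailable(taskqueue);
    if (free_slots <= 10)
      ESP_LOGW(TAG, "event queue filling up: %u used, %u free",
               (unsigned) used, (unsigned) free_slots);
    }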
I've just found another clue to the events task starvations:

https://github.com/openvehicles/Open-Vehicle-Monitoring-System-3/issues/418

TL;DR: the events task can be blocked by lower-priority tasks as well, not just by higher-priority ones. So raising the events task priority cannot be the solution.

Regards,
Michael
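The textbook mechanism for a lower-priority task blocking a higher-priority one is priority inversion through a shared lock. Whether that is exactly what issue #418 describes isn't established here; the sketch below just illustrates the general shape, with all names being illustrative.

  #include "freertos/FreeRTOS.h"
  #include "freertos/task.h"
  #include "freertos/semphr.h"

  static SemaphoreHandle_t lock;

  // A low-priority task holds a lock the high-priority events task needs.
  // While it does, any ready medium-priority task can preempt it, so the
  // events task is effectively blocked by both lower-priority tasks.
  static void LowPrioWorker(void*)
    {
    for (;;)
      {
      xSemaphoreTake(lock, portMAX_DELAY);
      // ... long critical section runs here ...
      xSemaphoreGive(lock);
      }
    }

  void Setup()
    {
    // xSemaphoreCreateMutex() applies priority inheritance, which bounds the
    // inversion; a plain binary semaphore would not.
    lock = xSemaphoreCreateMutex();
    xTaskCreate(LowPrioWorker, "lowprio", 2048, NULL, 1, NULL);
    }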
I think I've found & fixed this, maybe also the events starvation issue. See

https://github.com/openvehicles/Open-Vehicle-Monitoring-System-3/issues/418#issuecomment-683286134

and

https://github.com/openvehicles/Open-Vehicle-Monitoring-System-3/commit/6b70...

Please test.

Regards,
Michael
I've made a stupid mistake that, at the least, kept events from being forwarded correctly to Duktape; the fix is pushed.

Regards,
Michael

PS: Mark, your split of the scripting module may not allow easy merging of my two commits. They are simple, but I can do the merge if you push them into a branch.
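For context, "forwarding an event to Duktape" follows the usual embedding pattern: look up a script handler function and invoke it with the event name via a protected call. This is a generic Duktape sketch of that pattern, not the actual OVMS scripting code; the handler name onEvent is made up.

  #include <cstdio>
  #include "duktape.h"

  // Forward an event name into a JavaScript handler registered as a global.
  void ForwardEventToScript(duk_context* ctx, const char* event)
    {
    duk_get_global_string(ctx, "onEvent");      // push handler function
    duk_push_string(ctx, event);                // push the event name argument
    if (duk_pcall(ctx, 1) != DUK_EXEC_SUCCESS)  // protected call, 1 argument
      printf("script error: %s\n", duk_safe_to_string(ctx, -1));
    duk_pop(ctx);                               // pop result (or error)
    }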
My work is in the public for-v3.3 branch.
On 31 Aug 2020, at 4:24 PM, Michael Balzer <dexter@expeedo.de> wrote:
I've made a stupid mistake that, at the least, kept events from being forwarded correctly to Duktape; the fix is pushed.
Merge is pushed.

On 31.08.20 at 10:52, Mark Webb-Johnson wrote:
My work is in the public for-v3.3 branch.
Participants (2):
- Mark Webb-Johnson
- Michael Balzer