php小編新一為您介紹一種使用ucontext的Golang+CGO技術,該技術在切換不同堆棧時可能會因為SIGSEGV或SIGTRAP而導致崩潰。這種崩潰是故意造成的,通過這種方式可以更好地理解和調試程序中的問題。本文將詳細介紹如何利用ucontext和Golang+CGO技術進行堆棧切換,并對可能出現的崩潰進行分析和解決。無論您是初學者還是有一定經驗的開發者,相信這篇文章都會對您有所幫助。
問題內容
我目前正在編寫Golang + CGO程序,并將在CGO中使用posix ucontext。由于我所有的核心邏輯都將在ucontext的綁定函數(即傳給 makecontext 的函數)中,所以我們需要捕獲其中所有出錯的代碼。我通過訪問空指針來測試它,結果得到了完全不同的行為,而這些行為都取決于 ucontext 使用的堆棧位置。以下是帶有簡化示例的更多詳細信息。
如果我在線程的堆棧上分配ucontext堆棧,它將觸發SIGSEGV。但如果我在堆上分配它,它會首先觸發 SIGSEGV,然后在調用 morestack_noctxt 時觸發 SIGTRAP,然后再調用 runtime.sigpanic。我該如何解決這個問題或者如何獲取 SIGSEGV?為什么需要morestack?
以下均為詳細信息,任何建議或意見將不勝感激。謝謝!
崩潰(使用 malloc 堆棧):
fatal: morestack on g0
SIGTRAP: trace trap
PC=0x45f342 m=0 sigcode=128
signal arrived during cgo execution
goroutine 1 [syscall]:
runtime.cgocall(0x464870, 0xc000067f60)
/usr/local/go/src/runtime/cgocall.go:157 +0x5c fp=0xc000067f38 sp=0xc000067f00 pc=0x40465c
main._Cfunc_core_logic()
_cgo_gotypes.go:39 +0x45 fp=0xc000067f60 sp=0xc000067f38 pc=0x4646e5
main.coreLogic()
/container_share/works/badstack/main.go:46 +0x17 fp=0xc000067f70 sp=0xc000067f60 pc=0x464737
main.main()
/container_share/works/badstack/main.go:51 +0x17 fp=0xc000067f80 sp=0xc000067f70 pc=0x464777
runtime.main()
/usr/local/go/src/runtime/proc.go:250 +0x1d3 fp=0xc000067fe0 sp=0xc000067f80 pc=0x436913
runtime.goexit()
/usr/local/go/src/runtime/asm_amd64.s:1598 +0x1 fp=0xc000067fe8 sp=0xc000067fe0 pc=0x45f4a1
goroutine 2 [force gc (idle)]:
runtime.gopark(0x47a860, 0x6cb4d0, 0x11, 0x14, 0x1)
/usr/local/go/src/runtime/proc.go:381 +0xfd fp=0xc000054f88 sp=0xc000054f58 pc=0x436dbd
runtime.goparkunlock(0x0?, 0x0?, 0x0?, 0x0?)
/usr/local/go/src/runtime/proc.go:387 +0x2a fp=0xc000054fb8 sp=0xc000054f88 pc=0x436e4a
runtime.forcegchelper()
/usr/local/go/src/runtime/proc.go:305 +0xb0 fp=0xc000054fe0 sp=0xc000054fb8 pc=0x436b90
runtime.goexit()
/usr/local/go/src/runtime/asm_amd64.s:1598 +0x1 fp=0xc000054fe8 sp=0xc000054fe0 pc=0x45f4a1
created by runtime.init.6
/usr/local/go/src/runtime/proc.go:293 +0x25
goroutine 3 [GC sweep wait]:
runtime.gopark(0x47a860, 0x6cb640, 0xc, 0x14, 0x1)
/usr/local/go/src/runtime/proc.go:381 +0xfd fp=0xc000055758 sp=0xc000055728 pc=0x436dbd
runtime.goparkunlock(0x0?, 0x0?, 0x0?, 0x0?)
/usr/local/go/src/runtime/proc.go:387 +0x2a fp=0xc000055788 sp=0xc000055758 pc=0x436e4a
runtime.bgsweep(0x0?)
/usr/local/go/src/runtime/mgcsweep.go:278 +0x98 fp=0xc0000557c8 sp=0xc000055788 pc=0x421998
runtime.gcenable.func1()
/usr/local/go/src/runtime/mgc.go:178 +0x26 fp=0xc0000557e0 sp=0xc0000557c8 pc=0x415f66
runtime.goexit()
/usr/local/go/src/runtime/asm_amd64.s:1598 +0x1 fp=0xc0000557e8 sp=0xc0000557e0 pc=0x45f4a1
created by runtime.gcenable
/usr/local/go/src/runtime/mgc.go:178 +0x6b
goroutine 4 [GC scavenge wait]:
runtime.gopark(0x47a860, 0x6cb6c0, 0xd, 0x14, 0x2)
/usr/local/go/src/runtime/proc.go:381 +0xfd fp=0xc000055f48 sp=0xc000055f18 pc=0x436dbd
runtime.goparkunlock(0x47ca80?, 0x1?, 0x0?, 0x0?)
/usr/local/go/src/runtime/proc.go:387 +0x2a fp=0xc000055f78 sp=0xc000055f48 pc=0x436e4a
runtime.(*scavengerState).park(0x6cb6c0)
/usr/local/go/src/runtime/mgcscavenge.go:400 +0x4b fp=0xc000055fa0 sp=0xc000055f78 pc=0x41f44b
runtime.bgscavenge(0x0?)
/usr/local/go/src/runtime/mgcscavenge.go:628 +0x45 fp=0xc000055fc8 sp=0xc000055fa0 pc=0x41fa25
runtime.gcenable.func2()
/usr/local/go/src/runtime/mgc.go:179 +0x26 fp=0xc000055fe0 sp=0xc000055fc8 pc=0x415f06
runtime.goexit()
/usr/local/go/src/runtime/asm_amd64.s:1598 +0x1 fp=0xc000055fe8 sp=0xc000055fe0 pc=0x45f4a1
created by runtime.gcenable
/usr/local/go/src/runtime/mgc.go:179 +0xaa
rax 0x17
rbx 0x476413
rcx 0x460c95
rdx 0x17
rdi 0x2
rsi 0x476413
rbp 0x7f18906b3ff0
rsp 0x7f18906b3fd8
r8 0xffffffff
r9 0x0
r10 0x8
r11 0x246
r12 0xc000067c70
r13 0x0
r14 0x6cb760
r15 0x0
rip 0x45f342
rflags 0x206
cs 0x33
fs 0x0
gs 0x0
登錄后復制
崩潰(線程堆棧):
fatal error: unexpected signal during runtime execution
[signal SIGSEGV: segmentation violation code=0x1 addr=0x0 pc=0x4647a0]
runtime stack:
runtime.throw({0x479118?, 0xffffffffffffffff?})
/usr/local/go/src/runtime/panic.go:1047 +0x5d fp=0x7fff293551f0 sp=0x7fff293551c0 pc=0x43417d
runtime.sigpanic()
/usr/local/go/src/runtime/signal_unix.go:825 +0x285 fp=0x7fff29355220 sp=0x7fff293551f0 pc=0x4495a5
goroutine 1 [syscall]:
runtime.cgocall(0x464890, 0xc000067f60)
/usr/local/go/src/runtime/cgocall.go:157 +0x5c fp=0xc000067f38 sp=0xc000067f00 pc=0x40465c
main._Cfunc_core_logic()
_cgo_gotypes.go:39 +0x45 fp=0xc000067f60 sp=0xc000067f38 pc=0x4646e5
main.coreLogic()
/container_share/works/badstack/main.go:46 +0x17 fp=0xc000067f70 sp=0xc000067f60 pc=0x464737
main.main()
/container_share/works/badstack/main.go:51 +0x17 fp=0xc000067f80 sp=0xc000067f70 pc=0x464777
runtime.main()
/usr/local/go/src/runtime/proc.go:250 +0x1d3 fp=0xc000067fe0 sp=0xc000067f80 pc=0x436913
runtime.goexit()
/usr/local/go/src/runtime/asm_amd64.s:1598 +0x1 fp=0xc000067fe8 sp=0xc000067fe0 pc=0x45f4a1
goroutine 2 [force gc (idle)]:
runtime.gopark(0x47a880, 0x6cb4d0, 0x11, 0x14, 0x1)
/usr/local/go/src/runtime/proc.go:381 +0xfd fp=0xc000054f88 sp=0xc000054f58 pc=0x436dbd
runtime.goparkunlock(0x0?, 0x0?, 0x0?, 0x0?)
/usr/local/go/src/runtime/proc.go:387 +0x2a fp=0xc000054fb8 sp=0xc000054f88 pc=0x436e4a
runtime.forcegchelper()
/usr/local/go/src/runtime/proc.go:305 +0xb0 fp=0xc000054fe0 sp=0xc000054fb8 pc=0x436b90
runtime.goexit()
/usr/local/go/src/runtime/asm_amd64.s:1598 +0x1 fp=0xc000054fe8 sp=0xc000054fe0 pc=0x45f4a1
created by runtime.init.6
/usr/local/go/src/runtime/proc.go:293 +0x25
goroutine 3 [GC sweep wait]:
runtime.gopark(0x47a880, 0x6cb640, 0xc, 0x14, 0x1)
/usr/local/go/src/runtime/proc.go:381 +0xfd fp=0xc000055758 sp=0xc000055728 pc=0x436dbd
runtime.goparkunlock(0x0?, 0x0?, 0x0?, 0x0?)
/usr/local/go/src/runtime/proc.go:387 +0x2a fp=0xc000055788 sp=0xc000055758 pc=0x436e4a
runtime.bgsweep(0x0?)
/usr/local/go/src/runtime/mgcsweep.go:278 +0x98 fp=0xc0000557c8 sp=0xc000055788 pc=0x421998
runtime.gcenable.func1()
/usr/local/go/src/runtime/mgc.go:178 +0x26 fp=0xc0000557e0 sp=0xc0000557c8 pc=0x415f66
runtime.goexit()
/usr/local/go/src/runtime/asm_amd64.s:1598 +0x1 fp=0xc0000557e8 sp=0xc0000557e0 pc=0x45f4a1
created by runtime.gcenable
/usr/local/go/src/runtime/mgc.go:178 +0x6b
goroutine 4 [GC scavenge wait]:
runtime.gopark(0x47a880, 0x6cb6c0, 0xd, 0x14, 0x2)
/usr/local/go/src/runtime/proc.go:381 +0xfd fp=0xc000055f48 sp=0xc000055f18 pc=0x436dbd
runtime.goparkunlock(0x47caa0?, 0x1?, 0x0?, 0x0?)
/usr/local/go/src/runtime/proc.go:387 +0x2a fp=0xc000055f78 sp=0xc000055f48 pc=0x436e4a
runtime.(*scavengerState).park(0x6cb6c0)
/usr/local/go/src/runtime/mgcscavenge.go:400 +0x4b fp=0xc000055fa0 sp=0xc000055f78 pc=0x41f44b
runtime.bgscavenge(0x0?)
/usr/local/go/src/runtime/mgcscavenge.go:628 +0x45 fp=0xc000055fc8 sp=0xc000055fa0 pc=0x41fa25
runtime.gcenable.func2()
/usr/local/go/src/runtime/mgc.go:179 +0x26 fp=0xc000055fe0 sp=0xc000055fc8 pc=0x415f06
runtime.goexit()
/usr/local/go/src/runtime/asm_amd64.s:1598 +0x1 fp=0xc000055fe8 sp=0xc000055fe0 pc=0x45f4a1
created by runtime.gcenable
/usr/local/go/src/runtime/mgc.go:179 +0xaa
登錄后復制
GDB(帶 malloc 堆棧):
這將調用runtime.morestack_noctxt,并最終得到badstack,因為它位于g0的堆棧上。
(gdb) b runtime.sigpanic
Breakpoint 1 at 0x449320: file /usr/local/go/src/runtime/signal_unix.go, line 822.
(gdb) r
Starting program: /container_share/works/badstack/main
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib64/libthread_db.so.1".
[New Thread 0x7fffd05d3700 (LWP 213229)]
[New Thread 0x7fffcfdd2700 (LWP 213230)]
[New Thread 0x7fffcf5d1700 (LWP 213231)]
[New Thread 0x7fffcedd0700 (LWP 213232)]
[New Thread 0x7fffce58f700 (LWP 213233)]
[New Thread 0x7fffcdd8e700 (LWP 213234)]
Thread 1 "main" received signal SIGSEGV, Segmentation fault.
0x00000000004647a0 in core () at /container_share/works/badstack/main.go:18
18 *ptr = 1024;
(gdb) c
Continuing.
Thread 1 "main" hit Breakpoint 1, runtime.sigpanic () at /usr/local/go/src/runtime/signal_unix.go:822
822 func sigpanic() {
(gdb) p $rsp
$1 = (void *) 0x7fffcd58cfe8
(gdb) x/x $r14+0x10
0x6cb770 : 0xff7fed70
(gdb) c
Continuing.
fatal: morestack on g0
登錄后復制
GDB(帶有線程堆棧):
這似乎一切都按預期進行。
(gdb) b runtime.sigpanic
Breakpoint 1 at 0x449320: file /usr/local/go/src/runtime/signal_unix.go, line 822.
(gdb) r
Starting program: /container_share/works/badstack/main
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib64/libthread_db.so.1".
[New Thread 0x7fffd05d3700 (LWP 214288)]
[New Thread 0x7fffcfdd2700 (LWP 214289)]
[New Thread 0x7fffcf5d1700 (LWP 214290)]
[New Thread 0x7fffcedd0700 (LWP 214291)]
[New Thread 0x7fffce5cf700 (LWP 214292)]
Thread 1 "main" received signal SIGSEGV, Segmentation fault.
0x00000000004647a0 in core () at /container_share/works/badstack/main.go:18
18 *ptr = 1024;
(gdb) c
Continuing.
Thread 1 "main" hit Breakpoint 1, runtime.sigpanic () at /usr/local/go/src/runtime/signal_unix.go:822
822 func sigpanic() {
(gdb) p $rsp
$1 = (void *) 0x7fffffffd8e8
(gdb) x/x $r14+0x10
0x6cb770 : 0xff7fed70
(gdb) c
Continuing.
fatal error: unexpected signal during runtime execution
[signal SIGSEGV: segmentation violation code=0x1 addr=0x0 pc=0x4647a0]
登錄后復制
環境:
$ clang -v
clang version 16.0.6 (Red Hat 16.0.6-2.module_el8+588+6f71ce7b)
$ gcc -v
gcc version 8.4.1 20200928 (Red Hat 8.4.1-1) (GCC)
$ uname -a
Linux 6cc94b77abd7 6.4.16-orbstack-00103-g02b40eb69695 #1 SMP Wed Sep 13 10:13:30 UTC 2023 x86_64 x86_64 x86_64 GNU/Linux
登錄后復制
復現代碼(Reproducer):
編譯: CC=clang CXX=clang++ CFLAGS="-g -O0" go build -gcflags="all=-N -l" main.go
package main /* #include #include #include #include static ucontext_t uctx_main, uctx_core; void core() { // core logic // trigger crash int* ptr = NULL; *ptr = 1024; } void core_logic() { size_t size = 1024 * 1024; char stack[size]; // SIGSEGV //void* stack = malloc(size); // SIGTRAP if (getcontext(&uctx_core) == -1) printf("failed to getcontext"); uctx_core.uc_stack.ss_sp = stack; uctx_core.uc_stack.ss_size = size; uctx_core.uc_link = &uctx_main; makecontext(&uctx_core, core, 0); if (swapcontext(&uctx_main, &uctx_core) == -1) printf("failed to swapcontext"); printf("back\n"); } */ // #cgo CFLAGS: -g -O0 import "C" func coreLogic() { C.core_logic() } func main() { // Call the C function from Go coreLogic() }
登錄后復制
這是我的猜測,但似乎不正確:當我使用堆上分配的堆棧時,它被視為發生了堆棧溢出,于是需要擴展堆棧(morestack),但最終發現當前是g0,于是致命退出。但看起來 goroutine 的堆棧地址比線程的堆棧低得多?
更新于2023年9月24日:
對于純 C 程序,無論我使用什么堆棧,它都會獲得 SIGSEGV。
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <ucontext.h>
// uctx_main: filled in by swapcontext() with the caller's context and used
//            as uc_link for uctx_core.
// uctx_core: context prepared with getcontext()/makecontext() to run core().
static ucontext_t uctx_main, uctx_core;
// Entry point of the secondary context: crashes on purpose by storing
// through a null pointer so the resulting signal can be observed.
//
// The pointer is volatile-qualified so the compiler has to emit the store.
// A store through a plain null pointer is undefined behavior, which an
// optimizing compiler may delete or replace with a trap instruction, and
// that would change (or remove) the signal this function exists to raise.
void core(void)
{
    // core logic

    // trigger crash
    volatile int *ptr = NULL;

    *ptr = 1024;
}
// Runs core() on a separately allocated stack by switching ucontexts.
//
// uctx_core is initialised from the current context, pointed at a
// heap-allocated stack and linked back to uctx_main, then swapped in.
// If core() returns, uc_link resumes uctx_main right after swapcontext(),
// "back" is printed and the stack is released.
// If any setup step fails, a message is printed and the function returns
// without running core(); the stack is freed on every path that allocated it.
void core_logic(void)
{
    size_t size = (size_t)100 * 1024 * 1024;
    //char stack[size]; // alternative: put the context stack on the thread's stack
    void *stack = malloc(size);

    if (stack == NULL) {
        printf("failed to allocate stack");
        return;
    }

    if (getcontext(&uctx_core) == -1) {
        printf("failed to getcontext");
        goto out;
    }
    uctx_core.uc_stack.ss_sp = stack;
    uctx_core.uc_stack.ss_size = size;
    uctx_core.uc_link = &uctx_main;
    makecontext(&uctx_core, core, 0);

    if (swapcontext(&uctx_main, &uctx_core) == -1) {
        printf("failed to swapcontext");
        goto out;
    }
    printf("back\n");

out:
    free(stack);
}
// Thin wrapper: forwards to core_logic() and returns nothing.
void coreLogic()
{
    core_logic();
}
// Program entry point: runs coreLogic() once, then returns 0.
int main()
{
    coreLogic();

    return 0;
}
登錄后復制
解決方法
最后,我在 Go 團隊成員的幫助下解決了這個問題;如果您需要的話,可以查看我在 Golang 的 GitHub 存儲庫中提交的 issue。
TL;DR:這是從 Go1.21 開始的錯誤,應該在 Go1.22 中修復。即使您使用 Go1.20,由于另一個錯誤,您可能仍然面臨這個問題。如果有人需要的話,也許我稍后會發布更多詳細信息,或者您可以在我之前提到的 issue 中查看更多詳細信息。