Compare commits
521 Commits
fix/no-ale
...
drift/adop
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0028993851 | ||
|
|
5c934de83d | ||
|
|
d1ebcdac10 | ||
|
|
51660ecbb1 | ||
|
|
bc99683432 | ||
|
|
b50614528e | ||
|
|
bbf5105fb4 | ||
|
|
d321f44e49 | ||
|
|
4b8f946699 | ||
|
|
e36c9b1800 | ||
|
|
7fa06731da | ||
|
|
4ec116c012 | ||
|
|
41ed3c0421 | ||
|
|
94f8c68b77 | ||
|
|
d709e25d69 | ||
|
|
ba1e7997ad | ||
|
|
213523c77d | ||
|
|
fbde48438b | ||
|
|
17d3c161e4 | ||
|
|
28c2b365b3 | ||
|
|
31f778d60b | ||
|
|
08a75f4b5a | ||
|
|
e4e1244c0f | ||
|
|
aff2a57db7 | ||
|
|
f3494e0bfb | ||
|
|
e81e3f7b8a | ||
|
|
32d4d1ea8b | ||
|
|
0e3c63ec15 | ||
|
|
be551ac761 | ||
|
|
20d62ee0cf | ||
|
|
584bd4b31b | ||
|
|
f35527c7ed | ||
|
|
1a16e083e7 | ||
|
|
ed37000eba | ||
|
|
82e33f6a17 | ||
|
|
c97230252a | ||
|
|
e9e6cda06e | ||
|
|
10965af845 | ||
|
|
8ca875e6ad | ||
|
|
ea96bb0971 | ||
|
|
1ee0740b13 | ||
|
|
79038a6efb | ||
|
|
5d36638c79 | ||
|
|
9d02ab8080 | ||
|
|
b9597d8d70 | ||
|
|
749b210997 | ||
|
|
5cb10a6d2d | ||
|
|
0e7fe211de | ||
|
|
64c7044282 | ||
|
|
989390f7ce | ||
|
|
98a10cbc7b | ||
|
|
df7d957310 | ||
|
|
a023c535db | ||
|
|
161e337e77 | ||
|
|
c4c1e22587 | ||
|
|
3f7bf24b23 | ||
|
|
1a2b04f5cf | ||
|
|
5c240744eb | ||
|
|
9f64739544 | ||
|
|
5d10c8fbfe | ||
|
|
168241e3c5 | ||
|
|
fd0888b092 | ||
|
|
daf672aa1e | ||
|
|
fd5ea0cf94 | ||
|
|
8bacb65a75 | ||
|
|
0dd4b486c5 | ||
|
|
ae18751d17 | ||
|
|
986d1a937d | ||
|
|
9f2974f4c5 | ||
|
|
e8b507be54 | ||
|
|
13d6aa41d8 | ||
|
|
902593f775 | ||
|
|
bc701b8fd3 | ||
|
|
756fe92601 | ||
|
|
41a7ec93d6 | ||
|
|
dca1eb642f | ||
|
|
ec18dec0d3 | ||
|
|
8a7a332190 | ||
|
|
24f4324ae9 | ||
|
|
6b60f6b086 | ||
|
|
a42e40a68c | ||
|
|
f0bb303655 | ||
|
|
40ec5055e1 | ||
|
|
68b20be2b4 | ||
|
|
9e1b15dabf | ||
|
|
06f64c6ddd | ||
|
|
913e1abcfa | ||
|
|
ba971e7a29 | ||
|
|
bb4041579c | ||
|
|
69f2ec5ec9 | ||
|
|
a6699c41f8 | ||
|
|
d4b2cf003f | ||
|
|
76c302ab5f | ||
|
|
2d579cdf1e | ||
|
|
6e9029273b | ||
|
|
ef1e28b73a | ||
|
|
6868a9a93d | ||
|
|
3aabceb234 | ||
|
|
0d9cde51aa | ||
|
|
a3f2b010f8 | ||
|
|
e6a62bb13b | ||
|
|
665e72ba33 | ||
|
|
171443ee94 | ||
|
|
5b8f324523 | ||
|
|
cfaa4d0a4a | ||
|
|
f02923b24a | ||
|
|
06489ef844 | ||
|
|
64fc19b4d5 | ||
|
|
5f3f8fc253 | ||
|
|
0592402779 | ||
|
|
27c2a3d980 | ||
|
|
3ca3502147 | ||
|
|
5af7108b18 | ||
|
|
befe503aa4 | ||
|
|
226f551e77 | ||
|
|
1db4ef093c | ||
|
|
bc89940564 | ||
|
|
6ec424b15c | ||
|
|
615fa23390 | ||
|
|
65001da0d8 | ||
|
|
f4a8390dc0 | ||
|
|
7257aa3a9f | ||
|
|
475f2e452d | ||
|
|
d9d119ede2 | ||
|
|
8d098f564d | ||
|
|
392cfb9025 | ||
|
|
53cd7f9d66 | ||
|
|
9870ed5e30 | ||
|
|
6aaaf87ade | ||
|
|
36cb9d6aeb | ||
|
|
3749cc2ab5 | ||
|
|
04fdaee83a | ||
|
|
102f92dfc3 | ||
|
|
cf173c49d8 | ||
|
|
44f7471b21 | ||
|
|
224ae9e202 | ||
|
|
aa63ae5eca | ||
|
|
f97127f704 | ||
|
|
33e4c9231e | ||
|
|
813d088339 | ||
|
|
0567135647 | ||
|
|
2582ad9425 | ||
|
|
bad48dee04 | ||
|
|
dd269b195c | ||
|
|
b1893395f0 | ||
|
|
485c58d085 | ||
|
|
bc1a11e373 | ||
|
|
e37cbe1910 | ||
|
|
809bc9670b | ||
|
|
6c16a7b162 | ||
|
|
7d3685ef58 | ||
|
|
21dcfbd991 | ||
|
|
d2a4a17969 | ||
|
|
cdb8bf6802 | ||
|
|
80a056539c | ||
|
|
b92c9e285f | ||
|
|
b677cb11de | ||
|
|
368386abc0 | ||
|
|
d1b0ee7e96 | ||
|
|
13cf02b740 | ||
|
|
1670ff1960 | ||
|
|
9b32d3a9e7 | ||
|
|
6220f52266 | ||
|
|
5ef9240583 | ||
|
|
08d28dc44b | ||
|
|
6571260dd2 | ||
|
|
687f37d837 | ||
|
|
e8c4512a40 | ||
|
|
aa8b72043b | ||
|
|
b5288d4b7d | ||
|
|
a9b846c82a | ||
|
|
5604dd0256 | ||
|
|
5361ad8f7e | ||
|
|
6f6d032ca9 | ||
|
|
a91c38675a | ||
|
|
5fb73a5612 | ||
|
|
c42b2dfe06 | ||
|
|
b1ecb55bd6 | ||
|
|
42d0d076d6 | ||
|
|
d835b666cf | ||
|
|
39581ab824 | ||
|
|
a0a0731cd6 | ||
|
|
5161a9dfd6 | ||
|
|
7a8cbb3241 | ||
|
|
ae643552e9 | ||
|
|
8885c1b49d | ||
|
|
4ee57b710d | ||
|
|
5a31702885 | ||
|
|
dcaf16cecc | ||
|
|
07ed014a83 | ||
|
|
c5f4bafcaf | ||
|
|
1277865343 | ||
|
|
7df94e9bef | ||
|
|
8bb601eecd | ||
|
|
1778a692e0 | ||
|
|
0337b62349 | ||
|
|
39e6ce747d | ||
|
|
e947e60d11 | ||
|
|
a21fc0f35a | ||
|
|
77aace7515 | ||
|
|
eb73591286 | ||
|
|
011085ce3d | ||
|
|
a524e468e4 | ||
|
|
365d93f07e | ||
|
|
795085170a | ||
|
|
c888444287 | ||
|
|
ea320a2087 | ||
|
|
ebf0f57272 | ||
|
|
dc865cf53d | ||
|
|
8d7b938f78 | ||
|
|
453e22f80d | ||
|
|
c6e47526a7 | ||
|
|
9b7a91d828 | ||
|
|
c2d01eb6f1 | ||
|
|
21042ad0e7 | ||
|
|
bcf2ed7841 | ||
|
|
6064e6d03f | ||
|
|
830dc0dcd0 | ||
|
|
88dbcd912e | ||
|
|
2f5d812608 | ||
|
|
74c47672da | ||
|
|
872abea008 | ||
|
|
edba52f401 | ||
|
|
596f2f6820 | ||
|
|
c68cbd3139 | ||
|
|
9c9cf68063 | ||
|
|
3bad354414 | ||
|
|
518a16e895 | ||
|
|
a28baa6197 | ||
|
|
2314badec5 | ||
|
|
cecadb331b | ||
|
|
55b28336e5 | ||
|
|
22beddc8a8 | ||
|
|
c1e2567b15 | ||
|
|
90156a7c1a | ||
|
|
356bfce2c8 | ||
|
|
94fc25dc39 | ||
|
|
e4203060f3 | ||
|
|
aafe7273e3 | ||
|
|
d339e3ebad | ||
|
|
ae7c7cbd23 | ||
|
|
c00e911b28 | ||
|
|
15ff939b1f | ||
|
|
0f080240c6 | ||
|
|
d886526f23 | ||
|
|
d33856f874 | ||
|
|
04c7bb1c97 | ||
|
|
3a1cedc90d | ||
|
|
d449ba4720 | ||
|
|
e2785899a2 | ||
|
|
e57474adfb | ||
|
|
971afafc01 | ||
|
|
7fa9f743dd | ||
|
|
7d506b785d | ||
|
|
8e14f1bf3e | ||
|
|
c486087294 | ||
|
|
51528b2cf9 | ||
|
|
5daa005c1b | ||
|
|
a99dccfc73 | ||
|
|
90603ad9bb | ||
|
|
77877dd501 | ||
|
|
34bfe56f53 | ||
|
|
ce83e8dc00 | ||
|
|
a0a2a5b1f0 | ||
|
|
85a1bcef52 | ||
|
|
f19fe4aa90 | ||
|
|
1a03bceb5c | ||
|
|
15873b9e0c | ||
|
|
8ac4ba24f7 | ||
|
|
42789dbe9e | ||
|
|
7ed9859260 | ||
|
|
0b707495a1 | ||
|
|
e177eca25d | ||
|
|
146cf411ae | ||
|
|
57ed07d1d0 | ||
|
|
5ecd21e664 | ||
|
|
c01012d767 | ||
|
|
af9798a62e | ||
|
|
5294f0712f | ||
|
|
631fc22090 | ||
|
|
1003fa4246 | ||
|
|
54814bc65e | ||
|
|
3d38039b86 | ||
|
|
5b34877429 | ||
|
|
b0a8302dd7 | ||
|
|
90b9ddb7a5 | ||
|
|
3799e0db0d | ||
|
|
f61747aeac | ||
|
|
07000dae3a | ||
|
|
49ffb5bb19 | ||
|
|
ca80972dc7 | ||
|
|
feda8a0b4b | ||
|
|
124c3c545b | ||
|
|
dba3e405f4 | ||
|
|
b4d367eeb4 | ||
|
|
b81cb28615 | ||
|
|
c18c6f6fe2 | ||
|
|
94d006eac8 | ||
|
|
96a8cf3ad5 | ||
|
|
f318fd3a89 | ||
|
|
1a62c322bc | ||
|
|
24b15f4ad2 | ||
|
|
c652f37b69 | ||
|
|
c523a22d89 | ||
|
|
f7c84530d6 | ||
|
|
56228dbb79 | ||
|
|
de16c88418 | ||
|
|
edd06485e0 | ||
|
|
7f94bc5776 | ||
|
|
7d92f0acd7 | ||
|
|
b7af597459 | ||
|
|
1617b73a9d | ||
|
|
8c4dc7a5a8 | ||
|
|
be8ddf4599 | ||
|
|
ff30c61c4c | ||
|
|
33c0577e93 | ||
|
|
f0255e0300 | ||
|
|
0bc1878778 | ||
|
|
a18e2f9c3f | ||
|
|
6b02f49fc6 | ||
|
|
216b7d78e2 | ||
|
|
abdab85362 | ||
|
|
116fdbb33f | ||
|
|
9db1e9b7a5 | ||
|
|
1a74286dfa | ||
|
|
b437a33043 | ||
|
|
03ba9678d5 | ||
|
|
d74beb2176 | ||
|
|
f824308b6a | ||
|
|
cb7151cc27 | ||
|
|
ad8ead2546 | ||
|
|
d356cd32fc | ||
|
|
80c36ba801 | ||
|
|
afb5f9556e | ||
|
|
b3dc41fcd4 | ||
|
|
c88d82f2ac | ||
|
|
395cf742b9 | ||
|
|
72d86ba70b | ||
|
|
a26ccf8d80 | ||
|
|
77ef400598 | ||
|
|
08097f4070 | ||
|
|
32e8a045f4 | ||
|
|
814f5d8c6c | ||
|
|
4f0d677e18 | ||
|
|
5d38115d2f | ||
|
|
200b760512 | ||
|
|
83f4ab0dad | ||
|
|
2df36b11e2 | ||
|
|
1b7f46f02c | ||
|
|
6ae3a55aed | ||
|
|
94e680add4 | ||
|
|
4810125e9a | ||
|
|
3df23112ef | ||
|
|
2ccc9d3071 | ||
|
|
624c1b26c3 | ||
|
|
beba668a4c | ||
|
|
c52ebfc042 | ||
|
|
8b9a974c66 | ||
|
|
f960a4a19b | ||
|
|
9d85ec5e96 | ||
|
|
c00c7be9ae | ||
|
|
336fd76774 | ||
|
|
cd637ef616 | ||
|
|
66e22e26cb | ||
|
|
f10ab71c52 | ||
|
|
d5555697a1 | ||
|
|
3f69e03fcb | ||
|
|
57df3582dd | ||
|
|
14180182d3 | ||
|
|
6ac61ab6d7 | ||
|
|
968de38a94 | ||
|
|
e5fd9395f7 | ||
|
|
251554c044 | ||
|
|
1a1dea00eb | ||
|
|
8485d99336 | ||
|
|
c49246b8c6 | ||
|
|
67c70c071b | ||
|
|
18b34fed31 | ||
|
|
1f4a16e625 | ||
|
|
1a72f771de | ||
|
|
68e741e0c3 | ||
|
|
341c3b6523 | ||
|
|
f046742a4f | ||
|
|
b1167edde7 | ||
|
|
82e9aea057 | ||
|
|
2a8b96cc7f | ||
|
|
328b24de6a | ||
|
|
de4d35e184 | ||
|
|
ecc65be6e1 | ||
|
|
7b98f71393 | ||
|
|
cf0b6be695 | ||
|
|
9365bdab93 | ||
|
|
012cd27b4a | ||
|
|
678d489978 | ||
|
|
c5964fbcd3 | ||
|
|
886657473e | ||
|
|
d2d29185c9 | ||
|
|
7f4f5b24ba | ||
|
|
d2205dc1c0 | ||
|
|
19e721d4af | ||
|
|
9dfecc4d1b | ||
|
|
53994e75f0 | ||
|
|
2e06077337 | ||
|
|
8396d37275 | ||
|
|
150f17b219 | ||
|
|
9a3afa11ed | ||
|
|
edef1aa4c7 | ||
|
|
780a742110 | ||
|
|
a0179cec6e | ||
|
|
ea6b7d8f27 | ||
|
|
dd75a3b943 | ||
|
|
ea5ad040da | ||
|
|
b2f0db0717 | ||
|
|
93c4b62826 | ||
|
|
a132bee1d7 | ||
|
|
d0e98192de | ||
|
|
bcb9397c38 | ||
|
|
1a1ab0df6e | ||
|
|
572e7640cd | ||
|
|
2ece75935e | ||
|
|
2aaaa5654f | ||
|
|
8882301243 | ||
|
|
3aba5c7f9a | ||
|
|
2ef54ccc94 | ||
|
|
d90414ddfa | ||
|
|
a158b77422 | ||
|
|
d79ec4f647 | ||
|
|
ef3b05439a | ||
|
|
0e2e856f12 | ||
|
|
9b0f55fd90 | ||
|
|
7473a01322 | ||
|
|
38b61e290e | ||
|
|
fa0e956c0e | ||
|
|
76aaaf480c | ||
|
|
c1ac157aaf | ||
|
|
73d7e332a4 | ||
|
|
33f85ec8ca | ||
|
|
38a4748e17 | ||
|
|
8f715fd3f2 | ||
|
|
a94435f143 | ||
|
|
a7a9ba996d | ||
|
|
fcf93aac11 | ||
|
|
1d9dbac112 | ||
|
|
4e9981c182 | ||
|
|
7ed8c95409 | ||
|
|
1e68d45659 | ||
|
|
60c00d7a5d | ||
|
|
72811b967e | ||
|
|
927c2a758d | ||
|
|
e5094c5c53 | ||
|
|
154aec849e | ||
|
|
22453161e9 | ||
|
|
d3e1b61096 | ||
|
|
f88a3a846b | ||
|
|
2adbf1e6cd | ||
|
|
6c4f8379ad | ||
|
|
d441f70693 | ||
|
|
033ac8129b | ||
|
|
4111ea4f9f | ||
|
|
578bf3bc7c | ||
|
|
ffd767d4bb | ||
|
|
6e2ab7cedc | ||
|
|
c4f40235f4 | ||
|
|
4753099155 | ||
|
|
eb71bc61ed | ||
|
|
8ae7789e93 | ||
|
|
2c2bf9d665 | ||
|
|
56b4d8165b | ||
|
|
c696b99ccf | ||
|
|
e6eae5cdc4 | ||
|
|
072cc23a42 | ||
|
|
682c0b9995 | ||
|
|
96ad3a18ee | ||
|
|
9ef9633aff | ||
|
|
df5e6c6626 | ||
|
|
d2aebdd477 | ||
|
|
09256be62c | ||
|
|
a4fece11cc | ||
|
|
c2c0b1ec82 | ||
|
|
1d0e80c091 | ||
|
|
3b64d66836 | ||
|
|
5890fffd7f | ||
|
|
eced8617d3 | ||
|
|
587551c1f1 | ||
|
|
a2c4b3d47e | ||
|
|
20ef0c1455 | ||
|
|
cb9551fb00 | ||
|
|
5ed396e390 | ||
|
|
6e96623884 | ||
|
|
87ce02f34d | ||
|
|
0315c2b510 | ||
|
|
2aa31c205a | ||
|
|
23932773ef | ||
|
|
2f50c67f5c | ||
|
|
85d5b5c823 | ||
|
|
25b1923d2e | ||
|
|
e208798531 | ||
|
|
1ba36697ca | ||
|
|
405b8b8ef9 | ||
|
|
1cc215ec30 | ||
|
|
83daeb3f87 | ||
|
|
c4854bb355 | ||
|
|
1dcc6d61dc | ||
|
|
ed7c6946cb | ||
|
|
7baa316224 | ||
|
|
31fd9cbf48 | ||
|
|
e8f279280f | ||
|
|
787acd3bda | ||
|
|
86bd6432ee | ||
|
|
bf847ad045 | ||
|
|
a4e9a04982 | ||
|
|
72a1d33f9d | ||
|
|
bec82127e7 | ||
|
|
8f83773431 | ||
|
|
8495a45002 | ||
|
|
333c8a9cfd | ||
|
|
1baeb7ee61 | ||
|
|
ee5e3bc94f | ||
|
|
7b0a4bce98 | ||
|
|
2221fd3256 | ||
|
|
84a661beaf |
@@ -1,832 +0,0 @@
|
||||
{
|
||||
"permissions": {
|
||||
"allow": [
|
||||
"Read(**)",
|
||||
"Glob(**)",
|
||||
"Grep(**)",
|
||||
"Bash(curl *)",
|
||||
"Bash(kubectl get *)",
|
||||
"Bash(kubectl describe *)",
|
||||
"Bash(kubectl logs *)",
|
||||
"Bash(kubectl rollout status *)",
|
||||
"Bash(docker ps *)",
|
||||
"Bash(docker logs *)",
|
||||
"Bash(ls *)",
|
||||
"Bash(cat *)",
|
||||
"Bash(head *)",
|
||||
"Bash(tail *)",
|
||||
"Bash(grep *)",
|
||||
"Bash(find *)",
|
||||
"Bash(pwd)",
|
||||
"Bash(which *)",
|
||||
"Bash(echo *)",
|
||||
"Bash(git status *)",
|
||||
"Bash(git log *)",
|
||||
"Bash(git diff *)",
|
||||
"Bash(git branch *)",
|
||||
"Bash(git remote *)",
|
||||
"Edit(**)",
|
||||
"Write(apps/**)",
|
||||
"Write(packages/**)",
|
||||
"Write(docs/**)",
|
||||
"Write(.agents/**)",
|
||||
"Write(k8s/**)",
|
||||
"Write(scripts/**)",
|
||||
"Bash(pnpm *)",
|
||||
"Bash(npm *)",
|
||||
"Bash(npx *)",
|
||||
"Bash(node *)",
|
||||
"Bash(python *)",
|
||||
"Bash(python3 *)",
|
||||
"Bash(pip *)",
|
||||
"Bash(cd *)",
|
||||
"Bash(mkdir *)",
|
||||
"Bash(touch *)",
|
||||
"Bash(cp *)",
|
||||
"Bash(mv *)",
|
||||
"Bash(chmod *)",
|
||||
"Bash(pytest *)",
|
||||
"Bash(playwright *)",
|
||||
"Bash(git add *)",
|
||||
"Bash(git commit *)",
|
||||
"Bash(git stash *)",
|
||||
"Bash(ssh *)",
|
||||
"Bash(scp *)",
|
||||
"Bash(export KUBECONFIG=*)",
|
||||
"Bash(git push:*)",
|
||||
"Bash(claude --version)",
|
||||
"Bash(git check-ignore:*)",
|
||||
"WebSearch",
|
||||
"Bash(claude plugin:*)",
|
||||
"Bash(claude --channels)",
|
||||
"Bash(claude --channels plugin:telegram@claude-plugins-official --help)",
|
||||
"Bash(bash)",
|
||||
"Bash(source ~/.zshrc)",
|
||||
"Bash(~/.bun/bin/bun --version)",
|
||||
"Bash(env)",
|
||||
"Bash(claude upgrade:*)",
|
||||
"Bash(/Users/ogt/.local/bin/claude --help)",
|
||||
"Bash(CLAUDE_CODE_EXPERIMENTAL_CHANNELS=1 claude --help)",
|
||||
"Bash(claude --channels plugin:telegram@claude-plugins-official --print \"hello\")",
|
||||
"Bash(mkdir -p ~/.claude/channels/telegram)",
|
||||
"Bash(~/.claude/channels/telegram/.env)",
|
||||
"Bash(~/.bun/bin/bun run:*)",
|
||||
"Bash(sudo ln:*)",
|
||||
"Bash(ln -sf ~/.bun/bin/bun /opt/homebrew/bin/bun)",
|
||||
"Bash(xargs python:*)",
|
||||
"Bash(uv --version)",
|
||||
"Bash(pip3 install:*)",
|
||||
"Bash(pip3 show:*)",
|
||||
"Bash(ruff *)",
|
||||
"Bash(mypy *)",
|
||||
"Bash(black *)",
|
||||
"Bash(isort *)",
|
||||
"Bash(timeout *)",
|
||||
"Bash(wc *)",
|
||||
"Bash(sort *)",
|
||||
"Bash(uniq *)",
|
||||
"Bash(awk *)",
|
||||
"Bash(sed *)",
|
||||
"Bash(tr *)",
|
||||
"Bash(tee *)",
|
||||
"Bash(xargs *)",
|
||||
"Bash(test *)",
|
||||
"Bash([ *)",
|
||||
"Bash(true)",
|
||||
"Bash(false)",
|
||||
"Bash(date *)",
|
||||
"Bash(sleep *)",
|
||||
"Bash(kill *)",
|
||||
"Bash(pkill *)",
|
||||
"Bash(ps *)",
|
||||
"Bash(top *)",
|
||||
"Bash(htop *)",
|
||||
"Bash(df *)",
|
||||
"Bash(du *)",
|
||||
"Bash(free *)",
|
||||
"Bash(uname *)",
|
||||
"Bash(hostname *)",
|
||||
"Bash(whoami)",
|
||||
"Bash(id *)",
|
||||
"Bash(groups *)",
|
||||
"Bash(stat *)",
|
||||
"Bash(file *)",
|
||||
"Bash(realpath *)",
|
||||
"Bash(dirname *)",
|
||||
"Bash(basename *)",
|
||||
"Bash(type *)",
|
||||
"Bash(command *)",
|
||||
"Bash(hash *)",
|
||||
"Bash(alias *)",
|
||||
"Bash(set *)",
|
||||
"Bash(unset *)",
|
||||
"Bash(printenv *)",
|
||||
"Bash(diff *)",
|
||||
"Bash(cmp *)",
|
||||
"Bash(comm *)",
|
||||
"Bash(join *)",
|
||||
"Bash(paste *)",
|
||||
"Bash(cut *)",
|
||||
"Bash(rev *)",
|
||||
"Bash(nl *)",
|
||||
"Bash(fmt *)",
|
||||
"Bash(fold *)",
|
||||
"Bash(pr *)",
|
||||
"Bash(expand *)",
|
||||
"Bash(unexpand *)",
|
||||
"Bash(od *)",
|
||||
"Bash(xxd *)",
|
||||
"Bash(hexdump *)",
|
||||
"Bash(strings *)",
|
||||
"Bash(base64 *)",
|
||||
"Bash(md5sum *)",
|
||||
"Bash(sha256sum *)",
|
||||
"Bash(jq *)",
|
||||
"Bash(yq *)",
|
||||
"Bash(gh *)",
|
||||
"Bash(docker build *)",
|
||||
"Bash(docker run *)",
|
||||
"Bash(docker exec *)",
|
||||
"Bash(docker compose *)",
|
||||
"Bash(docker-compose *)",
|
||||
"Bash(docker images *)",
|
||||
"Bash(docker inspect *)",
|
||||
"Bash(docker network *)",
|
||||
"Bash(docker volume *)",
|
||||
"Bash(kubectl apply *)",
|
||||
"Bash(kubectl create *)",
|
||||
"Bash(kubectl exec *)",
|
||||
"Bash(kubectl port-forward *)",
|
||||
"Bash(kubectl config *)",
|
||||
"Bash(helm *)",
|
||||
"Bash(terraform *)",
|
||||
"Bash(ansible *)",
|
||||
"Bash(bun *)",
|
||||
"Bash(deno *)",
|
||||
"Bash(cargo *)",
|
||||
"Bash(rustc *)",
|
||||
"Bash(go *)",
|
||||
"Bash(java *)",
|
||||
"Bash(javac *)",
|
||||
"Bash(gradle *)",
|
||||
"Bash(mvn *)",
|
||||
"Bash(make *)",
|
||||
"Bash(cmake *)",
|
||||
"Bash(ninja *)",
|
||||
"Bash(uv *)",
|
||||
"Bash(poetry *)",
|
||||
"Bash(pipx *)",
|
||||
"Bash(virtualenv *)",
|
||||
"Bash(venv *)",
|
||||
"Bash(conda *)",
|
||||
"Bash(brew *)",
|
||||
"Bash(apt *)",
|
||||
"Bash(apt-get *)",
|
||||
"Bash(yum *)",
|
||||
"Bash(dnf *)",
|
||||
"Bash(pacman *)",
|
||||
"Bash(snap *)",
|
||||
"Bash(flatpak *)",
|
||||
"Bash(systemctl status *)",
|
||||
"Bash(journalctl *)",
|
||||
"Bash(service * status)",
|
||||
"Bash(nc *)",
|
||||
"Bash(netstat *)",
|
||||
"Bash(ss *)",
|
||||
"Bash(lsof *)",
|
||||
"Bash(nmap *)",
|
||||
"Bash(dig *)",
|
||||
"Bash(nslookup *)",
|
||||
"Bash(host *)",
|
||||
"Bash(ping *)",
|
||||
"Bash(traceroute *)",
|
||||
"Bash(mtr *)",
|
||||
"Bash(wget *)",
|
||||
"Bash(http *)",
|
||||
"Bash(httpie *)",
|
||||
"Bash(hadolint apps/api/Dockerfile)",
|
||||
"Bash(docker info:*)",
|
||||
"Bash(kubectl cluster-info:*)",
|
||||
"Read(//var/run/**)",
|
||||
"Bash(open -a Docker)",
|
||||
"Bash(git rm:*)",
|
||||
"Bash(git reset:*)",
|
||||
"Bash(kubectl --kubeconfig ~/.kube/config get pods -n awoooi -o wide)",
|
||||
"Bash(kubectl scale:*)",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollam@192.168.0.188 \"docker ps -a | grep -i claw\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker ps -a | grep -i claw\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker start clawbot && sleep 3 && docker logs clawbot --tail=10\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker ps | grep clawbot && docker port clawbot\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot --tail=30\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cat /home/ollama/clawbot/.env | grep -E ''\\(TG_|TELEGRAM\\)'' | head -5\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker inspect clawbot --format=''{{range .Mounts}}{{.Source}}:{{.Destination}} {{end}}''\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker inspect clawbot --format=''{{range .Config.Env}}{{println .}}{{end}}'' | grep -E ''\\(TG_|TELEGRAM|ENABLED\\)''\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot 2>&1 | grep -i ''logout\\\\|log.out\\\\|shutdown\\\\|stop'' | tail -20\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot 2>&1 | grep -E ''\\(getMe|getUpdates|sendMessage\\).*200'' | tail -5\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot 2>&1 | grep -i ''success\\\\|started\\\\|初始化'' | head -20\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot 2>&1 | grep -E ''2026-03-\\(19|20|21\\)'' | grep -i ''error\\\\|fail\\\\|logout\\\\|400\\\\|401'' | head -20\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker stop clawbot && docker rm clawbot && echo ''✅ OpenClaw 已永久停用''\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cd /home/ollama/clawbot-v5 && docker-compose ps 2>/dev/null || ls -la docker-compose.yml 2>/dev/null || find /home/ollama -name ''docker-compose*.yml'' -type f 2>/dev/null | head -5\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cd /home/ollama/clawbot-v5 && docker-compose up -d && sleep 3 && docker-compose ps\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cd /home/ollama/clawbot-v5 && docker compose up -d 2>&1 || docker run -d --name clawbot --restart unless-stopped -p 8088:8088 -v /var/run/docker.sock:/var/run/docker.sock 192.168.0.110:5000/library/clawbot:stable-v6 2>&1\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot --tail=15 2>&1\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker ps --format ''table {{.Names}}\\\\t{{.Status}}'' | grep -E ''clawbot|litellm''\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cd /home/ollama/clawbot-v5 && sed -i ''s|TELEGRAM_BOT_TOKEN=.*|TELEGRAM_BOT_TOKEN=8569720657:AAHrJ5CMOb4rP0IYJrCUiDViLsnpK69uEUI|'' .env && grep TELEGRAM_BOT_TOKEN .env\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cd /home/ollama/clawbot-v5 && docker compose down && docker compose up -d && sleep 5 && docker logs clawbot --tail=10\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker ps --format ''{{.Names}}'' | grep -i alert\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker stop alertmanager && docker rm alertmanager && echo ''✅ 舊 AIOPS Alertmanager 已停用''\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker ps --format ''table {{.Names}}\\\\t{{.Image}}\\\\t{{.Status}}''\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cat /home/ollama/momo-pro/monitoring/prometheus/alert_rules.yml 2>/dev/null | grep -A5 ''ClawbotDown\\\\|telegram\\\\|AIOPS'' | head -30\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"find /home/ollama -name ''*.yml'' -type f 2>/dev/null | xargs grep -l ''ClawbotDown\\\\|telegram'' 2>/dev/null | head -5\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker exec clawbot grep -r ''協同警報\\\\|ClawbotDown'' /app 2>/dev/null | head -5\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker exec prometheus cat /etc/prometheus/prometheus.yml 2>/dev/null | grep -A10 ''alerting\\\\|alertmanager''\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker ps | grep -i alert || echo ''✅ 沒有 alertmanager 在運行''\")",
|
||||
"Bash(jq -r '.status, .components | to_entries[] | \"\"\"\"\\\\\\(.key\\): \\\\\\(.value.status\\)\"\"\"\"')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker ps --format ''table {{.Names}}\\\\t{{.Status}}'' | grep clawbot && docker logs clawbot --tail=15\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker inspect clawbot --format=''{{range .Config.Env}}{{println .}}{{end}}'' | grep TELEGRAM\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cd /home/ollama/clawbot-v5 && sed -i ''s|TELEGRAM_BOT_TOKEN=.*|TELEGRAM_BOT_TOKEN=8569720657:AAFjDyjAN94QQrjn1gBnFXAyS20EUyozH8c|'' .env && docker compose down && docker compose up -d && sleep 5 && docker logs clawbot --tail=10\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker exec clawbot grep -r ''ClawBotDown\\\\|ClawbotDown'' /app 2>/dev/null | head -5 || echo ''在程式碼中找不到''\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker exec prometheus cat /etc/prometheus/alerts.yml 2>/dev/null | grep -A10 ''ClawBot\\\\|clawbot'' | head -30\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker exec prometheus cat /etc/prometheus/alerts.yml 2>/dev/null | grep -i ''clawbot\\\\|claw'' -A5 -B5\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot --since=5m 2>&1 | grep -i ''clawbot\\\\|incident\\\\|alert'' | tail -20\")",
|
||||
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot --tail 50 2>&1\")",
|
||||
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot 2>&1 | grep -i ''telegram\\\\|polling\\\\|bot'' | tail -20\")",
|
||||
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker ps --format ''table {{.Names}}\\\\t{{.Status}}\\\\t{{.Ports}}'' | grep -E ''claw|NAME''\")",
|
||||
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot 2>&1 | grep -E ''telegram|Telegram|error|Error'' | tail -20\")",
|
||||
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker ps | grep ollama\")",
|
||||
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker ps -a --format ''table {{.Names}}\\\\t{{.Status}}'' | head -20\")",
|
||||
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"sed -i ''s|host.docker.internal|172.17.0.1|g'' /home/ollama/clawbot-v5/.env && cat /home/ollama/clawbot-v5/.env | grep OLLAMA\")",
|
||||
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cd /home/ollama/clawbot-v5 && docker-compose restart clawbot && sleep 3 && docker logs clawbot --tail 30 2>&1\")",
|
||||
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cd /home/ollama/clawbot-v5 && docker compose restart clawbot && sleep 5 && docker logs clawbot --tail 30 2>&1\")",
|
||||
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker exec clawbot curl -s http://172.17.0.1:11434/api/tags | head -c 200\")",
|
||||
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot 2>&1 | tail -10\")",
|
||||
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot 2>&1 | grep -iE ''error|telegram|polling|alert|send'' | tail -30\")",
|
||||
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cat /home/ollama/clawbot-v5/.env | grep OLLAMA\")",
|
||||
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cd /home/ollama/clawbot-v5 && docker compose up -d --force-recreate clawbot && sleep 5 && docker logs clawbot 2>&1 | tail -20\")",
|
||||
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker exec clawbot curl -s http://172.17.0.1:11434/api/tags | head -c 100\")",
|
||||
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot --since 5m 2>&1 | tail -30\")",
|
||||
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker exec momo-db psql -U postgres -d clawbot -c \"\"SELECT enum_range\\(NULL::approvalstatus\\);\"\"\")",
|
||||
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker exec -e PGPASSWORD=clawbot123 momo-db psql -U clawbot -d clawbot -c \"\"SELECT enum_range\\(NULL::approvalstatus\\);\"\"\")",
|
||||
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker ps | grep -E ''postgres|db''\")",
|
||||
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker exec momo-db env | grep -i postgres\")",
|
||||
"Bash(sshpass -p \"0936223270\" ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"PGPASSWORD=AwoooiProd2026 psql -h localhost -U awoooi -d awoooi_prod -c \"\"SELECT enum_range\\(NULL::approvalstatus\\);\"\"\")",
|
||||
"Bash(KUBECONFIG=~/.kube/config kubectl config get-contexts)",
|
||||
"Bash(docker tag:*)",
|
||||
"Bash(docker push:*)",
|
||||
"Bash(ssh ollama@192.168.0.188 \"cd ~/awoooi-build && find apps/web/src -name ''''*.ts'''' -o -name ''''*.tsx'''' | head -30 | xargs md5sum\")",
|
||||
"Bash(rsync -avz --exclude 'node_modules' --exclude '.next' --exclude '.turbo' --exclude '*.log' /Users/ogt/awoooi/ ollama@192.168.0.188:~/awoooi-build/)",
|
||||
"Bash(gh run:*)",
|
||||
"Bash(APPROVAL_ID=\"ea43578e-17cd-40b9-b4c3-8fe8e92f225c\" __NEW_LINE_76dc92b2699cd7d5__ echo \"=== 檢查 Approval Metadata ===\" curl -s \"https://awoooi.wooo.work/api/v1/approvals/pending\")",
|
||||
"Bash(APPROVAL_ID=\"865ab726-c3b9-447e-86a9-65a6227516e6\" __NEW_LINE_db14ef76ca26af32__ echo \"=== 簽核 ===\" curl -s -X POST \"https://awoooi.wooo.work/api/v1/approvals/$APPROVAL_ID/sign\" -H \"Content-Type: application/json\" -d '{\"\"\"\"signer_id\"\"\"\":\"\"\"\"commander\"\"\"\",\"\"\"\"signer_name\"\"\"\":\"\"\"\"Commander\"\"\"\",\"\"\"\"comment\"\"\"\":\"\"\"\"Test resolution\"\"\"\"}')",
|
||||
"Read(//Users/ogt/awoooi/**)",
|
||||
"Bash(APPROVAL_ID=\"e9445e68-6c3e-4899-b507-3b9b7bcaf0a7\" __NEW_LINE_680ad94d4896e58a__ echo \"=== 簽核 ===\" curl -s -X POST \"https://awoooi.wooo.work/api/v1/approvals/$APPROVAL_ID/sign\" -H \"Content-Type: application/json\" -d '{\"\"\"\"signer_id\"\"\"\":\"\"\"\"commander\"\"\"\",\"\"\"\"signer_name\"\"\"\":\"\"\"\"Commander\"\"\"\",\"\"\"\"comment\"\"\"\":\"\"\"\"Final test\"\"\"\"}')",
|
||||
"Bash(APPROVAL_ID=\"eb0afb4e-834b-4af7-9ae0-3c58232fdd99\" INCIDENT=\"INC-20260323-F05CD6\" __NEW_LINE_47f1c3803a64b43c__ echo \"=== 簽核前 Incident 狀態 ===\" curl -s \"https://awoooi.wooo.work/api/v1/incidents/$INCIDENT\")",
|
||||
"Bash(mkdir -p /Users/ogt/awoooi/.claude/hooks)",
|
||||
"Bash(/Users/ogt/awoooi/.claude/hooks/pre-commit-check.sh:*)",
|
||||
"Bash(git -C /Users/ogt/awoooi status packages/lewooogo-core/)",
|
||||
"Bash(git -C /Users/ogt/awoooi ls-files packages/lewooogo-core/src/)",
|
||||
"Bash(git -C /Users/ogt/awoooi status --short)",
|
||||
"Bash(git -C /Users/ogt/awoooi add apps/api/pyproject.toml apps/api/scripts/ apps/api/src/ apps/web/.eslintrc.js apps/web/src/ packages/lewooogo-core/.eslintrc.js)",
|
||||
"Bash(git -C /Users/ogt/awoooi diff --cached --stat)",
|
||||
"Bash(git -C:*)",
|
||||
"Bash(for wf:*)",
|
||||
"Bash(do)",
|
||||
"Bash(done)",
|
||||
"Bash(jq 'if type == \"\"\"\"array\"\"\"\" then .[0] | {incident_id, status, decision} else . end')",
|
||||
"Bash(PYTHONPATH=. python -c \"from src.api.v1.stats import router; print\\(''✅ stats.py 載入成功,路由數:'', len\\(router.routes\\)\\)\")",
|
||||
"Bash(PYTHONPATH=. pytest tests/ -v --tb=short)",
|
||||
"Bash(PYTHONPATH=. pytest tests/test_stats_api.py -v --tb=short)",
|
||||
"Bash(PYTHONPATH=. pytest tests/test_webhook_telegram_integration.py::TestNewAlertTelegramPush -v --tb=long)",
|
||||
"Bash(PYTHONPATH=. pytest tests/test_webhook_telegram_integration.py::TestNewAlertTelegramPush -v --tb=short)",
|
||||
"Bash(PYTHONPATH=. pytest tests/test_webhook_telegram_integration.py -v --tb=short)",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl get pods -n awoooi')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl get ns awoooi && kubectl get all -n awoooi')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl get ns | head -20')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl get pods -n awoooi-prod')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl logs awoooi-worker-bb89b5ffc-bpf45 -n awoooi-prod --tail=50')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl logs awoooi-worker-bb89b5ffc-bpf45 -n awoooi-prod --tail=100 | grep -i telegram')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl logs awoooi-api-8c9489b6c-cm8g5 -n awoooi-prod --tail=50 | grep -i webhook')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl logs awoooi-api-8c9489b6c-cm8g5 -n awoooi-prod --tail=30')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl get pods -n monitoring | grep alertmanager')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"kubectl get configmap alertmanager-config -n monitoring -o jsonpath=''{.data.alertmanager\\\\.yml}'' | head -50\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl get svc -n awoooi-prod')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"kubectl patch configmap alertmanager-config -n monitoring --type merge -p ''{\"\"data\"\":{\"\"alertmanager.yml\"\":\"\"global:\\\\n resolve_timeout: 5m\\\\n\\\\nroute:\\\\n group_by: [\\\\\"\"alertname\\\\\"\", \\\\\"\"severity\\\\\"\"]\\\\n group_wait: 30s\\\\n group_interval: 5m\\\\n repeat_interval: 4h\\\\n receiver: \\\\\"\"awoooi-webhook\\\\\"\"\\\\n routes:\\\\n - match:\\\\n severity: critical\\\\n receiver: \\\\\"\"awoooi-webhook\\\\\"\"\\\\n group_wait: 10s\\\\n repeat_interval: 1h\\\\n - match:\\\\n severity: warning\\\\n receiver: \\\\\"\"awoooi-webhook\\\\\"\"\\\\n group_wait: 1m\\\\n repeat_interval: 4h\\\\n\\\\nreceivers:\\\\n - name: \\\\\"\"awoooi-webhook\\\\\"\"\\\\n webhook_configs:\\\\n - url: \\\\\"\"http://192.168.0.120:32334/api/v1/webhook/alertmanager\\\\\"\"\\\\n send_resolved: true\\\\n\\\\ninhibit_rules:\\\\n - source_match:\\\\n severity: \\\\\"\"critical\\\\\"\"\\\\n target_match:\\\\n severity: \\\\\"\"warning\\\\\"\"\\\\n equal: [\\\\\"\"alertname\\\\\"\", \\\\\"\"instance\\\\\"\"]\\\\n\"\"}}''\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl rollout restart deployment/alertmanager -n monitoring && kubectl rollout status deployment/alertmanager -n monitoring')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"kubectl get configmap alertmanager-config -n monitoring -o jsonpath=''{.data.alertmanager\\\\.yml}'' | grep -A 3 ''url:''\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl get pods -n awoooi-prod -o jsonpath=\"\"{range .items[*]}{.metadata.name}{\\\\\"\" \\\\\"\"}{.spec.containers[*].image}{\\\\\"\"\\\\\\\\n\\\\\"\"}{end}\"\"')",
|
||||
"Bash(git mv:*)",
|
||||
"Bash(for file:*)",
|
||||
"Bash(do echo:*)",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no -o ConnectTimeout=10 wooo@192.168.0.120 \"echo ''Connected''\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"kubectl get deployment -n awoooi-prod -o jsonpath=''{range .items[*]}{.metadata.name}{\"\" selector: \"\"}{.spec.selector.matchLabels}{\"\"\\\\n\"\"}{end}''\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"kubectl delete deployment awoooi-api awoooi-web awoooi-worker -n awoooi-prod\")",
|
||||
"WebFetch(domain:awoooi.wooo.work)",
|
||||
"WebFetch(domain:api.awoooi.wooo.work)",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl get pods -n awoooi-prod -o wide')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl get svc,ingress -n awoooi-prod')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl exec -n awoooi-prod deploy/awoooi-api -- curl -sf http://localhost:8000/api/v1/health 2>&1')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'curl -sf http://10.43.125.201:8000/api/v1/health 2>&1 || echo \"\"FAILED\"\"')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'sudo nginx -t 2>&1 && sudo cat /etc/nginx/sites-enabled/awoooi* 2>/dev/null || sudo cat /etc/nginx/conf.d/awoooi* 2>/dev/null || echo \"\"No awoooi nginx config found\"\"')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'cat /etc/nginx/sites-enabled/* 2>/dev/null | grep -A5 awoooi || cat /etc/nginx/conf.d/* 2>/dev/null | grep -A5 awoooi || ls -la /etc/nginx/ 2>/dev/null || echo \"\"No nginx on this host\"\"')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'ls /etc/nginx/sites-enabled/ 2>/dev/null && cat /etc/nginx/sites-enabled/*awoooi* 2>/dev/null || echo \"\"Checking conf.d...\"\" && ls /etc/nginx/conf.d/ 2>/dev/null')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'grep -l awoooi /etc/nginx/sites-enabled/* 2>/dev/null || grep -r \"\"awoooi\"\" /etc/nginx/sites-enabled/ 2>/dev/null | head -20')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'grep -r \"\"awoooi\\\\|32334\\\\|32335\"\" /etc/nginx/ 2>/dev/null | head -20')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S cp /tmp/awoooi-prod.conf /etc/nginx/conf.d/ && echo \"\"Config copied\"\" && sudo nginx -t 2>&1')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S ls -la /etc/nginx/ssl/ 2>/dev/null || echo \"\"No ssl dir\"\" && sudo ls -la /etc/letsencrypt/live/ 2>/dev/null | head -10')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S sed -i \"\"s|/etc/nginx/ssl/awoooi.crt|/etc/letsencrypt/live/awoooi.wooo.work/fullchain.pem|g\"\" /etc/nginx/conf.d/awoooi-prod.conf && sudo sed -i \"\"s|/etc/nginx/ssl/awoooi.key|/etc/letsencrypt/live/awoooi.wooo.work/privkey.pem|g\"\" /etc/nginx/conf.d/awoooi-prod.conf && echo \"\"Paths fixed\"\" && sudo nginx -t 2>&1')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S nginx -s reload && echo \"\"Nginx reloaded!\"\" && sleep 2')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'grep -r \"\"awoooi\"\" /etc/nginx/sites-enabled/ 2>/dev/null | head -5')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S grep -rl \"\"awoooi.wooo.work\"\" /etc/nginx/ 2>/dev/null')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'curl -sf http://192.168.0.121:32334/api/v1/health 2>&1 || echo \"\"FAILED to reach 121\"\"')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S rm /etc/nginx/conf.d/awoooi-prod.conf && sudo nginx -t && sudo nginx -s reload && echo \"\"Cleaned up duplicate config\"\"')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S tail -30 /var/log/nginx/error.log 2>/dev/null')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'grep -r \"\"api.awoooi\"\" /etc/nginx/ 2>/dev/null || echo \"\"No api.awoooi config found\"\"')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl get configmap awoooi-config -n awoooi-prod -o yaml | grep -A5 NEXT_PUBLIC')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl get deployment awoooi-web -n awoooi-prod -o yaml | grep -A20 \"\"env:\"\" | head -25')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S tail -10 /var/log/nginx/access.log 2>/dev/null | grep awoooi')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S tail -5 /var/log/nginx/error.log 2>/dev/null')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S stat /etc/nginx/sites-available/awoooi.wooo.work.conf 2>/dev/null | grep -E \"\"Modify|Change|Birth\"\"')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl logs -n awoooi-prod -l app=awoooi-web --tail=30 2>/dev/null | grep -i \"\"api\\\\|error\\\\|fetch\"\" | head -20')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S tail -20 /var/log/nginx/access.log 2>/dev/null | grep -E \"\"awoooi.*api\"\"')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S tail -20 /var/log/nginx/awoooi-prod-access.log 2>/dev/null')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl exec -n awoooi-prod deploy/awoooi-web -- env | grep -i api')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl exec -n awoooi-prod deploy/awoooi-web -- sh -c \"\"grep -r \\\\\"\"NEXT_PUBLIC_API_URL\\\\|api.awoooi\\\\\"\" /app/.next/static/chunks/*.js 2>/dev/null | head -5 || grep -r \\\\\"\"awoooi.wooo.work\\\\\"\" /app/.next/static/chunks/*.js 2>/dev/null | head -3\"\"')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'kubectl exec -n awoooi-prod deploy/awoooi-web -- sh -c \"\"find /app/.next -name \\\\\"\"*.js\\\\\"\" -exec grep -l \\\\\"\"awoooi\\\\\"\" {} \\\\; 2>/dev/null | head -3\"\"')",
|
||||
"Bash(./scripts/qa-zero-touch.sh)",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S cat /etc/nginx/sites-available/awoooi.wooo.work.conf')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S cp /tmp/awoooi.wooo.work.conf /etc/nginx/sites-available/awoooi.wooo.work.conf && sudo nginx -t 2>&1')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'echo \"\"0936223270\"\" | sudo -S nginx -s reload && echo \"\"✅ Nginx reloaded with load balancing!\"\"')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'cd /opt && sudo ls -la sentry 2>/dev/null || echo \"\"Sentry 目錄不存在,需要建立\"\"')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'sudo mkdir -p /opt/sentry && sudo chown wooo:wooo /opt/sentry && cd /opt/sentry && git clone https://github.com/getsentry/self-hosted.git . 2>&1 | tail -5')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'echo \"\"0936223270\"\" | sudo -S mkdir -p /opt/sentry && echo \"\"0936223270\"\" | sudo -S chown wooo:wooo /opt/sentry && cd /opt/sentry && git clone https://github.com/getsentry/self-hosted.git . 2>&1 | tail -10')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'cd /opt/sentry && ls -la 2>&1 | head -20')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'cd /opt/sentry && git describe --tags 2>/dev/null || git rev-parse --short HEAD')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'cd /opt/sentry && ./install.sh --help 2>&1 | head -30 || echo \"\"No help available, checking script...\"\"')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'cd /opt/sentry && nohup ./install.sh --skip-user-creation --no-report-self-hosted-issues > /tmp/sentry-install.log 2>&1 &')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'tail -30 /tmp/sentry-install.log 2>/dev/null || echo \"\"日誌檔案尚未建立,等待中...\"\"')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'grep -E \"\"^\\\\▶|^Creating|^Starting|^Error|^✓|Pulling\"\" /tmp/sentry-install.log 2>/dev/null | tail -40')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'echo \"\"=== 日誌行數 ===\"\" && wc -l /tmp/sentry-install.log && echo \"\"\"\" && echo \"\"=== 最近進度 ===\"\" && tail -10 /tmp/sentry-install.log')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'echo \"\"=== 日誌行數 ===\"\" && wc -l /tmp/sentry-install.log && echo \"\"\"\" && echo \"\"=== 關鍵階段 ===\"\" && grep -E \"\"^▶|✓|Error|Creating|Starting\"\" /tmp/sentry-install.log | tail -20')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'echo \"\"=== 日誌行數 ===\"\" && wc -l /tmp/sentry-install.log && echo \"\"\"\" && echo \"\"=== 最近 20 行 ===\"\" && tail -20 /tmp/sentry-install.log')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'echo \"\"=== 日誌行數 ===\"\" && wc -l /tmp/sentry-install.log && echo \"\"\"\" && echo \"\"=== 關鍵階段 ===\"\" && grep -E \"\"^▶|✓|Error|Creating|Starting|Building|DONE\"\" /tmp/sentry-install.log | tail -30')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'echo \"\"=== 日誌行數 ===\"\" && wc -l /tmp/sentry-install.log && echo \"\"\"\" && echo \"\"=== 最近關鍵階段 ===\"\" && grep -E \"\"^▶|✓|Error|Creating|Starting|DONE|Completed|success\"\" /tmp/sentry-install.log | tail -25')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.110 'grep -E \"\"^▶|✓|Error|Completed|success|fail\"\" /tmp/sentry-install.log | tail -15')",
|
||||
"Bash(redis-cli -h 192.168.0.188 -p 6380 KEYS incident:*)",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cat /home/ollama/momo-pro/monitoring/alertmanager.yml 2>/dev/null || cat /etc/alertmanager/alertmanager.yml 2>/dev/null || echo ''Config not found''\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot --tail 30 2>&1\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker logs clawbot --tail 20 2>&1 | grep -iE ''telegram|send|alert|incident|error''\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cat /home/ollama/clawbot-v5/.env | grep -E ''TELEGRAM|TG_'' | head -5\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cat /home/ollama/clawbot-v5/.env | grep -E ''REDIS|POSTGRES|DATABASE'' | head -5\")",
|
||||
"Bash(ssh ollama@192.168.0.188 'curl -s \"\"http://localhost:9093/api/v2/alerts?active=true\"\" | python3 -c \"\"import sys,json; alerts=json.load\\(sys.stdin\\); print\\(f\\\\\"\"Active alerts: {len\\(alerts\\)}\\\\\"\"\\)\"\"')",
|
||||
"Bash(ssh ollama@192.168.0.188 'curl -s \"\"http://localhost:9093/api/v2/alerts\"\" | python3 -c \"\"import sys,json; alerts=json.load\\(sys.stdin\\); print\\(f\\\\\"\"Total alerts: {len\\(alerts\\)}\\\\\"\"\\); [print\\(a[\\\\\"\"labels\\\\\"\"][\\\\\"\"alertname\\\\\"\"]\\) for a in alerts[:5]]\"\"')",
|
||||
"Bash(ssh ollama@192.168.0.188 'redis-cli -p 6380 -n 0 GET incident:INC-20260324-36AF55 | python3 -c \"\"import sys,json; d=json.load\\(sys.stdin\\); print\\(f\\\\\"\"Status: {d.get\\(\\\\\"\"status\\\\\"\"\\)}\\\\\"\"\\); print\\(f\\\\\"\"message_id: {d.get\\(\\\\\"\"message_id\\\\\"\", \\\\\"\"NONE\\\\\"\"\\)}\\\\\"\"\\); print\\(f\\\\\"\"chat_id: {d.get\\(\\\\\"\"chat_id\\\\\"\", \\\\\"\"NONE\\\\\"\"\\)}\\\\\"\"\\)\"\"')",
|
||||
"Bash(ssh ollama@192.168.0.188 'redis-cli -p 6380 -n 0 GET incident:INC-20260324-36AF55 | python3 -c \"\"import sys,json; d=json.load\\(sys.stdin\\); print\\(f\\\\\"\"status: {d.get\\('status'\\)}\\\\\"\"\\); print\\(f\\\\\"\"message_id: {d.get\\('message_id'\\)}\\\\\"\"\\); print\\(f\\\\\"\"created_at: {d.get\\('created_at'\\)}\\\\\"\"\\)\"\"')",
|
||||
"Bash(redis-cli -h 192.168.0.188 -p 6380 -n 0 KEYS *approval*)",
|
||||
"Bash(redis-cli -h 192.168.0.188 -p 6380 -n 0 KEYS *incident*)",
|
||||
"Bash(redis-cli -h 192.168.0.188 -p 6380 -n 0 KEYS *pending*)",
|
||||
"Bash(redis-cli -h 192.168.0.188 -p 6380 -n 0 KEYS *)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/k3s-prod.yaml kubectl get pods -n awoooi-prod -o wide)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/k3s-prod.yaml kubectl get deployment awoooi-api -n awoooi-prod -o jsonpath='{.spec.template.spec.containers[0].image}')",
|
||||
"Bash(kubectl --kubeconfig=/Users/ogt/awoooi/k3s-prod.yaml get deployment awoooi-api -n awoooi-prod -o jsonpath='{.spec.template.spec.containers[0].image}')",
|
||||
"Bash(python3 -c \":*)",
|
||||
"Bash(/tmp/awoooi-tg-secret.yaml:*)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/k3s-prod.yaml kubectl apply -f /tmp/awoooi-tg-secret.yaml)",
|
||||
"Bash(for pod:*)",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.188 \"curl -fsSL https://ollama.com/install.sh | sh\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no -o PreferredAuthentications=password wooo@192.168.0.188 \"echo connected && ollama --version\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no -o PreferredAuthentications=password ollama@192.168.0.188 \"curl -fsSL https://ollama.com/install.sh | sh\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"echo ''0936223270'' | sudo -S curl -fsSL https://ollama.com/install.sh | sudo -S sh\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"ollama --version\")",
|
||||
"Bash(__NEW_LINE_95e9df111552805b__ echo:*)",
|
||||
"Bash(sshpass -p '0936223270' scp /Users/ogt/awoooi/k8s/nginx/awoooi-prod.conf ollama@192.168.0.188:/tmp/awoooi-prod.conf)",
|
||||
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"echo ''0936223270'' | sudo -S cp /tmp/awoooi-prod.conf /etc/nginx/conf.d/awoooi-prod.conf && echo ''0936223270'' | sudo -S nginx -t 2>&1\")",
|
||||
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"echo ''0936223270'' | sudo -S ls -la /etc/nginx/ssl/ 2>/dev/null || echo ''No ssl dir''; echo ''0936223270'' | sudo -S ls -la /etc/nginx/conf.d/ 2>/dev/null | head -10\")",
|
||||
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"echo ''0936223270'' | sudo -S grep -r ''ssl_certificate'' /etc/nginx/ 2>/dev/null | head -5\")",
|
||||
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"echo ''0936223270'' | sudo -S grep -A 20 ''server_name awoooi'' /etc/nginx/sites-enabled/all-sites.conf 2>/dev/null | head -30\")",
|
||||
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"echo ''0936223270'' | sudo -S ls -la /etc/nginx/sites-enabled/ 2>/dev/null\")",
|
||||
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"echo ''0936223270'' | sudo -S cat /etc/nginx/sites-available/awoooi.wooo.work.conf 2>/dev/null\")",
|
||||
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"echo ''0936223270'' | sudo -S rm /etc/nginx/conf.d/awoooi-prod.conf && echo ''0936223270'' | sudo -S nginx -t 2>&1\")",
|
||||
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"echo ''0936223270'' | sudo -S nginx -s reload 2>&1\")",
|
||||
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"echo ''0936223270'' | sudo -S systemctl reload nginx 2>&1\")",
|
||||
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"docker logs openclaw 2>&1 | tail -30\")",
|
||||
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"docker ps -a --format ''table {{.Names}}\\\\t{{.Status}}\\\\t{{.Image}}'' 2>&1 | head -15\")",
|
||||
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"docker logs clawbot 2>&1 | grep -i telegram | tail -20\")",
|
||||
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"docker logs clawbot 2>&1 | tail -30\")",
|
||||
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"docker exec alertmanager cat /etc/alertmanager/alertmanager.yml 2>&1 | head -30\")",
|
||||
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"curl -sf ''http://localhost:9093/api/v1/alerts'' | jq ''.data | length'' 2>/dev/null || curl -sf ''http://localhost:9093/api/v2/alerts'' | jq ''length'' 2>/dev/null\")",
|
||||
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"docker exec alertmanager wget -qO- ''http://localhost:9093/api/v2/alerts'' 2>&1 | head -100\")",
|
||||
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"KUBECONFIG=/etc/rancher/k3s/k3s.yaml kubectl -n awoooi-prod logs -l app=awoooi-worker --tail=50 2>&1\")",
|
||||
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"cat /home/ollama/alertmanager/alertmanager.yml 2>/dev/null || docker exec alertmanager cat /etc/alertmanager/alertmanager.yml\")",
|
||||
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"docker cp /tmp/alertmanager.yml alertmanager:/etc/alertmanager/alertmanager.yml && docker exec alertmanager amtool check-config /etc/alertmanager/alertmanager.yml && docker kill -s SIGHUP alertmanager\")",
|
||||
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"docker inspect alertmanager --format ''{{range .Mounts}}{{.Source}} -> {{.Destination}}{{println}}{{end}}''\")",
|
||||
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"docker exec alertmanager cat /etc/alertmanager/alertmanager.yml\")",
|
||||
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"docker restart alertmanager && sleep 3 && docker exec alertmanager cat /etc/alertmanager/alertmanager.yml\")",
|
||||
"Bash(sshpass -p '0936223270' ssh ollama@192.168.0.188 \"docker logs clawbot 2>&1 | grep -i ''telegram\\\\|webhook\\\\|alert'' | tail -10\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-api -n awoooi-prod --tail=30 2>/dev/null | grep -E ''''POST|webhook|alertmanager|ManualTest''''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-api -n awoooi-prod --tail=30 2>/dev/null | grep -iE ''''POST|webhook''''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-api -n awoooi-prod --tail=50 2>/dev/null | grep -iE ''''POST.*webhook|alertmanager_webhook|NewFingerprint''''\")",
|
||||
"Bash(kustomize build:*)",
|
||||
"Bash(KUBECONFIG=~/.kube/config kubectl get secret awoooi-secrets -n awoooi-prod -o jsonpath='{.data}')",
|
||||
"Bash(KUBECONFIG=/Users/ogt/.kube/config kubectl exec deploy/awoooi-api -n awoooi-prod -- env)",
|
||||
"Bash(git checkout:*)",
|
||||
"Bash(jq -r '.status // \"\"\"\"failed\"\"\"\"')",
|
||||
"Bash(jq -r '.total // \"\"\"\"error\"\"\"\"')",
|
||||
"Bash(redis-cli -h 192.168.0.188 -p 6380 -n 10 XLEN awoooi:signals)",
|
||||
"Bash(redis-cli -h 192.168.0.188 -p 6380 -n 10 XRANGE awoooi:signals - + COUNT 5)",
|
||||
"Bash(SENTRY_TOKEN=\"2b73050606d2b32f54095b4e177f4842f2bfe69d4b17da25f6daa4739148a972\" curl -s \"http://192.168.0.110:9000/api/0/organizations/\" -H \"Authorization: Bearer $SENTRY_TOKEN\")",
|
||||
"Bash(SENTRY_TOKEN=\"2b73050606d2b32f54095b4e177f4842f2bfe69d4b17da25f6daa4739148a972\" curl -s \"http://192.168.0.110:9000/api/0/organizations/sentry/projects/\" -H \"Authorization: Bearer $SENTRY_TOKEN\")",
|
||||
"Bash(SENTRY_TOKEN=\"2b73050606d2b32f54095b4e177f4842f2bfe69d4b17da25f6daa4739148a972\" curl -s \"http://192.168.0.110:9000/api/0/projects/sentry/awoooi-api/rules/\" -H \"Authorization: Bearer $SENTRY_TOKEN\")",
|
||||
"Bash(SENTRY_TOKEN=\"2b73050606d2b32f54095b4e177f4842f2bfe69d4b17da25f6daa4739148a972\" __NEW_LINE_583db0bbb6875db0__ echo \"=== Alert Rules ===\" curl -s \"http://192.168.0.110:9000/api/0/projects/sentry/awoooi-api/rules/\" -H \"Authorization: Bearer $SENTRY_TOKEN\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get nodes -o wide && echo ''---'' && kubectl top nodes 2>/dev/null || echo ''metrics-server not installed''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get pods -n awoooi-prod -o wide && echo ''---'' && kubectl get pvc -n awoooi-prod 2>/dev/null && echo ''---'' && kubectl get sc 2>/dev/null && echo ''---'' && kubectl get deploy -n awoooi-prod\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get ns && echo ''---'' && kubectl get svc -A | grep -E ''prometheus|grafana|metrics|signoz|longhorn|argocd'' || echo ''No monitoring/gitops services found''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"cat /etc/rancher/k3s/config.yaml 2>/dev/null || echo ''--- K3s default config \\(no custom config.yaml\\) ---'' && echo ''---'' && sudo k3s check-config 2>/dev/null | head -30 || echo ''check-config not available''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"free -h && echo ''---'' && swapon --show && echo ''---'' && df -h /var/lib/rancher/k3s\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get pods -n cnpg-system && echo ''---'' && kubectl get svc -n monitoring\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get all -n awoooi-prod -o wide 2>/dev/null && echo ''---QUOTA---'' && kubectl describe quota -n awoooi-prod 2>/dev/null && echo ''---EVENTS---'' && kubectl get events -n awoooi-prod --sort-by=''.lastTimestamp'' 2>/dev/null | tail -20\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get helmcharts -A 2>/dev/null || echo ''No HelmCharts'' && echo ''---'' && kubectl get helmreleases -A 2>/dev/null || echo ''No HelmReleases'' && echo ''---'' && kubectl api-resources | grep -E ''argo|flux|velero|longhorn'' || echo ''No GitOps/Backup CRDs''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get ds -A && echo ''---'' && kubectl get cm -n kube-system | grep -E ''traefik|coredns'' && echo ''---REGISTRIES---'' && sudo cat /etc/rancher/k3s/registries.yaml 2>/dev/null || echo ''No registries.yaml''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get ingress -A 2>/dev/null || echo ''No Ingress'' && echo ''---HPA---'' && kubectl get hpa -A 2>/dev/null || echo ''No HPA'' && echo ''---PDB---'' && kubectl get pdb -A 2>/dev/null || echo ''No PDB'' && echo ''---SYSCTL---'' && cat /proc/sys/net/core/somaxconn && cat /proc/sys/fs/file-max\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"systemctl status k3s | head -20 && echo ''---K3S-VERSION---'' && k3s --version && echo ''---ETCD-STATUS---'' && sudo k3s etcd-snapshot list 2>/dev/null | head -5 || echo ''No etcd snapshots''\")",
|
||||
"Bash(ssh wooo@192.168.0.121 \"free -h && swapon --show && echo ''---DISK---'' && df -h /var/lib/rancher/k3s 2>/dev/null\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"sudo ls -la /var/lib/rancher/k3s/server/db/ 2>/dev/null && echo ''---TOKEN---'' && sudo cat /var/lib/rancher/k3s/server/token 2>/dev/null | head -1 | cut -c1-20\")",
|
||||
"Bash(ssh -o ConnectTimeout=10 wooo@192.168.0.120 \"ps aux | grep k3s | grep -v grep | head -3 && echo ''---'' && sudo cat /etc/systemd/system/k3s.service 2>/dev/null | grep -E ''ExecStart|datastore''\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"echo ''0936223270'' | sudo -S mkdir -p /backup/k3s_etcd 2>/dev/null && echo ''0936223270'' | sudo -S chown ollama:ollama /backup/k3s_etcd 2>/dev/null && echo ''=== 188 備份目錄 ==='' && ls -la /backup/\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"mkdir -p ~/.ssh && chmod 700 ~/.ssh && echo ''ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQCnTnbjtSPwrI/pN6DByDxsFDOR4+sVnk7hb+eOr+Pb4e7o7QGbyKaJC2eKP7uRBilPqeScuvNKZhwmY8ZOuhjId+ZyLK0jZXHdq3a6tjsQ4MwPGyT2aMaD7x2jKzPbFojR0P5lmQWH2zjxeVuB7UeBIejaYk3gQEMFVES8Xh84yxFvy9jlwKmZFAI0gIhx0nPOTPB7onTyb8L5snUbwQQntoHWYFbb83+wui/kM15aLT5r8uvS2yZdsWWrDvAyuIShde1ceTBevwwqxezH1egXGoGkvZYYF7vHFu3X6jF7Nfp4qVfo0EfFV3omy90HzoFvoEXCC+jIWU0TjUqdEgGIEj2b+YXw3bIs+k+g/0/iJzA5LLUNb2vHVHoUmah4ZNlfiGU7e6hTYXjLjoXJlz9gfv6LYywhgktdThi9sUCn6rzbatlMrY0HNUE6uOwRTugMq1YUEJCvRqeFmtX5yF6xGp+FbOjIr1kMmplbRQRqKIrpQoqEn0+UBXC7OwJNCk8= wooo@mon'' >> ~/.ssh/authorized_keys && chmod 600 ~/.ssh/authorized_keys && echo ''SSH key 已加入''\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"echo ''ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQCs3nQ11B+V/VEchNR9Uzj57JoKXOJ8S1UVjCTHkUDL8FnrbdPFr0zvpYgX0a/Ipj9wHkqU6z6Ho6MQj3X2+HaK5fC0fZ3aZE1QT2df/x0xXdyka9XSaTFaymKzNTvfmum40koBkNccKyO5SLSjTcoTZCDHP4RqHHu/MYjQMejG7yeyCFmgumrHh5T/0DXPf5zl0Ff1C5U3VCLPxz5vq63JB2dTfrjQLg3sO0ZI3KTZE8aFj3txKz5snDZX3nE1tHZMKLecwwEqi130BtVZcm8zXDqX83gtUDp/WLfPyKCmzZzGf6YgEofIsyrVup8XnD9xNoFmbEeBdFocGWeoIVIn+faOpU22fvQ34L57GHhNQwygZOPKsZa9XNKjayKdKQl3gcAA2wnkZgN0cyIEYvTd3O+Z5Xvff2dat+0sDMK571V+0JEdAMOpQjFO7DkwjKHn/gHLmvRjYLiUOItX9JysFgYuHs8omad2LmeUIkQrBD2I2hyvY49HaJKWctk4Jm0= root@mon'' >> ~/.ssh/authorized_keys && echo ''Root SSH key added''\")",
|
||||
"Bash(grep -r \"\"\"zod\"\"\" /Users/ogt/awoooi/package.json /Users/ogt/awoooi/apps/*/package.json /Users/ogt/awoooi/packages/*/package.json)",
|
||||
"Bash(__NEW_LINE_144503b060dfd3dd__ echo:*)",
|
||||
"Bash(__NEW_LINE_ae2a22b14586d7aa__ echo:*)",
|
||||
"Bash(__NEW_LINE_e17561a4e55f74d4__ echo:*)",
|
||||
"Bash(ssh wooo@192.168.0.120 \"echo ''''0936223270'''' | sudo -S cat /etc/rancher/k3s/k3s.yaml 2>/dev/null | sed ''''s|https://127.0.0.1:6443|https://192.168.0.125:6443|g''''\")",
|
||||
"Bash(KUBECONFIG=/tmp/kubeconfig-vip.yaml kubectl get nodes)",
|
||||
"Bash(kubectl --kubeconfig=/tmp/kubeconfig-vip.yaml get rs -n awoooi-prod)",
|
||||
"Bash(kubectl --kubeconfig=/tmp/kubeconfig-vip.yaml get pods -A --no-headers)",
|
||||
"Bash(kubectl --kubeconfig=/tmp/kubeconfig-vip.yaml get jobs -A --no-headers)",
|
||||
"Bash(kubectl --kubeconfig=/tmp/kubeconfig-vip.yaml get rs -n awoooi-prod --no-headers)",
|
||||
"Bash(kubectl --kubeconfig=/tmp/kubeconfig-vip.yaml delete job api-watchdog-29556380 -n wooo-aiops-uat)",
|
||||
"Bash(kubectl --kubeconfig=/tmp/kubeconfig-vip.yaml get pods -n awoooi-prod)",
|
||||
"Bash(kubectl --kubeconfig=/tmp/kubeconfig-vip.yaml get pods -A)",
|
||||
"Bash(kubectl --kubeconfig=/tmp/kubeconfig-vip.yaml get svc -A)",
|
||||
"Bash(PGPASSWORD=changeme psql -h 192.168.0.188 -U awoooi -d awoooi_prod -f /Users/ogt/awoooi/apps/api/scripts/migrate_phase18_audit_logs.sql)",
|
||||
"Bash(PLAYWRIGHT_BASE_URL=http://192.168.0.125:32335 npx playwright test phase11-conversational.spec.ts --reporter=list)",
|
||||
"Bash(PLAYWRIGHT_BASE_URL=http://192.168.0.125:32335 npx playwright test phase11-conversational.spec.ts --reporter=list --workers=1)",
|
||||
"Bash(KUBECONFIG=~/.kube/config kubectl get nodes --server=https://192.168.0.125:6443 --insecure-skip-tls-verify)",
|
||||
"Bash(source .venv/bin/activate)",
|
||||
"Read(//etc/postgresql/14/main/**)",
|
||||
"Bash(for port:*)",
|
||||
"Bash(kubectl top:*)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl top pods -n awoooi-prod)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get pods -n awoooi-prod -o wide)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get svc -n awoooi-prod)",
|
||||
"Bash(jq -r '.components | to_entries[] | \"\"\"\"\\\\\\(.key\\): \\\\\\(.value.status\\)\"\"\"\"')",
|
||||
"Bash(tar -xzf velero-v1.13.0-darwin-arm64.tar.gz)",
|
||||
"Bash(sudo mv:*)",
|
||||
"Bash(velero version:*)",
|
||||
"Bash(mkdir -p ~/bin)",
|
||||
"Bash(mv velero-v1.13.0-darwin-arm64/velero ~/bin/)",
|
||||
"Bash(~/bin/velero version:*)",
|
||||
"Bash(k8s/velero/00-namespace.yaml:*)",
|
||||
"Bash(k8s/velero/01-credentials.yaml:*)",
|
||||
"Bash(k8s/velero/02-velero-install.yaml:*)",
|
||||
"Bash(tar -xzf velero.tar.gz)",
|
||||
"Bash(/tmp/velero-credentials:*)",
|
||||
"Bash(__NEW_LINE_e85d95513fc16492__ ~/bin/velero install --provider aws --plugins velero/velero-plugin-for-aws:v1.9.0 --bucket velero-backups --secret-file /tmp/velero-credentials --backup-location-config region=minio,s3ForcePathStyle=true,s3Url=http://192.168.0.188:9000 --use-volume-snapshots=false --dry-run -o yaml)",
|
||||
"Bash(__NEW_LINE_e85d95513fc16492__ head:*)",
|
||||
"Bash(k8s/velero/README.md:*)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/.kube/config kubectl apply -f /Users/ogt/awoooi/k8s/velero/velero-install-full.yaml)",
|
||||
"Bash(sshpass -p '09362233270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"whoami && hostname && cat /etc/sudoers.d/* 2>/dev/null | head -5 || echo ''no sudoers.d files''\")",
|
||||
"Bash(sshpass -p '09362233270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"kubectl get nodes 2>&1 || echo ''kubectl failed, checking k3s kubeconfig...'' && ls -la /etc/rancher/k3s/k3s.yaml 2>&1\")",
|
||||
"Bash(sshpass -p '09362233270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"sudo -l 2>&1 | head -20\")",
|
||||
"Bash(sshpass -p '09362233270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''09362233270'' | sudo -S -l 2>&1\")",
|
||||
"Bash(sshpass -p '09362233270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl get nodes 2>&1\")",
|
||||
"Bash(sshpass -p '0936223270' scp /Users/ogt/awoooi/k8s/velero/velero-install-full.yaml wooo@192.168.0.120:/tmp/velero-install-full.yaml)",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''''0936223270'''' | sudo -S kubectl apply -f /tmp/velero-install-full.yaml 2>&1\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl get pods -n velero 2>&1\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl get backupstoragelocation -n velero 2>&1\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl logs -n velero deploy/velero --tail=30 2>&1\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl logs -n velero deploy/velero --tail=10 2>&1\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl get secret cloud-credentials -n velero -o jsonpath=''{.data.cloud}'' 2>&1 | base64 -d\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S curl -s http://192.168.0.188:9000/velero-backups/ 2>&1\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl rollout restart deployment/velero -n velero 2>&1\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl get backups -n velero 2>&1\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl describe backup test-backup-20260328-2114 -n velero 2>&1 | tail -30\")",
|
||||
"Bash(sshpass -p:*)",
|
||||
"Read(//Users/ogt/awoooi/=== 測試 /approvals/**)",
|
||||
"Bash(kubectl --kubeconfig=/Users/ogt/.kube/config get svc -n velero -o wide)",
|
||||
"Bash(kubectl --kubeconfig=/Users/ogt/.kube/config get pods -n velero -o wide)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/.kube/config kubectl get svc -n velero)",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'echo \"\"0936223270\"\" | sudo -S sh -c \"\"kubectl get pods -A | grep -E \\\\\"\"kube-state|state-metrics\\\\\"\"\"\"')",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'echo \"\"0936223270\"\" | sudo -S sh -c \"\"kubectl get ns | grep -E \\\\\"\"wooo|aiops|legacy|old\\\\\"\"\"\"')",
|
||||
"Bash(KUBECONFIG=~/.kube/config kubectl get ns --no-headers)",
|
||||
"WebFetch(domain:build.nvidia.com)",
|
||||
"WebFetch(domain:ollama.com)",
|
||||
"WebFetch(domain:docs.api.nvidia.com)",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"curl -s ''http://admin:admin@localhost:3002/api/search?type=dash-db'' | python3 -c \"\"import sys,json; d=json.load\\(sys.stdin\\); print\\(f''Dashboard 數量: {len\\(d\\)}''\\); [print\\(f\\\\\"\" - {i[''title'']}\\\\\"\"\\) for i in d[:10]]\"\"\")",
|
||||
"Bash(jq '.ai_provider // .data.ai_provider // \"\"\"\"not found\"\"\"\"')",
|
||||
"Bash(KUBECONFIG=~/.kube/config kubectl logs -n awoooi-prod deployment/awoooi-api --tail=50)",
|
||||
"Bash(export NVIDIA_API_KEY=\"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\")",
|
||||
"Bash(curl -s -X POST \"https://integrate.api.nvidia.com/v1/chat/completions\" -H \"Content-Type: application/json\" -H \"Authorization: Bearer $NVIDIA_API_KEY\" -d '{:*)",
|
||||
"Bash(/tmp/fix-network-policy.yaml:*)",
|
||||
"Bash(__NEW_LINE_acde7a92ceae01f6__ scp:*)",
|
||||
"Bash(curl -s -X POST https://awoooi.wooo.work/api/v1/webhooks/alertmanager -H 'Content-Type: application/json' -d '{:*)",
|
||||
"Bash(ssh ollama@192.168.0.188 'curl -s \"\"http://localhost:9090/api/v1/targets\"\" 2>/dev/null | grep -o \"\"\\\\\"\"health\\\\\"\":\\\\\"\"[^\\\\\"\"]*\\\\\"\"\"\" | sort | uniq -c')",
|
||||
"Bash(ssh ollama@192.168.0.188 'curl -s \"\"http://localhost:9090/api/v1/rules\"\" 2>/dev/null | grep -o \"\"\\\\\"\"name\\\\\"\":\\\\\"\"[^\\\\\"\"]*\\\\\"\"\"\" | sort | uniq')",
|
||||
"Bash(ssh ollama@192.168.0.188 'curl -s \"\"http://localhost:9090/api/v1/targets\"\" 2>/dev/null | grep -o \"\"\\\\\"\"job\\\\\"\":\\\\\"\"[^\\\\\"\"]*\\\\\"\"\"\" | sort | uniq -c | sort -rn')",
|
||||
"Bash(ssh ollama@192.168.0.188 'curl -s \"\"http://localhost:9090/api/v1/query?query=up\"\" 2>/dev/null | grep -o \"\"\\\\\"\"instance\\\\\"\":\\\\\"\"[^\\\\\"\"]*\\\\\"\"\"\" | sort | uniq')",
|
||||
"Bash(for i:*)",
|
||||
"Bash(do sleep:*)",
|
||||
"Bash(kubectl patch:*)",
|
||||
"Bash(ssh wooo@192.168.0.110 \"cat /tmp/runner_clean.log 2>/dev/null; echo ''---''; ps aux | grep ''Runner.Listener'' | grep -v grep | wc -l\")",
|
||||
"Bash(KUBECONFIG=~/.kube/config kubectl logs -n awoooi-prod -l app=awoooi-api --tail=200)",
|
||||
"Bash(/Users/ogt/awoooi/ops/monitoring/deploy-exporters.sh:*)",
|
||||
"WebFetch(domain:github.com)",
|
||||
"WebFetch(domain:docs.ollama.com)",
|
||||
"Skill(telegram:configure)",
|
||||
"Skill(telegram:configure:*)",
|
||||
"Bash(USE_NEW_ENGINE=true pytest tests/test_incident*.py -v --tb=short -x)",
|
||||
"Bash(USE_NEW_ENGINE=true pytest tests/test_approval_field_alignment.py tests/test_learning_service.py -v --tb=short)",
|
||||
"Bash(/tmp/debug_approval.py:*)",
|
||||
"Bash(/tmp/debug_approval2.py:*)",
|
||||
"Bash(/tmp/bulk_sign.sh:*)",
|
||||
"Bash(bash /tmp/bulk_sign.sh)",
|
||||
"Bash(/tmp/check_deploy.py:*)",
|
||||
"Bash(/tmp/check_buttons.py:*)",
|
||||
"Bash(ssh ollama@192.168.0.188 \"docker logs openclaw --since=10s 2>&1 | grep -Ev ''\\(GET|POST\\) /health'' | tail -10 && echo ''---'' && docker exec openclaw env | grep OPENAI_API_KEY | cut -c1-30\")",
|
||||
"Read(//Users/ogt/awoooi/https:/awoooi.wooo.work/_next/static/chunks/app/%5Blocale%5D/**)",
|
||||
"Bash(find /Users/ogt/awoooi/apps/web -type f \\\\\\(-name *.spec.ts -o -name *.spec.tsx \\\\\\))",
|
||||
"Bash(kubectl -n awoooi-prod get pods)",
|
||||
"Bash(kubectl -n production get pods)",
|
||||
"Bash(ssh -o StrictHostKeyChecking=no wooo@192.168.0.121 \"export KUBECONFIG=/etc/rancher/k3s/k3s.yaml && sudo kubectl get deployment awoooi-web -n awoooi-prod -o jsonpath=''{.spec.template.spec.containers[0].image}'' && echo '''' && sudo kubectl get pods -n awoooi-prod -l app=awoooi-web --no-headers\")",
|
||||
"Bash(KUBECONFIG=/Users/ogt/.kube/config kubectl get pods -n awoooi-prod)",
|
||||
"Bash(for run_id in 166 165)",
|
||||
"mcp__plugin_playwright_playwright__browser_navigate",
|
||||
"mcp__plugin_playwright_playwright__browser_take_screenshot",
|
||||
"Bash(open \"http://192.168.0.110:3001/wooo/awoooi/actions\")",
|
||||
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/runs?limit=5\" -H \"Authorization: token $TOKEN\")",
|
||||
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/runs/166/jobs\" -H \"Authorization: token $TOKEN\")",
|
||||
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/runs?limit=10\" -H \"Authorization: token $TOKEN\")",
|
||||
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/runners\" -H \"Authorization: token $TOKEN\")",
|
||||
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" curl -s \"http://192.168.0.110:3001/api/v1/admin/runners\" -H \"Authorization: token $TOKEN\")",
|
||||
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\")",
|
||||
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/runs?limit=3\" -H \"Authorization: token $TOKEN\")",
|
||||
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/runs/169/jobs\" -H \"Authorization: token $TOKEN\")",
|
||||
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/jobs/179/logs\" -H \"Authorization: token $TOKEN\")",
|
||||
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" JOB_ID=180 curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/jobs/$JOB_ID/logs\" -H \"Authorization: token $TOKEN\")",
|
||||
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/runs?limit=2\" -H \"Authorization: token $TOKEN\")",
|
||||
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" JOB_ID=181 curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/jobs/$JOB_ID/logs\" -H \"Authorization: token $TOKEN\")",
|
||||
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/runs/172/jobs\" -H \"Authorization: token $TOKEN\")",
|
||||
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/jobs/182/logs\" -H \"Authorization: token $TOKEN\")",
|
||||
"Bash(TOKEN=\"REDACTED_GITEA_TOKEN\" curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/runs/178\" -H \"Authorization: token $TOKEN\")",
|
||||
"mcp__plugin_playwright_playwright__browser_snapshot",
|
||||
"mcp__plugin_playwright_playwright__browser_fill_form",
|
||||
"mcp__plugin_playwright_playwright__browser_click",
|
||||
"Bash(GITEA_TOKEN=\"e6c9fecb1f0148939493ae0fa30407d28c91279d\" curl -s \"http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/runs?limit=5\" -H \"Authorization: token $GITEA_TOKEN\")",
|
||||
<<<<<<< Updated upstream
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 /tmp/a4_smoke.py)",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -c \"from src.repositories.aider_event_repository import AiderEventRepository; print\\('import OK'\\)\")",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_aider_event_service.py -v)",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_aider_event_service.py -v --tb=short)",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -c \"from src.services.aider_event_service import classify_severity, should_create_incident, build_signal_data; print\\('✓ All imports successful'\\)\")",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_aider_event_service.py::test_build_signal_data_redacts_secrets_in_annotations -v)",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_aider_events_api.py -v)",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_aider_event_service.py tests/test_aider_events_api.py tests/test_aider_event_models.py tests/test_secret_redactor.py -v)",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_aider_event_processor.py -v)",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_aider_event_processor.py tests/test_aider_event_service.py tests/test_aider_events_api.py tests/test_aider_event_models.py tests/test_secret_redactor.py -v)",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -c \"from src.workers.aider_event_processor import AiderEventProcessor, get_aider_event_processor, run_aider_event_processor_loop; print\\('✓ All imports successful'\\)\")",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_aider_event_processor.py -v --tb=short)",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_aider_event_processor.py tests/test_aider_event_service.py tests/test_aider_events_api.py tests/test_aider_event_models.py tests/test_secret_redactor.py --tb=short)",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_ai_router_feedback.py -v)",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_aider_event_service.py tests/test_aider_events_api.py tests/test_aider_event_models.py tests/test_secret_redactor.py tests/test_aider_event_processor.py tests/test_ai_router_feedback.py -v)",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -c \"from src.services.ai_router import AIRouter; from src.db.base import get_session_factory; print\\('✓ Imports successful, no circular imports'\\)\")",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3)",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_ai_router_feedback.py tests/test_aider_event_service.py -v --tb=short)",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -c \"from src.api.v1 import aider_events; from src.workers.aider_event_processor import run_aider_event_processor_loop; from src.core.config import settings; print\\('AIDER_WEBHOOK_SECRET' in settings.__fields__, 'USE_AIDER_FEEDBACK' in settings.__fields__\\)\")",
|
||||
"Bash(AIDER_WEBHOOK_SECRET=testsecret /Users/ogt/.pyenv/versions/3.11.7/bin/python3 -c \"from src.main import app; print\\('app OK; title:', app.title\\)\")",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_action_parsing.py tests/test_aider_event_service.py tests/test_aider_events_api.py tests/test_aider_event_models.py tests/test_secret_redactor.py tests/test_aider_event_processor.py tests/test_ai_router_feedback.py -v)",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_action_parsing.py tests/test_aider_event_service.py tests/test_aider_events_api.py tests/test_aider_event_models.py tests/test_secret_redactor.py tests/test_aider_event_processor.py tests/test_ai_router_feedback.py -q)",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pip install -e .[dev] --quiet)",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pip install -e '.[dev]' --quiet)",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/ -v)",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -c \"from aider_watch_client.aiderw import main as awmain; from aider_watch_client.cli import main as climain; print\\('✓ imports ok'\\)\")",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pip show aider-watch-client)",
|
||||
"Bash(tailscale status *)",
|
||||
"Bash(kubectl rollout *)",
|
||||
"Bash(bash /Users/ogt/awoooi/scripts/aider_watch_client/scripts/install.sh)",
|
||||
"Bash(git rebase *)",
|
||||
"Bash(/opt/homebrew/bin/aiderw --message \"add docstring to hello function\" --exit)",
|
||||
"Bash(kubectl -n awoooi-prod get pod -l app=awoooi-api -o jsonpath='{.items[0].metadata.name}')",
|
||||
"Bash(kubectl -n awoooi-prod exec awoooi-api-7b9464c969-8ml88 -- python -c ' *)",
|
||||
"Bash(kubectl -n awoooi-prod rollout restart deployment/awoooi-api)",
|
||||
"Bash(kubectl -n awoooi-prod get pod -l app=awoooi-api --no-headers)",
|
||||
"Bash(kubectl -n awoooi-prod rollout status deployment/awoooi-api --timeout=120s)",
|
||||
"Bash(/opt/homebrew/bin/aider-watch flush *)",
|
||||
"Bash(kubectl -n awoooi-prod get pod -l app=awoooi-api -o wide)",
|
||||
"Bash(kubectl -n awoooi-prod rollout status deployment/awoooi-api --timeout=30s)",
|
||||
"Bash(kubectl -n awoooi-prod exec awoooi-api-6657fb9cf7-47lcg -- python -c \"import src.services.telegram_gateway as tg; import inspect; lines = inspect.getsource\\(tg\\); idx = lines.find\\('response_body=e.response.text'\\); print\\('FOUND' if idx >= 0 else 'NOT FOUND'\\)\")",
|
||||
"Read(//opt/gitea/**)",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/ -q)",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/unit/test_aider_event_service.py tests/unit/test_aider_model.py -v)",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_aider_events_api.py tests/test_aider_event_models.py tests/test_aider_event_service.py tests/test_aider_event_processor.py -v)",
|
||||
"Bash(kubectl -n awoooi-prod get svc)",
|
||||
"Bash(kubectl -n openclaw get pod)",
|
||||
"Bash(kubectl -n awoooi-prod exec awoooi-api-7cd784c875-r4qkz -- python -c ' *)",
|
||||
"Bash(kubectl -n awoooi-prod logs awoooi-api-7cd784c875-qt6j2 --since=10m)",
|
||||
"Bash(kubectl -n awoooi-prod logs awoooi-api-7cd784c875-qt6j2 --since=15m)",
|
||||
"Bash(kubectl -n awoooi-prod logs awoooi-api-7cd784c875-qt6j2 --since=20m)",
|
||||
"Bash(kubectl -n awoooi-prod get secret awoooi-secrets -o yaml)",
|
||||
"Bash(kubectl -n awoooi-prod logs awoooi-api-7cd784c875-qt6j2 --since=30m)",
|
||||
"Bash(kubectl -n awoooi-prod logs awoooi-api-7cd784c875-qt6j2 --since=2h)",
|
||||
"Bash(kubectl -n awoooi-prod logs awoooi-api-7cd784c875-qt6j2)",
|
||||
"Bash(kubectl -n awoooi-prod get pod -l app=awoooi-api -o jsonpath='{range .items[*]}{.metadata.name} {.status.containerStatuses[0].imageID}{\"\\\\n\"}{end}')",
|
||||
"Bash(kubectl -n awoooi-prod get ingress)",
|
||||
"Bash(kubectl -n awoooi-prod get svc awoooi-api-svc)",
|
||||
"Bash(kubectl -n awoooi-prod logs -l app=awoooi-api --since=60s --prefix)",
|
||||
"Bash(kubectl -n awoooi-prod logs -l app=awoooi-api --since=5m --prefix)",
|
||||
"Bash(kubectl -n awoooi-prod logs pod/awoooi-api-86bc79766d-dn5ll --since=5m)",
|
||||
"Bash(kubectl -n awoooi-prod logs pod/awoooi-api-86bc79766d-dn5ll --since=10m)",
|
||||
"Bash(kubectl -n awoooi-prod logs pod/awoooi-api-86bc79766d-dn5ll)",
|
||||
"Bash(kubectl -n awoooi-prod logs -l app=awoooi-api --since=90s --prefix)",
|
||||
"Bash(kubectl -n awoooi-prod logs pod/awoooi-api-86bc79766d-4x69p --since=5m)",
|
||||
"Bash(redis-cli -h 192.168.0.188 -p 6380 -n 10 SCAN 0 MATCH \"playbook:PB-*\" COUNT 500)",
|
||||
"Bash(redis-cli -h 192.168.0.188 -p 6380 -n 10 DBSIZE)",
|
||||
"Bash(wait)",
|
||||
"Read(//Users/**)",
|
||||
"Read(//Users/ooo/.claude/**)",
|
||||
"Bash(mkdir -p /Users/ogt/awoooi/.claude/agents)",
|
||||
"Bash(cp /Users/ogt/.claude/agents/*.md /Users/ogt/awoooi/.claude/agents/)",
|
||||
"Bash(kubectl -n awoooi-prod logs --tail=400 -l app=awoooi-api --prefix=true)",
|
||||
"Bash(kubectl -n awoooi-prod logs --tail=300 awoooi-api-65c69fd649-bxbwp)",
|
||||
"Bash(kubectl -n awoooi-prod logs --tail=20000 -l app=awoooi-api --prefix=false --since=24h)",
|
||||
"Bash(kubectl -n awoooi-prod logs --since=24h awoooi-api-65c69fd649-bxbwp)",
|
||||
"Bash(kubectl -n awoooi-prod logs --since=24h -l app=awoooi-api --prefix=false)",
|
||||
"Bash(kubectl -n awoooi-prod logs --since=24h awoooi-api-65c69fd649-fmbxd)",
|
||||
"Bash(kubectl -n awoooi-prod logs --since=3h awoooi-api-65c69fd649-fmbxd)",
|
||||
"Bash(kubectl -n awoooi-prod logs --since=3h awoooi-api-65c69fd649-bxbwp)",
|
||||
"Bash(kubectl -n awoooi-prod logs -l app=awoooi-api --tail=30 --since=30m)",
|
||||
"Bash(kubectl -n awoooi-prod get pods -o wide)",
|
||||
"Bash(kubectl -n awoooi-prod get pods -l app=awoooi-api -o jsonpath='{.items[0].metadata.creationTimestamp}')",
|
||||
"Bash(kubectl -n awoooi-prod logs -l app=awoooi-api --tail=5 --since=5m)",
|
||||
"Bash(kubectl -n awoooi-prod describe pod -l app=awoooi-api)",
|
||||
"Bash(kubectl -n awoooi-prod logs -l app=awoooi-api --tail=20 --since=10m)",
|
||||
"Bash(kubectl -n awoooi-prod exec deployment/awoooi-api -- python3 -c ' *)",
|
||||
"Bash(PGPASSWORD=\"\" psql -h 188.188.188.188 -U aiops -d aiops -c \"\\\\d timeline_events\")",
|
||||
"Bash(kubectl -n awoooi-prod get deploy awoooi-api -o yaml)",
|
||||
"Bash(PGPASSWORD=\"\" psql --version)",
|
||||
"Bash(kubectl -n awoooi-prod exec deploy/awoooi-api -- env)",
|
||||
"Bash(kubectl -n awoooi-prod logs --tail=500 deploy/awoooi-api)",
|
||||
"Bash(kubectl cp *)",
|
||||
"Bash(kubectl -n awoooi-prod exec deploy/awoooi-api -- sh -c 'curl -sG \"$PROMETHEUS_URL/api/v1/query\" --data-urlencode \"query=up\" 2>&1 | head -c 400')",
|
||||
"Bash(kubectl -n awoooi-prod exec deploy/awoooi-api -- sh -c 'for q in \"sum\\(rate\\(http_requests_total{status=~\\\\\"5..\\\\\"}[5m]\\)\\) / sum\\(rate\\(http_requests_total[5m]\\)\\)\" \"avg\\(rate\\(container_cpu_usage_seconds_total{namespace=\\\\\"awoooi-prod\\\\\",container=\\\\\"awoooi-api\\\\\"}[5m]\\)\\)\" \"pg_stat_activity_count{datname=\\\\\"awoooi\\\\\"}\" \"increase\\(kube_pod_container_status_restarts_total{namespace=\\\\\"awoooi-prod\\\\\"}[15m]\\)\"; do echo \"---- $q\"; curl -sG \"$PROMETHEUS_URL/api/v1/query\" --data-urlencode \"query=$q\" 2>&1 | head -c 250; echo; done')",
|
||||
"Bash(kubectl -n awoooi-prod exec deploy/awoooi-api -- sh -c 'PGPASSWORD=as0V1mohktaFbGIx3R0iCatbMJ6XxFDL psql -h 192.168.0.188 -U awoooi -d awoooi_prod -c \"SELECT metric_name, count\\(*\\), max\\(trained_at\\) FROM dynamic_baseline_record GROUP BY metric_name;\" 2>&1 | head -20')",
|
||||
"Bash(kubectl -n awoooi-prod exec deploy/awoooi-api -- sh -c 'PGPASSWORD=as0V1mohktaFbGIx3R0iCatbMJ6XxFDL psql -h 192.168.0.188 -U awoooi -d awoooi_prod -c \"SELECT count\\(*\\) as asset_count FROM asset_inventory; SELECT count\\(*\\) as coverage_count FROM asset_coverage_snapshot; SELECT count\\(*\\) as host_cap_count FROM host_capacity_snapshot; SELECT count\\(*\\) as compl_count FROM asset_compliance_snapshot; SELECT count\\(*\\) as rule_cat FROM alert_rule_catalog; SELECT count\\(*\\) as log_cluster FROM log_cluster_record;\" 2>&1')",
|
||||
"Bash(kubectl -n awoooi-prod exec deploy/awoooi-api -- sh -c 'python3 -c \" *)",
|
||||
"Bash(kubectl -n awoooi-prod exec deploy/awoooi-api -- python3 -c ' *)",
|
||||
"Bash(kubectl -n awoooi-prod exec deploy/awoooi-api -- sh -c 'for q in \"http_requests_total\" \"container_cpu_usage_seconds_total\" \"container_memory_usage_bytes\" \"kube_pod_container_status_restarts_total\" \"pg_stat_activity_count\" \"node_cpu_seconds_total\" \"node_load1\"; do echo -n \"$q => \"; curl -sG \"$PROMETHEUS_URL/api/v1/query\" --data-urlencode \"query=count\\($q\\)\" 2>&1 | head -c 180; echo; done')",
|
||||
"Bash(kubectl -n awoooi-prod exec deploy/awoooi-api -- sh -c 'curl -sG \"$PROMETHEUS_URL/api/v1/query\" --data-urlencode \"query=container_cpu_usage_seconds_total\" 2>&1 | python3 -c \"import json,sys; d=json.load\\(sys.stdin\\); rs=d[\\\\\"data\\\\\"][\\\\\"result\\\\\"][:3]; [print\\(r[\\\\\"metric\\\\\"]\\) for r in rs]; print\\(\\\\\"total series:\\\\\", len\\(d[\\\\\"data\\\\\"][\\\\\"result\\\\\"]\\)\\)\"')",
|
||||
"Bash(kubectl -n awoooi-prod exec deploy/awoooi-api -- sh -c 'which kubectl 2>&1; kubectl version --client 2>&1 | head -3; kubectl -n awoooi-prod get deploy awoooi-api 2>&1 | head -3')",
|
||||
"Bash(kubectl -n awoooi-prod logs --tail=2000 deploy/awoooi-api)",
|
||||
"Bash(psql --version)",
|
||||
"WebFetch(domain:core.telegram.org)",
|
||||
"mcp__plugin_context7_context7__resolve-library-id",
|
||||
"mcp__plugin_context7_context7__query-docs",
|
||||
"WebFetch(domain:docs.claude.com)",
|
||||
"Bash(git tag *)",
|
||||
"Read(//usr/**)",
|
||||
"Bash(psql -h 192.168.0.110 -U awoooi_user -d awoooi -c \"SELECT id, alertname, status, confidence, description, created_at FROM approval_records WHERE status='PENDING' AND DATE\\(created_at AT TIME ZONE 'Asia/Taipei'\\) = CURRENT_DATE AT TIME ZONE 'Asia/Taipei' ORDER BY created_at DESC LIMIT 10;\")",
|
||||
"Bash(kubectl -n awoooi-prod get deployment awoooi-api -o jsonpath='{.spec.template.spec.containers[0].image}')",
|
||||
"Bash(kubectl -n awoooi-prod get deployment awoooi-api -o jsonpath='{.spec.template.spec.containers[0].imagePullPolicy}{\"\\\\n\"}{.spec.template.metadata.labels}{\"\\\\n\"}')",
|
||||
"Bash(kubectl kustomize *)",
|
||||
"Bash(kubectl -n awoooi-prod rollout status deployment/awoooi-api --timeout=60s)",
|
||||
"Bash(kubectl -n awoooi-prod get pods -l app=awoooi-api --no-headers)",
|
||||
"Bash(kubectl -n awoooi-prod patch deployment awoooi-api -p '{\"spec\":{\"template\":{\"spec\":{\"containers\":[{\"name\":\"api\",\"image\":\"192.168.0.110:5000/awoooi/api:cbd28e29a08435deb8c66af51654d8fa65120a14\"}]}}}}')",
|
||||
"Bash(kubectl -n awoooi-prod get deployment awoooi-api -o jsonpath='{.spec.template.spec.containers[0].image}{\"\\\\n\"}')",
|
||||
"Bash(kubectl -n awoooi-prod get pods -l app=awoooi-api -o jsonpath='{range .items[*]}{.metadata.name}{\"\\\\t\"}{.spec.containers[0].image}{\"\\\\n\"}{end}')",
|
||||
"Bash(kubectl -n awoooi-prod get pdb awoooi-api-pdb -o jsonpath='{.spec.minAvailable}')",
|
||||
"Bash(kubectl -n awoooi-prod get pods -l app=awoooi-api -o wide)",
|
||||
"Bash(kubectl -n awoooi-prod describe rs -l app=awoooi-api)",
|
||||
"Bash(kubectl -n awoooi-prod get events --sort-by='.lastTimestamp')",
|
||||
"Bash(kubectl -n awoooi-prod get deployment awoooi-api -o jsonpath='{.spec.replicas}{\"\\\\n\"}{.status.replicas}{\"\\\\n\"}{.status.readyReplicas}{\"\\\\n\"}{.status.updatedReplicas}{\"\\\\n\"}')",
|
||||
"Bash(kubectl -n awoooi-prod get pods -l app=awoooi-api --sort-by=.metadata.creationTimestamp -o jsonpath='{range .items[*]}{.metadata.name}{\":\"}{.metadata.creationTimestamp}{\"\\\\n\"}{end}')",
|
||||
"Bash(kubectl -n awoooi-prod get deployment awoooi-api -o jsonpath='{.status.conditions[*]}')",
|
||||
"Bash(kubectl -n awoooi-prod describe deployment awoooi-api)",
|
||||
"Bash(kubectl -n awoooi-prod get rs -l app=awoooi-api -o jsonpath='{range .items[*]}{.metadata.name}{\":\"}{.spec.template.spec.containers[0].image}{\"\\\\n\"}{end}')",
|
||||
"Bash(kubectl -n awoooi-prod get deployment awoooi-api -o yaml)",
|
||||
"Bash(kubectl -n awoooi-prod rollout status deployment/awoooi-api --timeout=180s)",
|
||||
"Bash(kubectl -n awoooi-prod set image deployment/awoooi-api api=192.168.0.110:5000/awoooi/api:cbd28e29a08435deb8c66af51654d8fa65120a14 --record=false)",
|
||||
"Bash(kubectl -n awoooi-prod get pods -l app=awoooi-api -o jsonpath='{range .items[*]}{.metadata.name}{\"\\\\t\"}{.spec.containers[0].image}{\"\\\\t\"}{.status.phase}{\"\\\\n\"}{end}')",
|
||||
"Bash(kubectl -n awoooi-prod get deployment awoooi-api -o jsonpath='{.status.replicas}{\"\\\\t\"}{.status.readyReplicas}{\"\\\\t\"}{.status.updatedReplicas}')",
|
||||
"Bash(bash /tmp/diagnostic.sh)",
|
||||
"WebFetch(domain:docs.github.com)",
|
||||
"WebFetch(domain:docs.sonarsource.com)",
|
||||
"WebFetch(domain:gitea.com)",
|
||||
"WebFetch(domain:docs.gitea.com)",
|
||||
"WebFetch(domain:www.sonarsource.com)",
|
||||
"WebFetch(domain:golangci-lint.run)",
|
||||
"WebFetch(domain:www.uber.com)",
|
||||
"Bash(bash scripts/ops/deploy-alerts.sh --dry-run)",
|
||||
"Bash(bash scripts/ops/deploy-alerts.sh)",
|
||||
"Bash(promtool check *)",
|
||||
"WebFetch(domain:openrouter.ai)",
|
||||
"WebFetch(domain:qwenlm.github.io)",
|
||||
"WebFetch(domain:aclanthology.org)",
|
||||
"WebFetch(domain:datanorth.ai)",
|
||||
"WebFetch(domain:www.infoq.com)",
|
||||
"WebFetch(domain:aws.amazon.com)",
|
||||
"WebFetch(domain:artificialanalysis.ai)",
|
||||
"WebFetch(domain:www.alibabacloud.com)",
|
||||
"WebFetch(domain:docs.langchain.com)",
|
||||
"WebFetch(domain:arxiv.org)",
|
||||
"WebFetch(domain:blog.kilo.ai)",
|
||||
"WebFetch(domain:www.siliconflow.com)",
|
||||
"WebFetch(domain:aicompetence.org)",
|
||||
"Bash(redis-cli -h 192.168.0.188 -p 6380 ping)",
|
||||
"Bash(redis-cli ping *)"
|
||||
=======
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest apps/api/tests/test_aider_event_models.py -v)",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_action_parsing.py -v --collect-only)",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_action_parsing.py --collect-only)",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -m pytest tests/test_aider_event_models.py tests/test_secret_redactor.py -v)",
|
||||
"Bash(/Users/ogt/.pyenv/versions/3.11.7/bin/python3 -c \"from src.repositories.aider_event_repository import AiderEventRepository; print\\('import OK'\\)\")"
|
||||
>>>>>>> Stashed changes
|
||||
],
|
||||
"deny": [
|
||||
"Bash(rm -rf *)",
|
||||
"Bash(git push --force *)",
|
||||
"Bash(git reset --hard *)",
|
||||
"Bash(kubectl delete *)",
|
||||
"Bash(docker rm -f *)"
|
||||
],
|
||||
"additionalDirectories": [
|
||||
"/Users/ogt/.claude/projects/-Users-ogt-awoooi/memory",
|
||||
"/Users/ogt/awoooi/.claude/hooks",
|
||||
"/Users/ogt/.claude/channels/telegram",
|
||||
<<<<<<< Updated upstream
|
||||
"/Users/ogt",
|
||||
"/Users/ogt/.claude",
|
||||
"/Users/ogt/awoooi/apps/web/src/app/[locale]/aiops"
|
||||
]
|
||||
},
|
||||
"hooks": {
|
||||
"PreToolUse": [
|
||||
{
|
||||
"matcher": "",
|
||||
"hooks": [
|
||||
{
|
||||
"type": "command",
|
||||
"command": "node $CLAUDE_PROJECT_DIR/.claude/hooks/awoooi-guard.js 2>/dev/null || true"
|
||||
},
|
||||
{
|
||||
"type": "command",
|
||||
"command": "node /Users/ogt/.claude/hooks/branch-protection.js"
|
||||
},
|
||||
{
|
||||
"type": "command",
|
||||
"command": "node /Users/ogt/.claude/hooks/commit-quality.js"
|
||||
},
|
||||
{
|
||||
"type": "command",
|
||||
"command": "node /Users/ogt/.claude/hooks/large-file-warner.js"
|
||||
},
|
||||
{
|
||||
"type": "command",
|
||||
"command": "node /Users/ogt/.claude/hooks/mcp-health.js"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"PostToolUse": [
|
||||
{
|
||||
"matcher": "",
|
||||
"hooks": [
|
||||
{
|
||||
"type": "command",
|
||||
"command": "node /Users/ogt/.claude/hooks/audit-log.js"
|
||||
},
|
||||
{
|
||||
"type": "command",
|
||||
"command": "node /Users/ogt/.claude/hooks/suggest-compact.js"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"Stop": [
|
||||
{
|
||||
"matcher": "",
|
||||
"hooks": [
|
||||
{
|
||||
"type": "command",
|
||||
"command": "node /Users/ogt/.claude/hooks/cost-tracker.js"
|
||||
},
|
||||
{
|
||||
"type": "command",
|
||||
"command": "node /Users/ogt/.claude/hooks/session-summary.js"
|
||||
}
|
||||
]
|
||||
}
|
||||
=======
|
||||
"/Users/ogt/aider-watch"
|
||||
>>>>>>> Stashed changes
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -1,827 +0,0 @@
|
||||
{
|
||||
"permissions": {
|
||||
"allow": [
|
||||
"Bash(pnpm install:*)",
|
||||
"Bash(npm --version)",
|
||||
"Bash(npm install:*)",
|
||||
"Bash(pnpm --version)",
|
||||
"Bash(pnpm dev:*)",
|
||||
"Bash(pnpm add:*)",
|
||||
"Bash(ls -la /Users/ogt/awoooi/apps/web/next.config.*)",
|
||||
"Bash(pkill -f \"next dev\")",
|
||||
"Bash(curl -sL http://localhost:3000/zh-TW)",
|
||||
"Bash(curl -s http://localhost:3000/zh-TW)",
|
||||
"Bash(pnpm --filter web build)",
|
||||
"Bash(curl -s http://localhost:3001/zh-TW)",
|
||||
"Bash(curl -s -o /dev/null -w \"%{http_code}\" http://localhost:3000/zh-TW)",
|
||||
"Bash(kubectl apply:*)",
|
||||
"Bash(chmod +x /Users/ogt/awoooi/deploy-infra.sh)",
|
||||
"Bash(./deploy-infra.sh)",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"mkdir -p /tmp/awoooi-k8s\")",
|
||||
"Bash(sshpass -p '0936223270' scp -o StrictHostKeyChecking=no /Users/ogt/awoooi/k8s/awoooi-prod/01-namespace-quota.yaml /Users/ogt/awoooi/k8s/awoooi-prod/02-network-policy.yaml /Users/ogt/awoooi/k8s/awoooi-prod/04-configmap.yaml wooo@192.168.0.120:/tmp/awoooi-k8s/)",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"sudo kubectl apply -f /tmp/awoooi-k8s/01-namespace-quota.yaml\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl apply -f /tmp/awoooi-k8s/01-namespace-quota.yaml 2>/dev/null\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl apply -f /tmp/awoooi-k8s/02-network-policy.yaml 2>/dev/null\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl apply -f /tmp/awoooi-k8s/04-configmap.yaml 2>/dev/null\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl get ns awoooi-prod -o wide 2>/dev/null\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl get networkpolicy -n awoooi-prod 2>/dev/null\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl get resourcequota,limitrange,configmap -n awoooi-prod 2>/dev/null\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"rm -rf /tmp/awoooi-k8s\")",
|
||||
"Bash(PYTHONPATH=. python -c \"from src.main import app; print\\(''Import OK''\\)\")",
|
||||
"Bash(curl -s http://localhost:8000/api/v1/health/ready)",
|
||||
"Bash(curl -s http://localhost:8000/api/v1/health/live)",
|
||||
"Bash(curl -s http://localhost:8000/)",
|
||||
"Bash(pkill -f \"uvicorn src.main:app\")",
|
||||
"Bash(pkill -f \"node.*next\")",
|
||||
"Bash(curl -s http://localhost:8000/api/v1/health)",
|
||||
"Read(//Users/ogt/awoooi/apps/api/**)",
|
||||
"Bash(pnpm typecheck:*)",
|
||||
"Read(//Users/ogt/awoooi/apps/web/**)",
|
||||
"Bash(curl -s -X POST http://localhost:8000/api/v1/dashboard/demo/spike/clear)",
|
||||
"Read(//Users/ogt/awoooi/=== 驗證英文頁面 \\(/en/**)",
|
||||
"Bash(jq \".devDependencies | keys | map\\(select\\(startswith\\(\"\"@playwright\"\"\\) or startswith\\(\"\"playwright\"\"\\)\\)\\)\")",
|
||||
"Bash(npx playwright:*)",
|
||||
"Bash(curl -s http://localhost:3000/zh-TW/demo -o /dev/null -w \"Frontend: HTTP %{http_code}\\\\n\")",
|
||||
"Bash(__NEW_LINE_ef548029029cdfac__ echo:*)",
|
||||
"Bash(curl -s http://localhost:8000/api/v1/health -o /dev/null -w \"Backend: HTTP %{http_code}\\\\n\")",
|
||||
"Bash(echo '=== 已產出的截圖 ===' find /Users/ogt/awoooi/apps/web/test-results -name *.png)",
|
||||
"Bash(echo '=== Playwright E2E 測試結果 ===' echo echo '📸 截圖證據 \\(test-results/screenshots/\\):' ls -la /Users/ogt/awoooi/apps/web/test-results/screenshots/ __NEW_LINE_db74e5f56e34db17__ echo echo '🎬 錄影證據 \\(.webm\\):' find /Users/ogt/awoooi/apps/web/test-results -name *.webm -exec ls -la {})",
|
||||
"Bash(__NEW_LINE_db74e5f56e34db17__ echo:*)",
|
||||
"Bash(source .venv/bin/activate)",
|
||||
"Bash(python scripts/demo_multisig.py)",
|
||||
"Bash(python -c \"from src.api.v1.approvals import router; print\\(''✅ Approvals router loaded:'', len\\(router.routes\\), ''routes''\\)\")",
|
||||
"Bash(npx tsc:*)",
|
||||
"Bash(chmod +x /Users/ogt/awoooi/scripts/demo-multisig-flow.sh)",
|
||||
"Bash(python -c \"from src.main import app; print\\(''✅ API loads successfully''\\)\")",
|
||||
"Bash(jq)",
|
||||
"Bash(/Users/ogt/awoooi/scripts/demo-multisig-flow.sh)",
|
||||
"Bash(curl -s -X POST \"http://localhost:8000/api/v1/approvals\" -H \"Content-Type: application/json\" -d '{:*)",
|
||||
"Bash(curl -s http://localhost:8000/api/v1/openapi.json)",
|
||||
"Bash(python -c \":*)",
|
||||
"Bash(curl -s http://localhost:3000 -o /dev/null -w \"%{http_code}\")",
|
||||
"Bash(lsof -ti:3000,3001,8000)",
|
||||
"Bash(curl -s http://localhost:8000/health)",
|
||||
"Bash(curl -s http://localhost:8000/api/v1/approvals/pending)",
|
||||
"Bash(curl -s -o /dev/null -w \"%{http_code}\" http://localhost:3001/zh-TW/demo)",
|
||||
"Bash(ls -la test-results/*.png)",
|
||||
"Bash(cp test-results/cpo102-*.png /Users/ogt/awoooi/docs/screenshots/)",
|
||||
"Bash(ssh ogt@192.168.0.120 'cat /etc/rancher/k3s/k3s.yaml')",
|
||||
"Bash(python -c \"from src.main import app; print\\(''✅ main.py imports OK''\\)\")",
|
||||
"Bash(curl -s http://localhost:8000/api/v1/approvals/k8s-test)",
|
||||
"Bash(sqlite3 awoooi.db \".tables\")",
|
||||
"Bash(sshpass -p 0936223270 ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 'sudo cat /etc/rancher/k3s/k3s.yaml')",
|
||||
"Bash(kubectl --kubeconfig=/Users/ogt/awoooi/apps/api/k3s-prod.yaml get deployments -n awoooi-prod)",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl get deployments -n awoooi-prod 2>/dev/null\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl get deployments -A 2>/dev/null\")",
|
||||
"Bash(curl -s -X POST http://localhost:8000/api/v1/approvals -H \"Content-Type: application/json\" -d '{:*)",
|
||||
"Bash(APPROVAL_ID=\"b58a0d86-fa4e-43ca-881c-02e978cd7943\")",
|
||||
"Bash(curl -s -X POST \"http://localhost:8000/api/v1/approvals/$APPROVAL_ID/sign\" -H \"Content-Type: application/json\" -d '{:*)",
|
||||
"Bash(sqlite3 /Users/ogt/awoooi/apps/api/awoooi.db \"SELECT operation_type, target_resource, namespace, success, dry_run_passed, dry_run_message, error_message, execution_duration_ms, created_at FROM audit_logs ORDER BY created_at DESC LIMIT 1;\" -header -column)",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S kubectl get pods -n monitoring -l app=grafana 2>/dev/null\")",
|
||||
"Bash(curl -s http://192.168.0.188:11434/api/tags)",
|
||||
"Bash(python -c \"from src.main import app; print\\(''✅ Compile OK''\\)\")",
|
||||
"Bash(curl -s http://localhost:8000/api/v1/ai/status)",
|
||||
"Bash(curl -s -X POST http://localhost:8000/api/v1/ai/analyze-and-propose -H \"Content-Type: application/json\" -d '{}')",
|
||||
"Bash(curl -s -X POST http://192.168.0.188:11434/api/generate -H \"Content-Type: application/json\" -d '{\"\"\"\"model\"\"\"\":\"\"\"\"llama3.2:1b\"\"\"\",\"\"\"\"prompt\"\"\"\":\"\"\"\"Output only JSON: {\\\\\"\"\"\"action\\\\\"\"\"\":\\\\\"\"\"\"test\\\\\"\"\"\"}\"\"\"\",\"\"\"\"stream\"\"\"\":false,\"\"\"\"format\"\"\"\":\"\"\"\"json\"\"\"\"}' --max-time 30)",
|
||||
"Bash(curl -s -X POST http://localhost:8000/api/v1/ai/analyze-and-propose -H \"Content-Type: application/json\" -d '{}' --max-time 60)",
|
||||
"Bash(PROMPT='你是 ClawBot AI。分析以下監控數據,輸出純 JSON(無其他文字)。:*)",
|
||||
"Bash(curl -s -X POST http://192.168.0.188:11434/api/generate -H \"Content-Type: application/json\" -d \"{\"\"model\"\":\"\"llama3.2:1b\"\",\"\"prompt\"\":\"\"$PROMPT\"\",\"\"stream\"\":false,\"\"format\"\":\"\"json\"\",\"\"options\"\":{\"\"num_predict\"\":256,\"\"temperature\"\":0.1}}\" --max-time 60)",
|
||||
"Bash(curl -s -X POST http://192.168.0.188:11434/api/generate -H \"Content-Type: application/json\" -d '{\"\"\"\"model\"\"\"\":\"\"\"\"llama3.2:1b\"\"\"\",\"\"\"\"prompt\"\"\"\":\"\"\"\"Harbor service returning 404. Output JSON: {\\\\\"\"\"\"suggested_action\\\\\"\"\"\":\\\\\"\"\"\"RESTART_DEPLOYMENT\\\\\"\"\"\",\\\\\"\"\"\"target_resource\\\\\"\"\"\":\\\\\"\"\"\"harbor\\\\\"\"\"\",\\\\\"\"\"\"namespace\\\\\"\"\"\":\\\\\"\"\"\"default\\\\\"\"\"\",\\\\\"\"\"\"risk_level\\\\\"\"\"\":\\\\\"\"\"\"medium\\\\\"\"\"\",\\\\\"\"\"\"reasoning\\\\\"\"\"\":\\\\\"\"\"\"Service down\\\\\"\"\"\",\\\\\"\"\"\"confidence\\\\\"\"\"\":0.8,\\\\\"\"\"\"affected_services\\\\\"\"\"\":[]}\"\"\"\",\"\"\"\"stream\"\"\"\":false,\"\"\"\"format\"\"\"\":\"\"\"\"json\"\"\"\",\"\"\"\"options\"\"\"\":{\"\"\"\"num_predict\"\"\"\":128,\"\"\"\"temperature\"\"\"\":0.1}}' --max-time 30)",
|
||||
"Bash(curl -v -X POST http://192.168.0.188:11434/api/generate -H \"Content-Type: application/json\" -d '{\"\"\"\"model\"\"\"\":\"\"\"\"llama3.2:1b\"\"\"\",\"\"\"\"prompt\"\"\"\":\"\"\"\"Say hello\"\"\"\",\"\"\"\"stream\"\"\"\":false}' --max-time 30)",
|
||||
"Bash(curl -s -X POST http://localhost:8000/api/v1/ai/analyze-and-propose -H \"Content-Type: application/json\" -d '{}' --max-time 120)",
|
||||
"Bash(curl -s http://localhost:8000/api/v1/ai/analyze-and-propose -X POST -H \"Content-Type: application/json\")",
|
||||
"Bash(curl -s http://localhost:8000/api/v1/dashboard)",
|
||||
"Bash(ls -la ~/Downloads/image*.png)",
|
||||
"Bash(ls -la ~/Desktop/image*.png)",
|
||||
"Bash(ls -la /Users/ogt/awoooi/apps/web/public/*.png)",
|
||||
"WebFetch(domain:openclaw.ai)",
|
||||
"Bash(ls -la /Users/ogt/Downloads/*.png)",
|
||||
"Bash(ls -la /Users/ogt/.gemini/antigravity/brain/*/image*.png)",
|
||||
"Bash(ls -lat /Users/ogt/Downloads/*.png)",
|
||||
"Bash(curl -s http://localhost:8000/api/v1/approvals)",
|
||||
"Bash(curl -s -X GET http://localhost:8000/api/v1/approvals/)",
|
||||
"Bash(APPROVAL_ID=\"4989729e-e518-4e7e-8dff-5c3269e0c82b\")",
|
||||
"Bash(curl -s -X POST \"http://localhost:8000/api/v1/approvals/$APPROVAL_ID/sign\" -H \"Content-Type: application/json\" -d '{\"\"\"\"signer_id\"\"\"\": \"\"\"\"ciso-001\"\"\"\", \"\"\"\"signer_name\"\"\"\": \"\"\"\"Demo CISO\"\"\"\", \"\"\"\"comment\"\"\"\": \"\"\"\"資安確認,核准執行\"\"\"\"}')",
|
||||
"Bash(curl -s http://localhost:8000/api/v1/webhooks/health)",
|
||||
"Bash(curl -s -X POST http://localhost:8000/api/v1/webhooks/alerts -H \"Content-Type: application/json\" -d '{:*)",
|
||||
"Bash(curl -s http://localhost:3000)",
|
||||
"Bash(ls -la apps/web/test-results/*.png)",
|
||||
"Bash(curl -s http://localhost:3000/zh-TW/demo)",
|
||||
"Bash(curl -s -o /dev/null -w \"%{http_code}\" http://localhost:3333/zh-TW/demo)",
|
||||
"Bash(curl -s http://localhost:8001/api/v1/approvals/pending)",
|
||||
"Bash(curl -s -X POST http://localhost:8001/api/v1/approvals -H \"Content-Type: application/json\" -d '{:*)",
|
||||
"Bash(curl -s http://localhost:8001/openapi.json)",
|
||||
"Bash(curl -s http://localhost:8001/docs)",
|
||||
"Bash(curl -s http://localhost:8001/api/v1/webhooks/grafana -X OPTIONS)",
|
||||
"Bash(pnpm run:*)",
|
||||
"Bash(node scripts/screenshot-rbac.mjs)",
|
||||
"Bash(pnpm exec:*)",
|
||||
"Bash(curl -s http://localhost:3333 -o /dev/null -w \"%{http_code}\")",
|
||||
"Bash(curl -s http://localhost:3333/zh-TW/demo -o /dev/null -w \"%{http_code}\")",
|
||||
"Bash(python3 -c \"import sys,json; d=json.load\\(sys.stdin\\); print\\(f''''Count: {d[count]}''''\\); [print\\(f''''- {a[id][:8]}... risk={a[risk_level]}''''\\) for a in d[''''approvals''''][:3]]\")",
|
||||
"Bash(curl -s http://localhost:3000/zh-TW/demo -o /dev/null -w \"%{http_code}\")",
|
||||
"Bash(python -c \"import sys,json; d=json.load\\(sys.stdin\\); print\\(f'''' Connected: {d[\"\"success\"\"]}''''\\); print\\(f'''' Namespaces: {d[\"\"namespaces\"\"][:3]}...''''\\)\" __NEW_LINE_57ae1c1c812968e7__ echo \"\" echo \"3. 資料庫持久化:\" sqlite3 /Users/ogt/awoooi/apps/api/awoooi.db \"SELECT COUNT\\(*\\) as approvals FROM approval_records;\" sqlite3 /Users/ogt/awoooi/apps/api/awoooi.db \"SELECT COUNT\\(*\\) as timeline FROM timeline_events;\" sqlite3 /Users/ogt/awoooi/apps/api/awoooi.db \"SELECT COUNT\\(*\\) as audits FROM audit_logs;\")",
|
||||
"Bash(head -2 __NEW_LINE_9bf9481fbdf30d4e__ echo \"\" echo \"2. 告警收斂跳過 LLM 日誌 \\(應該有 4 次\\):\" grep -c \"alert_converged_skip_llm\" /tmp/api-server.log)",
|
||||
"Bash(python -m json.tool)",
|
||||
"Bash(__NEW_LINE_7463bff94cecc20f__ echo:*)",
|
||||
"Bash(__NEW_LINE_13846c8488c5fa9a__ echo:*)",
|
||||
"Bash(__NEW_LINE_13846c8488c5fa9a__ ls:*)",
|
||||
"Bash(python -c \"import sys,json; d=json.load\\(sys.stdin\\); print\\(f'''' Status: {d[\"\"status\"\"]}''''\\)\" __NEW_LINE_32366ca1bb050259__ echo \"\" echo \"2. 待簽核記錄 \\(含 hit_count\\):\" curl -s http://localhost:8000/api/v1/approvals/pending)",
|
||||
"Read(//Users/ogt/awoooi/**)",
|
||||
"Bash(curl -s http://localhost:8000/api/v1/timeline/events?limit=10)",
|
||||
"Bash(curl -s http://localhost:8000/api/v1/timeline/events?limit=5)",
|
||||
"Bash(ls -la /Users/ogt/awoooi/apps/api/*.txt /Users/ogt/awoooi/apps/api/*.toml)",
|
||||
"Bash(ls -la /Users/ogt/awoooi/docker-compose*.yml)",
|
||||
"Bash(ls /Users/ogt/awoooi/k8s/awoooi-prod/*rbac* /Users/ogt/awoooi/k8s/awoooi-prod/*service-account*)",
|
||||
"Bash(kubectl kustomize:*)",
|
||||
"Bash(docker compose:*)",
|
||||
"Bash(docker info:*)",
|
||||
"Bash(python3 -c \"import sys,json; d=json.load\\(sys.stdin\\); print\\(''''API Status:'''', d.get\\(''''status'''', ''''unknown''''\\)\\)\")",
|
||||
"Bash(pkill -9 -f uvicorn)",
|
||||
"Bash(lsof -ti:8000)",
|
||||
"Bash(open -a Docker)",
|
||||
"Bash(docker stop:*)",
|
||||
"Bash(lsof -ti:3000)",
|
||||
"Bash(docker start:*)",
|
||||
"Bash(docker ps:*)",
|
||||
"Bash(curl -s http://localhost:3000 -o /dev/null -w 'HTTP Status: %{http_code}\\\\n')",
|
||||
"Bash(curl -I http://localhost:8000/api/v1/dashboard/stream)",
|
||||
"Bash(curl -s http://localhost:8000/openapi.json)",
|
||||
"Bash(curl -s http://localhost:8000/api/v1/dashboard/stream --max-time 3 -w \"\\\\n--- HTTP Status: %{http_code} ---\\\\n\")",
|
||||
"Bash(curl -s http://localhost:8000/api/v1/dashboard/stream --max-time 3)",
|
||||
"Bash(curl -s http://localhost:3000/zh-TW -o /dev/null -w \"HTTP Status: %{http_code}\\\\n\")",
|
||||
"Bash(curl -s -D - http://localhost:8000/api/v1/dashboard/stream --max-time 2)",
|
||||
"Bash(chmod +x /Users/ogt/awoooi/scripts/deploy-infra.sh)",
|
||||
"Bash(./scripts/deploy-infra.sh)",
|
||||
"Bash(pnpm --filter @awoooi/web build)",
|
||||
"Bash(timeout 10 env MOCK_MODE=true OTEL_ENABLED=false uvicorn src.main:app --host 0.0.0.0 --port 8099)",
|
||||
"Bash(timeout 8 pnpm --filter @awoooi/web dev)",
|
||||
"Bash(git diff:*)",
|
||||
"Bash(curl -s -I http://localhost:8000/api/v1/dashboard/stream)",
|
||||
"Bash(timeout 3 curl -s -N http://localhost:8000/api/v1/dashboard/stream)",
|
||||
"Bash(grep -n \"NEXT_PUBLIC\\\\|API_URL\\\\|localhost\" /Users/ogt/awoooi/apps/web/.env*)",
|
||||
"Bash(timeout 2 curl -s -D - -N http://localhost:8000/api/v1/dashboard/stream)",
|
||||
"Bash(curl -s http://localhost:3000/)",
|
||||
"Bash(python -m py_compile scripts/fire_test_alert.py)",
|
||||
"Bash(python -m scripts.fire_test_alert --help)",
|
||||
"Bash(python -m scripts.fire_test_alert)",
|
||||
"Bash(python -m scripts.fire_test_alert --type k8s_pod_crash)",
|
||||
"Bash(timeout 3 curl -s -N -H \"Origin: http://localhost:3000\" http://localhost:8000/api/v1/dashboard/stream)",
|
||||
"Bash(python -m scripts.fire_test_alert --type disk_full)",
|
||||
"Bash(docker restart:*)",
|
||||
"Bash(curl -s -w \"\\\\nHTTP_CODE: %{http_code}\\\\n\" http://localhost:3000)",
|
||||
"Bash(docker exec:*)",
|
||||
"Bash(docker rmi:*)",
|
||||
"Bash(timeout 5 curl -s -N http://localhost:8000/api/v1/dashboard/stream)",
|
||||
"Bash(curl -s http://localhost:3000 -w \"\\\\nHTTP: %{http_code}\\\\n\")",
|
||||
"Bash(timeout 120 docker logs awoooi-api -f --since 1s)",
|
||||
"Bash(curl -s -I -H \"Origin: http://localhost:3000\" http://localhost:8000/api/v1/dashboard/stream)",
|
||||
"Bash(curl -s -X OPTIONS -H \"Origin: http://localhost:3000\" -H \"Access-Control-Request-Method: GET\" http://localhost:8000/api/v1/dashboard/stream -I)",
|
||||
"Bash(node /Users/ogt/awoooi/scripts/verify-sse.js)",
|
||||
"Bash(python -m scripts.fire_test_alert --type db_connection_timeout)",
|
||||
"Bash(npm run:*)",
|
||||
"Bash(docker-compose down:*)",
|
||||
"Bash(docker-compose build:*)",
|
||||
"Bash(docker-compose up:*)",
|
||||
"Bash(pkill -f 'next dev')",
|
||||
"Bash(node /Users/ogt/awoooi/scripts/test-approval-flow.js)",
|
||||
"Bash(python -m scripts.fire_test_alert --type pod_crash)",
|
||||
"Bash(node /Users/ogt/awoooi/scripts/test-k8s-executor.js)",
|
||||
"Bash(kubectl cluster-info:*)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl cluster-info)",
|
||||
"Bash(ls -la /Users/ogt/awoooi/apps/web/src/app/[locale]/)",
|
||||
"Bash(python -c \"from src.api.v1 import audit_logs; print\\(''API module loads OK''\\)\")",
|
||||
"Bash(curl -s http://localhost:3000/zh-TW/action-logs)",
|
||||
"Bash(pnpm build:*)",
|
||||
"Bash(curl -s http://localhost:8000/api/v1/audit-logs)",
|
||||
"Bash(xargs -r kill -9 2)",
|
||||
"Bash(/dev/null source:*)",
|
||||
"Bash(python -c \"from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor; print\\(''''httpx ok''''\\)\")",
|
||||
"Bash(sqlite3 /Users/ogt/awoooi/apps/api/awoooi.db \"SELECT * FROM audit_logs ORDER BY created_at DESC LIMIT 5;\")",
|
||||
"Bash(sqlite3 /Users/ogt/awoooi/apps/api/awoooi.db \"SELECT name FROM sqlite_master WHERE type=''table'';\")",
|
||||
"Bash(sqlite3 /Users/ogt/awoooi/apps/api/awoooi.db \"SELECT id, event_type, status, title, created_at FROM timeline_events ORDER BY created_at DESC LIMIT 5;\")",
|
||||
"Bash(curl -s http://localhost:8000/api/v1/audit-logs/stats)",
|
||||
"Bash(curl -s http://localhost:8000/api/v1/timeline?limit=10)",
|
||||
"Bash(curl -s \"http://localhost:8000/api/v1/timeline\")",
|
||||
"Bash(curl -s http://localhost:8000/api/v1/docs)",
|
||||
"Bash(chmod +x /Users/ogt/awoooi/scripts/setup-guardrails.sh /Users/ogt/awoooi/scripts/ai_code_reviewer.py)",
|
||||
"Bash(ls -la /Users/ogt/awoooi/apps/web/.eslintrc*)",
|
||||
"Bash(ls -la scripts/*.py scripts/*.sh .pre-commit-config.yaml .secrets.baseline apps/web/.eslintrc.js)",
|
||||
"Bash(python -m src.services.test_context_gatherer)",
|
||||
"Bash(python -m pytest src/services/test_context_gatherer.py -v)",
|
||||
"Bash(grep -r \"ClawBot\\\\|clawbot\\\\|CLAWBOT\" --include=*.py --include=*.ts --include=*.tsx apps/)",
|
||||
"Bash(python scripts/e2e_openclaw_test.py)",
|
||||
"Bash(python -m pytest tests/e2e_network_test.py -v --tb=short)",
|
||||
"Bash(chmod +x /Users/ogt/awoooi/apps/api/scripts/apply_prometheus_config.sh /Users/ogt/awoooi/apps/api/scripts/fire_live_alert.py)",
|
||||
"Bash(./scripts/apply_prometheus_config.sh)",
|
||||
"Bash(python scripts/fire_live_alert.py oomkilled)",
|
||||
"Bash(python scripts/fire_live_alert.py oomkilled --api-url http://localhost:8000)",
|
||||
"Bash(python scripts/fire_live_alert.py highcpu --api-url http://localhost:8000)",
|
||||
"Bash(python scripts/fire_live_alert.py podcrash --api-url http://localhost:8000)",
|
||||
"Bash(python -m pytest tests/test_webhook_telegram_integration.py -v)",
|
||||
"Bash(ls -la /Users/ogt/awoooi/apps/api/.env*)",
|
||||
"Bash(ls -la /Users/ogt/wooo-aiops/.env*)",
|
||||
"Bash(ls -la /Users/ogt/AIOps/.env*)",
|
||||
"Bash(/Users/ogt/awoooi/apps/api/.env:*)",
|
||||
"Bash(/tmp/deploy-188-home.sh:*)",
|
||||
"Bash(chmod +x /tmp/deploy-188-home.sh)",
|
||||
"Bash(scp /tmp/awoooi-api-deploy.tar.gz /tmp/deploy-188-home.sh ollama@192.168.0.188:/tmp/)",
|
||||
"Bash(ssh ollama@192.168.0.188 \"bash /tmp/deploy-188-home.sh\")",
|
||||
"Bash(ssh ollama@192.168.0.188 \"curl -s http://localhost:8000/api/v1/webhooks/health\")",
|
||||
"Bash(ssh ollama@192.168.0.188 \"tail -50 /tmp/openclaw.log\")",
|
||||
"Bash(ssh ollama@192.168.0.188 \"cd /home/ollama/awoooi-api && source .venv/bin/activate && pip install sqlalchemy aiosqlite -q && pip install httpx python-dotenv pydantic-settings -q\")",
|
||||
"Bash(ssh ollama@192.168.0.188 \"cd /home/ollama/awoooi-api && pkill -f ''uvicorn src.main:app'' 2>/dev/null; sleep 1; source .venv/bin/activate && nohup uvicorn src.main:app --host 0.0.0.0 --port 8000 > /tmp/openclaw.log 2>&1 & sleep 3 && curl -s http://localhost:8000/api/v1/webhooks/health\")",
|
||||
"Bash(ssh ollama@192.168.0.188:*)",
|
||||
"Bash(pkill -f ngrok)",
|
||||
"Bash(pkill -f \"ssh -fN.*8001\")",
|
||||
"Bash(ssh -fN -L 8001:localhost:8000 ollama@192.168.0.188)",
|
||||
"Bash(curl -s http://localhost:8001/api/v1/webhooks/health)",
|
||||
"Bash(BOT_TOKEN=\"8569720657:AAHdvKf_P2ms-QKFTyqTLtLiqEggz8cpjMk\" curl -s \"https://api.telegram.org/bot$BOT_TOKEN/getWebhookInfo\")",
|
||||
"Bash(curl -s https://api.telegram.org/bot$BOT_TOKEN/getWebhookInfo)",
|
||||
"Bash(curl -s http://localhost:8001/api/v1/webhooks/)",
|
||||
"Bash(curl -s http://localhost:8001/)",
|
||||
"Bash(curl -s http://localhost:8001/api/v1/health)",
|
||||
"Bash(scp /tmp/awoooi-api-v7.tar.gz ollama@192.168.0.188:/tmp/)",
|
||||
"Bash(tar -czvf /tmp/awoooi-api-v7.1.tar.gz src/ requirements.txt pyproject.toml)",
|
||||
"Bash(scp /tmp/awoooi-api-v7.1.tar.gz ollama@192.168.0.188:/tmp/)",
|
||||
"Bash(ssh ollama@192.168.0.188 \"tail -10 /tmp/openclaw.log | grep -E ''''clickhouse|signoz_gold''''\")",
|
||||
"Bash(ssh ogt@192.168.0.188 \"cd /home/ollama/awoooi-api && tail -50 nohup.out 2>/dev/null || journalctl -u awoooi-api --no-pager -n 50 2>/dev/null || echo ''請手動檢查日誌''\")",
|
||||
"Bash(curl -s --connect-timeout 5 http://192.168.0.188:8123/ -d \"SELECT 1 FORMAT JSONEachRow\")",
|
||||
"Bash(curl -s --connect-timeout 5 http://192.168.0.188:11434/api/tags)",
|
||||
"Bash(ssh -o StrictHostKeyChecking=no -o PasswordAuthentication=no -o BatchMode=yes -o ConnectTimeout=5 ollama@192.168.0.188 \"echo ok\")",
|
||||
"Bash(ssh -o StrictHostKeyChecking=no -o PasswordAuthentication=no -o BatchMode=yes -o ConnectTimeout=5 wooo@192.168.0.188 \"echo ok\")",
|
||||
"Bash(ssh -o StrictHostKeyChecking=no -o PasswordAuthentication=no -o BatchMode=yes -o ConnectTimeout=5 root@192.168.0.188 \"echo ok\")",
|
||||
"Bash(curl -s --connect-timeout 5 http://192.168.0.188:8001/health)",
|
||||
"Bash(ssh root@192.168.0.188 \"cat /tmp/openclaw.log 2>/dev/null | tail -100 || echo ''Log file not found''\")",
|
||||
"Bash(ssh -o StrictHostKeyChecking=no -o BatchMode=yes -o ConnectTimeout=5 ollama@192.168.0.188 \"echo ok\")",
|
||||
"Bash(ssh -o StrictHostKeyChecking=no -o BatchMode=yes -o ConnectTimeout=5 wooo@192.168.0.188 \"echo ok\")",
|
||||
"Bash(scp /Users/ogt/awoooi/apps/api/src/services/signoz_client.py ollama@192.168.0.188:/home/ollama/awoooi-api/src/services/)",
|
||||
"Bash(scp /Users/ogt/awoooi/apps/api/src/services/openclaw.py ollama@192.168.0.188:/home/ollama/awoooi-api/src/services/)",
|
||||
"Bash(scp /Users/ogt/awoooi/apps/api/src/services/telegram_gateway.py ollama@192.168.0.188:/home/ollama/awoooi-api/src/services/)",
|
||||
"Bash(scp /Users/ogt/awoooi/apps/api/src/api/v1/webhooks.py ollama@192.168.0.188:/home/ollama/awoooi-api/src/api/v1/)",
|
||||
"Bash(scp /Users/ogt/awoooi/apps/api/src/models/ai.py ollama@192.168.0.188:/home/ollama/awoooi-api/src/models/)",
|
||||
"Bash(ssh ollama@192.168.0.188 \"cd /home/ollama/awoooi-api && pkill -f ''''uvicorn src.main:app'''' && sleep 2 && nohup .venv/bin/python3 -m uvicorn src.main:app --host 0.0.0.0 --port 8000 > nohup.out 2>&1 &\")",
|
||||
"Bash(curl -s --connect-timeout 5 http://192.168.0.188:8000/health)",
|
||||
"Bash(curl -s --connect-timeout 10 http://192.168.0.188:8000/health)",
|
||||
"Bash(curl -s -X POST http://192.168.0.188:8000/api/v1/webhooks/alerts -H \"Content-Type: application/json\" -d '{:*)",
|
||||
"Bash(curl -s -X POST http://192.168.0.188:8000/api/v1/webhooks/alerts -H \"Content-Type: application/json\" -d '{\"\"alert_type\"\":\"\"high_cpu\"\",\"\"severity\"\":\"\"critical\"\",\"\"source\"\":\"\"signoz\"\",\"\"target_resource\"\":\"\"api-gateway\"\",\"\"namespace\"\":\"\"awoooi-prod\"\",\"\"message\"\":\"\"CPU 92% test\"\"}')",
|
||||
"Bash(curl -s --connect-timeout 5 http://192.168.0.188:8000/api/v1/webhooks/alerts -X POST -H \"Content-Type: application/json\" -d '{\"\"alert_type\"\":\"\"high_cpu\"\",\"\"severity\"\":\"\"critical\"\",\"\"source\"\":\"\"signoz\"\",\"\"target_resource\"\":\"\"api-gateway\"\",\"\"namespace\"\":\"\"awoooi-prod\"\",\"\"message\"\":\"\"CPU 92% - 統帥全自主驗收 v2\"\"}')",
|
||||
"Bash(curl -s --connect-timeout 30 --max-time 120 -X POST http://192.168.0.188:8000/api/v1/webhooks/alerts -H \"Content-Type: application/json\" -d '{:*)",
|
||||
"Bash(curl -s --connect-timeout 30 --max-time 180 -X POST http://192.168.0.188:8000/api/v1/webhooks/alerts -H \"Content-Type: application/json\" -d '{:*)",
|
||||
"Bash(curl -s http://192.168.0.188:8000/api/v1/webhooks/alerts -X POST -H \"Content-Type: application/json\" -d '{\"\"alert_type\"\":\"\"k8s_pod_crash\"\",\"\"severity\"\":\"\"critical\"\",\"\"source\"\":\"\"signoz\"\",\"\"target_resource\"\":\"\"inventory-api\"\",\"\"namespace\"\":\"\"commerce\"\",\"\"message\"\":\"\"Pod crash - 統帥終極驗收\"\"}' --connect-timeout 30 --max-time 180)",
|
||||
"Bash(ssh -o ConnectTimeout=10 ollama@192.168.0.188 \"echo OK && ps aux | grep uvicorn | grep -v grep | head -2\")",
|
||||
"Bash(curl -s http://192.168.0.188:8000/api/v1/webhooks/alerts -X POST -H \"Content-Type: application/json\" -d '{\"\"alert_type\"\":\"\"ssl_expiry\"\",\"\"severity\"\":\"\"critical\"\",\"\"source\"\":\"\"signoz\"\",\"\"target_resource\"\":\"\"nginx-ingress\"\",\"\"namespace\"\":\"\"ingress\"\",\"\"message\"\":\"\"SSL 即將過期 - 終極驗收\"\"}' --connect-timeout 30 --max-time 180)",
|
||||
"Bash(curl -s http://192.168.0.188:8000/api/v1/webhooks/alerts -X POST -H \"Content-Type: application/json\" -d '{\"\"alert_type\"\":\"\"db_connection_timeout\"\",\"\"severity\"\":\"\"critical\"\",\"\"source\"\":\"\"signoz\"\",\"\"target_resource\"\":\"\"postgres-primary\"\",\"\"namespace\"\":\"\"database\"\",\"\"message\"\":\"\"DB 連線逾時 - SignOz 整合終極測試\"\"}' --connect-timeout 30 --max-time 180)",
|
||||
"Bash(curl -s http://192.168.0.188:8000/api/v1/webhooks/alerts -X POST -H \"Content-Type: application/json\" -d '{\"\"alert_type\"\":\"\"service_404\"\",\"\"severity\"\":\"\"critical\"\",\"\"source\"\":\"\"signoz\"\",\"\"target_resource\"\":\"\"auth-service\"\",\"\"namespace\"\":\"\"identity\"\",\"\"message\"\":\"\"Service 404 - SignOz + Ollama 整合終極測試\"\"}' --connect-timeout 30 --max-time 180)",
|
||||
"Bash(curl -s http://192.168.0.188:8000/api/v1/webhooks/alerts -X POST -H \"Content-Type: application/json\" -d '{\"\"alert_type\"\":\"\"high_cpu\"\",\"\"severity\"\":\"\"warning\"\",\"\"source\"\":\"\"signoz\"\",\"\"target_resource\"\":\"\"recommendation-engine\"\",\"\"namespace\"\":\"\"ml\"\",\"\"message\"\":\"\"CPU 78% - Ollama 最終測試\"\"}' --connect-timeout 30 --max-time 200)",
|
||||
"Bash(scp apps/api/src/services/openclaw.py ollama@192.168.0.188:/home/ollama/awoooi-api/src/services/openclaw.py)",
|
||||
"Bash(scp /Users/ogt/awoooi/apps/api/src/core/http_client.py ollama@192.168.0.188:/home/ollama/awoooi-api/src/core/)",
|
||||
"Bash(scp /Users/ogt/awoooi/apps/api/src/main.py ollama@192.168.0.188:/home/ollama/awoooi-api/src/)",
|
||||
"Bash(scp /Users/ogt/awoooi/apps/api/src/core/config.py ollama@192.168.0.188:/home/ollama/awoooi-api/src/core/)",
|
||||
"Bash(scp /Users/ogt/awoooi/apps/api/src/api/v1/health.py ollama@192.168.0.188:/home/ollama/awoooi-api/src/api/v1/)",
|
||||
"Bash(ssh -o ConnectTimeout=5 ollama@192.168.0.188 \"ps aux | grep uvicorn | grep -v grep\")",
|
||||
"Bash(curl -s -H \"Origin: http://localhost:3000\" -H \"Access-Control-Request-Method: GET\" -X OPTIONS http://192.168.0.188:8000/api/v1/health -v)",
|
||||
"Bash(curl -s http://192.168.0.188:8000/api/v1/health)",
|
||||
"Bash(curl -s -N --max-time 3 http://192.168.0.188:8000/api/v1/dashboard/stream)",
|
||||
"Bash(curl -s http://localhost:3000/zh-TW -o /dev/null -w \"%{http_code}\")",
|
||||
"Bash(open http://localhost:3000/zh-TW)",
|
||||
"Bash(open http://localhost:3001/zh-TW)",
|
||||
"Bash(curl -s -H \"Origin: http://localhost:3001\" http://192.168.0.188:8000/api/v1/dashboard/stream --max-time 3)",
|
||||
"Bash(curl -s -I -H \"Origin: http://localhost:3001\" http://192.168.0.188:8000/api/v1/health)",
|
||||
"Bash(curl -s http://192.168.0.188:8000/api/v1/approvals/pending)",
|
||||
"Bash(curl -s http://192.168.0.188:8000/api/v1/approvals)",
|
||||
"Bash(curl -s \"http://192.168.0.188:8000/api/v1/approvals?status=pending_approval\")",
|
||||
"Bash(xargs sed:*)",
|
||||
"Bash(curl -s \"http://192.168.0.188:8000/api/v1/approvals/history?limit=5\")",
|
||||
"Bash(curl -s http://192.168.0.188:8000/api/v1/approvals/approved)",
|
||||
"Bash(curl -s \"http://192.168.0.188:8000/api/v1/timeline?limit=10\")",
|
||||
"Bash(curl -s \"http://192.168.0.188:8000/api/v1/action-logs\")",
|
||||
"Bash(curl -s \"http://192.168.0.188:8000/api/v1/timeline/events?limit=10\")",
|
||||
"Bash(ssh ogt@192.168.0.188 \"kubectl get nodes\")",
|
||||
"Bash(curl -s \"http://192.168.0.188:8000/api/v1/approvals/k8s-test\")",
|
||||
"Bash(scp /Users/ogt/awoooi/apps/api/k3s-prod.yaml ogt@192.168.0.188:~/awoooi-api/k3s-prod.yaml)",
|
||||
"Bash(curl -s \"http://192.168.0.188:8000/api/v1/timeline/events?limit=5\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.120 \"cat /etc/rancher/k3s/k3s.yaml\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no wooo@192.168.0.188 \"echo ''SSH OK'' && pwd\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"echo ''SSH OK'' && pwd && ls -la ~/awoooi-api/ 2>/dev/null || echo ''Directory not found''\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"sshpass -p ''0936223270'' scp -o StrictHostKeyChecking=no wooo@192.168.0.120:/etc/rancher/k3s/k3s.yaml ~/awoooi-api/k3s-prod.yaml && sed -i ''s/127.0.0.1/192.168.0.120/g'' ~/awoooi-api/k3s-prod.yaml && echo ''Kubeconfig deployed!'' && head -10 ~/awoooi-api/k3s-prod.yaml\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cd ~/awoooi-api && pkill -f ''uvicorn'' 2>/dev/null; sleep 1; nohup .venv/bin/uvicorn src.main:app --host 0.0.0.0 --port 8000 --reload > nohup.out 2>&1 & sleep 3; echo ''=== API Restarted ==='' && tail -20 nohup.out\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 \"cd ~/awoooi-api && pkill -f ''uvicorn src.main'' || true\")",
|
||||
"Bash(curl -s \"http://192.168.0.188:8000/api/v1/health\" --connect-timeout 5)",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no -o ConnectTimeout=10 ollama@192.168.0.188 \"cd ~/awoooi-api && source .venv/bin/activate && nohup uvicorn src.main:app --host 0.0.0.0 --port 8000 > nohup.out 2>&1 &\")",
|
||||
"Bash(sshpass -p:*)",
|
||||
"Bash(curl -s \"http://192.168.0.188:8000/api/v1/health\" --connect-timeout 10)",
|
||||
"Bash(curl -s \"http://192.168.0.188:8000/api/v1/timeline/events?limit=8\")",
|
||||
"Bash(curl -s http://localhost:3000/zh-TW -o /dev/null -w \"Frontend: HTTP %{http_code}\\\\n\")",
|
||||
"Bash(sshpass -p '0936223270' ssh -o StrictHostKeyChecking=no ollama@192.168.0.188 'curl -s http://localhost:8000/api/v1/approvals/pending | jq -r \"\".approvals[] | \\\\\"\"ID: \\\\\\(.id\\) | Action: \\\\\\(.action\\)\\\\\"\"\"\"')",
|
||||
"Bash(curl -s --connect-timeout 5 https://awoooi.wooo.tw/api/v1/health)",
|
||||
"Bash(curl -s --connect-timeout 5 https://awoooi.wooo.tw/api/v1/approvals/pending)",
|
||||
"Bash(ssh ollama@192.168.70.188 \"ps aux | grep uvicorn | grep -v grep | head -3\")",
|
||||
"Bash(ssh -o ConnectTimeout=10 ollama@192.168.70.188 \"echo ''SSH Connected''\")",
|
||||
"Bash(ping -c 2 -t 5 192.168.70.188)",
|
||||
"Bash(curl -s --connect-timeout 10 https://awoooi.wooo.tw/api/v1/health)",
|
||||
"Bash(ssh -o ConnectTimeout=10 ollama@192.168.0.188 \"echo ''SSH Connected to 188 Base''\")",
|
||||
"Bash(grep -B 5 -A 30 \"async def add_signature\" /Users/ogt/awoooi/apps/api/src/services/*.py)",
|
||||
"Bash(ssh ogt@192.168.0.188 \"cd /home/ogt/awoooi && docker compose ps\")",
|
||||
"Bash(ls -la .env*)",
|
||||
"Bash(.env:*)",
|
||||
"Bash(timeout 15 python -m uvicorn src.main:app --host 0.0.0.0 --port 8001)",
|
||||
"Bash(timeout 20 python -m uvicorn src.main:app --host 0.0.0.0 --port 8001)",
|
||||
"Bash(timeout 25 python -m uvicorn src.main:app --host 0.0.0.0 --port 8001)",
|
||||
"Bash(ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no ogt@192.168.0.188 \"cd /home/ogt/wooo-aiops && docker compose ps clawbot 2>/dev/null || docker ps | grep -i claw\")",
|
||||
"Bash(ls -la ~/.ssh/*.pub)",
|
||||
"Bash(ssh -i ~/.ssh/id_rsa -o ConnectTimeout=5 -o StrictHostKeyChecking=no -o PasswordAuthentication=no ogt@192.168.0.188 \"echo connected\")",
|
||||
"Bash(curl -s \"https://api.telegram.org/bot8569720657:AAHdvKf_P2ms-QKFTyqTLtLiqEggz8cpjMk/logOut\")",
|
||||
"Bash(curl -s \"https://api.telegram.org/bot8569720657:AAHdvKf_P2ms-QKFTyqTLtLiqEggz8cpjMk/close\")",
|
||||
"Bash(curl -s \"https://api.telegram.org/bot8569720657:AAHdvKf_P2ms-QKFTyqTLtLiqEggz8cpjMk/getUpdates?timeout=3&limit=1\")",
|
||||
"Bash(ping -c 1 192.168.0.188)",
|
||||
"Bash(python -m tests.test_redis_multisig)",
|
||||
"Bash(curl -v -X POST http://localhost:8000/api/v1/webhooks/signals -H \"Content-Type: application/json\" -d '{:*)",
|
||||
"Bash(python3 -c \":*)",
|
||||
"Bash(echo ' 無法連線' __NEW_LINE_8fc87454f9798a7d__ echo echo [結論]: echo ' /signals 端點尚未部署到 .188' echo ' 程式碼已完成,需要執行:' echo \" cd apps/api && docker build -t awoooi-api . && docker-compose up -d\")",
|
||||
"Bash(__NEW_LINE_dc88f37970737861__ cd:*)",
|
||||
"Bash(__NEW_LINE_dc88f37970737861__ echo:*)",
|
||||
"Read(//Users/**)",
|
||||
"Bash(tail -20 __NEW_LINE_8b049957a9782734__ echo \"\" echo \"[Step 2] 等待容器啟動 \\(10 秒\\)...\" sleep 10 __NEW_LINE_8b049957a9782734__ echo \"\" echo \"[Step 3] 檢查容器狀態...\" docker compose ps)",
|
||||
"Bash(tail -5 __NEW_LINE_275e0094e9dcb44a__ echo \"\" echo \"[1.2] 重建 API 容器 \\(含 Signal Worker\\)...\" docker compose build api)",
|
||||
"Bash(1 __NEW_LINE_275e0094e9dcb44a__ echo \"\" echo \"[1.4] 等待服務就緒 \\(15 秒\\)...\" sleep 15 __NEW_LINE_275e0094e9dcb44a__ echo \"\" echo \"[1.5] 檢查容器狀態...\" docker compose ps)",
|
||||
"Bash(__NEW_LINE_f4c8301ec5249760__ echo:*)",
|
||||
"Bash(__NEW_LINE_21ba3cf3700d942d__ cd:*)",
|
||||
"Bash(1 __NEW_LINE_9a14b79fc58c11ba__ echo \"\" echo \"[1.3] 等待服務就緒 \\(15 秒\\)...\" sleep 15 __NEW_LINE_9a14b79fc58c11ba__ echo \"\" echo \"[1.4] 檢查容器狀態...\" docker compose ps api)",
|
||||
"Bash(1 __NEW_LINE_6b654ca5be87c137__ echo \"\" echo \"[2] 等待服務就緒 \\(15 秒\\)...\" sleep 15 __NEW_LINE_6b654ca5be87c137__ echo \"\" echo \"[3] 發送測試 Signal...\" curl -s -X POST http://localhost:8000/api/v1/webhooks/signals -H \"Content-Type: application/json\" -d '{:*)",
|
||||
"Bash(__NEW_LINE_564908ddf866c081__ echo:*)",
|
||||
"Bash(chmod +x /Users/ogt/awoooi/apps/api/scripts/test_phase63_aggregation.py)",
|
||||
"Bash(python scripts/test_phase63_aggregation.py)",
|
||||
"Bash(xargs -r docker exec -i awoooi-redis redis-cli DEL)",
|
||||
"Bash(chmod +x /Users/ogt/awoooi/apps/api/scripts/test_race_condition.py)",
|
||||
"Bash(python scripts/test_race_condition.py)",
|
||||
"Bash(chmod +x /Users/ogt/awoooi/apps/api/scripts/test_phase64_proposal.py)",
|
||||
"Bash(python scripts/test_phase64_proposal.py)",
|
||||
"Bash(python agent.py --alert FINAL_PHASE_6_TEST)",
|
||||
"Bash(AWOOOI_REDIS_URL=\"redis://localhost:6379/0\" python agent.py --alert FINAL_PHASE_6_TEST)",
|
||||
"Bash(curl -s http://localhost:8000/api/v1/incidents)",
|
||||
"Bash(curl -s -X POST http://localhost:8000/api/v1/incidents/INC-20260322-06085B/proposal)",
|
||||
"Bash(grep -r \"mock\\\\|Mock\\\\|MOCK\\\\|fake\\\\|Fake\\\\|dummy\\\\|hardcode\" /Users/ogt/awoooi/apps/web/src --include=*.tsx --include=*.ts -l)",
|
||||
"Bash(NEXT_PUBLIC_API_URL=http://localhost:8000 pnpm next build --no-lint)",
|
||||
"Bash(grep -v \"Traceback\\\\|File \"\"/usr\\\\|^\\\\s*$\")",
|
||||
"Bash(python -c \"import sys,json; d=json.load\\(sys.stdin\\); print\\(f''''Signal Count: {len\\(d[\"\"signals\"\"]\\)}''''\\); [print\\(f'''' - {s[\"\"alert_name\"\"]} \\({s[\"\"signal_id\"\"]}\\)''''\\) for s in d[''''signals'''']]\")",
|
||||
"Bash(curl -s -o /dev/null -w \"%{http_code}\" http://localhost:3003/zh-TW)",
|
||||
"Bash(curl -s -X GET \"http://localhost:8000/api/v1/incidents\" -H \"Origin: http://localhost:3003\" -H \"Access-Control-Request-Method: GET\" -v)",
|
||||
"Bash(grep -r TELEGRAM /Users/ogt/awoooi/apps/api/.env*)",
|
||||
"Bash(grep -r TELEGRAM_BOT_TOKEN /Users/ogt/awoooi --include=*.env* --include=*.yaml --include=*.yml)",
|
||||
"Bash(curl -s -I -X OPTIONS \"http://localhost:8000/api/v1/incidents\" -H \"Origin: http://localhost:3000\" -H \"Access-Control-Request-Method: GET\")",
|
||||
"Bash(curl -s \"http://localhost:8000/api/v1/incidents\" -H \"Origin: http://localhost:3000\")",
|
||||
"Bash(python /tmp/e2e_drill.py)",
|
||||
"Bash(python -c \"import sys,json; d=json.load\\(sys.stdin\\); i=[x for x in d[''''incidents''''] if x[''''incident_id'''']==''''INC-20260322-06085B''''][0]; print\\(f\"\"Incident: {i[''''incident_id'''']}\"\"\\); print\\(f\"\"Signals: {i[''''signal_count'''']}\"\"\\); print\\(f\"\"Updated: {i[''''updated_at'''']}\"\"\\)\")",
|
||||
"Bash(curl -s -X POST \"http://localhost:8000/api/v1/telegram/test\")",
|
||||
"Bash(curl -s -X POST \"http://localhost:8000/api/v1/telegram/test-push\" -H \"Content-Type: application/json\" -d '{\"\"\"\"approval_id\"\"\"\": \"\"\"\"15ab6844-ca4e-4a13-aead-dc71cd342445\"\"\"\", \"\"\"\"risk_level\"\"\"\": \"\"\"\"critical\"\"\"\", \"\"\"\"resource_name\"\"\"\": \"\"\"\"api-gateway\"\"\"\", \"\"\"\"root_cause\"\"\"\": \"\"\"\"E2E DRILL - PodCrashLoopBackOff\"\"\"\", \"\"\"\"suggested_action\"\"\"\": \"\"\"\"RESTART_DEPLOYMENT\"\"\"\", \"\"\"\"estimated_downtime\"\"\"\": \"\"\"\"5-15 min\"\"\"\"}')",
|
||||
"Bash(curl -s -o /dev/null -w \"HTTP Status: %{http_code}\\\\n\" http://localhost:3000/zh-TW)",
|
||||
"Bash(curl -s -I \"http://localhost:8000/api/v1/incidents\" -H \"Origin: http://localhost:3000\")",
|
||||
"Bash(curl -s -X POST http://localhost:8000/api/v1/incidents/INC-20260322-19DF60/proposal)",
|
||||
"Bash(curl -s -X POST \"http://localhost:8000/api/v1/telegram/test-push\" -H \"Content-Type: application/json\" -d '{\"\"\"\"approval_id\"\"\"\": \"\"\"\"942e762e-fb97-480f-b21a-d3be67fa70b1\"\"\"\", \"\"\"\"risk_level\"\"\"\": \"\"\"\"critical\"\"\"\", \"\"\"\"resource_name\"\"\"\": \"\"\"\"core-system\"\"\"\", \"\"\"\"root_cause\"\"\"\": \"\"\"\"E2E DRILL TAKE 2 - 二次實彈演習\"\"\"\", \"\"\"\"suggested_action\"\"\"\": \"\"\"\"INVESTIGATE_SERVICE\"\"\"\", \"\"\"\"estimated_downtime\"\"\"\": \"\"\"\"5-15 min\"\"\"\"}')",
|
||||
"Bash(curl -s \"http://localhost:8000/api/v1/incidents\" -H \"Origin: http://localhost:3000\" -H \"Accept: application/json\")",
|
||||
"Bash(python -c \"import sys,json; d=json.load\\(sys.stdin\\); print\\(f''''Incidents: {d[\"\"count\"\"]}''''\\); [print\\(f'''' - {i[\"\"incident_id\"\"]} | {i[\"\"severity\"\"]} | {i[\"\"signal_count\"\"]} signals | {i[\"\"affected_services\"\"]}''''\\) for i in d[''''incidents'''']]\")",
|
||||
"Bash(curl -s \"http://localhost:8000/api/v1/approvals/pending\" -H \"Origin: http://localhost:3000\")",
|
||||
"Bash(python -c \"import sys,json; d=json.load\\(sys.stdin\\); print\\(f''''Pending: {d[\"\"count\"\"]} approvals''''\\); [print\\(f'''' - {a[\"\"id\"\"][:8]}... | {a[\"\"risk_level\"\"]} | {a[\"\"action\"\"][:30]}...''''\\) for a in d[''''approvals''''][:3]]\")",
|
||||
"Bash(mkdir -p /Users/ogt/awoooi/apps/web/public/fonts)",
|
||||
"Bash(curl -sL -o DSEG7Classic-Bold.woff2 \"https://cdn.jsdelivr.net/npm/dseg@0.46.0/fonts/DSEG7-Classic/DSEG7Classic-Bold.woff2\")",
|
||||
"Bash(curl -sL -o DSEG7Classic-Bold.woff \"https://cdn.jsdelivr.net/npm/dseg@0.46.0/fonts/DSEG7-Classic/DSEG7Classic-Bold.woff\")",
|
||||
"Bash(curl -sL -o DSEG7Classic-Regular.woff2 \"https://cdn.jsdelivr.net/npm/dseg@0.46.0/fonts/DSEG7-Classic/DSEG7Classic-Regular.woff2\")",
|
||||
"Bash(curl -sL -o DSEG7Classic-Regular.woff \"https://cdn.jsdelivr.net/npm/dseg@0.46.0/fonts/DSEG7-Classic/DSEG7Classic-Regular.woff\")",
|
||||
"Bash(pnpm next:*)",
|
||||
"Bash(chmod +x /Users/ogt/awoooi/scripts/bootstrap_prod.sh)",
|
||||
"Bash(/Users/ogt/awoooi/.env:*)",
|
||||
"Bash(grep -E \"^\\\\.env$|03-secrets\\\\.yaml\" .gitignore)",
|
||||
"Bash(echo 'Adding to .gitignore...' if ! grep -q ^.env$ .gitignore)",
|
||||
"Bash(then echo:*)",
|
||||
"Bash(git add:*)",
|
||||
"Bash(git commit:*)",
|
||||
"Bash(git push:*)",
|
||||
"Bash(git remote:*)",
|
||||
"Bash(gh repo:*)",
|
||||
"Bash(gh api:*)",
|
||||
"Bash(gh run:*)",
|
||||
"Bash(ls -la pnpm-*.yaml package.json turbo.json)",
|
||||
"Bash(git status:*)",
|
||||
"Bash(gh workflow:*)",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get pods -n awoooi-prod -o wide\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-api-77545758fc-xnncc -n awoooi-prod --tail=50\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-api-77545758fc-xnncc -n awoooi-prod 2>&1 | grep -i ''cors'' -A 5 -B 5\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-api-79948cbbbf-b8cgj -n awoooi-prod --tail=100\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get pods -n awoooi-prod -l app=awoooi-api --sort-by=.metadata.creationTimestamp -o name | tail -1 | xargs kubectl logs -n awoooi-prod --tail=50\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get secret awoooi-secrets -n awoooi-prod -o jsonpath=''{.data.OPENCLAW_TG_USER_WHITELIST}'' | base64 -d\")",
|
||||
"Bash(ssh wooo@192.168.0.120 'kubectl patch secret awoooi-secrets -n awoooi-prod --type='\"''\"'json'\"''\"' -p='\"''\"'[:*)",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl rollout restart deployment/awoooi-api -n awoooi-prod && kubectl rollout status deployment/awoooi-api -n awoooi-prod --timeout=120s\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl rollout restart deployment/awoooi-worker -n awoooi-prod && kubectl rollout status deployment/awoooi-worker -n awoooi-prod --timeout=120s\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-worker-747967b787-fcx2r -n awoooi-prod --tail=30\")",
|
||||
"Bash(ssh wooo@192.168.0.110 \"ps aux | grep -E ''actions-runner|Runner'' | grep -v grep\")",
|
||||
"Bash(curl -sf http://192.168.0.120:32334/api/v1/health)",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-api-fd795cd87-rdpgn -n awoooi-prod --tail=30\")",
|
||||
"Bash(ssh wooo@192.168.0.110 \"curl -sf http://192.168.0.120:32334/api/v1/health | jq .status\")",
|
||||
"Bash(ssh wooo@192.168.0.110 \"curl -sf http://192.168.0.120:32334/api/v1/health\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"curl -sf http://localhost:32334/api/v1/health\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get svc -n awoooi-prod\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"curl -sf http://10.43.125.201:8000/api/v1/health\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"curl -sf http://10.43.105.105:3000/ -o /dev/null && echo ''Web OK''\")",
|
||||
"Bash(ssh ogt@192.168.0.188 \"ls -la /etc/nginx/sites-available/\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-api -n awoooi-prod --tail=50\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-api-795c95ff76-wch2p -n awoooi-prod --tail=30\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get pods -n awoooi-prod && ss -tlnp | grep 32334\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"curl -sf http://127.0.0.1:32334/api/v1/health | head -c 200\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"sudo ufw status 2>/dev/null || sudo iptables -L INPUT -n | head -20\")",
|
||||
"Bash(ssh wooo@192.168.0.110 \"curl -sf --connect-timeout 5 http://192.168.0.120:32334/api/v1/health | head -c 100\")",
|
||||
"Bash(ssh wooo@192.168.0.110 \"curl -v --connect-timeout 5 http://192.168.0.120:32334/api/v1/health 2>&1 | head -30\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"cat /etc/systemd/system/k3s.service 2>/dev/null | grep -i exec || ps aux | grep k3s | head -3\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"cat /etc/systemd/system/k3s.service\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"netstat -tlnp 2>/dev/null | grep 32334 || ss -tlnp | grep 32334\")",
|
||||
"Bash(ssh wooo@192.168.0.110 \"curl -sf --connect-timeout 5 http://192.168.0.120:31234/health 2>&1 | head -c 100\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get networkpolicy -n awoooi-prod\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get networkpolicy allow-nginx-ingress -n awoooi-prod -o yaml\")",
|
||||
"Bash(curl -sk https://awoooi.wooo.work/api/v1/health)",
|
||||
"Bash(curl -sk -I -X OPTIONS https://awoooi.wooo.work/api/v1/health -H \"Origin: https://awoooi.wooo.work\" -H \"Access-Control-Request-Method: GET\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"curl -sI --connect-timeout 3 http://127.0.0.1:32334/api/v1/health 2>&1 | head -5\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"curl -sI --connect-timeout 3 http://127.0.0.1:32335/ 2>&1 | head -5\")",
|
||||
"Bash(ssh wooo@192.168.0.121 \"curl -sI --connect-timeout 3 http://127.0.0.1:32334/api/v1/health 2>&1 | head -5\")",
|
||||
"Bash(ssh wooo@192.168.0.121 \"curl -sI --connect-timeout 3 http://127.0.0.1:32335/ 2>&1 | head -5\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"sudo iptables -t nat -L KUBE-NODEPORTS -n 2>/dev/null | head -20\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"sudo netstat -tlnp | grep -E ''32334|32335''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"ss -tlnp 2>/dev/null | grep -E ''32334|32335'' || netstat -tln | grep -E ''32334|32335''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"ss -tln | grep -E ''32334|32335|:323''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"ss -tln\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"export KUBECONFIG=/home/wooo/.kube/config-120; /home/wooo/bin/kubectl get svc -n awoooi-prod -o wide\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"which kubectl || find /usr -name kubectl 2>/dev/null | head -1\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get svc -n awoooi-prod && kubectl get pods -n awoooi-prod -o wide\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"export KUBECONFIG=/home/wooo/.kube/config-120 && kubectl logs awoooi-api-546b88465d-lb8zm -n awoooi-prod --tail 80\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"KUBECONFIG=/home/wooo/.kube/config-120 kubectl logs awoooi-api-546b88465d-lb8zm -n awoooi-prod --tail 80 2>&1\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"ls -la /home/wooo/.kube/ && cat /home/wooo/.kube/config-120 2>/dev/null | head -20 || cat /etc/rancher/k3s/k3s.yaml 2>/dev/null | head -20\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"sudo cat /etc/rancher/k3s/k3s.yaml | head -20\")",
|
||||
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && kubectl logs awoooi-api-546b88465d-lb8zm -n awoooi-prod --tail 100 2>&1\")",
|
||||
"Bash(ssh wooo@192.168.0.110 \"which kubectl 2>/dev/null || find /home/wooo -name kubectl 2>/dev/null | head -1 || ls -la /home/wooo/bin/\")",
|
||||
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl logs awoooi-api-546b88465d-lb8zm -n awoooi-prod --tail 100 2>&1\")",
|
||||
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl describe pod awoooi-api-546b88465d-lb8zm -n awoooi-prod | tail -40\")",
|
||||
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl get svc -n awoooi-prod -o wide\")",
|
||||
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl exec -n awoooi-prod deploy/awoooi-api -- curl -sf http://localhost:8000/api/v1/health 2>&1\")",
|
||||
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl exec -n awoooi-prod deploy/awoooi-api -- wget -qO- http://localhost:8000/api/v1/health 2>&1\")",
|
||||
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl logs deployment/awoooi-api -n awoooi-prod --tail 20 2>&1\")",
|
||||
"Bash(ssh wooo@192.168.0.110 \"curl -sf http://192.168.0.120:32334/api/v1/health 2>&1 || echo ''FAILED to connect to 120:32334''\")",
|
||||
"Bash(ssh wooo@192.168.0.110 \"curl -sf http://192.168.0.121:32334/api/v1/health 2>&1 || echo ''FAILED to connect to 121:32334''\")",
|
||||
"Bash(ssh wooo@192.168.0.110 \"ssh wooo@192.168.0.120 ''cat /etc/rancher/k3s/k3s.yaml 2>/dev/null || echo No k3s.yaml''\")",
|
||||
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl get pods -n awoooi-prod -o wide | grep Running\")",
|
||||
"Bash(ssh -o ConnectTimeout=5 wooo@192.168.0.120 \"ufw status 2>/dev/null || firewall-cmd --state 2>/dev/null || echo ''No firewall command found''\")",
|
||||
"Bash(ssh -o ConnectTimeout=5 wooo@192.168.0.121 \"ufw status 2>/dev/null || firewall-cmd --state 2>/dev/null || echo ''No firewall command found''\")",
|
||||
"Bash(pip3 show:*)",
|
||||
"Bash(docker build:*)",
|
||||
"Bash(docker version:*)",
|
||||
"Bash(docker run:*)",
|
||||
"Bash(curl -vI -H \"Origin: https://awoooi.wooo.work\" http://localhost:8889/api/v1/health)",
|
||||
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl get endpoints awoooi-api-svc -n awoooi-prod\")",
|
||||
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl get pods -n awoooi-prod -o wide\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"sudo -n ufw status 2>/dev/null || sudo -n iptables -L INPUT -n 2>/dev/null | head -20 || echo ''Need sudo for firewall check''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"ss -tln | grep -E ''32334|32335|:323'' || echo ''No NodePort listeners found''\")",
|
||||
"Bash(ssh wooo@192.168.0.121 \"ss -tln | grep -E ''32334|32335|:323'' || echo ''No NodePort listeners found''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"ps aux | grep -E ''kube-proxy|k3s'' | grep -v grep | head -5\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"cat /proc/sys/net/ipv4/ip_forward\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"systemctl status k3s 2>/dev/null | head -15 || ps aux | grep ''k3s server'' | grep -v grep\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"curl -sf --connect-timeout 5 http://127.0.0.1:32334/api/v1/health 2>&1 || echo ''LOCALHOST NodePort FAILED''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"curl -sf --connect-timeout 5 http://192.168.0.120:32334/api/v1/health 2>&1 || echo ''EXTERNAL IP NodePort FAILED''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"cat /etc/iptables/rules.v4 2>/dev/null || iptables-save 2>/dev/null | grep -E ''DROP|REJECT|32334|32335'' | head -10 || echo ''Cannot read iptables without sudo''\")",
|
||||
"Bash(ssh wooo@192.168.0.121 \"curl -sf --connect-timeout 5 http://192.168.0.120:32334/api/v1/health 2>&1 || echo ''Worker->Master NodePort FAILED''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"cat /etc/rancher/k3s/config.yaml 2>/dev/null || ls -la /etc/rancher/k3s/ 2>/dev/null || echo ''No K3s config found''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"netstat -an 2>/dev/null | grep 32334 || ss -an | grep 32334 || echo ''No socket found for 32334''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S iptables -L INPUT -n 2>&1 | head -20\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S iptables -t nat -L KUBE-NODEPORTS -n 2>&1 | head -20\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S iptables -L KUBE-ROUTER-INPUT -n 2>&1 | head -30\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"echo ''0936223270'' | sudo -S iptables -t nat -L KUBE-NODEPORTS -n 2>&1 | grep -i awoooi || echo ''NO AWOOOI RULES FOUND''\")",
|
||||
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl get svc awoooi-api-svc -n awoooi-prod -o yaml | grep -A5 ''spec:''\")",
|
||||
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl get networkpolicy -n awoooi-prod\")",
|
||||
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl apply -f - 2>&1\")",
|
||||
"Bash(curl -sf --connect-timeout 10 https://awoooi.wooo.work/api/v1/health)",
|
||||
"Bash(curl -skf --connect-timeout 10 https://awoooi.wooo.work/api/v1/health)",
|
||||
"Bash(curl -sI https://awoooi.wooo.work/)",
|
||||
"Bash(curl -skI https://awoooi.wooo.work/)",
|
||||
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl logs deployment/awoooi-api -n awoooi-prod --tail 50 2>&1\")",
|
||||
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl rollout restart deployment/awoooi-api -n awoooi-prod && /home/wooo/kubectl rollout status deployment/awoooi-api -n awoooi-prod --timeout=120s\")",
|
||||
"Bash(curl -sf https://awoooi.wooo.work/api/v1/health)",
|
||||
"Bash(curl -skf https://awoooi.wooo.work/api/v1/health)",
|
||||
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl logs deployment/awoooi-api -n awoooi-prod --tail 40 2>&1\")",
|
||||
"Bash(for i:*)",
|
||||
"Bash(do curl:*)",
|
||||
"Bash(echo \"Request $i sent\")",
|
||||
"Bash(done)",
|
||||
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl logs deployment/awoooi-api -n awoooi-prod --tail 100 2>&1\")",
|
||||
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl logs deployment/awoooi-api -n awoooi-prod --tail 30 2>&1\")",
|
||||
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl get configmap awoooi-config -n awoooi-prod -o yaml | grep OTEL\")",
|
||||
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl exec deployment/awoooi-api -n awoooi-prod -- env | grep OTEL\")",
|
||||
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl exec deployment/awoooi-api -n awoooi-prod -- python -c \"\"import socket; s=socket.socket\\(\\); s.settimeout\\(5\\); s.connect\\(\\(''192.168.0.188'', 24317\\)\\); print\\(''✅ Connection to 24317 OK''\\); s.close\\(\\)\"\" 2>&1\")",
|
||||
"Bash(curl -vI https://awoooi.wooo.work)",
|
||||
"Bash(curl -vI https://awoooi.wooo.work/api/v1/health)",
|
||||
"Bash(curl -sf -X POST https://awoooi.wooo.work/api/v1/webhooks/signals -H \"Content-Type: application/json\" -d '{:*)",
|
||||
"Bash(curl -s -X POST https://awoooi.wooo.work/api/v1/webhooks/signals -H \"Content-Type: application/json\" -d '{\"\"source\"\": \"\"prometheus\"\", \"\"severity\"\": \"\"P1\"\", \"\"message\"\": \"\"Test alert from CLI\"\"}')",
|
||||
"Bash(curl -s -X POST https://awoooi.wooo.work/api/v1/webhooks/signals -H \"Content-Type: application/json\" -d '{:*)",
|
||||
"Bash(ssh wooo@192.168.0.110 \"export KUBECONFIG=/home/wooo/.kube/config-120 && /home/wooo/kubectl get secret awoooi-secrets -n awoooi-prod -o jsonpath=''''{.data.WEBHOOK_HMAC_SECRET}'''' 2>/dev/null\")",
|
||||
"Bash(timeout 15 curl -N -s https://awoooi.wooo.work/api/v1/dashboard/stream)",
|
||||
"Bash(bash:*)",
|
||||
"Bash(curl -s https://awoooi.wooo.work/api/v1/metrics/gold)",
|
||||
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT DISTINCT metric_name FROM signoz_metrics.distributed_samples_v4 WHERE unix_milli > \\(toUnixTimestamp\\(now\\(\\)\\) - 1800\\) * 1000 LIMIT 20 FORMAT TabSeparated\")",
|
||||
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT count\\(\\) as trace_count FROM signoz_traces.distributed_signoz_index_v2 WHERE timestamp > now\\(\\) - INTERVAL 30 MINUTE FORMAT TabSeparated\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"KUBECONFIG=/home/wooo/.kube/config-120 /home/wooo/bin/kubectl get configmap awoooi-config -n awoooi-prod -o jsonpath=''{.data}'' | python3 -m json.tool 2>/dev/null | head -30\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"KUBECONFIG=/home/wooo/.kube/config-120 /home/wooo/bin/kubectl logs deployment/awoooi-api -n awoooi-prod --tail 50 2>&1\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"which kubectl || ls -la ~/bin/kubectl 2>/dev/null || ls -la /usr/local/bin/kubectl 2>/dev/null || echo ''kubectl not found''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"export KUBECONFIG=/home/wooo/.kube/config-120 && kubectl get configmap awoooi-config -n awoooi-prod -o jsonpath=''{.data}'' 2>&1\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"ls -la ~/.kube/ 2>/dev/null; cat ~/.kube/config 2>/dev/null | head -20 || echo ''checking k3s default...''; sudo cat /etc/rancher/k3s/k3s.yaml 2>/dev/null | head -5 || echo ''no k3s config''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"sudo k3s kubectl get configmap awoooi-config -n awoooi-prod -o yaml 2>&1\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"sudo k3s kubectl logs deployment/awoooi-api -n awoooi-prod --tail 100 2>&1\")",
|
||||
"Bash(nc -zv 192.168.0.188 24317)",
|
||||
"Bash(curl -s http://192.168.0.188:24318/v1/traces -X POST -H \"Content-Type: application/json\" -d '{}')",
|
||||
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT DISTINCT serviceName, count\\(\\) as cnt FROM signoz_traces.distributed_signoz_index_v2 WHERE timestamp > now\\(\\) - INTERVAL 24 HOUR GROUP BY serviceName ORDER BY cnt DESC LIMIT 20 FORMAT TabSeparated\")",
|
||||
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"DESCRIBE TABLE signoz_traces.distributed_signoz_index_v2 FORMAT TabSeparated\")",
|
||||
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT serviceName, count\\(\\) as cnt FROM signoz_traces.distributed_signoz_index_v2 WHERE timestamp > now\\(\\) - INTERVAL 5 MINUTE GROUP BY serviceName ORDER BY cnt DESC LIMIT 10 FORMAT TabSeparated\")",
|
||||
"Bash(curl -s https://awoooi.wooo.work/api/v1/health)",
|
||||
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT serviceName, count\\(\\) as cnt FROM signoz_traces.distributed_signoz_index_v2 WHERE timestamp > now\\(\\) - INTERVAL 10 MINUTE GROUP BY serviceName ORDER BY cnt DESC LIMIT 10 FORMAT TabSeparated\")",
|
||||
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT service_name, count\\(\\) as cnt FROM signoz_logs.distributed_logs WHERE timestamp > now\\(\\) - INTERVAL 30 MINUTE GROUP BY service_name ORDER BY cnt DESC LIMIT 10 FORMAT TabSeparated\")",
|
||||
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SHOW TABLES FROM signoz_logs FORMAT TabSeparated\")",
|
||||
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT count\\(\\) as total FROM signoz_logs.distributed_logs_v2 WHERE timestamp > now\\(\\) - INTERVAL 30 MINUTE FORMAT TabSeparated\")",
|
||||
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT JSONExtractString\\(resources_string, ''service.name''\\) as svc, count\\(\\) as cnt FROM signoz_logs.distributed_logs_v2 WHERE timestamp > now\\(\\) - INTERVAL 5 MINUTE GROUP BY svc ORDER BY cnt DESC LIMIT 10 FORMAT TabSeparated\")",
|
||||
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"DESCRIBE TABLE signoz_logs.distributed_logs_v2 FORMAT TabSeparated\")",
|
||||
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT resources_string[''service.name''] as svc, count\\(\\) as cnt FROM signoz_logs.distributed_logs_v2 WHERE timestamp > \\(toUnixTimestamp64Nano\\(now64\\(\\)\\) - 300000000000\\) GROUP BY svc ORDER BY cnt DESC LIMIT 10 FORMAT TabSeparated\")",
|
||||
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT body, resources_string FROM signoz_logs.distributed_logs_v2 WHERE timestamp > \\(toUnixTimestamp64Nano\\(now64\\(\\)\\) - 60000000000\\) LIMIT 1 FORMAT JSONEachRow\")",
|
||||
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT serviceName, count\\(\\) as cnt FROM signoz_traces.distributed_signoz_index_v2 WHERE timestamp > now\\(\\) - INTERVAL 2 MINUTE GROUP BY serviceName ORDER BY cnt DESC LIMIT 10 FORMAT TabSeparated\")",
|
||||
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT serviceName, name, timestamp FROM signoz_traces.distributed_signoz_index_v2 WHERE timestamp > now\\(\\) - INTERVAL 5 MINUTE ORDER BY timestamp DESC LIMIT 5 FORMAT TabSeparated\")",
|
||||
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT serviceName, name, formatDateTime\\(timestamp, ''%Y-%m-%d %H:%M:%S''\\) as ts FROM signoz_traces.distributed_signoz_index_v2 ORDER BY timestamp DESC LIMIT 10 FORMAT TabSeparated\")",
|
||||
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT count\\(\\) FROM signoz_traces.distributed_signoz_index_v2 FORMAT TabSeparated\")",
|
||||
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT count\\(\\) FROM signoz_traces.distributed_signoz_spans FORMAT TabSeparated\")",
|
||||
"Bash(ssh wooo@192.168.0.188 \"docker ps | grep -E ''otel|signoz''\")",
|
||||
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT metric_name, sum\\(value\\) as total FROM signoz_metrics.distributed_samples_v4 WHERE metric_name LIKE ''otelcol%span%'' AND unix_milli > \\(toUnixTimestamp\\(now\\(\\)\\) - 300\\) * 1000 GROUP BY metric_name FORMAT TabSeparated\")",
|
||||
"Bash(for t:*)",
|
||||
"Bash(do)",
|
||||
"Bash(echo -n \"$t: \")",
|
||||
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT count\\(\\) FROM signoz_traces.$t FORMAT TabSeparated\")",
|
||||
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"SELECT serviceName, count\\(\\) as cnt FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp > now\\(\\) - INTERVAL 10 MINUTE GROUP BY serviceName ORDER BY cnt DESC LIMIT 10 FORMAT TabSeparated\")",
|
||||
"Bash(curl -s 'http://192.168.0.188:8123/' --data \":*)",
|
||||
"Bash(curl -s 'http://192.168.0.188:8123/' --data \"DESCRIBE TABLE signoz_traces.distributed_signoz_index_v3 FORMAT TabSeparated\")",
|
||||
"Bash(AWOOOI_API_URL=https://awoooi.wooo.work WEBHOOK_HMAC_SECRET=\"CHANGE_ME_TO_RANDOM_64_CHARS\" python scripts/fire_live_alert.py oomkilled)",
|
||||
"Bash(timeout 10 curl -sN https://awoooi.wooo.work/api/v1/dashboard/stream)",
|
||||
"Bash(curl -s https://awoooi.wooo.work/api/v1/dashboard)",
|
||||
"Bash(npm list:*)",
|
||||
"Bash(node scripts/verify-frontend.js)",
|
||||
"Bash(node /Users/ogt/awoooi/scripts/verify-frontend.js)",
|
||||
"Bash(python -c \"from src.services.proposal_service import ProposalService; print\\(''''✅ ProposalService OK''''\\)\")",
|
||||
"Bash(python -c \"from src.services.openclaw import OpenClawService; print\\(''''✅ OpenClawService OK''''\\)\")",
|
||||
"Bash(curl -s http://192.168.0.120:32334/api/v1/incidents)",
|
||||
"Bash(jq -r \".incidents[:2] | .[] | \"\"\\\\\\(.incident_id\\) - \\\\\\(.status\\) - \\\\\\(.severity\\)\"\"\")",
|
||||
"Bash(curl -s -X POST \"http://192.168.0.120:32334/api/v1/incidents/INC-20260322-4B3152/propose\" -H \"Content-Type: application/json\")",
|
||||
"Bash(kubectl logs:*)",
|
||||
"Bash(ssh ogt@192.168.0.120 \"kubectl logs deployment/awoooi-api -n awoooi-prod --tail 30\")",
|
||||
"Bash(curl -sv -X POST \"http://192.168.0.120:32334/api/v1/incidents/INC-20260322-4B3152/propose\" -H \"Content-Type: application/json\")",
|
||||
"Bash(curl -s http://192.168.0.120:32334/api/v1/health)",
|
||||
"Bash(curl -s \"http://192.168.0.120:32334/api/v1/incidents/INC-20260322-4B3152\")",
|
||||
"Bash(curl -sv \"http://192.168.0.120:32334/api/v1/incidents\")",
|
||||
"Bash(curl -s --retry 3 --retry-delay 2 \"http://192.168.0.120:32334/api/v1/health\")",
|
||||
"Bash(curl -s --retry 3 --retry-delay 2 http://192.168.0.120:32334/api/v1/health)",
|
||||
"Bash(do echo:*)",
|
||||
"Bash(curl -s -X POST \"https://awoooi.wooo.work/api/v1/incidents/INC-20260322-4B3152/propose\" -H \"Content-Type: application/json\")",
|
||||
"Bash(curl -s -X POST \"https://awoooi.wooo.work/api/v1/incidents/INC-20260322-4B3152/proposal\" -H \"Content-Type: application/json\")",
|
||||
"Bash(curl -s -X POST \"https://awoooi.wooo.work/api/v1/incidents/INC-20260322-D6C6A0/proposal\" -H \"Content-Type: application/json\")",
|
||||
"Bash(curl -s http://192.168.0.120:32334/api/v1/approvals/pending)",
|
||||
"Bash(kubectl get:*)",
|
||||
"Bash(curl -s -w \"\\\\nHTTP_CODE: %{http_code}\\\\n\" http://192.168.0.120:32334/api/v1/health)",
|
||||
"Bash(curl -s http://awoooi.wooo.work/api/v1/health)",
|
||||
"Bash(curl -s http://awoooi.wooo.work/api/v1/approvals/pending)",
|
||||
"Bash(curl -sL https://awoooi.wooo.work/api/v1/approvals/pending -k)",
|
||||
"Bash(ssh root@192.168.0.120 \"kubectl get pods -n awoooi-prod -o wide\")",
|
||||
"Bash(ssh root@192.168.0.120 \"kubectl logs -n awoooi-prod -l app=awoooi-api --tail=30\")",
|
||||
"Bash(curl -sL https://awoooi.wooo.work/api/v1/timeline -k)",
|
||||
"Bash(curl -sL https://awoooi.wooo.work/api/v1/incidents -k)",
|
||||
"Bash(curl -sL \"https://awoooi.wooo.work/api/v1/approvals?include_history=true\" -k)",
|
||||
"Bash(curl -sL \"https://awoooi.wooo.work/api/v1/incidents/INC-20260322-4B3152\" -k)",
|
||||
"Bash(curl -sL \"https://awoooi.wooo.work/api/v1/audit-logs?limit=10\" -k)",
|
||||
"Bash(curl -sL https://awoooi.wooo.work/api/v1/audit-logs?limit=10 -k)",
|
||||
"Bash(ssh ogt@192.168.0.120 \"kubectl logs -n awoooi-prod -l app=awoooi-api --tail=100\")",
|
||||
"Bash(ssh ogt@192.168.0.120 \"kubectl logs -n awoooi-prod -l app=awoooi-web --tail=50\")",
|
||||
"Bash(ssh ogt@192.168.0.188 \"kubectl --kubeconfig=/etc/rancher/k3s/k3s.yaml logs -n awoooi-prod -l app=awoooi-api --tail=100 2>/dev/null || docker logs awoooi-api --tail=100 2>/dev/null\")",
|
||||
"Bash(curl -sL \"https://awoooi.wooo.work/api/v1/approvals/pending\" -k -w \"\\\\n\\\\nHTTP: %{http_code}\\\\nTime: %{time_total}s\\\\n\")",
|
||||
"Bash(curl -sL -X POST https://awoooi.wooo.work/api/v1/approvals/182e07c1-118a-49d7-b71c-7d33c5484d9b/sign -H 'Content-Type: application/json' -d '{\"\"\"\"signer_id\"\"\"\": \"\"\"\"test-debug\"\"\"\", \"\"\"\"signer_name\"\"\"\": \"\"\"\"Debug Test\"\"\"\", \"\"\"\"comment\"\"\"\": \"\"\"\"Testing\"\"\"\"}' -k)",
|
||||
"Bash(curl -s https://wwooo.aiops.tw/api/v1/health)",
|
||||
"Bash(curl -s https://wwooo.aiops.tw/api/v1/incidents?limit=5)",
|
||||
"Bash(curl -s https://wwooo.aiops.tw/api/v1/approvals/pending)",
|
||||
"Bash(curl -v -s \"https://wwooo.aiops.tw/api/v1/health\")",
|
||||
"Bash(curl -s \"https://wwooo.aiops.tw/\")",
|
||||
"Bash(curl -s --connect-timeout 5 \"http://192.168.0.120:32334/api/v1/health\")",
|
||||
"Bash(curl -s --connect-timeout 5 \"http://192.168.0.120:32334/api/v1/incidents?limit=5\")",
|
||||
"Bash(ssh -o ConnectTimeout=5 wooo@192.168.0.120 \"kubectl get pods -n awoooi-prod\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-worker-867f67f55d-kvdl2 -n awoooi-prod --tail=50\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get pods -n awoooi-prod | grep -E ''NAME|worker''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get pods -n awoooi-prod | grep worker\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-worker-5bdc5699bb-kcv9q -n awoooi-prod --tail=30\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get networkpolicy -n awoooi-prod -o wide\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get pods -n awoooi-prod --show-labels | grep worker\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get networkpolicy allow-required-egress -n awoooi-prod -o yaml\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl patch networkpolicy allow-required-egress -n awoooi-prod --type=''json'' -p=''[{\"\"op\"\": \"\"replace\"\", \"\"path\"\": \"\"/spec/podSelector/matchLabels\"\", \"\"value\"\": {\"\"system\"\": \"\"awoooi\"\"}}]''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl rollout restart deployment/awoooi-worker -n awoooi-prod\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-worker-5bdc5699bb-kcv9q -n awoooi-prod --tail=15\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-worker -n awoooi-prod --tail=40\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-worker -n awoooi-prod 2>&1 | grep -E ''signal_worker|redis_pool|INFO'' | tail -10\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"curl -s http://localhost:32334/api/v1/health\")",
|
||||
"Bash(ssh wooo@192.168.0.120 'curl -s -X POST \"\"http://localhost:32334/api/v1/webhooks/signals\"\" -H \"\"Content-Type: application/json\"\" -d \"\"{:*)",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get pods -n awoooi-prod | grep -E ''NAME|worker|api''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get pods -n awoooi-prod && echo ''==='' && kubectl logs deployment/awoooi-worker -n awoooi-prod --tail=30\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"curl -s http://localhost:32334/api/v1/incidents?limit=5\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"curl -s http://localhost:32334/api/v1/approvals/pending\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-worker -n awoooi-prod 2>&1 | head -50\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"curl -s http://localhost:32334/api/v1/health | jq ''.components''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get secret -n awoooi-prod -o name\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get secret awoooi-secrets -n awoooi-prod -o jsonpath=''{.data.WEBHOOK_HMAC_SECRET}'' | base64 -d\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-worker -n awoooi-prod --tail=20 2>&1 | grep -E ''signal|incident|telegram|INFO''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-worker -n awoooi-prod --tail=30\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"curl -s ''http://localhost:32334/api/v1/incidents?limit=5''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-worker -n awoooi-prod 2>&1 | grep -iE ''telegram|notification|send'' | tail -10\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"curl -s ''http://localhost:32334/api/v1/approvals/pending''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"curl -s ''http://localhost:32334/api/v1/incidents?limit=2'' && echo ''---'' && curl -s ''http://localhost:32334/api/v1/approvals/pending''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get pods -n awoooi-prod | grep worker && echo ''---'' && kubectl logs deployment/awoooi-worker -n awoooi-prod --tail=30\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-worker-6b8cc94d9c-xjdwr -n awoooi-prod --tail=40\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get networkpolicy allow-required-egress -n awoooi-prod -o jsonpath=''{.spec.podSelector}''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl patch networkpolicy allow-required-egress -n awoooi-prod --type=''json'' -p=''[{\"\"op\"\": \"\"replace\"\", \"\"path\"\": \"\"/spec/podSelector\"\", \"\"value\"\": {\"\"matchLabels\"\": {\"\"system\"\": \"\"awoooi\"\"}}}]''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl delete pod awoooi-worker-6b8cc94d9c-xjdwr -n awoooi-prod\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-worker-6b8cc94d9c-pmzj7 -n awoooi-prod --tail=30\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-worker-6b8cc94d9c-pmzj7 -n awoooi-prod --tail=20\")",
|
||||
"Bash(ls -la /Users/ogt/awoooi/apps/api/scripts/fire*.py)",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-worker -n awoooi-prod --tail=50\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"curl -s ''http://localhost:32334/api/v1/incidents?limit=3''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-worker -n awoooi-prod 2>&1 | grep -iE ''proposal|approval|llm|ai|ollama|generate'' | tail -20\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get deployment awoooi-worker -n awoooi-prod -o jsonpath=''{.spec.template.spec.containers[0].envFrom}''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get deployment awoooi-api -n awoooi-prod -o jsonpath=''{.spec.template.spec.containers[0].envFrom}''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get configmap awoooi-config -n awoooi-prod -o jsonpath=''''{.data}''''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get secret awoooi-secrets -n awoooi-prod -o jsonpath=''{.data}'' | tr '','' ''\\\\n''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl exec deployment/awoooi-api -n awoooi-prod -- python -c ''import os; print\\(os.getenv\\(\"\"DATABASE_URL\"\", \"\"NOT SET\"\"\\)[:50]\\)''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-api-75ffbfb88b-2htfh -n awoooi-prod --tail=50\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl exec awoooi-api-6687db5564-rv755 -n awoooi-prod -- env | grep DATABASE\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"PGPASSWORD=''CHANGE_ME'' psql -h 192.168.0.188 -U awoooi -d awoooi_prod -c ''SELECT 1'' 2>&1 || echo ''Connection failed''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get pods -n awoooi-prod\")",
|
||||
"Bash(curl -sv http://192.168.0.120:32334/api/v1/health)",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-api-75ffbfb88b-2htfh -n awoooi-prod --tail=20 2>&1\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-worker-7fb7d5b55f-n48gk -n awoooi-prod --tail=20 2>&1\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get rs -n awoooi-prod\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl scale rs awoooi-api-75ffbfb88b -n awoooi-prod --replicas=0\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl scale rs awoooi-worker-7fb7d5b55f -n awoooi-prod --replicas=0\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-worker -n awoooi-prod --tail=10\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get deploy -n awoooi-prod -o wide\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get deploy awoooi-api -n awoooi-prod -o jsonpath=''{.spec.replicas}''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get deploy awoooi-worker -n awoooi-prod -o jsonpath=''{.spec.replicas}''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl rollout status deployment/awoooi-api -n awoooi-prod --timeout=5s\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl rollout history deployment/awoooi-api -n awoooi-prod\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl rollout undo deployment/awoooi-api -n awoooi-prod\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl rollout undo deployment/awoooi-worker -n awoooi-prod\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl rollout status deployment/awoooi-api -n awoooi-prod --timeout=30s\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get rs awoooi-api-6687db5564 -n awoooi-prod -o jsonpath=''{.metadata.annotations.deployment\\\\.kubernetes\\\\.io/revision}''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl delete pod awoooi-api-7f487f7cbb-5f88g -n awoooi-prod\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl rollout undo deployment/awoooi-api -n awoooi-prod --to-revision=46\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-worker -n awoooi-prod --tail=15\")",
|
||||
"Bash(curl -s http://192.168.0.120:32334/api/v1/incidents?limit=3)",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-worker -n awoooi-prod --since=2m\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-api -n awoooi-prod --since=2m | grep -i webhook\")",
|
||||
"Bash(curl -sv -X POST http://192.168.0.120:32334/api/v1/webhooks/alertmanager -H \"Content-Type: application/json\" -d '{:*)",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get endpoints -n awoooi-prod\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"curl -s http://localhost:32334/api/v1/health | jq ''{status}''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-worker -n awoooi-prod --since=30s\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-api-fc4744758-7wfv5 -n awoooi-prod --tail=30 2>&1\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-worker-6fc548887b-b9mtf -n awoooi-prod --tail=30 2>&1\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get configmap awoooi-config -n awoooi-prod -o yaml\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get secret awoooi-secrets -n awoooi-prod -o jsonpath=''''{.data}''''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get pod awoooi-worker-6fc548887b-b9mtf -n awoooi-prod -o jsonpath=''{.metadata.labels}''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get networkpolicy -n awoooi-prod -o yaml\")",
|
||||
"Bash(ssh wooo@192.168.0.120 'kubectl patch networkpolicy allow-required-egress -n awoooi-prod --type=json -p=\"\"[{\\\\\"\"op\\\\\"\": \\\\\"\"replace\\\\\"\", \\\\\"\"path\\\\\"\": \\\\\"\"/spec/podSelector/matchLabels\\\\\"\", \\\\\"\"value\\\\\"\": {\\\\\"\"system\\\\\"\": \\\\\"\"awoooi\\\\\"\"}}]\"\"')",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl rollout restart deployment/awoooi-api deployment/awoooi-worker -n awoooi-prod\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs awoooi-api-6c69b77894-d6jqq -n awoooi-prod --tail=20\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl run nc-test --rm -it --restart=Never --image=busybox -- nc -zv 192.168.0.188 5432\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get pods -n awoooi-prod -o=custom-columns=''NAME:.metadata.name,IMAGE:.spec.containers[0].image''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl exec awoooi-api-6687db5564-rv755 -n awoooi-prod -- ls -la *.db 2>/dev/null || echo ''No SQLite files''\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl exec awoooi-api-6687db5564-rv755 -n awoooi-prod -- env | grep -E ''MOCK|DATABASE|SQLITE''\")",
|
||||
"Bash(curl -s \"http://192.168.0.120:32334/api/v1/approvals\")",
|
||||
"Bash(python -m py_compile src/lewooogo_brain/engines/incident_engine.py src/lewooogo_brain/engines/proposal_engine.py src/lewooogo_brain/skills/loader.py)",
|
||||
"Bash(python packages/lewooogo-brain/tests/test_skill_loader.py)",
|
||||
"Bash(python packages/lewooogo-brain/tests/test_incident_engine.py)",
|
||||
"Bash(python packages/lewooogo-brain/tests/test_guardrails.py)",
|
||||
"Bash(python -m py_compile src/lewooogo_brain/engines/proposal_engine.py src/lewooogo_brain/engines/incident_engine.py src/lewooogo_brain/skills/loader.py)",
|
||||
"Bash(PYTHONPATH=/Users/ogt/awoooi/packages/lewooogo-brain/src python -c \":*)",
|
||||
"Bash(curl -s --connect-timeout 5 http://192.168.0.188:8000/api/v1/health)",
|
||||
"Bash(curl -s \"https://awoooi.wooo.work/api/v1/approvals/pending\")",
|
||||
"Bash(curl -s \"https://awoooi.wooo.work/api/v1/approvals?status=pending\")",
|
||||
"Bash(curl -s \"https://awoooi.wooo.work/api/v1/incidents\")",
|
||||
"Bash(uv sync:*)",
|
||||
"Bash(python -c \"from src.routers.proposals import router; print\\(''✅ Router 語法驗證通過''\\)\")",
|
||||
"Bash(curl -s -X GET \"https://awoooi.wooo.work/api/v1/health\" --connect-timeout 10)",
|
||||
"Bash(curl -s -X GET \"https://awoooi.wooo.work/api/v1/incidents\" --connect-timeout 10)",
|
||||
"Bash(curl -s -o /dev/null -w \"%{http_code}\" \"https://awoooi.wooo.work\" --connect-timeout 10)",
|
||||
"Bash(curl -s -o /dev/null -w \"%{http_code}\" -L \"https://awoooi.wooo.work\" --connect-timeout 10)",
|
||||
"Bash(curl -s -X POST \"https://awoooi.wooo.work/api/v1/incidents/test-123/propose\" -H \"Content-Type: application/json\" -d '{\"\"require_dry_run\"\": true}' --connect-timeout 10)",
|
||||
"Bash(ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no ollama@192.168.0.120 \"kubectl get pods -n awoooi-prod -o wide\")",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get pods -n awoooi-prod)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl logs awoooi-api-64c8659cff-grslz -n awoooi-prod --tail=50)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get secret awoooi-secrets -n awoooi-prod -o jsonpath='{.data.DATABASE_URL}')",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl rollout restart deployment/awoooi-api -n awoooi-prod)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get pods -n awoooi-prod -l app=awoooi-api)",
|
||||
"Bash(curl -s \"https://awoooi.wooo.work/api/v1/health\" --connect-timeout 10)",
|
||||
"Bash(curl -s -o /dev/null -w \"%{http_code}\" -L \"https://awoooi.wooo.work/zh-TW\" --connect-timeout 10)",
|
||||
"Bash(python -c \"from src.routers.proposals import router; print\\(''✅ Router import successful''\\)\")",
|
||||
"Bash(PGPASSWORD=postgres psql -h 192.168.0.188 -U awoooi -d awoooi_dev -c \"SELECT incident_id, status, severity FROM incidents LIMIT 5;\")",
|
||||
"Bash(PGPASSWORD=AwoooiProd2026 psql -h 192.168.0.188 -U awoooi -d awoooi_prod -c \"SELECT incident_id, status, severity FROM incidents LIMIT 5;\")",
|
||||
"Bash(curl -sf http://192.168.0.120:32334/api/v1/incidents)",
|
||||
"Bash(curl -v \"http://192.168.0.120:32334/api/v1/incidents\")",
|
||||
"Bash(export KUBECONFIG=/Users/ogt/.kube/config-120)",
|
||||
"Bash(curl -sI \"http://awoooi.wooo.work/\")",
|
||||
"Bash(openssl s_client -servername awoooi.wooo.work -connect awoooi.wooo.work:443)",
|
||||
"Bash(openssl x509:*)",
|
||||
"Bash(curl -s -X POST \"http://192.168.0.120:32334/api/v1/incidents/INC-20260323-7DE10B/propose\" -H \"Content-Type: application/json\" -d '{\"\"\"\"require_dry_run\"\"\"\": true}')",
|
||||
"Bash(python -c \"from src.services.executor import execute_approved_proposal, get_executor, ActionExecutor; print\\(''✅ Import successful''\\)\")",
|
||||
"Bash(curl -s https://awoooi.woooo.cc/api/v1/incidents)",
|
||||
"Bash(curl -s https://awoooi.woooo.cc/api/v1/health)",
|
||||
"Bash(curl -s --connect-timeout 10 https://awoooi.woooo.cc/api/v1/health)",
|
||||
"Bash(ssh ogt@192.168.70.202 \"sudo kubectl get pods -n awoooi 2>/dev/null\")",
|
||||
"Bash(curl -s --connect-timeout 5 http://192.168.70.200:8000/api/v1/health)",
|
||||
"Bash(ssh ogt@192.168.70.202 \"sudo kubectl get pods -n awoooi-prod\")",
|
||||
"Bash(ssh -o StrictHostKeyChecking=no ogt@192.168.70.202 \"sudo kubectl get pods -n awoooi-prod\")",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get pods -A)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl logs -n awoooi-prod awoooi-worker-7479556d76-jbbps --tail 30)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl logs -n awoooi-prod -l app=awoooi-api --tail 20)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl exec -n awoooi-prod deployment/awoooi-api -- curl -s http://localhost:8000/api/v1/incidents)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl exec -n awoooi-prod deployment/awoooi-api -- python -c \"import httpx; r = httpx.get\\(''http://localhost:8000/api/v1/incidents''\\); print\\(r.text\\)\")",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get ingress -n awoooi-prod -o wide)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get svc -n awoooi-prod)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get deployment awoooi-worker -n awoooi-prod -o jsonpath='{.spec.template.spec.containers[0].env}')",
|
||||
"Bash(curl -s --connect-timeout 5 http://192.168.70.202:32334/api/v1/health)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl describe deployment awoooi-worker -n awoooi-prod)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get configmap -n awoooi-prod)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl describe deployment awoooi-api -n awoooi-prod)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get configmap awoooi-config -n awoooi-prod -o yaml)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get secrets -n awoooi-prod)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get secret awoooi-secrets -n awoooi-prod -o jsonpath='{.data}')",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get secret awoooi-secrets -n awoooi-prod -o jsonpath='{.data.REDIS_URL}')",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl rollout restart deployment/awoooi-worker -n awoooi-prod)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get pods -n awoooi-prod -l app=awoooi-worker)",
|
||||
"Bash(curl -s --connect-timeout 5 https://awoooi.wooo.work/api/v1/health)",
|
||||
"Bash(curl -s https://awoooi.wooo.work/api/v1/incidents)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl logs -n awoooi-prod -l app=awoooi-worker --tail 10)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get svc -n wooo-aiops-prod)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get svc -A)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl logs -n awoooi-prod awoooi-worker-76bdf9786d-rvtmz --tail 15)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl exec -n awoooi-prod deployment/awoooi-api -- python -c \"import os; print\\(os.getenv\\(''REDIS_URL'', ''NOT_SET''\\)\\)\")",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get deployment awoooi-api -n awoooi-prod -o yaml)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl rollout restart deployment/awoooi-api deployment/awoooi-worker -n awoooi-prod)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl logs -n awoooi-prod awoooi-api-865cdc97db-6mpzz --tail 20)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get pods -n wooo-aiops-prod -l app=redis)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get pods -n wooo-aiops-prod)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl exec -n wooo-aiops-prod redis-6c6fcd64b8-8wznx -- redis-cli ping)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl exec -n awoooi-prod awoooi-api-6445c76797-mrl7p -- python -c \"import redis; r=redis.Redis\\(host=''10.43.239.47'', port=6379, db=10\\); print\\(r.ping\\(\\)\\)\")",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get networkpolicy -A)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get networkpolicy allow-required-egress -n awoooi-prod -o yaml)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl patch networkpolicy allow-required-egress -n awoooi-prod --type='json' -p='[{\"\"op\"\": \"\"add\"\", \"\"path\"\": \"\"/spec/egress/0/ports/-\"\", \"\"value\"\": {\"\"port\"\": 6379, \"\"protocol\"\": \"\"TCP\"\"}}]')",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl logs -n awoooi-prod awoooi-api-5fcc484b85-qpwt6 --tail 15)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl exec -n awoooi-prod awoooi-api-6445c76797-mrl7p -- python -c \"import os; print\\(''REDIS_URL:'', os.getenv\\(''REDIS_URL''\\)\\); import redis; r=redis.Redis.from_url\\(os.getenv\\(''REDIS_URL''\\)\\); print\\(''PING:'', r.ping\\(\\)\\)\")",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl logs -n awoooi-prod awoooi-worker-59d7588d75-p5tht --tail 20)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl logs -n awoooi-prod -l app=awoooi-worker --tail 30)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get deployment awoooi-worker -n awoooi-prod -o yaml)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get networkpolicy -n awoooi-prod -o wide)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl apply -f -)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl logs -n awoooi-prod awoooi-worker-6cd7dcbc9-5mtfq --tail 15)",
|
||||
"Bash(jq .incidents[0])",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl get configmap awoooi-config -n awoooi-prod -o jsonpath='{.data.OPENCLAW_URL}')",
|
||||
"Bash(curl -s --connect-timeout 5 http://192.168.0.188:8088/health)",
|
||||
"Bash(curl -s --connect-timeout 5 http://192.168.0.188:8088/)",
|
||||
"Bash(nc -zv 192.168.0.188 8088 -w 5)",
|
||||
"Bash(ping -c 2 192.168.0.188)",
|
||||
"Bash(ping -c 2 192.168.70.202)",
|
||||
"Bash(grep -n \"mapToDualState\" /Users/ogt/awoooi/apps/web/src/app/[locale]/page.tsx -A 30)",
|
||||
"Bash(head -40 /Users/ogt/awoooi/apps/web/src/app/[locale]/page.tsx)",
|
||||
"Bash(ssh -o ConnectTimeout=10 -o StrictHostKeyChecking=no ollama@192.168.0.188 \"docker ps -a | grep -i claw; docker start openclaw 2>/dev/null || docker start clawbot 2>/dev/null || echo ''Container not found, listing all:'' && docker ps -a --format ''table {{.Names}}\\\\t{{.Status}}'' | head -10\")",
|
||||
"Bash(curl -s --connect-timeout 5 http://192.168.0.188:8089/health)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl rollout status deployment/awoooi-web -n awoooi-prod --timeout=60s)",
|
||||
"Bash(grep -rn \"clawbot\\\\|ClawBot\" /Users/ogt/awoooi/ --include=*.yaml --include=*.yml --include=*.json)",
|
||||
"Bash(grep -rn \"ClawBot\\\\|clawbot\" /Users/ogt/awoooi/apps/ --include=*.py --include=*.ts --include=*.tsx)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl logs deployment/awoooi-api -n awoooi-prod --tail=100)",
|
||||
"Bash(KUBECONFIG=/Users/ogt/awoooi/apps/api/k3s-prod.yaml kubectl logs deployment/awoooi-api -n awoooi-prod --tail=200)",
|
||||
"Bash(export KUBECONFIG=/Users/ogt/awoooi/k3s-prod.yaml)",
|
||||
"Bash(ssh root@192.168.0.120 \"kubectl logs deployment/awoooi-api -n awoooi-prod --tail=200 2>&1 | grep -iE ''error|fail|exception|execute|background|parse'' | tail -40\")",
|
||||
"Bash(curl -s https://awoooi.wooo.work/api/v1/approvals)",
|
||||
"Bash(ssh k3s@192.168.0.120 \"kubectl logs deployment/awoooi-api -n awoooi-prod --tail=200 2>&1 | grep -iE ''error|fail|execute|background|parse'' | tail -40\")",
|
||||
"Bash(ssh ubuntu@192.168.0.120 \"kubectl logs deployment/awoooi-api -n awoooi-prod --tail=200 2>&1 | grep -iE ''error|fail|execute|background|parse'' | tail -40\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-api -n awoooi-prod --tail=200 2>&1 | grep -iE ''error|fail|execute|background|parse|skip'' | tail -50\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-api -n awoooi-prod --tail=500 2>&1 | grep -iE ''background_execution|approve_action|reject|k8s_executor'' | tail -30\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl get deploy,sts -n awoooi-prod\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl rollout status deployment/awoooi-api -n awoooi-prod --timeout=120s 2>&1\")",
|
||||
"Bash(ssh wooo@192.168.0.120 \"kubectl logs deployment/awoooi-api -n awoooi-prod --tail=50 2>&1 | grep -iE ''background_execution|k8s_executor|parse'' | tail -10\")"
|
||||
],
|
||||
"additionalDirectories": [
|
||||
"/Users/ogt/awoooi/docs",
|
||||
"/Users/ogt/.claude/projects/-Users-ogt-awoooi/memory",
|
||||
"/Users/ogt/awoooi/apps/web/src/app",
|
||||
"/Users/ogt/awoooi/apps/api",
|
||||
"/Users/ogt/awoooi/apps/api/http:/localhost:8000/api/v1",
|
||||
"/Users/ogt/awoooi/apps/web/public",
|
||||
"/Users/ogt/Downloads",
|
||||
"/Users/ogt/awoooi/apps/web/test-results",
|
||||
"/Users/ogt/awoooi",
|
||||
"/Users/ogt/awoooi/apps/web/src/app/[locale]",
|
||||
"/tmp"
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -19,10 +19,14 @@
|
||||
|
||||
# 文件與腳本(不需要進 image)
|
||||
# 注意: docs/runbooks/, docs/adr/, .agents/skills/ 供 RAG 索引 (ADR-067 Phase 33)
|
||||
# scripts/ 大部分不需要進 image,但 CronJob 腳本需要
|
||||
# scripts/ 大部分不需要進 image,僅白名單 production runtime/ops 種子腳本
|
||||
# 2026-04-12 ogt (ADR-073 P2-1): 白名單允許 cron_km_vectorize.py
|
||||
scripts
|
||||
# 2026-05-13 codex: 白名單 T16 auto-repair canary PlayBook seed script
|
||||
scripts/**
|
||||
!scripts/
|
||||
!scripts/cron_km_vectorize.py
|
||||
!scripts/ops/
|
||||
!scripts/ops/awooop-seed-auto-repair-canary-playbook.py
|
||||
|
||||
# Node 快取(monorepo 根目錄)
|
||||
node_modules
|
||||
|
||||
@@ -10,7 +10,7 @@ on:
|
||||
|
||||
jobs:
|
||||
lint:
|
||||
runs-on: self-hosted
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
|
||||
@@ -43,10 +43,19 @@ jobs:
|
||||
├ 📝 ${{ steps.commit.outputs.message }}
|
||||
├ 🔖 <code>${{ steps.commit.outputs.short_sha }}</code>
|
||||
└ 🌿 dev branch"
|
||||
printf '%b' "$MSG" | curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
|
||||
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
|
||||
-d "parse_mode=HTML" \
|
||||
--data-urlencode "text@-"
|
||||
if AWOOI_CICD_STATUS=running \
|
||||
AWOOI_CICD_STAGE=dev-deploy \
|
||||
AWOOI_CICD_JOB_NAME="[DEV] 部署開始" \
|
||||
AWOOI_CICD_COMMIT_SHA="${GITHUB_SHA}" \
|
||||
AWOOI_CICD_SUMMARY="${{ steps.commit.outputs.message }}" \
|
||||
scripts/ci/notify-awoooi-cicd.sh; then
|
||||
echo "Dev deploy start notification mirrored through AWOOI API"
|
||||
else
|
||||
printf '%b' "$MSG" | curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
|
||||
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
|
||||
-d "parse_mode=HTML" \
|
||||
--data-urlencode "text@-"
|
||||
fi
|
||||
|
||||
# API 測試 (同 prod CI,確保 dev 也通過)
|
||||
- name: Run API Tests
|
||||
@@ -78,11 +87,18 @@ jobs:
|
||||
echo "✅ API 測試通過"
|
||||
|
||||
- name: Login to Harbor
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ${{ env.HARBOR }}
|
||||
username: ${{ secrets.HARBOR_USERNAME }}
|
||||
password: ${{ secrets.HARBOR_PASSWORD }}
|
||||
run: |
|
||||
HARBOR_USERNAME="$(cat <<'AWOOOI_SECRET_HARBOR_USERNAME'
|
||||
${{ secrets.HARBOR_USERNAME }}
|
||||
AWOOOI_SECRET_HARBOR_USERNAME
|
||||
)"
|
||||
HARBOR_PASSWORD="$(cat <<'AWOOOI_SECRET_HARBOR_PASSWORD'
|
||||
${{ secrets.HARBOR_PASSWORD }}
|
||||
AWOOOI_SECRET_HARBOR_PASSWORD
|
||||
)"
|
||||
printf '%s' "$HARBOR_PASSWORD" | docker login "${{ env.HARBOR }}" \
|
||||
-u "$HARBOR_USERNAME" \
|
||||
--password-stdin
|
||||
|
||||
# Dev API 鏡像:強制重建,不用 cache(確保 models.json 等配置文件更新)
|
||||
- name: Build and Push API (Dev)
|
||||
@@ -98,34 +114,57 @@ jobs:
|
||||
|
||||
# 注入 Dev K8s Secrets
|
||||
- name: Inject Dev K8s Secrets
|
||||
env:
|
||||
SSH_PRIVATE_KEY: ${{ secrets.DEPLOY_SSH_KEY }}
|
||||
TG_BOT_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
|
||||
TG_CHAT_ID: ${{ secrets.TELEGRAM_CHAT_ID }}
|
||||
NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
|
||||
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
|
||||
run: |
|
||||
secret_b64() {
|
||||
python3 -c 'import base64, sys; data=sys.stdin.buffer.read(); data=data[:-1] if data.endswith(b"\n") else data; sys.stdout.write(base64.b64encode(data).decode())'
|
||||
}
|
||||
write_deploy_key() {
|
||||
mkdir -p ~/.ssh
|
||||
umask 077
|
||||
cat > ~/.ssh/deploy_key <<'AWOOOI_DEPLOY_KEY'
|
||||
${{ secrets.DEPLOY_SSH_KEY }}
|
||||
AWOOOI_DEPLOY_KEY
|
||||
chmod 600 ~/.ssh/deploy_key
|
||||
}
|
||||
TG_BOT_TOKEN_B64="$(secret_b64 <<'AWOOOI_SECRET_TG_BOT_TOKEN'
|
||||
${{ secrets.TELEGRAM_BOT_TOKEN }}
|
||||
AWOOOI_SECRET_TG_BOT_TOKEN
|
||||
)"
|
||||
TG_CHAT_ID_B64="$(secret_b64 <<'AWOOOI_SECRET_TG_CHAT_ID'
|
||||
${{ secrets.TELEGRAM_CHAT_ID }}
|
||||
AWOOOI_SECRET_TG_CHAT_ID
|
||||
)"
|
||||
NVIDIA_API_KEY_B64="$(secret_b64 <<'AWOOOI_SECRET_NVIDIA_API_KEY'
|
||||
${{ secrets.NVIDIA_API_KEY }}
|
||||
AWOOOI_SECRET_NVIDIA_API_KEY
|
||||
)"
|
||||
GEMINI_API_KEY_B64="$(secret_b64 <<'AWOOOI_SECRET_GEMINI_API_KEY'
|
||||
${{ secrets.GEMINI_API_KEY }}
|
||||
AWOOOI_SECRET_GEMINI_API_KEY
|
||||
)"
|
||||
|
||||
mkdir -p ~/.ssh
|
||||
echo "$SSH_PRIVATE_KEY" > ~/.ssh/deploy_key
|
||||
chmod 600 ~/.ssh/deploy_key
|
||||
ssh -o StrictHostKeyChecking=no -i ~/.ssh/deploy_key wooo@192.168.0.121 << SECRETS
|
||||
write_deploy_key
|
||||
# 2026-05-05 Codex: kubectl runs on 120 control-plane. 121 is a
|
||||
# worker and its local kubeconfig points at 127.0.0.1:6443.
|
||||
ssh -o StrictHostKeyChecking=no -i ~/.ssh/deploy_key wooo@192.168.0.120 << SECRETS
|
||||
set -e
|
||||
export KUBECONFIG=/etc/rancher/k3s/k3s.yaml
|
||||
|
||||
sudo kubectl patch secret awoooi-secrets -n awoooi-dev --type='json' -p='[
|
||||
{"op":"replace","path":"/data/OPENCLAW_TG_BOT_TOKEN","value":"'"$(echo -n "${TG_BOT_TOKEN}" | base64 -w 0)"'"},
|
||||
{"op":"replace","path":"/data/OPENCLAW_TG_CHAT_ID","value":"'"$(echo -n "${TG_CHAT_ID}" | base64 -w 0)"'"}
|
||||
{"op":"replace","path":"/data/OPENCLAW_TG_BOT_TOKEN","value":"${TG_BOT_TOKEN_B64}"},
|
||||
{"op":"replace","path":"/data/OPENCLAW_TG_CHAT_ID","value":"${TG_CHAT_ID_B64}"}
|
||||
]' || echo "⚠️ Telegram Secrets patch 跳過"
|
||||
|
||||
if [ -n "${NVIDIA_API_KEY}" ]; then
|
||||
if [ -n "${NVIDIA_API_KEY_B64}" ]; then
|
||||
sudo kubectl patch secret awoooi-secrets -n awoooi-dev --type='json' -p='[
|
||||
{"op":"replace","path":"/data/NVIDIA_API_KEY","value":"'"$(echo -n "${NVIDIA_API_KEY}" | base64 -w 0)"'"}
|
||||
{"op":"replace","path":"/data/NVIDIA_API_KEY","value":"${NVIDIA_API_KEY_B64}"}
|
||||
]' && echo "✅ NVIDIA_API_KEY 已注入 dev"
|
||||
fi
|
||||
|
||||
if [ -n "${GEMINI_API_KEY}" ]; then
|
||||
if [ -n "${GEMINI_API_KEY_B64}" ]; then
|
||||
sudo kubectl patch secret awoooi-secrets -n awoooi-dev --type='json' -p='[
|
||||
{"op":"replace","path":"/data/GEMINI_API_KEY","value":"'"$(echo -n "${GEMINI_API_KEY}" | base64 -w 0)"'"}
|
||||
{"op":"replace","path":"/data/GEMINI_API_KEY","value":"${GEMINI_API_KEY_B64}"}
|
||||
]' && echo "✅ GEMINI_API_KEY 已注入 dev"
|
||||
fi
|
||||
|
||||
@@ -134,14 +173,12 @@ jobs:
|
||||
|
||||
# 部署到 awoooi-dev
|
||||
- name: Deploy to Dev K8s
|
||||
env:
|
||||
SSH_PRIVATE_KEY: ${{ secrets.DEPLOY_SSH_KEY }}
|
||||
run: |
|
||||
cat k8s/awoooi-dev/02-configmap.yaml | \
|
||||
ssh -o StrictHostKeyChecking=no -i ~/.ssh/deploy_key wooo@192.168.0.121 \
|
||||
ssh -o StrictHostKeyChecking=no -i ~/.ssh/deploy_key wooo@192.168.0.120 \
|
||||
"export KUBECONFIG=/etc/rancher/k3s/k3s.yaml && sudo kubectl apply -f -"
|
||||
|
||||
ssh -o StrictHostKeyChecking=no -i ~/.ssh/deploy_key wooo@192.168.0.121 << 'DEPLOY'
|
||||
ssh -o StrictHostKeyChecking=no -i ~/.ssh/deploy_key wooo@192.168.0.120 << 'DEPLOY'
|
||||
set -e
|
||||
export KUBECONFIG=/etc/rancher/k3s/k3s.yaml
|
||||
|
||||
@@ -182,10 +219,20 @@ jobs:
|
||||
├ 🔖 <code>${{ steps.commit.outputs.short_sha }}</code>
|
||||
├ ⏱️ 耗時: ${MINUTES}m ${SECONDS}s
|
||||
└ 🩺 http://192.168.0.125:32344/api/v1/health"
|
||||
printf '%b' "$MSG" | curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
|
||||
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
|
||||
-d "parse_mode=HTML" \
|
||||
--data-urlencode "text@-"
|
||||
if AWOOI_CICD_STATUS=success \
|
||||
AWOOI_CICD_STAGE=dev-deploy \
|
||||
AWOOI_CICD_JOB_NAME="[DEV] 部署完成" \
|
||||
AWOOI_CICD_COMMIT_SHA="${GITHUB_SHA}" \
|
||||
AWOOI_CICD_DURATION_SECONDS="${DURATION}" \
|
||||
AWOOI_CICD_SUMMARY="${{ steps.commit.outputs.message }}" \
|
||||
scripts/ci/notify-awoooi-cicd.sh; then
|
||||
echo "Dev deploy success notification mirrored through AWOOI API"
|
||||
else
|
||||
printf '%b' "$MSG" | curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
|
||||
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
|
||||
-d "parse_mode=HTML" \
|
||||
--data-urlencode "text@-"
|
||||
fi
|
||||
|
||||
- name: Notify Dev Deploy Failure
|
||||
if: failure()
|
||||
@@ -194,7 +241,16 @@ jobs:
|
||||
├ 📝 ${{ steps.commit.outputs.message }}
|
||||
├ 🔖 <code>${{ steps.commit.outputs.short_sha }}</code>
|
||||
└ 🔗 <a href=\"http://192.168.0.110:3001/wooo/awoooi/actions\">查看日誌</a>"
|
||||
printf '%b' "$MSG" | curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
|
||||
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
|
||||
-d "parse_mode=HTML" \
|
||||
--data-urlencode "text@-"
|
||||
if AWOOI_CICD_STATUS=failed \
|
||||
AWOOI_CICD_STAGE=dev-deploy \
|
||||
AWOOI_CICD_JOB_NAME="[DEV] 部署失敗" \
|
||||
AWOOI_CICD_COMMIT_SHA="${GITHUB_SHA}" \
|
||||
AWOOI_CICD_SUMMARY="${{ steps.commit.outputs.message }}" \
|
||||
scripts/ci/notify-awoooi-cicd.sh; then
|
||||
echo "Dev deploy failure notification mirrored through AWOOI API"
|
||||
else
|
||||
printf '%b' "$MSG" | curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
|
||||
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
|
||||
-d "parse_mode=HTML" \
|
||||
--data-urlencode "text@-"
|
||||
fi
|
||||
|
||||
@@ -17,6 +17,9 @@ on:
|
||||
- 'apps/**'
|
||||
- 'k8s/**'
|
||||
- '.dockerignore'
|
||||
# The Dockerfile COPYs scripts/ into the API image, so keep production ops
|
||||
# seed scripts deploy-coupled instead of repo-only.
|
||||
- 'scripts/ops/awooop-seed-auto-repair-canary-playbook.py'
|
||||
# Workflow-only changes do not rebuild runtime images. Use workflow_dispatch
|
||||
# when an operator explicitly wants to test the CD pipeline itself.
|
||||
# docs/、memory/、ADR 等不觸發
|
||||
@@ -42,6 +45,15 @@ env:
|
||||
OTEL_SERVICE_NAME: awoooi-cd
|
||||
OTEL_RESOURCE_ATTRIBUTES: service.version=${{ github.sha }},deployment.environment=production
|
||||
CI_IMAGE: 192.168.0.110:5000/awoooi/ci-runner:act-22.04
|
||||
# 2026-05-06 Codex: deploy through the 120 control-plane node. After dirty
|
||||
# reboots, 121 host-key prompts can block the non-interactive host runner.
|
||||
# Both nodes support the sudo kubectl path, but 120 removes the extra hop.
|
||||
K8S_SSH_HOST: 192.168.0.120
|
||||
K8S_API_SERVER: https://192.168.0.120:6443
|
||||
# 2026-05-05 Codex: health/smoke probes use the keepalived VIP instead of a
|
||||
# fixed node. Kubectl still tunnels through K8S_SSH_HOST with --server=120.
|
||||
API_HEALTH_URL: http://192.168.0.125:32334/api/v1/health
|
||||
ALERT_CHAIN_API_URL: http://192.168.0.125:32334
|
||||
|
||||
jobs:
|
||||
tests:
|
||||
@@ -53,8 +65,20 @@ jobs:
|
||||
# 2026-04-10 ogt: B5 改用 docker run 本地啟動,移除 services: 宣告
|
||||
# Gitea act runner 的 services: container name 為空,導致 CI 失敗
|
||||
steps:
|
||||
- name: Bootstrap Host Runner Tools
|
||||
# 2026-05-05 Codex: awoooi-host maps to the long-lived act-runner
|
||||
# container. After dirty reboots it may not contain node/curl/git, and
|
||||
# actions/checkout@v4 fails before tests can start.
|
||||
run: |
|
||||
if command -v apk >/dev/null 2>&1; then
|
||||
apk add --no-cache nodejs npm git curl bash openssh-client docker-cli docker-cli-buildx
|
||||
fi
|
||||
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Guard Workflow Secret Surfaces
|
||||
run: node scripts/ci/check-gitea-step-env-secrets.js
|
||||
|
||||
# 2026-03-31 ogt: 優化告警格式 - 提高可讀性
|
||||
- name: Get Commit Info
|
||||
id: commit
|
||||
@@ -74,10 +98,20 @@ jobs:
|
||||
MSG=$(printf '🚀 <b>AWOOOI 部署開始</b>\n├ 📝 <code>%s</code>\n├ 🔖 <code>%s</code>\n└ 👤 %s' "${COMMIT_ESC}" "${SHORT_SHA}" "${ACTOR}")
|
||||
# 2026-05-02 Claude Opus 4.7 + 統帥 ogt: notify 失敗不該擋整條 CI(鐵證:
|
||||
# curl 400 從 5/1 起連續炸 14 個 commit 的 build-and-deploy)— 對齊 line 922 既有 pattern
|
||||
curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
|
||||
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
|
||||
-d "parse_mode=HTML" \
|
||||
--data-urlencode "text=${MSG}" || echo "TG notify failed (non-fatal): exit=$?"
|
||||
if AWOOI_CICD_STATUS=running \
|
||||
AWOOI_CICD_STAGE=tests \
|
||||
AWOOI_CICD_JOB_NAME="AWOOOI 部署開始" \
|
||||
AWOOI_CICD_COMMIT_SHA="${GITHUB_SHA}" \
|
||||
AWOOI_CICD_TRIGGERED_BY="${ACTOR}" \
|
||||
AWOOI_CICD_SUMMARY="${COMMIT_MSG}" \
|
||||
scripts/ci/notify-awoooi-cicd.sh; then
|
||||
echo "✅ CI/CD start notification mirrored through AWOOI API"
|
||||
else
|
||||
curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
|
||||
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
|
||||
-d "parse_mode=HTML" \
|
||||
--data-urlencode "text=${MSG}" || echo "TG notify failed (non-fatal): exit=$?"
|
||||
fi
|
||||
|
||||
|
||||
|
||||
@@ -237,10 +271,20 @@ jobs:
|
||||
ACTOR="${{ github.actor }}"
|
||||
COMMIT_ESC=$(echo "$COMMIT_MSG" | sed 's/&/\&amp;/g; s/</\&lt;/g; s/>/\&gt;/g')
|
||||
MSG=$(printf '❌ <b>AWOOOI 部署失敗</b>\n├ 📝 <code>%s</code>\n├ 🔖 <code>%s</code>\n├ 👤 %s\n├ 🧪 Stage: tests\n└ 🔗 http://192.168.0.110:3001/wooo/awoooi/actions' "${COMMIT_ESC}" "${SHORT_SHA}" "${ACTOR}")
|
||||
curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
|
||||
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
|
||||
-d "parse_mode=HTML" \
|
||||
--data-urlencode "text=${MSG}" || echo "TG notify failed (non-fatal): exit=$?"
|
||||
if AWOOI_CICD_STATUS=failed \
|
||||
AWOOI_CICD_STAGE=tests \
|
||||
AWOOI_CICD_JOB_NAME="AWOOOI 部署失敗" \
|
||||
AWOOI_CICD_COMMIT_SHA="${GITHUB_SHA}" \
|
||||
AWOOI_CICD_TRIGGERED_BY="${ACTOR}" \
|
||||
AWOOI_CICD_SUMMARY="${COMMIT_MSG}" \
|
||||
scripts/ci/notify-awoooi-cicd.sh; then
|
||||
echo "✅ CI/CD tests failure notification mirrored through AWOOI API"
|
||||
else
|
||||
curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
|
||||
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
|
||||
-d "parse_mode=HTML" \
|
||||
--data-urlencode "text=${MSG}" || echo "TG notify failed (non-fatal): exit=$?"
|
||||
fi
|
||||
|
||||
build-and-deploy:
|
||||
# 2026-04-30 Codex: Docker builds run on the host runner. Long docker build
|
||||
@@ -249,6 +293,14 @@ jobs:
|
||||
timeout-minutes: 60
|
||||
runs-on: awoooi-host
|
||||
steps:
|
||||
- name: Bootstrap Host Runner Tools
|
||||
# 2026-05-05 Codex: keep the host-mode runner self-healing before
|
||||
# actions/checkout@v4 and Telegram failure notifications run.
|
||||
run: |
|
||||
if command -v apk >/dev/null 2>&1; then
|
||||
apk add --no-cache nodejs npm git curl bash openssh-client docker-cli docker-cli-buildx
|
||||
fi
|
||||
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Get Commit Info
|
||||
@@ -274,6 +326,7 @@ jobs:
|
||||
run: |
|
||||
LOCK_NAME="awoooi-cd-docker-build-lock"
|
||||
STALE_SECONDS=7200
|
||||
EMPTY_LOCK_SECONDS=300
|
||||
WAIT_ATTEMPTS=180
|
||||
|
||||
for attempt in $(seq 1 "$WAIT_ATTEMPTS"); do
|
||||
@@ -297,9 +350,24 @@ jobs:
|
||||
python3 -c "import sys, datetime, re; ts = re.sub(r'\\.\d+', '', sys.argv[1]); ts = re.sub(r'\\s+[A-Z]{2,4}$', '', ts.strip()); print(int(datetime.datetime.strptime(ts, '%Y-%m-%d %H:%M:%S %z').timestamp()))" \
|
||||
"$CREATED_AT" 2>/dev/null || echo 0)
|
||||
NOW_EPOCH=$(date +%s)
|
||||
LOCK_AGE=$((NOW_EPOCH - CREATED_EPOCH))
|
||||
# 2026-05-05 Codex: dirty reboot / cancelled Actions can leave
|
||||
# the Docker-network lock behind with no active build or push.
|
||||
# Waiting the full 30m CD timeout keeps deploys queued even
|
||||
# though no job is protected, so clear empty locks after 5m.
|
||||
# 2026-05-12 Codex: 用 bracket pattern 避免 lock-check shell 自己的
|
||||
# grep/awk pattern 被誤判成 active docker work,導致 empty lock 永不自清。
|
||||
ACTIVE_DOCKER_WORK=$(ps -eo pid,args | awk '$0 ~ /[d]ocker (build|push)|[b]uildx build/ {print}' || true)
|
||||
if [ "$CREATED_EPOCH" -gt 0 ] && \
|
||||
[ $((NOW_EPOCH - CREATED_EPOCH)) -gt "$STALE_SECONDS" ]; then
|
||||
echo "⚠️ stale Docker build lock detected (age=$((NOW_EPOCH - CREATED_EPOCH))s > ${STALE_SECONDS}s), removing ${LOCK_NAME}"
|
||||
[ "$LOCK_AGE" -gt "$EMPTY_LOCK_SECONDS" ] && \
|
||||
[ -z "$ACTIVE_DOCKER_WORK" ]; then
|
||||
echo "⚠️ empty Docker build lock detected (age=${LOCK_AGE}s > ${EMPTY_LOCK_SECONDS}s, no active docker build/push), removing ${LOCK_NAME}"
|
||||
docker network rm "$LOCK_NAME" >/dev/null 2>&1 || true
|
||||
continue
|
||||
fi
|
||||
if [ "$CREATED_EPOCH" -gt 0 ] && \
|
||||
[ "$LOCK_AGE" -gt "$STALE_SECONDS" ]; then
|
||||
echo "⚠️ stale Docker build lock detected (age=${LOCK_AGE}s > ${STALE_SECONDS}s), removing ${LOCK_NAME}"
|
||||
docker network rm "$LOCK_NAME" >/dev/null 2>&1 || true
|
||||
continue
|
||||
fi
|
||||
@@ -315,8 +383,8 @@ jobs:
|
||||
# ── API 鏡像建置(含 Layer Cache 加速)──────────────────────────────
|
||||
# 2026-04-01 ogt: CACHE_BUST=git_sha 確保 src/ 和 models.json 層每次重建
|
||||
# deps 層 (pip install) 仍可 cache → 加速;代碼/配置層強制失效
|
||||
# 首席架構師 Review C1 (2026-04-05 Claude Code): 補 DOCKER_BUILDKIT=1
|
||||
# BUILDKIT_INLINE_CACHE=1 只有在 BuildKit 啟用時才有效
|
||||
# 2026-05-05 Codex: host runner bootstrap installs docker-cli-buildx;
|
||||
# keep BuildKit enabled because the web Dockerfile uses RUN --mount.
|
||||
- name: Build and Push API
|
||||
env:
|
||||
DOCKER_BUILDKIT: "1"
|
||||
@@ -338,7 +406,7 @@ jobs:
|
||||
# 2026-04-01 Claude Code: CACHE_BUST=git_sha 取代 --no-cache
|
||||
# - deps 層 (pnpm install) 仍可 cache → 節省 ~2-3 min
|
||||
# - COPY . . 以下由 CACHE_BUST 強制失效 → 業務邏輯/CSRF 等變更正確進入 bundle
|
||||
# 2026-04-12 ogt: 實測 --no-cache=10m50s;CACHE_BUST=5m50s,恢復此方案
|
||||
# 2026-05-05 Codex: mirror API build mode; BuildKit required for cache mounts.
|
||||
- name: Build and Push Web
|
||||
env:
|
||||
DOCKER_BUILDKIT: "1"
|
||||
@@ -369,113 +437,204 @@ jobs:
|
||||
# 2026-03-31 ogt: P0-1 Secrets 自動注入 (ADR-035 強制)
|
||||
# 2026-03-31 ogt: 加入 AI API Keys (修復 mock_fallback 問題)
|
||||
- name: Inject K8s Secrets
|
||||
env:
|
||||
SSH_PRIVATE_KEY: ${{ secrets.DEPLOY_SSH_KEY }}
|
||||
TG_BOT_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
|
||||
TG_CHAT_ID: ${{ secrets.TELEGRAM_CHAT_ID }}
|
||||
NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
|
||||
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
|
||||
# 2026-04-01 Claude Code: Langfuse LLMOps keys (Phase 15.1 補齊 CD 注入)
|
||||
LANGFUSE_PUBLIC_KEY: ${{ secrets.LANGFUSE_PUBLIC_KEY }}
|
||||
LANGFUSE_SECRET_KEY: ${{ secrets.LANGFUSE_SECRET_KEY }}
|
||||
# 2026-04-02 Claude Code: Telegram 白名單 (授權簽核用)
|
||||
TG_USER_WHITELIST: ${{ secrets.OPENCLAW_TG_USER_WHITELIST }}
|
||||
# Phase O-4.1 2026-04-02: Sentry API Token (Wave A.1 ADR-037)
|
||||
SENTRY_AUTH_TOKEN: ${{ secrets.SENTRY_AUTH_TOKEN }}
|
||||
# ADR-059 2026-04-05: Gitea Webhook Secret (GITEA_ 前綴為保留字,改用 AWOOOI_ 前綴)
|
||||
GITEA_WEBHOOK_SECRET: ${{ secrets.AWOOOI_GITEA_WEBHOOK_SECRET }}
|
||||
# MCP Phase 3: ArgoCD API Token (2026-04-11 Claude Sonnet 4.6)
|
||||
ARGOCD_API_TOKEN: ${{ secrets.ARGOCD_API_TOKEN }}
|
||||
# 2026-04-18 ogt + Claude Opus 4.7: ADR-090-B L3-only 升級 L2(永久連線串 + 應用 secret)
|
||||
DATABASE_URL: ${{ secrets.DATABASE_URL }}
|
||||
MIGRATION_DATABASE_URL: ${{ secrets.MIGRATION_DATABASE_URL }}
|
||||
REDIS_URL: ${{ secrets.REDIS_URL }}
|
||||
JWT_SECRET: ${{ secrets.JWT_SECRET }}
|
||||
JWT_ALGORITHM: ${{ secrets.JWT_ALGORITHM }}
|
||||
WEBHOOK_HMAC_SECRET: ${{ secrets.WEBHOOK_HMAC_SECRET }}
|
||||
SENTRY_DSN: ${{ secrets.SENTRY_DSN }}
|
||||
CLAUDE_API_KEY: ${{ secrets.CLAUDE_API_KEY }}
|
||||
# AWOOOI_ 前綴避開 Gitea 保留字(同 AWOOOI_GITEA_WEBHOOK_SECRET 模式)
|
||||
GITEA_API_TOKEN: ${{ secrets.AWOOOI_GITEA_API_TOKEN }}
|
||||
NEMOTRON_BOT_TOKEN: ${{ secrets.NEMOTRON_BOT_TOKEN }}
|
||||
OPENCLAW_BOT_TOKEN: ${{ secrets.OPENCLAW_BOT_TOKEN }}
|
||||
SMTP_HOST: ${{ secrets.SMTP_HOST }}
|
||||
SRE_GROUP_CHAT_ID: ${{ secrets.SRE_GROUP_CHAT_ID }}
|
||||
run: |
|
||||
# 2026-05-18 Codex: 不把 secrets 放進 step-level env。
|
||||
# Gitea/act_runner 的 job log 可能展開 env;這裡只在 shell 內短暫轉
|
||||
# base64,並避免輸出原值。
|
||||
secret_b64() {
|
||||
if command -v python3.11 >/dev/null 2>&1; then
|
||||
python3.11 -c 'import base64, sys; data=sys.stdin.buffer.read(); data=data[:-1] if data.endswith(b"\n") else data; sys.stdout.write(base64.b64encode(data).decode())'
|
||||
elif command -v python3 >/dev/null 2>&1; then
|
||||
python3 -c 'import base64, sys; data=sys.stdin.buffer.read(); data=data[:-1] if data.endswith(b"\n") else data; sys.stdout.write(base64.b64encode(data).decode())'
|
||||
else
|
||||
secret_value="$(cat)"
|
||||
printf '%s' "${secret_value}" | base64 | tr -d '\n'
|
||||
fi
|
||||
}
|
||||
write_deploy_key() {
|
||||
mkdir -p "${HOME}/.ssh"
|
||||
umask 077
|
||||
cat > "${HOME}/.ssh/deploy_key" <<'AWOOOI_DEPLOY_KEY'
|
||||
${{ secrets.DEPLOY_SSH_KEY }}
|
||||
AWOOOI_DEPLOY_KEY
|
||||
chmod 600 "${HOME}/.ssh/deploy_key"
|
||||
}
|
||||
|
||||
TG_BOT_TOKEN_B64="$(secret_b64 <<'AWOOOI_SECRET_TG_BOT_TOKEN'
|
||||
${{ secrets.TELEGRAM_BOT_TOKEN }}
|
||||
AWOOOI_SECRET_TG_BOT_TOKEN
|
||||
)"
|
||||
TG_CHAT_ID_B64="$(secret_b64 <<'AWOOOI_SECRET_TG_CHAT_ID'
|
||||
${{ secrets.TELEGRAM_CHAT_ID }}
|
||||
AWOOOI_SECRET_TG_CHAT_ID
|
||||
)"
|
||||
NVIDIA_API_KEY_B64="$(secret_b64 <<'AWOOOI_SECRET_NVIDIA_API_KEY'
|
||||
${{ secrets.NVIDIA_API_KEY }}
|
||||
AWOOOI_SECRET_NVIDIA_API_KEY
|
||||
)"
|
||||
GEMINI_API_KEY_B64="$(secret_b64 <<'AWOOOI_SECRET_GEMINI_API_KEY'
|
||||
${{ secrets.GEMINI_API_KEY }}
|
||||
AWOOOI_SECRET_GEMINI_API_KEY
|
||||
)"
|
||||
LANGFUSE_PUBLIC_KEY_B64="$(secret_b64 <<'AWOOOI_SECRET_LANGFUSE_PUBLIC_KEY'
|
||||
${{ secrets.LANGFUSE_PUBLIC_KEY }}
|
||||
AWOOOI_SECRET_LANGFUSE_PUBLIC_KEY
|
||||
)"
|
||||
LANGFUSE_SECRET_KEY_B64="$(secret_b64 <<'AWOOOI_SECRET_LANGFUSE_SECRET_KEY'
|
||||
${{ secrets.LANGFUSE_SECRET_KEY }}
|
||||
AWOOOI_SECRET_LANGFUSE_SECRET_KEY
|
||||
)"
|
||||
TG_USER_WHITELIST_B64="$(secret_b64 <<'AWOOOI_SECRET_TG_USER_WHITELIST'
|
||||
${{ secrets.OPENCLAW_TG_USER_WHITELIST }}
|
||||
AWOOOI_SECRET_TG_USER_WHITELIST
|
||||
)"
|
||||
SENTRY_AUTH_TOKEN_B64="$(secret_b64 <<'AWOOOI_SECRET_SENTRY_AUTH_TOKEN'
|
||||
${{ secrets.SENTRY_AUTH_TOKEN }}
|
||||
AWOOOI_SECRET_SENTRY_AUTH_TOKEN
|
||||
)"
|
||||
GITEA_WEBHOOK_SECRET_B64="$(secret_b64 <<'AWOOOI_SECRET_GITEA_WEBHOOK_SECRET'
|
||||
${{ secrets.AWOOOI_GITEA_WEBHOOK_SECRET }}
|
||||
AWOOOI_SECRET_GITEA_WEBHOOK_SECRET
|
||||
)"
|
||||
ARGOCD_API_TOKEN_B64="$(secret_b64 <<'AWOOOI_SECRET_ARGOCD_API_TOKEN'
|
||||
${{ secrets.ARGOCD_API_TOKEN }}
|
||||
AWOOOI_SECRET_ARGOCD_API_TOKEN
|
||||
)"
|
||||
DATABASE_URL_B64="$(secret_b64 <<'AWOOOI_SECRET_DATABASE_URL'
|
||||
${{ secrets.DATABASE_URL }}
|
||||
AWOOOI_SECRET_DATABASE_URL
|
||||
)"
|
||||
MIGRATION_DATABASE_URL_B64="$(secret_b64 <<'AWOOOI_SECRET_MIGRATION_DATABASE_URL'
|
||||
${{ secrets.MIGRATION_DATABASE_URL }}
|
||||
AWOOOI_SECRET_MIGRATION_DATABASE_URL
|
||||
)"
|
||||
REDIS_URL_B64="$(secret_b64 <<'AWOOOI_SECRET_REDIS_URL'
|
||||
${{ secrets.REDIS_URL }}
|
||||
AWOOOI_SECRET_REDIS_URL
|
||||
)"
|
||||
JWT_SECRET_B64="$(secret_b64 <<'AWOOOI_SECRET_JWT_SECRET'
|
||||
${{ secrets.JWT_SECRET }}
|
||||
AWOOOI_SECRET_JWT_SECRET
|
||||
)"
|
||||
JWT_ALGORITHM_B64="$(secret_b64 <<'AWOOOI_SECRET_JWT_ALGORITHM'
|
||||
${{ secrets.JWT_ALGORITHM }}
|
||||
AWOOOI_SECRET_JWT_ALGORITHM
|
||||
)"
|
||||
WEBHOOK_HMAC_SECRET_B64="$(secret_b64 <<'AWOOOI_SECRET_WEBHOOK_HMAC_SECRET'
|
||||
${{ secrets.WEBHOOK_HMAC_SECRET }}
|
||||
AWOOOI_SECRET_WEBHOOK_HMAC_SECRET
|
||||
)"
|
||||
AWOOOP_OPERATOR_API_KEY_B64="$(secret_b64 <<'AWOOOI_SECRET_AWOOOP_OPERATOR_API_KEY'
|
||||
${{ secrets.AWOOOP_OPERATOR_API_KEY }}
|
||||
AWOOOI_SECRET_AWOOOP_OPERATOR_API_KEY
|
||||
)"
|
||||
SENTRY_DSN_B64="$(secret_b64 <<'AWOOOI_SECRET_SENTRY_DSN'
|
||||
${{ secrets.SENTRY_DSN }}
|
||||
AWOOOI_SECRET_SENTRY_DSN
|
||||
)"
|
||||
CLAUDE_API_KEY_B64="$(secret_b64 <<'AWOOOI_SECRET_CLAUDE_API_KEY'
|
||||
${{ secrets.CLAUDE_API_KEY }}
|
||||
AWOOOI_SECRET_CLAUDE_API_KEY
|
||||
)"
|
||||
GITEA_API_TOKEN_B64="$(secret_b64 <<'AWOOOI_SECRET_GITEA_API_TOKEN'
|
||||
${{ secrets.AWOOOI_GITEA_API_TOKEN }}
|
||||
AWOOOI_SECRET_GITEA_API_TOKEN
|
||||
)"
|
||||
NEMOTRON_BOT_TOKEN_B64="$(secret_b64 <<'AWOOOI_SECRET_NEMOTRON_BOT_TOKEN'
|
||||
${{ secrets.NEMOTRON_BOT_TOKEN }}
|
||||
AWOOOI_SECRET_NEMOTRON_BOT_TOKEN
|
||||
)"
|
||||
OPENCLAW_BOT_TOKEN_B64="$(secret_b64 <<'AWOOOI_SECRET_OPENCLAW_BOT_TOKEN'
|
||||
${{ secrets.OPENCLAW_BOT_TOKEN }}
|
||||
AWOOOI_SECRET_OPENCLAW_BOT_TOKEN
|
||||
)"
|
||||
SMTP_HOST_B64="$(secret_b64 <<'AWOOOI_SECRET_SMTP_HOST'
|
||||
${{ secrets.SMTP_HOST }}
|
||||
AWOOOI_SECRET_SMTP_HOST
|
||||
)"
|
||||
SRE_GROUP_CHAT_ID_B64="$(secret_b64 <<'AWOOOI_SECRET_SRE_GROUP_CHAT_ID'
|
||||
${{ secrets.SRE_GROUP_CHAT_ID }}
|
||||
AWOOOI_SECRET_SRE_GROUP_CHAT_ID
|
||||
)"
|
||||
|
||||
# S1/S2: 統一命名 deploy_key,改用 ssh-keyscan(比 StrictHostKeyChecking=no 更安全)
|
||||
mkdir -p ~/.ssh
|
||||
echo "$SSH_PRIVATE_KEY" > ~/.ssh/deploy_key
|
||||
chmod 600 ~/.ssh/deploy_key
|
||||
ssh-keyscan 192.168.0.121 >> ~/.ssh/known_hosts 2>/dev/null
|
||||
ssh -i ~/.ssh/deploy_key wooo@192.168.0.121 << SECRETS
|
||||
write_deploy_key
|
||||
# 2026-05-13 Codex: keyscan must include ED25519 explicitly. Some
|
||||
# OpenSSH builds otherwise record only RSA/ECDSA, then strict deploy
|
||||
# SSH fails with "No ED25519 host key is known" after image push.
|
||||
ssh-keyscan -T 5 -t ed25519,rsa,ecdsa "${K8S_SSH_HOST}" > "${HOME}/.ssh/known_hosts" 2>/dev/null
|
||||
test -s "${HOME}/.ssh/known_hosts" || { echo "❌ K8S host keyscan failed: ${K8S_SSH_HOST}"; exit 1; }
|
||||
SSH_OPTS="-i ${HOME}/.ssh/deploy_key -o BatchMode=yes -o StrictHostKeyChecking=yes -o UserKnownHostsFile=${HOME}/.ssh/known_hosts -o ConnectTimeout=10"
|
||||
ssh $SSH_OPTS "wooo@${{ env.K8S_SSH_HOST }}" << SECRETS
|
||||
set -e
|
||||
export KUBECONFIG=/etc/rancher/k3s/k3s.yaml
|
||||
K8S_API_SERVER="${{ env.K8S_API_SERVER }}"
|
||||
KUBECTL="sudo kubectl --kubeconfig=/etc/rancher/k3s/k3s.yaml --server=\${K8S_API_SERVER}"
|
||||
|
||||
# 注入 Telegram Secrets (ADR-035 鐵律)
|
||||
sudo kubectl patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
|
||||
{"op":"add","path":"/data/OPENCLAW_TG_BOT_TOKEN","value":"'$(echo -n "${TG_BOT_TOKEN}" | base64 -w 0)'"},
|
||||
{"op":"add","path":"/data/OPENCLAW_TG_CHAT_ID","value":"'$(echo -n "${TG_CHAT_ID}" | base64 -w 0)'"}
|
||||
\$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
|
||||
{"op":"add","path":"/data/OPENCLAW_TG_BOT_TOKEN","value":"${TG_BOT_TOKEN_B64}"},
|
||||
{"op":"add","path":"/data/OPENCLAW_TG_CHAT_ID","value":"${TG_CHAT_ID_B64}"}
|
||||
]' || { echo "❌ Telegram Secrets patch 失敗 — ADR-035 鐵律"; exit 1; }
|
||||
|
||||
# 2026-03-31 ogt: 注入 AI API Keys (修復 NVIDIA/Gemini mock_fallback)
|
||||
# 2026-04-01 Claude Code: base64 -w 0 防止長 key 換行破壞 JSON
|
||||
# NVIDIA NIM (免費 tier)
|
||||
if [ -n "${NVIDIA_API_KEY}" ] && [ "${NVIDIA_API_KEY}" != "" ]; then
|
||||
sudo kubectl patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
|
||||
{"op":"add","path":"/data/NVIDIA_API_KEY","value":"'$(echo -n "${NVIDIA_API_KEY}" | base64 -w 0)'"}
|
||||
if [ -n "${NVIDIA_API_KEY_B64}" ]; then
|
||||
\$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
|
||||
{"op":"add","path":"/data/NVIDIA_API_KEY","value":"${NVIDIA_API_KEY_B64}"}
|
||||
]' && echo "✅ NVIDIA_API_KEY 已注入" || echo "⚠️ NVIDIA_API_KEY patch 失敗"
|
||||
else
|
||||
echo "⚠️ NVIDIA_API_KEY 未設定,跳過"
|
||||
fi
|
||||
|
||||
# Gemini (備援)
|
||||
if [ -n "${GEMINI_API_KEY}" ] && [ "${GEMINI_API_KEY}" != "" ]; then
|
||||
sudo kubectl patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
|
||||
{"op":"add","path":"/data/GEMINI_API_KEY","value":"'$(echo -n "${GEMINI_API_KEY}" | base64 -w 0)'"}
|
||||
if [ -n "${GEMINI_API_KEY_B64}" ]; then
|
||||
\$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
|
||||
{"op":"add","path":"/data/GEMINI_API_KEY","value":"${GEMINI_API_KEY_B64}"}
|
||||
]' && echo "✅ GEMINI_API_KEY 已注入" || echo "⚠️ GEMINI_API_KEY patch 失敗"
|
||||
else
|
||||
echo "⚠️ GEMINI_API_KEY 未設定,跳過"
|
||||
fi
|
||||
|
||||
# 2026-04-01 Claude Code: Langfuse LLMOps keys (補齊 CD 注入,之前只有手動設定)
|
||||
if [ -n "${LANGFUSE_PUBLIC_KEY}" ] && [ -n "${LANGFUSE_SECRET_KEY}" ]; then
|
||||
sudo kubectl patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
|
||||
{"op":"add","path":"/data/LANGFUSE_PUBLIC_KEY","value":"'$(echo -n "${LANGFUSE_PUBLIC_KEY}" | base64 -w 0)'"},
|
||||
{"op":"add","path":"/data/LANGFUSE_SECRET_KEY","value":"'$(echo -n "${LANGFUSE_SECRET_KEY}" | base64 -w 0)'"}
|
||||
if [ -n "${LANGFUSE_PUBLIC_KEY_B64}" ] && [ -n "${LANGFUSE_SECRET_KEY_B64}" ]; then
|
||||
\$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
|
||||
{"op":"add","path":"/data/LANGFUSE_PUBLIC_KEY","value":"${LANGFUSE_PUBLIC_KEY_B64}"},
|
||||
{"op":"add","path":"/data/LANGFUSE_SECRET_KEY","value":"${LANGFUSE_SECRET_KEY_B64}"}
|
||||
]' && echo "✅ LANGFUSE keys 已注入" || echo "⚠️ LANGFUSE keys patch 失敗"
|
||||
else
|
||||
echo "⚠️ LANGFUSE_PUBLIC_KEY/SECRET_KEY 未設定,跳過 (現有 K8s secret 值維持不變)"
|
||||
fi
|
||||
|
||||
# 2026-04-02 Claude Code: Telegram Whitelist (授權簽核用戶 ID)
|
||||
if [ -n "${TG_USER_WHITELIST}" ]; then
|
||||
sudo kubectl patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
|
||||
{"op":"add","path":"/data/OPENCLAW_TG_USER_WHITELIST","value":"'$(echo -n "${TG_USER_WHITELIST}" | base64 -w 0)'"}
|
||||
if [ -n "${TG_USER_WHITELIST_B64}" ]; then
|
||||
\$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
|
||||
{"op":"add","path":"/data/OPENCLAW_TG_USER_WHITELIST","value":"${TG_USER_WHITELIST_B64}"}
|
||||
]' && echo "✅ TG_USER_WHITELIST 已注入" || echo "⚠️ TG_USER_WHITELIST patch 失敗"
|
||||
fi
|
||||
|
||||
# Phase O-4.1 2026-04-02: Sentry Auth Token (Wave A.1 ADR-037)
|
||||
if [ -n "${SENTRY_AUTH_TOKEN}" ]; then
|
||||
sudo kubectl patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
|
||||
{"op":"add","path":"/data/SENTRY_AUTH_TOKEN","value":"'$(echo -n "${SENTRY_AUTH_TOKEN}" | base64 -w 0)'"}
|
||||
if [ -n "${SENTRY_AUTH_TOKEN_B64}" ]; then
|
||||
\$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
|
||||
{"op":"add","path":"/data/SENTRY_AUTH_TOKEN","value":"${SENTRY_AUTH_TOKEN_B64}"}
|
||||
]' && echo "✅ SENTRY_AUTH_TOKEN 已注入" || echo "⚠️ SENTRY_AUTH_TOKEN patch 失敗"
|
||||
else
|
||||
echo "⚠️ SENTRY_AUTH_TOKEN 未設定,Sentry Comment API 將跳過"
|
||||
fi
|
||||
|
||||
# ADR-059 2026-04-05 Claude Code: Gitea Webhook Secret
|
||||
if [ -n "${GITEA_WEBHOOK_SECRET}" ]; then
|
||||
sudo kubectl patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
|
||||
{"op":"add","path":"/data/GITEA_WEBHOOK_SECRET","value":"'$(echo -n "${GITEA_WEBHOOK_SECRET}" | base64 -w 0)'"}
|
||||
if [ -n "${GITEA_WEBHOOK_SECRET_B64}" ]; then
|
||||
\$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
|
||||
{"op":"add","path":"/data/GITEA_WEBHOOK_SECRET","value":"${GITEA_WEBHOOK_SECRET_B64}"}
|
||||
]' && echo "✅ GITEA_WEBHOOK_SECRET 已注入" || echo "⚠️ GITEA_WEBHOOK_SECRET patch 失敗"
|
||||
else
|
||||
echo "⚠️ GITEA_WEBHOOK_SECRET 未設定,Gitea Webhook 簽章驗證將在 prod 失效"
|
||||
fi
|
||||
|
||||
# MCP Phase 3: ArgoCD API Token (2026-04-11 Claude Sonnet 4.6)
|
||||
if [ -n "${ARGOCD_API_TOKEN}" ]; then
|
||||
sudo kubectl patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
|
||||
{"op":"add","path":"/data/ARGOCD_API_TOKEN","value":"'$(echo -n "${ARGOCD_API_TOKEN}" | base64 -w 0)'"}
|
||||
if [ -n "${ARGOCD_API_TOKEN_B64}" ]; then
|
||||
\$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
|
||||
{"op":"add","path":"/data/ARGOCD_API_TOKEN","value":"${ARGOCD_API_TOKEN_B64}"}
|
||||
]' && echo "✅ ARGOCD_API_TOKEN 已注入" || echo "⚠️ ARGOCD_API_TOKEN patch 失敗"
|
||||
else
|
||||
echo "⚠️ ARGOCD_API_TOKEN 未設定,ArgoCD MCP 將使用空 token"
|
||||
@@ -488,91 +647,98 @@ jobs:
|
||||
# 注意: 每個 block 與上方維持相同結構(if guard + 預先在 runner 端編碼的 *_B64 值 + json patch)
|
||||
|
||||
# DATABASE_URL — PG 應用連線串(2026-04-18 輪替)
|
||||
if [ -n "${DATABASE_URL}" ]; then
|
||||
sudo kubectl patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
|
||||
{"op":"add","path":"/data/DATABASE_URL","value":"'$(echo -n "${DATABASE_URL}" | base64 -w 0)'"}
|
||||
if [ -n "${DATABASE_URL_B64}" ]; then
|
||||
\$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
|
||||
{"op":"add","path":"/data/DATABASE_URL","value":"${DATABASE_URL_B64}"}
|
||||
]' && echo "✅ DATABASE_URL 已注入" || echo "⚠️ DATABASE_URL patch 失敗"
|
||||
else
|
||||
echo "⚠️ DATABASE_URL 未設定,awoooi-api 將無法連 PG"
|
||||
fi
|
||||
|
||||
# MIGRATION_DATABASE_URL — CI migration 用 awoooi_migrator 限權帳號(ADR-090-B)
|
||||
if [ -n "${MIGRATION_DATABASE_URL}" ]; then
|
||||
sudo kubectl patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
|
||||
{"op":"add","path":"/data/MIGRATION_DATABASE_URL","value":"'$(echo -n "${MIGRATION_DATABASE_URL}" | base64 -w 0)'"}
|
||||
if [ -n "${MIGRATION_DATABASE_URL_B64}" ]; then
|
||||
\$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
|
||||
{"op":"add","path":"/data/MIGRATION_DATABASE_URL","value":"${MIGRATION_DATABASE_URL_B64}"}
|
||||
]' && echo "✅ MIGRATION_DATABASE_URL 已注入" || echo "⚠️ MIGRATION_DATABASE_URL patch 失敗"
|
||||
fi
|
||||
|
||||
# REDIS_URL — Redis 連線(6380 on 188)
|
||||
if [ -n "${REDIS_URL}" ]; then
|
||||
sudo kubectl patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
|
||||
{"op":"add","path":"/data/REDIS_URL","value":"'$(echo -n "${REDIS_URL}" | base64 -w 0)'"}
|
||||
if [ -n "${REDIS_URL_B64}" ]; then
|
||||
\$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
|
||||
{"op":"add","path":"/data/REDIS_URL","value":"${REDIS_URL_B64}"}
|
||||
]' && echo "✅ REDIS_URL 已注入" || echo "⚠️ REDIS_URL patch 失敗"
|
||||
else
|
||||
echo "⚠️ REDIS_URL 未設定"
|
||||
fi
|
||||
|
||||
# JWT_SECRET / JWT_ALGORITHM — API 認證
|
||||
if [ -n "${JWT_SECRET}" ]; then
|
||||
sudo kubectl patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
|
||||
{"op":"add","path":"/data/JWT_SECRET","value":"'$(echo -n "${JWT_SECRET}" | base64 -w 0)'"}
|
||||
if [ -n "${JWT_SECRET_B64}" ]; then
|
||||
\$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
|
||||
{"op":"add","path":"/data/JWT_SECRET","value":"${JWT_SECRET_B64}"}
|
||||
]' && echo "✅ JWT_SECRET 已注入" || echo "⚠️ JWT_SECRET patch 失敗"
|
||||
fi
|
||||
if [ -n "${JWT_ALGORITHM}" ]; then
|
||||
sudo kubectl patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
|
||||
{"op":"add","path":"/data/JWT_ALGORITHM","value":"'$(echo -n "${JWT_ALGORITHM}" | base64 -w 0)'"}
|
||||
if [ -n "${JWT_ALGORITHM_B64}" ]; then
|
||||
\$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
|
||||
{"op":"add","path":"/data/JWT_ALGORITHM","value":"${JWT_ALGORITHM_B64}"}
|
||||
]' && echo "✅ JWT_ALGORITHM 已注入" || echo "⚠️ JWT_ALGORITHM patch 失敗"
|
||||
fi
|
||||
|
||||
# WEBHOOK_HMAC_SECRET — Alertmanager webhook HMAC 簽章
|
||||
if [ -n "${WEBHOOK_HMAC_SECRET}" ]; then
|
||||
sudo kubectl patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
|
||||
{"op":"add","path":"/data/WEBHOOK_HMAC_SECRET","value":"'$(echo -n "${WEBHOOK_HMAC_SECRET}" | base64 -w 0)'"}
|
||||
if [ -n "${WEBHOOK_HMAC_SECRET_B64}" ]; then
|
||||
\$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
|
||||
{"op":"add","path":"/data/WEBHOOK_HMAC_SECRET","value":"${WEBHOOK_HMAC_SECRET_B64}"}
|
||||
]' && echo "✅ WEBHOOK_HMAC_SECRET 已注入" || echo "⚠️ WEBHOOK_HMAC_SECRET patch 失敗"
|
||||
fi
|
||||
|
||||
# AWOOOP_OPERATOR_API_KEY — AwoooP Operator mutation endpoints
|
||||
if [ -n "${AWOOOP_OPERATOR_API_KEY_B64}" ]; then
|
||||
\$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
|
||||
{"op":"add","path":"/data/AWOOOP_OPERATOR_API_KEY","value":"${AWOOOP_OPERATOR_API_KEY_B64}"}
|
||||
]' && echo "✅ AWOOOP_OPERATOR_API_KEY 已注入" || echo "⚠️ AWOOOP_OPERATOR_API_KEY patch 失敗"
|
||||
fi
|
||||
|
||||
# SENTRY_DSN — Sentry 錯誤追蹤(不是 auth token)
|
||||
if [ -n "${SENTRY_DSN}" ]; then
|
||||
sudo kubectl patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
|
||||
{"op":"add","path":"/data/SENTRY_DSN","value":"'$(echo -n "${SENTRY_DSN}" | base64 -w 0)'"}
|
||||
if [ -n "${SENTRY_DSN_B64}" ]; then
|
||||
\$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
|
||||
{"op":"add","path":"/data/SENTRY_DSN","value":"${SENTRY_DSN_B64}"}
|
||||
]' && echo "✅ SENTRY_DSN 已注入" || echo "⚠️ SENTRY_DSN patch 失敗"
|
||||
fi
|
||||
|
||||
# CLAUDE_API_KEY — Claude 備援 LLM
|
||||
if [ -n "${CLAUDE_API_KEY}" ]; then
|
||||
sudo kubectl patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
|
||||
{"op":"add","path":"/data/CLAUDE_API_KEY","value":"'$(echo -n "${CLAUDE_API_KEY}" | base64 -w 0)'"}
|
||||
if [ -n "${CLAUDE_API_KEY_B64}" ]; then
|
||||
\$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
|
||||
{"op":"add","path":"/data/CLAUDE_API_KEY","value":"${CLAUDE_API_KEY_B64}"}
|
||||
]' && echo "✅ CLAUDE_API_KEY 已注入" || echo "⚠️ CLAUDE_API_KEY patch 失敗"
|
||||
fi
|
||||
|
||||
# GITEA_API_TOKEN — Gitea API Token(從 AWOOOI_GITEA_API_TOKEN 映射)
|
||||
if [ -n "${GITEA_API_TOKEN}" ]; then
|
||||
sudo kubectl patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
|
||||
{"op":"add","path":"/data/GITEA_API_TOKEN","value":"'$(echo -n "${GITEA_API_TOKEN}" | base64 -w 0)'"}
|
||||
if [ -n "${GITEA_API_TOKEN_B64}" ]; then
|
||||
\$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
|
||||
{"op":"add","path":"/data/GITEA_API_TOKEN","value":"${GITEA_API_TOKEN_B64}"}
|
||||
]' && echo "✅ GITEA_API_TOKEN 已注入" || echo "⚠️ GITEA_API_TOKEN patch 失敗"
|
||||
fi
|
||||
|
||||
# NEMOTRON_BOT_TOKEN / OPENCLAW_BOT_TOKEN — 多 Bot 架構
|
||||
if [ -n "${NEMOTRON_BOT_TOKEN}" ]; then
|
||||
sudo kubectl patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
|
||||
{"op":"add","path":"/data/NEMOTRON_BOT_TOKEN","value":"'$(echo -n "${NEMOTRON_BOT_TOKEN}" | base64 -w 0)'"}
|
||||
if [ -n "${NEMOTRON_BOT_TOKEN_B64}" ]; then
|
||||
\$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
|
||||
{"op":"add","path":"/data/NEMOTRON_BOT_TOKEN","value":"${NEMOTRON_BOT_TOKEN_B64}"}
|
||||
]' && echo "✅ NEMOTRON_BOT_TOKEN 已注入" || echo "⚠️ NEMOTRON_BOT_TOKEN patch 失敗"
|
||||
fi
|
||||
if [ -n "${OPENCLAW_BOT_TOKEN}" ]; then
|
||||
sudo kubectl patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
|
||||
{"op":"add","path":"/data/OPENCLAW_BOT_TOKEN","value":"'$(echo -n "${OPENCLAW_BOT_TOKEN}" | base64 -w 0)'"}
|
||||
if [ -n "${OPENCLAW_BOT_TOKEN_B64}" ]; then
|
||||
\$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
|
||||
{"op":"add","path":"/data/OPENCLAW_BOT_TOKEN","value":"${OPENCLAW_BOT_TOKEN_B64}"}
|
||||
]' && echo "✅ OPENCLAW_BOT_TOKEN 已注入" || echo "⚠️ OPENCLAW_BOT_TOKEN patch 失敗"
|
||||
fi
|
||||
|
||||
# SMTP_HOST / SRE_GROUP_CHAT_ID
|
||||
if [ -n "${SMTP_HOST}" ]; then
|
||||
sudo kubectl patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
|
||||
{"op":"add","path":"/data/SMTP_HOST","value":"'$(echo -n "${SMTP_HOST}" | base64 -w 0)'"}
|
||||
if [ -n "${SMTP_HOST_B64}" ]; then
|
||||
\$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
|
||||
{"op":"add","path":"/data/SMTP_HOST","value":"${SMTP_HOST_B64}"}
|
||||
]' && echo "✅ SMTP_HOST 已注入" || echo "⚠️ SMTP_HOST patch 失敗"
|
||||
fi
|
||||
if [ -n "${SRE_GROUP_CHAT_ID}" ]; then
|
||||
sudo kubectl patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
|
||||
{"op":"add","path":"/data/SRE_GROUP_CHAT_ID","value":"'$(echo -n "${SRE_GROUP_CHAT_ID}" | base64 -w 0)'"}
|
||||
if [ -n "${SRE_GROUP_CHAT_ID_B64}" ]; then
|
||||
\$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
|
||||
{"op":"add","path":"/data/SRE_GROUP_CHAT_ID","value":"${SRE_GROUP_CHAT_ID_B64}"}
|
||||
]' && echo "✅ SRE_GROUP_CHAT_ID 已注入" || echo "⚠️ SRE_GROUP_CHAT_ID patch 失敗"
|
||||
fi
|
||||
|
||||
@@ -590,26 +756,27 @@ jobs:
|
||||
EXPECTED_HOSTS=4
|
||||
PRESENT=0
|
||||
for ip in 192.168.0.110 192.168.0.120 192.168.0.121 192.168.0.188; do
|
||||
if grep -qE "^${ip}[[:space:]]" /tmp/known_hosts_repair 2>/dev/null; then
|
||||
PRESENT=$((PRESENT + 1))
|
||||
if grep -qE "^\${ip}[[:space:]]" /tmp/known_hosts_repair 2>/dev/null; then
|
||||
PRESENT=\$((PRESENT + 1))
|
||||
else
|
||||
echo "⚠️ ssh-keyscan 缺主機 ${ip}"
|
||||
echo "⚠️ ssh-keyscan 缺主機 \${ip}"
|
||||
fi
|
||||
done
|
||||
if [ "$PRESENT" -eq "$EXPECTED_HOSTS" ]; then
|
||||
sudo kubectl create secret generic awoooi-repair-known-hosts \
|
||||
if [ "\$PRESENT" -eq "\$EXPECTED_HOSTS" ]; then
|
||||
\$KUBECTL create secret generic awoooi-repair-known-hosts \
|
||||
-n awoooi-prod \
|
||||
--from-file=known_hosts=/tmp/known_hosts_repair \
|
||||
--dry-run=client -o yaml | sudo kubectl apply -f - \
|
||||
--dry-run=client -o yaml | \$KUBECTL apply -f - \
|
||||
&& echo "✅ awoooi-repair-known-hosts Secret 已建立/更新" \
|
||||
|| echo "⚠️ awoooi-repair-known-hosts Secret 建立失敗 (非致命)"
|
||||
sudo kubectl patch secret ssh-mcp-key -n awoooi-prod --type=merge \
|
||||
-p='{"data":{"known_hosts":"'$(base64 -w 0 /tmp/known_hosts_repair)'"}}' \
|
||||
KNOWN_HOSTS_B64=\$(base64 -w 0 /tmp/known_hosts_repair)
|
||||
\$KUBECTL patch secret ssh-mcp-key -n awoooi-prod --type=merge \
|
||||
-p="{\"data\":{\"known_hosts\":\"\${KNOWN_HOSTS_B64}\"}}" \
|
||||
&& echo "✅ ssh-mcp-key known_hosts 已更新(4 台主機完整)" \
|
||||
|| echo "⚠️ ssh-mcp-key known_hosts 更新失敗 (非致命)"
|
||||
rm -f /tmp/known_hosts_repair /tmp/known_hosts_scan_err
|
||||
else
|
||||
echo "❌ ssh-keyscan 只抓到 ${PRESENT}/${EXPECTED_HOSTS} 台主機,跳過 patch(保留現有 secret)"
|
||||
echo "❌ ssh-keyscan 只抓到 \${PRESENT}/\${EXPECTED_HOSTS} 台主機,跳過 patch(保留現有 secret)"
|
||||
cat /tmp/known_hosts_scan_err 2>/dev/null | head -10
|
||||
rm -f /tmp/known_hosts_repair /tmp/known_hosts_scan_err
|
||||
fi
|
||||
@@ -627,27 +794,36 @@ jobs:
|
||||
# 4. 等待 ArgoCD sync + rollout 完成
|
||||
# 5. Health Check
|
||||
- name: Deploy to K8s (ArgoCD GitOps)
|
||||
env:
|
||||
SSH_PRIVATE_KEY: ${{ secrets.DEPLOY_SSH_KEY }}
|
||||
GITEA_TOKEN: ${{ secrets.CD_PUSH_TOKEN }}
|
||||
run: |
|
||||
write_deploy_key() {
|
||||
mkdir -p "${HOME}/.ssh"
|
||||
umask 077
|
||||
cat > "${HOME}/.ssh/deploy_key" <<'AWOOOI_DEPLOY_KEY'
|
||||
${{ secrets.DEPLOY_SSH_KEY }}
|
||||
AWOOOI_DEPLOY_KEY
|
||||
chmod 600 "${HOME}/.ssh/deploy_key"
|
||||
}
|
||||
|
||||
mkdir -p ~/.ssh
|
||||
echo "$SSH_PRIVATE_KEY" > ~/.ssh/deploy_key
|
||||
chmod 600 ~/.ssh/deploy_key
|
||||
ssh-keyscan 192.168.0.121 >> ~/.ssh/known_hosts 2>/dev/null
|
||||
write_deploy_key
|
||||
# 2026-05-13 Codex: mirror Inject K8s Secrets host-key handling so the
|
||||
# deploy job never reaches SSH with a known_hosts file missing ED25519.
|
||||
ssh-keyscan -T 5 -t ed25519,rsa,ecdsa "${K8S_SSH_HOST}" > "${HOME}/.ssh/known_hosts" 2>/dev/null
|
||||
test -s "${HOME}/.ssh/known_hosts" || { echo "❌ K8S host keyscan failed: ${K8S_SSH_HOST}"; exit 1; }
|
||||
SSH_OPTS="-i ${HOME}/.ssh/deploy_key -o BatchMode=yes -o StrictHostKeyChecking=yes -o UserKnownHostsFile=${HOME}/.ssh/known_hosts -o ConnectTimeout=10"
|
||||
|
||||
IMAGE_TAG="${{ github.sha }}"
|
||||
HARBOR=192.168.0.110:5000
|
||||
|
||||
# ─── Step 1: Apply ConfigMap + ServiceRegistry (ArgoCD 管的是 Deployment,ConfigMap 仍直接 apply) ───
|
||||
cat k8s/awoooi-prod/04-configmap.yaml | \
|
||||
ssh -i ~/.ssh/deploy_key wooo@192.168.0.121 \
|
||||
"export KUBECONFIG=/etc/rancher/k3s/k3s.yaml && sudo kubectl apply -f -"
|
||||
ssh $SSH_OPTS "wooo@${{ env.K8S_SSH_HOST }}" \
|
||||
"KUBECTL='sudo kubectl --kubeconfig=/etc/rancher/k3s/k3s.yaml --server=${{ env.K8S_API_SERVER }}'; \$KUBECTL apply -f -"
|
||||
echo "✅ ConfigMap 已更新"
|
||||
|
||||
cat k8s/awoooi-prod/15-service-registry-configmap.yaml | \
|
||||
ssh -i ~/.ssh/deploy_key wooo@192.168.0.121 \
|
||||
"export KUBECONFIG=/etc/rancher/k3s/k3s.yaml && sudo kubectl apply -f -"
|
||||
ssh $SSH_OPTS "wooo@${{ env.K8S_SSH_HOST }}" \
|
||||
"KUBECTL='sudo kubectl --kubeconfig=/etc/rancher/k3s/k3s.yaml --server=${{ env.K8S_API_SERVER }}'; \$KUBECTL apply -f -"
|
||||
echo "✅ Service Registry ConfigMap 已更新"
|
||||
|
||||
# ─── Step 2: 更新 kustomization.yaml image tag ───
|
||||
@@ -677,7 +853,7 @@ jobs:
|
||||
git commit -m "chore(cd): deploy ${IMAGE_TAG::7} [skip ci]"
|
||||
# 用 token 推送(避免 SSH key 需要額外設定 push 權限)
|
||||
git remote remove gitea 2>/dev/null || true
|
||||
git remote add gitea http://wooo:${GITEA_TOKEN}@192.168.0.110:3001/wooo/awoooi.git
|
||||
git remote add gitea "http://wooo:${{ secrets.CD_PUSH_TOKEN }}@192.168.0.110:3001/wooo/awoooi.git"
|
||||
# 先 rebase 避免 non-fast-forward (其他 commit 在 CI 期間已推入)
|
||||
# 2026-04-17 ogt: -X theirs — kustomization.yaml 衝突時採用當次部署的 image tag
|
||||
git fetch gitea main
|
||||
@@ -688,23 +864,24 @@ jobs:
|
||||
}
|
||||
|
||||
# ─── Step 4: 等待 ArgoCD sync + rollout ───
|
||||
ssh -i ~/.ssh/deploy_key wooo@192.168.0.121 \
|
||||
ssh $SSH_OPTS "wooo@${{ env.K8S_SSH_HOST }}" \
|
||||
"EXPECTED_REVISION='${DEPLOY_REVISION}' bash -s" << 'ARGOCD_WAIT'
|
||||
set -e
|
||||
export KUBECONFIG=/etc/rancher/k3s/k3s.yaml
|
||||
K8S_API_SERVER="${{ env.K8S_API_SERVER }}"
|
||||
KUBECTL="sudo kubectl --kubeconfig=/etc/rancher/k3s/k3s.yaml --server=${K8S_API_SERVER}"
|
||||
|
||||
# 等待 ArgoCD Application Synced(最多 180s)。只看
|
||||
# Synced/Healthy 可能誤判成上一個 revision 已同步,因此有
|
||||
# deploy commit 時必須同時確認 status.sync.revision。
|
||||
echo "⏳ 等待 ArgoCD sync..."
|
||||
sudo kubectl annotate application awoooi-prod -n argocd \
|
||||
$KUBECTL annotate application awoooi-prod -n argocd \
|
||||
argocd.argoproj.io/refresh=hard --overwrite >/dev/null 2>&1 || true
|
||||
for i in $(seq 1 36); do
|
||||
SYNC=$(sudo kubectl get application awoooi-prod -n argocd \
|
||||
SYNC=$($KUBECTL get application awoooi-prod -n argocd \
|
||||
-o jsonpath='{.status.sync.status}' 2>/dev/null || echo "Unknown")
|
||||
HEALTH=$(sudo kubectl get application awoooi-prod -n argocd \
|
||||
HEALTH=$($KUBECTL get application awoooi-prod -n argocd \
|
||||
-o jsonpath='{.status.health.status}' 2>/dev/null || echo "Unknown")
|
||||
REVISION=$(sudo kubectl get application awoooi-prod -n argocd \
|
||||
REVISION=$($KUBECTL get application awoooi-prod -n argocd \
|
||||
-o jsonpath='{.status.sync.revision}' 2>/dev/null || echo "Unknown")
|
||||
SHORT_REVISION=$(echo "$REVISION" | cut -c1-8)
|
||||
SHORT_EXPECTED=$(echo "$EXPECTED_REVISION" | cut -c1-8)
|
||||
@@ -723,15 +900,15 @@ jobs:
|
||||
done
|
||||
|
||||
# 確認 rollout 完成
|
||||
sudo kubectl rollout status deployment/awoooi-api -n awoooi-prod --timeout=120s
|
||||
sudo kubectl rollout status deployment/awoooi-web -n awoooi-prod --timeout=120s
|
||||
sudo kubectl rollout status deployment/awoooi-worker -n awoooi-prod --timeout=120s
|
||||
$KUBECTL rollout status deployment/awoooi-api -n awoooi-prod --timeout=120s
|
||||
$KUBECTL rollout status deployment/awoooi-web -n awoooi-prod --timeout=120s
|
||||
$KUBECTL rollout status deployment/awoooi-worker -n awoooi-prod --timeout=120s
|
||||
echo "✅ 部署完成"
|
||||
|
||||
# Health Check
|
||||
HEALTH_PASS=0
|
||||
for i in 1 2 3; do
|
||||
HTTP_CODE=$(curl -s -w "%{http_code}" -o /dev/null --connect-timeout 10 "http://localhost:32334/api/v1/health")
|
||||
HTTP_CODE=$(curl -s -w "%{http_code}" -o /dev/null --connect-timeout 10 "${{ env.API_HEALTH_URL }}")
|
||||
if [ "$HTTP_CODE" = "200" ]; then
|
||||
echo "✅ API 健康檢查通過"
|
||||
HEALTH_PASS=1
|
||||
@@ -747,36 +924,18 @@ jobs:
|
||||
ARGOCD_WAIT
|
||||
|
||||
# 2026-04-09 Claude Sonnet 4.6: Sprint 5.2 — 同步 ops 腳本到 188 (ollama user)
|
||||
# DEPLOY_SSH_KEY_188 = gitea-cd-deploy-188 (ed25519,只有 188 authorized_keys)
|
||||
# 腳本: docker-health-monitor.sh + pg-backup.sh (感知層 + 備份)
|
||||
# 188 deploy key is rotated and must not be read by this disabled step.
|
||||
# 腳本: docker-health-monitor.sh + pg-backup.sh + notify-awoooi-ops.sh
|
||||
# 感知層與備份通知都先走 AWOOI API/AwoooP,Telegram 直發只保留 API 離線 fallback。
|
||||
- name: Sync Ops Scripts to 188
|
||||
# 2026-05-13 Codex T14e/P0:
|
||||
# Disabled until the 188 ops sync path is moved to a file-secret or
|
||||
# Ansible-controlled channel. Gitea Actions logs step env values, and
|
||||
# multiline SSH secrets must not be exposed through CD logs.
|
||||
if: ${{ false }}
|
||||
continue-on-error: true
|
||||
env:
|
||||
SSH_KEY_188: ${{ secrets.DEPLOY_SSH_KEY_188 }}
|
||||
run: |
|
||||
mkdir -p ~/.ssh
|
||||
echo "$SSH_KEY_188" > ~/.ssh/deploy_key_188
|
||||
chmod 600 ~/.ssh/deploy_key_188
|
||||
ssh-keyscan 192.168.0.188 >> ~/.ssh/known_hosts 2>/dev/null
|
||||
|
||||
# 同步 docker-health-monitor.sh
|
||||
scp -i ~/.ssh/deploy_key_188 \
|
||||
scripts/ops/docker-health-monitor.sh \
|
||||
ollama@192.168.0.188:~/awoooi-ops/docker-health-monitor.sh \
|
||||
&& echo "✅ docker-health-monitor.sh 已同步" \
|
||||
|| echo "⚠️ docker-health-monitor.sh 同步失敗"
|
||||
|
||||
# 同步 pg-backup.sh
|
||||
scp -i ~/.ssh/deploy_key_188 \
|
||||
scripts/ops/pg-backup.sh \
|
||||
ollama@192.168.0.188:~/awoooi-ops/pg-backup.sh \
|
||||
&& echo "✅ pg-backup.sh 已同步" \
|
||||
|| echo "⚠️ pg-backup.sh 同步失敗"
|
||||
|
||||
# 確保執行權限
|
||||
ssh -i ~/.ssh/deploy_key_188 ollama@192.168.0.188 \
|
||||
"chmod +x ~/awoooi-ops/docker-health-monitor.sh ~/awoooi-ops/pg-backup.sh && echo '✅ 權限設定完成'" \
|
||||
|| echo "⚠️ 權限設定失敗"
|
||||
echo "188 ops script sync disabled pending secure key rotation path"
|
||||
|
||||
- name: Notify Pipeline Failure
|
||||
if: failure()
|
||||
@@ -786,10 +945,20 @@ jobs:
|
||||
ACTOR="${{ github.actor }}"
|
||||
COMMIT_ESC=$(echo "$COMMIT_MSG" | sed 's/&/\&amp;/g; s/</\&lt;/g; s/>/\&gt;/g')
|
||||
MSG=$(printf '❌ <b>AWOOOI 部署失敗</b>\n├ 📝 <code>%s</code>\n├ 🔖 <code>%s</code>\n├ 👤 %s\n├ 🏗️ Stage: build-and-deploy\n└ 🔗 http://192.168.0.110:3001/wooo/awoooi/actions' "${COMMIT_ESC}" "${SHORT_SHA}" "${ACTOR}")
|
||||
curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
|
||||
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
|
||||
-d "parse_mode=HTML" \
|
||||
--data-urlencode "text=${MSG}" || echo "TG notify failed (non-fatal): exit=$?"
|
||||
if AWOOI_CICD_STATUS=failed \
|
||||
AWOOI_CICD_STAGE=build-and-deploy \
|
||||
AWOOI_CICD_JOB_NAME="AWOOOI 部署失敗" \
|
||||
AWOOI_CICD_COMMIT_SHA="${GITHUB_SHA}" \
|
||||
AWOOI_CICD_TRIGGERED_BY="${ACTOR}" \
|
||||
AWOOI_CICD_SUMMARY="${COMMIT_MSG}" \
|
||||
scripts/ci/notify-awoooi-cicd.sh; then
|
||||
echo "✅ CI/CD build failure notification mirrored through AWOOI API"
|
||||
else
|
||||
curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
|
||||
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
|
||||
-d "parse_mode=HTML" \
|
||||
--data-urlencode "text=${MSG}" || echo "TG notify failed (non-fatal): exit=$?"
|
||||
fi
|
||||
|
||||
post-deploy-checks:
|
||||
needs: build-and-deploy
|
||||
@@ -798,6 +967,14 @@ jobs:
|
||||
# install-deps can also kill the act-managed job container with RWLayer=nil.
|
||||
runs-on: awoooi-host
|
||||
steps:
|
||||
- name: Bootstrap Host Runner Tools
|
||||
# 2026-05-05 Codex: post-deploy also uses checkout and curl-based
|
||||
# notifications, so it needs the same runner bootstrap as earlier jobs.
|
||||
run: |
|
||||
if command -v apk >/dev/null 2>&1; then
|
||||
apk add --no-cache nodejs npm git curl bash openssh-client docker-cli docker-cli-buildx
|
||||
fi
|
||||
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Get Commit Info
|
||||
@@ -814,7 +991,7 @@ jobs:
|
||||
- name: Alert Chain Smoke Test
|
||||
id: alert_chain_smoke
|
||||
run: |
|
||||
# 2026-04-05 Claude Code: 使用真實 API 地址(192.168.0.121:32334 NodePort)
|
||||
# 2026-05-05 Codex: use the keepalived VIP instead of a fixed node.
|
||||
# Host runner launches the CI image explicitly to avoid act RWLayer=nil.
|
||||
if docker run --rm \
|
||||
--name "awoooi-cd-${GITHUB_RUN_ID:-manual}-${GITHUB_RUN_ATTEMPT:-1}-alert-smoke" \
|
||||
@@ -824,7 +1001,7 @@ jobs:
|
||||
-v awoooi-api-venv-cache:/opt/api-venv \
|
||||
-w /workspace \
|
||||
"${{ env.CI_IMAGE }}" \
|
||||
bash -lc 'source /opt/api-venv/bin/activate && python3 scripts/alert_chain_smoke_test.py --api-url http://192.168.0.121:32334 --json | tee /tmp/alert_chain_result.json'; then
|
||||
bash -lc 'source /opt/api-venv/bin/activate && python3 scripts/alert_chain_smoke_test.py --api-url ${{ env.ALERT_CHAIN_API_URL }} --json | tee /tmp/alert_chain_result.json'; then
|
||||
echo "alert_chain_status=pass" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "alert_chain_status=fail" >> $GITHUB_OUTPUT
|
||||
@@ -941,9 +1118,19 @@ jobs:
|
||||
COMMIT_MSG="${{ steps.commit.outputs.message }}"
|
||||
SHORT_SHA="${{ steps.commit.outputs.short_sha }}"
|
||||
TG_MSG="✅ AWOOOI 部署完成\n├ 📝 ${COMMIT_MSG}\n├ 🔖 ${SHORT_SHA}\n├ ⏱️ 耗時: ${MINUTES}m ${SECONDS}s\n├ 📦 API: ✅ Web: ✅\n├ 🩺 Health: ✅\n├ 🔗 Alert Chain: ${ALERT_CHAIN_RESULT}\n├ 📊 Monitoring: ${MONITORING_RESULT}\n└ 🎭 Smoke: ${SMOKE_RESULT}"
|
||||
printf '%b' "$TG_MSG" | curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
|
||||
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
|
||||
--data-urlencode "text@-" || echo "TG notify warning (non-fatal)"
|
||||
if AWOOI_CICD_STATUS=success \
|
||||
AWOOI_CICD_STAGE=post-deploy \
|
||||
AWOOI_CICD_JOB_NAME="AWOOOI 部署完成" \
|
||||
AWOOI_CICD_COMMIT_SHA="${GITHUB_SHA}" \
|
||||
AWOOI_CICD_DURATION_SECONDS="${DURATION}" \
|
||||
AWOOI_CICD_SUMMARY="API=✅; Web=✅; AlertChain=${ALERT_CHAIN_RESULT}; Monitoring=${MONITORING_RESULT}; Smoke=${SMOKE_RESULT}" \
|
||||
scripts/ci/notify-awoooi-cicd.sh; then
|
||||
echo "✅ CI/CD success notification mirrored through AWOOI API"
|
||||
else
|
||||
printf '%b' "$TG_MSG" | curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
|
||||
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
|
||||
--data-urlencode "text@-" || echo "TG notify warning (non-fatal)"
|
||||
fi
|
||||
|
||||
- name: Notify Pipeline Failure
|
||||
# 2026-04-16 ogt + Claude Sonnet 4.6: 改用 HTML 結構化格式
|
||||
@@ -954,7 +1141,17 @@ jobs:
|
||||
ACTOR="${{ github.actor }}"
|
||||
COMMIT_ESC=$(echo "$COMMIT_MSG" | sed 's/&/\&amp;/g; s/</\&lt;/g; s/>/\&gt;/g')
|
||||
MSG=$(printf '❌ <b>AWOOOI 部署失敗</b>\n├ 📝 <code>%s</code>\n├ 🔖 <code>%s</code>\n├ 👤 %s\n├ 🩺 Stage: post-deploy-checks\n└ 🔗 http://192.168.0.110:3001/wooo/awoooi/actions' "${COMMIT_ESC}" "${SHORT_SHA}" "${ACTOR}")
|
||||
curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
|
||||
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
|
||||
-d "parse_mode=HTML" \
|
||||
--data-urlencode "text=${MSG}" || echo "TG notify failed (non-fatal): exit=$?"
|
||||
if AWOOI_CICD_STATUS=failed \
|
||||
AWOOI_CICD_STAGE=post-deploy-checks \
|
||||
AWOOI_CICD_JOB_NAME="AWOOOI 部署失敗" \
|
||||
AWOOI_CICD_COMMIT_SHA="${GITHUB_SHA}" \
|
||||
AWOOI_CICD_TRIGGERED_BY="${ACTOR}" \
|
||||
AWOOI_CICD_SUMMARY="${COMMIT_MSG}" \
|
||||
scripts/ci/notify-awoooi-cicd.sh; then
|
||||
echo "✅ CI/CD post-deploy failure notification mirrored through AWOOI API"
|
||||
else
|
||||
curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
|
||||
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
|
||||
-d "parse_mode=HTML" \
|
||||
--data-urlencode "text=${MSG}" || echo "TG notify failed (non-fatal): exit=$?"
|
||||
fi
|
||||
|
||||
@@ -30,6 +30,9 @@ jobs:
|
||||
with:
|
||||
fetch-depth: 50
|
||||
|
||||
- name: Guard Workflow Secret Surfaces
|
||||
run: node scripts/ci/check-gitea-step-env-secrets.js
|
||||
|
||||
- name: Skip Stale Main Push
|
||||
id: stale
|
||||
run: |
|
||||
@@ -102,7 +105,6 @@ jobs:
|
||||
- name: Notify Code Review Start
|
||||
if: steps.stale.outputs.skip != 'true'
|
||||
env:
|
||||
TG_BOT_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
|
||||
TG_CHAT_ID: ${{ env.TELEGRAM_ALERT_CHAT_ID }}
|
||||
SHORT_SHA: ${{ steps.ctx.outputs.short_sha }}
|
||||
BRANCH: ${{ steps.ctx.outputs.branch }}
|
||||
@@ -110,18 +112,33 @@ jobs:
|
||||
FILES_DISPLAY: ${{ steps.ctx.outputs.files_display }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
if [ -z "${TG_BOT_TOKEN:-}" ] || [ -z "${TG_CHAT_ID:-}" ]; then
|
||||
echo "Telegram secret missing; skip start notification"
|
||||
exit 0
|
||||
fi
|
||||
TG_BOT_TOKEN="$(cat <<'AWOOOI_SECRET_TG_BOT_TOKEN'
|
||||
${{ secrets.TELEGRAM_BOT_TOKEN }}
|
||||
AWOOOI_SECRET_TG_BOT_TOKEN
|
||||
)"
|
||||
html_escape() { sed 's/&/\&amp;/g; s/</\&lt;/g; s/>/\&gt;/g'; }
|
||||
COMMIT_ESC="$(printf '%s' "$COMMIT_MSG" | html_escape)"
|
||||
FILES_ESC="$(printf '%s\n' "$FILES_DISPLAY" | html_escape)"
|
||||
MSG="$(printf '🔍 <b>Code Review 啟動</b>\n──────────────────────\n📦 Commit <code>%s</code> 🌿 <code>%s</code>\n📝 <code>%s</code>\n📁 <b>變更檔案:</b>\n%s\n──────────────────────\n🤖 <b>Hermes → OpenClaw → Elephant Alpha → NemoTron</b>\n📊 即時進度:<a href=\"%s\">%s</a>' "$SHORT_SHA" "$BRANCH" "$COMMIT_ESC" "$FILES_ESC" "$REPORT_URL" "$REPORT_URL")"
|
||||
curl -fsS -X POST "https://api.telegram.org/bot${TG_BOT_TOKEN}/sendMessage" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "$(jq -n --arg c "$TG_CHAT_ID" --arg t "$MSG" '{chat_id:$c,text:$t,parse_mode:"HTML",disable_web_page_preview:true}')" \
|
||||
>/dev/null
|
||||
if AWOOI_CICD_STATUS=running \
|
||||
AWOOI_CICD_STAGE=code-review \
|
||||
AWOOI_CICD_JOB_NAME="Code Review 啟動" \
|
||||
AWOOI_CICD_COMMIT_SHA="${GITHUB_SHA}" \
|
||||
AWOOI_CICD_TRIGGERED_BY="${GITHUB_ACTOR:-CI}" \
|
||||
AWOOI_CICD_SUMMARY="${COMMIT_MSG}" \
|
||||
AWOOI_CICD_WORKFLOW_URL="${REPORT_URL}" \
|
||||
scripts/ci/notify-awoooi-cicd.sh; then
|
||||
echo "Code review start notification mirrored through AWOOI API"
|
||||
else
|
||||
if [ -z "${TG_BOT_TOKEN:-}" ] || [ -z "${TG_CHAT_ID:-}" ]; then
|
||||
echo "Telegram secret missing and AWOOI API notify failed; skip start notification"
|
||||
exit 0
|
||||
fi
|
||||
curl -fsS -X POST "https://api.telegram.org/bot${TG_BOT_TOKEN}/sendMessage" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "$(jq -n --arg c "$TG_CHAT_ID" --arg t "$MSG" '{chat_id:$c,text:$t,parse_mode:"HTML",disable_web_page_preview:true}')" \
|
||||
>/dev/null
|
||||
fi
|
||||
|
||||
- name: Run Deterministic Review
|
||||
if: steps.stale.outputs.skip != 'true'
|
||||
@@ -139,15 +156,14 @@ jobs:
|
||||
- name: Notify Code Review Completion
|
||||
if: always() && steps.stale.outputs.skip != 'true'
|
||||
env:
|
||||
TG_BOT_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
|
||||
TG_CHAT_ID: ${{ env.TELEGRAM_ALERT_CHAT_ID }}
|
||||
SHORT_SHA: ${{ steps.ctx.outputs.short_sha }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
if [ -z "${TG_BOT_TOKEN:-}" ] || [ -z "${TG_CHAT_ID:-}" ]; then
|
||||
echo "Telegram secret missing; skip completion notification"
|
||||
exit 0
|
||||
fi
|
||||
TG_BOT_TOKEN="$(cat <<'AWOOOI_SECRET_TG_BOT_TOKEN'
|
||||
${{ secrets.TELEGRAM_BOT_TOKEN }}
|
||||
AWOOOI_SECRET_TG_BOT_TOKEN
|
||||
)"
|
||||
REPORT=/tmp/code-review-report.json
|
||||
if [ ! -s "$REPORT" ]; then
|
||||
cat > "$REPORT" <<'JSON'
|
||||
@@ -180,7 +196,25 @@ jobs:
|
||||
TOP_ESC="$(printf '%s' "$TOP_ISSUE" | html_escape)"
|
||||
|
||||
MSG="$(printf '%s <b>Code Review 完成・%s</b>\n──────────────────────\n🔴 CRITICAL <code>%s</code> 🟠 HIGH <code>%s</code> 🟡 MEDIUM <code>%s</code> 🟢 LOW <code>%s</code>\n──────────────────────\n⚠️ <b>主要問題</b>\n%s\n\n🔍 <b>整體風險等級</b>\n%s:%s\n\n⚠️ <b>最高關注問題</b>\n1. %s\n──────────────────────\n🤖 Elephant Alpha:<b>%s</b> ✅ %s\n📊 完整報告:<a href=\"%s\">%s</a>' "$STATUS" "$SHORT_SHA" "$CRITICAL" "$HIGH" "$MEDIUM" "$LOW" "$ISSUE_LINE" "$RISK" "$SUMMARY_ESC" "$TOP_ESC" "$RISK" "$ACTION_ESC" "$REPORT_URL" "$REPORT_URL")"
|
||||
curl -fsS -X POST "https://api.telegram.org/bot${TG_BOT_TOKEN}/sendMessage" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "$(jq -n --arg c "$TG_CHAT_ID" --arg t "$MSG" '{chat_id:$c,text:$t,parse_mode:"HTML",disable_web_page_preview:true}')" \
|
||||
>/dev/null
|
||||
CICD_STATUS=success
|
||||
if [ "$RISK" = "MEDIUM" ]; then CICD_STATUS=pending; fi
|
||||
if [ "$RISK" = "HIGH" ] || [ "$RISK" = "CRITICAL" ]; then CICD_STATUS=failed; fi
|
||||
if AWOOI_CICD_STATUS="${CICD_STATUS}" \
|
||||
AWOOI_CICD_STAGE=code-review \
|
||||
AWOOI_CICD_JOB_NAME="Code Review 完成・${RISK}" \
|
||||
AWOOI_CICD_COMMIT_SHA="${GITHUB_SHA}" \
|
||||
AWOOI_CICD_TRIGGERED_BY="${GITHUB_ACTOR:-CI}" \
|
||||
AWOOI_CICD_SUMMARY="CRITICAL=${CRITICAL}; HIGH=${HIGH}; MEDIUM=${MEDIUM}; LOW=${LOW}; ${SUMMARY}" \
|
||||
AWOOI_CICD_WORKFLOW_URL="${REPORT_URL}" \
|
||||
scripts/ci/notify-awoooi-cicd.sh; then
|
||||
echo "Code review completion notification mirrored through AWOOI API"
|
||||
else
|
||||
if [ -z "${TG_BOT_TOKEN:-}" ] || [ -z "${TG_CHAT_ID:-}" ]; then
|
||||
echo "Telegram secret missing and AWOOI API notify failed; skip completion notification"
|
||||
exit 0
|
||||
fi
|
||||
curl -fsS -X POST "https://api.telegram.org/bot${TG_BOT_TOKEN}/sendMessage" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "$(jq -n --arg c "$TG_CHAT_ID" --arg t "$MSG" '{chat_id:$c,text:$t,parse_mode:"HTML",disable_web_page_preview:true}')" \
|
||||
>/dev/null
|
||||
fi
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
# =============================================================================
|
||||
# Deploy Prometheus Alert Rules (獨立 workflow)
|
||||
# 2026-04-05 Claude Code (ADR-039 I3): 從 cd.yaml 分離
|
||||
# 觸發條件: ops/monitoring/alerts-unified.yml 有變更 或 workflow_dispatch
|
||||
# 觸發條件: ops/monitoring/alerts-unified.yml / slo-rules.yml 有變更 或 workflow_dispatch
|
||||
# 說明: 告警規則部署不依賴應用構建,獨立觸發以加快響應速度
|
||||
# =============================================================================
|
||||
|
||||
@@ -12,6 +12,8 @@ on:
|
||||
branches: [main]
|
||||
paths:
|
||||
- 'ops/monitoring/alerts-unified.yml'
|
||||
- 'ops/monitoring/slo-rules.yml'
|
||||
- 'scripts/ops/deploy-alerts.sh'
|
||||
workflow_dispatch:
|
||||
|
||||
env:
|
||||
@@ -30,11 +32,15 @@ jobs:
|
||||
run: |
|
||||
pip3 install -q pyyaml 2>/dev/null || pip install -q pyyaml
|
||||
python3 -c "import yaml; yaml.safe_load(open('ops/monitoring/alerts-unified.yml')); print('YAML OK')"
|
||||
python3 -c "import yaml; yaml.safe_load(open('ops/monitoring/slo-rules.yml')); print('SLO YAML OK')"
|
||||
|
||||
- name: Setup SSH key
|
||||
run: |
|
||||
mkdir -p ~/.ssh
|
||||
echo "${{ secrets.DEPLOY_SSH_KEY }}" > ~/.ssh/id_ed25519
|
||||
umask 077
|
||||
cat > ~/.ssh/id_ed25519 <<'AWOOOI_DEPLOY_KEY'
|
||||
${{ secrets.DEPLOY_SSH_KEY }}
|
||||
AWOOOI_DEPLOY_KEY
|
||||
chmod 600 ~/.ssh/id_ed25519
|
||||
ssh-keyscan 192.168.0.110 >> ~/.ssh/known_hosts
|
||||
|
||||
@@ -50,6 +56,17 @@ jobs:
|
||||
SHORT_SHA="${{ github.sha }}"
|
||||
SHORT_SHA="${SHORT_SHA:0:7}"
|
||||
MSG="${EMOJI} Prometheus 告警規則部署 ${STATUS} (${SHORT_SHA})"
|
||||
curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
|
||||
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
|
||||
--data-urlencode "text=${MSG}" || true
|
||||
CICD_STATUS="success"
|
||||
[ "$STATUS" != "success" ] && CICD_STATUS="failed"
|
||||
if AWOOI_CICD_STATUS="${CICD_STATUS}" \
|
||||
AWOOI_CICD_STAGE=deploy-alerts \
|
||||
AWOOI_CICD_JOB_NAME="Prometheus 告警規則部署" \
|
||||
AWOOI_CICD_COMMIT_SHA="${{ github.sha }}" \
|
||||
AWOOI_CICD_SUMMARY="${MSG}" \
|
||||
scripts/ci/notify-awoooi-cicd.sh; then
|
||||
echo "Alert rule deploy notification mirrored through AWOOI API"
|
||||
else
|
||||
curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
|
||||
-d "chat_id=${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
|
||||
--data-urlencode "text=${MSG}" || true
|
||||
fi
|
||||
|
||||
@@ -54,7 +54,17 @@ jobs:
|
||||
- name: Notify Telegram on Failure
|
||||
if: failure()
|
||||
run: |
|
||||
curl -s -X POST "https://api.telegram.org/bot${{ secrets.OPENCLAW_TG_BOT_TOKEN }}/sendMessage" \
|
||||
-d chat_id="${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
|
||||
-d parse_mode="HTML" \
|
||||
-d text="🔴 <b>[E2E Health Check]</b> 失敗%0A%0A📅 $(TZ=Asia/Taipei date '+%Y-%m-%d %H:%M')%0A🔗 API 健康檢查未通過%0A%0A請檢查 K3s 叢集狀態"
|
||||
MSG="E2E Health Check 失敗;API 健康檢查未通過"
|
||||
if AWOOI_CICD_STATUS=failed \
|
||||
AWOOI_CICD_STAGE=e2e-health \
|
||||
AWOOI_CICD_JOB_NAME="E2E Health Check" \
|
||||
AWOOI_CICD_COMMIT_SHA="${{ github.sha }}" \
|
||||
AWOOI_CICD_SUMMARY="${MSG}" \
|
||||
scripts/ci/notify-awoooi-cicd.sh; then
|
||||
echo "E2E failure notification mirrored through AWOOI API"
|
||||
else
|
||||
curl -s -X POST "https://api.telegram.org/bot${{ secrets.OPENCLAW_TG_BOT_TOKEN }}/sendMessage" \
|
||||
-d chat_id="${{ env.TELEGRAM_ALERT_CHAT_ID }}" \
|
||||
-d parse_mode="HTML" \
|
||||
-d text="🔴 <b>[E2E Health Check]</b> 失敗%0A%0A📅 $(TZ=Asia/Taipei date '+%Y-%m-%d %H:%M')%0A🔗 API 健康檢查未通過%0A%0A請檢查 K3s 叢集狀態"
|
||||
fi
|
||||
|
||||
@@ -17,6 +17,7 @@ on:
|
||||
branches: [main]
|
||||
paths:
|
||||
- 'apps/api/migrations/*.sql'
|
||||
workflow_dispatch:
|
||||
|
||||
env:
|
||||
TELEGRAM_ALERT_CHAT_ID: "-1003711974679"
|
||||
@@ -56,45 +57,101 @@ jobs:
|
||||
- name: Identify new migrations
|
||||
id: diff
|
||||
run: |
|
||||
NEW_FILES=$(git diff --name-only --diff-filter=A HEAD~1 HEAD -- 'apps/api/migrations/*.sql' || true)
|
||||
ALL_NEW_FILES=$(git diff --no-renames --name-only --diff-filter=A HEAD~1 HEAD -- 'apps/api/migrations/*.sql' || true)
|
||||
NEW_FILES=$(echo "$ALL_NEW_FILES" | grep -Ev '(_down|rollback)\.sql$' || true)
|
||||
SKIPPED_ROLLBACK_FILES=$(echo "$ALL_NEW_FILES" | grep -E '(_down|rollback)\.sql$' || true)
|
||||
echo "new_files<<EOF" >> $GITHUB_OUTPUT
|
||||
echo "$NEW_FILES" >> $GITHUB_OUTPUT
|
||||
echo "EOF" >> $GITHUB_OUTPUT
|
||||
echo "=== New migration files ==="
|
||||
echo "$NEW_FILES"
|
||||
if [ -n "$SKIPPED_ROLLBACK_FILES" ]; then
|
||||
echo "=== Rollback/down migrations skipped by design ==="
|
||||
echo "$SKIPPED_ROLLBACK_FILES"
|
||||
fi
|
||||
|
||||
- name: Apply new migrations
|
||||
if: steps.diff.outputs.new_files != ''
|
||||
env:
|
||||
# 從 Gitea secrets 取,不直接明碼
|
||||
PGURL: ${{ secrets.MIGRATION_DATABASE_URL }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
# 從 Gitea secrets 取,不放 step-level env,避免 runner log 展開。
|
||||
# MIGRATION_DATABASE_URL 是限權帳號;DATABASE_URL 只在 PostgreSQL
|
||||
# 明確回報「必須是 table owner」時作為受控 fallback。
|
||||
PGURL="$(cat <<'AWOOOI_SECRET_MIGRATION_DATABASE_URL'
|
||||
${{ secrets.MIGRATION_DATABASE_URL }}
|
||||
AWOOOI_SECRET_MIGRATION_DATABASE_URL
|
||||
)"
|
||||
OWNER_PGURL="$(cat <<'AWOOOI_SECRET_DATABASE_URL'
|
||||
${{ secrets.DATABASE_URL }}
|
||||
AWOOOI_SECRET_DATABASE_URL
|
||||
)"
|
||||
if [ -z "$PGURL" ]; then
|
||||
echo "::error::MIGRATION_DATABASE_URL secret not set in Gitea"
|
||||
exit 1
|
||||
fi
|
||||
PGURL_PSQL="${PGURL/postgresql+asyncpg:\/\//postgresql:\/\/}"
|
||||
OWNER_PGURL_PSQL="${OWNER_PGURL/postgresql+asyncpg:\/\//postgresql:\/\/}"
|
||||
|
||||
apply_migration() {
|
||||
local url="$1"
|
||||
local file="$2"
|
||||
psql "$url" \
|
||||
-v ON_ERROR_STOP=1 \
|
||||
--single-transaction \
|
||||
-f "$file"
|
||||
}
|
||||
|
||||
# 套用每個新檔 (single transaction per file)
|
||||
echo "${{ steps.diff.outputs.new_files }}" | while IFS= read -r file; do
|
||||
[ -z "$file" ] && continue
|
||||
echo "=== Applying: $file ==="
|
||||
psql "$PGURL_PSQL" \
|
||||
-v ON_ERROR_STOP=1 \
|
||||
--single-transaction \
|
||||
-f "$file"
|
||||
migration_err="$(mktemp)"
|
||||
if ! apply_migration "$PGURL_PSQL" "$file" 2>"$migration_err"; then
|
||||
if grep -Eq "(must be owner of table|permission denied for table)" "$migration_err"; then
|
||||
if [ -z "$OWNER_PGURL_PSQL" ]; then
|
||||
cat "$migration_err" >&2
|
||||
echo "::error::migration requires table owner but DATABASE_URL secret is not set"
|
||||
exit 1
|
||||
fi
|
||||
echo "::warning::migration requires table owner; retrying with owner connection"
|
||||
apply_migration "$OWNER_PGURL_PSQL" "$file"
|
||||
else
|
||||
cat "$migration_err" >&2
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
rm -f "$migration_err"
|
||||
echo "=== OK: $file ==="
|
||||
done
|
||||
|
||||
- name: Seed asset_discovery_run (audit)
|
||||
if: steps.diff.outputs.new_files != ''
|
||||
env:
|
||||
PGURL: ${{ secrets.MIGRATION_DATABASE_URL }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
PGURL="$(cat <<'AWOOOI_SECRET_MIGRATION_DATABASE_URL'
|
||||
${{ secrets.MIGRATION_DATABASE_URL }}
|
||||
AWOOOI_SECRET_MIGRATION_DATABASE_URL
|
||||
)"
|
||||
OWNER_PGURL="$(cat <<'AWOOOI_SECRET_DATABASE_URL'
|
||||
${{ secrets.DATABASE_URL }}
|
||||
AWOOOI_SECRET_DATABASE_URL
|
||||
)"
|
||||
if [ -z "$PGURL" ]; then
|
||||
echo "::error::MIGRATION_DATABASE_URL secret not set in Gitea"
|
||||
exit 1
|
||||
fi
|
||||
PGURL_PSQL="${PGURL/postgresql+asyncpg:\/\//postgresql:\/\/}"
|
||||
OWNER_PGURL_PSQL="${OWNER_PGURL/postgresql+asyncpg:\/\//postgresql:\/\/}"
|
||||
FILES_JSON=$(echo "${{ steps.diff.outputs.new_files }}" | jq -Rn '[inputs | select(length > 0)]')
|
||||
psql "$PGURL_PSQL" -c "
|
||||
SUMMARY_JSON=$(jq -cn \
|
||||
--arg commit_sha "${{ github.sha }}" \
|
||||
--argjson files "$FILES_JSON" \
|
||||
'{type: "ci_migration", commit_sha: $commit_sha, files: $files}')
|
||||
SUMMARY_JSON_SQL=${SUMMARY_JSON//\'/\'\'}
|
||||
|
||||
seed_audit() {
|
||||
local url="$1"
|
||||
psql "$url" -v ON_ERROR_STOP=1 <<SQL
|
||||
INSERT INTO asset_discovery_run (
|
||||
run_id, triggered_by, scope, scan_depth, status,
|
||||
started_at, ended_at, tools_used, summary
|
||||
@@ -106,23 +163,51 @@ jobs:
|
||||
'success',
|
||||
NOW(),
|
||||
NOW(),
|
||||
'{\"psql\": 1, \"gitea_ci\": 1}'::jsonb,
|
||||
jsonb_build_object(
|
||||
'type', 'ci_migration',
|
||||
'commit_sha', '${{ github.sha }}',
|
||||
'files', $FILES_JSON
|
||||
)
|
||||
'{"psql": 1, "gitea_ci": 1}'::jsonb,
|
||||
'${SUMMARY_JSON_SQL}'::jsonb
|
||||
);
|
||||
"
|
||||
SQL
|
||||
}
|
||||
|
||||
audit_err="$(mktemp)"
|
||||
if ! seed_audit "$PGURL_PSQL" 2>"$audit_err"; then
|
||||
if grep -q "permission denied for table asset_discovery_run" "$audit_err"; then
|
||||
if [ -z "$OWNER_PGURL_PSQL" ]; then
|
||||
cat "$audit_err" >&2
|
||||
echo "::error::audit requires table insert privilege but DATABASE_URL secret is not set"
|
||||
exit 1
|
||||
fi
|
||||
echo "::warning::audit requires owner connection; retrying with owner connection"
|
||||
seed_audit "$OWNER_PGURL_PSQL"
|
||||
else
|
||||
cat "$audit_err" >&2
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
rm -f "$audit_err"
|
||||
|
||||
- name: Notify Telegram (if configured)
|
||||
if: always()
|
||||
env:
|
||||
TG_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
|
||||
TG_CHAT: ${{ env.TELEGRAM_ALERT_CHAT_ID }}
|
||||
run: |
|
||||
TG_TOKEN="$(cat <<'AWOOOI_SECRET_TG_TOKEN'
|
||||
${{ secrets.TELEGRAM_BOT_TOKEN }}
|
||||
AWOOOI_SECRET_TG_TOKEN
|
||||
)"
|
||||
STATUS="${{ job.status }}"
|
||||
CICD_STATUS="success"
|
||||
[ "$STATUS" != "success" ] && CICD_STATUS="failed"
|
||||
if AWOOI_CICD_STATUS="${CICD_STATUS}" \
|
||||
AWOOI_CICD_STAGE=run-migration \
|
||||
AWOOI_CICD_JOB_NAME="Migration CI" \
|
||||
AWOOI_CICD_COMMIT_SHA="${{ github.sha }}" \
|
||||
AWOOI_CICD_SUMMARY="Migration CI: ${STATUS}" \
|
||||
scripts/ci/notify-awoooi-cicd.sh; then
|
||||
echo "Migration notification mirrored through AWOOI API"
|
||||
exit 0
|
||||
fi
|
||||
if [ -n "$TG_TOKEN" ] && [ -n "$TG_CHAT" ]; then
|
||||
STATUS="${{ job.status }}"
|
||||
MSG="🗄️ Migration CI: \`${STATUS}\` — commit ${{ github.sha }}"
|
||||
curl -s -X POST "https://api.telegram.org/bot${TG_TOKEN}/sendMessage" \
|
||||
-d chat_id="${TG_CHAT}" \
|
||||
|
||||
25
.github/workflows/cd.yaml
vendored
25
.github/workflows/cd.yaml
vendored
@@ -13,12 +13,10 @@
|
||||
|
||||
name: CD
|
||||
|
||||
# 2026-05-12 Codex: GitHub 僅保留唯讀備份;生產 CI/CD 只能從 Gitea 執行。
|
||||
# 本 workflow 曾可 push / workflow_dispatch 後 build、patch secret、kubectl apply,
|
||||
# 會和 `.gitea/workflows/cd.yaml` 競爭 K3s production 狀態,因此硬停用。
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
paths-ignore:
|
||||
- 'docs/**'
|
||||
- '*.md'
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
force_deploy:
|
||||
@@ -60,6 +58,7 @@ jobs:
|
||||
# ==================== Pre-flight Check (10s Fail-Fast) ====================
|
||||
pre-flight-check:
|
||||
name: "Pre-flight Check"
|
||||
if: ${{ false }}
|
||||
runs-on: [self-hosted, harbor, k8s]
|
||||
timeout-minutes: 1
|
||||
steps:
|
||||
@@ -133,6 +132,7 @@ jobs:
|
||||
# 2026-03-29 Claude Code: 確保監控覆蓋率 >= 90%
|
||||
monitoring-coverage:
|
||||
name: "Monitoring Coverage"
|
||||
if: ${{ false }}
|
||||
runs-on: [self-hosted, harbor, k8s]
|
||||
needs: pre-flight-check
|
||||
timeout-minutes: 2
|
||||
@@ -152,6 +152,7 @@ jobs:
|
||||
# ==================== 路徑偵測 (使用 dorny/paths-filter) ====================
|
||||
detect-changes:
|
||||
name: Detect Changes
|
||||
if: ${{ false }}
|
||||
runs-on: [self-hosted, harbor, k8s]
|
||||
needs: [pre-flight-check, monitoring-coverage]
|
||||
timeout-minutes: 1
|
||||
@@ -197,11 +198,7 @@ jobs:
|
||||
runs-on: [self-hosted, harbor, k8s]
|
||||
needs: [detect-changes, build-web]
|
||||
timeout-minutes: 20
|
||||
if: |
|
||||
!inputs.skip_api && (
|
||||
needs.detect-changes.outputs.api == 'true' ||
|
||||
(needs.detect-changes.outputs.api == 'false' && needs.detect-changes.outputs.web == 'false')
|
||||
)
|
||||
if: ${{ false }}
|
||||
outputs:
|
||||
image_tag: ${{ steps.tag.outputs.tag }}
|
||||
steps:
|
||||
@@ -238,11 +235,7 @@ jobs:
|
||||
runs-on: [self-hosted, harbor, k8s]
|
||||
needs: detect-changes
|
||||
timeout-minutes: 20
|
||||
if: |
|
||||
!inputs.skip_web && (
|
||||
needs.detect-changes.outputs.web == 'true' ||
|
||||
(needs.detect-changes.outputs.api == 'false' && needs.detect-changes.outputs.web == 'false')
|
||||
)
|
||||
if: ${{ false }}
|
||||
outputs:
|
||||
image_tag: ${{ steps.tag.outputs.tag }}
|
||||
steps:
|
||||
@@ -293,7 +286,7 @@ jobs:
|
||||
concurrency:
|
||||
group: runner-awoooi-cd-mutex
|
||||
cancel-in-progress: false
|
||||
if: always() && (needs.build-api.result == 'success' || needs.build-api.result == 'skipped') && (needs.build-web.result == 'success' || needs.build-web.result == 'skipped')
|
||||
if: ${{ false }}
|
||||
environment: production
|
||||
steps:
|
||||
# 2026-03-29: Runner 診斷檔案清理 (防止並行衝突)
|
||||
|
||||
17
.github/workflows/deploy-prod.yml
vendored
17
.github/workflows/deploy-prod.yml
vendored
@@ -14,15 +14,10 @@
|
||||
|
||||
name: Deploy to Production
|
||||
|
||||
# 2026-05-12 Codex: GitHub 是唯讀備份,production deploy 只能從 Gitea 進入。
|
||||
# 這份歷史 workflow 仍含 Harbor build/push 與 kubectl apply/rollout,會和 Gitea CD 競爭。
|
||||
# 保留檔案供稽核,但停用所有 job。
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
paths:
|
||||
- 'apps/api/**'
|
||||
- 'apps/web/**'
|
||||
- 'k8s/awoooi-prod/**'
|
||||
- '.github/workflows/deploy-prod.yml'
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
deploy_api:
|
||||
@@ -70,6 +65,7 @@ jobs:
|
||||
# ===========================================================================
|
||||
build:
|
||||
name: "Build Images"
|
||||
if: ${{ false }}
|
||||
runs-on: [self-hosted, harbor, k8s]
|
||||
outputs:
|
||||
image_tag: ${{ steps.meta.outputs.tag }}
|
||||
@@ -138,6 +134,7 @@ jobs:
|
||||
deploy:
|
||||
name: "Deploy to K3s"
|
||||
needs: build
|
||||
if: ${{ false }}
|
||||
runs-on: [self-hosted, harbor, k8s]
|
||||
|
||||
steps:
|
||||
@@ -210,7 +207,7 @@ jobs:
|
||||
smoke-test:
|
||||
name: "Smoke Tests"
|
||||
needs: deploy
|
||||
if: ${{ !inputs.skip_tests }}
|
||||
if: ${{ false }}
|
||||
runs-on: [self-hosted, harbor, k8s]
|
||||
|
||||
steps:
|
||||
@@ -248,7 +245,7 @@ jobs:
|
||||
notify:
|
||||
name: "Send Notification"
|
||||
needs: [build, deploy, smoke-test]
|
||||
if: always()
|
||||
if: ${{ false }}
|
||||
runs-on: [self-hosted, harbor, k8s]
|
||||
|
||||
steps:
|
||||
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -93,3 +93,4 @@ tsconfig.tsbuildinfo
|
||||
!.aiderignore
|
||||
.claude/settings.local.json
|
||||
.claude/settings.json
|
||||
.claude/settings.json.bak*
|
||||
|
||||
@@ -31,6 +31,9 @@
|
||||
|
||||
## 🔴 絕對禁止 → [HARD_RULES.md](docs/HARD_RULES.md)
|
||||
|
||||
## 🔴 文件語言鐵律 → [文件語言規範](docs/HARD_RULES.md#文件語言規範)
|
||||
Markdown、ADR、LOGBOOK、Runbook、交接文件與計畫文件一律使用繁體中文;程式符號、API、指令、錯誤碼、服務名稱與原始 log 可保留英文。
|
||||
|
||||
## 🔴 紅區治理 → [RED_ZONES.md](docs/RED_ZONES.md)
|
||||
Tier 3 核心檔案 (decision_manager, trust_engine, config 等) 修改需首席架構師授權
|
||||
|
||||
|
||||
@@ -0,0 +1,49 @@
|
||||
-- ADR-090 capacity_violation_event metric violation types
|
||||
-- 日期:2026-05-07(台北)
|
||||
-- 目的:讓 capacity_scanner_job.py 寫入的 cpu/mem/swap 細項違規符合 DB constraint。
|
||||
--
|
||||
-- 背景:
|
||||
-- capacity_scanner_job.py 會寫入:
|
||||
-- - cpu_over_threshold
|
||||
-- - mem_over_threshold
|
||||
-- - swap_over_threshold
|
||||
-- 但原始 ADR-090 DDL 只允許較粗的 host_saturation,導致 production 出現
|
||||
-- capacity_violation_event_type_valid check violation,容量治理事件漏記。
|
||||
|
||||
-- Rebuild the violation_type allowlist on capacity_violation_event.
-- Both ALTERs run inside one transaction, so the table is never committed
-- without its CHECK constraint.
BEGIN;

-- Remove the previous allowlist; a no-op when the constraint is absent.
ALTER TABLE capacity_violation_event
    DROP CONSTRAINT IF EXISTS capacity_violation_event_type_valid;

-- Re-create it: the six original types plus the four per-metric
-- *_over_threshold types (cpu, mem, swap, load).
ALTER TABLE capacity_violation_event
    ADD CONSTRAINT capacity_violation_event_type_valid
    CHECK (
        violation_type IN (
            'no_limit_set', 'over_request', 'over_limit',
            'host_saturation', 'over_sla_budget', 'unauthorized_new_deploy',
            'cpu_over_threshold', 'mem_over_threshold',
            'swap_over_threshold', 'load_over_threshold'
        )
    );

COMMIT;
|
||||
|
||||
-- Rollback(需人工確認後執行):
|
||||
-- BEGIN;
|
||||
-- ALTER TABLE capacity_violation_event
|
||||
-- DROP CONSTRAINT IF EXISTS capacity_violation_event_type_valid;
|
||||
-- ALTER TABLE capacity_violation_event
|
||||
-- ADD CONSTRAINT capacity_violation_event_type_valid
|
||||
-- CHECK (violation_type IN (
|
||||
-- 'no_limit_set',
|
||||
-- 'over_request',
|
||||
-- 'over_limit',
|
||||
-- 'host_saturation',
|
||||
-- 'over_sla_budget',
|
||||
-- 'unauthorized_new_deploy'
|
||||
-- ));
|
||||
-- COMMIT;
|
||||
36
apps/api/migrations/adr090d_ansible_operation_types.sql
Normal file
36
apps/api/migrations/adr090d_ansible_operation_types.sql
Normal file
@@ -0,0 +1,36 @@
|
||||
-- ADR-090-D: automation_operation_log.operation_type adds Ansible executor audit states
|
||||
-- Created: 2026-05-12 Taipei
|
||||
--
|
||||
-- Purpose:
|
||||
-- T3 Ansible declarative executor visibility. These operation types allow
|
||||
-- the AI automation truth chain to record that Ansible was matched,
|
||||
-- check-mode executed, applied, rolled back, or explicitly skipped.
|
||||
--
|
||||
-- Safety:
|
||||
-- This migration only expands the CHECK allowlist. It does not execute
|
||||
-- Ansible, change approval behavior, or create auto-remediation rows.
|
||||
|
||||
-- Replace the operation_type allowlist on automation_operation_log so that it
-- also accepts the five Ansible executor audit states (ansible_*).
--
-- The drop and the re-add are sub-commands of ONE ALTER TABLE statement, so
-- they succeed or fail together. If the ADD is rejected, the previous
-- constraint stays in place instead of the table being left with no allowlist
-- when this script is executed statement-by-statement in autocommit mode.
ALTER TABLE automation_operation_log
    DROP CONSTRAINT IF EXISTS automation_operation_log_type_valid,
    ADD CONSTRAINT automation_operation_log_type_valid CHECK (operation_type IN (
        'monitor_configured','monitor_removed',
        'alert_fired','alert_suppressed','alert_routed',
        'rule_created','rule_updated','rule_matched','rule_rejected','rule_deprecated',
        'playbook_generated','playbook_updated','playbook_executed',
        'remediation_executed','remediation_verified','remediation_rolled_back',
        'self_correction_attempted',
        'km_created','km_updated','km_linked',
        'asset_discovered','coverage_recalculated',
        'capacity_recommendation','quota_enforced',
        'notification_formatted',
        -- ADR-090-D additions: Ansible executor audit states.
        'ansible_candidate_matched',
        'ansible_check_mode_executed',
        'ansible_apply_executed',
        'ansible_rollback_executed',
        'ansible_execution_skipped'
    ));

-- Record on the constraint itself why the allowlist was widened.
COMMENT ON CONSTRAINT automation_operation_log_type_valid ON automation_operation_log IS
    'ADR-090-D: allow first-class Ansible executor audit states for AwoooP truth-chain visibility.';
|
||||
19
apps/api/migrations/adr090d_ansible_operation_types_down.sql
Normal file
19
apps/api/migrations/adr090d_ansible_operation_types_down.sql
Normal file
@@ -0,0 +1,19 @@
|
||||
-- ADR-090-D rollback: remove Ansible executor audit states from operation_type allowlist.
|
||||
-- Only apply after confirming no automation_operation_log rows use ansible_* operation types.
|
||||
|
||||
-- Restore the pre-ADR-090-D operation_type allowlist (no ansible_* values).
--
-- The ADD is rejected if any automation_operation_log row still carries an
-- ansible_* operation_type. Drop and re-add are therefore issued as
-- sub-commands of ONE ALTER TABLE statement: on failure the whole statement
-- is undone and the current (wider) constraint remains, rather than the table
-- ending up with no allowlist at all.
ALTER TABLE automation_operation_log
    DROP CONSTRAINT IF EXISTS automation_operation_log_type_valid,
    ADD CONSTRAINT automation_operation_log_type_valid CHECK (operation_type IN (
        'monitor_configured','monitor_removed',
        'alert_fired','alert_suppressed','alert_routed',
        'rule_created','rule_updated','rule_matched','rule_rejected','rule_deprecated',
        'playbook_generated','playbook_updated','playbook_executed',
        'remediation_executed','remediation_verified','remediation_rolled_back',
        'self_correction_attempted',
        'km_created','km_updated','km_linked',
        'asset_discovered','coverage_recalculated',
        'capacity_recommendation','quota_enforced',
        'notification_formatted'
    ));
|
||||
@@ -0,0 +1,164 @@
|
||||
-- T9: approved SSH execution MCP Gateway seed
|
||||
-- 目的:讓 Telegram/Approval 已批准的 SSH 修復動作通過 AwoooP Gateway 五閘門。
|
||||
-- 邊界:只授權 approval_executor;write/admin 仍需 Gate 5 短效 approval key。
|
||||
|
||||
-- Set app.project_id for the session (third argument FALSE = not
-- transaction-local).
-- NOTE(review): presumably read by row-level-security policies; confirm.
SELECT set_config('app.project_id', 'awoooi', FALSE);

-- Publish contract revision 1.0 for agent `approval_executor` and make it the
-- project's active revision for that agent.
--   contract_doc       builds the JSON contract body.
--   new_revision       inserts revision 1.0; does nothing when it already exists.
--   candidate_revision the row just inserted, or an existing 1.0 row that is
--                      still 'active'.
--   pointer_upsert     creates or repoints the active-revision pointer.
-- NOTE(review): when 1.0 exists but is no longer 'active', candidate_revision
-- is empty and no pointer is written. Confirm that is intended.
WITH contract_doc AS (
    SELECT jsonb_build_object(
        'schema_version', 'awooop_agent_contract_v1',
        'agent_id', 'approval_executor',
        'display_name', 'Approval Executor',
        'project_id', 'awoooi',
        'purpose', 'Approved SSH execution through AwoooP MCP Gateway',
        'allowed_scopes', jsonb_build_array('read', 'write', 'admin'),
        'requires_gate5_for_scopes', jsonb_build_array('write', 'admin'),
        'stage', 't9_ssh_approval_gateway'
    ) AS body_json
),
new_revision AS (
    INSERT INTO awooop_contract_revisions (
        project_id, contract_family, contract_id,
        version_major, version_minor, lifecycle_status,
        body_json, body_hash, body_schema_version,
        publisher_id, published_at
    )
    SELECT
        'awoooi', 'agent', 'approval_executor',
        1, 0, 'active',
        body_json,
        -- presumably pgcrypto's digest(); verify the extension is installed
        encode(digest(body_json::text, 'sha256'), 'hex'),
        'v1.0',
        'migration:t9_ssh_approval_gateway',
        NOW()
    FROM contract_doc
    ON CONFLICT (project_id, contract_family, contract_id, version_major, version_minor)
        DO NOTHING
    RETURNING revision_id, project_id, contract_family, contract_id
),
candidate_revision AS (
    SELECT revision_id, project_id, contract_family, contract_id
    FROM new_revision
    UNION ALL
    SELECT revision_id, project_id, contract_family, contract_id
    FROM awooop_contract_revisions
    WHERE project_id = 'awoooi'
      AND contract_family = 'agent'
      AND contract_id = 'approval_executor'
      AND version_major = 1
      AND version_minor = 0
      AND lifecycle_status = 'active'
),
pointer_upsert AS (
    INSERT INTO awooop_active_revisions (
        project_id, contract_family, contract_id, active_revision_id, updated_at
    )
    -- DISTINCT ON keeps a single candidate per contract (lowest revision_id).
    SELECT DISTINCT ON (project_id, contract_family, contract_id)
        project_id, contract_family, contract_id, revision_id, NOW()
    FROM candidate_revision
    ORDER BY project_id, contract_family, contract_id, revision_id
    ON CONFLICT (project_id, contract_family, contract_id)
        DO UPDATE SET
            active_revision_id = EXCLUDED.active_revision_id,
            updated_at = NOW()
    RETURNING contract_id
)
-- Report how many pointers were written.
SELECT 'approval_executor_active_contracts', count(*) FROM pointer_upsert;
|
||||
|
||||
-- Register the SSH tools used by approved executions and grant each of them
-- to agent `approval_executor` with exactly the scope the tool requires.
--   ssh_tools        static list: tool name, description, required scope.
--   registered_tools upserts the registry rows and re-activates them.
--   granted          upserts one non-expiring, non-revoked grant per tool.
WITH ssh_tools(tool_name, description, required_scope) AS (
    VALUES
        ('ssh_diagnose', 'SSH host diagnosis read', 'read'),
        ('ssh_docker_restart', 'Approved Docker container restart over SSH', 'write'),
        ('ssh_docker_compose_restart', 'Approved Docker Compose service restart over SSH', 'write'),
        ('ssh_systemctl_restart', 'Approved systemd service restart over SSH', 'write'),
        ('ssh_clear_docker_logs', 'Approved Docker log truncation over SSH', 'write'),
        ('ssh_renew_ssl', 'Approved certbot renewal over SSH', 'write'),
        ('ssh_reload_nginx', 'Approved nginx config test and reload over SSH', 'write'),
        ('ssh_docker_prune', 'Approved Docker prune over SSH with provider disk guard', 'admin')
),
registered_tools AS (
    INSERT INTO awooop_mcp_tool_registry (
        project_id, tool_name, tool_type, description,
        allowed_scopes, environment_tags, is_active, updated_at
    )
    SELECT
        'awoooi',
        tool_name,
        'mcp_server',
        description,
        jsonb_build_array(required_scope),  -- single-element scope list
        '{"env": "prod"}'::jsonb,
        TRUE,
        NOW()
    FROM ssh_tools
    ON CONFLICT (project_id, tool_name)
        DO UPDATE SET
            description = EXCLUDED.description,
            allowed_scopes = EXCLUDED.allowed_scopes,
            environment_tags = EXCLUDED.environment_tags,
            is_active = TRUE,
            updated_at = NOW()
    RETURNING tool_id, tool_name, allowed_scopes
),
granted AS (
    INSERT INTO awooop_mcp_grants (
        project_id, agent_id, tool_id, granted_by, granted_scopes,
        expires_at, is_revoked, revoked_at, revoked_by
    )
    SELECT
        'awoooi',
        'approval_executor',
        tool_id,
        'migration:t9_ssh_approval_gateway',
        allowed_scopes,   -- grant mirrors the tool's own scope list
        NULL,             -- expires_at: no expiry
        FALSE,            -- is_revoked
        NULL,             -- revoked_at
        NULL              -- revoked_by
    FROM registered_tools
    ON CONFLICT (project_id, agent_id, tool_id)
        DO UPDATE SET
            granted_by = EXCLUDED.granted_by,
            granted_scopes = EXCLUDED.granted_scopes,
            expires_at = NULL,
            is_revoked = FALSE,
            revoked_at = NULL,
            revoked_by = NULL
    RETURNING grant_id
)
-- Report how many registry rows and grant rows were written.
SELECT
    'approval_executor_ssh_gateway',
    (SELECT count(*) FROM registered_tools) AS tool_rows,
    (SELECT count(*) FROM granted) AS grant_rows;
|
||||
@@ -0,0 +1,43 @@
|
||||
-- Rollback for T9 approved SSH execution MCP Gateway seed.
|
||||
-- Contract revisions are append-only; rollback revokes approval_executor grants
|
||||
-- and deactivates only the write/admin tools introduced here.
|
||||
|
||||
-- Set app.project_id for the session (third argument FALSE = not
-- transaction-local).
SELECT set_config('app.project_id', 'awoooi', FALSE);

-- 1. Revoke every still-live grant that the T9 seed gave to approval_executor.
UPDATE awooop_mcp_grants
   SET is_revoked = TRUE,
       revoked_at = NOW(),
       revoked_by = 'rollback:t9_ssh_approval_gateway'
 WHERE project_id = 'awoooi'
   AND agent_id = 'approval_executor'
   AND granted_by = 'migration:t9_ssh_approval_gateway'
   AND is_revoked = FALSE;

-- 2. Deactivate the write/admin SSH tools. ssh_diagnose (read) is not in the
--    list, so it stays active.
UPDATE awooop_mcp_tool_registry
   SET is_active = FALSE,
       updated_at = NOW()
 WHERE project_id = 'awoooi'
   AND tool_name IN (
        'ssh_docker_restart',
        'ssh_docker_compose_restart',
        'ssh_systemctl_restart',
        'ssh_clear_docker_logs',
        'ssh_renew_ssl',
        'ssh_reload_nginx',
        'ssh_docker_prune'
   );

-- 3. Remove the active-revision pointer for the approval_executor contract.
DELETE FROM awooop_active_revisions
 WHERE project_id = 'awoooi'
   AND contract_family = 'agent'
   AND contract_id = 'approval_executor';

-- 4. Mark the revision published by the T9 seed as no longer active.
-- NOTE(review): the T23 rollback in this change set writes 'retired' where this
-- one writes 'revoked'. Confirm which values lifecycle_status accepts.
UPDATE awooop_contract_revisions
   SET lifecycle_status = 'revoked'
 WHERE project_id = 'awoooi'
   AND contract_family = 'agent'
   AND contract_id = 'approval_executor'
   AND publisher_id = 'migration:t9_ssh_approval_gateway'
   AND lifecycle_status = 'active';
|
||||
@@ -0,0 +1,166 @@
|
||||
-- T23: auto-repair executor read-only MCP Gateway seed
|
||||
-- 目的:讓 YAML_RULE/PlayBook 的只讀 SSH 診斷步驟經過 AwoooP MCP Gateway。
|
||||
-- 邊界:只授權 read scope;write/admin SSH 工具仍必須走 approval_executor + Gate 5。
|
||||
|
||||
-- Set app.project_id for the session (third argument FALSE = not
-- transaction-local).
-- NOTE(review): presumably read by row-level-security policies; confirm.
SELECT set_config('app.project_id', 'awoooi', FALSE);

-- Publish contract revision 1.0 for agent `auto_repair_executor` (read scope
-- only) and make it the project's active revision for that agent.
--   contract_doc       builds the JSON contract body.
--   new_revision       inserts revision 1.0; does nothing when it already exists.
--   candidate_revision the row just inserted, or an existing 1.0 row that is
--                      still 'active'.
--   pointer_upsert     creates or repoints the active-revision pointer.
-- NOTE(review): when 1.0 exists but is no longer 'active', candidate_revision
-- is empty and no pointer is written. Confirm that is intended.
WITH contract_doc AS (
    SELECT jsonb_build_object(
        'schema_version', 'awooop_agent_contract_v1',
        'agent_id', 'auto_repair_executor',
        'display_name', 'Auto Repair Executor',
        'project_id', 'awoooi',
        'purpose', 'Read-only auto-repair diagnostics through AwoooP MCP Gateway',
        'allowed_scopes', jsonb_build_array('read'),
        'forbidden_scopes', jsonb_build_array('write', 'admin'),
        'stage', 't23_auto_repair_diagnostic_gateway'
    ) AS body_json
),
new_revision AS (
    INSERT INTO awooop_contract_revisions (
        project_id, contract_family, contract_id,
        version_major, version_minor, lifecycle_status,
        body_json, body_hash, body_schema_version,
        publisher_id, published_at
    )
    SELECT
        'awoooi', 'agent', 'auto_repair_executor',
        1, 0, 'active',
        body_json,
        -- presumably pgcrypto's digest(); verify the extension is installed
        encode(digest(body_json::text, 'sha256'), 'hex'),
        'v1.0',
        'migration:t23_auto_repair_executor_read_gateway',
        NOW()
    FROM contract_doc
    ON CONFLICT (project_id, contract_family, contract_id, version_major, version_minor)
        DO NOTHING
    RETURNING revision_id, project_id, contract_family, contract_id
),
candidate_revision AS (
    SELECT revision_id, project_id, contract_family, contract_id
    FROM new_revision
    UNION ALL
    SELECT revision_id, project_id, contract_family, contract_id
    FROM awooop_contract_revisions
    WHERE project_id = 'awoooi'
      AND contract_family = 'agent'
      AND contract_id = 'auto_repair_executor'
      AND version_major = 1
      AND version_minor = 0
      AND lifecycle_status = 'active'
),
pointer_upsert AS (
    INSERT INTO awooop_active_revisions (
        project_id, contract_family, contract_id, active_revision_id, updated_at
    )
    -- DISTINCT ON keeps a single candidate per contract (lowest revision_id).
    SELECT DISTINCT ON (project_id, contract_family, contract_id)
        project_id, contract_family, contract_id, revision_id, NOW()
    FROM candidate_revision
    ORDER BY project_id, contract_family, contract_id, revision_id
    ON CONFLICT (project_id, contract_family, contract_id)
        DO UPDATE SET
            active_revision_id = EXCLUDED.active_revision_id,
            updated_at = NOW()
    RETURNING contract_id
)
-- Report how many pointers were written.
SELECT 'auto_repair_executor_active_contracts', count(*) FROM pointer_upsert;
|
||||
|
||||
-- Register the read-only SSH diagnostic tools and grant each of them to agent
-- `auto_repair_executor` with read scope.
--   diagnostic_tools static list: tool name and description.
--   registered_tools upserts the registry rows and re-activates them.
--   granted          upserts one non-expiring, non-revoked grant per tool.
WITH diagnostic_tools(tool_name, description) AS (
    VALUES
        ('ssh_diagnose', 'SSH host/container diagnosis read'),
        ('ssh_get_top_processes', 'SSH top processes read'),
        ('ssh_get_disk_usage', 'SSH disk usage read'),
        ('ssh_get_memory_info', 'SSH memory info read'),
        ('ssh_get_container_logs', 'SSH container logs read'),
        ('ssh_get_container_status', 'SSH container status read'),
        ('ssh_get_service_status', 'SSH service status read'),
        ('ssh_check_port', 'SSH port check read'),
        ('ssh_get_nginx_error_log', 'SSH nginx error log read'),
        ('ssh_get_swap_info', 'SSH swap info read')
),
registered_tools AS (
    INSERT INTO awooop_mcp_tool_registry (
        project_id, tool_name, tool_type, description,
        allowed_scopes, environment_tags, is_active, updated_at
    )
    SELECT
        'awoooi',
        tool_name,
        'mcp_server',
        description,
        '["read"]'::jsonb,
        '{"env": "prod"}'::jsonb,
        TRUE,
        NOW()
    FROM diagnostic_tools
    ON CONFLICT (project_id, tool_name)
        DO UPDATE SET
            description = EXCLUDED.description,
            allowed_scopes = EXCLUDED.allowed_scopes,
            environment_tags = EXCLUDED.environment_tags,
            is_active = TRUE,
            updated_at = NOW()
    RETURNING tool_id, tool_name, allowed_scopes
),
granted AS (
    INSERT INTO awooop_mcp_grants (
        project_id, agent_id, tool_id, granted_by, granted_scopes,
        expires_at, is_revoked, revoked_at, revoked_by
    )
    SELECT
        'awoooi',
        'auto_repair_executor',
        tool_id,
        'migration:t23_auto_repair_executor_read_gateway',
        allowed_scopes,   -- grant mirrors the tool's own scope list
        NULL,             -- expires_at: no expiry
        FALSE,            -- is_revoked
        NULL,             -- revoked_at
        NULL              -- revoked_by
    FROM registered_tools
    ON CONFLICT (project_id, agent_id, tool_id)
        DO UPDATE SET
            granted_by = EXCLUDED.granted_by,
            granted_scopes = EXCLUDED.granted_scopes,
            expires_at = NULL,
            is_revoked = FALSE,
            revoked_at = NULL,
            revoked_by = NULL
    RETURNING grant_id
)
-- Report how many registry rows and grant rows were written.
SELECT
    'auto_repair_executor_read_gateway',
    (SELECT count(*) FROM registered_tools) AS tool_rows,
    (SELECT count(*) FROM granted) AS grant_rows;
|
||||
@@ -0,0 +1,24 @@
|
||||
-- Rollback T23 auto-repair executor read-only MCP Gateway grant.
|
||||
|
||||
-- Set app.project_id for the session (third argument FALSE = not
-- transaction-local).
SELECT set_config('app.project_id', 'awoooi', FALSE);

-- 1. Revoke the grants the T23 seed gave to auto_repair_executor.
--    Only still-live grants are touched, so re-running this rollback does not
--    overwrite revoked_at / revoked_by on grants that were already revoked.
UPDATE awooop_mcp_grants
SET is_revoked = TRUE,
    revoked_at = NOW(),
    revoked_by = 'rollback:t23_auto_repair_executor_read_gateway'
WHERE project_id = 'awoooi'
  AND agent_id = 'auto_repair_executor'
  AND granted_by = 'migration:t23_auto_repair_executor_read_gateway'
  AND is_revoked = FALSE;

-- 2. Remove the active-revision pointer for the auto_repair_executor contract.
--    The tool registry rows are deliberately left untouched by this script.
DELETE FROM awooop_active_revisions
WHERE project_id = 'awoooi'
  AND contract_family = 'agent'
  AND contract_id = 'auto_repair_executor';

-- 3. Mark the revision published by the T23 seed as no longer active.
-- NOTE(review): the T9 rollback in this change set writes 'revoked' where this
-- one writes 'retired'. Confirm which values lifecycle_status accepts.
UPDATE awooop_contract_revisions
SET lifecycle_status = 'retired'
WHERE project_id = 'awoooi'
  AND contract_family = 'agent'
  AND contract_id = 'auto_repair_executor'
  AND publisher_id = 'migration:t23_auto_repair_executor_read_gateway'
  AND lifecycle_status = 'active';
|
||||
@@ -0,0 +1,25 @@
|
||||
-- =============================================================================
|
||||
-- AwoooP / AWOOOI MCP Gateway Shadow Onboarding
|
||||
-- 2026-05-13 Codex + ogt
|
||||
--
|
||||
-- 背景:
|
||||
-- AWOOOI 已完成 read-only MCP tool registry / grants seed,但 project 本身仍停在
|
||||
-- legacy_awoooi_default,會被 MCP Gateway Gate 1 正確攔截。
|
||||
--
|
||||
-- 邊界:
|
||||
-- 只把 AWOOOI 租戶升到 shadow,讓既有 Gate 1 生效。
|
||||
-- write/admin tool 仍未授權;自動修復/破壞性動作不因本 migration 開放。
|
||||
-- =============================================================================
|
||||
|
||||
-- Move project `awoooi` from legacy_awoooi_default to shadow mode.
BEGIN;

-- Set app.project_id for the session (third argument FALSE = not
-- transaction-local).
SELECT set_config('app.project_id', 'awoooi', FALSE);

-- Guarded on the current mode: a project that is already in any other
-- migration_mode is left untouched, so re-running is a no-op.
UPDATE awooop_projects
   SET updated_at = NOW(),
       migration_mode = 'shadow'
 WHERE migration_mode = 'legacy_awoooi_default'
   AND project_id = 'awoooi';

COMMIT;
|
||||
@@ -0,0 +1,20 @@
|
||||
-- =============================================================================
|
||||
-- Rollback: AwoooP / AWOOOI MCP Gateway Shadow Onboarding
|
||||
-- 2026-05-13 Codex + ogt
|
||||
--
|
||||
-- 只回退仍停在 shadow 的 AWOOOI;若已由人工/後續 migration 推進到 canary/active,
|
||||
-- 不自動降級。
|
||||
-- =============================================================================
|
||||
|
||||
-- Return project `awoooi` from shadow mode to legacy_awoooi_default.
BEGIN;

-- Set app.project_id for the session (third argument FALSE = not
-- transaction-local).
SELECT set_config('app.project_id', 'awoooi', FALSE);

-- Guarded on the current mode: only a project still in 'shadow' is reverted;
-- one that has moved on to another mode is not downgraded.
UPDATE awooop_projects
   SET updated_at = NOW(),
       migration_mode = 'legacy_awoooi_default'
 WHERE migration_mode = 'shadow'
   AND project_id = 'awoooi';

COMMIT;
|
||||
@@ -0,0 +1,211 @@
|
||||
-- T7: awoooi read-only MCP Gateway seed
|
||||
-- 目的:讓決策前感官 MCP 能通過 AwoooP Gateway Gate 2/3,產生 first-class audit。
|
||||
-- 邊界:只授權 read scope;不授權 restart/delete/scale/apply/rollback 等 write/admin 工具。
|
||||
|
||||
-- Set app.project_id for the session (third argument FALSE = not
-- transaction-local).
-- NOTE(review): presumably read by row-level-security policies; confirm.
SELECT set_config('app.project_id', 'awoooi', FALSE);

-- Publish contract revision 1.0 for the two read-only agents and make each the
-- project's active revision for its agent.
--   reader_agents      the agent ids and display names to seed.
--   contract_docs      one JSON contract body per agent (read scope only).
--   new_revisions      inserts revision 1.0; skips agents that already have it.
--   candidate_revision rows just inserted, plus existing 1.0 rows that are
--                      still 'active'.
--   pointer_upsert     creates or repoints one active-revision pointer per agent.
-- NOTE(review): an agent whose 1.0 revision exists but is no longer 'active'
-- gets no pointer. Confirm that is intended.
WITH reader_agents(agent_id, display_name) AS (
    VALUES
        ('pre_decision_investigator', 'Pre-decision Investigator'),
        ('post_execution_verifier', 'Post-execution Verifier')
),
contract_docs AS (
    SELECT
        agent_id,
        jsonb_build_object(
            'schema_version', 'awooop_agent_contract_v1',
            'agent_id', agent_id,
            'display_name', display_name,
            'project_id', 'awoooi',
            'purpose', 'Read-only MCP sensing through AwoooP Gateway',
            'allowed_scopes', jsonb_build_array('read'),
            'forbidden_scopes', jsonb_build_array('write', 'admin'),
            'stage', 't7_mcp_gateway_read_sense'
        ) AS body_json
    FROM reader_agents
),
new_revisions AS (
    INSERT INTO awooop_contract_revisions (
        project_id, contract_family, contract_id,
        version_major, version_minor, lifecycle_status,
        body_json, body_hash, body_schema_version,
        publisher_id, published_at
    )
    SELECT
        'awoooi', 'agent', agent_id,
        1, 0, 'active',
        body_json,
        -- presumably pgcrypto's digest(); verify the extension is installed
        encode(digest(body_json::text, 'sha256'), 'hex'),
        'v1.0',
        'migration:t7_mcp_gateway_read_seed',
        NOW()
    FROM contract_docs
    ON CONFLICT (project_id, contract_family, contract_id, version_major, version_minor)
        DO NOTHING
    RETURNING revision_id, project_id, contract_family, contract_id
),
candidate_revision AS (
    SELECT revision_id, project_id, contract_family, contract_id
    FROM new_revisions
    UNION ALL
    SELECT revision_id, project_id, contract_family, contract_id
    FROM awooop_contract_revisions
    WHERE project_id = 'awoooi'
      AND contract_family = 'agent'
      AND contract_id IN (SELECT agent_id FROM reader_agents)
      AND version_major = 1
      AND version_minor = 0
      AND lifecycle_status = 'active'
),
pointer_upsert AS (
    INSERT INTO awooop_active_revisions (
        project_id, contract_family, contract_id, active_revision_id, updated_at
    )
    -- DISTINCT ON keeps a single candidate per contract (lowest revision_id).
    SELECT DISTINCT ON (project_id, contract_family, contract_id)
        project_id, contract_family, contract_id, revision_id, NOW()
    FROM candidate_revision
    ORDER BY project_id, contract_family, contract_id, revision_id
    ON CONFLICT (project_id, contract_family, contract_id)
        DO UPDATE SET
            active_revision_id = EXCLUDED.active_revision_id,
            updated_at = NOW()
    RETURNING contract_id
)
-- Report how many pointers were written.
SELECT 'active_agent_contracts', count(*) FROM pointer_upsert;
|
||||
|
||||
-- Register every read-only sensing tool and grant each one, with read scope,
-- to both reader agents (tools x agents via CROSS JOIN).
--   read_tools   static list: tool name and description.
--   upsert_tools upserts the registry rows and re-activates them.
--   grant_agents the agents that receive the grants.
--   upsert_grants upserts one non-expiring, non-revoked grant per (agent, tool).
WITH read_tools(tool_name, description) AS (
    VALUES
        ('k8s_get_pod_logs', 'Kubernetes pod logs read'),
        ('k8s_get_events', 'Kubernetes events read'),
        ('k8s_describe_pod', 'Kubernetes pod describe read'),
        ('k8s_get_hpa_status', 'Kubernetes HPA status read'),
        ('k8s_get_node_conditions', 'Kubernetes node conditions read'),
        ('ssh_diagnose', 'SSH host diagnosis read'),
        ('ssh_get_top_processes', 'SSH top processes read'),
        ('ssh_get_disk_usage', 'SSH disk usage read'),
        ('ssh_get_memory_info', 'SSH memory info read'),
        ('ssh_get_container_logs', 'SSH container logs read'),
        ('ssh_get_container_status', 'SSH container status read'),
        ('ssh_get_service_status', 'SSH service status read'),
        ('ssh_check_port', 'SSH port check read'),
        ('ssh_get_nginx_error_log', 'SSH nginx error log read'),
        ('ssh_get_swap_info', 'SSH swap info read'),
        ('prometheus_query', 'Prometheus instant query read'),
        ('prometheus_query_range', 'Prometheus range query read'),
        ('prometheus_get_alert_history', 'Prometheus alert history read'),
        ('gold_metrics', 'SigNoz gold metrics read'),
        ('trace_url', 'SigNoz trace URL read'),
        ('system_metrics', 'SigNoz system metrics read'),
        ('query_logs', 'SigNoz logs read'),
        ('error_logs_summary', 'SigNoz error logs summary read'),
        ('list_approvals', 'Approval records read'),
        ('get_approval', 'Approval detail read'),
        ('list_incidents', 'Incident records read'),
        ('list_timeline', 'Timeline records read'),
        ('read_file', 'Filesystem allowlisted file read'),
        ('list_directory', 'Filesystem allowlisted directory read'),
        ('search_in_file', 'Filesystem allowlisted file search'),
        ('list_dashboards', 'Grafana dashboards read'),
        ('get_dashboard', 'Grafana dashboard read'),
        ('get_panel_data', 'Grafana panel data read'),
        ('generate_dashboard_url', 'Grafana dashboard URL read'),
        ('search_runbook', 'Runbook semantic search read'),
        ('get_index_stats', 'Runbook index stats read'),
        ('argocd_list_apps', 'ArgoCD apps read'),
        ('argocd_get_app_status', 'ArgoCD app status read'),
        ('argocd_get_sync_history', 'ArgoCD sync history read'),
        ('sentry_list_issues', 'Sentry issues read'),
        ('sentry_get_issue', 'Sentry issue detail read'),
        ('sentry_search_issues', 'Sentry issue search read')
),
upsert_tools AS (
    INSERT INTO awooop_mcp_tool_registry (
        project_id,
        tool_name,
        tool_type,
        description,
        allowed_scopes,
        environment_tags,
        is_active,
        updated_at
    )
    SELECT
        'awoooi',
        tool_name,
        'mcp_server',
        description,
        '["read"]'::jsonb,
        '{"env": "prod"}'::jsonb,
        TRUE,
        NOW()
    FROM read_tools
    ON CONFLICT (project_id, tool_name)
    DO UPDATE SET
        description = EXCLUDED.description,
        allowed_scopes = EXCLUDED.allowed_scopes,
        environment_tags = EXCLUDED.environment_tags,
        is_active = TRUE,
        updated_at = NOW()
    RETURNING tool_id
),
grant_agents(agent_id) AS (
    VALUES
        ('pre_decision_investigator'),
        ('post_execution_verifier')
),
upsert_grants AS (
    INSERT INTO awooop_mcp_grants (
        project_id,
        agent_id,
        tool_id,
        granted_by,
        granted_scopes,
        expires_at,
        is_revoked,
        revoked_at,
        revoked_by
    )
    SELECT
        'awoooi',
        grant_agents.agent_id,
        upsert_tools.tool_id,
        'migration:t7_mcp_gateway_read_seed',
        '["read"]'::jsonb,
        NULL,
        FALSE,
        NULL,
        NULL
    FROM upsert_tools
    CROSS JOIN grant_agents
    ON CONFLICT (project_id, agent_id, tool_id)
    DO UPDATE SET
        -- Stamp the grant as owned by this migration when an existing row is
        -- taken over. The T7 rollback selects grants by granted_by, so without
        -- this a re-enabled pre-existing grant could never be revoked by it.
        -- This matches the T9 and T23 seeds.
        granted_by = EXCLUDED.granted_by,
        granted_scopes = EXCLUDED.granted_scopes,
        expires_at = NULL,
        is_revoked = FALSE,
        revoked_at = NULL,
        revoked_by = NULL
    RETURNING grant_id
)
-- Report how many registry rows and grant rows were written.
SELECT
    'awoooi_read_tools',
    (SELECT count(*) FROM upsert_tools) AS tool_rows,
    (SELECT count(*) FROM upsert_grants) AS grant_rows;
|
||||
@@ -0,0 +1,77 @@
|
||||
-- Rollback for T7 awoooi read-only MCP Gateway seed.
|
||||
-- Contract revisions are append-only; rollback revokes grants and deactivates the seeded read tools.
|
||||
|
||||
SELECT set_config('app.project_id', 'awoooi', FALSE);
|
||||
|
||||
UPDATE awooop_mcp_grants
|
||||
SET
|
||||
is_revoked = TRUE,
|
||||
revoked_at = NOW(),
|
||||
revoked_by = 'rollback:t7_mcp_gateway_read_seed'
|
||||
WHERE project_id = 'awoooi'
|
||||
AND agent_id IN ('pre_decision_investigator', 'post_execution_verifier')
|
||||
AND granted_by = 'migration:t7_mcp_gateway_read_seed'
|
||||
AND is_revoked = FALSE;
|
||||
|
||||
UPDATE awooop_mcp_tool_registry
|
||||
SET
|
||||
is_active = FALSE,
|
||||
updated_at = NOW()
|
||||
WHERE project_id = 'awoooi'
|
||||
AND tool_name IN (
|
||||
'k8s_get_pod_logs',
|
||||
'k8s_get_events',
|
||||
'k8s_describe_pod',
|
||||
'k8s_get_hpa_status',
|
||||
'k8s_get_node_conditions',
|
||||
'ssh_diagnose',
|
||||
'ssh_get_top_processes',
|
||||
'ssh_get_disk_usage',
|
||||
'ssh_get_memory_info',
|
||||
'ssh_get_container_logs',
|
||||
'ssh_get_container_status',
|
||||
'ssh_get_service_status',
|
||||
'ssh_check_port',
|
||||
'ssh_get_nginx_error_log',
|
||||
'ssh_get_swap_info',
|
||||
'prometheus_query',
|
||||
'prometheus_query_range',
|
||||
'prometheus_get_alert_history',
|
||||
'gold_metrics',
|
||||
'trace_url',
|
||||
'system_metrics',
|
||||
'query_logs',
|
||||
'error_logs_summary',
|
||||
'list_approvals',
|
||||
'get_approval',
|
||||
'list_incidents',
|
||||
'list_timeline',
|
||||
'read_file',
|
||||
'list_directory',
|
||||
'search_in_file',
|
||||
'list_dashboards',
|
||||
'get_dashboard',
|
||||
'get_panel_data',
|
||||
'generate_dashboard_url',
|
||||
'search_runbook',
|
||||
'get_index_stats',
|
||||
'argocd_list_apps',
|
||||
'argocd_get_app_status',
|
||||
'argocd_get_sync_history',
|
||||
'sentry_list_issues',
|
||||
'sentry_get_issue',
|
||||
'sentry_search_issues'
|
||||
);
|
||||
|
||||
DELETE FROM awooop_active_revisions
|
||||
WHERE project_id = 'awoooi'
|
||||
AND contract_family = 'agent'
|
||||
AND contract_id IN ('pre_decision_investigator', 'post_execution_verifier');
|
||||
|
||||
UPDATE awooop_contract_revisions
|
||||
SET lifecycle_status = 'revoked'
|
||||
WHERE project_id = 'awoooi'
|
||||
AND contract_family = 'agent'
|
||||
AND contract_id IN ('pre_decision_investigator', 'post_execution_verifier')
|
||||
AND publisher_id = 'migration:t7_mcp_gateway_read_seed'
|
||||
AND lifecycle_status = 'active';
|
||||
@@ -0,0 +1,213 @@
|
||||
-- T7: awoooi read-only MCP Gateway seed
|
||||
-- 目的:讓決策前感官 MCP 能通過 AwoooP Gateway Gate 2/3,產生 first-class audit。
|
||||
-- 邊界:只授權 read scope;不授權 restart/delete/scale/apply/rollback 等 write/admin 工具。
|
||||
|
||||
SELECT set_config('app.project_id', 'awoooi', FALSE);
|
||||
|
||||
WITH agent_seed(agent_id, display_name) AS (
|
||||
VALUES
|
||||
('pre_decision_investigator', 'Pre-decision Investigator'),
|
||||
('post_execution_verifier', 'Post-execution Verifier')
|
||||
),
|
||||
agent_body AS (
|
||||
SELECT
|
||||
agent_id,
|
||||
jsonb_build_object(
|
||||
'schema_version', 'awooop_agent_contract_v1',
|
||||
'agent_id', agent_id,
|
||||
'display_name', display_name,
|
||||
'project_id', 'awoooi',
|
||||
'purpose', 'Read-only MCP sensing through AwoooP Gateway',
|
||||
'allowed_scopes', jsonb_build_array('read'),
|
||||
'forbidden_scopes', jsonb_build_array('write', 'admin'),
|
||||
'stage', 't7_mcp_gateway_read_sense'
|
||||
) AS body_json
|
||||
FROM agent_seed
|
||||
),
|
||||
inserted_revision AS (
|
||||
INSERT INTO awooop_contract_revisions (
|
||||
project_id,
|
||||
contract_family,
|
||||
contract_id,
|
||||
version_major,
|
||||
version_minor,
|
||||
lifecycle_status,
|
||||
body_json,
|
||||
body_hash,
|
||||
body_schema_version,
|
||||
publisher_id,
|
||||
published_at
|
||||
)
|
||||
SELECT
|
||||
'awoooi',
|
||||
'agent',
|
||||
agent_id,
|
||||
1,
|
||||
0,
|
||||
'active',
|
||||
body_json,
|
||||
encode(digest(body_json::text, 'sha256'), 'hex'),
|
||||
'v1.0',
|
||||
'migration:t7_mcp_gateway_read_seed',
|
||||
NOW()
|
||||
FROM agent_body
|
||||
ON CONFLICT (project_id, contract_family, contract_id, version_major, version_minor)
|
||||
DO NOTHING
|
||||
RETURNING revision_id, project_id, contract_family, contract_id
|
||||
),
|
||||
chosen_revision AS (
|
||||
SELECT revision_id, project_id, contract_family, contract_id
|
||||
FROM inserted_revision
|
||||
UNION ALL
|
||||
SELECT revision_id, project_id, contract_family, contract_id
|
||||
FROM awooop_contract_revisions
|
||||
WHERE project_id = 'awoooi'
|
||||
AND contract_family = 'agent'
|
||||
AND contract_id IN (SELECT agent_id FROM agent_seed)
|
||||
AND version_major = 1
|
||||
AND version_minor = 0
|
||||
AND lifecycle_status = 'active'
|
||||
),
|
||||
upsert_pointer AS (
|
||||
INSERT INTO awooop_active_revisions (
|
||||
project_id,
|
||||
contract_family,
|
||||
contract_id,
|
||||
active_revision_id,
|
||||
updated_at
|
||||
)
|
||||
SELECT DISTINCT ON (project_id, contract_family, contract_id)
|
||||
project_id,
|
||||
contract_family,
|
||||
contract_id,
|
||||
revision_id,
|
||||
NOW()
|
||||
FROM chosen_revision
|
||||
ORDER BY project_id, contract_family, contract_id, revision_id
|
||||
ON CONFLICT (project_id, contract_family, contract_id)
|
||||
DO UPDATE SET
|
||||
active_revision_id = EXCLUDED.active_revision_id,
|
||||
updated_at = NOW()
|
||||
RETURNING contract_id
|
||||
)
|
||||
SELECT 'active_agent_contracts', count(*) FROM upsert_pointer;
|
||||
|
||||
WITH read_tools(tool_name, description) AS (
|
||||
VALUES
|
||||
('k8s_get_pod_logs', 'Kubernetes pod logs read'),
|
||||
('k8s_get_events', 'Kubernetes events read'),
|
||||
('k8s_describe_pod', 'Kubernetes pod describe read'),
|
||||
('k8s_get_hpa_status', 'Kubernetes HPA status read'),
|
||||
('k8s_get_node_conditions', 'Kubernetes node conditions read'),
|
||||
('ssh_diagnose', 'SSH host diagnosis read'),
|
||||
('ssh_get_top_processes', 'SSH top processes read'),
|
||||
('ssh_get_disk_usage', 'SSH disk usage read'),
|
||||
('ssh_get_memory_info', 'SSH memory info read'),
|
||||
('ssh_get_container_logs', 'SSH container logs read'),
|
||||
('ssh_get_container_status', 'SSH container status read'),
|
||||
('ssh_get_service_status', 'SSH service status read'),
|
||||
('ssh_check_port', 'SSH port check read'),
|
||||
('ssh_get_nginx_error_log', 'SSH nginx error log read'),
|
||||
('ssh_get_swap_info', 'SSH swap info read'),
|
||||
('prometheus_query', 'Prometheus instant query read'),
|
||||
('prometheus_query_range', 'Prometheus range query read'),
|
||||
('prometheus_get_alert_history', 'Prometheus alert history read'),
|
||||
('gold_metrics', 'SigNoz gold metrics read'),
|
||||
('trace_url', 'SigNoz trace URL read'),
|
||||
('system_metrics', 'SigNoz system metrics read'),
|
||||
('query_logs', 'SigNoz logs read'),
|
||||
('error_logs_summary', 'SigNoz error logs summary read'),
|
||||
('list_approvals', 'Approval records read'),
|
||||
('get_approval', 'Approval detail read'),
|
||||
('list_incidents', 'Incident records read'),
|
||||
('list_timeline', 'Timeline records read'),
|
||||
('read_file', 'Filesystem allowlisted file read'),
|
||||
('list_directory', 'Filesystem allowlisted directory read'),
|
||||
('search_in_file', 'Filesystem allowlisted file search'),
|
||||
('list_dashboards', 'Grafana dashboards read'),
|
||||
('get_dashboard', 'Grafana dashboard read'),
|
||||
('get_panel_data', 'Grafana panel data read'),
|
||||
('generate_dashboard_url', 'Grafana dashboard URL read'),
|
||||
('search_runbook', 'Runbook semantic search read'),
|
||||
('get_index_stats', 'Runbook index stats read'),
|
||||
('argocd_list_apps', 'ArgoCD apps read'),
|
||||
('argocd_get_app_status', 'ArgoCD app status read'),
|
||||
('argocd_get_sync_history', 'ArgoCD sync history read'),
|
||||
('sentry_list_issues', 'Sentry issues read'),
|
||||
('sentry_get_issue', 'Sentry issue detail read'),
|
||||
('sentry_search_issues', 'Sentry issue search read')
|
||||
),
|
||||
upsert_tools AS (
|
||||
INSERT INTO awooop_mcp_tool_registry (
|
||||
project_id,
|
||||
tool_name,
|
||||
tool_type,
|
||||
description,
|
||||
allowed_scopes,
|
||||
environment_tags,
|
||||
is_active,
|
||||
updated_at
|
||||
)
|
||||
SELECT
|
||||
'awoooi',
|
||||
tool_name,
|
||||
'mcp_server',
|
||||
description,
|
||||
'["read"]'::jsonb,
|
||||
'{"env": "prod"}'::jsonb,
|
||||
TRUE,
|
||||
NOW()
|
||||
FROM read_tools
|
||||
ON CONFLICT (project_id, tool_name)
|
||||
DO UPDATE SET
|
||||
description = EXCLUDED.description,
|
||||
allowed_scopes = EXCLUDED.allowed_scopes,
|
||||
environment_tags = EXCLUDED.environment_tags,
|
||||
is_active = TRUE,
|
||||
updated_at = NOW()
|
||||
RETURNING tool_id
|
||||
),
|
||||
grant_agents(agent_id) AS (
|
||||
VALUES
|
||||
('pre_decision_investigator'),
|
||||
('post_execution_verifier')
|
||||
),
|
||||
upsert_grants AS (
|
||||
INSERT INTO awooop_mcp_grants (
|
||||
project_id,
|
||||
agent_id,
|
||||
tool_id,
|
||||
granted_by,
|
||||
granted_scopes,
|
||||
expires_at,
|
||||
is_revoked,
|
||||
revoked_at,
|
||||
revoked_by
|
||||
)
|
||||
SELECT
|
||||
'awoooi',
|
||||
grant_agents.agent_id,
|
||||
upsert_tools.tool_id,
|
||||
'migration:t7_mcp_gateway_read_seed',
|
||||
'["read"]'::jsonb,
|
||||
NULL,
|
||||
FALSE,
|
||||
NULL,
|
||||
NULL
|
||||
FROM upsert_tools
|
||||
CROSS JOIN grant_agents
|
||||
ON CONFLICT (project_id, agent_id, tool_id)
|
||||
DO UPDATE SET
|
||||
granted_scopes = EXCLUDED.granted_scopes,
|
||||
expires_at = NULL,
|
||||
is_revoked = FALSE,
|
||||
revoked_at = NULL,
|
||||
revoked_by = NULL
|
||||
RETURNING grant_id
|
||||
)
|
||||
SELECT
|
||||
'awoooi_read_tools',
|
||||
(SELECT count(*) FROM upsert_tools) AS tool_rows,
|
||||
(SELECT count(*) FROM upsert_grants) AS grant_rows;
|
||||
|
||||
-- v4 exists only to retrigger run-migration after Gitea skipped the v2->v3 rename-only push.
|
||||
@@ -0,0 +1,79 @@
|
||||
-- Rollback for T7 awoooi read-only MCP Gateway seed.
|
||||
-- Contract revisions are append-only; rollback revokes grants and deactivates the seeded read tools.
|
||||
|
||||
SELECT set_config('app.project_id', 'awoooi', FALSE);
|
||||
|
||||
UPDATE awooop_mcp_grants
|
||||
SET
|
||||
is_revoked = TRUE,
|
||||
revoked_at = NOW(),
|
||||
revoked_by = 'rollback:t7_mcp_gateway_read_seed'
|
||||
WHERE project_id = 'awoooi'
|
||||
AND agent_id IN ('pre_decision_investigator', 'post_execution_verifier')
|
||||
AND granted_by = 'migration:t7_mcp_gateway_read_seed'
|
||||
AND is_revoked = FALSE;
|
||||
|
||||
UPDATE awooop_mcp_tool_registry
|
||||
SET
|
||||
is_active = FALSE,
|
||||
updated_at = NOW()
|
||||
WHERE project_id = 'awoooi'
|
||||
AND tool_name IN (
|
||||
'k8s_get_pod_logs',
|
||||
'k8s_get_events',
|
||||
'k8s_describe_pod',
|
||||
'k8s_get_hpa_status',
|
||||
'k8s_get_node_conditions',
|
||||
'ssh_diagnose',
|
||||
'ssh_get_top_processes',
|
||||
'ssh_get_disk_usage',
|
||||
'ssh_get_memory_info',
|
||||
'ssh_get_container_logs',
|
||||
'ssh_get_container_status',
|
||||
'ssh_get_service_status',
|
||||
'ssh_check_port',
|
||||
'ssh_get_nginx_error_log',
|
||||
'ssh_get_swap_info',
|
||||
'prometheus_query',
|
||||
'prometheus_query_range',
|
||||
'prometheus_get_alert_history',
|
||||
'gold_metrics',
|
||||
'trace_url',
|
||||
'system_metrics',
|
||||
'query_logs',
|
||||
'error_logs_summary',
|
||||
'list_approvals',
|
||||
'get_approval',
|
||||
'list_incidents',
|
||||
'list_timeline',
|
||||
'read_file',
|
||||
'list_directory',
|
||||
'search_in_file',
|
||||
'list_dashboards',
|
||||
'get_dashboard',
|
||||
'get_panel_data',
|
||||
'generate_dashboard_url',
|
||||
'search_runbook',
|
||||
'get_index_stats',
|
||||
'argocd_list_apps',
|
||||
'argocd_get_app_status',
|
||||
'argocd_get_sync_history',
|
||||
'sentry_list_issues',
|
||||
'sentry_get_issue',
|
||||
'sentry_search_issues'
|
||||
);
|
||||
|
||||
DELETE FROM awooop_active_revisions
|
||||
WHERE project_id = 'awoooi'
|
||||
AND contract_family = 'agent'
|
||||
AND contract_id IN ('pre_decision_investigator', 'post_execution_verifier');
|
||||
|
||||
UPDATE awooop_contract_revisions
|
||||
SET lifecycle_status = 'revoked'
|
||||
WHERE project_id = 'awoooi'
|
||||
AND contract_family = 'agent'
|
||||
AND contract_id IN ('pre_decision_investigator', 'post_execution_verifier')
|
||||
AND publisher_id = 'migration:t7_mcp_gateway_read_seed'
|
||||
AND lifecycle_status = 'active';
|
||||
|
||||
-- v4 rollback companion for the retrigger migration.
|
||||
@@ -0,0 +1,77 @@
|
||||
-- T16 verifier gap: allow rollout status evidence through AwoooP MCP Gateway.
|
||||
-- Boundary: read-only scope only; no restart/delete/scale grant is added here.
|
||||
|
||||
SELECT set_config('app.project_id', 'awoooi', FALSE);
|
||||
|
||||
WITH upsert_tool AS (
|
||||
INSERT INTO awooop_mcp_tool_registry (
|
||||
project_id,
|
||||
tool_name,
|
||||
tool_type,
|
||||
description,
|
||||
allowed_scopes,
|
||||
environment_tags,
|
||||
is_active,
|
||||
updated_at
|
||||
)
|
||||
VALUES (
|
||||
'awoooi',
|
||||
'k8s_watch_rollout',
|
||||
'mcp_server',
|
||||
'Kubernetes deployment rollout status read',
|
||||
'["read"]'::jsonb,
|
||||
'{"env": "prod"}'::jsonb,
|
||||
TRUE,
|
||||
NOW()
|
||||
)
|
||||
ON CONFLICT (project_id, tool_name)
|
||||
DO UPDATE SET
|
||||
description = EXCLUDED.description,
|
||||
allowed_scopes = EXCLUDED.allowed_scopes,
|
||||
environment_tags = EXCLUDED.environment_tags,
|
||||
is_active = TRUE,
|
||||
updated_at = NOW()
|
||||
RETURNING tool_id
|
||||
),
|
||||
grant_agents(agent_id) AS (
|
||||
VALUES
|
||||
('pre_decision_investigator'),
|
||||
('post_execution_verifier')
|
||||
),
|
||||
upsert_grants AS (
|
||||
INSERT INTO awooop_mcp_grants (
|
||||
project_id,
|
||||
agent_id,
|
||||
tool_id,
|
||||
granted_by,
|
||||
granted_scopes,
|
||||
expires_at,
|
||||
is_revoked,
|
||||
revoked_at,
|
||||
revoked_by
|
||||
)
|
||||
SELECT
|
||||
'awoooi',
|
||||
grant_agents.agent_id,
|
||||
upsert_tool.tool_id,
|
||||
'migration:t16_rollout_verifier_seed',
|
||||
'["read"]'::jsonb,
|
||||
NULL,
|
||||
FALSE,
|
||||
NULL,
|
||||
NULL
|
||||
FROM upsert_tool
|
||||
CROSS JOIN grant_agents
|
||||
ON CONFLICT (project_id, agent_id, tool_id)
|
||||
DO UPDATE SET
|
||||
granted_scopes = EXCLUDED.granted_scopes,
|
||||
expires_at = NULL,
|
||||
is_revoked = FALSE,
|
||||
revoked_at = NULL,
|
||||
revoked_by = NULL
|
||||
RETURNING grant_id
|
||||
)
|
||||
SELECT
|
||||
'k8s_watch_rollout_read_grants' AS seed,
|
||||
(SELECT count(*) FROM upsert_tool) AS tool_rows,
|
||||
(SELECT count(*) FROM upsert_grants) AS grant_rows;
|
||||
@@ -0,0 +1,24 @@
|
||||
-- Roll back T16 rollout verifier read grant seed.
|
||||
|
||||
SELECT set_config('app.project_id', 'awoooi', FALSE);
|
||||
|
||||
UPDATE awooop_mcp_grants
|
||||
SET
|
||||
is_revoked = TRUE,
|
||||
revoked_at = NOW(),
|
||||
revoked_by = 'migration:t16_rollout_verifier_seed_down'
|
||||
WHERE project_id = 'awoooi'
|
||||
AND agent_id IN ('pre_decision_investigator', 'post_execution_verifier')
|
||||
AND tool_id IN (
|
||||
SELECT tool_id
|
||||
FROM awooop_mcp_tool_registry
|
||||
WHERE project_id = 'awoooi'
|
||||
AND tool_name = 'k8s_watch_rollout'
|
||||
);
|
||||
|
||||
UPDATE awooop_mcp_tool_registry
|
||||
SET
|
||||
is_active = FALSE,
|
||||
updated_at = NOW()
|
||||
WHERE project_id = 'awoooi'
|
||||
AND tool_name = 'k8s_watch_rollout';
|
||||
@@ -0,0 +1,14 @@
|
||||
-- AwoooP Phase 5b:MCP Gateway blocked call 稽核覆蓋
|
||||
-- 日期:2026-05-06
|
||||
-- 維護者:Codex
|
||||
--
|
||||
-- Gate 1 / Gate 2 / 未知工具的 blocked call 可能發生在 tool registry row
|
||||
-- 取得之前。這些安全決策仍必須落稽核紀錄,因此 tool_id 允許為 NULL,
|
||||
-- 但 tool_name 仍維持必填,作為未知工具與早期 gate block 的追蹤線索。
|
||||
|
||||
BEGIN;
|
||||
|
||||
ALTER TABLE awooop_mcp_gateway_audit
|
||||
ALTER COLUMN tool_id DROP NOT NULL;
|
||||
|
||||
COMMIT;
|
||||
@@ -0,0 +1,21 @@
|
||||
-- AwoooP Phase 7 T15b: inbound event truth-chain columns
|
||||
--
|
||||
-- Purpose:
|
||||
-- Telegram cards are only the notification surface. Operators need a
|
||||
-- redacted replay envelope for inbound alerts so Alertmanager, Sentry, and
|
||||
-- SignOz events can be correlated with incidents, approvals, logs, and
|
||||
-- automation decisions without storing raw secrets or PII.
|
||||
|
||||
ALTER TABLE awooop_conversation_event
|
||||
ADD COLUMN IF NOT EXISTS content_redacted TEXT,
|
||||
ADD COLUMN IF NOT EXISTS redaction_version VARCHAR(32) NOT NULL DEFAULT 'audit_sink_v1',
|
||||
ADD COLUMN IF NOT EXISTS source_envelope JSONB NOT NULL DEFAULT '{}'::jsonb;
|
||||
|
||||
COMMENT ON COLUMN awooop_conversation_event.content_redacted IS
|
||||
'Full inbound event content after audit_sink redaction; raw unredacted payload text is not stored.';
|
||||
|
||||
COMMENT ON COLUMN awooop_conversation_event.redaction_version IS
|
||||
'Redaction algorithm/version used for content_redacted and source_envelope.';
|
||||
|
||||
COMMENT ON COLUMN awooop_conversation_event.source_envelope IS
|
||||
'Redacted source metadata for inbound replay/audit, including payload hash, provider, source refs, and log correlation hints.';
|
||||
@@ -0,0 +1,6 @@
|
||||
-- Rollback for AwoooP Phase 7 T15b inbound truth-chain columns.
|
||||
-- Safe only if no consumers depend on the redacted replay fields.
|
||||
|
||||
ALTER TABLE awooop_conversation_event DROP COLUMN IF EXISTS source_envelope;
|
||||
ALTER TABLE awooop_conversation_event DROP COLUMN IF EXISTS redaction_version;
|
||||
ALTER TABLE awooop_conversation_event DROP COLUMN IF EXISTS content_redacted;
|
||||
@@ -0,0 +1,21 @@
|
||||
-- AwoooP Phase 7 T1: outbound message truth-chain columns
|
||||
--
|
||||
-- Purpose:
|
||||
-- Telegram must remain a summary channel, but the operator console needs a
|
||||
-- complete redacted replay of the rendered card and the source envelope that
|
||||
-- produced it. Store redacted content only; raw unredacted Telegram text stays
|
||||
-- out of PostgreSQL.
|
||||
|
||||
ALTER TABLE awooop_outbound_message
|
||||
ADD COLUMN IF NOT EXISTS content_redacted TEXT,
|
||||
ADD COLUMN IF NOT EXISTS redaction_version VARCHAR(32) NOT NULL DEFAULT 'audit_sink_v1',
|
||||
ADD COLUMN IF NOT EXISTS source_envelope JSONB NOT NULL DEFAULT '{}'::jsonb;
|
||||
|
||||
COMMENT ON COLUMN awooop_outbound_message.content_redacted IS
|
||||
'Full rendered outbound content after audit_sink redaction; raw unredacted text is not stored.';
|
||||
|
||||
COMMENT ON COLUMN awooop_outbound_message.redaction_version IS
|
||||
'Redaction algorithm/version used for content_redacted and source_envelope.';
|
||||
|
||||
COMMENT ON COLUMN awooop_outbound_message.source_envelope IS
|
||||
'Redacted source metadata for replay/audit, including payload hash and adapter context.';
|
||||
@@ -0,0 +1,6 @@
|
||||
-- Rollback for AwoooP Phase 7 T1 outbound truth-chain columns.
|
||||
-- Safe only if no consumers depend on the redacted replay fields.
|
||||
|
||||
ALTER TABLE awooop_outbound_message DROP COLUMN IF EXISTS source_envelope;
|
||||
ALTER TABLE awooop_outbound_message DROP COLUMN IF EXISTS redaction_version;
|
||||
ALTER TABLE awooop_outbound_message DROP COLUMN IF EXISTS content_redacted;
|
||||
@@ -6,10 +6,12 @@
|
||||
-- bge-m3 產生 1024 維向量,現有 schema vector(768) 不相容,INSERT 會直接失敗
|
||||
--
|
||||
-- 影響範圍:
|
||||
-- 1. rag_chunks.embedding vector(768) → vector(1024)
|
||||
-- 2. playbook_embeddings.embedding vector(768) → vector(1024)
|
||||
-- 1. knowledge_entries.embedding vector(768) → vector(1024)
|
||||
-- 2. rag_chunks.embedding vector(768) → vector(1024)
|
||||
-- 3. playbook_embeddings.embedding vector(768) → vector(1024)
|
||||
--
|
||||
-- 遷移策略:清空現有向量資料,切換維度後由 re-embed script 重新嵌入
|
||||
-- 遷移策略:僅在欄位不是 vector(1024) 時清空現有向量資料,切換維度後由 re-embed script 重新嵌入
|
||||
-- 已經是 vector(1024) 的環境重跑本 migration 時,必須保留既有向量資料。
|
||||
-- 現有向量資料若要保留,需先 dump 用 nomic 格式備份(舊維度無法轉換)
|
||||
--
|
||||
-- 執行前置條件:
|
||||
@@ -21,13 +23,69 @@
|
||||
|
||||
BEGIN;
|
||||
|
||||
-- 1. rag_chunks:清空向量資料,變更欄位維度
|
||||
-- ivfflat index 必須先 DROP 才能 ALTER COLUMN
|
||||
DROP INDEX IF EXISTS idx_rag_chunks_embedding;
|
||||
-- 1. knowledge_entries:備份舊向量並清空,變更欄位維度
|
||||
DO $$
|
||||
DECLARE
|
||||
v_dim integer;
|
||||
BEGIN
|
||||
SELECT a.atttypmod INTO v_dim
|
||||
FROM pg_attribute a
|
||||
JOIN pg_class c ON a.attrelid = c.oid
|
||||
WHERE c.relname = 'knowledge_entries'
|
||||
AND a.attname = 'embedding';
|
||||
|
||||
ALTER TABLE rag_chunks
|
||||
ALTER COLUMN embedding TYPE vector(1024)
|
||||
USING NULL; -- 清空現有 768 維向量(維度不可轉換)
|
||||
IF v_dim IS DISTINCT FROM 1024 THEN
|
||||
EXECUTE $sql$
|
||||
CREATE TABLE IF NOT EXISTS knowledge_entries_embedding_backup_20260505 AS
|
||||
SELECT
|
||||
id,
|
||||
embedding::text AS embedding_768,
|
||||
NOW() AS backed_up_at
|
||||
FROM knowledge_entries
|
||||
WHERE embedding IS NOT NULL
|
||||
$sql$;
|
||||
|
||||
EXECUTE $sql$
|
||||
ALTER TABLE knowledge_entries
|
||||
ALTER COLUMN embedding TYPE vector(1024)
|
||||
USING NULL
|
||||
$sql$;
|
||||
|
||||
RAISE NOTICE 'knowledge_entries.embedding migrated from vector(%) to vector(1024); old embeddings were backed up and cleared', v_dim;
|
||||
ELSE
|
||||
RAISE NOTICE 'knowledge_entries.embedding already vector(1024); existing embeddings preserved';
|
||||
END IF;
|
||||
END $$;
|
||||
|
||||
COMMENT ON COLUMN knowledge_entries.embedding IS
|
||||
'bge-m3:latest 1024 維向量 — 遷移自 nomic-embed-text 768 維 (2026-05-05 ADR-110 follow-up)';
|
||||
|
||||
|
||||
-- 2. rag_chunks:清空向量資料,變更欄位維度
|
||||
-- ivfflat index 必須先 DROP 才能 ALTER COLUMN
|
||||
DO $$
|
||||
DECLARE
|
||||
v_dim integer;
|
||||
BEGIN
|
||||
SELECT a.atttypmod INTO v_dim
|
||||
FROM pg_attribute a
|
||||
JOIN pg_class c ON a.attrelid = c.oid
|
||||
WHERE c.relname = 'rag_chunks'
|
||||
AND a.attname = 'embedding';
|
||||
|
||||
IF v_dim IS DISTINCT FROM 1024 THEN
|
||||
EXECUTE 'DROP INDEX IF EXISTS idx_rag_chunks_embedding';
|
||||
EXECUTE $sql$
|
||||
ALTER TABLE rag_chunks
|
||||
ALTER COLUMN embedding TYPE vector(1024)
|
||||
USING NULL
|
||||
$sql$;
|
||||
|
||||
RAISE NOTICE 'rag_chunks.embedding migrated from vector(%) to vector(1024); old embeddings were cleared', v_dim;
|
||||
ELSE
|
||||
RAISE NOTICE 'rag_chunks.embedding already vector(1024); existing embeddings preserved';
|
||||
END IF;
|
||||
END $$;
|
||||
|
||||
-- 重建 ivfflat index(lists=100 適合 ~10k 筆以下資料)
|
||||
CREATE INDEX IF NOT EXISTS idx_rag_chunks_embedding
|
||||
@@ -39,12 +97,30 @@ COMMENT ON COLUMN rag_chunks.embedding IS
|
||||
'bge-m3:latest 1024 維向量 — 遷移自 nomic-embed-text 768 維 (2026-05-04 ADR-110)';
|
||||
|
||||
|
||||
-- 2. playbook_embeddings:清空向量資料,變更欄位維度
|
||||
DROP INDEX IF EXISTS ix_playbook_embeddings_vec;
|
||||
-- 3. playbook_embeddings:清空向量資料,變更欄位維度
|
||||
DO $$
|
||||
DECLARE
|
||||
v_dim integer;
|
||||
BEGIN
|
||||
SELECT a.atttypmod INTO v_dim
|
||||
FROM pg_attribute a
|
||||
JOIN pg_class c ON a.attrelid = c.oid
|
||||
WHERE c.relname = 'playbook_embeddings'
|
||||
AND a.attname = 'embedding';
|
||||
|
||||
ALTER TABLE playbook_embeddings
|
||||
ALTER COLUMN embedding TYPE vector(1024)
|
||||
USING NULL; -- 清空現有 768 維向量
|
||||
IF v_dim IS DISTINCT FROM 1024 THEN
|
||||
EXECUTE 'DROP INDEX IF EXISTS ix_playbook_embeddings_vec';
|
||||
EXECUTE $sql$
|
||||
ALTER TABLE playbook_embeddings
|
||||
ALTER COLUMN embedding TYPE vector(1024)
|
||||
USING NULL
|
||||
$sql$;
|
||||
|
||||
RAISE NOTICE 'playbook_embeddings.embedding migrated from vector(%) to vector(1024); old embeddings were cleared', v_dim;
|
||||
ELSE
|
||||
RAISE NOTICE 'playbook_embeddings.embedding already vector(1024); existing embeddings preserved';
|
||||
END IF;
|
||||
END $$;
|
||||
|
||||
CREATE INDEX IF NOT EXISTS ix_playbook_embeddings_vec
|
||||
ON playbook_embeddings
|
||||
@@ -61,9 +137,15 @@ COMMENT ON TABLE playbook_embeddings IS
|
||||
-- 3. 驗證遷移結果
|
||||
DO $$
|
||||
DECLARE
|
||||
v_km_dim integer;
|
||||
v_rag_dim integer;
|
||||
v_pb_dim integer;
|
||||
BEGIN
|
||||
SELECT atttypmod INTO v_km_dim
|
||||
FROM pg_attribute
|
||||
JOIN pg_class ON attrelid = pg_class.oid
|
||||
WHERE relname = 'knowledge_entries' AND attname = 'embedding';
|
||||
|
||||
SELECT atttypmod INTO v_rag_dim
|
||||
FROM pg_attribute
|
||||
JOIN pg_class ON attrelid = pg_class.oid
|
||||
@@ -74,15 +156,18 @@ BEGIN
|
||||
JOIN pg_class ON attrelid = pg_class.oid
|
||||
WHERE relname = 'playbook_embeddings' AND attname = 'embedding';
|
||||
|
||||
-- atttypmod for vector(1024) = 1024 + 1 = 1025
|
||||
IF v_rag_dim != 1025 THEN
|
||||
RAISE EXCEPTION 'rag_chunks.embedding 維度驗證失敗:expected 1025, got %', v_rag_dim;
|
||||
-- pgvector atttypmod stores the configured dimension.
|
||||
IF v_km_dim != 1024 THEN
|
||||
RAISE EXCEPTION 'knowledge_entries.embedding 維度驗證失敗:expected 1024, got %', v_km_dim;
|
||||
END IF;
|
||||
IF v_pb_dim != 1025 THEN
|
||||
RAISE EXCEPTION 'playbook_embeddings.embedding 維度驗證失敗:expected 1025, got %', v_pb_dim;
|
||||
IF v_rag_dim != 1024 THEN
|
||||
RAISE EXCEPTION 'rag_chunks.embedding 維度驗證失敗:expected 1024, got %', v_rag_dim;
|
||||
END IF;
|
||||
IF v_pb_dim != 1024 THEN
|
||||
RAISE EXCEPTION 'playbook_embeddings.embedding 維度驗證失敗:expected 1024, got %', v_pb_dim;
|
||||
END IF;
|
||||
|
||||
RAISE NOTICE '✅ embedding 遷移驗證通過:rag_chunks 和 playbook_embeddings 均為 vector(1024)';
|
||||
RAISE NOTICE '✅ embedding 遷移驗證通過:knowledge_entries、rag_chunks、playbook_embeddings 均為 vector(1024)';
|
||||
END $$;
|
||||
|
||||
COMMIT;
|
||||
|
||||
@@ -25,7 +25,7 @@
|
||||
"log_anomaly": "deepseek-r1:14b",
|
||||
"nemoclaw": "deepseek-r1:14b",
|
||||
"playbook_draft": "qwen3:14b",
|
||||
"code_review": "qwen2.5-coder:32b",
|
||||
"code_review": "qwen2.5-coder:7b",
|
||||
"embedding": "bge-m3:latest",
|
||||
"rag_generate": "qwen3:14b",
|
||||
"image_analysis": "minicpm-v:latest",
|
||||
@@ -175,7 +175,7 @@
|
||||
},
|
||||
"pr_code_review": {
|
||||
"phase": 32,
|
||||
"model": "qwen2.5-coder:32b",
|
||||
"model": "qwen2.5-coder:7b",
|
||||
"timeout_seconds": 120,
|
||||
"purpose": "Gitea PR 自動審查"
|
||||
},
|
||||
|
||||
@@ -9,7 +9,7 @@ AwoooP Phase 1 Batch 1 回填腳本
|
||||
awooop_phase1_batch1_rls_2026-05-04.sql Step A(ADD COLUMN nullable)已執行
|
||||
|
||||
執行方式:
|
||||
export DATABASE_URL="postgresql+asyncpg://awoooi:<password>@192.168.0.188:5432/awoooi_prod"
|
||||
從 secret manager / operator vault 設定 DATABASE_URL,禁止在指令或檔案中寫入 URL。
|
||||
cd apps/api && python scripts/awooop_phase1_batch1_backfill.py
|
||||
|
||||
2026-05-04 ogt + Claude Sonnet 4.6(ADR-118 Batch 1 C-3 修正)
|
||||
|
||||
@@ -37,6 +37,7 @@ logging = structlog.get_logger(__name__)
|
||||
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://34.143.170.20:11434")
|
||||
EMBEDDING_MODEL = "bge-m3:latest"
|
||||
EXPECTED_DIM = 1024
|
||||
PROJECT_ID = os.getenv("AWOOOP_PROJECT_ID", "awoooi")
|
||||
|
||||
|
||||
async def embed_text(client: httpx.AsyncClient, text: str) -> list[float]:
|
||||
@@ -162,6 +163,7 @@ async def main(dry_run: bool, batch_size: int) -> None:
|
||||
|
||||
conn = await asyncpg.connect(database_url)
|
||||
try:
|
||||
await conn.execute("SELECT set_config('app.project_id', $1, FALSE)", PROJECT_ID)
|
||||
# 統計待嵌入筆數
|
||||
rag_null = await conn.fetchval("SELECT COUNT(*) FROM rag_chunks WHERE embedding IS NULL")
|
||||
pb_null = await conn.fetchval("SELECT COUNT(*) FROM playbook_embeddings WHERE embedding IS NULL")
|
||||
|
||||
@@ -15,7 +15,7 @@ from sqlalchemy import text
|
||||
from sqlalchemy.ext.asyncio import create_async_engine
|
||||
|
||||
# 2026-04-22 ogt: 移除硬碼 changeme,改為讀取環境變數(強制要求設定)。
|
||||
# 執行前: export DATABASE_URL="postgresql+asyncpg://awoooi:<password>@192.168.0.188:5432/awoooi_prod"
|
||||
# 執行前: 從 secret manager / operator vault 設定 DATABASE_URL,禁止在指令或檔案中寫入 URL。
|
||||
DATABASE_URL = os.environ["DATABASE_URL"]
|
||||
|
||||
MIGRATION_SQLS = [
|
||||
|
||||
@@ -28,7 +28,7 @@ except ImportError:
|
||||
# ============================================================================
|
||||
|
||||
NVIDIA_API_KEY = os.getenv("NVIDIA_API_KEY")
|
||||
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://192.168.0.188:11434")
|
||||
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://192.168.0.110:11435")
|
||||
|
||||
if not NVIDIA_API_KEY:
|
||||
print("❌ 請設定 NVIDIA_API_KEY 環境變數")
|
||||
|
||||
@@ -18,8 +18,15 @@ Endpoints:
|
||||
from __future__ import annotations
|
||||
|
||||
import structlog
|
||||
from fastapi import APIRouter, Query
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from src.services.adr100_remediation_service import (
|
||||
RemediationMode,
|
||||
RemediationNotFoundError,
|
||||
get_adr100_remediation_service,
|
||||
)
|
||||
from src.services.adr100_slo_status_service import get_adr100_slo_status_service
|
||||
from src.services.ai_slo_calculator import AiSloCalculator
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
@@ -27,6 +34,20 @@ logger = structlog.get_logger(__name__)
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
class RemediationPreviewRequest(BaseModel):
|
||||
"""ADR-100 remediation preview request."""
|
||||
|
||||
work_item_id: str = Field(min_length=1)
|
||||
mode: RemediationMode = "auto"
|
||||
|
||||
|
||||
class RemediationDryRunRequest(BaseModel):
|
||||
"""ADR-100 remediation dry-run request."""
|
||||
|
||||
work_item_id: str = Field(min_length=1)
|
||||
mode: RemediationMode = "auto"
|
||||
|
||||
|
||||
@router.get("/ai/slo")
|
||||
async def get_ai_slo(
|
||||
force_refresh: bool = Query(False, description="忽略快取,強制重算"),
|
||||
@@ -50,9 +71,65 @@ async def get_ai_slo(
|
||||
if cached:
|
||||
data = cached.to_dict()
|
||||
data["cache_hit"] = True
|
||||
data["adr100"] = await get_adr100_slo_status_service().fetch_report()
|
||||
return data
|
||||
|
||||
report = await calc.run()
|
||||
data = report.to_dict()
|
||||
data["cache_hit"] = False
|
||||
data["adr100"] = await get_adr100_slo_status_service().fetch_report()
|
||||
return data
|
||||
|
||||
|
||||
@router.get("/ai/slo/remediation/preview")
|
||||
async def preview_ai_slo_remediation(
|
||||
work_item_id: str = Query(..., min_length=1),
|
||||
mode: RemediationMode = Query("auto"),
|
||||
) -> dict:
|
||||
"""Preview the safe remediation plan for one ADR-100 queue item."""
|
||||
|
||||
try:
|
||||
return await get_adr100_remediation_service().preview(work_item_id, mode)
|
||||
except RemediationNotFoundError as exc:
|
||||
raise HTTPException(status_code=404, detail="remediation_work_item_not_found") from exc
|
||||
|
||||
|
||||
@router.post("/ai/slo/remediation/preview")
|
||||
async def preview_ai_slo_remediation_post(request: RemediationPreviewRequest) -> dict:
|
||||
"""POST variant for clients that prefer JSON bodies."""
|
||||
|
||||
try:
|
||||
return await get_adr100_remediation_service().preview(
|
||||
request.work_item_id,
|
||||
request.mode,
|
||||
)
|
||||
except RemediationNotFoundError as exc:
|
||||
raise HTTPException(status_code=404, detail="remediation_work_item_not_found") from exc
|
||||
|
||||
|
||||
@router.post("/ai/slo/remediation/dry-run")
|
||||
async def dry_run_ai_slo_remediation(request: RemediationDryRunRequest) -> dict:
|
||||
"""Run a read-only ADR-100 remediation dry-run."""
|
||||
|
||||
try:
|
||||
return await get_adr100_remediation_service().dry_run(
|
||||
request.work_item_id,
|
||||
request.mode,
|
||||
)
|
||||
except RemediationNotFoundError as exc:
|
||||
raise HTTPException(status_code=404, detail="remediation_work_item_not_found") from exc
|
||||
|
||||
|
||||
@router.get("/ai/slo/remediation/history")
|
||||
async def list_ai_slo_remediation_history(
|
||||
limit: int = Query(50, ge=1, le=200),
|
||||
incident_id: str | None = Query(default=None, min_length=1),
|
||||
work_item_id: str | None = Query(default=None, min_length=1),
|
||||
) -> dict:
|
||||
"""List durable ADR-100 remediation dry-run history from alert_operation_log."""
|
||||
|
||||
return await get_adr100_remediation_service().history(
|
||||
limit=limit,
|
||||
incident_id=incident_id,
|
||||
work_item_id=work_item_id,
|
||||
)
|
||||
|
||||
@@ -11,7 +11,7 @@ Endpoints:
|
||||
Components Checked:
|
||||
- PostgreSQL (192.168.0.188:5432)
|
||||
- Redis (192.168.0.188:6380)
|
||||
- Ollama (192.168.0.188:11434)
|
||||
- Ollama (settings.OLLAMA_URL / ADR-110 provider pool)
|
||||
- OpenClaw (192.168.0.188:8089)
|
||||
- SigNoz (192.168.0.188:3301)
|
||||
"""
|
||||
|
||||
@@ -17,9 +17,10 @@ Phase 6.4 核心功能:
|
||||
- Proposal 必須關聯到 Incident
|
||||
"""
|
||||
|
||||
from datetime import UTC, datetime, timedelta
|
||||
from typing import Any
|
||||
|
||||
from fastapi import APIRouter, HTTPException, status
|
||||
from fastapi import APIRouter, HTTPException, Query, status
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from src.core.logging import get_logger
|
||||
@@ -133,6 +134,7 @@ class IncidentTimelineResponse(BaseModel):
|
||||
timeline: list[IncidentTimelineStage] = Field(default_factory=list)
|
||||
events: list[IncidentTimelineEvent] = Field(default_factory=list)
|
||||
ascii_timeline: str
|
||||
reconciliation: dict[str, Any] = Field(default_factory=dict)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
@@ -148,18 +150,26 @@ class IncidentTimelineResponse(BaseModel):
|
||||
|
||||
Phase 6.5 升級:
|
||||
- 每個事件自動附帶 decision_token
|
||||
- 確保 UI 永遠有決策可操作
|
||||
- 雙軌引擎: LLM (主) + Expert System (備)
|
||||
- 預設只讀取已存在的 decision_token
|
||||
- 需要新決策時改由明確的 proposal / operator run 入口觸發
|
||||
""",
|
||||
)
|
||||
async def list_incidents() -> IncidentListResponse:
|
||||
async def list_incidents(
|
||||
generate_missing_decisions: bool = Query(
|
||||
False,
|
||||
description=(
|
||||
"預設 false,列表查詢只讀既有 decision token;"
|
||||
"true 僅供明確維運操作使用,會背景產生缺少的決策。"
|
||||
),
|
||||
),
|
||||
) -> IncidentListResponse:
|
||||
"""
|
||||
取得活躍事件清單
|
||||
|
||||
Phase 6.5: 自動為每個事件生成決策令牌
|
||||
- P0/P1 事件優先處理
|
||||
- 30 秒內保證有決策
|
||||
- LLM 失敗時 Expert System 保底
|
||||
Phase 6.5: 附帶既有決策令牌
|
||||
- 列表查詢必須是低成本純讀路徑
|
||||
- 不可因為前端輪詢就背景觸發 LLM / Ollama / OpenClaw
|
||||
- 需要新決策時,呼叫 POST /api/v1/incidents/{incident_id}/proposal
|
||||
|
||||
Returns:
|
||||
IncidentListResponse: 事件清單與計數 (含決策令牌)
|
||||
@@ -174,8 +184,6 @@ async def list_incidents() -> IncidentListResponse:
|
||||
|
||||
# 按時間排序 (最新優先)
|
||||
# 2026-03-26 修復: 處理 timezone-aware 與 naive datetime 混合問題
|
||||
from datetime import UTC
|
||||
|
||||
def safe_created_at(i: Incident) -> float:
|
||||
"""安全取得 timestamp,處理 timezone 混合問題"""
|
||||
dt = i.created_at
|
||||
@@ -189,15 +197,24 @@ async def list_incidents() -> IncidentListResponse:
|
||||
# 2026-04-09 Claude Sonnet 4.6: 效能修復 — list endpoint 不同步等待 AI
|
||||
# 原設計: 每個 incident await AI 決策 (120-180s timeout),多 incident 時乘積爆炸
|
||||
# 修復: 只取已存在的決策 token,若無則背景觸發生成,前端 poll 單筆 GET 取得結果
|
||||
import asyncio
|
||||
#
|
||||
# 2026-05-06 Codex: 成本與推理槽修復 — 預設不再背景觸發 AI。
|
||||
# 根因: 多個前端頁面會輪詢 GET /incidents;若列表查詢偷偷 create_task,
|
||||
# 每次頁面載入都可能消耗 GCP Ollama / OpenClaw 推理槽,甚至 fallback 到 Gemini。
|
||||
# 新規則: GET list 是純讀;生成新修復建議必須走明確 proposal/operator-run 入口。
|
||||
if generate_missing_decisions:
|
||||
import asyncio
|
||||
|
||||
responses = []
|
||||
background_tasks = []
|
||||
existing_tokens = await decision_manager._find_existing_tokens_for_incidents(
|
||||
[incident.incident_id for incident in incidents]
|
||||
)
|
||||
|
||||
for incident in incidents:
|
||||
try:
|
||||
# 只查已快取的決策 (不等待 AI,立即返回)
|
||||
existing = await decision_manager._find_existing_token(incident.incident_id)
|
||||
existing = existing_tokens.get(incident.incident_id)
|
||||
if existing:
|
||||
decision_info = DecisionInfo(
|
||||
token=existing.token,
|
||||
@@ -207,17 +224,20 @@ async def list_incidents() -> IncidentListResponse:
|
||||
)
|
||||
responses.append(IncidentResponse.from_incident(incident, decision_info))
|
||||
else:
|
||||
# 無快取 → 背景觸發,本次返回 None(前端看到 decision=null 會 poll)
|
||||
# 無快取 → 本次返回 None。列表查詢預設不觸發 AI;
|
||||
# 前端若需要修復建議,必須呼叫明確的 proposal 入口。
|
||||
responses.append(IncidentResponse.from_incident(incident, None))
|
||||
if not generate_missing_decisions:
|
||||
continue
|
||||
|
||||
# 2026-04-16 Claude Sonnet 4.6: 只對 48h 內的 incident 觸發 AI 分析
|
||||
# 舊 incident token 每小時過期,若不限制會反覆重新分析歷史事件 → Telegram 洪水
|
||||
from datetime import datetime, timezone, timedelta
|
||||
_created = getattr(incident, "created_at", None)
|
||||
_too_old = False
|
||||
if _created:
|
||||
if _created.tzinfo is None:
|
||||
_created = _created.replace(tzinfo=timezone.utc)
|
||||
_too_old = (_created < datetime.now(timezone.utc) - timedelta(hours=48))
|
||||
_created = _created.replace(tzinfo=UTC)
|
||||
_too_old = (_created < datetime.now(UTC) - timedelta(hours=48))
|
||||
if not _too_old:
|
||||
timeout = 120.0 if incident.severity in (Severity.P0, Severity.P1) else 180.0
|
||||
background_tasks.append(
|
||||
@@ -240,6 +260,7 @@ async def list_incidents() -> IncidentListResponse:
|
||||
"incidents_listed",
|
||||
count=len(incidents),
|
||||
with_decisions=sum(1 for r in responses if r.decision is not None),
|
||||
generate_missing_decisions=generate_missing_decisions,
|
||||
)
|
||||
|
||||
return IncidentListResponse(
|
||||
|
||||
@@ -9,14 +9,21 @@ ADR-106/ADR-107/ADR-114/ADR-115/ADR-116
|
||||
from fastapi import APIRouter
|
||||
|
||||
from src.api.v1.platform.contracts import router as contracts_router
|
||||
from src.api.v1.platform.events import router as events_router
|
||||
from src.api.v1.platform.operator_runs import router as operator_runs_router
|
||||
from src.api.v1.platform.runs import router as runs_router
|
||||
from src.api.v1.platform.tenants import router as tenants_router
|
||||
from src.api.v1.platform.truth_chain import router as truth_chain_router
|
||||
|
||||
router = APIRouter()
|
||||
router.include_router(events_router)
|
||||
router.include_router(truth_chain_router)
|
||||
# 2026-05-06 Codex: FastAPI 依註冊順序比對路由。Operator Console 的
|
||||
# `/runs/list` 必須排在 `/runs/{run_id}` 前面,否則 `list` 會被當成
|
||||
# run_id,造成前端 Run 監控頁 HTTP 422。
|
||||
router.include_router(operator_runs_router)
|
||||
router.include_router(runs_router)
|
||||
router.include_router(tenants_router)
|
||||
router.include_router(contracts_router)
|
||||
router.include_router(operator_runs_router)
|
||||
|
||||
__all__ = ["router"]
|
||||
|
||||
338
apps/api/src/api/v1/platform/events.py
Normal file
338
apps/api/src/api/v1/platform/events.py
Normal file
@@ -0,0 +1,338 @@
|
||||
"""
|
||||
AwoooP Operator Console — Channel Events API
|
||||
============================================
|
||||
提供 Operator Console 讀取 Communication Hub / legacy mirror 的事件摘要。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
from uuid import UUID
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from src.services.channel_event_dossier_service import (
|
||||
RecurrenceWorkItemMode,
|
||||
RecurrenceWorkItemNotFoundError,
|
||||
fetch_channel_event_dossier,
|
||||
fetch_channel_event_dossier_coverage,
|
||||
fetch_channel_event_dossier_recurrence,
|
||||
fetch_recurrence_work_item_dry_run,
|
||||
fetch_recurrence_work_item_preview,
|
||||
)
|
||||
from src.services.platform_operator_service import list_recent_channel_events
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
class ChannelEventItem(BaseModel):
    # Element type of ``RecentEventsResponse.events``.
    # ``#`` comments are used instead of a docstring so the model's generated
    # schema description stays unchanged.

    # Identifiers.
    event_id: UUID
    project_id: str
    channel_type: str
    provider_event_id: str

    # Nullable values.
    channel_chat_id: str | None
    content_preview: str | None

    # Dedup flag and receive timestamp.
    is_duplicate: bool
    received_at: datetime
|
||||
|
||||
|
||||
class RecentEventsResponse(BaseModel):
    # Response model declared on GET /events/recent.
    # ``#`` comments are used instead of a docstring so the model's generated
    # schema description stays unchanged.

    events: list[ChannelEventItem]
    total: int
    limit: int
|
||||
|
||||
|
||||
class ChannelEventDossierItem(BaseModel):
    # Element type of ``ChannelEventDossierResponse.events``.
    # ``#`` comments are used instead of a docstring so the model's generated
    # schema description stays unchanged. Field order is preserved exactly.

    # Identifiers and stage.
    event_id: UUID
    project_id: str
    channel_type: str
    provider: str | None
    stage: str
    provider_event_id: str

    # Content and redaction fields.
    content_preview: str | None
    content_redacted: str | None
    has_redacted_content: bool
    redaction_version: str | None
    source_url: str | None
    content_sha256: str | None
    content_length: int | None

    # Source references and log correlation (free-form dictionaries).
    source_refs: dict[str, Any]
    source_ref_count: int
    log_correlation: dict[str, Any]

    # Alert attributes; each may be null.
    alertname: str | None
    severity: str | None
    namespace: str | None
    target_resource: str | None
    fingerprint: str | None

    # Dedup flag and timestamps.
    is_duplicate: bool
    provider_ts: datetime | None
    received_at: datetime
|
||||
|
||||
|
||||
class ChannelEventDossierSummary(BaseModel):
    # Type of ``ChannelEventDossierResponse.summary``; four required integer
    # totals. ``#`` comments are used instead of a docstring so the model's
    # generated schema description stays unchanged.

    source_count: int
    duplicate_total: int
    redacted_total: int
    source_ref_total: int
|
||||
|
||||
|
||||
class ChannelEventDossierResponse(BaseModel):
    # Response model declared on GET /events/dossier.
    # ``#`` comments are used instead of a docstring so the model's generated
    # schema description stays unchanged.

    events: list[ChannelEventDossierItem]
    total: int
    limit: int
    summary: ChannelEventDossierSummary
|
||||
|
||||
|
||||
class ChannelEventProviderCoverage(BaseModel):
    # Element type of ``ChannelEventDossierCoverageResponse.providers``.
    # ``#`` comments are used instead of a docstring so the model's generated
    # schema description stays unchanged.

    provider: str

    # Required integer totals.
    total: int
    duplicate_total: int
    redacted_total: int
    source_ref_total: int
    missing_source_refs_total: int
    sentry_ref_total: int
    signoz_ref_total: int
    alert_ref_total: int

    # May be null.
    latest_received_at: datetime | None
|
||||
|
||||
|
||||
class ChannelEventDossierCoverageSummary(BaseModel):
    # Type of ``ChannelEventDossierCoverageResponse.summary``.
    # ``#`` comments are used instead of a docstring so the model's generated
    # schema description stays unchanged.

    # Required integer totals.
    source_count: int
    source_envelope_total: int
    missing_source_envelope_total: int
    with_source_refs_total: int
    missing_source_refs_total: int
    duplicate_total: int
    redacted_total: int
    source_ref_total: int
    sentry_ref_total: int
    signoz_ref_total: int
    alert_ref_total: int

    # May be null.
    latest_received_at: datetime | None
|
||||
|
||||
|
||||
class ChannelEventDossierCoverageResponse(BaseModel):
    # Response model declared on GET /events/dossier/coverage.
    # ``#`` comments are used instead of a docstring so the model's generated
    # schema description stays unchanged.

    project_id: str
    limit: int
    summary: ChannelEventDossierCoverageSummary
    providers: list[ChannelEventProviderCoverage]
|
||||
|
||||
|
||||
class ChannelEventRecurrenceSummary(BaseModel):
    # Type of ``ChannelEventRecurrenceResponse.summary``.
    # ``#`` comments are used instead of a docstring so the model's generated
    # schema description stays unchanged.

    # Required integer totals (no default).
    source_event_total: int
    recurrence_group_total: int
    recurrent_group_total: int
    duplicate_event_total: int
    linked_run_total: int
    unlinked_event_total: int

    # Integer totals that default to 0 when omitted.
    auto_repair_linked_total: int = 0
    verified_repair_group_total: int = 0
    open_work_item_group_total: int = 0
    manual_gate_group_total: int = 0
    automation_gap_group_total: int = 0
    failed_repair_group_total: int = 0

    # May be null.
    latest_received_at: datetime | None
|
||||
|
||||
|
||||
class ChannelEventRecurrenceItem(BaseModel):
    # Element type of ``ChannelEventRecurrenceResponse.items``.
    # ``#`` comments are used instead of a docstring so the model's generated
    # schema description stays unchanged. Field order is preserved exactly.

    # Group key and alert attributes.
    recurrence_key: str
    provider: str | None
    alertname: str | None
    severity: str | None
    namespace: str | None
    target_resource: str | None
    fingerprint: str | None

    # "latest_*" values; each may be null.
    latest_event_id: UUID | None
    latest_provider_event_id: str | None
    latest_content_preview: str | None
    latest_run_id: UUID | None
    latest_run_state: str | None
    latest_agent_id: str | None

    # Fields with defaults (None, or a fresh empty list per instance).
    latest_incident_id: str | None = None
    incident_ids: list[str] = Field(default_factory=list)
    repair_summary: dict[str, Any] | None = None
    work_item: dict[str, Any] | None = None

    # Required integer totals.
    occurrence_total: int
    duplicate_total: int
    linked_run_total: int
    source_ref_total: int
    missing_source_refs_total: int
    sentry_ref_total: int
    signoz_ref_total: int
    alert_ref_total: int

    # Mapping of string keys to integer counts.
    run_state_counts: dict[str, int]

    # Timestamps; each may be null.
    first_received_at: datetime | None
    latest_received_at: datetime | None
|
||||
|
||||
|
||||
class ChannelEventRecurrenceResponse(BaseModel):
    # Response model declared on GET /events/dossier/recurrence.
    # ``#`` comments are used instead of a docstring so the model's generated
    # schema description stays unchanged.

    project_id: str
    limit: int
    summary: ChannelEventRecurrenceSummary
    items: list[ChannelEventRecurrenceItem]
|
||||
|
||||
|
||||
class RecurrenceWorkItemDryRunRequest(BaseModel):
    """AwoooP recurrence work item dry-run request."""

    # JSON body accepted by POST /events/dossier/recurrence/work-item/dry-run.
    # The docstring above is kept verbatim because model docstrings can appear
    # in the generated schema.

    # Optional filter; when supplied it must be non-empty.
    project_id: str | None = Field(default=None, min_length=1)
    # Required, non-empty.
    work_item_id: str = Field(min_length=1)
    mode: RecurrenceWorkItemMode = "auto"
    # Optional filter; when supplied it must be non-empty.
    provider: str | None = Field(default=None, min_length=1)
    # Bounded to 1..300; defaults to the upper bound.
    limit: int = Field(default=300, ge=1, le=300)
|
||||
|
||||
|
||||
@router.get(
    "/events/dossier",
    response_model=ChannelEventDossierResponse,
    summary="查詢 Channel Event 來源卷宗",
    description=(
        "返回 redacted inbound source envelope,供 AwoooP Run Detail 顯示"
        "告警來源、source refs、Sentry / SignOz / Alertmanager 關聯與去重狀態。"
    ),
)
async def get_event_dossier(
    project_id: str | None = Query(None, description="租戶 ID(可選)"),
    run_id: UUID | None = Query(None, description="Run ID(可選)"),
    provider_event_id: str | None = Query(
        None, description="provider_event_id(可選)"
    ),
    limit: int = Query(20, ge=1, le=50, description="最多返回筆數"),
) -> dict[str, Any]:
    """Return the channel-event dossier for the supplied optional filters.

    Every query parameter is forwarded unchanged to
    ``fetch_channel_event_dossier``; ``limit`` is validated to 1..50.
    """
    dossier = await fetch_channel_event_dossier(
        project_id=project_id,
        run_id=run_id,
        provider_event_id=provider_event_id,
        limit=limit,
    )
    return dossier
|
||||
|
||||
|
||||
@router.get(
    "/events/dossier/coverage",
    response_model=ChannelEventDossierCoverageResponse,
    summary="查詢 Channel Event 來源卷宗覆蓋率",
    description=(
        "返回近期 inbound event 的 source_envelope / source_refs / 去重 / "
        "Sentry / SignOz 關聯覆蓋率,供 AwoooP Run List 顯示告警是否已入庫。"
    ),
)
async def get_event_dossier_coverage(
    project_id: str | None = Query(None, description="租戶 ID(可選)"),
    provider: str | None = Query(
        None, description="provider(可選,如 sentry / signoz)"
    ),
    limit: int = Query(100, ge=1, le=200, description="最多納入統計筆數"),
) -> dict[str, Any]:
    """Return dossier coverage statistics for the supplied optional filters.

    Every query parameter is forwarded unchanged to
    ``fetch_channel_event_dossier_coverage``; ``limit`` is validated to 1..200.
    """
    coverage = await fetch_channel_event_dossier_coverage(
        project_id=project_id,
        provider=provider,
        limit=limit,
    )
    return coverage
|
||||
|
||||
|
||||
@router.get(
    "/events/dossier/recurrence",
    response_model=ChannelEventRecurrenceResponse,
    summary="查詢 Channel Event 重複發生與關聯 Run 狀態",
    description=(
        "將近期 inbound source events 依 fingerprint / alertname / namespace / target 分組,"
        "顯示重複發生次數、去重數、source refs 與最新 linked run 狀態。"
    ),
)
async def get_event_dossier_recurrence(
    project_id: str | None = Query(None, description="租戶 ID(可選)"),
    provider: str | None = Query(
        None, description="provider(可選,如 alertmanager / sentry / signoz)"
    ),
    limit: int = Query(100, ge=1, le=300, description="最多納入統計筆數"),
) -> dict[str, Any]:
    """Return recurrence groups for the supplied optional filters.

    Every query parameter is forwarded unchanged to
    ``fetch_channel_event_dossier_recurrence``; ``limit`` is validated to 1..300.
    """
    recurrence = await fetch_channel_event_dossier_recurrence(
        project_id=project_id,
        provider=provider,
        limit=limit,
    )
    return recurrence
|
||||
|
||||
|
||||
@router.get(
    "/events/dossier/recurrence/work-item/preview",
    summary="預覽重複告警工作項的安全處理計畫",
    description=(
        "依 recurrence read model 找出指定 work_item,返回下一步、pre-flight checks "
        "與 read-only / no-write 保證;不修改 incident、auto-repair 或 ticket 狀態。"
    ),
)
async def preview_event_recurrence_work_item(
    work_item_id: str = Query(..., min_length=1, description="recurrence work_item_id"),
    project_id: str | None = Query(None, description="租戶 ID(可選)"),
    provider: str | None = Query(
        None, description="provider(可選,如 alertmanager / sentry / signoz)"
    ),
    mode: RecurrenceWorkItemMode = Query("auto", description="預覽模式"),
    limit: int = Query(300, ge=1, le=300, description="最多納入統計筆數"),
) -> dict[str, Any]:
    """Return the preview for one recurrence work item.

    Raises:
        HTTPException: 404 with detail ``recurrence_work_item_not_found`` when
            the service raises ``RecurrenceWorkItemNotFoundError``.
    """
    try:
        preview = await fetch_recurrence_work_item_preview(
            project_id=project_id,
            work_item_id=work_item_id,
            mode=mode,
            provider=provider,
            limit=limit,
        )
    except RecurrenceWorkItemNotFoundError as exc:
        # Translate the service-level "not found" into an HTTP 404.
        raise HTTPException(
            status_code=404,
            detail="recurrence_work_item_not_found",
        ) from exc
    return preview
|
||||
|
||||
|
||||
@router.post(
    "/events/dossier/recurrence/work-item/dry-run",
    summary="乾跑重複告警工作項的安全處理流程",
    description=(
        "依 recurrence read model 產生 dry-run 結果並寫入 pre-flight history,"
        "但不修改 incident、auto-repair 或 ticket 狀態。"
    ),
)
async def dry_run_event_recurrence_work_item(
    request: RecurrenceWorkItemDryRunRequest,
) -> dict[str, Any]:
    """Run the dry-run for one recurrence work item described by the JSON body.

    Raises:
        HTTPException: 404 with detail ``recurrence_work_item_not_found`` when
            the service raises ``RecurrenceWorkItemNotFoundError``.
    """
    try:
        outcome = await fetch_recurrence_work_item_dry_run(
            project_id=request.project_id,
            work_item_id=request.work_item_id,
            mode=request.mode,
            provider=request.provider,
            limit=request.limit,
        )
    except RecurrenceWorkItemNotFoundError as exc:
        # Translate the service-level "not found" into an HTTP 404.
        raise HTTPException(
            status_code=404,
            detail="recurrence_work_item_not_found",
        ) from exc
    return outcome
|
||||
|
||||
|
||||
@router.get(
    "/events/recent",
    response_model=RecentEventsResponse,
    summary="列出最近 Channel Events",
    description=(
        "返回 awooop_conversation_event 最近事件。"
        "可用 channel_type / provider_prefix 過濾,例如 alert-group 收斂事件。"
    ),
)
async def list_recent_events(
    project_id: str | None = Query(None, description="租戶 ID(可選)"),
    channel_type: str | None = Query(None, description="通道類型(可選)"),
    provider_prefix: str | None = Query(
        None, description="provider_event_id 前綴(可選)"
    ),
    limit: int = Query(20, ge=1, le=100, description="最多返回筆數"),
) -> dict[str, Any]:
    """Return recent channel events for the supplied optional filters.

    Every query parameter is forwarded unchanged to
    ``list_recent_channel_events``; ``limit`` is validated to 1..100.
    """
    recent = await list_recent_channel_events(
        project_id=project_id,
        channel_type=channel_type,
        provider_prefix=provider_prefix,
        limit=limit,
    )
    return recent
|
||||
@@ -15,12 +15,26 @@ from decimal import Decimal
|
||||
from typing import Any, Literal
|
||||
from uuid import UUID
|
||||
|
||||
from fastapi import APIRouter, Query
|
||||
from fastapi import APIRouter, Depends, Query
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from src.core.awooop_operator_auth import (
|
||||
AwoooPOperatorPrincipal,
|
||||
verify_awooop_operator,
|
||||
)
|
||||
from src.services.platform_operator_service import (
|
||||
decide_approval as decide_approval_svc,
|
||||
)
|
||||
from src.services.platform_operator_service import (
|
||||
get_run_detail as get_run_detail_svc,
|
||||
)
|
||||
from src.services.platform_operator_service import (
|
||||
list_approvals as list_approvals_svc,
|
||||
)
|
||||
from src.services.platform_operator_service import (
|
||||
list_callback_replies as list_callback_replies_svc,
|
||||
)
|
||||
from src.services.platform_operator_service import (
|
||||
list_runs as list_runs_svc,
|
||||
)
|
||||
|
||||
@@ -40,6 +54,8 @@ class RunItem(BaseModel):
|
||||
step_count: int
|
||||
created_at: datetime
|
||||
timeout_at: datetime | None
|
||||
remediation_summary: dict[str, Any] | None = None
|
||||
callback_reply_summary: dict[str, Any] | None = None
|
||||
|
||||
|
||||
class ListRunsResponse(BaseModel):
|
||||
@@ -49,12 +65,43 @@ class ListRunsResponse(BaseModel):
|
||||
per_page: int
|
||||
|
||||
|
||||
class CallbackReplyItem(BaseModel):
    # Element type of ``ListCallbackRepliesResponse.items``.
    # ``#`` comments are used instead of a docstring so the model's generated
    # schema description stays unchanged. Field order is preserved exactly.

    # Required identifiers and status.
    message_id: UUID
    run_id: UUID
    project_id: str
    status: str
    needs_human: bool

    # Optional values defaulting to None.
    action: str | None = None
    incident_id: str | None = None
    event_at: datetime | None = None

    # Required message attributes.
    channel_type: str
    message_type: str
    send_status: str

    # Optional values defaulting to None.
    send_error: str | None = None
    provider_message_id: str | None = None
    triggered_by_state: str | None = None
    content_preview: str | None = None
    run_state: str | None = None
    agent_id: str | None = None
    run_created_at: datetime | None = None

    # Required free-form dictionary.
    callback_reply: dict[str, Any]

    # Optional value defaulting to None.
    run_detail_href: str | None = None
|
||||
|
||||
|
||||
class ListCallbackRepliesResponse(BaseModel):
    # Response model declared on GET /runs/callback-replies.
    # ``#`` comments are used instead of a docstring so the model's generated
    # schema description stays unchanged.

    items: list[CallbackReplyItem]
    total: int
    page: int
    per_page: int
|
||||
|
||||
|
||||
class ApprovalItem(BaseModel):
    # Approval entry schema; where it is used is not visible in this chunk.
    # ``#`` comments are used instead of a docstring so the model's generated
    # schema description stays unchanged.

    run_id: UUID
    project_id: str
    agent_id: str
    created_at: datetime
    # May be null.
    timeout_at: datetime | None
    # Optional free-form dictionary; defaults to None.
    remediation_summary: dict[str, Any] | None = None
|
||||
|
||||
|
||||
class ListApprovalsResponse(BaseModel):
|
||||
@@ -65,7 +112,10 @@ class ListApprovalsResponse(BaseModel):
|
||||
class DecideApprovalRequest(BaseModel):
|
||||
project_id: str = Field(..., description="租戶 ID")
|
||||
decision: Literal["approve", "reject"] = Field(..., description="核准或拒絕")
|
||||
approver_id: str = Field(..., description="審核人 ID(platform_subject_id 或 operator email)")
|
||||
approver_id: str | None = Field(
|
||||
default=None,
|
||||
description="Deprecated. Ignored; approver comes from trusted operator headers.",
|
||||
)
|
||||
reason: str | None = Field(None, description="決策原因(可選)")
|
||||
|
||||
|
||||
@@ -81,7 +131,8 @@ class DecideApprovalResponse(BaseModel):
|
||||
response_model=ListRunsResponse,
|
||||
summary="列出 Runs",
|
||||
description=(
|
||||
"返回 awooop_run_state 記錄,支援 project_id / state filter 與分頁。\n\n"
|
||||
"返回 awooop_run_state 記錄,支援 project_id / state / remediation_status / "
|
||||
"callback_reply_status / incident_id filter 與分頁。\n\n"
|
||||
"- 按 created_at DESC 排序\n"
|
||||
"- 注意:此路徑為 /runs/list 以避免與 runs.py 的 /runs/{run_id} 衝突"
|
||||
),
|
||||
@@ -89,14 +140,74 @@ class DecideApprovalResponse(BaseModel):
|
||||
async def list_runs(
|
||||
project_id: str | None = Query(None, description="租戶 ID(可選)"),
|
||||
state: str | None = Query(None, description="Run 狀態 filter(可選)"),
|
||||
remediation_status: str | None = Query(
|
||||
None,
|
||||
description="AI 證據狀態 filter(no_evidence/mcp_observed/read_only_dry_run/write_observed/blocked/observed)",
|
||||
),
|
||||
callback_reply_status: str | None = Query(
|
||||
None,
|
||||
description="Telegram callback reply 狀態 filter(no_callback/sent/fallback_sent/rescue_sent/failed/observed)",
|
||||
),
|
||||
incident_id: str | None = Query(None, description="關聯 Incident ID filter(可選)"),
|
||||
page: int = Query(1, ge=1, description="頁碼,從 1 開始"),
|
||||
per_page: int = Query(_DEFAULT_PER_PAGE, ge=1, le=_MAX_PER_PAGE, description="每頁筆數"),
|
||||
) -> dict[str, Any]:
|
||||
return await list_runs_svc(
|
||||
project_id=project_id, state=state, page=page, per_page=per_page
|
||||
project_id=project_id,
|
||||
state=state,
|
||||
remediation_status=remediation_status,
|
||||
callback_reply_status=callback_reply_status,
|
||||
incident_id=incident_id,
|
||||
page=page,
|
||||
per_page=per_page,
|
||||
)
|
||||
|
||||
|
||||
@router.get(
    "/runs/callback-replies",
    response_model=ListCallbackRepliesResponse,
    summary="列出 Telegram Callback Reply Evidence",
    description=(
        "從 AwoooP outbound mirror 查詢 Telegram 詳情 / 歷史 callback reply 的"
        "送達、fallback、救援與失敗證據;只讀,不修改 incident、run 或 Telegram 狀態。"
    ),
)
async def list_callback_replies(
    project_id: str | None = Query(None, description="租戶 ID(可選)"),
    callback_reply_status: str | None = Query(
        None,
        description="Telegram callback reply 狀態 filter(sent/fallback_sent/rescue_sent/failed/observed/no_callback)",
    ),
    action: str | None = Query(None, description="Callback action filter(例如 detail/history)"),
    incident_id: str | None = Query(None, description="關聯 Incident ID filter(可選)"),
    page: int = Query(1, ge=1, description="頁碼,從 1 開始"),
    per_page: int = Query(20, ge=1, le=_MAX_PER_PAGE, description="每頁筆數"),
) -> dict[str, Any]:
    """Return one page of callback-reply records for the supplied filters.

    Every query parameter is forwarded unchanged to
    ``list_callback_replies_svc``; ``page`` starts at 1 and ``per_page`` is
    capped by the module constant ``_MAX_PER_PAGE``.
    """
    replies = await list_callback_replies_svc(
        project_id=project_id,
        callback_reply_status=callback_reply_status,
        action=action,
        incident_id=incident_id,
        page=page,
        per_page=per_page,
    )
    return replies
|
||||
|
||||
|
||||
@router.get(
    "/runs/{run_id}/detail",
    summary="查詢 Run 詳細時間線",
    description=(
        "返回單一 Run 的主狀態、Step Journal、MCP Gateway audit、"
        "入站 Channel Event 與出站訊息,供 Operator Console 顯示完整處置脈絡。"
    ),
)
async def get_run_detail(
    run_id: str,
    project_id: str | None = Query(None, description="租戶 ID(可選)"),
) -> dict[str, Any]:
    """Return the detail payload for one run.

    ``run_id`` comes from the URL path and ``project_id`` is an optional query
    filter; both are forwarded unchanged to ``get_run_detail_svc``.
    """
    detail = await get_run_detail_svc(run_id=run_id, project_id=project_id)
    return detail
|
||||
|
||||
|
||||
@router.get(
|
||||
"/approvals",
|
||||
response_model=ListApprovalsResponse,
|
||||
@@ -108,8 +219,17 @@ async def list_runs(
|
||||
)
|
||||
async def list_approvals(
|
||||
project_id: str | None = Query(None, description="租戶 ID(可選)"),
|
||||
run_id: str | None = Query(None, description="Run ID(可選,M8 詳情頁查單筆)"),
|
||||
remediation_status: str | None = Query(
|
||||
None,
|
||||
description="AI 證據狀態 filter(no_evidence/mcp_observed/read_only_dry_run/write_observed/blocked/observed)",
|
||||
),
|
||||
) -> dict[str, Any]:
|
||||
return await list_approvals_svc(project_id=project_id)
|
||||
return await list_approvals_svc(
|
||||
project_id=project_id,
|
||||
run_id=run_id,
|
||||
remediation_status=remediation_status,
|
||||
)
|
||||
|
||||
|
||||
@router.post(
|
||||
@@ -126,11 +246,12 @@ async def list_approvals(
|
||||
async def decide_approval(
|
||||
run_id: str,
|
||||
body: DecideApprovalRequest,
|
||||
operator: AwoooPOperatorPrincipal = Depends(verify_awooop_operator),
|
||||
) -> dict[str, Any]:
|
||||
return await decide_approval_svc(
|
||||
run_id=run_id,
|
||||
project_id=body.project_id,
|
||||
decision=body.decision,
|
||||
approver_id=body.approver_id,
|
||||
approver_id=operator.operator_id,
|
||||
reason=body.reason,
|
||||
)
|
||||
|
||||
64
apps/api/src/api/v1/platform/truth_chain.py
Normal file
64
apps/api/src/api/v1/platform/truth_chain.py
Normal file
@@ -0,0 +1,64 @@
|
||||
"""AwoooP Operator Console — truth-chain read API."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from fastapi import APIRouter, Depends, Query
|
||||
|
||||
from src.core.awooop_operator_auth import (
|
||||
AwoooPOperatorPrincipal,
|
||||
verify_awooop_operator,
|
||||
)
|
||||
from src.services.awooop_truth_chain_service import (
|
||||
fetch_automation_quality_summary,
|
||||
fetch_truth_chain,
|
||||
)
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.get(
    "/truth-chain/quality/summary",
    summary="查詢 AI 自動化品質總覽",
    description=(
        "T12c read-only aggregate endpoint. 聚合最近 incident 的 automation quality gate,"
        "讓 Operator 不必逐張 Telegram 卡片判斷是否真正完成 AI 自動修復。"
        "此總覽不回傳逐筆 examples;source-level truth-chain 詳情仍需 operator auth。"
    ),
)
async def get_automation_quality_summary(
    project_id: str = Query("awoooi", description="租戶 ID"),
    hours: int = Query(24, ge=1, le=168, description="回看小時數"),
    limit: int = Query(200, ge=1, le=500, description="最多評估 incident 數"),
) -> dict[str, Any]:
    """Return the automation-quality summary with per-item examples removed.

    The summary from ``fetch_automation_quality_summary`` is modified in
    place: ``examples`` is replaced by an empty list and a ``visibility_note``
    pointing at the source-level endpoint is set. This handler declares no
    operator dependency, unlike ``/truth-chain/{source_id}``.
    """
    summary = await fetch_automation_quality_summary(
        project_id=project_id,
        hours=hours,
        limit=limit,
    )
    # Same two in-place key writes, in the same order: examples first, then
    # the note.
    summary.update(
        examples=[],
        visibility_note=(
            "Aggregate only. Use /truth-chain/{source_id} with operator auth for source-level details."
        ),
    )
    return summary
|
||||
|
||||
|
||||
@router.get(
    "/truth-chain/{source_id}",
    summary="查詢 Telegram / Incident / Drift 真相鏈",
    description=(
        "T0 read-only endpoint. 聚合 incident、approval、evidence、MCP、"
        "automation_operation_log、drift repeat state 與 outbound mirror,"
        "讓 Operator Console 能判斷 Telegram 卡片目前卡在哪個流程節點。"
    ),
)
async def get_truth_chain(
    source_id: str,
    project_id: str = Query("awoooi", description="租戶 ID"),
    operator: AwoooPOperatorPrincipal = Depends(verify_awooop_operator),
) -> dict[str, Any]:
    """Return the truth chain for ``source_id`` within ``project_id``.

    The ``operator`` dependency is declared so ``verify_awooop_operator`` runs
    for this route; the resolved principal is not passed to the query.
    """
    # The dependency exists only to gate access; drop the unused principal.
    del operator
    chain = await fetch_truth_chain(source_id=source_id, project_id=project_id)
    return chain
|
||||
@@ -8,9 +8,10 @@ leWOOOgo 原則: Router 只做 HTTP 轉發,業務邏輯在 KnowledgeRAGService
|
||||
建立者: Claude Code (Phase 33 ADR-067)
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter, BackgroundTasks, HTTPException
|
||||
from fastapi import APIRouter, BackgroundTasks
|
||||
from pydantic import BaseModel
|
||||
|
||||
from src.core.config import get_settings
|
||||
from src.services.knowledge_rag_service import get_knowledge_rag_service
|
||||
|
||||
router = APIRouter(prefix="/rag", tags=["RAG Knowledge Base"])
|
||||
@@ -43,9 +44,10 @@ async def trigger_index(background_tasks: BackgroundTasks) -> RagIndexResponse:
|
||||
- .agents/skills/*.md
|
||||
"""
|
||||
background_tasks.add_task(_run_index)
|
||||
model = get_settings().OLLAMA_EMBEDDING_MODEL
|
||||
return RagIndexResponse(
|
||||
status="accepted",
|
||||
message="索引已排程,背景執行中(nomic-embed-text @ Ollama 111)",
|
||||
message=f"索引已排程,背景執行中({model} @ Ollama GCP-A/GCP-B/111)",
|
||||
)
|
||||
|
||||
|
||||
@@ -76,15 +78,16 @@ async def rag_debug() -> dict:
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=10.0) as c:
|
||||
from src.core.config import get_settings as _gs
|
||||
settings = _gs()
|
||||
r = await c.post(
|
||||
f"{_gs().OLLAMA_URL}/api/embeddings",
|
||||
json={"model": "nomic-embed-text", "prompt": "test"},
|
||||
f"{settings.OLLAMA_URL}/api/embeddings",
|
||||
json={"model": settings.OLLAMA_EMBEDDING_MODEL, "prompt": "test"},
|
||||
)
|
||||
ollama_ok = r.status_code == 200 if r.status_code == 200 else f"http_{r.status_code}"
|
||||
except Exception as e:
|
||||
ollama_ok = f"error: {type(e).__name__}: {e}"
|
||||
|
||||
return {"cwd": os.getcwd(), "paths": paths_check, "ollama_111_embed": ollama_ok}
|
||||
return {"cwd": os.getcwd(), "paths": paths_check, "ollama_embedding": ollama_ok}
|
||||
|
||||
|
||||
@router.get("/stats", summary="索引統計")
|
||||
|
||||
@@ -35,6 +35,7 @@ from src.models.approval import (
|
||||
)
|
||||
from src.services.anomaly_counter import get_anomaly_counter
|
||||
from src.services.approval_db import get_approval_service
|
||||
from src.services.channel_hub import record_external_alert_event
|
||||
from src.services.openclaw_http_service import get_openclaw_http_service
|
||||
from src.services.sentry_service import get_sentry_service
|
||||
# 2026-04-27 P3.1-T2 by Claude — Tier-2 三服務感知強化:補 SentryWebhookService 簽章驗證
|
||||
@@ -124,16 +125,60 @@ async def handle_sentry_error(
|
||||
|
||||
# 提取錯誤資訊
|
||||
issue_data = payload.get("data", {}).get("issue", {})
|
||||
event_data = payload.get("data", {}).get("event", {})
|
||||
issue_id = issue_data.get("id")
|
||||
source_url = (
|
||||
issue_data.get("permalink")
|
||||
or issue_data.get("web_url")
|
||||
or issue_data.get("url")
|
||||
)
|
||||
|
||||
background_tasks.add_task(
|
||||
record_external_alert_event,
|
||||
project_id="awoooi",
|
||||
provider="sentry",
|
||||
event_id=str(issue_id or issue_data.get("shortId") or "unknown"),
|
||||
stage="received",
|
||||
title=str(issue_data.get("title") or "Sentry issue"),
|
||||
severity=str(issue_data.get("level") or "error"),
|
||||
namespace="sentry",
|
||||
target_resource=str(issue_data.get("culprit") or issue_data.get("project", {}).get("slug") or "unknown"),
|
||||
fingerprint=f"sentry-{issue_id or issue_data.get('shortId') or 'unknown'}",
|
||||
source_url=source_url,
|
||||
labels={
|
||||
"project": issue_data.get("project", {}),
|
||||
"level": issue_data.get("level"),
|
||||
"culprit": issue_data.get("culprit"),
|
||||
},
|
||||
annotations={"message": event_data.get("message")},
|
||||
payload=payload,
|
||||
)
|
||||
|
||||
# Phase 10.2.1: 去重檢查 (10 分鐘內不重複發送)
|
||||
issue_id = issue_data.get("id")
|
||||
sentry_service = get_sentry_service()
|
||||
if not await sentry_service.check_dedup(issue_id, ttl=SENTRY_DEDUP_TTL):
|
||||
background_tasks.add_task(
|
||||
record_external_alert_event,
|
||||
project_id="awoooi",
|
||||
provider="sentry",
|
||||
event_id=str(issue_id or issue_data.get("shortId") or "unknown"),
|
||||
stage="deduplicated",
|
||||
title=str(issue_data.get("title") or "Sentry issue"),
|
||||
severity=str(issue_data.get("level") or "error"),
|
||||
namespace="sentry",
|
||||
target_resource=str(issue_data.get("culprit") or issue_data.get("project", {}).get("slug") or "unknown"),
|
||||
fingerprint=f"sentry-{issue_id or issue_data.get('shortId') or 'unknown'}",
|
||||
source_url=source_url,
|
||||
labels={"project": issue_data.get("project", {}), "level": issue_data.get("level")},
|
||||
annotations={"message": event_data.get("message")},
|
||||
payload={"dedup_ttl": SENTRY_DEDUP_TTL},
|
||||
is_duplicate=True,
|
||||
)
|
||||
return {"status": "deduplicated", "issue_id": issue_id, "ttl": SENTRY_DEDUP_TTL}
|
||||
event_data = payload.get("data", {}).get("event", {})
|
||||
|
||||
error_context = {
|
||||
"issue_id": issue_data.get("id"),
|
||||
"source_url": source_url,
|
||||
"title": issue_data.get("title"),
|
||||
"culprit": issue_data.get("culprit"),
|
||||
"level": issue_data.get("level"),
|
||||
@@ -256,6 +301,29 @@ async def analyze_and_comment(
|
||||
analysis=analysis,
|
||||
anomaly_frequency=frequency_dict,
|
||||
)
|
||||
await record_external_alert_event(
|
||||
project_id="awoooi",
|
||||
provider="sentry",
|
||||
event_id=str(issue_id or error_context.get("issue_id") or "unknown"),
|
||||
stage="approval_linked",
|
||||
title=str(error_context.get("title") or "Sentry issue"),
|
||||
severity=str(error_context.get("level") or "error"),
|
||||
namespace="sentry",
|
||||
target_resource=str(error_context.get("culprit") or error_context.get("project") or "unknown"),
|
||||
fingerprint=f"sentry-{issue_id or error_context.get('issue_id') or 'unknown'}",
|
||||
approval_id=approval_id,
|
||||
source_url=error_context.get("source_url"),
|
||||
labels={
|
||||
"project": error_context.get("project"),
|
||||
"level": error_context.get("level"),
|
||||
},
|
||||
annotations={"message": error_context.get("message")},
|
||||
payload={
|
||||
"anomaly_frequency": frequency_dict,
|
||||
"ai_analyzed": analysis is not None,
|
||||
"ai_provider": analysis.analyzed_by if analysis else None,
|
||||
},
|
||||
)
|
||||
|
||||
# 4. 發送 Telegram 告警 (含頻率資訊)
|
||||
await send_sentry_telegram_alert(
|
||||
|
||||
@@ -18,6 +18,7 @@ AWOOOI API - SignOz Webhook Handler
|
||||
"""
|
||||
|
||||
import uuid
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import structlog
|
||||
from fastapi import APIRouter, BackgroundTasks, HTTPException, Request
|
||||
@@ -37,10 +38,14 @@ from src.models.approval import (
|
||||
)
|
||||
from src.services.anomaly_counter import get_anomaly_counter
|
||||
from src.services.approval_db import get_approval_service
|
||||
from src.services.channel_hub import record_external_alert_event
|
||||
from src.services.incident_service import get_incident_service
|
||||
from src.services.telegram_gateway import get_telegram_gateway
|
||||
from src.utils.timezone import now_taipei_iso
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from src.services.openclaw import LLMAnalysisResult
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/webhooks/signoz", tags=["SignOz Webhook"])
|
||||
@@ -104,6 +109,26 @@ async def handle_signoz_alert(
|
||||
labels = alert.get("labels", {})
|
||||
annotations = alert.get("annotations", {})
|
||||
severity = labels.get("severity", "warning")
|
||||
source_url = alert.get("generatorURL")
|
||||
service_name = labels.get("service_name", labels.get("service", "unknown"))
|
||||
fingerprint = labels.get("fingerprint") or f"signoz-{alert_name}-{service_name}"
|
||||
|
||||
background_tasks.add_task(
|
||||
record_external_alert_event,
|
||||
project_id="awoooi",
|
||||
provider="signoz",
|
||||
event_id=str(fingerprint),
|
||||
stage="received",
|
||||
title=str(alert_name),
|
||||
severity=str(severity),
|
||||
namespace=str(labels.get("namespace", "signoz")),
|
||||
target_resource=str(service_name),
|
||||
fingerprint=str(fingerprint),
|
||||
source_url=source_url,
|
||||
labels=labels,
|
||||
annotations=annotations,
|
||||
payload=alert,
|
||||
)
|
||||
|
||||
# 背景處理
|
||||
background_tasks.add_task(
|
||||
@@ -113,6 +138,8 @@ async def handle_signoz_alert(
|
||||
annotations=annotations,
|
||||
severity=severity,
|
||||
starts_at=alert.get("startsAt"),
|
||||
source_url=source_url,
|
||||
raw_payload=alert,
|
||||
)
|
||||
|
||||
results.append({
|
||||
@@ -133,6 +160,8 @@ async def process_signoz_alert(
|
||||
annotations: dict,
|
||||
severity: str,
|
||||
starts_at: str | None,
|
||||
source_url: str | None = None,
|
||||
raw_payload: dict | None = None,
|
||||
):
|
||||
"""
|
||||
背景處理 SignOz 告警
|
||||
@@ -190,6 +219,7 @@ async def process_signoz_alert(
|
||||
"annotations": annotations,
|
||||
"fingerprint": f"signoz-{alert_name}-{labels.get('service_name', 'unknown')}",
|
||||
}
|
||||
fingerprint = signal_data["fingerprint"]
|
||||
# ADR-037: 傳遞頻率統計到 Incident
|
||||
incident = await incident_service.create_incident_from_signal(
|
||||
signal_data, frequency_stats=anomaly_frequency
|
||||
@@ -229,6 +259,30 @@ async def process_signoz_alert(
|
||||
anomaly_frequency=anomaly_frequency,
|
||||
analysis_result=analysis_result, # 帶入 AI 結果
|
||||
)
|
||||
await record_external_alert_event(
|
||||
project_id="awoooi",
|
||||
provider="signoz",
|
||||
event_id=str(fingerprint),
|
||||
stage="incident_linked",
|
||||
title=str(alert_name),
|
||||
severity=str(severity),
|
||||
namespace=str(labels.get("namespace", "signoz")),
|
||||
target_resource=str(labels.get("service_name", labels.get("service", "unknown"))),
|
||||
fingerprint=str(fingerprint),
|
||||
incident_id=str(incident.incident_id),
|
||||
approval_id=str(approval_id),
|
||||
source_url=source_url or trace_url,
|
||||
labels=labels,
|
||||
annotations=annotations,
|
||||
payload={
|
||||
"raw_alert": raw_payload or {},
|
||||
"trace_url": trace_url,
|
||||
"has_signoz_metrics": bool(signoz_metrics),
|
||||
"ai_provider": ai_provider,
|
||||
"tokens": tokens,
|
||||
"cost": cost,
|
||||
},
|
||||
)
|
||||
|
||||
# =================================================================
|
||||
# Step 5: 發送 Telegram 告警
|
||||
|
||||
@@ -19,6 +19,7 @@ Endpoints:
|
||||
- 每個 Nonce 只能使用一次
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
from uuid import UUID
|
||||
|
||||
from fastapi import APIRouter, HTTPException, status
|
||||
@@ -27,6 +28,8 @@ from pydantic import BaseModel
|
||||
from src.core.config import settings
|
||||
from src.core.logging import get_logger
|
||||
from src.services.approval_db import get_approval_service
|
||||
from src.services.approval_execution import get_execution_service
|
||||
from src.services.incident_approval_service import get_incident_approval_service
|
||||
from src.services.security_interceptor import (
|
||||
NonceReplayError,
|
||||
UserNotWhitelistedError,
|
||||
@@ -64,6 +67,80 @@ class TestPushRequest(BaseModel):
|
||||
incident_id: str = ""
|
||||
|
||||
|
||||
async def _run_telegram_approved_execution(approval) -> None:
    """Execute an approval that was approved through a Telegram callback.

    Awaits ``get_execution_service().execute_approved_action(approval)`` and
    logs the outcome. Any ``Exception`` raised along the way is logged and
    swallowed, so this coroutine never propagates a failure to its caller.

    Args:
        approval: The approval object to execute. Only its ``id`` and
            ``incident_id`` attributes are read here (both optional).
    """
    # Fields shared by the success and the failure log entry.
    log_fields = {
        "approval_id": str(getattr(approval, "id", "")),
        "incident_id": getattr(approval, "incident_id", None),
    }
    try:
        executor = get_execution_service()
        outcome = await executor.execute_approved_action(approval)
        logger.info(
            "telegram_approval_execution_completed",
            **log_fields,
            success=bool(outcome),
        )
    except Exception as err:
        logger.error(
            "telegram_approval_execution_failed",
            **log_fields,
            error=str(err),
        )
|
||||
|
||||
|
||||
# Strong references to in-flight execution tasks. The event loop only keeps
# weak references to tasks, so a task nobody else references may be
# garbage-collected before it finishes.
_TELEGRAM_EXECUTION_TASKS: set[asyncio.Task] = set()


def _schedule_telegram_approved_execution(approval) -> bool:
    """Schedule execution after Telegram approval reaches required signatures.

    The execution coroutine is started as a background task on the running
    event loop. The task is kept in ``_TELEGRAM_EXECUTION_TASKS`` until it
    completes so that it cannot be collected mid-execution.

    Args:
        approval: The approval object to execute. Only its ``id`` and
            ``incident_id`` attributes are read here (both optional).

    Returns:
        ``True`` when the task was scheduled, ``False`` when scheduling
        failed (the failure is logged, never raised).
    """
    approval_id = str(getattr(approval, "id", ""))
    incident_id = getattr(approval, "incident_id", None)
    coro = None
    task = None
    try:
        coro = _run_telegram_approved_execution(approval)
        task = asyncio.create_task(coro)
        _TELEGRAM_EXECUTION_TASKS.add(task)
        # Drop the strong reference as soon as the task is done.
        task.add_done_callback(_TELEGRAM_EXECUTION_TASKS.discard)
        logger.info(
            "telegram_approval_execution_scheduled",
            approval_id=approval_id,
            incident_id=incident_id,
        )
        return True
    except Exception as exc:
        # If the coroutine was created but never handed to a task (e.g. no
        # running event loop), close it to avoid a "coroutine was never
        # awaited" warning. A coroutine already owned by a task is left alone.
        if coro is not None and task is None:
            coro.close()
        logger.error(
            "telegram_approval_execution_schedule_failed",
            approval_id=approval_id,
            incident_id=incident_id,
            error=str(exc),
        )
        return False
|
||||
|
||||
|
||||
async def _finalize_telegram_approval(approval, execution_triggered: bool) -> bool:
|
||||
"""Complete the execution handoff for Telegram approvals.
|
||||
|
||||
ApprovalDBService only records the signature/status transition. The actual
|
||||
executor scheduling lives in API callers, so Telegram must mirror the REST
|
||||
approval endpoint instead of stopping at a visual approval stamp.
|
||||
"""
|
||||
if not execution_triggered:
|
||||
return False
|
||||
return _schedule_telegram_approved_execution(approval)
|
||||
|
||||
|
||||
async def _sync_telegram_rejection(approval_id: str) -> bool:
    """Propagate a Telegram rejection to the linked Incident state.

    Calls ``on_approval_status_change`` on the incident approval service with
    ``new_status="rejected"``. Failures are logged and reported through the
    return value instead of being raised.

    Args:
        approval_id: Identifier of the approval that was rejected.

    Returns:
        ``True`` when the sync call completed, ``False`` when it raised.
    """
    synced = False
    try:
        incident_approvals = get_incident_approval_service()
        await incident_approvals.on_approval_status_change(
            approval_id=approval_id,
            new_status="rejected",
        )
        logger.info("telegram_rejection_incident_synced", approval_id=approval_id)
        synced = True
    except Exception as err:
        logger.error(
            "telegram_rejection_incident_sync_failed",
            approval_id=approval_id,
            error=str(err),
        )
    return synced
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Endpoints
|
||||
# =============================================================================
|
||||
@@ -198,12 +275,17 @@ async def telegram_webhook(
|
||||
)
|
||||
|
||||
if approval:
|
||||
execution_scheduled = await _finalize_telegram_approval(
|
||||
approval=approval,
|
||||
execution_triggered=execution_triggered,
|
||||
)
|
||||
logger.info(
|
||||
"telegram_approval_signed",
|
||||
approval_id=approval_id,
|
||||
user_id=user_id,
|
||||
status=approval.status.value,
|
||||
execution_triggered=execution_triggered,
|
||||
execution_scheduled=execution_scheduled,
|
||||
)
|
||||
await _log_user_action("approve", True, getattr(approval, "incident_id", None))
|
||||
|
||||
@@ -213,6 +295,7 @@ async def telegram_webhook(
|
||||
"approval_id": approval_id,
|
||||
"status": approval.status.value,
|
||||
"execution_triggered": execution_triggered,
|
||||
"execution_scheduled": execution_scheduled,
|
||||
}
|
||||
|
||||
elif action == "reject":
|
||||
@@ -224,10 +307,12 @@ async def telegram_webhook(
|
||||
)
|
||||
|
||||
if approval:
|
||||
incident_synced = await _sync_telegram_rejection(approval_id)
|
||||
logger.info(
|
||||
"telegram_approval_rejected",
|
||||
approval_id=approval_id,
|
||||
user_id=user_id,
|
||||
incident_synced=incident_synced,
|
||||
)
|
||||
await _log_user_action("reject", False, getattr(approval, "incident_id", None))
|
||||
|
||||
@@ -236,6 +321,7 @@ async def telegram_webhook(
|
||||
"message": "Rejected",
|
||||
"approval_id": approval_id,
|
||||
"status": approval.status.value,
|
||||
"incident_synced": incident_synced,
|
||||
}
|
||||
|
||||
return {"ok": False, "message": "Unknown action"}
|
||||
|
||||
@@ -33,14 +33,8 @@ from pydantic import BaseModel, Field
|
||||
|
||||
from src.core.config import settings
|
||||
from src.core.constants import is_cicd_alertname, is_heartbeat_alertname
|
||||
from src.services.alert_rule_engine import get_incident_type, match_rule
|
||||
from src.services.action_parser import is_safe_kubectl_action
|
||||
from src.services.security_interceptor import check_webhook_nonce # P0-06: nonce dedup via Service 層
|
||||
from src.core.logging import get_logger
|
||||
from src.core.metrics import record_alert_chain_success
|
||||
|
||||
# Phase 15.2: Trace Context (moved to SignalProducerService)
|
||||
# get_trace_context 已移至 Service 層
|
||||
from src.models.approval import (
|
||||
ApprovalRequestCreate,
|
||||
BlastRadius,
|
||||
@@ -48,31 +42,43 @@ from src.models.approval import (
|
||||
DryRunCheck,
|
||||
RiskLevel,
|
||||
)
|
||||
|
||||
# R4 #129 (2026-04-01 ogt): AlertPayload/AlertResponse 移至 models 層,AlertAnalyzer 移至 services 層
|
||||
# ogt 更新 v1.1 2026-04-01 台北時間: generate_alert_fingerprint 移至 alert_analyzer_service (ADR-024)
|
||||
# [首席架構師] 移除 generate_alert_fingerprint 直接 import,改用 AlertAnalyzer.generate_fingerprint v1.2 2026-04-01 Asia/Taipei
|
||||
from src.models.webhook import AlertPayload, AlertResponse
|
||||
from src.services.action_parser import is_safe_kubectl_action
|
||||
from src.services.alert_analyzer_service import AlertAnalyzer
|
||||
from src.services.alert_approval_guard import guard_alert_approval_action
|
||||
from src.services.alert_grouping_service import get_alert_grouping_service
|
||||
from src.services.alert_rule_engine import get_incident_type, match_rule
|
||||
from src.services.alertmanager_llm_guard import (
|
||||
ALERTMANAGER_LLM_INFLIGHT_LOCK_TTL_SECONDS,
|
||||
try_acquire_alertmanager_llm_lock,
|
||||
)
|
||||
from src.services.approval_db import get_approval_service
|
||||
from src.services.auto_approve import get_auto_approve_policy
|
||||
from src.services.auto_repair_service import AutoRepairService
|
||||
from src.services.channel_hub import (
|
||||
record_alertmanager_event,
|
||||
record_grouped_alert_event,
|
||||
)
|
||||
|
||||
# Phase 15.2: Trace Context (moved to SignalProducerService)
|
||||
# get_trace_context 已移至 Service 層
|
||||
|
||||
# R4 #129 (2026-04-01 ogt): AlertPayload/AlertResponse 移至 models 層,AlertAnalyzer 移至 services 層
|
||||
# ogt 更新 v1.1 2026-04-01 台北時間: generate_alert_fingerprint 移至 alert_analyzer_service (ADR-024)
|
||||
# [首席架構師] 移除 generate_alert_fingerprint 直接 import,改用 AlertAnalyzer.generate_fingerprint v1.2 2026-04-01 Asia/Taipei
|
||||
|
||||
# Phase 17 P0: Service 層 (消除 Router 直接存取 Redis)
|
||||
# C2 修正 (首席架構師審查 2026-04-10): create_incident_for_approval + extract_affected_services 已移入 Service 層
|
||||
from src.services.incident_service import (
|
||||
classify_alert_early,
|
||||
create_incident_for_approval,
|
||||
extract_affected_services,
|
||||
get_incident_service,
|
||||
)
|
||||
from src.services.auto_approve import get_auto_approve_policy
|
||||
from src.services.auto_repair_service import AutoRepairService
|
||||
|
||||
# Phase 5: OpenClaw AI Engine
|
||||
from src.services.openclaw import get_openclaw
|
||||
from src.services.playbook_match_resolver import resolve_playbook_id_for_alert
|
||||
from src.services.security_interceptor import check_webhook_nonce # P0-06: nonce dedup via Service 層
|
||||
from src.services.signal_producer import SignalData, get_signal_producer
|
||||
|
||||
# Phase 5: Telegram Gateway (行動戰情室)
|
||||
@@ -81,9 +87,6 @@ from src.services.telegram_gateway import TelegramGatewayError, get_telegram_gat
|
||||
# Phase 18.1.7: K8s 資源名稱正規化 已移至 alert_analyzer_service (R4 #129)
|
||||
from src.utils.timezone import now_taipei
|
||||
|
||||
# ADR-076: 告警聚合引擎 (2026-04-14 Claude Haiku 4.5 Asia/Taipei)
|
||||
from src.services.alert_grouping_service import get_alert_grouping_service
|
||||
|
||||
router = APIRouter(prefix="/webhooks", tags=["Webhooks"])
|
||||
logger = get_logger("awoooi.webhooks")
|
||||
|
||||
@@ -136,6 +139,38 @@ def _should_use_alertmanager_rule_first(
|
||||
)
|
||||
|
||||
|
||||
async def _analyze_alertmanager_with_timeout(
    openclaw,
    alert_context: dict,
    *,
    alert_id: str,
    alertname: str,
) -> tuple:
    """Run Alertmanager AI analysis without letting it block the workflow forever.

    Awaits ``openclaw.analyze_alert(alert_context)`` bounded by
    ``ALERTMANAGER_BACKGROUND_AI_TIMEOUT_SECONDS``. The call never raises
    ``Exception``: on timeout or failure it logs a warning and returns a
    fallback tuple with no analysis result.

    Args:
        openclaw: Object exposing an awaitable ``analyze_alert(alert_context)``.
        alert_context: Context dict passed through to ``analyze_alert``.
        alert_id: Alert identifier, used for logging only.
        alertname: Alert name, used for logging only.

    Returns:
        The 7-tuple returned by ``analyze_alert`` on success. On timeout:
        ``(None, "fallback_timeout", "", None, "", 0, 0.0)``. On any other
        ``Exception``: ``(None, "fallback_error", "", None, "", 0, 0.0)``.
    """

    try:
        return await asyncio.wait_for(
            openclaw.analyze_alert(alert_context),
            timeout=ALERTMANAGER_BACKGROUND_AI_TIMEOUT_SECONDS,
        )
    # asyncio.TimeoutError is only an alias of the builtin TimeoutError from
    # Python 3.11 on; on earlier versions the builtin would not match and a
    # timeout would be misreported as "fallback_error" by the handler below.
    except asyncio.TimeoutError:
        logger.warning(
            "alertmanager_openclaw_timeout_fallback",
            alert_id=alert_id,
            alertname=alertname,
            timeout_sec=ALERTMANAGER_BACKGROUND_AI_TIMEOUT_SECONDS,
        )
        return None, "fallback_timeout", "", None, "", 0, 0.0
    except Exception as exc:
        logger.warning(
            "alertmanager_openclaw_failed_fallback",
            alert_id=alert_id,
            alertname=alertname,
            error=str(exc),
        )
        return None, "fallback_error", "", None, "", 0, 0.0
|
||||
|
||||
|
||||
async def _escalate_auto_repair_unavailable(
|
||||
*,
|
||||
incident_id: str,
|
||||
@@ -163,6 +198,19 @@ async def _escalate_auto_repair_unavailable(
|
||||
)
|
||||
|
||||
|
||||
def _auto_repair_action_label(result, fallback_target: str) -> str:
|
||||
"""Build a verifier label that includes the actual playbook steps."""
|
||||
playbook_id = getattr(result, "playbook_id", None) or "unknown"
|
||||
steps = getattr(result, "executed_steps", None) or []
|
||||
step_text = " | ".join(str(step) for step in steps).strip()
|
||||
if not step_text:
|
||||
step_text = fallback_target
|
||||
step_text = " ".join(step_text.split())
|
||||
if len(step_text) > 240:
|
||||
step_text = f"{step_text[:237]}..."
|
||||
return f"auto_repair_playbook:{playbook_id} {step_text}".strip()
|
||||
|
||||
|
||||
async def _try_auto_repair_background(
|
||||
incident_id: str,
|
||||
approval_id: str,
|
||||
@@ -252,6 +300,46 @@ async def _try_auto_repair_background(
|
||||
},
|
||||
)
|
||||
|
||||
_pre_execution_snapshot = None
|
||||
try:
|
||||
from src.core.feature_flags import aiops_flags
|
||||
|
||||
if aiops_flags.is_sub_flag_enabled("AIOPS_P1_PRE_DECISION_INVESTIGATOR"):
|
||||
from src.services.evidence_snapshot import get_latest_snapshot
|
||||
from src.services.post_execution_verifier import get_post_execution_verifier
|
||||
|
||||
_pre_execution_snapshot = await get_latest_snapshot(incident_id)
|
||||
if _pre_execution_snapshot is None:
|
||||
from src.services.pre_decision_investigator import (
|
||||
get_pre_decision_investigator,
|
||||
)
|
||||
|
||||
_pre_execution_snapshot = await asyncio.wait_for(
|
||||
get_pre_decision_investigator().investigate(incident),
|
||||
timeout=60.0,
|
||||
)
|
||||
if _pre_execution_snapshot is not None:
|
||||
await asyncio.wait_for(
|
||||
get_post_execution_verifier().capture_pre_execution_state(
|
||||
incident,
|
||||
_pre_execution_snapshot,
|
||||
),
|
||||
timeout=30.0,
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning(
|
||||
"auto_repair_pre_state_capture_timeout",
|
||||
incident_id=incident_id,
|
||||
approval_id=approval_id,
|
||||
)
|
||||
except Exception as _pre_state_err:
|
||||
logger.warning(
|
||||
"auto_repair_pre_state_capture_failed",
|
||||
incident_id=incident_id,
|
||||
approval_id=approval_id,
|
||||
error=str(_pre_state_err),
|
||||
)
|
||||
|
||||
# 執行自動修復
|
||||
logger.info(
|
||||
"auto_repair_executing",
|
||||
@@ -263,6 +351,7 @@ async def _try_auto_repair_background(
|
||||
playbook=decision.playbook,
|
||||
is_cold_start=decision.is_cold_start,
|
||||
similarity_score=decision.similarity_score,
|
||||
run_post_verification=False,
|
||||
)
|
||||
|
||||
logger.info(
|
||||
@@ -273,6 +362,20 @@ async def _try_auto_repair_background(
|
||||
|
||||
# 記錄執行結果
|
||||
if result:
|
||||
try:
|
||||
await get_approval_service().update_execution_status(
|
||||
approval_id=approval_id,
|
||||
success=result.success,
|
||||
error_message=result.error,
|
||||
)
|
||||
except Exception as _approval_status_err:
|
||||
logger.warning(
|
||||
"auto_repair_approval_status_update_failed",
|
||||
approval_id=approval_id,
|
||||
incident_id=incident_id,
|
||||
error=str(_approval_status_err),
|
||||
)
|
||||
|
||||
await op_log.append(
|
||||
"EXECUTION_COMPLETED",
|
||||
incident_id=incident_id,
|
||||
@@ -336,11 +439,10 @@ async def _try_auto_repair_background(
|
||||
from src.services.evidence_snapshot import get_latest_snapshot
|
||||
from src.services.learning_service import get_learning_service
|
||||
|
||||
_snapshot = await get_latest_snapshot(incident_id)
|
||||
_action_label = (
|
||||
f"{target_resource}:{namespace}"
|
||||
if not result.success
|
||||
else f"auto_repair_playbook:{result.playbook_id}"
|
||||
_snapshot = _pre_execution_snapshot or await get_latest_snapshot(incident_id)
|
||||
_action_label = _auto_repair_action_label(
|
||||
result,
|
||||
fallback_target=f"{target_resource}:{namespace}",
|
||||
)
|
||||
_verifier = get_post_execution_verifier()
|
||||
_verify_result = await asyncio.wait_for(
|
||||
@@ -792,6 +894,7 @@ async def verify_webhook_signature(
|
||||
|
||||
# 戰略 B: 滑動時間窗 (ADR-073: 5 分鐘改 30 分鐘,防同一問題反覆重建 Incident,2026-04-12 ogt)
|
||||
DEBOUNCE_WINDOW_MINUTES = 30
|
||||
ALERTMANAGER_BACKGROUND_AI_TIMEOUT_SECONDS = 90.0
|
||||
|
||||
|
||||
# =============================================================================
|
||||
@@ -1105,7 +1208,12 @@ async def receive_alert(
|
||||
# 呼叫 OpenClaw LLM 分析 (v7.0 含 SignOz 整合)
|
||||
# 2026-03-29 ogt: 加入 Token/Cost 追蹤
|
||||
openclaw = get_openclaw()
|
||||
analysis_result, ai_provider, raw_response, signoz_metrics, signoz_trace_url, ai_tokens, ai_cost = await openclaw.analyze_alert(alert_context)
|
||||
analysis_result, ai_provider, raw_response, signoz_metrics, signoz_trace_url, ai_tokens, ai_cost = await _analyze_alertmanager_with_timeout(
|
||||
openclaw,
|
||||
alert_context,
|
||||
alert_id=alert_id,
|
||||
alertname=alert.alert_type,
|
||||
)
|
||||
|
||||
if analysis_result:
|
||||
# LLM 分析成功
|
||||
@@ -1147,15 +1255,33 @@ async def receive_alert(
|
||||
data_impact = impact_mapping.get(blast.data_impact.value, DataImpact.NONE)
|
||||
|
||||
# 2026-04-27 Claude Sonnet 4.6: shadow-run Step1 — 補 metadata kwarg,讓 extra_metadata 可觀測
|
||||
_cmd_cs1 = (analysis_result.kubectl_command or "").strip()
|
||||
_alertname_cs1 = str((alert.labels or {}).get("alertname") or alert.alert_type or "")
|
||||
_guarded_action_cs1 = await guard_alert_approval_action(
|
||||
action=(_cmd_cs1 or f"{analysis_result.action_title} | NO_ACTION"),
|
||||
alert_namespace=alert.namespace,
|
||||
alertname=_alertname_cs1,
|
||||
alert_category=get_incident_type(_alertname_cs1),
|
||||
)
|
||||
_matched_playbook_id_cs1 = await resolve_playbook_id_for_alert(
|
||||
alertname=_alertname_cs1,
|
||||
affected_services=analysis_result.affected_services
|
||||
or ([alert.target_resource] if alert.target_resource else []),
|
||||
severity=risk_level.value,
|
||||
)
|
||||
if _guarded_action_cs1.blocked:
|
||||
risk_level = RiskLevel.LOW
|
||||
_cmd_cs1 = ""
|
||||
|
||||
_approval_metadata_cs1 = {
|
||||
"source": ai_provider,
|
||||
"confidence_score": analysis_result.confidence,
|
||||
"is_rule_based": False,
|
||||
"playbook_id": None,
|
||||
"playbook_id": _matched_playbook_id_cs1,
|
||||
**_guarded_action_cs1.metadata,
|
||||
}
|
||||
_cmd_cs1 = (analysis_result.kubectl_command or "").strip()
|
||||
approval_create = ApprovalRequestCreate(
|
||||
action=(_cmd_cs1 or f"{analysis_result.action_title} | NO_ACTION"),
|
||||
action=_guarded_action_cs1.action,
|
||||
description=f"[AI: {ai_provider}] {analysis_result.action_title} | {analysis_result.description}",
|
||||
risk_level=risk_level,
|
||||
blast_radius=BlastRadius(
|
||||
@@ -1172,6 +1298,7 @@ async def receive_alert(
|
||||
],
|
||||
requested_by=f"OpenClaw ({ai_provider})",
|
||||
metadata=_approval_metadata_cs1,
|
||||
matched_playbook_id=_matched_playbook_id_cs1,
|
||||
)
|
||||
suggested_action = analysis_result.kubectl_command
|
||||
else:
|
||||
@@ -1218,7 +1345,7 @@ async def receive_alert(
|
||||
# 設計:confidence ≥ 0.85 + 非 CRITICAL + 非破壞性 + 有 kubectl 指令 → 直接執行
|
||||
# 安全防線:CRITICAL / destructive patterns / NO_ACTION/INVESTIGATE/OBSERVE / 空 kubectl → 降級 PENDING
|
||||
if analysis_result:
|
||||
_cs1_kubectl = analysis_result.kubectl_command.strip() if analysis_result.kubectl_command else ""
|
||||
_cs1_kubectl = _cmd_cs1
|
||||
_cs1_can_auto = (
|
||||
bool(_cs1_kubectl)
|
||||
and analysis_result.confidence >= 0.85
|
||||
@@ -1239,7 +1366,7 @@ async def receive_alert(
|
||||
required_signatures=0,
|
||||
status=ApprovalStatus.APPROVED,
|
||||
risk_level=risk_level.value,
|
||||
matched_playbook_id=None,
|
||||
matched_playbook_id=_matched_playbook_id_cs1,
|
||||
metadata={
|
||||
**_approval_metadata_cs1,
|
||||
"is_high_confidence": True,
|
||||
@@ -1420,6 +1547,39 @@ class AlertmanagerPayload(BaseModel):
|
||||
alerts: list[AlertmanagerAlert]
|
||||
|
||||
|
||||
_CICD_JOB_STATUSES = frozenset({"running", "success", "failed", "pending"})
|
||||
|
||||
|
||||
def _cicd_job_status_from_alert(alert: AlertmanagerAlert) -> str:
|
||||
"""將 CI/CD Alertmanager label 轉成 TelegramGateway 支援的狀態。
|
||||
|
||||
2026-05-12 Codex: Gitea workflow 先送進 AWOOI API,不能只靠
|
||||
severity=info 推 success,否則 failed/pending 事件進 AwoooP 後語義會失真。
|
||||
"""
|
||||
labels = alert.labels or {}
|
||||
for key in ("status", "job_status", "ci_status"):
|
||||
value = str(labels.get(key) or "").strip().lower()
|
||||
if value in _CICD_JOB_STATUSES:
|
||||
return value
|
||||
|
||||
severity = str(labels.get("severity") or "").strip().lower()
|
||||
if severity == "info":
|
||||
return "success"
|
||||
if severity in {"critical", "error"}:
|
||||
return "failed"
|
||||
return "running"
|
||||
|
||||
|
||||
def _cicd_duration_seconds_from_alert(alert: AlertmanagerAlert) -> int:
|
||||
labels = alert.labels or {}
|
||||
raw = labels.get("duration_seconds") or labels.get("duration") or 0
|
||||
try:
|
||||
value = int(str(raw).strip())
|
||||
except (TypeError, ValueError):
|
||||
return 0
|
||||
return max(value, 0)
|
||||
|
||||
|
||||
def is_internal_ip(client_ip: str) -> bool:
|
||||
"""檢查是否為內網 IP"""
|
||||
import ipaddress
|
||||
@@ -1456,6 +1616,11 @@ async def _process_new_alert_background(
|
||||
try:
|
||||
service = get_approval_service()
|
||||
openclaw = get_openclaw()
|
||||
traced_alert_labels = {
|
||||
**(alert_labels or {}),
|
||||
"fingerprint": fingerprint,
|
||||
"alert_id": alert_id,
|
||||
}
|
||||
|
||||
rule_response = match_rule(alert_context)
|
||||
should_bypass_llm = _should_use_alertmanager_rule_first(rule_response, alert_category)
|
||||
@@ -1489,7 +1654,6 @@ async def _process_new_alert_background(
|
||||
str(blast.get("data_impact", "NONE")).upper(),
|
||||
DataImpact.NONE,
|
||||
)
|
||||
rule_action_title = str(rule_response.get("action_title", "人工排查主機告警"))
|
||||
rule_kubectl = str(rule_response.get("kubectl_command", "")).strip()
|
||||
rule_description = str(rule_response.get("description", message))
|
||||
rule_action = (
|
||||
@@ -1497,13 +1661,31 @@ async def _process_new_alert_background(
|
||||
if rule_kubectl else
|
||||
f"NO_ACTION - {rule_description[:120]}"
|
||||
)
|
||||
_matched_playbook_id_cs2 = await resolve_playbook_id_for_alert(
|
||||
rule_id=str(rule_response.get("rule_id", "")),
|
||||
alertname=alertname,
|
||||
affected_services=[target_resource] if target_resource else [],
|
||||
severity=rule_risk.value,
|
||||
)
|
||||
_guarded_action_cs2 = await guard_alert_approval_action(
|
||||
action=rule_action,
|
||||
alert_namespace=namespace,
|
||||
alertname=alertname,
|
||||
alert_category=alert_category,
|
||||
)
|
||||
if _guarded_action_cs2.blocked:
|
||||
rule_action = _guarded_action_cs2.action
|
||||
rule_kubectl = ""
|
||||
rule_risk = RiskLevel.LOW
|
||||
|
||||
# 2026-04-27 Claude Sonnet 4.6: shadow-run Step1 — 補 metadata kwarg,讓 extra_metadata 可觀測
|
||||
_approval_metadata_cs2 = {
|
||||
"source": "rule_engine",
|
||||
"confidence_score": float(rule_response.get("confidence", 0.0) or 0.0),
|
||||
"is_rule_based": True,
|
||||
"playbook_id": str(rule_response.get("rule_id", "")) or None,
|
||||
"rule_id": str(rule_response.get("rule_id", "")) or None,
|
||||
"playbook_id": _matched_playbook_id_cs2,
|
||||
**_guarded_action_cs2.metadata,
|
||||
}
|
||||
approval_create = ApprovalRequestCreate(
|
||||
action=rule_action,
|
||||
@@ -1534,6 +1716,7 @@ async def _process_new_alert_background(
|
||||
],
|
||||
requested_by="OpenClaw (rule-engine)",
|
||||
metadata=_approval_metadata_cs2,
|
||||
matched_playbook_id=_matched_playbook_id_cs2,
|
||||
)
|
||||
|
||||
approval = await service.create_approval_with_fingerprint(
|
||||
@@ -1565,6 +1748,10 @@ async def _process_new_alert_background(
|
||||
# 2026-04-27 ogt + Claude Sonnet 4.6: CS2 規則引擎自動執行
|
||||
# 設計:is_rule_based=True 確定性高,滿足條件直接執行,不等人工審核
|
||||
# 安全防線:CRITICAL / destructive patterns / NO_ACTION / 空 kubectl → 全部降級 PENDING
|
||||
_cs2_auto_approval = None
|
||||
_cs2_executor = None
|
||||
_cs2_exec_success: bool | None = None
|
||||
_cs2_exec_error: str | None = None
|
||||
try:
|
||||
from src.models.approval import ApprovalRequest, ApprovalStatus
|
||||
from src.services.approval_execution import ApprovalExecutionService
|
||||
@@ -1584,10 +1771,11 @@ async def _process_new_alert_background(
|
||||
required_signatures=0,
|
||||
status=ApprovalStatus.APPROVED,
|
||||
risk_level=rule_risk.value,
|
||||
matched_playbook_id=_approval_metadata_cs2.get("playbook_id"),
|
||||
matched_playbook_id=_matched_playbook_id_cs2,
|
||||
)
|
||||
# 使用 DB 中剛建立的 approval.id 讓 executor 可回寫
|
||||
_auto_approval.id = approval.id
|
||||
_cs2_auto_approval = _auto_approval
|
||||
|
||||
_cs2_executor = ApprovalExecutionService()
|
||||
_cs2_exec_success = await _cs2_executor.execute_approved_action(_auto_approval)
|
||||
@@ -1610,6 +1798,8 @@ async def _process_new_alert_background(
|
||||
exec_success=_cs2_exec_success,
|
||||
)
|
||||
except Exception as _auto_err:
|
||||
_cs2_exec_success = False if _cs2_auto_approval is not None else None
|
||||
_cs2_exec_error = str(_auto_err)
|
||||
logger.warning(
|
||||
"cs2_auto_execute_failed_degraded_to_pending",
|
||||
approval_id=str(approval.id),
|
||||
@@ -1625,7 +1815,7 @@ async def _process_new_alert_background(
|
||||
message=message,
|
||||
source="alertmanager",
|
||||
alertname=alertname,
|
||||
alert_labels=alert_labels,
|
||||
alert_labels=traced_alert_labels,
|
||||
notification_type=notification_type,
|
||||
alert_category=alert_category,
|
||||
)
|
||||
@@ -1641,6 +1831,41 @@ async def _process_new_alert_background(
|
||||
error=str(_meta_err),
|
||||
)
|
||||
|
||||
await record_alertmanager_event(
|
||||
project_id="awoooi",
|
||||
alert_id=alert_id,
|
||||
alertname=alertname,
|
||||
severity=severity,
|
||||
namespace=namespace,
|
||||
target_resource=target_resource,
|
||||
fingerprint=fingerprint,
|
||||
stage="incident_linked",
|
||||
notification_type=notification_type,
|
||||
alert_category=alert_category,
|
||||
incident_id=incident_id,
|
||||
approval_id=str(approval.id),
|
||||
repeat_count=1,
|
||||
labels=traced_alert_labels,
|
||||
annotations=alert_context.get("annotations", {}),
|
||||
)
|
||||
|
||||
if _cs2_auto_approval is not None and _cs2_exec_success is not None:
|
||||
try:
|
||||
_cs2_auto_approval.incident_id = incident_id
|
||||
_cs2_executor = _cs2_executor or ApprovalExecutionService()
|
||||
await _cs2_executor.finalize_auto_approved_execution(
|
||||
_cs2_auto_approval,
|
||||
success=_cs2_exec_success,
|
||||
error_message=_cs2_exec_error,
|
||||
)
|
||||
except Exception as _cs2_finalize_err:
|
||||
logger.warning(
|
||||
"cs2_auto_execute_finalize_failed",
|
||||
approval_id=str(approval.id),
|
||||
incident_id=incident_id,
|
||||
error=str(_cs2_finalize_err),
|
||||
)
|
||||
|
||||
_is_heartbeat = is_heartbeat_alertname(alertname)
|
||||
if can_auto_repair and not _is_heartbeat:
|
||||
await _try_auto_repair_background(
|
||||
@@ -1694,7 +1919,12 @@ async def _process_new_alert_background(
|
||||
record_alert_chain_success("alertmanager")
|
||||
return
|
||||
|
||||
analysis_result, ai_provider, raw_response, signoz_metrics, signoz_trace_url, ai_tokens, ai_cost = await openclaw.analyze_alert(alert_context)
|
||||
analysis_result, ai_provider, raw_response, signoz_metrics, signoz_trace_url, ai_tokens, ai_cost = await _analyze_alertmanager_with_timeout(
|
||||
openclaw,
|
||||
alert_context,
|
||||
alert_id=alert_id,
|
||||
alertname=alertname,
|
||||
)
|
||||
|
||||
if analysis_result:
|
||||
risk_mapping = {
|
||||
@@ -1724,15 +1954,34 @@ async def _process_new_alert_background(
|
||||
data_impact = impact_mapping.get(blast.data_impact.value, DataImpact.NONE) if blast else DataImpact.NONE
|
||||
|
||||
# 2026-04-27 Claude Sonnet 4.6: shadow-run Step1 — 補 metadata kwarg,讓 extra_metadata 可觀測
|
||||
_cmd_cs3 = (analysis_result.kubectl_command or "").strip()
|
||||
_guarded_action_cs3 = await guard_alert_approval_action(
|
||||
action=(_cmd_cs3 or f"{analysis_result.action_title} | NO_ACTION"),
|
||||
alert_namespace=namespace,
|
||||
alertname=alertname,
|
||||
alert_category=alert_category,
|
||||
)
|
||||
_matched_playbook_id_cs3 = await resolve_playbook_id_for_alert(
|
||||
rule_id=str(rule_response.get("rule_id", "")),
|
||||
alertname=alertname,
|
||||
affected_services=analysis_result.affected_services
|
||||
or ([target_resource] if target_resource else []),
|
||||
severity=risk_level.value,
|
||||
)
|
||||
if _guarded_action_cs3.blocked:
|
||||
risk_level = RiskLevel.LOW
|
||||
_cmd_cs3 = ""
|
||||
|
||||
_approval_metadata_cs3 = {
|
||||
"source": ai_provider,
|
||||
"confidence_score": analysis_result.confidence,
|
||||
"is_rule_based": False,
|
||||
"playbook_id": None,
|
||||
"rule_id": str(rule_response.get("rule_id", "")) or None,
|
||||
"playbook_id": _matched_playbook_id_cs3,
|
||||
**_guarded_action_cs3.metadata,
|
||||
}
|
||||
_cmd_cs3 = (analysis_result.kubectl_command or "").strip()
|
||||
approval_create = ApprovalRequestCreate(
|
||||
action=(_cmd_cs3 or f"{analysis_result.action_title} | NO_ACTION"),
|
||||
action=_guarded_action_cs3.action,
|
||||
description=f"[AI: {ai_provider}] {analysis_result.action_title} | {analysis_result.description}",
|
||||
risk_level=risk_level,
|
||||
blast_radius=BlastRadius(
|
||||
@@ -1747,6 +1996,7 @@ async def _process_new_alert_background(
|
||||
],
|
||||
requested_by=f"OpenClaw ({ai_provider})",
|
||||
metadata=_approval_metadata_cs3,
|
||||
matched_playbook_id=_matched_playbook_id_cs3,
|
||||
)
|
||||
|
||||
approval = await service.create_approval_with_fingerprint(
|
||||
@@ -1760,7 +2010,7 @@ async def _process_new_alert_background(
|
||||
"risk_level": risk_level.value,
|
||||
"confidence": analysis_result.confidence,
|
||||
"action": approval_create.action,
|
||||
"kubectl_command": analysis_result.kubectl_command,
|
||||
"kubectl_command": _cmd_cs3,
|
||||
"is_rule_based": False,
|
||||
"source": ai_provider,
|
||||
}
|
||||
@@ -1776,7 +2026,7 @@ async def _process_new_alert_background(
|
||||
logger.warning("shadow_auto_approve_failed", error=str(_shadow_err_cs3))
|
||||
|
||||
# 2026-04-27 Claude Sonnet 4.6: CS3 LLM 高信心自動執行(修法3擴展)
|
||||
_cs3_kubectl = (analysis_result.kubectl_command or "").strip()
|
||||
_cs3_kubectl = _cmd_cs3
|
||||
_cs3_can_auto = (
|
||||
bool(_cs3_kubectl)
|
||||
and analysis_result.confidence >= 0.85
|
||||
@@ -1784,8 +2034,15 @@ async def _process_new_alert_background(
|
||||
and "NO_ACTION" not in (analysis_result.action_title or "")
|
||||
and is_safe_kubectl_action(_cs3_kubectl)
|
||||
)
|
||||
_cs3_auto_approval = None
|
||||
_cs3_executor = None
|
||||
_cs3_exec_success: bool | None = None
|
||||
_cs3_exec_error: str | None = None
|
||||
if _cs3_can_auto:
|
||||
try:
|
||||
from src.models.approval import ApprovalRequest, ApprovalStatus
|
||||
from src.services.approval_execution import ApprovalExecutionService
|
||||
|
||||
_cs3_auto_approval = ApprovalRequest(
|
||||
action=approval_create.action,
|
||||
description=approval_create.description,
|
||||
@@ -1793,7 +2050,7 @@ async def _process_new_alert_background(
|
||||
required_signatures=0,
|
||||
status=ApprovalStatus.APPROVED,
|
||||
risk_level=risk_level.value,
|
||||
matched_playbook_id=None,
|
||||
matched_playbook_id=_matched_playbook_id_cs3,
|
||||
metadata={
|
||||
**_approval_metadata_cs3,
|
||||
"is_high_confidence": True,
|
||||
@@ -1802,8 +2059,17 @@ async def _process_new_alert_background(
|
||||
else "cs3_auto_confident_execution",
|
||||
},
|
||||
)
|
||||
_cs3_auto_approval.id = approval.id
|
||||
_cs3_executor = ApprovalExecutionService()
|
||||
_cs3_exec_success = await _cs3_executor.execute_approved_action(_cs3_auto_approval)
|
||||
try:
|
||||
await service.update_execution_status(approval.id, _cs3_exec_success)
|
||||
except Exception as _cs3_upd_err:
|
||||
logger.warning(
|
||||
"cs3_auto_execute_status_update_failed",
|
||||
approval_id=str(approval.id),
|
||||
error=str(_cs3_upd_err),
|
||||
)
|
||||
logger.info(
|
||||
"cs3_llm_auto_executed",
|
||||
approval_id=str(approval.id),
|
||||
@@ -1819,6 +2085,8 @@ async def _process_new_alert_background(
|
||||
),
|
||||
)
|
||||
except Exception as _cs3_exec_err:
|
||||
_cs3_exec_success = False if _cs3_auto_approval is not None else None
|
||||
_cs3_exec_error = str(_cs3_exec_err)
|
||||
logger.warning("cs3_llm_auto_execute_failed", error=str(_cs3_exec_err))
|
||||
|
||||
incident_id = await create_incident_for_approval(
|
||||
@@ -1830,7 +2098,7 @@ async def _process_new_alert_background(
|
||||
message=message,
|
||||
source="alertmanager",
|
||||
alertname=alertname,
|
||||
alert_labels=alert_labels,
|
||||
alert_labels=traced_alert_labels,
|
||||
notification_type=notification_type,
|
||||
alert_category=alert_category,
|
||||
)
|
||||
@@ -1846,6 +2114,41 @@ async def _process_new_alert_background(
|
||||
error=str(_meta_err),
|
||||
)
|
||||
|
||||
await record_alertmanager_event(
|
||||
project_id="awoooi",
|
||||
alert_id=alert_id,
|
||||
alertname=alertname,
|
||||
severity=severity,
|
||||
namespace=namespace,
|
||||
target_resource=target_resource,
|
||||
fingerprint=fingerprint,
|
||||
stage="incident_linked",
|
||||
notification_type=notification_type,
|
||||
alert_category=alert_category,
|
||||
incident_id=incident_id,
|
||||
approval_id=str(approval.id),
|
||||
repeat_count=1,
|
||||
labels=traced_alert_labels,
|
||||
annotations=alert_context.get("annotations", {}),
|
||||
)
|
||||
|
||||
if _cs3_auto_approval is not None and _cs3_exec_success is not None:
|
||||
try:
|
||||
_cs3_auto_approval.incident_id = incident_id
|
||||
_cs3_executor = _cs3_executor or ApprovalExecutionService()
|
||||
await _cs3_executor.finalize_auto_approved_execution(
|
||||
_cs3_auto_approval,
|
||||
success=_cs3_exec_success,
|
||||
error_message=_cs3_exec_error,
|
||||
)
|
||||
except Exception as _cs3_finalize_err:
|
||||
logger.warning(
|
||||
"cs3_auto_execute_finalize_failed",
|
||||
approval_id=str(approval.id),
|
||||
incident_id=incident_id,
|
||||
error=str(_cs3_finalize_err),
|
||||
)
|
||||
|
||||
root_cause = analysis_result.description or message
|
||||
estimated_downtime = blast.estimated_downtime if blast else "~30s"
|
||||
primary_responsibility = analysis_result.primary_responsibility or "COLLAB"
|
||||
@@ -1895,7 +2198,7 @@ async def _process_new_alert_background(
|
||||
risk_level=risk_level.value,
|
||||
resource_name=target_resource,
|
||||
root_cause=root_cause,
|
||||
suggested_action=(analysis_result.kubectl_command or "").strip() or analysis_result.suggested_action.value,
|
||||
suggested_action=approval_create.action,
|
||||
estimated_downtime=estimated_downtime,
|
||||
hit_count=1,
|
||||
primary_responsibility=primary_responsibility,
|
||||
@@ -1921,11 +2224,17 @@ async def _process_new_alert_background(
|
||||
else:
|
||||
# LLM 失敗 - 使用預設值
|
||||
# 2026-04-27 Claude Sonnet 4.6: shadow-run Step1 — 補 metadata kwarg,讓 extra_metadata 可觀測
|
||||
_matched_playbook_id_cs4 = await resolve_playbook_id_for_alert(
|
||||
rule_id=str(rule_response.get("rule_id", "")),
|
||||
alertname=alertname,
|
||||
affected_services=[target_resource] if target_resource else [],
|
||||
severity="medium",
|
||||
)
|
||||
_approval_metadata_cs4 = {
|
||||
"source": "fallback",
|
||||
"confidence_score": None,
|
||||
"is_rule_based": False,
|
||||
"playbook_id": None,
|
||||
"playbook_id": _matched_playbook_id_cs4,
|
||||
}
|
||||
fallback_create = ApprovalRequestCreate(
|
||||
action="OBSERVE",
|
||||
@@ -1940,6 +2249,7 @@ async def _process_new_alert_background(
|
||||
dry_run_checks=[],
|
||||
requested_by="OpenClaw (fallback)",
|
||||
metadata=_approval_metadata_cs4,
|
||||
matched_playbook_id=_matched_playbook_id_cs4,
|
||||
)
|
||||
|
||||
approval = await service.create_approval_with_fingerprint(
|
||||
@@ -1977,7 +2287,7 @@ async def _process_new_alert_background(
|
||||
message=message,
|
||||
source="alertmanager",
|
||||
alertname=alertname,
|
||||
alert_labels=alert_labels,
|
||||
alert_labels=traced_alert_labels,
|
||||
notification_type=notification_type,
|
||||
alert_category=alert_category,
|
||||
)
|
||||
@@ -1993,6 +2303,55 @@ async def _process_new_alert_background(
|
||||
error=str(_meta_err),
|
||||
)
|
||||
|
||||
await record_alertmanager_event(
|
||||
project_id="awoooi",
|
||||
alert_id=alert_id,
|
||||
alertname=alertname,
|
||||
severity=severity,
|
||||
namespace=namespace,
|
||||
target_resource=target_resource,
|
||||
fingerprint=fingerprint,
|
||||
stage="incident_linked",
|
||||
notification_type=notification_type,
|
||||
alert_category=alert_category,
|
||||
incident_id=fallback_incident_id,
|
||||
approval_id=str(approval.id),
|
||||
repeat_count=1,
|
||||
labels=traced_alert_labels,
|
||||
annotations=alert_context.get("annotations", {}),
|
||||
)
|
||||
|
||||
_is_heartbeat = is_heartbeat_alertname(alertname)
|
||||
if can_auto_repair and not _is_heartbeat:
|
||||
await _try_auto_repair_background(
|
||||
incident_id=fallback_incident_id,
|
||||
approval_id=str(approval.id),
|
||||
alert_type=alert_type,
|
||||
target_resource=target_resource,
|
||||
namespace=namespace,
|
||||
)
|
||||
elif not can_auto_repair and not _is_heartbeat:
|
||||
from src.repositories.alert_operation_log_repository import get_alert_operation_log_repository
|
||||
_op_log_fallback = get_alert_operation_log_repository()
|
||||
await _op_log_fallback.append(
|
||||
"GUARDRAIL_BLOCKED",
|
||||
incident_id=fallback_incident_id,
|
||||
approval_id=str(approval.id),
|
||||
actor="prometheus-rule",
|
||||
action_detail=f"Prometheus rule 設定 auto_repair=false,fallback 轉人工: {alertname}",
|
||||
success=False,
|
||||
context={"alertname": alertname, "auto_repair_flag": False},
|
||||
)
|
||||
await _escalate_auto_repair_unavailable(
|
||||
incident_id=fallback_incident_id,
|
||||
approval_id=str(approval.id),
|
||||
alert_type=alert_type,
|
||||
target_resource=target_resource,
|
||||
namespace=namespace,
|
||||
failure_reason="Prometheus rule auto_repair=false,fallback 未進入自動修復評估",
|
||||
attempted_actions="llm_fallback -> guardrail:auto_repair_false -> emergency_intervention",
|
||||
)
|
||||
|
||||
await _push_to_telegram_background(
|
||||
approval_id=str(approval.id),
|
||||
risk_level="medium",
|
||||
@@ -2125,11 +2484,12 @@ async def alertmanager_webhook(
|
||||
telegram = get_telegram_gateway()
|
||||
# 解析 CI/CD 狀態
|
||||
stage = alert.labels.get("stage", "")
|
||||
job_status = "success" if alert.labels.get("severity") == "info" else "running"
|
||||
job_status = _cicd_job_status_from_alert(alert)
|
||||
commit_sha = alert.labels.get("commit", "")
|
||||
triggered_by = alert.labels.get("triggered_by", "CI")
|
||||
workflow_url = alert.annotations.get("workflow_url", "")
|
||||
summary = alert.annotations.get("summary", alertname)
|
||||
detail_message = alert.annotations.get("description", "")
|
||||
|
||||
await telegram.send_cicd_progress(
|
||||
job_name=summary,
|
||||
@@ -2137,6 +2497,8 @@ async def alertmanager_webhook(
|
||||
stage=stage,
|
||||
commit_sha=commit_sha,
|
||||
triggered_by=triggered_by,
|
||||
duration_seconds=_cicd_duration_seconds_from_alert(alert),
|
||||
message=detail_message,
|
||||
workflow_url=workflow_url,
|
||||
)
|
||||
|
||||
@@ -2236,6 +2598,22 @@ async def alertmanager_webhook(
|
||||
target=target_resource,
|
||||
fingerprint=fingerprint,
|
||||
)
|
||||
background_tasks.add_task(
|
||||
record_alertmanager_event,
|
||||
project_id="awoooi",
|
||||
alert_id=alert_id,
|
||||
alertname=alertname,
|
||||
severity=severity,
|
||||
namespace=namespace,
|
||||
target_resource=target_resource,
|
||||
fingerprint=fingerprint,
|
||||
stage="received",
|
||||
notification_type=notification_type,
|
||||
alert_category=alert_category,
|
||||
source_url=alert.generatorURL,
|
||||
labels=dict(alert.labels) if alert.labels else {},
|
||||
annotations=dict(alert.annotations) if alert.annotations else {},
|
||||
)
|
||||
|
||||
# ==========================================================================
|
||||
# ADR-076: 告警聚合引擎 — 5 分鐘滑動視窗,防止告警風暴
|
||||
@@ -2266,6 +2644,19 @@ async def alertmanager_webhook(
|
||||
parent_fingerprint=grouping_result.parent_fingerprint,
|
||||
reason="Alert storm suppressed — child alert within 5-min window",
|
||||
)
|
||||
background_tasks.add_task(
|
||||
record_grouped_alert_event,
|
||||
project_id="awoooi",
|
||||
alert_id=alert_id,
|
||||
alertname=alertname,
|
||||
severity=severity,
|
||||
namespace=namespace,
|
||||
target_resource=target_resource,
|
||||
group_key=grouping_result.group_key,
|
||||
count=grouping_result.count,
|
||||
parent_fingerprint=grouping_result.parent_fingerprint,
|
||||
fingerprint=fingerprint,
|
||||
)
|
||||
return AlertResponse(
|
||||
success=True,
|
||||
message=(
|
||||
@@ -2305,6 +2696,26 @@ async def alertmanager_webhook(
|
||||
hit_count=updated_approval.hit_count,
|
||||
reason="Converged alert - Telegram already sent for this fingerprint",
|
||||
)
|
||||
background_tasks.add_task(
|
||||
record_alertmanager_event,
|
||||
project_id="awoooi",
|
||||
alert_id=alert_id,
|
||||
alertname=alertname,
|
||||
severity=severity,
|
||||
namespace=namespace,
|
||||
target_resource=target_resource,
|
||||
fingerprint=fingerprint,
|
||||
stage="converged",
|
||||
notification_type=notification_type,
|
||||
alert_category=alert_category,
|
||||
incident_id=getattr(updated_approval, "incident_id", None),
|
||||
approval_id=str(updated_approval.id),
|
||||
repeat_count=updated_approval.hit_count,
|
||||
is_duplicate=True,
|
||||
source_url=alert.generatorURL,
|
||||
labels=dict(alert.labels) if alert.labels else {},
|
||||
annotations=dict(alert.annotations) if alert.annotations else {},
|
||||
)
|
||||
|
||||
return AlertResponse(
|
||||
success=True,
|
||||
@@ -2332,10 +2743,27 @@ async def alertmanager_webhook(
|
||||
message=message,
|
||||
source="alertmanager",
|
||||
alertname=alertname,
|
||||
alert_labels=alert.labels,
|
||||
alert_labels={**alert.labels, "fingerprint": fingerprint, "alert_id": alert_id},
|
||||
notification_type="TYPE-1",
|
||||
alert_category=alert_category,
|
||||
)
|
||||
background_tasks.add_task(
|
||||
record_alertmanager_event,
|
||||
project_id="awoooi",
|
||||
alert_id=alert_id,
|
||||
alertname=alertname,
|
||||
severity=severity,
|
||||
namespace=namespace,
|
||||
target_resource=target_resource,
|
||||
fingerprint=fingerprint,
|
||||
stage="incident_linked",
|
||||
notification_type="TYPE-1",
|
||||
alert_category=alert_category,
|
||||
incident_id=_info_incident_id,
|
||||
source_url=alert.generatorURL,
|
||||
labels={**alert.labels, "fingerprint": fingerprint, "alert_id": alert_id},
|
||||
annotations=dict(alert.annotations) if alert.annotations else {},
|
||||
)
|
||||
# 2026-04-15 ogt: TYPE-1 純資訊告警建立後立即關閉
|
||||
# 設計原則: backup/heartbeat/info 告警無需追蹤狀態,通知即完成
|
||||
# 防止 incidents 表無限累積 INVESTIGATING 記錄(ADR-073 漏洞修補)
|
||||
@@ -2355,7 +2783,7 @@ async def alertmanager_webhook(
|
||||
record_alert_chain_success("alertmanager")
|
||||
return AlertResponse(
|
||||
success=True,
|
||||
message=f"✅ TYPE-1 純資訊告警已通知 (no LLM)",
|
||||
message="✅ TYPE-1 純資訊告警已通知 (no LLM)",
|
||||
alert_id=alert_id,
|
||||
approval_created=False,
|
||||
)
|
||||
@@ -2367,6 +2795,23 @@ async def alertmanager_webhook(
|
||||
fingerprint=fingerprint,
|
||||
ttl_seconds=ALERTMANAGER_LLM_INFLIGHT_LOCK_TTL_SECONDS,
|
||||
)
|
||||
background_tasks.add_task(
|
||||
record_alertmanager_event,
|
||||
project_id="awoooi",
|
||||
alert_id=alert_id,
|
||||
alertname=alertname,
|
||||
severity=severity,
|
||||
namespace=namespace,
|
||||
target_resource=target_resource,
|
||||
fingerprint=fingerprint,
|
||||
stage="llm_inflight_suppressed",
|
||||
notification_type=notification_type,
|
||||
alert_category=alert_category,
|
||||
is_duplicate=True,
|
||||
source_url=alert.generatorURL,
|
||||
labels=dict(alert.labels) if alert.labels else {},
|
||||
annotations=dict(alert.annotations) if alert.annotations else {},
|
||||
)
|
||||
return AlertResponse(
|
||||
success=True,
|
||||
message="🛡️ 告警已由同指紋背景 AI 分析處理中,跳過重複 LLM 呼叫",
|
||||
|
||||
126
apps/api/src/core/awooop_operator_auth.py
Normal file
126
apps/api/src/core/awooop_operator_auth.py
Normal file
@@ -0,0 +1,126 @@
|
||||
"""
|
||||
AwoooP Operator authentication boundary.
|
||||
|
||||
ADR-116 Gate 5 approval decisions must not trust browser-supplied identities.
|
||||
This module accepts a short-lived operator identity only when it is paired with
|
||||
the server-side AwoooP operator key.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import secrets
|
||||
from dataclasses import dataclass
|
||||
from typing import Annotated
|
||||
|
||||
import structlog
|
||||
from fastapi import Header, HTTPException, status
|
||||
|
||||
from src.core.config import settings
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
_OPERATOR_ID_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9_.:@-]{1,127}$")
|
||||
_PROD_ENVS = {"prod", "production"}
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class AwoooPOperatorPrincipal:
|
||||
"""Authenticated AwoooP operator principal."""
|
||||
|
||||
operator_id: str
|
||||
auth_method: str
|
||||
|
||||
|
||||
def _auth_error(detail: str = "Operator authentication required") -> HTTPException:
    """Build the 401 exception used for operator authentication failures.

    The exception is returned rather than raised so call sites can write
    ``raise _auth_error()``.

    Args:
        detail: Message placed in the HTTP error response.

    Returns:
        An ``HTTPException`` carrying status 401 and the given detail.
    """
    unauthorized = status.HTTP_401_UNAUTHORIZED
    return HTTPException(detail=detail, status_code=unauthorized)
|
||||
|
||||
|
||||
def _clean_operator_id(operator_id: str | None) -> str:
    """Strip and validate the operator identity taken from the request header.

    Args:
        operator_id: Raw header value, or ``None`` when the header is absent.

    Returns:
        The identity with surrounding whitespace removed.

    Raises:
        HTTPException: 401 when no identity was supplied, 422 when the
            stripped value does not match ``_OPERATOR_ID_RE``.
    """
    if operator_id is not None:
        candidate = operator_id.strip()
        if _OPERATOR_ID_RE.fullmatch(candidate) is not None:
            return candidate
        # Present but malformed: report it as a validation problem, not as
        # missing credentials.
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_CONTENT,
            detail="Invalid operator identity",
        )
    raise _auth_error()
|
||||
|
||||
|
||||
def authenticate_awooop_operator_headers(
    operator_id: str | None,
    operator_key: str | None,
    *,
    configured_key: str | None = None,
    environment: str | None = None,
) -> AwoooPOperatorPrincipal:
    """Validate trusted AwoooP operator headers.

    The operator identity is validated first. If no server-side key is
    configured, production environments reject the request, while any other
    environment accepts the identity alone and logs a warning. Otherwise the
    supplied key must match the configured key.

    Args:
        operator_id: Value from ``X-AwoooP-Operator-Id``.
        operator_key: Value from ``X-AwoooP-Operator-Key``.
        configured_key: Server-side shared key. Defaults to settings.
        environment: Runtime environment. Defaults to settings.

    Returns:
        Authenticated operator principal. ``auth_method`` is ``"dev_header"``
        when the key check was skipped and ``"operator_api_key"`` when the
        key matched.

    Raises:
        HTTPException: 401 when authentication is missing/invalid, or 422 for
            malformed operator identity.
    """
    cleaned_operator_id = _clean_operator_id(operator_id)
    expected_key = (
        settings.AWOOOP_OPERATOR_API_KEY
        if configured_key is None
        else configured_key
    )
    runtime_env = (environment or settings.ENVIRONMENT or "").lower()

    if not expected_key:
        if runtime_env in _PROD_ENVS:
            # A production deployment without a key is a misconfiguration;
            # fail closed instead of trusting the identity header alone.
            logger.critical(
                "awooop_operator_key_missing_in_production",
                environment=runtime_env,
            )
            raise _auth_error()
        logger.warning(
            "awooop_operator_key_skipped_dev_only",
            environment=runtime_env,
            operator_id=cleaned_operator_id,
        )
        return AwoooPOperatorPrincipal(
            operator_id=cleaned_operator_id,
            auth_method="dev_header",
        )

    if not operator_key:
        logger.warning("awooop_operator_key_missing", operator_id=cleaned_operator_id)
        raise _auth_error()

    # compare_digest accepts str operands only when they are pure ASCII and
    # raises TypeError otherwise. Comparing the UTF-8 bytes keeps the
    # constant-time comparison while making a non-ASCII key (supplied or
    # configured) a normal mismatch/match instead of an unhandled error.
    supplied_bytes = operator_key.encode("utf-8")
    expected_bytes = expected_key.encode("utf-8")
    if not secrets.compare_digest(supplied_bytes, expected_bytes):
        logger.warning("awooop_operator_key_invalid", operator_id=cleaned_operator_id)
        raise _auth_error()

    return AwoooPOperatorPrincipal(
        operator_id=cleaned_operator_id,
        auth_method="operator_api_key",
    )
|
||||
|
||||
|
||||
async def verify_awooop_operator(
    x_awooop_operator_id: Annotated[
        str | None,
        Header(alias="X-AwoooP-Operator-Id"),
    ] = None,
    x_awooop_operator_key: Annotated[
        str | None,
        Header(alias="X-AwoooP-Operator-Key"),
    ] = None,
) -> AwoooPOperatorPrincipal:
    """FastAPI dependency that authenticates the caller of an operator endpoint.

    Reads the ``X-AwoooP-Operator-Id`` and ``X-AwoooP-Operator-Key`` request
    headers and delegates to ``authenticate_awooop_operator_headers`` using
    the key and environment from settings.

    Returns:
        The authenticated operator principal.

    Raises:
        HTTPException: Propagated from the header authentication.
    """
    principal = authenticate_awooop_operator_headers(
        x_awooop_operator_id,
        x_awooop_operator_key,
    )
    return principal
|
||||
@@ -145,7 +145,7 @@ class Settings(BaseSettings):
|
||||
# ==========================================================================
|
||||
# ADR-104: LLM Playbook Generator
|
||||
# 成功修復且未命中既有 Playbook 時,用本地 LLM 生成 DRAFT/REVIEW Playbook。
|
||||
# 成本護欄:實作層只走 local provider(Ollama 111 → Ollama 188),不新增雲端 fallback。
|
||||
# 成本護欄:實作層只走 local provider(GCP-A → GCP-B → 111),不新增雲端 fallback。
|
||||
# 回滾指令: kubectl set env deployment/awoooi-api ENABLE_LLM_PLAYBOOK_GENERATION=false
|
||||
# ==========================================================================
|
||||
ENABLE_LLM_PLAYBOOK_GENERATION: bool = Field(
|
||||
@@ -215,8 +215,8 @@ class Settings(BaseSettings):
|
||||
description="Phase 25 P0: DIAGNOSE NIM timeout (秒),實測 2.2-27.3s avg 10.6s,60s 含 buffer",
|
||||
)
|
||||
OLLAMA_DIAGNOSE_TIMEOUT_SECONDS: int = Field(
|
||||
default=200,
|
||||
description="Phase 25 P0: Ollama timeout (秒),實測 CPU-only 238s,保留欄位但 DIAGNOSE 不再走 Ollama",
|
||||
default=300,
|
||||
description="Ollama diagnose timeout (秒)。GCP qwen3:14b CPU-only can exceed the old 120s proxy limit.",
|
||||
)
|
||||
|
||||
# ==========================================================================
|
||||
@@ -370,11 +370,16 @@ class Settings(BaseSettings):
|
||||
)
|
||||
return v
|
||||
|
||||
# 2026-04-25 Claude Engineer-C (P1.1): Ollama 健康檢測推理測試模型
|
||||
# 2026-05-05 Codex: health inference must stay on alert-fast model; qwen2.5
|
||||
# keeps reloading a 7B model on CPU-only GCP and slows incident fallback.
|
||||
OLLAMA_HEALTH_CHECK_MODEL: str = Field(
|
||||
default="qwen2.5:7b-instruct",
|
||||
default="gemma3:4b",
|
||||
description="OllamaHealthMonitor 推理測試使用模型(P1.1)",
|
||||
)
|
||||
OLLAMA_EMBEDDING_MODEL: str = Field(
|
||||
default="bge-m3:latest",
|
||||
description="Ollama embedding model. ADR-110 migrated embeddings from nomic-embed-text to bge-m3.",
|
||||
)
|
||||
# 2026-04-12 ogt: 心跳必須確認載入的 Ollama 模型清單
|
||||
# 2026-05-04 ogt + Claude Sonnet 4.6: ADR-110 GCP 升級,更新必要模型清單(nomic→bge-m3 + 新增 qwen3:14b + hermes3)
|
||||
OLLAMA_REQUIRED_MODELS: list[str] = Field(
|
||||
@@ -500,10 +505,42 @@ class Settings(BaseSettings):
|
||||
default=False,
|
||||
description=(
|
||||
"Allow LocalCodeReviewService to fall back to Gemini when the "
|
||||
"GCP-B/Ollama code-review lane fails. Default false to avoid "
|
||||
"local Ollama code-review lane fails. Default false to avoid "
|
||||
"unexpected cloud spend from Gitea push/PR alerts."
|
||||
),
|
||||
)
|
||||
ALERT_AI_ALLOW_CLOUD_FALLBACK: bool = Field(
|
||||
default=True,
|
||||
description=(
|
||||
"Allow incident/alert OpenClaw analysis to use cloud fallback "
|
||||
"providers after the GCP-A/GCP-B/111 Ollama lane is exhausted. "
|
||||
"Default true so Gemini can act as the final backup, after the "
|
||||
"ordered Ollama lane is exhausted."
|
||||
),
|
||||
)
|
||||
ALERT_AI_ENFORCE_OLLAMA_FIRST: bool = Field(
|
||||
default=True,
|
||||
description=(
|
||||
"Force incident/alert OpenClaw analysis to try GCP-A, then GCP-B, "
|
||||
"then local 111 before cloud backup providers such as Gemini."
|
||||
),
|
||||
)
|
||||
ALERT_OLLAMA_MODEL: str = Field(
|
||||
default="qwen3:14b",
|
||||
description=(
|
||||
"Ollama model used for incident/alert deep diagnosis. Alert cards "
|
||||
"may wait for this model; Gemini remains a backup after GCP-A, "
|
||||
"GCP-B, and 111 fail."
|
||||
),
|
||||
)
|
||||
INCIDENT_LLM_TIMEOUT_SECONDS: int = Field(
|
||||
default=360,
|
||||
description=(
|
||||
"Outer timeout for incident OpenClaw proposal generation. This must "
|
||||
"be long enough for the GCP-A/GCP-B/111 Ollama lane to complete "
|
||||
"before Gemini backup is considered useful."
|
||||
),
|
||||
)
|
||||
# 2026-03-29 ogt: ADR-036 Nemotron Tool Calling 整合
|
||||
NVIDIA_API_KEY: str = Field(
|
||||
default="",
|
||||
@@ -565,6 +602,13 @@ class Settings(BaseSettings):
|
||||
default="",
|
||||
description="API Key for K8s admin endpoints (X-K8s-Api-Key header)",
|
||||
)
|
||||
AWOOOP_OPERATOR_API_KEY: str = Field(
|
||||
default="",
|
||||
description=(
|
||||
"API key for AwoooP operator mutation endpoints "
|
||||
"(X-AwoooP-Operator-Key header)"
|
||||
),
|
||||
)
|
||||
|
||||
# ==========================================================================
|
||||
# 統帥鐵律:禁止 SQLite (AWOOOI 憲法)
|
||||
@@ -855,7 +899,7 @@ class Settings(BaseSettings):
|
||||
# ==========================================================================
|
||||
# MCP Phase 2b: Prometheus MCP Server (ADR-071, 2026-04-11 Claude Sonnet 4.6)
|
||||
# ==========================================================================
|
||||
# 2026-04-29 ogt + Claude Opus 4.7: drift fix — 188 是 Ollama Hub,Prometheus 實際在 110
|
||||
# 2026-04-29 ogt + Claude Opus 4.7: drift fix — Prometheus 實際在 110
|
||||
# ConfigMap 04-configmap.yaml 也是 110;governance_agent / SLO check 連 188 會 timeout
|
||||
# 此 drift 是 SPF-4 (governance_agent silently fail) 根因之一
|
||||
PROMETHEUS_URL: str = Field(
|
||||
@@ -929,7 +973,7 @@ class Settings(BaseSettings):
|
||||
"devops": "192.168.0.110", # Harbor, GH Runner
|
||||
"security": "192.168.0.112", # Kali Scanner
|
||||
"k3s_master": "192.168.0.120", # K3s Master
|
||||
"ai_web": "192.168.0.188", # Nginx, Postgres, Redis, Ollama
|
||||
"ai_web": "192.168.0.188", # Nginx, Postgres, Redis, SignOz
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -11,6 +11,7 @@ Features:
|
||||
"""
|
||||
|
||||
import logging
|
||||
import re
|
||||
import sys
|
||||
from typing import Any
|
||||
|
||||
@@ -19,6 +20,28 @@ from structlog.types import Processor
|
||||
|
||||
from src.core.config import settings
|
||||
|
||||
_TELEGRAM_BOT_URL_RE = re.compile(r"(api\.telegram\.org/bot)[^/\s]+")


def _redact_sensitive_log_text(text: str) -> str:
    """Mask sensitive URLs that may appear in third-party logger messages.

    The path segment following ``api.telegram.org/bot`` is replaced with
    ``<redacted>``; all other text is returned unchanged.
    """
    return _TELEGRAM_BOT_URL_RE.sub(r"\1<redacted>", text)


def _redact_log_arg(arg: object) -> object:
    """Redact one %-format argument while preserving its type when possible.

    Strings are always passed through the redactor. Any other object is
    replaced by its redacted text only if that text actually contained a
    sensitive URL; otherwise the original object is returned so numeric
    conversions such as ``%d`` or ``%.2f`` in the log template keep working.
    """
    if isinstance(arg, str):
        return _redact_sensitive_log_text(arg)
    rendered = str(arg)
    redacted = _redact_sensitive_log_text(rendered)
    return arg if redacted == rendered else redacted


class SensitiveURLRedactionFilter(logging.Filter):
    """Standard logging filter that keeps token-bearing URLs out of the log.

    Intended for third-party loggers such as httpx, which log full request
    URLs through the standard ``logging`` module.
    """

    def filter(self, record: logging.LogRecord) -> bool:
        """Redact the record's message and arguments in place.

        Always returns ``True``: records are rewritten, never dropped.
        """
        record.msg = _redact_sensitive_log_text(str(record.msg))
        if isinstance(record.args, tuple):
            record.args = tuple(_redact_log_arg(arg) for arg in record.args)
        elif isinstance(record.args, dict):
            record.args = {
                key: _redact_log_arg(value)
                for key, value in record.args.items()
            }
        return True
|
||||
|
||||
|
||||
def setup_logging() -> None:
|
||||
"""Configure structlog for the application"""
|
||||
@@ -68,6 +91,15 @@ def setup_logging() -> None:
|
||||
stream=sys.stdout,
|
||||
level=logging.getLevelName(settings.LOG_LEVEL),
|
||||
)
|
||||
redaction_filter = SensitiveURLRedactionFilter()
|
||||
root_logger = logging.getLogger()
|
||||
root_logger.addFilter(redaction_filter)
|
||||
for handler in root_logger.handlers:
|
||||
handler.addFilter(redaction_filter)
|
||||
|
||||
# httpx INFO 會輸出完整 request URL;Telegram Bot API URL 內含 token。
|
||||
logging.getLogger("httpx").setLevel(logging.WARNING)
|
||||
logging.getLogger("httpcore").setLevel(logging.WARNING)
|
||||
|
||||
|
||||
def get_logger(name: str | None = None, **initial_context: Any) -> structlog.BoundLogger:
|
||||
|
||||
@@ -17,6 +17,7 @@ PostgreSQL 事務管理器,確保多表操作原子性。
|
||||
from typing import Any
|
||||
|
||||
import structlog
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
@@ -49,14 +50,20 @@ class UnitOfWork:
|
||||
- Redis 操作失敗時必須手動呼叫 rollback()
|
||||
"""
|
||||
|
||||
def __init__(self, session_factory: async_sessionmaker[AsyncSession]):
|
||||
def __init__(
|
||||
self,
|
||||
session_factory: async_sessionmaker[AsyncSession],
|
||||
project_id: str | None = None,
|
||||
):
|
||||
"""
|
||||
初始化 UnitOfWork
|
||||
|
||||
Args:
|
||||
session_factory: SQLAlchemy async session factory
|
||||
project_id: RLS project context. None means contextvar/default awoooi.
|
||||
"""
|
||||
self._session_factory = session_factory
|
||||
self._project_id = project_id
|
||||
self._session: AsyncSession | None = None
|
||||
self._committed = False
|
||||
|
||||
@@ -74,9 +81,18 @@ class UnitOfWork:
|
||||
|
||||
async def __aenter__(self) -> "UnitOfWork":
|
||||
"""進入事務"""
|
||||
from src.core.context import get_current_project_id
|
||||
|
||||
self._session = self._session_factory()
|
||||
effective_pid = (
|
||||
self._project_id if self._project_id is not None else get_current_project_id()
|
||||
)
|
||||
await self._session.execute(
|
||||
text("SELECT set_config('app.project_id', :pid, TRUE)"),
|
||||
{"pid": effective_pid},
|
||||
)
|
||||
self._committed = False
|
||||
logger.debug("uow_started")
|
||||
logger.debug("uow_started", project_id=effective_pid)
|
||||
return self
|
||||
|
||||
async def __aexit__(
|
||||
|
||||
@@ -10,7 +10,7 @@ from __future__ import annotations
|
||||
from datetime import datetime
|
||||
from decimal import Decimal
|
||||
from typing import Any
|
||||
from uuid import UUID, uuid4
|
||||
from uuid import UUID
|
||||
|
||||
from sqlalchemy import (
|
||||
Boolean,
|
||||
@@ -577,8 +577,8 @@ class AwoooPMcpGatewayAudit(Base):
|
||||
run_id: Mapped[UUID | None] = mapped_column(nullable=True)
|
||||
trace_id: Mapped[str | None] = mapped_column(String(128), nullable=True)
|
||||
agent_id: Mapped[str | None] = mapped_column(String(128), nullable=True)
|
||||
tool_id: Mapped[UUID] = mapped_column(
|
||||
ForeignKey("awooop_mcp_tool_registry.tool_id"), nullable=False
|
||||
tool_id: Mapped[UUID | None] = mapped_column(
|
||||
ForeignKey("awooop_mcp_tool_registry.tool_id"), nullable=True
|
||||
)
|
||||
tool_name: Mapped[str] = mapped_column(String(128), nullable=False)
|
||||
credential_ref: Mapped[str | None] = mapped_column(String(256), nullable=True)
|
||||
@@ -635,6 +635,13 @@ class AwoooPConversationEvent(Base):
|
||||
content_type: Mapped[str] = mapped_column(String(32), nullable=False, default="text")
|
||||
content_hash: Mapped[str | None] = mapped_column(String(64), nullable=True)
|
||||
content_preview: Mapped[str | None] = mapped_column(String(256), nullable=True)
|
||||
content_redacted: Mapped[str | None] = mapped_column(Text, nullable=True)
|
||||
redaction_version: Mapped[str] = mapped_column(
|
||||
String(32), nullable=False, server_default=text("'audit_sink_v1'")
|
||||
)
|
||||
source_envelope: Mapped[dict[str, Any]] = mapped_column(
|
||||
JSONB, nullable=False, server_default=text("'{}'::jsonb")
|
||||
)
|
||||
attachment_sha256: Mapped[str | None] = mapped_column(String(64), nullable=True)
|
||||
is_duplicate: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
|
||||
provider_ts: Mapped[datetime | None] = mapped_column(nullable=True)
|
||||
@@ -680,6 +687,13 @@ class AwoooPOutboundMessage(Base):
|
||||
message_type: Mapped[str] = mapped_column(String(32), nullable=False)
|
||||
content_hash: Mapped[str | None] = mapped_column(String(64), nullable=True)
|
||||
content_preview: Mapped[str | None] = mapped_column(String(256), nullable=True)
|
||||
content_redacted: Mapped[str | None] = mapped_column(Text, nullable=True)
|
||||
redaction_version: Mapped[str] = mapped_column(
|
||||
String(32), nullable=False, server_default=text("'audit_sink_v1'")
|
||||
)
|
||||
source_envelope: Mapped[dict[str, Any]] = mapped_column(
|
||||
JSONB, nullable=False, server_default=text("'{}'::jsonb")
|
||||
)
|
||||
provider_message_id: Mapped[str | None] = mapped_column(String(64), nullable=True)
|
||||
send_status: Mapped[str] = mapped_column(String(16), nullable=False, default="pending")
|
||||
send_error: Mapped[str | None] = mapped_column(Text, nullable=True)
|
||||
|
||||
@@ -106,10 +106,13 @@ async def get_db() -> AsyncGenerator[AsyncSession, None]:
|
||||
factory = get_session_factory()
|
||||
async with factory() as session:
|
||||
try:
|
||||
from src.core.context import get_current_project_id
|
||||
|
||||
# AwoooP Phase 2.3 (2026-05-04 ogt): SET LOCAL app.project_id 讓 RLS Policy 生效
|
||||
# 預設 'awoooi',多租戶路由將在 middleware 注入實際 project_id
|
||||
# 預設 'awoooi',多租戶路由將透過 contextvar 注入實際 project_id
|
||||
await session.execute(
|
||||
text("SELECT set_config('app.project_id', 'awoooi', TRUE)")
|
||||
text("SELECT set_config('app.project_id', :pid, TRUE)"),
|
||||
{"pid": get_current_project_id()},
|
||||
)
|
||||
yield session
|
||||
await session.commit()
|
||||
|
||||
@@ -9,6 +9,7 @@ Layer 1 意圖路由(關鍵字正則)→ Ollama 本地模型(111)→ Tel
|
||||
debugger/vuln → deepseek-r1:14b(推理); code agents → qwen2.5-coder:7b; 其他 → qwen2.5:7b-instruct
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import re
|
||||
import time
|
||||
@@ -17,7 +18,6 @@ import httpx
|
||||
import structlog
|
||||
from sqlalchemy import text
|
||||
|
||||
from src.core.config import settings
|
||||
from src.core.redis_client import get_redis
|
||||
from src.db.base import get_db_context
|
||||
from src.hermes.agent_loader import get_agent_system_prompt
|
||||
@@ -266,7 +266,9 @@ async def process_nl_message(
|
||||
success = False
|
||||
error_type: str | None = None
|
||||
try:
|
||||
ollama_base = getattr(settings, "OLLAMA_URL", "http://34.143.170.20:11434") # 2026-05-03 ogt: ADR-110 GCP-A Primary
|
||||
from src.services.ollama_endpoint_resolver import resolve_ollama_endpoint
|
||||
|
||||
ollama_base = resolve_ollama_endpoint("hermes")
|
||||
async with httpx.AsyncClient(timeout=_OLLAMA_TIMEOUT) as _hc:
|
||||
resp = await _hc.post(
|
||||
f"{ollama_base}/api/chat",
|
||||
|
||||
@@ -46,6 +46,7 @@ _DEDUP_TTL_SEC = 3600 # 同一告警 1 小時內不重複發送
|
||||
_TG_SILENCE_THRESHOLD = 2 # PENDING telegram_message_id IS NULL 告警門檻
|
||||
_FLYWHEEL_SUCCESS_MIN = 0.30 # 執行成功率下限
|
||||
_STUCK_ANALYSIS_THRESHOLD = 3 # Agent Debate 失敗導致卡住的告警門檻
|
||||
_TRUST_DRIFT_META_MIN_RATIO = 0.20 # 低於此比例只記治理事件,不升 Meta System
|
||||
|
||||
# 2026-05-03 ogt + Claude Opus 4.7 — feedback_silencing_alerts_recurring_violation
|
||||
# 啟動寬限期:30 分鐘內可 skip「資料還沒到」噪音;超過寬限期仍空 = 真資料管線斷,必須告警
|
||||
@@ -210,7 +211,8 @@ async def _check_once() -> None:
|
||||
from src.services.governance_agent import get_governance_agent
|
||||
trust_result = await get_governance_agent().check_trust_drift(emit_alert=False)
|
||||
drifted = trust_result.get("drifted", 0)
|
||||
if drifted > 0:
|
||||
drift_ratio = float(trust_result.get("drift_ratio") or 0.0)
|
||||
if drifted > 0 and drift_ratio >= _TRUST_DRIFT_META_MIN_RATIO:
|
||||
auto_deprecated = trust_result.get("auto_deprecated", 0)
|
||||
kept = trust_result.get("kept", 0)
|
||||
violations.append(
|
||||
@@ -219,6 +221,13 @@ async def _check_once() -> None:
|
||||
)
|
||||
# 2026-05-05 ogt W6 修復:移除動態 low_count,避免 count 微變繞過 dedup
|
||||
violation_codes.append("W6:trust_drift")
|
||||
elif drifted > 0:
|
||||
logger.info(
|
||||
"watchdog_w6_trust_drift_below_meta_threshold",
|
||||
drifted=drifted,
|
||||
drift_ratio=round(drift_ratio, 3),
|
||||
threshold=_TRUST_DRIFT_META_MIN_RATIO,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning("watchdog_w6_trust_drift_check_failed", error=str(e))
|
||||
|
||||
|
||||
@@ -479,7 +479,7 @@ async def _collect_all_k8s_assets() -> tuple[list[dict[str, Any]], list[dict[str
|
||||
|
||||
# 6. Prometheus targets — 補齊 host-install services (110/112/188/125 等非 K8s)
|
||||
# Gap 1 修補 (2026-04-19 audit): 原本 asset_inventory 只涵蓋 K8s,
|
||||
# 110 Harbor/Gitea/監控 + 188 PostgreSQL/Redis/Ollama host-install 全漏
|
||||
# 110 Harbor/Gitea/監控 + 188 PostgreSQL/Redis host-install 全漏
|
||||
# 用 Prometheus /api/v1/targets 自動發現全節點服務
|
||||
try:
|
||||
prom_assets, host_relationships = await _collect_prometheus_targets()
|
||||
|
||||
@@ -172,7 +172,7 @@ _LLM_FORECAST_PROMPT = """你是 AWOOOI 容量規劃專家。以下 host 過去
|
||||
{findings_json}
|
||||
|
||||
## 當前主機環境資訊
|
||||
- 主機架構: 110 (Harbor/Gitea/監控), 112 (Security), 120/121 (K3s), 125 (K3s backup), 188 (PG/Redis/Ollama/MinIO)
|
||||
- 主機架構: 110 (Harbor/Gitea/監控), 112 (Security), 120/121 (K3s), 125 (K3s backup), 188 (PG/Redis/MinIO)
|
||||
- 判斷請考慮: 該主機上跑什麼服務、常見瓶頸模式
|
||||
|
||||
## 輸出規格 (必須是合法 JSON,純 JSON 無前後文字)
|
||||
|
||||
@@ -28,7 +28,7 @@ from datetime import timedelta
|
||||
import structlog
|
||||
from sqlalchemy import select, update
|
||||
|
||||
from src.db.base import get_session_factory
|
||||
from src.db.base import get_db_context
|
||||
from src.db.models import AiGovernanceEvent, KnowledgeEntryRecord
|
||||
from src.utils.timezone import now_taipei
|
||||
|
||||
@@ -129,7 +129,7 @@ class KbRotCleaner:
|
||||
rot_reasons: dict[str, list[str]] = {}
|
||||
total = 0
|
||||
|
||||
async with get_session_factory()() as session:
|
||||
async with get_db_context() as session:
|
||||
# 只掃 active 狀態(非 archived)
|
||||
q = await session.execute(
|
||||
select(KnowledgeEntryRecord).where(
|
||||
@@ -193,7 +193,7 @@ class KbRotCleaner:
|
||||
if not result.stale_ids:
|
||||
return
|
||||
|
||||
async with get_session_factory()() as session:
|
||||
async with get_db_context() as session:
|
||||
# 逐條更新(避免 bulk update 覆蓋 tags JSONB)
|
||||
q = await session.execute(
|
||||
select(KnowledgeEntryRecord).where(
|
||||
@@ -220,7 +220,7 @@ class KbRotCleaner:
|
||||
async def _save_event(self, result: RotScanResult) -> None:
|
||||
"""寫 kb_stale 事件到 ai_governance_events。"""
|
||||
try:
|
||||
async with get_session_factory()() as session:
|
||||
async with get_db_context() as session:
|
||||
event = AiGovernanceEvent(
|
||||
event_type="kb_stale",
|
||||
details=result.to_dict(),
|
||||
|
||||
@@ -25,7 +25,9 @@ Feature Flag:
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
|
||||
import structlog
|
||||
|
||||
from src.core.config import settings
|
||||
|
||||
@@ -33,7 +33,7 @@ from datetime import timedelta
|
||||
import structlog
|
||||
from sqlalchemy import and_, select, update
|
||||
|
||||
from src.db.base import get_session_factory
|
||||
from src.db.base import get_db_context
|
||||
from src.db.models import KnowledgeEntryRecord
|
||||
from src.models.knowledge import EntryStatus
|
||||
from src.utils.timezone import now_taipei
|
||||
@@ -112,8 +112,7 @@ class KnowledgeDecayJob:
|
||||
cutoff = now_taipei() - timedelta(days=DECAY_AGE_DAYS)
|
||||
decayable_statuses = [EntryStatus.DRAFT.value, EntryStatus.REVIEW.value]
|
||||
|
||||
session_factory = get_session_factory()
|
||||
async with session_factory() as db:
|
||||
async with get_db_context() as db:
|
||||
# 查:30 天未引用(view_count=0)且 updated_at < cutoff 的 draft/review 條目
|
||||
stmt = select(KnowledgeEntryRecord).where(
|
||||
and_(
|
||||
|
||||
@@ -29,7 +29,7 @@ from datetime import timedelta
|
||||
import structlog
|
||||
from sqlalchemy import and_, select
|
||||
|
||||
from src.db.base import get_session_factory
|
||||
from src.db.base import get_db_context
|
||||
from src.db.models import AgentSession, AiGovernanceEvent, AutoRepairExecution, IncidentEvidence
|
||||
from src.utils.timezone import now_taipei
|
||||
|
||||
@@ -109,9 +109,7 @@ class OfflineReplayService:
|
||||
|
||||
async def _run_replay(self) -> OfflineReplayReport:
|
||||
cutoff = now_taipei() - timedelta(days=REPLAY_LOOKBACK_DAYS)
|
||||
session_factory = get_session_factory()
|
||||
|
||||
async with session_factory() as db:
|
||||
async with get_db_context() as db:
|
||||
# 1. 取最近 N 個有 AgentSession(coordinator) 的 Incident
|
||||
stmt = (
|
||||
select(AgentSession.incident_id)
|
||||
@@ -137,7 +135,7 @@ class OfflineReplayService:
|
||||
)
|
||||
|
||||
results: list[IncidentReplayResult] = []
|
||||
async with session_factory() as db:
|
||||
async with get_db_context() as db:
|
||||
for incident_id in incident_ids:
|
||||
r = await self._replay_one(db, incident_id)
|
||||
results.append(r)
|
||||
|
||||
@@ -76,12 +76,13 @@ from src.api.v1 import terminal as terminal_v1 # Phase 19.1: Omni-Terminal SSE
|
||||
from src.api.v1 import timeline as timeline_v1
|
||||
from src.api.v1 import webhooks as webhooks_v1
|
||||
from src.core.config import settings
|
||||
from src.core.feature_flags import aiops_flags # ADR-080: AI 自主化飛輪 feature flags 啟動驗證
|
||||
from src.core.http_client import close_all_http_clients, init_all_http_clients
|
||||
from src.core.logging import get_logger, setup_logging
|
||||
from src.core.redis_client import close_redis_pool, init_redis_pool
|
||||
from src.core.sse import get_publisher
|
||||
from src.core.telemetry import setup_telemetry, shutdown_telemetry
|
||||
from src.services.adr100_slo_metrics_service import get_adr100_slo_metrics_service
|
||||
from src.services.flywheel_stats_service import get_flywheel_stats_service
|
||||
|
||||
# CTO-201: Database & Executor
|
||||
from src.db.base import close_db, init_db
|
||||
@@ -553,7 +554,6 @@ async def lifespan(_app: FastAPI) -> AsyncGenerator[None, None]:
|
||||
# 2026-04-27 P3.1-T3 by Claude
|
||||
try:
|
||||
from src.utils.timezone import now_taipei
|
||||
from datetime import datetime as _dt
|
||||
|
||||
async def _run_kb_rot_cleaner_loop() -> None:
|
||||
from src.jobs.kb_rot_cleaner import get_kb_rot_cleaner
|
||||
@@ -683,7 +683,7 @@ async def lifespan(_app: FastAPI) -> AsyncGenerator[None, None]:
|
||||
logger.warning("ollama_failover_system_start_failed", error=str(e))
|
||||
|
||||
# 2026-04-27 P3.2.2 by Claude — AI Provider 版本追蹤(每 1 小時)
|
||||
# 探測 5 Provider(ollama/ollama_188/gemini/claude/openclaw_nemo)版本
|
||||
# 探測 5 Provider(ollama/ollama_local/gemini/claude/openclaw_nemo)版本
|
||||
# 寫入 ai_provider_version_history;版本變更時 log warning,P3.2.3 alerter 後續整合
|
||||
try:
|
||||
async def _run_model_version_tracker_loop() -> None:
|
||||
@@ -1005,10 +1005,24 @@ app.include_router(platform_v1.router, prefix="/api/v1/platform", tags=["AwoooP
|
||||
@app.get("/metrics", include_in_schema=False)
|
||||
async def prometheus_metrics() -> Response:
|
||||
"""Prometheus metrics endpoint for alerting"""
|
||||
return Response(
|
||||
content=generate_latest(),
|
||||
media_type=CONTENT_TYPE_LATEST,
|
||||
)
|
||||
content = generate_latest().decode("utf-8")
|
||||
# 2026-05-07 ogt + Claude Sonnet 4.6 — INC-20260507-99ADF2 修復
|
||||
# 飛輪指標(awoooi_flywheel_*)原本只在 /api/v1/stats/flywheel/metrics 暴露,
|
||||
# 110 Prom awoooi-api job scrape /metrics 時抓不到 → FlywheelExecutionRateMissing 永久 firing
|
||||
# 修法:在此串入飛輪指標,讓既有 scrape job 無需新增 job 即可抓到
|
||||
try:
|
||||
flywheel_metrics = await get_flywheel_stats_service().compute()
|
||||
content += flywheel_metrics.to_prometheus_lines()
|
||||
except Exception:
|
||||
logger.warning("prometheus_metrics_flywheel_error")
|
||||
# 2026-05-14 Codex — T18 ADR-100 SLO emitter
|
||||
# GovernanceAgent 讀 Prometheus recording rules;若 /metrics 不吐底層 DB totals,
|
||||
# sli:* rules 會全空並每小時重複發 governance_slo_data_gap。
|
||||
try:
|
||||
content += await get_adr100_slo_metrics_service().to_prometheus_lines()
|
||||
except Exception as exc:
|
||||
logger.warning("prometheus_metrics_adr100_slo_error", error=str(exc))
|
||||
return Response(content=content, media_type=CONTENT_TYPE_LATEST)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
|
||||
@@ -29,7 +29,7 @@ from __future__ import annotations
|
||||
from prometheus_client import Histogram
|
||||
|
||||
# Buckets 對齊 NIM 實測分佈(2-27s),並覆蓋三段 timeout 30/20/15s 邊界
|
||||
# 低端(0.5-5s):快速路徑(Ollama 188 本地)
|
||||
# 低端(0.5-5s):快速路徑(Ollama provider pool)
|
||||
# 中端(5-20s):NIM + Gemini fallback
|
||||
# 高端(20-60s):超時 / 慢速 Provider
|
||||
_AGENT_STEP_BUCKETS = [0.5, 1.0, 2.0, 5.0, 10.0, 15.0, 20.0, 30.0, 45.0, 60.0]
|
||||
|
||||
@@ -39,14 +39,15 @@ import hashlib
|
||||
import json
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from datetime import UTC, datetime
|
||||
from typing import Any
|
||||
from uuid import UUID
|
||||
|
||||
import structlog
|
||||
from sqlalchemy import select, text
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from src.core.redis_client import get_redis
|
||||
from src.db.awooop_models import (
|
||||
AwoooPActiveRevision,
|
||||
AwoooPMcpGatewayAudit,
|
||||
@@ -277,7 +278,7 @@ class McpGateway:
|
||||
self, ctx: GatewayContext, gate_result: GateCheckResult
|
||||
) -> tuple[AwoooPMcpToolRegistry, AwoooPMcpGrant]:
|
||||
"""Gate 3:tool 在白名單 + grant 有效(未到期、未撤銷)"""
|
||||
now = datetime.now(timezone.utc)
|
||||
now = datetime.now(UTC)
|
||||
|
||||
# 查 tool registry
|
||||
tool_result = await self._db.execute(
|
||||
@@ -359,14 +360,9 @@ class McpGateway:
|
||||
raise GateApprovalError("write/admin 操作需要 run_id(approval 追蹤用)")
|
||||
|
||||
try:
|
||||
import aioredis
|
||||
|
||||
from src.core.config import settings
|
||||
|
||||
redis = aioredis.from_url(settings.REDIS_URL)
|
||||
redis = get_redis()
|
||||
approval_key = f"mcp_approval:{ctx.project_id}:{ctx.agent_id}:{ctx.tool_name}:{ctx.run_id}"
|
||||
approved = await redis.get(approval_key)
|
||||
await redis.aclose()
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"mcp_gate5_redis_error",
|
||||
@@ -392,10 +388,7 @@ class McpGateway:
|
||||
parameters: dict[str, Any],
|
||||
) -> MCPToolResult:
|
||||
"""呼叫底層 MCP provider 執行工具"""
|
||||
registry = get_provider_registry()
|
||||
provider = registry.get(ctx.tool_name) or registry.get(
|
||||
tool_row.tool_name if tool_row else ctx.tool_name
|
||||
)
|
||||
provider = await self._resolve_provider(ctx, tool_row)
|
||||
|
||||
# 找不到 provider → 回傳 shadow no-op
|
||||
if provider is None:
|
||||
@@ -411,14 +404,57 @@ class McpGateway:
|
||||
)
|
||||
|
||||
audit_params = dict(parameters)
|
||||
existing_audit = (
|
||||
parameters.get("_mcp_audit")
|
||||
if isinstance(parameters, dict) and isinstance(parameters.get("_mcp_audit"), dict)
|
||||
else {}
|
||||
)
|
||||
audit_params["_mcp_audit"] = {
|
||||
"project_id": ctx.project_id,
|
||||
"agent_id": ctx.agent_id,
|
||||
"run_id": str(ctx.run_id) if ctx.run_id else None,
|
||||
"trace_id": ctx.trace_id,
|
||||
"incident_id": existing_audit.get("incident_id") or ctx.trace_id,
|
||||
"session_id": existing_audit.get("session_id"),
|
||||
"flywheel_node": existing_audit.get("flywheel_node"),
|
||||
"agent_role": existing_audit.get("agent_role") or ctx.agent_id,
|
||||
"gateway_path": "awooop_mcp_gateway",
|
||||
}
|
||||
return await provider.execute(ctx.tool_name, audit_params)
|
||||
|
||||
async def _resolve_provider(
    self,
    ctx: GatewayContext,
    tool_row: AwoooPMcpToolRegistry | None,
):
    """Locate the MCP provider that serves the tool named in ``ctx``.

    The provider registry is keyed by provider name (``kubernetes``,
    ``ssh_host``, ...), whereas ``GatewayContext`` deliberately carries the
    governed tool name (``kubectl_get``, ``ssh_diagnose``, ...). A direct
    registry hit on ``ctx.tool_name`` wins; otherwise each provider's tool
    manifest is scanned. The scan is a compatibility bridge until the
    registry exposes a first-class tool index.

    Args:
        ctx: Gateway call context; ``ctx.tool_name`` is the tool being routed.
        tool_row: Registry row for the tool, if any. When truthy, its
            ``tool_name`` is the name searched for in the manifests;
            otherwise ``ctx.tool_name`` is used.

    Returns:
        The first matching provider, or ``None`` when nothing matches.
    """
    providers = get_provider_registry()

    # Fast path: the tool name happens to be a registry key.
    keyed_match = providers.get(ctx.tool_name)
    if keyed_match is not None:
        return keyed_match

    wanted = ctx.tool_name if not tool_row else tool_row.tool_name
    for candidate in providers.all():
        try:
            manifest = await candidate.list_tools()
        except Exception as exc:
            # Best effort: a provider whose manifest cannot be listed is
            # logged at debug level and skipped, not treated as fatal.
            logger.debug(
                "mcp_gateway_provider_manifest_skipped",
                provider=getattr(candidate, "name", None),
                tool_name=wanted,
                error=str(exc),
            )
            continue
        for tool in manifest:
            if tool.name == wanted:
                return candidate
    return None
|
||||
|
||||
# ── Audit log ─────────────────────────────────────────────────────────────
|
||||
|
||||
async def _write_audit(
|
||||
@@ -446,6 +482,15 @@ class McpGateway:
|
||||
json.dumps(result.output, sort_keys=True, default=str).encode()
|
||||
).hexdigest()
|
||||
|
||||
gate_payload = {
|
||||
**gate_result.as_dict(),
|
||||
"schema_version": "awooop_mcp_gateway_audit_v1",
|
||||
"gateway_path": "awooop_mcp_gateway",
|
||||
"policy_enforced": True,
|
||||
"is_shadow": ctx.is_shadow,
|
||||
"required_scope": ctx.required_scope,
|
||||
}
|
||||
|
||||
audit = AwoooPMcpGatewayAudit(
|
||||
project_id=ctx.project_id,
|
||||
run_id=ctx.run_id,
|
||||
@@ -455,16 +500,15 @@ class McpGateway:
|
||||
tool_name=ctx.tool_name,
|
||||
input_hash=input_hash,
|
||||
output_hash=output_hash,
|
||||
gate_result=gate_result.as_dict(),
|
||||
gate_result=gate_payload,
|
||||
result_status=result_status,
|
||||
block_gate=block_gate,
|
||||
block_reason=block_reason,
|
||||
latency_ms=latency_ms,
|
||||
)
|
||||
|
||||
if tool_row is not None:
|
||||
self._db.add(audit)
|
||||
await self._db.flush()
|
||||
self._db.add(audit)
|
||||
await self._db.flush()
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"mcp_gateway_audit_write_failed",
|
||||
|
||||
@@ -14,6 +14,7 @@ from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
from uuid import uuid4
|
||||
|
||||
from src.utils.timezone import now_taipei
|
||||
|
||||
@@ -29,7 +30,9 @@ class MCPTool:
|
||||
name: str
|
||||
description: str
|
||||
input_schema: dict[str, Any]
|
||||
server_name: str
|
||||
# 2026-05-06 Codex: 部分舊 provider 的 list_tools() 尚未傳 server_name。
|
||||
# 先給 DTO 預設值,registry 會以 provider.name 補正,避免啟動登記直接 crash。
|
||||
server_name: str = ""
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -41,12 +44,21 @@ class MCPToolResult:
|
||||
"""
|
||||
|
||||
success: bool
|
||||
execution_id: str
|
||||
execution_id: str = ""
|
||||
output: Any | None = None
|
||||
# 2026-05-06 Codex: 舊 provider 曾使用 data=... 作為成功輸出欄位。
|
||||
# 保留 alias,避免 provider 成功路徑因 dataclass 參數不相容而 crash。
|
||||
data: Any | None = None
|
||||
error: str | None = None
|
||||
duration: float = 0.0
|
||||
timestamp: datetime = field(default_factory=now_taipei)
|
||||
|
||||
def __post_init__(self) -> None:
|
||||
if not self.execution_id:
|
||||
self.execution_id = f"mcp-{uuid4()}"
|
||||
if self.output is None and self.data is not None:
|
||||
self.output = self.data
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
return {
|
||||
"success": self.success,
|
||||
|
||||
@@ -24,6 +24,7 @@ from typing import Any
|
||||
import httpx
|
||||
|
||||
from src.core.config import settings # P0-13: K8s namespace 由 settings.AWOOOI_K8S_NAMESPACE 提供
|
||||
from src.services.mcp_audit_context import with_mcp_audit_context
|
||||
from src.utils.timezone import now_taipei
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -518,6 +519,13 @@ class MCPBridge:
|
||||
raise ValueError(f"Unknown MCP Server: {server_name}")
|
||||
|
||||
server = self._servers[server_name]
|
||||
parameters = with_mcp_audit_context(
|
||||
parameters,
|
||||
session_id=f"mcp_bridge:{execution_id}",
|
||||
flywheel_node="govern",
|
||||
agent_role="mcp_bridge",
|
||||
gateway_path="legacy_mcp_bridge",
|
||||
)
|
||||
result = await self._execute_tool(server, tool_name, parameters)
|
||||
|
||||
# ========================================
|
||||
|
||||
@@ -41,6 +41,7 @@ SSH 連線:
|
||||
@see docs/superpowers/specs/2026-04-10-infra-rebuild-sprint-abc-design.md §MCP-2a
|
||||
"""
|
||||
|
||||
import logging
|
||||
import re
|
||||
import uuid
|
||||
from datetime import UTC, datetime
|
||||
@@ -51,6 +52,7 @@ import structlog
|
||||
from src.plugins.mcp.interfaces import MCPTool, MCPToolProvider, MCPToolResult
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
_asyncssh_logger_configured = False
|
||||
|
||||
# =============================================================================
|
||||
# 安全常數
|
||||
@@ -58,10 +60,17 @@ logger = structlog.get_logger(__name__)
|
||||
|
||||
SSH_KEY_PATH = "/run/secrets/ssh_mcp_key"
|
||||
SSH_USER = "wooo"
|
||||
SSH_PORT = 22
|
||||
DEFAULT_HOST_USERS = {
|
||||
# AI/Web host is operated by the ollama account in the current topology.
|
||||
"192.168.0.188": "ollama",
|
||||
}
|
||||
SHORT_HOST_MAP = {
|
||||
"110": "192.168.0.110",
|
||||
"120": "192.168.0.120",
|
||||
"121": "192.168.0.121",
|
||||
"188": "192.168.0.188",
|
||||
}
|
||||
DIAG_TIMEOUT = 10 # 診斷類超時(秒)
|
||||
OP_TIMEOUT = 60 # 操作類超時(秒)
|
||||
|
||||
@@ -104,6 +113,47 @@ def _validate_param(key: str, value: str) -> str:
|
||||
# tail / port / lines 由呼叫方 int() 轉換,不需字串白名單
|
||||
return value
|
||||
|
||||
|
||||
def _normalize_ssh_host(value: str) -> str:
|
||||
"""
|
||||
Normalize host labels before they enter asyncssh.
|
||||
|
||||
Prometheus labels often arrive as ``192.168.0.110:9100``. That port is the
|
||||
exporter port, not SSH. The SSH provider must connect to the host on the
|
||||
platform SSH port, otherwise asyncssh can receive a stringly port from
|
||||
config/labels and fail with ``%d format`` before the tool even runs.
|
||||
"""
|
||||
host = (value or "").strip()
|
||||
if host.startswith("ssh://"):
|
||||
host = host.removeprefix("ssh://")
|
||||
if "@" in host:
|
||||
host = host.rsplit("@", 1)[1]
|
||||
if host.startswith("[") and "]" in host:
|
||||
return host[1:host.index("]")]
|
||||
if host.count(":") == 1:
|
||||
maybe_host, maybe_port = host.rsplit(":", 1)
|
||||
if maybe_port.isdigit():
|
||||
host = maybe_host
|
||||
if host in SHORT_HOST_MAP:
|
||||
return SHORT_HOST_MAP[host]
|
||||
return host
|
||||
|
||||
|
||||
def _quiet_asyncssh_info_logs() -> None:
    """Raise the stdlib ``asyncssh`` logger to WARNING on the first call.

    Rationale: some target SSH servers send the exit status as a string.
    AsyncSSH then emits an INFO record whose ``%d`` placeholder receives that
    string before our code sees the result, producing noisy
    ``TypeError: %d format`` tracebacks from stdlib %-format logging. The tool
    result itself is still available, so production keeps asyncssh at WARNING
    and relies on the structured MCP audit logs instead.

    The module-level ``_asyncssh_logger_configured`` flag turns every later
    call into a no-op.
    """
    global _asyncssh_logger_configured
    if not _asyncssh_logger_configured:
        logging.getLogger("asyncssh").setLevel(logging.WARNING)
        _asyncssh_logger_configured = True
|
||||
|
||||
# 群組 A(只讀)
|
||||
GROUP_A_TOOLS = {
|
||||
"ssh_diagnose",
|
||||
@@ -198,6 +248,10 @@ class SSHProvider(MCPToolProvider):
|
||||
),
|
||||
input_schema={"type": "object", "properties": {
|
||||
"host": {"type": "string", "description": "Target host IP"},
|
||||
"container_name": {
|
||||
"type": "string",
|
||||
"description": "Optional Docker container name for container-focused diagnostics",
|
||||
},
|
||||
}, "required": ["host"]},
|
||||
server_name=self.name,
|
||||
),
|
||||
@@ -375,7 +429,7 @@ class SSHProvider(MCPToolProvider):
|
||||
error=f"Unknown tool: {tool_name}",
|
||||
)
|
||||
|
||||
host = parameters.get("host", "")
|
||||
host = _normalize_ssh_host(str(parameters.get("host", "")))
|
||||
|
||||
# 守衛 2: 允許的 host
|
||||
if host not in self._allowed_hosts():
|
||||
@@ -500,12 +554,23 @@ class SSHProvider(MCPToolProvider):
|
||||
# 所有接受用戶字串的工具,必須先通過 _validate_param() 白名單驗證
|
||||
if tool_name == "ssh_diagnose":
|
||||
# 2026-04-27 Claude Sonnet 4.6: 主機告警自動診斷 — 只讀,不修改任何狀態
|
||||
return (
|
||||
command = (
|
||||
"echo '=== CPU TOP ===' && ps aux --sort=-%cpu | head -15 && "
|
||||
"echo '=== MEMORY ===' && free -h && "
|
||||
"echo '=== DISK ===' && df -h && "
|
||||
"echo '=== LOAD ===' && uptime"
|
||||
)
|
||||
container_name = params.get("container_name")
|
||||
if container_name:
|
||||
name = _validate_param("container_name", str(container_name))
|
||||
command = (
|
||||
f"{command} && "
|
||||
f"echo '=== DOCKER STATS {name} ===' && "
|
||||
f"docker stats --no-stream {name} 2>&1 && "
|
||||
f"echo '=== DOCKER INSPECT {name} ===' && "
|
||||
f"docker inspect {name} 2>&1 | head -80"
|
||||
)
|
||||
return command
|
||||
|
||||
if tool_name == "ssh_get_top_processes":
|
||||
return "ps aux --sort=-%cpu | head -15"
|
||||
@@ -604,7 +669,9 @@ class SSHProvider(MCPToolProvider):
|
||||
raise RuntimeError(
|
||||
"asyncssh is not installed. "
|
||||
"Add 'asyncssh' to pyproject.toml dependencies."
|
||||
)
|
||||
) from None
|
||||
|
||||
_quiet_asyncssh_info_logs()
|
||||
|
||||
import os
|
||||
if not os.path.exists(SSH_KEY_PATH):
|
||||
@@ -625,11 +692,13 @@ class SSHProvider(MCPToolProvider):
|
||||
|
||||
async with asyncssh.connect(
|
||||
host,
|
||||
port=SSH_PORT,
|
||||
username=username or SSH_USER,
|
||||
client_keys=[SSH_KEY_PATH],
|
||||
known_hosts=known_hosts_path, # None = 跳過驗證(內網),或指定文件路徑
|
||||
connect_timeout=timeout,
|
||||
config=None, # 禁止讀取使用者 ssh config,避免 Port 字串污染 asyncssh
|
||||
connect_timeout=float(timeout),
|
||||
) as conn:
|
||||
# Bug 根因:asyncssh 模組沒有頂層 run();應呼叫 conn.run()(2026-04-24 Claude Sonnet 4.6)
|
||||
result = await conn.run(cmd, timeout=timeout, check=False)
|
||||
result = await conn.run(cmd, timeout=float(timeout), check=False)
|
||||
return (result.stdout or ""), (result.stderr or "")
|
||||
|
||||
@@ -167,6 +167,31 @@ class DriftReportRepository:
|
||||
{"report_id": report_id, "narrative": narrative},
|
||||
)
|
||||
|
||||
async def get_repeat_state(self, report: DriftReport) -> dict:
|
||||
"""Return stable fingerprint repeat state for a drift report.

Loads up to 200 ``drift_reports`` rows that share ``report.namespace`` and
were created within the last 24 hours (per the database ``now()``), newest
``scanned_at`` first, turns each row mapping into a plain dict, and returns
the result of ``build_drift_repeat_state(report, rows)``.
"""
|
||||
# NOTE(review): function-scope import — presumably avoids an import cycle; confirm.
from src.services.drift_repeat_state import build_drift_repeat_state
|
||||
|
||||
async with get_db_context() as db:
|
||||
result = await db.execute(
|
||||
text("""
|
||||
SELECT
|
||||
report_id,
|
||||
namespace,
|
||||
status,
|
||||
scanned_at,
|
||||
created_at,
|
||||
items
|
||||
FROM drift_reports
|
||||
WHERE namespace = :namespace
|
||||
AND created_at > now() - interval '24 hours'
|
||||
ORDER BY scanned_at DESC
|
||||
LIMIT 200
|
||||
"""),
|
||||
{"namespace": report.namespace},
|
||||
)
|
||||
# Materialise every row mapping as a plain dict before handing the rows on.
rows = [dict(row) for row in result.mappings().all()]
|
||||
return build_drift_repeat_state(report, rows)
|
||||
|
||||
|
||||
_drift_repo: DriftReportRepository | None = None
|
||||
|
||||
|
||||
@@ -60,13 +60,17 @@ class MetricsDBRepository(IMetricsRepository):
|
||||
cutoff = datetime.now(UTC) - timedelta(hours=hours)
|
||||
|
||||
# Query: 統計 executed vs total (approved + executed + execution_failed)
|
||||
# 2026-05-06 ogt + Codex:
|
||||
# approval_records.status 目前實際寫入的是大寫 enum
|
||||
# (APPROVED / EXECUTION_SUCCESS / EXECUTION_FAILED)。舊查詢只看
|
||||
# lowercase executed,導致 AI Success 在報表層永遠趨近 0。
|
||||
query = text("""
|
||||
SELECT
|
||||
COUNT(CASE WHEN status = 'executed' THEN 1 END) as executed_count,
|
||||
COUNT(CASE WHEN UPPER(status::text) = 'EXECUTION_SUCCESS' THEN 1 END) as executed_count,
|
||||
COUNT(*) as total_count
|
||||
FROM approval_records
|
||||
WHERE created_at >= :cutoff
|
||||
AND status IN ('approved', 'executed', 'execution_failed')
|
||||
AND UPPER(status::text) IN ('APPROVED', 'EXECUTION_SUCCESS', 'EXECUTION_FAILED')
|
||||
""")
|
||||
|
||||
result = await session.execute(query, {"cutoff": cutoff})
|
||||
@@ -127,11 +131,11 @@ class MetricsDBRepository(IMetricsRepository):
|
||||
trend_query = text("""
|
||||
SELECT
|
||||
date_trunc('hour', created_at) as hour_bucket,
|
||||
COUNT(CASE WHEN status = 'executed' THEN 1 END) * 100.0 /
|
||||
COUNT(CASE WHEN UPPER(status::text) = 'EXECUTION_SUCCESS' THEN 1 END) * 100.0 /
|
||||
NULLIF(COUNT(*), 0) as hourly_rate
|
||||
FROM approval_records
|
||||
WHERE created_at >= :cutoff
|
||||
AND status IN ('approved', 'executed', 'execution_failed')
|
||||
AND UPPER(status::text) IN ('APPROVED', 'EXECUTION_SUCCESS', 'EXECUTION_FAILED')
|
||||
GROUP BY hour_bucket
|
||||
ORDER BY hour_bucket DESC
|
||||
LIMIT :limit
|
||||
|
||||
@@ -104,7 +104,7 @@ async def get_agent_thinking(
|
||||
) -> StreamingResponse:
|
||||
"""
|
||||
OpenClaw 思考軌跡 (SSE 串流)
|
||||
Phase 1.2: 真實串接 Ollama at 192.168.0.188:11434
|
||||
Phase 1.2: 真實串接設定中的 Ollama provider pool
|
||||
"""
|
||||
|
||||
async def generate_thinking_stream():
|
||||
|
||||
606
apps/api/src/services/adr100_remediation_service.py
Normal file
606
apps/api/src/services/adr100_remediation_service.py
Normal file
@@ -0,0 +1,606 @@
|
||||
"""
|
||||
ADR-100 Remediation Service
|
||||
===========================
|
||||
Safe operator entrypoints for verification remediation work items.
|
||||
|
||||
T25: remediation queue items are now actionable without mutating incident state:
|
||||
- preview: show the selected guardrail path
|
||||
- dry-run: collect read-only current state and validate supported executor routing
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from typing import Any, Literal, Protocol
|
||||
|
||||
import structlog
|
||||
|
||||
from src.models.incident import Incident
|
||||
from src.repositories.incident_repository import IncidentDBRepository
|
||||
from src.services.adr100_slo_status_service import (
|
||||
Adr100SloStatusService,
|
||||
get_adr100_slo_status_service,
|
||||
)
|
||||
from src.services.auto_repair_service import AutoRepairService
|
||||
from src.services.post_execution_verifier import (
|
||||
PostExecutionVerifier,
|
||||
_assess_recovery,
|
||||
_build_prometheus_query,
|
||||
get_post_execution_verifier,
|
||||
)
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
RemediationMode = Literal["auto", "reverify", "replay"]
|
||||
|
||||
_READY_STATUSES = {"ready_for_replay", "ready_for_reverify"}
|
||||
|
||||
|
||||
class RemediationNotFoundError(LookupError):
    """Signal that the requested ADR-100 remediation work item is absent.

    "Absent" means the item is not in the current read model.
    """
|
||||
|
||||
|
||||
class _IncidentRepository(Protocol):
|
||||
async def get_by_id(self, incident_id: str) -> Incident | None:
|
||||
...
|
||||
|
||||
|
||||
class Adr100RemediationService:
    """Preview and dry-run ADR-100 remediation queue items.

    Work items are looked up in the ``verification_coverage.remediation_queue``
    section of the SLO status report. Every payload built here reports
    ``writes_incident_state`` and ``writes_auto_repair_result`` as ``False``.
    The only writes this class issues itself are the audit records appended
    by ``dry_run`` (alert operation log and timeline); constructing the
    service with ``record_history=False`` turns those off.
    """

    def __init__(
        self,
        *,
        slo_service: Adr100SloStatusService | None = None,
        incident_repository: _IncidentRepository | None = None,
        auto_repair_service: AutoRepairService | None = None,
        verifier: PostExecutionVerifier | None = None,
        timeline_service: Any | None = None,
        alert_operation_log_repository: Any | None = None,
        record_history: bool = True,
    ) -> None:
        """Store collaborators, building a default for each one not supplied.

        ``timeline_service`` and ``alert_operation_log_repository`` are kept
        as given (possibly ``None``); their defaults are imported lazily the
        first time they are needed.
        """
        if not slo_service:
            slo_service = get_adr100_slo_status_service()
        self._slo_service = slo_service

        if not incident_repository:
            incident_repository = IncidentDBRepository()
        self._incident_repository = incident_repository

        if not auto_repair_service:
            auto_repair_service = AutoRepairService()
        self._auto_repair_service = auto_repair_service

        if not verifier:
            verifier = get_post_execution_verifier()
        self._verifier = verifier

        self._timeline_service = timeline_service
        self._alert_operation_log_repository = alert_operation_log_repository
        self._record_history_enabled = record_history

    async def preview(self, work_item_id: str, mode: RemediationMode = "auto") -> dict[str, Any]:
        """Describe what a dry-run of the work item would do, without running it.

        Raises:
            RemediationNotFoundError: the work item is not in the current queue.
        """
        work_item = await self._find_work_item(work_item_id)
        resolved_mode = _select_mode(work_item, mode)
        checks = _base_checks(work_item)

        preview_payload: dict[str, Any] = {
            "schema_version": "adr100_remediation_preview_v1",
            "work_item_id": work_item.get("work_item_id"),
            "incident_id": work_item.get("incident_id"),
            "auto_repair_id": work_item.get("auto_repair_id"),
            "mode": resolved_mode,
            "allowed": all(entry["passed"] for entry in checks),
            "safety_level": "read_only",
            "writes_incident_state": False,
            "writes_auto_repair_result": False,
            "checks": checks,
            "plan": _plan_for_item(work_item, resolved_mode),
            "source": "adr100.verification_coverage.remediation_queue",
        }
        return preview_payload

    async def dry_run(self, work_item_id: str, mode: RemediationMode = "auto") -> dict[str, Any]:
        """Dry-run one queue item and return the resulting payload.

        When the incident cannot be loaded or any pre-check fails, a "blocked"
        payload is returned instead of running the replay/reverify path. In
        both cases the payload carries a ``history`` entry describing whether
        the attempt was recorded.

        Raises:
            RemediationNotFoundError: the work item is not in the current queue.
        """
        work_item = await self._find_work_item(work_item_id)
        resolved_mode = _select_mode(work_item, mode)
        checks = _base_checks(work_item)

        incident = await self._load_incident(work_item)
        checks.append({
            "name": "incident_loaded",
            "passed": incident is not None,
            "detail": work_item.get("incident_id") or "missing incident_id",
        })

        every_check_passed = all(entry["passed"] for entry in checks)
        if incident is not None and every_check_passed:
            if resolved_mode == "replay":
                return await self._dry_run_replay(work_item, incident, checks)
            return await self._dry_run_reverify(work_item, incident, checks)

        blocked = _dry_run_blocked_payload(work_item, resolved_mode, checks)
        blocked["history"] = await self._record_dry_run_history(work_item, blocked)
        return blocked

    async def history(
        self,
        *,
        limit: int = 50,
        incident_id: str | None = None,
        work_item_id: str | None = None,
    ) -> dict[str, Any]:
        """Return dry-run history entries previously written by this service.

        ``limit`` is clamped to 1..200. Rows are read from the alert operation
        log for both pre-flight event types, ordered newest first, and kept
        only when their context carries this service's history schema version
        (and the requested ``work_item_id``, if one is given). A failed fetch
        for one event type is logged and skipped.
        """
        safe_limit = max(1, min(limit, 200))
        # Over-fetch because rows from other writers are filtered out below.
        fetch_limit = min(max(safe_limit * 4, 50), 200)
        repo = self._operation_log_repository()

        fetched: list[Any] = []
        for event_type in ("PRE_FLIGHT_PASSED", "PRE_FLIGHT_FAILED"):
            try:
                batch, _total = await repo.list_recent(
                    limit=fetch_limit,
                    event_type=event_type,
                    incident_id=incident_id,
                )
                fetched.extend(batch)
            except Exception as exc:
                logger.warning(
                    "adr100_remediation_history_fetch_failed",
                    event_type=event_type,
                    incident_id=incident_id,
                    error=str(exc),
                )

        items: list[dict[str, Any]] = []
        for row in sorted(fetched, key=_record_created_at, reverse=True):
            if len(items) >= safe_limit:
                break
            context = getattr(row, "context", None) or {}
            ours = context.get("schema_version") == "adr100_remediation_dry_run_history_v1"
            if ours and (not work_item_id or context.get("work_item_id") == work_item_id):
                items.append(_history_item(row, context))

        return {
            "schema_version": "adr100_remediation_history_v1",
            "total": len(items),
            "limit": safe_limit,
            "filters": {
                "incident_id": incident_id,
                "work_item_id": work_item_id,
            },
            "items": items,
            "by_work_item": _summarize_history_by_work_item(items),
        }

    def _operation_log_repository(self) -> Any:
        """Return the injected alert-operation-log repository or the default one."""
        injected = self._alert_operation_log_repository
        if injected is not None:
            return injected

        from src.repositories.alert_operation_log_repository import (
            get_alert_operation_log_repository,
        )

        return get_alert_operation_log_repository()

    def _timeline(self) -> Any:
        """Return the injected timeline service or the default one."""
        injected = self._timeline_service
        if injected is not None:
            return injected

        from src.services.approval_db import get_timeline_service

        return get_timeline_service()

    async def _find_work_item(self, work_item_id: str) -> dict[str, Any]:
        """Return a copy of the queue item with this id from the SLO report.

        Raises:
            RemediationNotFoundError: no queue item has this id.
        """
        report = await self._slo_service.fetch_report()
        coverage = report.get("verification_coverage") or {}
        queue = coverage.get("remediation_queue") or {}
        candidates = queue.get("items") or []

        found = next(
            (entry for entry in candidates if entry.get("work_item_id") == work_item_id),
            None,
        )
        if found is None:
            raise RemediationNotFoundError(work_item_id)
        return dict(found)

    async def _load_incident(self, item: dict[str, Any]) -> Incident | None:
        """Load the item's incident; ``None`` when the item has no incident id."""
        incident_id = str(item.get("incident_id") or "")
        if incident_id:
            return await self._incident_repository.get_by_id(incident_id)
        return None

    async def _dry_run_reverify(
        self,
        item: dict[str, Any],
        incident: Incident,
        checks: list[dict[str, Any]],
    ) -> dict[str, Any]:
        """Collect current state, assess it, and build the "reverify" payload."""
        current_state = await self._collect_current_state(incident)
        action_label = f"dry_run_reverify:{item.get('playbook_id') or 'unknown'}"
        assessment = _assess_recovery(None, current_state, action_label)

        extra: dict[str, Any] = {
            "promql": _promql_for_incident(incident),
            "mcp_route": {
                "agent_id": "post_execution_verifier",
                "required_scope": "read",
                "is_shadow": True,
                "flywheel_node": "verify",
            },
        }
        result = _dry_run_result_payload(
            item=item,
            mode="reverify",
            checks=checks,
            post_state=current_state,
            verification_result_preview=assessment,
            extra=extra,
        )
        result["history"] = await self._record_dry_run_history(item, result)
        return result

    async def _dry_run_replay(
        self,
        item: dict[str, Any],
        incident: Incident,
        checks: list[dict[str, Any]],
    ) -> dict[str, Any]:
        """Preview the executor route, collect state, and build the "replay" payload.

        A missing route is recorded as a failed ``supported_executor_route``
        check (so the payload's ``allowed`` becomes False) but does not stop
        the state collection.
        """
        command_preview = _diagnostic_command_for_incident(incident)
        route = self._auto_repair_service.preview_read_only_ssh_mcp_route(
            incident,
            command_preview,
        )
        checks.append({
            "name": "supported_executor_route",
            "passed": route is not None,
            "detail": "mcp:ssh_diagnose" if route else "missing host/container route",
        })

        current_state = await self._collect_current_state(incident)
        action_label = f"dry_run_replay:{item.get('playbook_id') or 'unknown'}"
        assessment = _assess_recovery(None, current_state, action_label)

        extra: dict[str, Any] = {
            "diagnostic_command_preview": command_preview,
            "mcp_route": route,
            "promql": _promql_for_incident(incident),
        }
        result = _dry_run_result_payload(
            item=item,
            mode="replay",
            checks=checks,
            post_state=current_state,
            verification_result_preview=assessment,
            extra=extra,
        )
        result["history"] = await self._record_dry_run_history(item, result)
        return result

    async def _collect_current_state(self, incident: Incident) -> dict[str, Any]:
        """Ask the verifier for the incident's current state, best effort.

        The call is bounded to 12 seconds. A timeout or any other error is
        logged as a warning and yields an empty dict.
        """
        try:
            return await asyncio.wait_for(
                self._verifier._collect_post_state(incident),
                timeout=12.0,
            )
        except asyncio.TimeoutError:
            logger.warning(
                "adr100_remediation_dry_run_timeout",
                incident_id=incident.incident_id,
            )
        except Exception as exc:
            logger.warning(
                "adr100_remediation_dry_run_collect_failed",
                incident_id=incident.incident_id,
                error=str(exc),
            )
        return {}

    async def _record_dry_run_history(
        self,
        item: dict[str, Any],
        payload: dict[str, Any],
    ) -> dict[str, Any]:
        """Append the dry-run outcome to the operation log and the timeline.

        Both writes are best effort and independent: a failure in one is
        logged and does not prevent the other. ``recorded`` is True when at
        least one of them returned an id.
        """
        if not self._record_history_enabled:
            return {"recorded": False, "reason": "disabled"}

        incident_id = str(item.get("incident_id") or "")
        if not incident_id:
            return {"recorded": False, "reason": "missing_incident_id"}

        outcome: dict[str, Any] = {
            "recorded": False,
            "alert_operation_id": None,
            "timeline_event_id": None,
        }
        context = _history_context(item, payload)
        allowed = bool(payload.get("allowed"))

        operation_id = await self._append_operation_log(
            item, payload, context, incident_id, allowed
        )
        if operation_id is not None:
            outcome["alert_operation_id"] = operation_id

        event_id = await self._append_timeline_event(payload, context, incident_id)
        if event_id is not None:
            outcome["timeline_event_id"] = event_id

        outcome["recorded"] = bool(
            outcome.get("alert_operation_id") or outcome.get("timeline_event_id")
        )
        return outcome

    async def _append_operation_log(
        self,
        item: dict[str, Any],
        payload: dict[str, Any],
        context: dict[str, Any],
        incident_id: str,
        allowed: bool,
    ) -> Any | None:
        """Append one pre-flight record; return its id, or None on any failure."""
        try:
            repo = self._operation_log_repository()
            record = await repo.append(
                "PRE_FLIGHT_PASSED" if allowed else "PRE_FLIGHT_FAILED",
                incident_id=incident_id,
                auto_repair_id=str(item.get("auto_repair_id") or "") or None,
                actor="adr100_remediation_service",
                action_detail=f"adr100_remediation_dry_run:{payload.get('mode')}"[:200],
                success=allowed,
                context=context,
            )
            if record is not None:
                return getattr(record, "id", None)
        except Exception as exc:
            logger.warning(
                "adr100_remediation_alert_operation_history_failed",
                incident_id=incident_id,
                error=str(exc),
            )
        return None

    async def _append_timeline_event(
        self,
        payload: dict[str, Any],
        context: dict[str, Any],
        incident_id: str,
    ) -> Any | None:
        """Add one timeline event; return its id, or None on any failure."""
        try:
            timeline = self._timeline()
            event = await timeline.add_event(
                event_type="verifier",
                status=_timeline_status(payload),
                title="ADR-100 remediation dry-run",
                description=_history_description(context),
                actor="adr100_remediation_service",
                actor_role=str(payload.get("mode") or "dry_run"),
                incident_id=incident_id,
            )
            if event:
                return event.get("id")
        except Exception as exc:
            logger.warning(
                "adr100_remediation_timeline_history_failed",
                incident_id=incident_id,
                error=str(exc),
            )
        return None
|
||||
|
||||
|
||||
def _select_mode(item: dict[str, Any], requested: RemediationMode) -> Literal["reverify", "replay"]:
    """Resolve the requested mode to a concrete one.

    An explicit "reverify" or "replay" request wins. Otherwise the item
    decides: it is re-verified when its status is "ready_for_reverify" or its
    action is "reverify_with_promql_template", and replayed in every other case.
    """
    if requested == "reverify" or requested == "replay":
        return requested

    wants_reverify = (
        item.get("remediation_status") == "ready_for_reverify"
        or item.get("remediation_action") == "reverify_with_promql_template"
    )
    return "reverify" if wants_reverify else "replay"
|
||||
|
||||
|
||||
def _base_checks(item: dict[str, Any]) -> list[dict[str, Any]]:
    """Build the three pre-checks that apply to every work item.

    Each check is a dict with ``name``, ``passed`` and ``detail``. Missing
    status/action values are reported as "unknown" (and therefore fail).
    """

    def check(name: str, passed: bool, detail: str) -> dict[str, Any]:
        return {"name": name, "passed": passed, "detail": detail}

    status = str(item.get("remediation_status") or "unknown")
    action = str(item.get("remediation_action") or "unknown")
    read_only_actions = {
        "replay_with_supported_executor",
        "reverify_with_promql_template",
    }

    return [
        check("queue_item_ready", status in _READY_STATUSES, status),
        check("read_only_guardrail", action in read_only_actions, action),
        # Always passes: recorded so the guarantee is visible in the payload.
        check(
            "no_state_mutation",
            True,
            "dry_run_does_not_update_incident_or_auto_repair_rows",
        ),
    ]
|
||||
|
||||
|
||||
def _plan_for_item(item: dict[str, Any], mode: str) -> dict[str, Any]:
    """Return the plan shown in a preview for the given mode.

    Any mode other than "reverify" gets the replay plan, which also carries
    the item's ``remediation_action`` as ``target_action``.
    """
    reverify = mode == "reverify"
    plan: dict[str, Any] = {
        "step": (
            "collect_current_state_and_assess"
            if reverify
            else "validate_supported_executor_route_then_collect_current_state"
        ),
        "agent_id": "post_execution_verifier" if reverify else "auto_repair_executor",
        "required_scope": "read",
        "writes": [],
    }
    if not reverify:
        plan["target_action"] = item.get("remediation_action")
    return plan
|
||||
|
||||
|
||||
def _dry_run_blocked_payload(
    item: dict[str, Any],
    mode: str,
    checks: list[dict[str, Any]],
) -> dict[str, Any]:
    """Build the payload for a dry-run that was stopped by a failed pre-check.

    The payload always reports ``allowed`` and ``executed`` as False, a
    "blocked" verification preview, and an empty post-state summary.
    """
    payload: dict[str, Any] = {"schema_version": "adr100_remediation_dry_run_v1"}
    for key in ("work_item_id", "incident_id", "auto_repair_id"):
        payload[key] = item.get(key)
    payload.update(
        mode=mode,
        allowed=False,
        executed=False,
        safety_level="read_only",
        writes_incident_state=False,
        writes_auto_repair_result=False,
        checks=checks,
        verification_result_preview="blocked",
        post_state_summary={},
    )
    return payload
|
||||
|
||||
|
||||
def _dry_run_result_payload(
    *,
    item: dict[str, Any],
    mode: str,
    checks: list[dict[str, Any]],
    post_state: dict[str, Any],
    verification_result_preview: str,
    extra: dict[str, Any],
) -> dict[str, Any]:
    """Build the payload for a dry-run that actually ran.

    ``allowed`` is True only when every check passed. The raw ``post_state``
    is reduced to a summary. Keys in ``extra`` are merged last, so they
    override a core key of the same name.
    """
    every_check_passed = all(entry["passed"] for entry in checks)
    payload: dict[str, Any] = {
        "schema_version": "adr100_remediation_dry_run_v1",
        "work_item_id": item.get("work_item_id"),
        "incident_id": item.get("incident_id"),
        "auto_repair_id": item.get("auto_repair_id"),
        "mode": mode,
        "allowed": every_check_passed,
        "executed": True,
        "safety_level": "read_only",
        "writes_incident_state": False,
        "writes_auto_repair_result": False,
        "checks": checks,
        "verification_result_preview": verification_result_preview,
        "post_state_summary": _summarize_post_state(post_state),
    }
    payload.update(extra)
    return payload
|
||||
|
||||
|
||||
def _summarize_post_state(post_state: dict[str, Any]) -> dict[str, Any]:
    """Reduce collected state to its key count, first eight sorted keys, and a non-empty flag."""
    tool_names = sorted(post_state.keys())
    summary: dict[str, Any] = {}
    summary["tool_count"] = len(tool_names)
    summary["tools"] = tool_names[:8]
    summary["has_state"] = bool(post_state)
    return summary
|
||||
|
||||
|
||||
def _history_context(item: dict[str, Any], payload: dict[str, Any]) -> dict[str, Any]:
    """Build the context stored with a dry-run history record.

    Identity fields are copied from the queue item, outcome fields from the
    dry-run payload. Keys missing from either source are stored as ``None``.
    """
    item_keys = ("work_item_id", "auto_repair_id", "playbook_id", "alertname")
    payload_keys = (
        "mode",
        "allowed",
        "executed",
        "safety_level",
        "writes_incident_state",
        "writes_auto_repair_result",
        "verification_result_preview",
        "post_state_summary",
        "mcp_route",
        "checks",
    )

    # This schema version is what history() filters on when reading rows back.
    context: dict[str, Any] = {"schema_version": "adr100_remediation_dry_run_history_v1"}
    for key in item_keys:
        context[key] = item.get(key)
    for key in payload_keys:
        context[key] = payload.get(key)
    return context
|
||||
|
||||
|
||||
def _timeline_status(payload: dict[str, Any]) -> str:
    """Return "success" only for an allowed run whose preview is "success"; else "warning"."""
    succeeded = (
        payload.get("allowed")
        and payload.get("verification_result_preview") == "success"
    )
    return "success" if succeeded else "warning"
|
||||
|
||||
|
||||
def _history_description(context: dict[str, Any]) -> str:
    """Render a history context as a one-line timeline description (at most 500 chars)."""
    state_summary = context.get("post_state_summary") or {}
    route = context.get("mcp_route") or {}
    agent = route.get("agent_id") or "unknown_agent"
    tool = route.get("tool_name") or "current_state"

    parts = [
        f"mode={context.get('mode')}",
        f"preview={context.get('verification_result_preview')}",
        f"tools={state_summary.get('tool_count', 0)}",
        f"route={agent}/{tool}",
        f"writes_incident={context.get('writes_incident_state')}",
        f"writes_auto_repair={context.get('writes_auto_repair_result')}",
    ]
    return " ".join(parts)[:500]
|
||||
|
||||
|
||||
def _record_created_at(record: Any) -> str:
    """Return the record's ``created_at`` as a string, or "" when it is missing.

    Values exposing ``isoformat`` are rendered through it; anything else is
    passed to ``str``. Also used as the sort key for history rows.
    """
    stamp = getattr(record, "created_at", None)
    if not hasattr(stamp, "isoformat"):
        return str(stamp or "")
    return stamp.isoformat()
|
||||
|
||||
|
||||
def _history_item(record: Any, context: dict[str, Any]) -> dict[str, Any]:
    """Flatten one operation-log record and its stored context into a history entry.

    Row-level fields come from ``record`` attributes (missing ones default to
    ``None`` or ""); everything else is read from ``context``. ``tool_name``
    falls back to "current_state" when the stored route has none.
    """
    route = context.get("mcp_route") or {}
    state_summary = context.get("post_state_summary") or {}

    entry: dict[str, Any] = {
        "id": str(getattr(record, "id", "")),
        "incident_id": getattr(record, "incident_id", None),
        # Prefer the value stored on the row over the one kept in the context.
        "auto_repair_id": getattr(record, "auto_repair_id", None)
        or context.get("auto_repair_id"),
        "event_type": str(getattr(record, "event_type", "")),
        "actor": getattr(record, "actor", None),
        "success": getattr(record, "success", None),
        "created_at": _record_created_at(record),
    }
    for key in (
        "work_item_id",
        "playbook_id",
        "alertname",
        "mode",
        "allowed",
        "executed",
        "safety_level",
        "verification_result_preview",
    ):
        entry[key] = context.get(key)

    entry["tool_count"] = state_summary.get("tool_count", 0)
    entry["tools"] = state_summary.get("tools") or []
    entry["agent_id"] = route.get("agent_id")
    entry["tool_name"] = route.get("tool_name") or "current_state"
    entry["required_scope"] = route.get("required_scope")
    entry["writes_incident_state"] = context.get("writes_incident_state")
    entry["writes_auto_repair_result"] = context.get("writes_auto_repair_result")
    entry["checks"] = context.get("checks") or []
    return entry
|
||||
|
||||
|
||||
def _summarize_history_by_work_item(items: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Group history entries per work item, in first-seen order.

    Entries are keyed by ``work_item_id``, falling back to ``incident_id`` and
    then ``id``. The ``latest_*`` fields are taken from the first entry seen
    for a key, so the input is expected to be ordered newest first.
    """
    groups: dict[str, dict[str, Any]] = {}
    for entry in items:
        group_key = str(
            entry.get("work_item_id") or entry.get("incident_id") or entry.get("id")
        )
        group = groups.get(group_key)
        if group is None:
            group = {
                "work_item_id": entry.get("work_item_id"),
                "incident_id": entry.get("incident_id"),
                "count": 0,
                "latest_at": entry.get("created_at"),
                "latest_event_type": entry.get("event_type"),
                "latest_success": entry.get("success"),
                "latest_preview": entry.get("verification_result_preview"),
                "latest_mode": entry.get("mode"),
                "latest_agent_id": entry.get("agent_id"),
                "latest_tool_name": entry.get("tool_name"),
                "required_scope": entry.get("required_scope"),
            }
            groups[group_key] = group
        group["count"] += 1
    return list(groups.values())
|
||||
|
||||
|
||||
def _diagnostic_command_for_incident(incident: Incident) -> str:
    """Build the ``ssh`` diagnostic command previewed for an incident.

    The host comes from the first signal's ``host`` (or ``instance``) label
    and the optional container from ``container_name`` (or ``container``).
    When no host label is present the literal placeholder ``{host}`` is used.

    Label values are shell-quoted before being placed in the command, so a
    value containing spaces, quotes or ``;`` cannot alter the command's
    structure. For ordinary host and container names the result is identical
    to plain interpolation.
    """
    import shlex  # local import: the file's top-level imports are outside this block

    labels = _labels_for_incident(incident)

    raw_host = str(labels.get("host") or labels.get("instance") or "")
    # Keep the "{host}" placeholder verbatim; only real label values are quoted.
    host = shlex.quote(raw_host) if raw_host else "{host}"

    remote_command = "uptime; docker stats --no-stream"
    container = str(labels.get("container_name") or labels.get("container") or "")
    if container:
        remote_command = f"{remote_command} {shlex.quote(container)}"

    # Quoting the whole remote command yields the same '...' wrapping as
    # before and correctly escapes any quotes added around the container.
    return f"ssh {host} {shlex.quote(remote_command)}"
|
||||
|
||||
|
||||
def _promql_for_incident(incident: Incident) -> str:
    """Build the Prometheus query for an incident from its first signal.

    The alert name is the ``alertname`` label, falling back to the signal's
    ``alert_name`` attribute; it is an empty string when there are no signals.
    """
    labels = _labels_for_incident(incident)
    if incident.signals:
        first_signal = incident.signals[0]
        alertname = labels.get("alertname") or getattr(first_signal, "alert_name", "")
    else:
        alertname = ""
    return _build_prometheus_query(alertname, labels)
|
||||
|
||||
|
||||
def _labels_for_incident(incident: Incident) -> dict[str, Any]:
    """Return the first signal's labels, or an empty dict when there are none."""
    signals = incident.signals
    if not signals:
        return {}
    return signals[0].labels or {}
|
||||
|
||||
|
||||
# Process-wide service instance; created on first use or injected by tests.
_service: Adr100RemediationService | None = None


def get_adr100_remediation_service() -> Adr100RemediationService:
    """Return the shared ADR-100 remediation service, creating it on first call."""
    global _service
    if _service is not None:
        return _service
    _service = Adr100RemediationService()
    return _service


def set_adr100_remediation_service(service: Adr100RemediationService | None) -> None:
    """Replace the shared service (for tests); ``None`` makes the next get rebuild it."""
    global _service
    _service = service
|
||||
354
apps/api/src/services/adr100_slo_metrics_service.py
Normal file
354
apps/api/src/services/adr100_slo_metrics_service.py
Normal file
@@ -0,0 +1,354 @@
|
||||
"""
|
||||
ADR-100 SLO metrics emitter.
|
||||
|
||||
Prometheus recording rules for the AI flywheel SLOs expect a small set of
|
||||
counter-like metrics. The source of truth already lives in PostgreSQL, so this
|
||||
read-side emitter exposes DB totals on /metrics without changing runtime write
|
||||
paths or introducing another state store.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from time import time
|
||||
|
||||
from sqlalchemy import text
|
||||
|
||||
from src.db.base import get_db_context
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class AutomationOperationSample:
    """One automation-operation series: a count for an (outcome, operation_type) pair."""

    # Rendered as the `outcome` label.
    outcome: str
    # Rendered as the `operation_type` label.
    operation_type: str
    # Rendered as the sample value.
    count: int
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class VerificationSample:
    """One post-execution-verification series: a count for a single outcome."""

    # Rendered as the `outcome` label.
    outcome: str
    # Rendered as the sample value.
    count: int
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class Adr100SloMetricsSnapshot:
    """Everything needed to render one ADR-100 metrics scrape.

    All fields have defaults, so an empty snapshot can be built with no
    arguments; ``emitted_at`` then records the construction time.
    """

    # All-time and last-24h automation operation counts.
    automation_operations: list[AutomationOperationSample] = field(default_factory=list)
    automation_operations_24h: list[AutomationOperationSample] = field(default_factory=list)
    # All-time and last-24h verification result counts.
    post_execution_verifications: list[VerificationSample] = field(default_factory=list)
    post_execution_verifications_24h: list[VerificationSample] = field(default_factory=list)
    # Knowledge entry counts.
    knowledge_entries_total: int = 0
    knowledge_entries_created_24h: int = 0
    # High-confidence approval counts (all, and those with a successful verification).
    high_confidence_total: int = 0
    high_confidence_success_total: int = 0
    # Unix timestamp (seconds) taken when the snapshot object is created.
    emitted_at: float = field(default_factory=time)
|
||||
|
||||
|
||||
class Adr100SloMetricsService:
    """Read ADR-100 SLO counts from the database and expose them as metrics text."""

    async def to_prometheus_lines(self) -> str:
        """Fetch a fresh snapshot and render it in Prometheus text format."""
        return render_adr100_slo_metrics(await self.fetch_snapshot())

    async def fetch_snapshot(self) -> Adr100SloMetricsSnapshot:
        """Run every count query in one DB context and assemble the snapshot."""
        async with get_db_context() as db:

            async def fetch_rows(sql: str) -> Any:
                result = await db.execute(text(sql))
                return result.fetchall()

            async def fetch_count(sql: str) -> int:
                result = await db.execute(text(sql))
                # scalar() may yield None; report that as zero.
                return int(result.scalar() or 0)

            automation_rows = await fetch_rows(_AUTOMATION_OPERATION_SQL)
            automation_24h_rows = await fetch_rows(_AUTOMATION_OPERATION_24H_SQL)
            verification_rows = await fetch_rows(_POST_EXECUTION_VERIFICATION_SQL)
            verification_24h_rows = await fetch_rows(_POST_EXECUTION_VERIFICATION_24H_SQL)
            knowledge_total = await fetch_count("SELECT count(*) FROM knowledge_entries")
            knowledge_created_24h = await fetch_count(
                """
                SELECT count(*)
                FROM knowledge_entries
                WHERE created_at >= NOW() - INTERVAL '24 hours'
                """
            )
            confidence_row = (
                await db.execute(text(_HIGH_CONFIDENCE_APPROVAL_SQL))
            ).one()

        def operation_samples(rows: Any) -> list[AutomationOperationSample]:
            return [
                AutomationOperationSample(
                    outcome=str(row.outcome),
                    operation_type=str(row.operation_type),
                    count=int(row.count or 0),
                )
                for row in rows
            ]

        def verification_samples(rows: Any) -> list[VerificationSample]:
            return [
                VerificationSample(
                    outcome=str(row.outcome),
                    count=int(row.count or 0),
                )
                for row in rows
            ]

        return Adr100SloMetricsSnapshot(
            automation_operations=operation_samples(automation_rows),
            automation_operations_24h=operation_samples(automation_24h_rows),
            post_execution_verifications=verification_samples(verification_rows),
            post_execution_verifications_24h=verification_samples(verification_24h_rows),
            knowledge_entries_total=knowledge_total,
            knowledge_entries_created_24h=knowledge_created_24h,
            high_confidence_total=int(confidence_row.high_confidence_total or 0),
            high_confidence_success_total=int(
                confidence_row.high_confidence_success_total or 0
            ),
        )
|
||||
|
||||
|
||||
def render_adr100_slo_metrics(snapshot: Adr100SloMetricsSnapshot) -> str:
    """Render a snapshot as Prometheus text exposition lines.

    The output starts with an empty line and ends with a newline. A labelled
    metric with no samples is still emitted, as a single zero-valued series
    whose labels are all "none".
    """

    def operation_series(metric: str, samples: list) -> list[str]:
        # Series labelled by outcome and operation_type.
        if not samples:
            return [f'{metric}{{outcome="none",operation_type="none"}} 0']
        return [
            f'{metric}{{outcome="{_escape_label(s.outcome)}",'
            f'operation_type="{_escape_label(s.operation_type)}"}} {s.count}'
            for s in samples
        ]

    def verification_series(metric: str, samples: list) -> list[str]:
        # Series labelled by outcome only.
        if not samples:
            return [f'{metric}{{outcome="none"}} 0']
        return [
            f'{metric}{{outcome="{_escape_label(s.outcome)}"}} {s.count}'
            for s in samples
        ]

    out: list[str] = [""]

    out.append("# HELP automation_operation_log_total DB-derived AI automation operation count for ADR-100 SLOs")
    out.append("# TYPE automation_operation_log_total counter")
    out += operation_series("automation_operation_log_total", snapshot.automation_operations)

    out.append("# HELP automation_operation_created_24h DB-derived AI automation operation count created in the last 24 hours for ADR-100 SLO dashboards")
    out.append("# TYPE automation_operation_created_24h gauge")
    out += operation_series("automation_operation_created_24h", snapshot.automation_operations_24h)

    out.append("# HELP post_execution_verification_total DB-derived post execution verification result count for ADR-100 SLOs")
    out.append("# TYPE post_execution_verification_total counter")
    out += verification_series("post_execution_verification_total", snapshot.post_execution_verifications)

    out.append("# HELP post_execution_verification_created_24h DB-derived post execution verification result count created in the last 24 hours for ADR-100 SLO dashboards")
    out.append("# TYPE post_execution_verification_created_24h gauge")
    out += verification_series("post_execution_verification_created_24h", snapshot.post_execution_verifications_24h)

    out += [
        "# HELP knowledge_entries_total DB-derived knowledge entry count for ADR-100 SLOs",
        "# TYPE knowledge_entries_total counter",
        f"knowledge_entries_total {snapshot.knowledge_entries_total}",
        "# HELP knowledge_entries_created_24h DB-derived knowledge entries created in the last 24 hours for ADR-100 SLOs",
        "# TYPE knowledge_entries_created_24h gauge",
        f"knowledge_entries_created_24h {snapshot.knowledge_entries_created_24h}",
        "# HELP approval_records_high_confidence_total DB-derived high confidence approval decisions for ADR-100 SLOs",
        "# TYPE approval_records_high_confidence_total counter",
        f"approval_records_high_confidence_total {snapshot.high_confidence_total}",
        "# HELP approval_records_high_confidence_success_total DB-derived high confidence approval decisions with successful verification for ADR-100 SLOs",
        "# TYPE approval_records_high_confidence_success_total counter",
        f"approval_records_high_confidence_success_total {snapshot.high_confidence_success_total}",
        "# HELP adr100_slo_emitter_last_success_timestamp Last successful ADR-100 DB metrics emission timestamp",
        "# TYPE adr100_slo_emitter_last_success_timestamp gauge",
        f"adr100_slo_emitter_last_success_timestamp {snapshot.emitted_at:.0f}",
        # Trailing empty element makes the joined text end with a newline.
        "",
    ]
    return "\n".join(out)
|
||||
|
||||
|
||||
def _escape_label(value: str) -> str:
    """Escape backslash, newline and double quote for use inside a label value."""
    # Single pass per character, so backslashes added by an escape are never re-escaped.
    escapes = str.maketrans({"\\": "\\\\", "\n": "\\n", '"': '\\"'})
    return value.translate(escapes)
|
||||
|
||||
|
||||
_AUTOMATION_OPERATION_SQL = """
|
||||
WITH automation_scope AS (
|
||||
SELECT
|
||||
CASE
|
||||
WHEN status <> 'success' THEN status
|
||||
WHEN actor = 'approval_execution'
|
||||
AND COALESCE(input->>'requested_by', '') NOT ILIKE 'auto%%'
|
||||
THEN 'human_required'
|
||||
ELSE 'auto_executed'
|
||||
END AS outcome,
|
||||
operation_type
|
||||
FROM automation_operation_log
|
||||
WHERE operation_type IN (
|
||||
'playbook_executed',
|
||||
'remediation_executed',
|
||||
'remediation_verified',
|
||||
'remediation_rolled_back',
|
||||
'self_correction_attempted'
|
||||
)
|
||||
UNION ALL
|
||||
SELECT
|
||||
CASE WHEN success THEN 'auto_executed' ELSE 'failed' END AS outcome,
|
||||
'auto_repair_executed' AS operation_type
|
||||
FROM auto_repair_executions
|
||||
)
|
||||
SELECT
|
||||
outcome,
|
||||
operation_type,
|
||||
count(*) AS count
|
||||
FROM automation_scope
|
||||
GROUP BY outcome, operation_type
|
||||
ORDER BY outcome, operation_type
|
||||
"""
|
||||
|
||||
|
||||
_AUTOMATION_OPERATION_24H_SQL = """
|
||||
WITH automation_scope AS (
|
||||
SELECT
|
||||
CASE
|
||||
WHEN status <> 'success' THEN status
|
||||
WHEN actor = 'approval_execution'
|
||||
AND COALESCE(input->>'requested_by', '') NOT ILIKE 'auto%%'
|
||||
THEN 'human_required'
|
||||
ELSE 'auto_executed'
|
||||
END AS outcome,
|
||||
operation_type
|
||||
FROM automation_operation_log
|
||||
WHERE operation_type IN (
|
||||
'playbook_executed',
|
||||
'remediation_executed',
|
||||
'remediation_verified',
|
||||
'remediation_rolled_back',
|
||||
'self_correction_attempted'
|
||||
)
|
||||
AND created_at >= NOW() - INTERVAL '24 hours'
|
||||
UNION ALL
|
||||
SELECT
|
||||
CASE WHEN success THEN 'auto_executed' ELSE 'failed' END AS outcome,
|
||||
'auto_repair_executed' AS operation_type
|
||||
FROM auto_repair_executions
|
||||
WHERE created_at >= NOW() - INTERVAL '24 hours'
|
||||
)
|
||||
SELECT
|
||||
outcome,
|
||||
operation_type,
|
||||
count(*) AS count
|
||||
FROM automation_scope
|
||||
GROUP BY outcome, operation_type
|
||||
ORDER BY outcome, operation_type
|
||||
"""
|
||||
|
||||
|
||||
_POST_EXECUTION_VERIFICATION_SQL = """
|
||||
SELECT verification_result AS outcome, count(*) AS count
|
||||
FROM incident_evidence
|
||||
WHERE verification_result IS NOT NULL
|
||||
GROUP BY verification_result
|
||||
ORDER BY verification_result
|
||||
"""
|
||||
|
||||
|
||||
_POST_EXECUTION_VERIFICATION_24H_SQL = """
|
||||
SELECT verification_result AS outcome, count(*) AS count
|
||||
FROM incident_evidence
|
||||
WHERE verification_result IS NOT NULL
|
||||
AND collected_at >= NOW() - INTERVAL '24 hours'
|
||||
GROUP BY verification_result
|
||||
ORDER BY verification_result
|
||||
"""
|
||||
|
||||
|
||||
_HIGH_CONFIDENCE_APPROVAL_SQL = """
|
||||
WITH approval_confidence AS (
|
||||
SELECT
|
||||
id,
|
||||
incident_id,
|
||||
COALESCE(
|
||||
CASE
|
||||
WHEN extra_metadata->>'confidence_score' ~ '^[0-9]+(\\.[0-9]+)?$'
|
||||
THEN (extra_metadata->>'confidence_score')::numeric
|
||||
ELSE NULL
|
||||
END,
|
||||
CASE
|
||||
WHEN extra_metadata->>'confidence' ~ '^[0-9]+(\\.[0-9]+)?$'
|
||||
THEN (extra_metadata->>'confidence')::numeric
|
||||
ELSE NULL
|
||||
END,
|
||||
composite_score,
|
||||
0
|
||||
) AS confidence
|
||||
FROM approval_records
|
||||
)
|
||||
SELECT
|
||||
count(*) FILTER (WHERE confidence >= 0.8) AS high_confidence_total,
|
||||
count(*) FILTER (
|
||||
WHERE confidence >= 0.8
|
||||
AND EXISTS (
|
||||
SELECT 1
|
||||
FROM incident_evidence ev
|
||||
WHERE ev.incident_id = approval_confidence.incident_id
|
||||
AND ev.verification_result = 'success'
|
||||
)
|
||||
) AS high_confidence_success_total
|
||||
FROM approval_confidence
|
||||
"""
|
||||
|
||||
|
||||
# Process-wide emitter instance, created on first use.
_adr100_slo_metrics_service: Adr100SloMetricsService | None = None


def get_adr100_slo_metrics_service() -> Adr100SloMetricsService:
    """Return the shared ADR-100 SLO metrics service, creating it on first call."""
    global _adr100_slo_metrics_service
    if _adr100_slo_metrics_service is not None:
        return _adr100_slo_metrics_service
    _adr100_slo_metrics_service = Adr100SloMetricsService()
    return _adr100_slo_metrics_service
|
||||
743
apps/api/src/services/adr100_slo_status_service.py
Normal file
743
apps/api/src/services/adr100_slo_status_service.py
Normal file
@@ -0,0 +1,743 @@
|
||||
"""
|
||||
Read-only ADR-100 SLO status snapshot.
|
||||
|
||||
GovernanceAgent.check_slo_compliance() can emit governance alerts when an SLO is
|
||||
violated. This service is intentionally read-only so dashboards can show the
|
||||
same Prometheus-backed state without producing Telegram/DB side effects.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
from dataclasses import dataclass
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
import structlog
|
||||
from sqlalchemy import text
|
||||
|
||||
from src.core.config import settings
|
||||
from src.db.base import get_db_context
|
||||
from src.utils.timezone import now_taipei_iso
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class Adr100SloDefinition:
    """Immutable description of one ADR-100 SLO and how it is evaluated.

    The status service runs ``query`` against Prometheus and compares the
    result with ``target`` and ``hard_red_line``. When ``denominator_query``
    is set, it is evaluated first to decide whether enough events were
    observed for the SLI to be meaningful.
    """

    # Identifier reported in the status payload.
    name: str
    # PromQL expression that yields the SLI value.
    query: str
    # Value separating "ok" from "warning".
    target: float
    # Value separating "warning" from "violated".
    hard_red_line: float
    # "above" means the value must stay at or above the thresholds;
    # any other string is evaluated as "must stay at or below".
    direction: str
    # Display unit, reported verbatim in the payload.
    unit: str
    # Evaluation window label, reported verbatim in the payload.
    window: str
    # Optional PromQL expression checked before the SLI itself.
    denominator_query: str | None = None
    # Multiplied with the denominator value to obtain the sample count.
    denominator_window_seconds: int = 0
    # Sample counts below this threshold mark the metric as low volume.
    minimum_events: float = 1.0
|
||||
|
||||
|
||||
# SLOs evaluated by the status service, in reporting order.
ADR100_SLO_DEFINITIONS: tuple[Adr100SloDefinition, ...] = (
    # Ratio SLIs: each carries a denominator query so low-traffic windows
    # can be reported as low volume instead of being scored.
    Adr100SloDefinition(
        name="autonomy_rate",
        query="sli:autonomy_rate:5m",
        target=0.80,
        hard_red_line=0.70,
        direction="above",
        unit="percent",
        window="5m",
        denominator_query="sum(rate(automation_operation_log_total[5m]))",
        denominator_window_seconds=300,
    ),
    Adr100SloDefinition(
        name="decision_accuracy",
        query="sli:decision_accuracy:5m",
        target=0.90,
        hard_red_line=0.85,
        direction="above",
        unit="percent",
        window="5m",
        denominator_query='sum(rate(automation_operation_log_total{outcome="auto_executed"}[5m]))',
        denominator_window_seconds=300,
    ),
    Adr100SloDefinition(
        name="confidence_calibration",
        query="sli:confidence_calibration:1h",
        target=0.80,
        hard_red_line=0.70,
        direction="above",
        unit="percent",
        window="1h",
        denominator_query="sum(rate(approval_records_high_confidence_total[1h]))",
        denominator_window_seconds=3600,
    ),
    # Absolute count SLI: no denominator, the value itself is the sample count.
    Adr100SloDefinition(
        name="km_growth_rate",
        query="max(knowledge_entries_created_24h) or max(sli:km_growth_rate:24h)",
        target=20.0,
        hard_red_line=5.0,
        direction="above",
        unit="count",
        window="24h",
    ),
)
|
||||
|
||||
|
||||
class Adr100SloStatusService:
    """Fetch ADR-100 SLO status from Prometheus without writing governance events."""

    async def fetch_report(self) -> dict[str, Any]:
        """Assemble the read-only ADR-100 SLO status report.

        Every entry of ``ADR100_SLO_DEFINITIONS`` is queried against
        Prometheus one after another through a single HTTP client with a
        5 second timeout. The PostgreSQL-backed verification coverage summary
        is then added and the overall status is derived from both.

        Returns:
            An ``adr100_slo_status_v1`` payload. ``overall_compliance`` is the
            share of evaluable metrics whose status is ``"ok"``, or ``None``
            when no metric is evaluable.
        """
        prom_url = getattr(
            settings,
            "PROMETHEUS_URL",
            "http://prometheus.observability.svc:9090",
        ).rstrip("/")
        metrics: list[dict[str, Any]] = []

        async with httpx.AsyncClient(timeout=5.0) as client:
            for definition in ADR100_SLO_DEFINITIONS:
                metrics.append(await self._fetch_metric(client, prom_url, definition))

        evaluable = [metric for metric in metrics if metric.get("evaluable")]
        ok_count = sum(1 for metric in evaluable if metric.get("status") == "ok")
        overall_compliance = (ok_count / len(evaluable)) if evaluable else None
        verification_coverage = await self._fetch_verification_coverage()
        overall_status = _overall_status(metrics, evaluable, verification_coverage)

        return {
            "schema_version": "adr100_slo_status_v1",
            "source": "prometheus+postgresql",
            "evaluated_at": now_taipei_iso(),
            "overall_status": overall_status,
            "overall_compliance": overall_compliance,
            "evaluable_count": len(evaluable),
            "metric_count": len(metrics),
            "metrics": metrics,
            "verification_coverage": verification_coverage,
        }

    async def _fetch_metric(
        self,
        client: httpx.AsyncClient,
        prom_url: str,
        definition: Adr100SloDefinition,
    ) -> dict[str, Any]:
        """Evaluate a single SLO definition and return its metric payload.

        Args:
            client: HTTP client used for the Prometheus queries.
            prom_url: Prometheus base URL without a trailing slash.
            definition: The SLO to evaluate.

        Returns:
            The payload produced by ``_metric_payload``. Its status is
            ``"no_data"`` when a query returned no usable value,
            ``"skipped_low_volume"`` when too few events were observed or the
            SLI is NaN/Inf, and otherwise the classification of the value.
        """
        denominator_value: float | None = None
        sample_count: float | None = None

        if definition.denominator_query:
            denominator_result = await _query_prometheus_value(
                client,
                prom_url,
                definition.denominator_query,
            )
            if denominator_result["status"] != "ok":
                return _metric_payload(
                    definition,
                    value=None,
                    status="no_data",
                    reason=denominator_result["reason"],
                    denominator_value=None,
                    sample_count=None,
                )

            denominator_value = float(denominator_result["value"])
            # Scale the denominator by the window length to get an event count.
            sample_count = denominator_value * definition.denominator_window_seconds
            if sample_count < definition.minimum_events:
                return _metric_payload(
                    definition,
                    value=None,
                    status="skipped_low_volume",
                    reason="denominator_below_minimum_events",
                    denominator_value=denominator_value,
                    sample_count=sample_count,
                )

        value_result = await _query_prometheus_value(client, prom_url, definition.query)
        if value_result["status"] != "ok":
            # A NaN/Inf SLI is reported as low volume rather than as missing data.
            status = (
                "skipped_low_volume"
                if value_result["reason"] == "prometheus_nan_or_inf"
                else "no_data"
            )
            return _metric_payload(
                definition,
                value=None,
                status=status,
                reason=value_result["reason"],
                denominator_value=denominator_value,
                sample_count=sample_count,
            )

        value = float(value_result["value"])
        status = _classify_status(value, definition)
        return _metric_payload(
            definition,
            value=value,
            status=status,
            reason=None,
            denominator_value=denominator_value,
            # Without a denominator the value itself is reported as the sample count.
            sample_count=sample_count if sample_count is not None else value,
        )

    async def _fetch_verification_coverage(self) -> dict[str, Any]:
        """Summarize whether recent auto-repair executions have verifier evidence.

        Runs the three verification coverage SQL statements in one database
        context. Any exception is logged and converted into an ``"error"``
        payload that carries the same keys as the regular payload, so
        consumers can rely on a single schema.
        """
        try:
            async with get_db_context() as db:
                summary_row = (
                    await db.execute(text(_VERIFICATION_COVERAGE_SQL))
                ).mappings().one()
                recent_rows = (
                    await db.execute(text(_VERIFICATION_COVERAGE_RECENT_SQL))
                ).mappings().all()
                recent_non_success_rows = (
                    await db.execute(text(_VERIFICATION_COVERAGE_NON_SUCCESS_SQL))
                ).mappings().all()
        except Exception as exc:
            logger.warning("adr100_verification_coverage_query_error", error=str(exc))
            return {
                "schema_version": "adr100_verification_coverage_v1",
                "source": "postgresql",
                "window": "24h",
                "status": "error",
                "reason": "postgresql_query_error",
                "evaluable": False,
                "total_auto": 0,
                "successful_auto": 0,
                "verified_auto": 0,
                "verified_success": 0,
                "verified_non_success": 0,
                "unverified_auto": 0,
                "coverage_rate": None,
                "verification_success_rate": None,
                "last_auto_at": None,
                "last_verified_auto_at": None,
                "last_verification_evidence_at": None,
                "latest_auto_age_seconds": None,
                "last_verified_auto_age_seconds": None,
                "recent_unverified": [],
                "recent_non_success": [],
                "non_success_breakdown": {
                    "by_verification_result": [],
                    "by_failure_class": [],
                    # Present in the regular payload as well; keep the schema aligned.
                    "by_remediation_status": [],
                },
                "remediation_queue": _remediation_queue_payload([]),
            }

        return _build_verification_coverage_payload(
            summary_row,
            recent_rows,
            recent_non_success_rows,
        )
|
||||
|
||||
|
||||
_VERIFICATION_COVERAGE_SQL = """
|
||||
WITH recent_auto AS (
|
||||
SELECT id, incident_id, success, created_at
|
||||
FROM auto_repair_executions
|
||||
WHERE created_at >= NOW() - INTERVAL '24 hours'
|
||||
),
|
||||
per_auto AS (
|
||||
SELECT
|
||||
are.id,
|
||||
are.incident_id,
|
||||
are.success,
|
||||
are.created_at,
|
||||
latest.verification_result,
|
||||
latest.collected_at AS verification_collected_at,
|
||||
latest.self_healing_score
|
||||
FROM recent_auto are
|
||||
LEFT JOIN LATERAL (
|
||||
SELECT ev.verification_result, ev.collected_at, ev.self_healing_score
|
||||
FROM incident_evidence ev
|
||||
WHERE ev.incident_id = are.incident_id
|
||||
AND ev.verification_result IS NOT NULL
|
||||
ORDER BY ev.collected_at DESC
|
||||
LIMIT 1
|
||||
) latest ON TRUE
|
||||
)
|
||||
SELECT
|
||||
count(*)::int AS total_auto,
|
||||
count(*) FILTER (WHERE success)::int AS successful_auto,
|
||||
count(*) FILTER (WHERE verification_result IS NOT NULL)::int AS verified_auto,
|
||||
count(*) FILTER (WHERE verification_result = 'success')::int AS verified_success,
|
||||
count(*) FILTER (WHERE verification_result IN ('degraded','failed','timeout'))::int AS verified_non_success,
|
||||
count(*) FILTER (WHERE verification_result IS NULL)::int AS unverified_auto,
|
||||
max(created_at) AS last_auto_at,
|
||||
max(created_at) FILTER (WHERE verification_result IS NOT NULL) AS last_verified_auto_at,
|
||||
max(verification_collected_at) AS last_verification_evidence_at,
|
||||
EXTRACT(EPOCH FROM (NOW() - max(created_at)))::int AS latest_auto_age_seconds,
|
||||
EXTRACT(EPOCH FROM (NOW() - (max(created_at) FILTER (WHERE verification_result IS NOT NULL))))::int
|
||||
AS last_verified_auto_age_seconds
|
||||
FROM per_auto
|
||||
"""
|
||||
|
||||
|
||||
_VERIFICATION_COVERAGE_RECENT_SQL = """
|
||||
WITH recent_auto AS (
|
||||
SELECT id, incident_id, success, created_at
|
||||
FROM auto_repair_executions
|
||||
WHERE created_at >= NOW() - INTERVAL '24 hours'
|
||||
),
|
||||
per_auto AS (
|
||||
SELECT
|
||||
are.id,
|
||||
are.incident_id,
|
||||
are.success,
|
||||
are.created_at,
|
||||
latest.verification_result
|
||||
FROM recent_auto are
|
||||
LEFT JOIN LATERAL (
|
||||
SELECT ev.verification_result
|
||||
FROM incident_evidence ev
|
||||
WHERE ev.incident_id = are.incident_id
|
||||
AND ev.verification_result IS NOT NULL
|
||||
ORDER BY ev.collected_at DESC
|
||||
LIMIT 1
|
||||
) latest ON TRUE
|
||||
)
|
||||
SELECT id, incident_id, success, created_at
|
||||
FROM per_auto
|
||||
WHERE verification_result IS NULL
|
||||
ORDER BY created_at DESC
|
||||
LIMIT 5
|
||||
"""
|
||||
|
||||
|
||||
_VERIFICATION_COVERAGE_NON_SUCCESS_SQL = """
|
||||
WITH recent_auto AS (
|
||||
SELECT
|
||||
id,
|
||||
incident_id,
|
||||
success,
|
||||
playbook_id,
|
||||
playbook_name,
|
||||
triggered_by,
|
||||
risk_level,
|
||||
error_message,
|
||||
created_at
|
||||
FROM auto_repair_executions
|
||||
WHERE created_at >= NOW() - INTERVAL '24 hours'
|
||||
),
|
||||
per_auto AS (
|
||||
SELECT
|
||||
are.id AS auto_repair_id,
|
||||
are.incident_id,
|
||||
are.success AS auto_success,
|
||||
are.playbook_id,
|
||||
are.playbook_name,
|
||||
are.triggered_by,
|
||||
are.risk_level,
|
||||
left(coalesce(are.error_message, ''), 240) AS auto_error,
|
||||
are.created_at AS auto_created_at,
|
||||
latest.verification_result,
|
||||
latest.collected_at AS verification_collected_at,
|
||||
left(coalesce(latest.post_execution_state::text, ''), 700) AS post_state_text,
|
||||
left(coalesce(latest.evidence_summary, ''), 300) AS evidence_summary
|
||||
FROM recent_auto are
|
||||
LEFT JOIN LATERAL (
|
||||
SELECT
|
||||
ev.verification_result,
|
||||
ev.collected_at,
|
||||
ev.post_execution_state,
|
||||
ev.evidence_summary
|
||||
FROM incident_evidence ev
|
||||
WHERE ev.incident_id = are.incident_id
|
||||
AND ev.verification_result IS NOT NULL
|
||||
ORDER BY ev.collected_at DESC
|
||||
LIMIT 1
|
||||
) latest ON TRUE
|
||||
)
|
||||
SELECT
|
||||
p.*,
|
||||
i.status::text AS incident_status,
|
||||
i.severity::text AS incident_severity,
|
||||
i.alert_category,
|
||||
i.alertname
|
||||
FROM per_auto p
|
||||
LEFT JOIN incidents i ON i.incident_id = p.incident_id
|
||||
WHERE p.verification_result IS NOT NULL
|
||||
AND p.verification_result <> 'success'
|
||||
ORDER BY p.auto_created_at DESC
|
||||
LIMIT 8
|
||||
"""
|
||||
|
||||
|
||||
async def _query_prometheus_value(
    client: httpx.AsyncClient,
    prom_url: str,
    query: str,
) -> dict[str, Any]:
    """Run an instant query and reduce the response to a small result dict.

    Only the first entry of the result list is read. The returned ``status``
    is ``"ok"`` (with ``value``), ``"no_data"`` for an empty result,
    ``"skipped"`` for a NaN/Inf sample (with ``raw_value``) or ``"error"``.
    Every non-ok result carries a ``reason``. Exceptions are logged and
    reported as ``"error"`` instead of being raised.
    """
    try:
        response = await client.get(f"{prom_url}/api/v1/query", params={"query": query})
        payload = response.json()
        if payload.get("status") != "success":
            return {"status": "error", "reason": "prometheus_query_failed"}

        series = payload.get("data", {}).get("result", [])
        if not series:
            return {
                "status": "no_data",
                "reason": "prometheus_empty_result_metric_not_emitted",
            }

        # The sample is read as a pair; its second item holds the value.
        raw_value = series[0]["value"][1]
        parsed = float(raw_value)
        if math.isfinite(parsed):
            return {"status": "ok", "value": parsed}
        return {
            "status": "skipped",
            "reason": "prometheus_nan_or_inf",
            "raw_value": raw_value,
        }
    except Exception as exc:
        logger.warning("adr100_slo_prometheus_query_error", query=query, error=str(exc))
        return {"status": "error", "reason": "prometheus_query_error"}
|
||||
|
||||
|
||||
def _metric_payload(
|
||||
definition: Adr100SloDefinition,
|
||||
*,
|
||||
value: float | None,
|
||||
status: str,
|
||||
reason: str | None,
|
||||
denominator_value: float | None,
|
||||
sample_count: float | None,
|
||||
) -> dict[str, Any]:
|
||||
return {
|
||||
"name": definition.name,
|
||||
"query": definition.query,
|
||||
"value": value,
|
||||
"target": definition.target,
|
||||
"hard_red_line": definition.hard_red_line,
|
||||
"direction": definition.direction,
|
||||
"unit": definition.unit,
|
||||
"window": definition.window,
|
||||
"status": status,
|
||||
"evaluable": status in {"ok", "warning", "violated"},
|
||||
"reason": reason,
|
||||
"denominator_query": definition.denominator_query,
|
||||
"denominator_value": denominator_value,
|
||||
"sample_count": sample_count,
|
||||
}
|
||||
|
||||
|
||||
def _classify_status(value: float, definition: Adr100SloDefinition) -> str:
|
||||
if definition.direction == "above":
|
||||
if value < definition.hard_red_line:
|
||||
return "violated"
|
||||
if value < definition.target:
|
||||
return "warning"
|
||||
return "ok"
|
||||
|
||||
if value > definition.hard_red_line:
|
||||
return "violated"
|
||||
if value > definition.target:
|
||||
return "warning"
|
||||
return "ok"
|
||||
|
||||
|
||||
def _build_verification_coverage_payload(
    summary_row: Any,
    recent_unverified_rows: Any,
    recent_non_success_rows: Any = (),
) -> dict[str, Any]:
    """Turn the verification coverage query rows into the dashboard payload.

    Args:
        summary_row: Mapping with the aggregate counters and timestamps.
        recent_unverified_rows: Mappings for executions without verification.
        recent_non_success_rows: Mappings for executions whose latest
            verification result is not a success.

    Returns:
        An ``adr100_verification_coverage_v1`` payload. Status is
        ``"skipped_low_volume"`` without any execution, ``"warning"`` when
        unverified or non-success executions exist, and ``"ok"`` otherwise.
    """
    summary = dict(summary_row)

    def _counter(column: str) -> int:
        # Missing or NULL aggregates are reported as zero.
        return int(summary.get(column) or 0)

    total_auto = _counter("total_auto")
    verified_auto = _counter("verified_auto")
    verified_success = _counter("verified_success")
    verified_non_success = _counter("verified_non_success")
    unverified_auto = _counter("unverified_auto")

    evaluable = total_auto != 0
    if not evaluable:
        status, reason = "skipped_low_volume", "no_auto_repair_executions_24h"
    elif unverified_auto > 0:
        status, reason = "warning", "verification_backlog_present"
    elif verified_non_success > 0:
        status, reason = "warning", "non_success_verification_present"
    else:
        status, reason = "ok", None

    coverage_rate = verified_auto / total_auto if total_auto else None
    verification_success_rate = verified_success / verified_auto if verified_auto else None

    recent_non_success = [
        _non_success_finding_payload(dict(raw)) for raw in recent_non_success_rows
    ]
    remediation_queue = _remediation_queue_payload(recent_non_success)

    recent_unverified: list[dict[str, Any]] = []
    for raw in recent_unverified_rows:
        entry = dict(raw)
        recent_unverified.append({
            "id": str(entry.get("id")),
            "incident_id": str(entry.get("incident_id")),
            "success": bool(entry.get("success")),
            "created_at": _iso(entry.get("created_at")),
        })

    # Breakdowns only cover the rows handed in, not the full 24h totals.
    non_success_breakdown = {
        "by_verification_result": _count_breakdown(
            finding["verification_result"] for finding in recent_non_success
        ),
        "by_failure_class": _count_breakdown(
            finding["failure_class"] for finding in recent_non_success
        ),
        "by_remediation_status": _count_breakdown(
            work_item["remediation_status"] for work_item in remediation_queue["items"]
        ),
    }

    return {
        "schema_version": "adr100_verification_coverage_v1",
        "source": "postgresql",
        "window": "24h",
        "status": status,
        "reason": reason,
        "evaluable": evaluable,
        "total_auto": total_auto,
        "successful_auto": _counter("successful_auto"),
        "verified_auto": verified_auto,
        "verified_success": verified_success,
        "verified_non_success": verified_non_success,
        "unverified_auto": unverified_auto,
        "coverage_rate": coverage_rate,
        "verification_success_rate": verification_success_rate,
        "last_auto_at": _iso(summary.get("last_auto_at")),
        "last_verified_auto_at": _iso(summary.get("last_verified_auto_at")),
        "last_verification_evidence_at": _iso(summary.get("last_verification_evidence_at")),
        "latest_auto_age_seconds": _int_or_none(summary.get("latest_auto_age_seconds")),
        "last_verified_auto_age_seconds": _int_or_none(summary.get("last_verified_auto_age_seconds")),
        "recent_unverified": recent_unverified,
        "recent_non_success": recent_non_success,
        "non_success_breakdown": non_success_breakdown,
        "remediation_queue": remediation_queue,
    }
|
||||
|
||||
|
||||
def _non_success_finding_payload(row: dict[str, Any]) -> dict[str, Any]:
    """Build the dashboard entry for one non-success verification row.

    The row is classified into a failure class, which in turn selects the
    suggested next step and the remediation metadata. Free-text columns are
    reduced to whitespace-normalized excerpts of at most 180 characters.
    """
    failure_class = _classify_non_success_failure(row)
    remediation = _remediation_for_failure_class(failure_class)
    next_step = _next_step_for_failure_class(failure_class)

    auto_error_excerpt = _short_text(row.get("auto_error"), 180)
    evidence_excerpt = _short_text(row.get("evidence_summary"), 180)
    auto_created_at = _iso(row.get("auto_created_at"))
    verification_collected_at = _iso(row.get("verification_collected_at"))

    return {
        "auto_repair_id": str(row.get("auto_repair_id")),
        "incident_id": str(row.get("incident_id")),
        "incident_status": str(row.get("incident_status") or "unknown"),
        "incident_severity": str(row.get("incident_severity") or "unknown"),
        "alert_category": row.get("alert_category"),
        "alertname": row.get("alertname"),
        "auto_success": bool(row.get("auto_success")),
        "playbook_id": row.get("playbook_id"),
        "playbook_name": row.get("playbook_name"),
        "triggered_by": row.get("triggered_by"),
        "risk_level": row.get("risk_level"),
        "verification_result": str(row.get("verification_result") or "unknown"),
        "failure_class": failure_class,
        "next_step": next_step,
        "remediation_status": remediation["status"],
        "remediation_action": remediation["action"],
        "remediation_owner": remediation["owner"],
        "remediation_reason": remediation["reason"],
        "auto_error_excerpt": auto_error_excerpt,
        "evidence_excerpt": evidence_excerpt,
        "auto_created_at": auto_created_at,
        "verification_collected_at": verification_collected_at,
    }
|
||||
|
||||
|
||||
def _classify_non_success_failure(row: dict[str, Any]) -> str:
|
||||
combined = " ".join(
|
||||
str(row.get(key) or "")
|
||||
for key in ("auto_error", "post_state_text", "evidence_summary")
|
||||
).lower()
|
||||
if "unsupported scheme" in combined:
|
||||
return "unsupported_action_scheme"
|
||||
if "missing_query_parameter" in combined:
|
||||
return "verifier_missing_promql"
|
||||
if "empty_pod_name" in combined:
|
||||
return "verifier_target_missing_pod"
|
||||
if not bool(row.get("auto_success")):
|
||||
return "auto_repair_execution_failed"
|
||||
|
||||
result = str(row.get("verification_result") or "").lower()
|
||||
if result in {"failed", "timeout"}:
|
||||
return f"verification_{result}"
|
||||
return "verification_degraded"
|
||||
|
||||
|
||||
def _remediation_for_failure_class(failure_class: str) -> dict[str, str]:
|
||||
"""Map a non-success verification class to a read-only remediation work item.
|
||||
|
||||
This is dashboard triage metadata only. It does not auto-close incidents,
|
||||
replay repairs, or approve write actions.
|
||||
"""
|
||||
if failure_class == "unsupported_action_scheme":
|
||||
return {
|
||||
"status": "ready_for_replay",
|
||||
"action": "replay_with_supported_executor",
|
||||
"owner": "auto_repair_executor",
|
||||
"reason": "executor_gateway_available_after_t23",
|
||||
}
|
||||
if failure_class == "verifier_missing_promql":
|
||||
return {
|
||||
"status": "ready_for_reverify",
|
||||
"action": "reverify_with_promql_template",
|
||||
"owner": "post_execution_verifier",
|
||||
"reason": "promql_template_available_after_t23",
|
||||
}
|
||||
if failure_class == "verifier_target_missing_pod":
|
||||
return {
|
||||
"status": "needs_target_mapping",
|
||||
"action": "map_target_and_reverify",
|
||||
"owner": "post_execution_verifier",
|
||||
"reason": "verifier_target_missing",
|
||||
}
|
||||
if failure_class == "auto_repair_execution_failed":
|
||||
return {
|
||||
"status": "needs_playbook_ticket",
|
||||
"action": "create_playbook_ticket",
|
||||
"owner": "solver_or_operator",
|
||||
"reason": "execution_failed_after_route_normalization",
|
||||
}
|
||||
if failure_class in {"verification_failed", "verification_timeout"}:
|
||||
return {
|
||||
"status": "manual_review",
|
||||
"action": "escalate_verification_failure",
|
||||
"owner": "sre_operator",
|
||||
"reason": "verifier_returned_hard_failure",
|
||||
}
|
||||
return {
|
||||
"status": "manual_review",
|
||||
"action": "inspect_degraded_evidence",
|
||||
"owner": "sre_operator",
|
||||
"reason": "degraded_evidence_requires_human_context",
|
||||
}
|
||||
|
||||
|
||||
def _next_step_for_failure_class(failure_class: str) -> str:
|
||||
if failure_class == "unsupported_action_scheme":
|
||||
return "normalize_playbook_executor"
|
||||
if failure_class == "verifier_missing_promql":
|
||||
return "add_verifier_query_template"
|
||||
if failure_class == "verifier_target_missing_pod":
|
||||
return "map_verifier_target"
|
||||
if failure_class == "auto_repair_execution_failed":
|
||||
return "review_auto_repair_execution"
|
||||
if failure_class in {"verification_failed", "verification_timeout"}:
|
||||
return "escalate_verification_failure"
|
||||
return "review_degraded_verification"
|
||||
|
||||
|
||||
def _remediation_queue_payload(recent_non_success: list[dict[str, Any]]) -> dict[str, Any]:
    """Project non-success findings into a read-only remediation queue.

    Each finding becomes one work item identified by its incident and
    auto-repair ids. The summary counts how many items an automated replay
    or re-verification could pick up versus how many need a human.
    """
    ai_ready_statuses = {"ready_for_replay", "ready_for_reverify"}
    human_statuses = {
        "needs_target_mapping",
        "needs_playbook_ticket",
        "manual_review",
    }

    items: list[dict[str, Any]] = [
        {
            "work_item_id": (
                f"verification:{finding.get('incident_id')}:{finding.get('auto_repair_id')}"
            ),
            "incident_id": finding.get("incident_id"),
            "auto_repair_id": finding.get("auto_repair_id"),
            "alertname": finding.get("alertname"),
            "playbook_id": finding.get("playbook_id"),
            "failure_class": finding.get("failure_class"),
            "verification_result": finding.get("verification_result"),
            "remediation_status": finding.get("remediation_status"),
            "remediation_action": finding.get("remediation_action"),
            "remediation_owner": finding.get("remediation_owner"),
            "remediation_reason": finding.get("remediation_reason"),
            "source": "adr100_verification_coverage",
            "auto_created_at": finding.get("auto_created_at"),
            "verification_collected_at": finding.get("verification_collected_at"),
        }
        for finding in recent_non_success
    ]

    statuses = [work_item.get("remediation_status") for work_item in items]
    actions = [work_item.get("remediation_action") for work_item in items]
    ready_for_ai = sum(1 for status in statuses if status in ai_ready_statuses)
    needs_human = sum(1 for status in statuses if status in human_statuses)

    return {
        "schema_version": "adr100_remediation_queue_v1",
        "source": "recent_non_success_read_model",
        "total": len(items),
        "ready_for_ai": ready_for_ai,
        "needs_human": needs_human,
        "items": items,
        "by_status": _count_breakdown(statuses),
        "by_action": _count_breakdown(actions),
    }
|
||||
|
||||
|
||||
def _count_breakdown(values: Any) -> list[dict[str, Any]]:
|
||||
counts: dict[str, int] = {}
|
||||
for value in values:
|
||||
key = str(value or "unknown")
|
||||
counts[key] = counts.get(key, 0) + 1
|
||||
return [
|
||||
{"name": name, "count": count}
|
||||
for name, count in sorted(counts.items(), key=lambda item: (-item[1], item[0]))
|
||||
]
|
||||
|
||||
|
||||
def _short_text(value: Any, limit: int) -> str | None:
|
||||
if value is None:
|
||||
return None
|
||||
text = " ".join(str(value).split())
|
||||
if not text:
|
||||
return None
|
||||
return text[:limit]
|
||||
|
||||
|
||||
def _iso(value: Any) -> str | None:
|
||||
return value.isoformat() if hasattr(value, "isoformat") else None
|
||||
|
||||
|
||||
def _int_or_none(value: Any) -> int | None:
|
||||
return int(value) if value is not None else None
|
||||
|
||||
|
||||
def _overall_status(
|
||||
metrics: list[dict[str, Any]],
|
||||
evaluable: list[dict[str, Any]],
|
||||
verification_coverage: dict[str, Any] | None = None,
|
||||
) -> str:
|
||||
if any(metric.get("status") == "violated" for metric in metrics):
|
||||
return "violated"
|
||||
if verification_coverage and verification_coverage.get("status") in {"violated", "warning"}:
|
||||
return str(verification_coverage["status"])
|
||||
if any(metric.get("status") == "warning" for metric in metrics):
|
||||
return "warning"
|
||||
if evaluable and any(metric.get("status") == "skipped_low_volume" for metric in metrics):
|
||||
return "partial"
|
||||
if evaluable:
|
||||
return "ok"
|
||||
if any(metric.get("status") == "no_data" for metric in metrics):
|
||||
return "no_data"
|
||||
return "skipped_low_volume"
|
||||
|
||||
|
||||
_adr100_slo_status_service: Adr100SloStatusService | None = None
|
||||
|
||||
|
||||
def get_adr100_slo_status_service() -> Adr100SloStatusService:
    """Return the module-wide ``Adr100SloStatusService``, creating it on first use."""
    global _adr100_slo_status_service
    service = _adr100_slo_status_service
    if service is None:
        # First call: build the instance and remember it for later callers.
        service = _adr100_slo_status_service = Adr100SloStatusService()
    return service
|
||||
@@ -27,7 +27,7 @@ from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import dataclasses
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
import uuid
|
||||
from datetime import UTC, datetime
|
||||
@@ -63,11 +63,25 @@ if TYPE_CHECKING:
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
def _agent_debate_global_timeout_seconds() -> float:
|
||||
"""Return the full Phase 2 debate timeout.
|
||||
|
||||
GCP Ollama incident analysis can legitimately take longer than the old
|
||||
90s guard. Keep a hard ceiling, but make it an explicit deployment knob.
|
||||
"""
|
||||
|
||||
raw = os.environ.get("AGENT_DEBATE_GLOBAL_TIMEOUT_SEC", "420.0")
|
||||
try:
|
||||
timeout = float(raw)
|
||||
except (TypeError, ValueError):
|
||||
timeout = 420.0
|
||||
return max(timeout, 90.0)
|
||||
|
||||
|
||||
# 全局超時(所有 Agent 加起來)
|
||||
# 2026-04-16 Claude Sonnet 4.6: deepseek-r1:14b 實測 2.2-27.3s avg 10.6s
|
||||
# 原 30s 對 3 個序列 Agent 每個只剩 10s → 頻繁 timeout → confidence=20%
|
||||
# 調整: 每 Agent 25s, 3個序列+1組並行 = 最差 75s + buffer = 90s
|
||||
GLOBAL_TIMEOUT_SEC = 90.0
|
||||
# 2026-05-06 Codex: configurable for GCP-A/GCP-B/111 Ollama-first incident
|
||||
# diagnosis. The old 90s guard was cutting off valid deep diagnosis runs.
|
||||
GLOBAL_TIMEOUT_SEC = _agent_debate_global_timeout_seconds()
|
||||
|
||||
# 2026-04-16 ogt + Claude Sonnet 4.6: 移除 _PER_AGENT_TIMEOUT_SEC
|
||||
# LLM 必須等到完整回應,不得人工截斷。降級只在真正異常(連線失敗、模型崩潰)觸發。
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
"""
|
||||
Ollama Provider - Phase 24 ADR-052
|
||||
====================================
|
||||
本地 LLM 推理 (192.168.0.188 VMware VM, CPU-only)
|
||||
本地 / 私有 LLM 推理 Provider。
|
||||
|
||||
搬移自: openclaw.py _call_ollama (L349-409)
|
||||
特性: 免費、隱私安全 (local)、但 CPU 慢 (~97s/30tokens for qwen2.5:7b)
|
||||
特性: 免費、隱私安全 (local)、可依 ADR-110 指向 GCP-A/GCP-B/111。
|
||||
|
||||
2026-04-02 ogt: Phase 24-A 從 openclaw.py 抽出
|
||||
"""
|
||||
@@ -29,6 +29,62 @@ from src.services.model_registry import get_model_registry
|
||||
logger = structlog.get_logger(__name__)
|
||||
settings = get_settings()
|
||||
|
||||
_GCP_LIGHTWEIGHT_MODELS = {
|
||||
"gemma3:4b",
|
||||
}
|
||||
|
||||
|
||||
def _normalized_url(value: str | None) -> str:
|
||||
return (value or "").rstrip("/")
|
||||
|
||||
|
||||
def _is_gcp_alert_lane(endpoint_url: str) -> bool:
    """Return true for the CPU-only GCP-A/B synchronous alert lane.

    The endpoint belongs to the lane when, ignoring trailing slashes, it
    equals ``settings.OLLAMA_URL`` or ``settings.OLLAMA_SECONDARY_URL``.
    An empty endpoint never matches, even when one of those settings is
    unset and therefore normalizes to an empty string as well.
    """
    endpoint = _normalized_url(endpoint_url)
    if not endpoint:
        # An unset setting normalizes to "" and must not match an empty endpoint.
        return False
    gcp_endpoints = {
        _normalized_url(getattr(settings, "OLLAMA_URL", "")),
        _normalized_url(getattr(settings, "OLLAMA_SECONDARY_URL", "")),
    }
    return endpoint in gcp_endpoints
|
||||
|
||||
|
||||
def _resolve_model_for_endpoint(
    *,
    requested_model: str,
    endpoint_url: str,
    context: dict | None,
) -> str:
    """
    Keep non-diagnosis calls from polluting the GCP diagnosis lane.

    GCP-A/B are allowed to run the deep incident diagnosis model because the
    alert goal is correctness and resolution, not the fastest Telegram card.
    Accidental non-diagnosis workloads still fall back to the lightweight health
    model so embedding/Hermes/background calls cannot occupy the same lane.

    Returns the stripped requested model unless the call targets the GCP
    lane, is not flagged as heavy-allowed, is not a deep diagnosis task and
    asks for a model outside the lightweight set. In that case the configured
    health-check model is returned and the coercion is logged.
    """
    model_name = requested_model.strip()
    ctx = context or {}

    if not _is_gcp_alert_lane(endpoint_url):
        return model_name
    if ctx.get("allow_gcp_heavy_model"):
        return model_name

    # The task type falls back to the intent hint when it is missing or empty.
    task_type = str(ctx.get("task_type") or ctx.get("intent_hint") or "").lower()
    if task_type in {"diagnose", "alert_deep", "incident_diagnosis"}:
        return model_name
    if model_name in _GCP_LIGHTWEIGHT_MODELS:
        return model_name

    configured = str(getattr(settings, "OLLAMA_HEALTH_CHECK_MODEL", "gemma3:4b")).strip()
    fallback_model = configured or "gemma3:4b"
    logger.warning(
        "ollama_gcp_non_diagnosis_model_coerced",
        endpoint=endpoint_url,
        requested_model=model_name,
        safe_model=fallback_model,
        task_type=task_type,
    )
    return fallback_model
|
||||
|
||||
|
||||
class OllamaProvider:
|
||||
"""
|
||||
@@ -77,11 +133,17 @@ class OllamaProvider:
|
||||
client = await self._get_client()
|
||||
|
||||
registry = get_model_registry()
|
||||
model_name = registry.get_model("ollama", "rca")
|
||||
endpoint_url = self._endpoint_url()
|
||||
requested_model = str((context or {}).get("ollama_model") or registry.get_model("ollama", "rca")).strip()
|
||||
model_name = _resolve_model_for_endpoint(
|
||||
requested_model=requested_model,
|
||||
endpoint_url=endpoint_url,
|
||||
context=context,
|
||||
)
|
||||
options = registry.get_provider_options("ollama")
|
||||
|
||||
# P0 2026-04-04 Claude Code: per-task timeout(Option C 分情境)
|
||||
# FORCE_LOCAL/diagnose → OLLAMA_DIAGNOSE_TIMEOUT_SECONDS (200s,實測 ~173s)
|
||||
# FORCE_LOCAL/diagnose → OLLAMA_DIAGNOSE_TIMEOUT_SECONDS
|
||||
# 其他 → OPENCLAW_TIMEOUT(既有設定)
|
||||
task_type = (context or {}).get("task_type", "")
|
||||
if task_type in ("diagnose", "force_local"):
|
||||
@@ -89,7 +151,6 @@ class OllamaProvider:
|
||||
else:
|
||||
read_timeout = float(settings.OPENCLAW_TIMEOUT)
|
||||
|
||||
endpoint_url = self._endpoint_url()
|
||||
response = await client.post(
|
||||
f"{endpoint_url}/api/generate",
|
||||
json={
|
||||
@@ -112,7 +173,13 @@ class OllamaProvider:
|
||||
tokens = data.get("eval_count", 0) + data.get("prompt_eval_count", 0)
|
||||
latency = (time.perf_counter() - start) * 1000
|
||||
|
||||
logger.info("ollama_provider_success", response_length=len(result), tokens=tokens, latency_ms=round(latency, 1))
|
||||
logger.info(
|
||||
"ollama_provider_success",
|
||||
response_length=len(result),
|
||||
tokens=tokens,
|
||||
latency_ms=round(latency, 1),
|
||||
model=model_name,
|
||||
)
|
||||
return AIResult(
|
||||
raw_response=result,
|
||||
success=True,
|
||||
@@ -158,7 +225,7 @@ class OllamaProvider:
|
||||
total_tokens = 0
|
||||
messages: list[dict] = [{"role": "user", "content": prompt}]
|
||||
registry = get_model_registry()
|
||||
model_name = registry.get_model("ollama", "rca")
|
||||
model_name = str((context or {}).get("ollama_model") or registry.get_model("ollama", "rca")).strip()
|
||||
options = registry.get_provider_options("ollama")
|
||||
task_type = (context or {}).get("task_type", "")
|
||||
if task_type in ("diagnose", "force_local"):
|
||||
@@ -268,33 +335,27 @@ class OllamaProvider:
|
||||
self._http_client = None
|
||||
|
||||
|
||||
# 2026-04-26 Wave5 B1-fix by Claude Engineer-A4 — OLLAMA_188 provider 註冊
|
||||
class Ollama188Provider(OllamaProvider):
|
||||
# 2026-05-06 Codex — 188 不再作為 Ollama Provider;本地備援統一命名為 ollama_local。
|
||||
class OllamaLocalProvider(OllamaProvider):
|
||||
"""
|
||||
Ollama 188 CPU-only 備援 Provider
|
||||
Ollama Local fallback Provider
|
||||
|
||||
繼承 OllamaProvider,但使用 OLLAMA_FALLBACK_URL(192.168.0.188:11434)
|
||||
作為推理端點,模型預設 OLLAMA_HEALTH_CHECK_MODEL(qwen2.5:7b-instruct)。
|
||||
|
||||
B1 修復:原本 _init_registry 未登錄此 provider,導致
|
||||
executor.execute() 遇到 "ollama_188" → not_registered → 跳過,
|
||||
188 從未被打到。此類別補全登錄鏈路。
|
||||
|
||||
2026-04-26 Wave5 B1-fix by Claude Engineer-A4
|
||||
使用 OLLAMA_FALLBACK_URL 作為本地最後防線端點。
|
||||
ADR-110 目前設定為 110 nginx proxy → 111 Ollama;188 不得再作為 Ollama provider。
|
||||
"""
|
||||
|
||||
@property
|
||||
def name(self) -> str:
|
||||
return "ollama_188"
|
||||
return "ollama_local"
|
||||
|
||||
@property
|
||||
def is_enabled(self) -> bool:
|
||||
import os
|
||||
# 優先查 ENABLE_OLLAMA_188;若未設定(預設 true)則看 OLLAMA_FALLBACK_URL 是否有值
|
||||
env_override = os.getenv("ENABLE_OLLAMA_188", "true").lower() == "true"
|
||||
# 優先查 ENABLE_OLLAMA_LOCAL;若未設定(預設 true)則看 OLLAMA_FALLBACK_URL 是否有值。
|
||||
env_override = os.getenv("ENABLE_OLLAMA_LOCAL", "true").lower() == "true"
|
||||
if not env_override:
|
||||
return False
|
||||
# OLLAMA_FALLBACK_URL 空字串 → 未設定 188 節點 → 停用
|
||||
# OLLAMA_FALLBACK_URL 空字串 → 未設定本地節點 → 停用。
|
||||
return bool(getattr(settings, "OLLAMA_FALLBACK_URL", ""))
|
||||
|
||||
def _endpoint_url(self) -> str:
|
||||
@@ -319,18 +380,18 @@ class Ollama188Provider(OllamaProvider):
|
||||
client = await self._get_client()
|
||||
|
||||
registry = get_model_registry()
|
||||
# 嘗試取 ollama_188 專屬設定,fallback 到 ollama 預設
|
||||
# 嘗試取本地 fallback 專屬設定,fallback 到 ollama 預設。
|
||||
try:
|
||||
model_name = registry.get_model("ollama_188", "rca")
|
||||
model_name = str((context or {}).get("ollama_model") or registry.get_model("ollama_local", "rca")).strip()
|
||||
except Exception:
|
||||
model_name = getattr(settings, "OLLAMA_HEALTH_CHECK_MODEL", "qwen2.5:7b-instruct")
|
||||
model_name = str((context or {}).get("ollama_model") or getattr(settings, "OLLAMA_HEALTH_CHECK_MODEL", "qwen2.5:7b-instruct")).strip()
|
||||
|
||||
try:
|
||||
options = registry.get_provider_options("ollama_188")
|
||||
options = registry.get_provider_options("ollama_local")
|
||||
except Exception:
|
||||
options = registry.get_provider_options("ollama")
|
||||
|
||||
# CPU-only 備援:固定使用較長 timeout(CPU 推理慢)
|
||||
# 本地備援:固定使用較長 timeout,避免 111 模型載入時被過早判死。
|
||||
task_type = (context or {}).get("task_type", "")
|
||||
if task_type in ("diagnose", "force_local"):
|
||||
read_timeout = float(getattr(settings, "OLLAMA_DIAGNOSE_TIMEOUT_SECONDS", 200))
|
||||
@@ -359,11 +420,12 @@ class Ollama188Provider(OllamaProvider):
|
||||
latency = (time.perf_counter() - start) * 1000
|
||||
|
||||
logger.info(
|
||||
"ollama_188_provider_success",
|
||||
"ollama_local_provider_success",
|
||||
response_length=len(result),
|
||||
tokens=tokens,
|
||||
latency_ms=round(latency, 1),
|
||||
endpoint=fallback_url,
|
||||
model=model_name,
|
||||
)
|
||||
return AIResult(
|
||||
raw_response=result,
|
||||
@@ -375,12 +437,12 @@ class Ollama188Provider(OllamaProvider):
|
||||
|
||||
except httpx.TimeoutException as e:
|
||||
latency = (time.perf_counter() - start) * 1000
|
||||
logger.warning("ollama_188_provider_timeout", error=str(e), latency_ms=round(latency, 1))
|
||||
logger.warning("ollama_local_provider_timeout", error=str(e), latency_ms=round(latency, 1))
|
||||
return AIResult(raw_response="", success=False, provider=self.name, latency_ms=latency, error=f"Timeout: {e}")
|
||||
|
||||
except Exception as e:
|
||||
latency = (time.perf_counter() - start) * 1000
|
||||
logger.warning("ollama_188_provider_failed", error=str(e), latency_ms=round(latency, 1))
|
||||
logger.warning("ollama_local_provider_failed", error=str(e), latency_ms=round(latency, 1))
|
||||
return AIResult(raw_response="", success=False, provider=self.name, latency_ms=latency, error=str(e))
|
||||
|
||||
async def health_check(self) -> bool:
|
||||
|
||||
@@ -274,14 +274,13 @@ class AIRateLimiter:
|
||||
|
||||
try:
|
||||
from src.core.config import settings
|
||||
from src.services.telegram_gateway import get_telegram_gateway
|
||||
|
||||
target_chat_id = settings.SRE_GROUP_CHAT_ID or settings.OPENCLAW_TG_CHAT_ID
|
||||
if not settings.OPENCLAW_TG_BOT_TOKEN or not target_chat_id:
|
||||
logger.warning("telegram_not_configured_for_cost_alert")
|
||||
return
|
||||
|
||||
import httpx
|
||||
|
||||
message = (
|
||||
f"🚨🚨🚨 <b>AI 成本超限警報</b> 🚨🚨🚨\n\n"
|
||||
f"Provider: <code>{provider.upper()}</code>\n"
|
||||
@@ -292,15 +291,15 @@ class AIRateLimiter:
|
||||
f"<code>redis-cli DEL ai_rate:total_cost:{provider}</code>"
|
||||
)
|
||||
|
||||
async with httpx.AsyncClient(timeout=10.0) as client:
|
||||
await client.post(
|
||||
f"https://api.telegram.org/bot{settings.OPENCLAW_TG_BOT_TOKEN}/sendMessage",
|
||||
json={
|
||||
"chat_id": target_chat_id,
|
||||
"text": message,
|
||||
"parse_mode": "HTML",
|
||||
},
|
||||
)
|
||||
gateway = get_telegram_gateway()
|
||||
await gateway._send_request(
|
||||
"sendMessage",
|
||||
{
|
||||
"chat_id": target_chat_id,
|
||||
"text": message,
|
||||
"parse_mode": "HTML",
|
||||
},
|
||||
)
|
||||
|
||||
logger.error(
|
||||
"ai_cost_alert_sent",
|
||||
@@ -327,13 +326,12 @@ class AIRateLimiter:
|
||||
|
||||
try:
|
||||
from src.core.config import settings
|
||||
from src.services.telegram_gateway import get_telegram_gateway
|
||||
|
||||
target_chat_id = settings.SRE_GROUP_CHAT_ID or settings.OPENCLAW_TG_CHAT_ID
|
||||
if not settings.OPENCLAW_TG_BOT_TOKEN or not target_chat_id:
|
||||
return
|
||||
|
||||
import httpx
|
||||
|
||||
limit = COST_LIMITS[provider]["total_cost_usd"]
|
||||
remaining = limit - current_cost
|
||||
|
||||
@@ -345,15 +343,15 @@ class AIRateLimiter:
|
||||
f"接近上限,請注意監控!"
|
||||
)
|
||||
|
||||
async with httpx.AsyncClient(timeout=10.0) as client:
|
||||
await client.post(
|
||||
f"https://api.telegram.org/bot{settings.OPENCLAW_TG_BOT_TOKEN}/sendMessage",
|
||||
json={
|
||||
"chat_id": target_chat_id,
|
||||
"text": message,
|
||||
"parse_mode": "HTML",
|
||||
},
|
||||
)
|
||||
gateway = get_telegram_gateway()
|
||||
await gateway._send_request(
|
||||
"sendMessage",
|
||||
{
|
||||
"chat_id": target_chat_id,
|
||||
"text": message,
|
||||
"parse_mode": "HTML",
|
||||
},
|
||||
)
|
||||
|
||||
logger.warning(
|
||||
"ai_cost_warning_sent",
|
||||
|
||||
@@ -73,10 +73,6 @@ class AIProviderEnum(str, Enum):
|
||||
"""AI 提供者"""
|
||||
|
||||
OLLAMA = "ollama"
|
||||
# 2026-04-25 critic-fix Part2 B2 by Claude Engineer-C2
|
||||
# P1.1b OllamaFailoverManager 使用 provider_name="ollama_188",
|
||||
# 但 AIProviderEnum 沒有此值 → P1.2 整合時 lookup 失敗
|
||||
OLLAMA_188 = "ollama_188" # 188 CPU-only 備援節點(P1.1b)
|
||||
# 2026-05-04 ogt + Claude Sonnet 4.6: ADR-110 GCP 三層容災
|
||||
# OllamaFailoverManager 回傳 provider_name="ollama_gcp_a"/"ollama_gcp_b"/"ollama_local"
|
||||
# 缺少 enum 值 → AIProviderEnum(primary_str) 拋 ValueError → fallback chain 清空 → 直跳 Gemini
|
||||
@@ -96,8 +92,6 @@ class AIProviderEnum(str, Enum):
|
||||
# Provider 對應延遲預算 (ms)
|
||||
PROVIDER_LATENCY_BUDGET: dict[AIProviderEnum, int] = {
|
||||
AIProviderEnum.OLLAMA: 60000, # 本地,允許較長處理時間
|
||||
# 2026-04-25 critic-fix Part2 B2 by Claude Engineer-C2 — 188 CPU-only 推理較慢
|
||||
AIProviderEnum.OLLAMA_188: 120000, # 120s budget for CPU inference
|
||||
# 2026-05-04 ogt: ADR-110 GCP 三層容災 — GCP NVMe SSD 推理快,60s 足夠
|
||||
AIProviderEnum.OLLAMA_GCP_A: 60000,
|
||||
AIProviderEnum.OLLAMA_GCP_B: 60000,
|
||||
@@ -432,7 +426,7 @@ class AIRouter:
|
||||
model = failover_result.primary.model
|
||||
reason = f"{reason} [failover→{primary_str}]"
|
||||
except ValueError:
|
||||
# provider_name 無法對應已知 enum(理論上不應發生,OLLAMA_188 已加)
|
||||
# provider_name 無法對應已知 enum;避免未知 provider 靜默進入執行層。
|
||||
logger.warning(
|
||||
"ai_router_unknown_failover_provider",
|
||||
provider=primary_str,
|
||||
@@ -848,14 +842,13 @@ class AIRouter:
|
||||
空 dict 代表無資料或查詢失敗(caller 應降級為忽略)。
|
||||
"""
|
||||
try:
|
||||
from src.db.base import get_session_factory
|
||||
from src.db.base import get_db_context
|
||||
from src.repositories.aider_event_repository import AiderEventRepository
|
||||
except ImportError:
|
||||
return {}
|
||||
|
||||
try:
|
||||
sf = get_session_factory()
|
||||
async with sf() as sess:
|
||||
async with get_db_context() as sess:
|
||||
repo_obj = AiderEventRepository(sess)
|
||||
stats = await repo_obj.model_stats_since(days=days)
|
||||
except Exception:
|
||||
@@ -1078,11 +1071,51 @@ class AIRouterExecutor:
|
||||
cached = await redis.get(cache_key)
|
||||
if cached:
|
||||
data = _json.loads(cached)
|
||||
cached_provider = data.get("provider", "cache")
|
||||
provider_allowed = cached_provider in provider_order
|
||||
ollama_first_required = (
|
||||
bool(context)
|
||||
and any(
|
||||
key in context
|
||||
for key in (
|
||||
"alert_type",
|
||||
"alertname",
|
||||
"alert_name",
|
||||
"fingerprint",
|
||||
"incident_id",
|
||||
"severity",
|
||||
"target_resource",
|
||||
)
|
||||
)
|
||||
and bool(provider_order)
|
||||
and provider_order[0].startswith("ollama")
|
||||
) or (
|
||||
bool(context)
|
||||
and bool(context.get("enforce_ollama_first"))
|
||||
and bool(provider_order)
|
||||
and provider_order[0].startswith("ollama")
|
||||
)
|
||||
if (
|
||||
cached_provider == "ollama"
|
||||
and any(provider.startswith("ollama") for provider in provider_order)
|
||||
):
|
||||
provider_allowed = True
|
||||
if ollama_first_required and not cached_provider.startswith("ollama"):
|
||||
provider_allowed = False
|
||||
if not provider_allowed:
|
||||
logger.info(
|
||||
"ai_router_cache_provider_mismatch_skip",
|
||||
cache_key=cache_key[:30],
|
||||
cached_provider=cached_provider,
|
||||
provider_order=provider_order,
|
||||
ollama_first_required=ollama_first_required,
|
||||
)
|
||||
raise ValueError("cached provider not allowed by current provider_order")
|
||||
logger.info("ai_router_cache_hit", cache_key=cache_key[:30])
|
||||
return AIResult(
|
||||
raw_response=data.get("response", ""),
|
||||
success=True,
|
||||
provider=data.get("provider", "cache"),
|
||||
provider=cached_provider,
|
||||
from_cache=True,
|
||||
)
|
||||
except Exception as e:
|
||||
@@ -1107,6 +1140,10 @@ class AIRouterExecutor:
|
||||
_lf_trace_ctx = None
|
||||
|
||||
errors: list[str] = []
|
||||
attempted_providers: set[str] = set()
|
||||
alert_requires_ollama_before_cloud = bool(
|
||||
(context or {}).get("alert_requires_ollama_before_cloud")
|
||||
)
|
||||
|
||||
# 2026-04-27 Claude Sonnet 4.6: A2 INC-20260425 — DIAGNOSE fallback metric 追蹤
|
||||
# 透過 context.get("intent_hint") 判斷是否為 DIAGNOSE,避免改動 execute() 簽名
|
||||
@@ -1156,13 +1193,31 @@ class AIRouterExecutor:
|
||||
errors.append(f"{provider_name}: privacy_skip(non_local)")
|
||||
continue
|
||||
|
||||
if alert_requires_ollama_before_cloud and provider.privacy_level == "cloud":
|
||||
if "ollama_local" not in attempted_providers:
|
||||
errors.append(f"{provider_name}: blocked_until_ollama_local_attempted")
|
||||
logger.warning(
|
||||
"ai_router_cloud_blocked_until_ollama_local_attempted",
|
||||
provider=provider_name,
|
||||
provider_order=provider_order,
|
||||
attempted_providers=sorted(attempted_providers),
|
||||
)
|
||||
continue
|
||||
|
||||
# 閘門 1: Circuit Breaker (per-provider, C2 修復)
|
||||
cb = self._get_circuit_breaker(provider_name)
|
||||
if cb.is_open():
|
||||
errors.append(f"{provider_name}: circuit_open")
|
||||
logger.warning("ai_router_circuit_open", provider=provider_name)
|
||||
# 2026-04-27 Claude Sonnet 4.6: F6 — circuit_open 不設 _last_attempted_provider(未嘗試)
|
||||
continue
|
||||
if alert_requires_ollama_before_cloud and provider_name.startswith("ollama"):
|
||||
logger.warning(
|
||||
"ai_router_alert_ollama_circuit_bypassed",
|
||||
provider=provider_name,
|
||||
reason="alert_requires_ollama_before_cloud",
|
||||
)
|
||||
else:
|
||||
errors.append(f"{provider_name}: circuit_open")
|
||||
logger.warning("ai_router_circuit_open", provider=provider_name)
|
||||
# 2026-04-27 Claude Sonnet 4.6: F6 — circuit_open 不設 _last_attempted_provider(未嘗試)
|
||||
continue
|
||||
|
||||
# 閘門 2: Rate Limiter
|
||||
# 2026-04-02 Claude Code: Phase 24 B3 + C1 修復 — Rate Limiter (含 openclaw_nemo)
|
||||
@@ -1182,6 +1237,7 @@ class AIRouterExecutor:
|
||||
sem = self._get_semaphore(provider_name)
|
||||
async with sem:
|
||||
try:
|
||||
attempted_providers.add(provider_name)
|
||||
result = await provider.analyze(prompt, context)
|
||||
|
||||
if result.success:
|
||||
@@ -1306,7 +1362,7 @@ def _init_registry() -> AIProviderRegistry:
|
||||
"""初始化 Provider Registry (首次呼叫時自動註冊所有 Provider)"""
|
||||
from src.services.ai_providers.ollama import (
|
||||
OllamaProvider,
|
||||
Ollama188Provider,
|
||||
OllamaLocalProvider,
|
||||
OllamaGcpBProvider, # 2026-05-04 ADR-110 GCP-B
|
||||
)
|
||||
from src.services.ai_providers.gemini import GeminiProvider
|
||||
@@ -1327,8 +1383,9 @@ def _init_registry() -> AIProviderRegistry:
|
||||
from src.services.ai_providers.nemotron import NemotronProvider
|
||||
registry.register(NemotronProvider())
|
||||
|
||||
# 2026-04-26 Wave5 B1-fix by Claude Engineer-A4 — 補登 OLLAMA_188 備援 provider
|
||||
ollama_local = Ollama188Provider()
|
||||
# 2026-05-06 Codex: 188 不再作為 Ollama provider。
|
||||
# Local fallback 統一命名為 ollama_local,端點由 OLLAMA_FALLBACK_URL 指向 111/110 proxy。
|
||||
ollama_local = OllamaLocalProvider()
|
||||
registry.register(ollama_local)
|
||||
|
||||
# 2026-05-04 ogt + Claude Sonnet 4.6: ADR-110 GCP 三層容災修復
|
||||
@@ -1337,7 +1394,7 @@ def _init_registry() -> AIProviderRegistry:
|
||||
# 修復:
|
||||
# "ollama_gcp_a" alias → 同 OllamaProvider(OLLAMA_URL = GCP-A)
|
||||
# "ollama_gcp_b" → 新 OllamaGcpBProvider(OLLAMA_SECONDARY_URL = GCP-B)
|
||||
# "ollama_local" alias → 同 Ollama188Provider(OLLAMA_FALLBACK_URL = 111)
|
||||
# "ollama_local" → OllamaLocalProvider(OLLAMA_FALLBACK_URL = 111 / 110:11437)
|
||||
registry._providers["ollama_gcp_a"] = ollama_gcp_a
|
||||
registry.register(OllamaGcpBProvider())
|
||||
registry._providers["ollama_local"] = ollama_local
|
||||
|
||||
@@ -28,7 +28,7 @@ from datetime import timedelta
|
||||
import structlog
|
||||
from sqlalchemy import func, select, text
|
||||
|
||||
from src.db.base import get_session_factory
|
||||
from src.db.base import get_db_context
|
||||
from src.db.models import AiGovernanceEvent, AutoRepairExecution, ApprovalRecord
|
||||
from src.utils.timezone import now_taipei
|
||||
|
||||
@@ -127,7 +127,7 @@ class AiSloCalculator:
|
||||
try:
|
||||
since = now_taipei() - timedelta(days=SLO_WINDOW_DAYS)
|
||||
|
||||
async with get_session_factory()() as session:
|
||||
async with get_db_context() as session:
|
||||
slo1 = await self._calc_auto_success_rate(session, since)
|
||||
slo2 = await self._calc_human_override_rate(session, since)
|
||||
slo3 = await self._calc_false_neg_rate(session, since)
|
||||
@@ -210,7 +210,7 @@ class AiSloCalculator:
|
||||
只在 any_violated=True 時呼叫。不管舊違反是否解決。
|
||||
"""
|
||||
try:
|
||||
async with get_session_factory()() as session:
|
||||
async with get_db_context() as session:
|
||||
event = AiGovernanceEvent(
|
||||
event_type="slo_violation",
|
||||
details=report.to_dict(),
|
||||
|
||||
151
apps/api/src/services/alert_approval_guard.py
Normal file
151
apps/api/src/services/alert_approval_guard.py
Normal file
@@ -0,0 +1,151 @@
|
||||
"""Alert approval guardrails for AI-generated remediation actions.
|
||||
|
||||
This service runs before an Alertmanager-derived action becomes an
|
||||
ApprovalRecord. It prevents a known failure mode: an LLM invents a kubectl
|
||||
target that does not belong to the current alert domain, then the approval
|
||||
pipeline faithfully executes or displays that bad command.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
import structlog
|
||||
|
||||
from src.services.action_parser import ActionKind, parse_kubectl_action
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
_ALLOWED_K8S_NAMESPACES = frozenset({"awoooi-prod", "observability", "signoz", "langfuse"})
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ApprovalActionGuardResult:
    """Immutable outcome of the approval action guard.

    Carries the action text that approval creation should persist, plus
    whether the guard rejected the original action and why.
    """

    # Action text to persist; a blocked result carries a NO_ACTION placeholder.
    action: str
    # True when the guard rejected the original action.
    blocked: bool = False
    # Machine-readable rejection reason; None when nothing was blocked.
    reason: str | None = None
    # Extra guard details (blocked action, warnings, candidates, ...).
    metadata: dict[str, object] = field(default_factory=dict)
|
||||
|
||||
|
||||
async def guard_alert_approval_action(
    *,
    action: str,
    alert_namespace: str | None,
    alertname: str,
    alert_category: str,
) -> ApprovalActionGuardResult:
    """Vet an AI/rule-generated action before it is stored as an approval.

    Actions that do not start with ``kubectl`` (case-insensitive, after
    stripping whitespace) are returned untouched; their own domain gates are
    expected to handle them. Kubectl actions are checked in this order:

    1. the structured parser must accept the command;
    2. an explicit namespace must be in the allow-list;
    3. an explicit namespace must equal the alert's namespace, unless it is
       ``observability`` or the alert namespace itself is not allow-listed;
    4. ``get`` / ``version`` read-only commands pass at this point;
    5. for named deployment/statefulset/daemonset/pod/service targets the
       resource resolver must confirm the resource.

    Args:
        action: Raw action text proposed for the approval.
        alert_namespace: Namespace reported by the alert; falls back to
            ``awoooi-prod`` when missing or blank.
        alertname: Alert name, used for logging only.
        alert_category: Alert category, used for logging only.

    Returns:
        A pass-through result carrying the original ``action``, or a blocked
        result built by ``_blocked``. If the resolver step raises, the action
        is allowed through with an ``action_guard_warning`` metadata entry.
    """
    command = (action or "").strip()
    if not command.lower().startswith("kubectl"):
        return ApprovalActionGuardResult(action=action)

    parsed = parse_kubectl_action(command)
    if not parsed.ok:
        return _blocked(command, f"invalid_kubectl:{parsed.reason}", alertname)

    requested_namespace = parsed.namespace
    expected_namespace = (alert_namespace or "awoooi-prod").strip() or "awoooi-prod"

    # Namespace checks only apply when the command names a namespace explicitly.
    if requested_namespace:
        if requested_namespace not in _ALLOWED_K8S_NAMESPACES:
            return _blocked(
                command,
                f"namespace_not_allowed:{requested_namespace}",
                alertname,
                expected_namespace=expected_namespace,
            )

        crosses_namespace = (
            expected_namespace in _ALLOWED_K8S_NAMESPACES
            and requested_namespace not in (expected_namespace, "observability")
        )
        if crosses_namespace:
            return _blocked(
                command,
                f"namespace_mismatch:{requested_namespace}!={expected_namespace}",
                alertname,
                expected_namespace=expected_namespace,
            )

    # Read-only get/version commands are safe enough to display once the
    # namespace is sane; everything else goes on to the resource check so
    # hallucinated targets such as "flywheelexecutionratemissing" are caught.
    if parsed.kind == ActionKind.READONLY and parsed.verb in {"get", "version"}:
        return ApprovalActionGuardResult(action=action)

    checkable_kinds = {"deployment", "statefulset", "daemonset", "pod", "service"}
    needs_lookup = parsed.resource_name and parsed.resource_type in checkable_kinds
    if not needs_lookup:
        return ApprovalActionGuardResult(action=action)

    try:
        from src.services.resource_resolver import get_resource_resolver

        lookup = await get_resource_resolver().resolve(
            raw_resource=parsed.resource_name,
            namespace=requested_namespace or expected_namespace,
            resource_kind=parsed.resource_type,
        )
        if not lookup.success:
            return _blocked(
                command,
                f"k8s_resource_not_found:{parsed.resource_type}/{parsed.resource_name}",
                alertname,
                expected_namespace=expected_namespace,
                candidates=lookup.candidates,
            )
    except Exception as exc:
        # Resolver unavailable: let the action through but flag it in metadata
        # so the approval shows that the resource check did not run.
        logger.warning(
            "approval_action_resource_guard_unavailable",
            alertname=alertname,
            alert_category=alert_category,
            action=command[:160],
            error=str(exc),
        )
        return ApprovalActionGuardResult(
            action=action,
            metadata={"action_guard_warning": "resource_guard_unavailable"},
        )

    return ApprovalActionGuardResult(action=action)
|
||||
|
||||
|
||||
def _blocked(
    raw_action: str,
    reason: str,
    alertname: str,
    *,
    expected_namespace: str | None = None,
    candidates: list[str] | None = None,
) -> ApprovalActionGuardResult:
    """Log a rejected action and build the blocked guard result.

    The returned ``action`` is a ``NO_ACTION - INVALID_TARGET`` placeholder
    that embeds the reason and a truncated copy of the original command; the
    (longer, also truncated) original is kept in ``metadata``.

    Args:
        raw_action: The stripped action text that was rejected.
        reason: Machine-readable rejection reason.
        alertname: Alert name, used for the log record.
        expected_namespace: Namespace the alert belongs to, if known.
        candidates: Alternative resource names, if any were found.

    Returns:
        An ``ApprovalActionGuardResult`` with ``blocked=True``.
    """
    logger.warning(
        "approval_action_blocked_before_persist",
        alertname=alertname,
        reason=reason,
        action=raw_action[:160],
        expected_namespace=expected_namespace,
        candidates=candidates or [],
    )

    placeholder_action = f"NO_ACTION - INVALID_TARGET: {reason}; original={raw_action[:180]}"
    guard_metadata = {
        "action_guard": "blocked_before_persist",
        "blocked_action": raw_action[:300],
        "blocked_reason": reason,
        "expected_namespace": expected_namespace,
        "candidates": candidates or [],
    }
    return ApprovalActionGuardResult(
        action=placeholder_action,
        blocked=True,
        reason=reason,
        metadata=guard_metadata,
    )
|
||||
@@ -36,6 +36,17 @@ if TYPE_CHECKING:
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
|
||||
def _decode_redis_member(value: object, fallback: str) -> str:
|
||||
"""Redis client 可能回 bytes 或 str;統一成 str 供 DB / log 使用。"""
|
||||
if isinstance(value, bytes):
|
||||
return value.decode("utf-8", errors="replace")
|
||||
if isinstance(value, str):
|
||||
return value
|
||||
if value is None:
|
||||
return fallback
|
||||
return str(value)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Data Types
|
||||
# =============================================================================
|
||||
@@ -83,8 +94,9 @@ class AlertGroupingService:
|
||||
# 5 分鐘滑動視窗
|
||||
WINDOW_SECONDS: int = 300
|
||||
|
||||
# 觸發聚合的閾值(同一分組 5 分鐘內超過此數量才聚合)
|
||||
GROUP_THRESHOLD: int = 3
|
||||
# 觸發聚合的閾值:保留第一張主卡,第二個同組告警開始收斂。
|
||||
# 2026-05-07 Codex — Telegram 群組噪音治理:舊值 3 會讓前兩張同類告警仍進 AI/Telegram。
|
||||
GROUP_THRESHOLD: int = 2
|
||||
|
||||
# Redis Key 前綴
|
||||
PREFIX_WINDOW = "alert_group:window:"
|
||||
@@ -188,7 +200,10 @@ class AlertGroupingService:
|
||||
|
||||
count = results[2]
|
||||
first_members = results[3]
|
||||
parent_fingerprint = first_members[0] if first_members else fingerprint
|
||||
parent_fingerprint = _decode_redis_member(
|
||||
first_members[0] if first_members else None,
|
||||
fallback=fingerprint,
|
||||
)
|
||||
|
||||
# 是否為父告警(第一個)
|
||||
is_parent = parent_fingerprint == fingerprint or count == 1
|
||||
|
||||
@@ -25,14 +25,19 @@ Approval Execution Service - Phase 16 R4.2 瘦身 Router 抽取
|
||||
|
||||
import asyncio
|
||||
import time
|
||||
from typing import TYPE_CHECKING
|
||||
from typing import TYPE_CHECKING, Any
|
||||
from uuid import UUID
|
||||
|
||||
import structlog
|
||||
|
||||
from src.core.config import settings
|
||||
from src.core.redis_client import get_redis
|
||||
from src.db.base import get_db_context
|
||||
from src.models.approval import ApprovalRequest
|
||||
from src.plugins.mcp.gateway import GatewayContext, McpGateway, McpGatewayError
|
||||
from src.plugins.mcp.interfaces import MCPToolResult
|
||||
from src.services.approval_db import get_approval_service, get_timeline_service
|
||||
from src.services.executor import OperationType, get_executor
|
||||
from src.services.executor import ExecutionResult, OperationType, get_executor
|
||||
from src.services.operation_parser import parse_operation_from_action
|
||||
|
||||
if TYPE_CHECKING:
|
||||
@@ -45,6 +50,23 @@ logger = structlog.get_logger(__name__)
|
||||
# 上限 60s 涵蓋 verifier warmup(10s) + collect(30s) + 緩衝 20s.
|
||||
_VERIFIER_AWAIT_TIMEOUT_SEC = 60.0
|
||||
|
||||
# T9: approved SSH execution must go through AwoooP MCP Gateway.
|
||||
# ApprovalRequest itself is the human/multi-sig decision artifact; for write/admin
|
||||
# tools we project it into the short-lived Gate 5 Redis key expected by Gateway.
|
||||
_SSH_GATEWAY_AGENT_ID = "approval_executor"
|
||||
_SSH_GATEWAY_PROJECT_ID = "awoooi"
|
||||
_SSH_GATEWAY_APPROVAL_TTL_SECONDS = 600
|
||||
_SSH_GATEWAY_TOOL_SCOPES: dict[str, str] = {
|
||||
"ssh_diagnose": "read",
|
||||
"ssh_docker_restart": "write",
|
||||
"ssh_docker_compose_restart": "write",
|
||||
"ssh_systemctl_restart": "write",
|
||||
"ssh_clear_docker_logs": "write",
|
||||
"ssh_renew_ssl": "write",
|
||||
"ssh_reload_nginx": "write",
|
||||
"ssh_docker_prune": "admin",
|
||||
}
|
||||
|
||||
|
||||
class ApprovalExecutionService:
|
||||
"""
|
||||
@@ -222,6 +244,7 @@ class ApprovalExecutionService:
|
||||
approval_id=str(approval.id),
|
||||
action=approval.action,
|
||||
reason="NO_ACTION - 純調查/觀察類,不執行破壞動作",
|
||||
path="no_action",
|
||||
)
|
||||
# 標為 SUCCESS (觀察/調查本身就是成功完成)
|
||||
await service.update_execution_status(approval.id, success=True)
|
||||
@@ -248,6 +271,29 @@ class ApprovalExecutionService:
|
||||
duration_ms=int((time.time() - _aol_started_ms) * 1000),
|
||||
output={"reason": "NO_ACTION", "action": approval.action[:200]},
|
||||
)
|
||||
# F2 (2026-05-07 ogt + Claude Sonnet 4.6 + Codex):
|
||||
# NO_ACTION 路徑要把 incident 推到 RESOLVED,否則 incident 永遠卡
|
||||
# INVESTIGATING(FlywheelExecutionRateMissing 死告警 + 566 stuck 增長根因 #1)。
|
||||
# resolve_incident 內已加 RESOLVED 冪等 guard,重複 resolve 會 idempotent
|
||||
# return existing incident 不會重觸發 postmortem。
|
||||
if approval.incident_id:
|
||||
try:
|
||||
from src.services.incident_service import get_incident_service
|
||||
|
||||
await get_incident_service().resolve_incident(approval.incident_id)
|
||||
logger.info(
|
||||
"incident_resolved_after_no_action_execution",
|
||||
incident_id=approval.incident_id,
|
||||
approval_id=str(approval.id),
|
||||
path="no_action",
|
||||
)
|
||||
except Exception as _resolve_e:
|
||||
logger.warning(
|
||||
"incident_resolve_after_no_action_execution_failed",
|
||||
incident_id=approval.incident_id,
|
||||
approval_id=str(approval.id),
|
||||
error=str(_resolve_e),
|
||||
)
|
||||
return True # NO_ACTION 視為成功完成
|
||||
|
||||
# 真解析失敗 (非 NO_ACTION)
|
||||
@@ -614,7 +660,7 @@ class ApprovalExecutionService:
|
||||
self,
|
||||
approval: ApprovalRequest,
|
||||
host: str,
|
||||
) -> "ExecutionResult":
|
||||
) -> ExecutionResult:
|
||||
"""
|
||||
執行 SSH 主機 action(手動批准路徑專用)
|
||||
|
||||
@@ -629,8 +675,6 @@ class ApprovalExecutionService:
|
||||
- "ps aux" / "df -h" / "free -h" / "top" / "uptime" / 'echo' / 'ls -lah' → ssh_diagnose
|
||||
- 其他:回傳失敗,提示 LLM 改寫 action
|
||||
"""
|
||||
from src.services.executor import ExecutionResult
|
||||
|
||||
start = time.time()
|
||||
action = approval.action or ""
|
||||
action_lower = action.lower().strip()
|
||||
@@ -684,11 +728,20 @@ class ApprovalExecutionService:
|
||||
error=err,
|
||||
)
|
||||
|
||||
# 呼叫 SSH MCP Provider
|
||||
from src.plugins.mcp.providers.ssh_provider import SSHProvider
|
||||
provider = SSHProvider()
|
||||
try:
|
||||
mcp_result = await provider.execute(tool_name=tool_name, parameters=params)
|
||||
logger.warning(
|
||||
"mcp_gateway_approved_ssh_execution_path",
|
||||
approval_id=str(approval.id),
|
||||
incident_id=approval.incident_id,
|
||||
tool=tool_name,
|
||||
host=host,
|
||||
agent_id=_SSH_GATEWAY_AGENT_ID,
|
||||
)
|
||||
mcp_result = await self._execute_ssh_tool_via_gateway(
|
||||
approval=approval,
|
||||
tool_name=tool_name,
|
||||
params=params,
|
||||
)
|
||||
duration_ms = int((time.time() - start) * 1000)
|
||||
success = bool(mcp_result.success)
|
||||
return ExecutionResult(
|
||||
@@ -719,6 +772,75 @@ class ApprovalExecutionService:
|
||||
error=str(e),
|
||||
)
|
||||
|
||||
async def _execute_ssh_tool_via_gateway(
    self,
    approval: ApprovalRequest,
    tool_name: str,
    params: dict[str, Any],
) -> MCPToolResult:
    """Run an approved SSH tool through the MCP Gateway.

    For tools whose scope is not ``read``, the approval is first projected
    into a short-lived Redis key (``mcp_approval:<project>:<agent>:<tool>:<run>``).
    A failure to write that key is logged and does not abort the call. The
    tool parameters are then extended with an ``_mcp_audit`` block and sent
    through ``McpGateway``.

    Args:
        approval: The approved request being executed.
        tool_name: SSH tool to invoke; tools missing from the scope table
            are treated as ``read``.
        params: Tool parameters; not mutated.

    Returns:
        The gateway's ``MCPToolResult``; a failed result if the gateway
        raises ``McpGatewayError``.

    Raises:
        ValueError: If ``approval.id`` is neither a ``UUID`` nor a string
            that parses as one.
    """
    scope = _SSH_GATEWAY_TOOL_SCOPES.get(tool_name, "read")
    raw_id = approval.id
    run_id = raw_id if isinstance(raw_id, UUID) else UUID(str(raw_id))

    if scope != "read":
        approval_key = (
            f"mcp_approval:{_SSH_GATEWAY_PROJECT_ID}:{_SSH_GATEWAY_AGENT_ID}:"
            f"{tool_name}:{run_id}"
        )
        try:
            redis = get_redis()
            await redis.set(
                approval_key,
                "approved",
                ex=_SSH_GATEWAY_APPROVAL_TTL_SECONDS,
            )
        except Exception as exc:
            # Best effort: log the failed projection and still attempt the call.
            logger.warning(
                "mcp_gateway_approval_projection_failed",
                approval_id=str(approval.id),
                tool=tool_name,
                approval_key=approval_key,
                error=str(exc),
            )

    audit_info = {
        "session_id": f"approval:{approval.id}",
        "incident_id": approval.incident_id,
        "agent_role": _SSH_GATEWAY_AGENT_ID,
        "flywheel_node": "execute",
        "approval_id": str(approval.id),
    }
    gateway_params = {**params, "_mcp_audit": audit_info}

    async with get_db_context(_SSH_GATEWAY_PROJECT_ID) as db:
        gateway_ctx = GatewayContext(
            project_id=_SSH_GATEWAY_PROJECT_ID,
            agent_id=_SSH_GATEWAY_AGENT_ID,
            tool_name=tool_name,
            run_id=run_id,
            trace_id=approval.incident_id or str(approval.id),
            is_shadow=False,
            environment={"env": "prod"},
            required_scope=scope,
        )
        try:
            return await McpGateway(db).call(gateway_ctx, gateway_params)
        except McpGatewayError as exc:
            # A gateway rejection becomes a failed tool result, not an exception.
            logger.warning(
                "mcp_gateway_approved_ssh_blocked",
                approval_id=str(approval.id),
                incident_id=approval.incident_id,
                tool=tool_name,
                gate=exc.gate,
                error_code=exc.error_code,
                error=str(exc),
            )
            return MCPToolResult(
                success=False,
                execution_id=f"blocked:{tool_name}:{run_id}",
                error=f"{exc.error_code}: {exc}",
            )
|
||||
|
||||
async def _push_execution_result_to_alert(
|
||||
self,
|
||||
approval: ApprovalRequest,
|
||||
@@ -736,7 +858,7 @@ class ApprovalExecutionService:
|
||||
"""
|
||||
try:
|
||||
# 自動執行路徑 skip(避免與 _push_auto_repair_result 重複發訊息)
|
||||
if (approval.requested_by or "").lower() == "auto_approve":
|
||||
if self._is_auto_approved_request(approval):
|
||||
return
|
||||
|
||||
if not approval.incident_id:
|
||||
@@ -812,9 +934,9 @@ class ApprovalExecutionService:
|
||||
f"{km_info}"
|
||||
)
|
||||
|
||||
await gateway._http_client.post(
|
||||
f"https://api.telegram.org/bot{settings.OPENCLAW_TG_BOT_TOKEN}/sendMessage",
|
||||
json={
|
||||
await gateway._send_request(
|
||||
"sendMessage",
|
||||
{
|
||||
"chat_id": target_chat_id,
|
||||
"text": text,
|
||||
"parse_mode": "HTML",
|
||||
@@ -984,6 +1106,186 @@ class ApprovalExecutionService:
|
||||
error=str(_e),
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _is_auto_approved_request(approval: "ApprovalRequest") -> bool:
|
||||
requested_by = (getattr(approval, "requested_by", "") or "").lower()
|
||||
return requested_by.startswith("auto_approve")
|
||||
|
||||
@staticmethod
|
||||
def _is_observation_only_action(action: str | None) -> bool:
|
||||
action_upper = (action or "").strip().upper()
|
||||
return (
|
||||
not action_upper
|
||||
or "NO_ACTION" in action_upper
|
||||
or "NO-ACTION" in action_upper
|
||||
or "NOACTION" in action_upper
|
||||
or action_upper.startswith("OBSERVE")
|
||||
or action_upper.startswith("INVESTIGATE")
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _approval_risk_value(approval: "ApprovalRequest") -> str | None:
|
||||
risk_level = getattr(approval, "risk_level", None)
|
||||
if risk_level is None:
|
||||
return None
|
||||
return getattr(risk_level, "value", str(risk_level))
|
||||
|
||||
async def finalize_auto_approved_execution(
    self,
    approval: "ApprovalRequest",
    *,
    success: bool,
    error_message: str | None = None,
) -> None:
    """
    Backfill the incident-linked evidence chain for an auto-approved execution.

    The CS2/CS3 webhook path calls execute_approved_action() first, for speed,
    and only creates the Incident afterwards. The executor therefore has no
    incident_id at execution time, so the verifier, KM and
    auto_repair_executions cannot be tied back to the same alert card. This
    method only appends the durable trace once the incident exists; it never
    re-executes the action.

    Args:
        approval: The approval whose execution is being finalized. Nothing is
            done unless it is an auto-approved request with an incident_id
            and a non-observation action.
        success: Whether the already-performed execution succeeded.
        error_message: Optional failure detail; a generic message is
            substituted when the execution failed and none was supplied.

    Every stage (execution record, timeline event, KM write, post-execution
    verification, incident resolution) is best-effort: a failure is logged as
    a warning and the remaining stages still run.
    """
    if not self._is_auto_approved_request(approval):
        return

    incident_id = getattr(approval, "incident_id", None)
    if not incident_id:
        logger.warning(
            "auto_approved_execution_finalize_skipped_no_incident",
            approval_id=str(getattr(approval, "id", "")),
            requested_by=getattr(approval, "requested_by", None),
        )
        return

    if self._is_observation_only_action(getattr(approval, "action", None)):
        logger.info(
            "auto_approved_execution_finalize_skipped_observation_only",
            approval_id=str(approval.id),
            incident_id=incident_id,
            action=(approval.action or "")[:120],
        )
        return

    parsed = parse_operation_from_action(approval.action)
    operation_type = parsed.operation_type
    resource_name = parsed.resource_name or "unknown"
    namespace = parsed.namespace or "default"

    # The slices below cap each value's length (36 / 200 / 50 characters).
    playbook_id = str(getattr(approval, "matched_playbook_id", None) or approval.id)[:36]
    operation_label = operation_type.value if operation_type else "unknown"
    playbook_name = f"approval_auto_execute:{operation_label}:{resource_name}"[:200]
    triggered_by = (getattr(approval, "requested_by", None) or "auto_approve")[:50]
    action_taken = f"auto_repair_playbook:{playbook_id}:{operation_label}:{resource_name}"
    if not success:
        action_taken = f"{action_taken}:FAILED"
        error_message = error_message or "auto-approved executor returned failure; see approval/aol logs"

    # Stage 1: durable execution record, skipped when an equivalent row exists.
    try:
        from src.repositories.audit_log_repository import get_auto_repair_execution_repository

        repo = get_auto_repair_execution_repository()
        existing = await repo.list_by_incident(incident_id)
        already_recorded = any(
            str(getattr(row, "playbook_id", "")) == playbook_id
            and getattr(row, "triggered_by", "") == triggered_by
            and (approval.action or "") in list(getattr(row, "executed_steps", []) or [])
            for row in existing
        )
        if not already_recorded:
            await repo.create(
                incident_id=incident_id,
                playbook_id=playbook_id,
                playbook_name=playbook_name,
                success=success,
                executed_steps=[approval.action],
                error_message=error_message,
                triggered_by=triggered_by,
                risk_level=self._approval_risk_value(approval),
            )
        else:
            logger.info(
                "auto_approved_execution_record_already_exists",
                approval_id=str(approval.id),
                incident_id=incident_id,
                playbook_id=playbook_id,
            )
    except Exception as exc:
        logger.warning(
            "auto_approved_execution_record_failed",
            approval_id=str(approval.id),
            incident_id=incident_id,
            error=str(exc),
        )

    # Stage 2: timeline event on the incident.
    try:
        timeline = get_timeline_service()
        await timeline.add_event(
            event_type="exec",
            status="success" if success else "error",
            title=f"{'✅' if success else '❌'} 自動批准執行已補鏈: {operation_label}",
            description=(
                f"Target: {resource_name} @ {namespace}; "
                f"source={triggered_by}; action={approval.action[:160]}"
            ),
            actor="leWOOOgo",
            actor_role="executor",
            approval_id=str(approval.id),
            incident_id=incident_id,
        )
    except Exception as exc:
        logger.warning(
            "auto_approved_execution_timeline_failed",
            approval_id=str(approval.id),
            incident_id=incident_id,
            error=str(exc),
        )

    # Stage 3: knowledge-management write-back.
    try:
        await self.write_execution_result_to_km(approval, success, error_message)
    except Exception as exc:
        logger.warning(
            "auto_approved_execution_km_failed",
            approval_id=str(approval.id),
            incident_id=incident_id,
            error=str(exc),
        )

    # Stage 4: post-execution verification, gated by a feature flag.
    from src.core.feature_flags import aiops_flags
    if aiops_flags.is_sub_flag_enabled("AIOPS_P1_POST_EXECUTION_VERIFIER"):
        try:
            await asyncio.wait_for(
                self._run_post_execution_verify(
                    approval=approval,
                    action_taken=action_taken,
                ),
                timeout=_VERIFIER_AWAIT_TIMEOUT_SEC,
            )
        except asyncio.TimeoutError:
            logger.warning(
                "auto_approved_execution_post_verify_timeout",
                approval_id=str(approval.id),
                incident_id=incident_id,
                timeout_sec=_VERIFIER_AWAIT_TIMEOUT_SEC,
            )
        except Exception as exc:
            # Keep this stage best-effort like the others: a verifier crash
            # must not abort finalization or skip incident resolution.
            logger.warning(
                "auto_approved_execution_post_verify_failed",
                approval_id=str(approval.id),
                incident_id=incident_id,
                error=str(exc),
            )

    # Stage 5: resolve the incident after a successful execution.
    if success:
        try:
            from src.services.incident_service import get_incident_service

            await get_incident_service().resolve_incident(incident_id)
            logger.info(
                "incident_resolved_after_auto_approved_execution_finalize",
                incident_id=incident_id,
                approval_id=str(approval.id),
            )
        except Exception as exc:
            logger.warning(
                "incident_resolve_after_auto_approved_execution_finalize_failed",
                incident_id=incident_id,
                approval_id=str(approval.id),
                error=str(exc),
            )
|
||||
|
||||
async def write_execution_result_to_km(
|
||||
self,
|
||||
approval: "ApprovalRequest",
|
||||
@@ -1002,7 +1304,7 @@ class ApprovalExecutionService:
|
||||
from src.services.km_writer import KMWritePayload, km_write_with_flag
|
||||
|
||||
# 來源辨識(B.1 精修)
|
||||
_is_auto = (approval.requested_by or "").lower() == "auto_approve"
|
||||
_is_auto = self._is_auto_approved_request(approval)
|
||||
_mode_prefix = "[自動修復]" if _is_auto else "[人工修復]"
|
||||
_mode_tag = "auto_executed" if _is_auto else "human_approved"
|
||||
|
||||
|
||||
@@ -22,9 +22,10 @@ Phase 8: 自動化層實作
|
||||
- P0/P1 嚴重度 Incident 需要人工確認
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass
|
||||
from collections.abc import Callable
|
||||
from typing import Protocol
|
||||
from dataclasses import dataclass
|
||||
import re
|
||||
from typing import Any, Protocol
|
||||
|
||||
import structlog
|
||||
|
||||
@@ -81,6 +82,55 @@ class AutoRepairResult:
|
||||
execution_time_ms: int = 0
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class _SshMcpRoute:
    """Route a legacy SSH playbook command to a governed MCP tool.

    Instances are frozen, so the two fields cannot be rebound; the ``params``
    dict itself remains an ordinary mutable dict.
    """

    # Name of the MCP tool the gateway should invoke for this step.
    tool_name: str
    # Parameters passed to that tool (the router fills in ``host`` and,
    # when known, ``container_name``).
    params: dict[str, Any]
|
||||
|
||||
|
||||
# Short host labels mapped to full addresses. _normalize_ssh_host() uses this
# to expand a bare label (optionally prefixed with "node-exporter-" or
# "host-") into the address handed to the MCP tool.
_SHORT_HOST_MAP: dict[str, str] = {
    "110": "192.168.0.110",
    "120": "192.168.0.120",
    "121": "192.168.0.121",
    "188": "192.168.0.188",
}
|
||||
|
||||
# Substrings that mark a legacy SSH step as a diagnostic. They are matched
# against the lower-cased command text; a step is routed to the MCP gateway
# only when at least one of these appears and no write keyword does.
# Entries with a trailing space ("tail ", "top ") require a following space.
_SSH_DIAGNOSTIC_KEYWORDS = (
    "ps aux",
    "docker stats",
    "docker inspect",
    "docker logs",
    "docker ps",
    "docker top",
    "df -h",
    "du -",
    "free -h",
    "journalctl",
    "systemctl show",
    "tail ",
    "top ",
    "uptime",
)
|
||||
|
||||
_SSH_WRITE_KEYWORDS = (
|
||||
"docker restart",
|
||||
"docker start",
|
||||
"docker stop",
|
||||
"docker rm",
|
||||
"docker prune",
|
||||
"systemctl restart",
|
||||
"systemctl stop",
|
||||
"systemctl start",
|
||||
"truncate ",
|
||||
" rm ",
|
||||
"rm -",
|
||||
"certbot renew",
|
||||
"bash ",
|
||||
)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Auto Repair Service Interface
|
||||
# =============================================================================
|
||||
@@ -108,6 +158,7 @@ class IAutoRepairService(Protocol):
|
||||
self,
|
||||
incident: Incident,
|
||||
playbook: Playbook,
|
||||
run_post_verification: bool = True,
|
||||
) -> AutoRepairResult:
|
||||
"""
|
||||
執行自動修復
|
||||
@@ -320,7 +371,16 @@ class AutoRepairService:
|
||||
)
|
||||
|
||||
# 4. 檢查最佳匹配
|
||||
best_match = recommendations[0]
|
||||
best_match = self._select_best_recommendation(recommendations, symptoms)
|
||||
if best_match is not recommendations[0]:
|
||||
logger.warning(
|
||||
"auto_repair_exact_match_prioritized",
|
||||
incident_id=incident.incident_id,
|
||||
selected_playbook_id=best_match.playbook.playbook_id,
|
||||
original_playbook_id=recommendations[0].playbook.playbook_id,
|
||||
selected_similarity=best_match.similarity_score,
|
||||
original_similarity=recommendations[0].similarity_score,
|
||||
)
|
||||
|
||||
# 2026-04-07 Claude Code: 統帥指令「直接全部跳成自動修復」
|
||||
# 移除: 相似度門檻、is_high_quality 門檻、冷啟動機制、風險等級門檻
|
||||
@@ -378,6 +438,7 @@ class AutoRepairService:
|
||||
playbook: Playbook,
|
||||
is_cold_start: bool = False,
|
||||
similarity_score: float | None = None,
|
||||
run_post_verification: bool = True,
|
||||
) -> AutoRepairResult:
|
||||
"""
|
||||
執行自動修復
|
||||
@@ -414,6 +475,8 @@ class AutoRepairService:
|
||||
executed_steps.append(
|
||||
f"Step {step.step_number}: {step.command[:50]}... -> {step_result}"
|
||||
)
|
||||
if self._is_step_failure_result(step_result):
|
||||
raise RuntimeError(f"Step {step.step_number} failed: {step_result}")
|
||||
|
||||
# 更新 Playbook 統計
|
||||
await self._playbook_service.record_execution(
|
||||
@@ -457,6 +520,8 @@ class AutoRepairService:
|
||||
except Exception as _db_e:
|
||||
logger.error("auto_repair_db_write_failed", error=str(_db_e))
|
||||
|
||||
self._record_auto_repair_metric(playbook, success=True)
|
||||
|
||||
# 2026-04-07 Claude Code: Sprint 4 B1/B2 — 記錄處置類型
|
||||
# P0-1 Fix: 統一使用 AnomalyCounter.hash_signature()
|
||||
try:
|
||||
@@ -577,10 +642,17 @@ class AutoRepairService:
|
||||
error=str(_inner_e),
|
||||
)
|
||||
|
||||
_vl_task = _asyncio.create_task(_verify_and_learn())
|
||||
if hasattr(self, "_pending_tasks"):
|
||||
self._pending_tasks.add(_vl_task)
|
||||
_vl_task.add_done_callback(self._pending_tasks.discard)
|
||||
if run_post_verification:
|
||||
_vl_task = _asyncio.create_task(_verify_and_learn())
|
||||
if hasattr(self, "_pending_tasks"):
|
||||
self._pending_tasks.add(_vl_task)
|
||||
_vl_task.add_done_callback(self._pending_tasks.discard)
|
||||
else:
|
||||
logger.info(
|
||||
"auto_repair_service_post_verify_delegated",
|
||||
incident_id=incident.incident_id,
|
||||
playbook_id=playbook.playbook_id,
|
||||
)
|
||||
except Exception as _vl_e:
|
||||
logger.warning("auto_repair_verifier_setup_failed", error=str(_vl_e))
|
||||
|
||||
@@ -630,6 +702,8 @@ class AutoRepairService:
|
||||
except Exception as _db_e:
|
||||
logger.error("auto_repair_db_write_failed", error=str(_db_e))
|
||||
|
||||
self._record_auto_repair_metric(playbook, success=False)
|
||||
|
||||
# 2026-04-04 Claude Code: Phase 25 P1 — 失敗修復後 fire-and-forget 生成 ANTI_PATTERN
|
||||
# 2026-04-05 Claude Code: I1 修正 — 補齊 _pending_tasks GC 防護(對稱化)
|
||||
try:
|
||||
@@ -684,6 +758,44 @@ class AutoRepairService:
|
||||
keywords=keywords[:10],
|
||||
)
|
||||
|
||||
def _select_best_recommendation(
    self,
    recommendations,
    symptoms: SymptomPattern,
):
    """Pick the recommendation to act on, favouring exact matches.

    Candidates are ranked first by whether their playbook names one of the
    firing alerts, then by whether it names one of the affected services, and
    only then by fuzzy similarity score. A higher fuzzy score therefore cannot
    outrank a playbook that explicitly names the alert or service. Live-fire
    T16 showed that ranking by score alone can route a safe K8s canary into an
    unrelated host diagnostic playbook.

    When several candidates rank equally, the earliest one in
    ``recommendations`` wins.
    """

    def _names(values) -> set[str]:
        # Drop empty entries and compare everything as strings.
        return {str(value) for value in (values or []) if value}

    wanted_alerts = _names(symptoms.alert_names)
    wanted_services = _names(symptoms.affected_services)

    def _rank(candidate) -> tuple[int, int, float]:
        pattern = candidate.playbook.symptom_pattern
        names_alert = not wanted_alerts.isdisjoint(_names(pattern.alert_names))
        names_service = not wanted_services.isdisjoint(_names(pattern.affected_services))
        score = float(candidate.similarity_score or 0.0)
        return (int(names_alert), int(names_service), score)

    return max(recommendations, key=_rank)
|
||||
|
||||
@staticmethod
|
||||
def _is_step_failure_result(step_result: str) -> bool:
|
||||
"""Treat executor-declared failures as failed auto-repair executions."""
|
||||
|
||||
normalized = (step_result or "").strip().upper()
|
||||
return normalized.startswith("FAILED:") or normalized == "UNKNOWN_ACTION_TYPE"
|
||||
|
||||
def _get_max_risk_level(self, playbook: Playbook) -> RiskLevel:
|
||||
"""取得 Playbook 中最高的風險等級"""
|
||||
risk_order = {
|
||||
@@ -700,6 +812,35 @@ class AutoRepairService:
|
||||
|
||||
return max_risk
|
||||
|
||||
def _record_auto_repair_metric(self, playbook: Playbook, success: bool) -> None:
    """Publish one real auto-repair execution to the Prometheus metrics.

    2026-05-06 ogt + Codex: the DB already had auto_repair_executions, but
    core.metrics.record_auto_repair() had no caller for a long time, so
    governance/heartbeat views built on Prometheus made the flywheel look
    idle. The label uses action_type rather than playbook_id to avoid high
    cardinality.

    The action label comes from the playbook's first repair step ("unknown"
    when there are none) and the tier from its maximum risk level (0 when the
    level is not one of LOW/MEDIUM/HIGH/CRITICAL). Any failure is logged as a
    warning and never propagates.
    """
    try:
        from src.core.metrics import record_auto_repair

        tier_by_risk = {
            RiskLevel.LOW: 1,
            RiskLevel.MEDIUM: 2,
            RiskLevel.HIGH: 3,
            RiskLevel.CRITICAL: 4,
        }
        lead_step = playbook.repair_steps[0] if playbook.repair_steps else None
        action_label = lead_step.action_type.value if lead_step else "unknown"
        tier = tier_by_risk.get(self._get_max_risk_level(playbook), 0)
        record_auto_repair(action=action_label, tier=tier, success=success)
    except Exception as exc:
        logger.warning(
            "auto_repair_metric_record_failed",
            playbook_id=playbook.playbook_id,
            success=success,
            error=str(exc),
        )
|
||||
|
||||
def _is_host_or_backup_incident(self, incident: Incident) -> bool:
|
||||
"""主機/備份類事件只能走 SSH/只讀診斷,不允許 K8s rollout 類修復。"""
|
||||
|
||||
@@ -827,6 +968,175 @@ class AutoRepairService:
|
||||
# 安全降級:檢查失敗 → 保守拒絕
|
||||
return False
|
||||
|
||||
def _route_legacy_ssh_command_to_mcp(
    self,
    incident: Incident,
    command: str,
) -> _SshMcpRoute | None:
    """Map read-only legacy ``ssh {host} '...'`` steps to MCP Gateway.

    YAML_RULE playbooks predate the URI executor and can contain compound
    shell diagnostics. Those commands should not bypass the newer scheme-based
    HostRepairAgent or loosen its shell safety guard; read-only diagnostics
    are instead routed to the governed SSH MCP provider.

    Returns ``None`` (no route) unless the command starts with ``ssh``,
    contains no write keyword, contains at least one diagnostic keyword, and
    a host can be resolved. The route carries only the resolved host and,
    when known, a container name; the original command text is not forwarded.
    """
    stripped = (command or "").strip()
    probe = stripped.lower()

    def _mentions(keywords) -> bool:
        return any(keyword in probe for keyword in keywords)

    if not probe.startswith("ssh "):
        return None
    # Write keywords veto the route even if a diagnostic keyword also appears.
    if _mentions(_SSH_WRITE_KEYWORDS) or not _mentions(_SSH_DIAGNOSTIC_KEYWORDS):
        return None

    host = self._resolve_ssh_host_for_incident(incident, stripped)
    if not host:
        return None

    route_params: dict[str, Any] = {"host": host}
    container = self._resolve_container_name_for_incident(incident, stripped)
    if container:
        route_params["container_name"] = container
    return _SshMcpRoute(tool_name="ssh_diagnose", params=route_params)
|
||||
|
||||
def preview_read_only_ssh_mcp_route(
    self,
    incident: Incident,
    command: str,
) -> dict[str, Any] | None:
    """Describe the MCP Gateway call a legacy SSH diagnostic would make.

    Used by remediation dry-runs to prove the supported executor path without
    running the original PlayBook step or writing an execution result.

    Returns ``None`` when the command is not routable; otherwise a dict with
    the tool name, its params and the fixed agent/scope/flywheel metadata.
    """
    route = self._route_legacy_ssh_command_to_mcp(incident, command)
    if route is None:
        return None

    preview: dict[str, Any] = {
        "tool_name": route.tool_name,
        "params": route.params,
    }
    preview["agent_id"] = "auto_repair_executor"
    preview["required_scope"] = "read"
    preview["flywheel_node"] = "execute"
    return preview
|
||||
|
||||
def _resolve_ssh_host_for_incident(self, incident: Incident, command: str) -> str:
    """Resolve ``{host}``, short host labels, and exporter instance ports.

    The first token after ``ssh`` is taken as the host. When that token is
    missing or still contains a ``{``/``}`` template placeholder, the host is
    read from the incident labels instead, trying ``host``, ``instance``,
    ``node`` and ``exported_instance`` in that order. The result is passed
    through ``_normalize_ssh_host``; an empty string means "unresolved".
    """
    labels = self._incident_labels(incident)

    found = re.match(r"ssh\s+([^\s'\"]+)", command.strip(), flags=re.IGNORECASE)
    candidate = found.group(1) if found else ""

    is_placeholder = "{" in candidate or "}" in candidate
    if not candidate or is_placeholder:
        candidate = ""
        for label_key in ("host", "instance", "node", "exported_instance"):
            candidate = str(labels.get(label_key) or "")
            if candidate:
                break

    return self._normalize_ssh_host(candidate)
|
||||
|
||||
@staticmethod
def _normalize_ssh_host(raw_host: str) -> str:
    """Reduce a host token to a bare host, expanding known short labels.

    In order: trims whitespace, strips a leading ``ssh://``, drops anything up
    to the last ``@``, unwraps a ``[bracketed]`` host, and removes a numeric
    ``:port`` suffix when the host contains exactly one colon. A result that
    is a key of ``_SHORT_HOST_MAP`` (optionally prefixed with
    ``node-exporter-`` or ``host-``) is replaced by the mapped address;
    anything else is returned as-is.
    """
    host = (raw_host or "").strip().removeprefix("ssh://")
    # rpartition leaves the text untouched when there is no "@".
    host = host.rpartition("@")[2]

    if host.startswith("["):
        inner, closing, _ = host[1:].partition("]")
        if closing:
            host = inner

    # Only a single colon is treated as host:port, so multi-colon hosts
    # (e.g. unwrapped IPv6 literals) are left alone.
    if host.count(":") == 1:
        name, _, port = host.partition(":")
        if port.isdigit():
            host = name

    direct = _SHORT_HOST_MAP.get(host)
    if direct is not None:
        return direct

    labelled = re.fullmatch(r"(?:node-exporter-|host-)?(110|120|121|188)", host)
    if labelled:
        return _SHORT_HOST_MAP[labelled.group(1)]
    return host
|
||||
|
||||
def _resolve_container_name_for_incident(
    self,
    incident: Incident,
    command: str,
) -> str:
    """Work out which container a diagnostic step is about.

    Incident labels win: the first non-empty, placeholder-free value among
    ``container_name``, ``container`` and ``name`` is returned. Otherwise the
    name is parsed from a ``docker stats --no-stream|inspect|logs|top <name>``
    or ``docker ps -a --filter name=<name>`` fragment of the command.

    Returns:
        The container name, or ``""`` when none can be determined.
    """
    labels = self._incident_labels(incident)
    for key in ("container_name", "container", "name"):
        value = str(labels.get(key) or "").strip()
        # Skip unrendered template placeholders such as "{container}".
        if value and "{" not in value and "}" not in value:
            return value

    # Two fixes relative to the earlier single-group pattern:
    #  * "--filter name=<name>" has no whitespace after "=", so the separator
    #    after "name=" is optional instead of mandatory;
    #  * the name must start with an alphanumeric character, so an option
    #    such as "--tail" is no longer mistaken for a container name.
    match = re.search(
        r"docker\s+"
        r"(?:(?:stats\s+--no-stream|inspect|logs|top)\s+|ps\s+-a\s+--filter\s+name=\s*)"
        r"([a-zA-Z0-9][a-zA-Z0-9._-]*)",
        command,
    )
    return match.group(1) if match else ""
|
||||
|
||||
@staticmethod
def _incident_labels(incident: Incident) -> dict[str, Any]:
    """Return the labels of the first signal that carries any.

    Signals without a ``labels`` attribute, or with empty labels, are skipped.
    An empty dict is returned when no signal has labels.
    """
    label_sets = (
        getattr(signal, "labels", None) for signal in incident.signals or []
    )
    return next((labels for labels in label_sets if labels), {})
|
||||
|
||||
async def _execute_ssh_mcp_route(
    self,
    incident: Incident,
    route: _SshMcpRoute,
) -> str:
    """Execute a routed SSH diagnostic through AwoooP MCP Gateway.

    The call is made with read scope as the ``auto_repair_executor`` agent,
    with audit context attached to the route's params.

    Returns:
        A step-result string: ``SUCCESS: mcp:<tool> <output preview>`` (the
        preview is capped at 500 characters) or ``FAILED: mcp:<tool> ...``.
        This method does not raise for import, gateway or execution errors;
        they are all reported through the ``FAILED:`` form.
    """
    # Import in a separate try block. If these imports shared a try with
    # ``except McpGatewayError``, an import failure would leave that name
    # unbound and evaluating the except clause would itself raise
    # UnboundLocalError, bypassing the generic handler.
    try:
        from src.db.base import get_db_context
        from src.plugins.mcp.gateway import GatewayContext, McpGateway, McpGatewayError
        from src.services.mcp_audit_context import with_mcp_audit_context
    except Exception as exc:
        logger.warning(
            "auto_repair_ssh_mcp_route_failed",
            incident_id=incident.incident_id,
            tool=route.tool_name,
            error=str(exc),
        )
        return f"FAILED: mcp:{route.tool_name} {exc}"

    try:
        incident_id = incident.incident_id
        params = with_mcp_audit_context(
            route.params,
            session_id=f"incident:{incident_id}:auto_repair_execute",
            incident_id=incident_id,
            flywheel_node="execute",
            agent_role="auto_repair_executor",
        )
        async with get_db_context("awoooi") as db:
            ctx = GatewayContext(
                project_id="awoooi",
                agent_id="auto_repair_executor",
                tool_name=route.tool_name,
                trace_id=incident_id,
                is_shadow=False,
                environment={"env": "prod"},
                required_scope="read",
            )
            result = await McpGateway(db).call(ctx, params)
    except McpGatewayError as exc:
        # The gateway refused or failed the call; surface its error code.
        return f"FAILED: mcp:{route.tool_name} {exc.error_code}: {exc}"
    except Exception as exc:
        logger.warning(
            "auto_repair_ssh_mcp_route_failed",
            incident_id=incident.incident_id,
            tool=route.tool_name,
            error=str(exc),
        )
        return f"FAILED: mcp:{route.tool_name} {exc}"

    if result.success:
        preview = str(result.output or "")[:500]
        return f"SUCCESS: mcp:{route.tool_name} {preview}".strip()
    return f"FAILED: mcp:{route.tool_name} {result.error or 'execution failed'}"
|
||||
|
||||
async def _execute_step(self, incident: Incident, step) -> str:
|
||||
"""
|
||||
執行單一修復步驟
|
||||
@@ -858,6 +1168,10 @@ class AutoRepairService:
|
||||
|
||||
# 2026-04-06 Claude Code: Sprint 3 — repair_by_uri (URI scheme 路由)
|
||||
if step.action_type == ActionType.SSH_COMMAND:
|
||||
route = self._route_legacy_ssh_command_to_mcp(incident, step.command)
|
||||
if route is not None:
|
||||
return await self._execute_ssh_mcp_route(incident, route)
|
||||
|
||||
from src.services.host_repair_agent import HostRepairAgent
|
||||
agent = HostRepairAgent()
|
||||
approved = not getattr(step, "requires_approval", False)
|
||||
|
||||
433
apps/api/src/services/awooop_ansible_audit_service.py
Normal file
433
apps/api/src/services/awooop_ansible_audit_service.py
Normal file
@@ -0,0 +1,433 @@
|
||||
"""AwoooP Ansible audit helpers.
|
||||
|
||||
This module is intentionally non-executing. It exposes the Ansible audit
|
||||
contract and repo-known playbook catalog so the truth chain can say whether
|
||||
Ansible was actually considered or executed, without pretending that catalog
|
||||
hints are runtime remediation.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import Any
|
||||
|
||||
import structlog
|
||||
from sqlalchemy import text
|
||||
|
||||
from src.db.base import get_db_context
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
|
||||
# operation_type values that identify an automation-operation row as
# Ansible-related. _is_ansible_operation() compares against the lower-cased
# operation_type, and build_ansible_truth() publishes the sorted list as part
# of the audit contract.
ANSIBLE_OPERATION_TYPES = frozenset({
    "ansible_candidate_matched",
    "ansible_check_mode_executed",
    "ansible_apply_executed",
    "ansible_rollback_executed",
    "ansible_execution_skipped",
})
|
||||
|
||||
# Static catalog of repo-known Ansible playbooks. _catalog_hints() substring-
# matches each entry's "keywords" against the lower-cased incident/drift text
# and exposes every other field in its output; the keyword list itself is not
# exposed. Every entry has auto_apply_enabled=False and approval_required=True.
_CATALOG: tuple[dict[str, Any], ...] = (
    {
        "catalog_id": "ansible:110-devops",
        "playbook_path": "infra/ansible/playbooks/110-devops.yml",
        "inventory_hosts": ["host_110"],
        "domains": ["swap", "harbor", "sentry", "gitea", "langfuse", "bitan", "runner", "keepalived", "nginx"],
        "keywords": [
            "110",
            "docker",
            "container",
            "dockercontainerunhealthy",
            "swap",
            "harbor",
            "sentry",
            "gitea",
            "langfuse",
            "bitan",
            "runner",
            "github-runner",
            "keepalived",
        ],
        "supports_check_mode": True,
        "auto_apply_enabled": False,
        "approval_required": True,
        "risk_level": "medium",
    },
    {
        "catalog_id": "ansible:188-ai-web",
        "playbook_path": "infra/ansible/playbooks/188-ai-web.yml",
        "inventory_hosts": ["host_188"],
        "domains": ["docker", "momo_backup", "signoz", "minio", "litellm", "n8n", "open_webui", "nginx"],
        "keywords": [
            "188",
            "docker",
            "container",
            "dockercontainerunhealthy",
            "momo",
            "backup",
            "postgresql",
            "pg_backup",
            "signoz",
            "minio",
            "litellm",
            "n8n",
            "open-webui",
            "openwebui",
            "docker-registry",
        ],
        "supports_check_mode": True,
        "auto_apply_enabled": False,
        "approval_required": True,
        "risk_level": "medium",
    },
    {
        "catalog_id": "ansible:nginx-sync",
        "playbook_path": "infra/ansible/playbooks/nginx-sync.yml",
        "inventory_hosts": ["host_110", "host_188"],
        "domains": ["nginx", "proxy", "ollama_proxy", "tls"],
        "keywords": ["nginx", "proxy", "ollama", "gcp", "tls", "cert", "502", "upstream"],
        "supports_check_mode": True,
        "auto_apply_enabled": False,
        "approval_required": True,
        "risk_level": "medium",
    },
    {
        # The only entry without check-mode support and the only high-risk one.
        "catalog_id": "ansible:restore-password-auth",
        "playbook_path": "infra/ansible/playbooks/restore-password-auth.yml",
        "inventory_hosts": ["host_110", "host_120", "host_121", "host_188"],
        "domains": ["ssh", "password_auth"],
        "keywords": ["ssh", "passwordauthentication", "password auth", "login", "auth"],
        "supports_check_mode": False,
        "auto_apply_enabled": False,
        "approval_required": True,
        "risk_level": "high",
    },
)
|
||||
|
||||
|
||||
def _get(row: dict[str, Any], key: str) -> Any:
|
||||
return row.get(key)
|
||||
|
||||
|
||||
def _tags(row: dict[str, Any]) -> list[str]:
|
||||
raw = _get(row, "tags")
|
||||
if isinstance(raw, list):
|
||||
return [str(item).lower() for item in raw]
|
||||
if isinstance(raw, str):
|
||||
return [part.strip().lower() for part in raw.split(",") if part.strip()]
|
||||
return []
|
||||
|
||||
|
||||
def _first_present(row: dict[str, Any], keys: tuple[str, ...]) -> Any:
|
||||
for key in keys:
|
||||
value = _get(row, key)
|
||||
if value not in (None, ""):
|
||||
return value
|
||||
return None
|
||||
|
||||
|
||||
def _is_ansible_operation(row: dict[str, Any]) -> bool:
    """Decide whether an automation-operation row concerns Ansible.

    A row qualifies when any of these hold, checked in order:

    1. its lower-cased ``operation_type`` is in ``ANSIBLE_OPERATION_TYPES``;
    2. its tags include ``ansible``;
    3. the first non-empty executor / execution-backend field equals
       ``ansible`` (case-insensitive);
    4. the first non-empty playbook-path field contains ``infra/ansible/``, or
       is a YAML file (``.yml`` or ``.yaml``) whose path mentions ``ansible``.
    """
    operation_type = str(_get(row, "operation_type") or "").lower()
    if operation_type in ANSIBLE_OPERATION_TYPES:
        return True
    if "ansible" in _tags(row):
        return True

    executor = str(
        _first_present(
            row,
            (
                "input_executor",
                "input_execution_backend",
                "output_executor",
                "output_execution_backend",
            ),
        )
        or ""
    ).lower()
    if executor == "ansible":
        return True

    playbook_path = str(
        _first_present(
            row,
            (
                "input_playbook_path",
                "output_playbook_path",
                "input_ansible_playbook_path",
                "output_ansible_playbook_path",
            ),
        )
        or ""
    ).lower()
    # Both YAML extensions are accepted, and the and/or grouping is spelled
    # out rather than left to operator precedence.
    is_ansible_yaml = (
        playbook_path.endswith((".yml", ".yaml")) and "ansible" in playbook_path
    )
    return "infra/ansible/" in playbook_path or is_ansible_yaml
|
||||
|
||||
|
||||
def _ansible_record(row: dict[str, Any]) -> dict[str, Any]:
    """Project an automation-operation row onto the Ansible audit record shape.

    Plain columns are copied as-is (``None`` when absent). The playbook id,
    playbook path, check-mode flag and not-used reason each take the first
    non-empty value among their ``input_*`` / ``output_*`` variants.
    """
    path_keys = (
        "input_playbook_path",
        "output_playbook_path",
        "input_ansible_playbook_path",
        "output_ansible_playbook_path",
    )

    record: dict[str, Any] = {
        column: row.get(column)
        for column in ("op_id", "operation_type", "status", "actor")
    }
    record["playbook_id"] = _first_present(row, ("input_playbook_id", "output_playbook_id"))
    record["playbook_path"] = _first_present(row, path_keys)
    record["check_mode"] = _first_present(row, ("input_check_mode", "output_check_mode"))
    record["not_used_reason"] = _first_present(
        row, ("input_not_used_reason", "output_not_used_reason")
    )
    record.update(
        (column, row.get(column))
        for column in ("dry_run_result", "error", "duration_ms", "tags", "created_at")
    )
    return record
|
||||
|
||||
|
||||
def _flatten_text(value: Any, pieces: list[str], remaining: int = 80) -> int:
|
||||
if remaining <= 0 or value is None:
|
||||
return remaining
|
||||
if isinstance(value, dict):
|
||||
for key, item in value.items():
|
||||
remaining = _flatten_text(key, pieces, remaining)
|
||||
remaining = _flatten_text(item, pieces, remaining)
|
||||
if remaining <= 0:
|
||||
break
|
||||
return remaining
|
||||
if isinstance(value, list):
|
||||
for item in value:
|
||||
remaining = _flatten_text(item, pieces, remaining)
|
||||
if remaining <= 0:
|
||||
break
|
||||
return remaining
|
||||
pieces.append(str(value).lower())
|
||||
return remaining - 1
|
||||
|
||||
|
||||
def _source_haystack(incident: dict[str, Any] | None, drift: dict[str, Any] | None) -> str:
    """Build one lower-cased, space-joined search string from both sources.

    The incident is flattened first, then the drift; each is flattened with
    its own default fragment budget.
    """
    fragments: list[str] = []
    for source in (incident, drift):
        _flatten_text(source, fragments)
    return " ".join(fragments)
|
||||
|
||||
|
||||
def _catalog_hints(incident: dict[str, Any] | None, drift: dict[str, Any] | None) -> dict[str, Any]:
    """Match the static playbook catalog against incident/drift text.

    A catalog entry becomes a candidate when at least one of its keywords
    occurs as a substring of the flattened source text. Candidates carry the
    entry's public fields (its keyword list is not included), the number of
    matched keywords as ``match_score`` and the matched keywords themselves.
    They are ordered by descending score, then by ``catalog_id``.

    Returns:
        A dict with the match mode, ``decision_effect`` fixed to ``"none"``,
        the catalog size, the ranked candidates and the ids of entries that
        did not match.
    """
    exposed_fields = {
        "catalog_id",
        "playbook_path",
        "inventory_hosts",
        "domains",
        "supports_check_mode",
        "auto_apply_enabled",
        "approval_required",
        "risk_level",
    }
    haystack = _source_haystack(incident, drift)

    matches: list[dict[str, Any]] = []
    misses: list[str] = []
    for entry in _CATALOG:
        hits = [keyword for keyword in entry["keywords"] if keyword in haystack]
        if not hits:
            misses.append(entry["catalog_id"])
            continue
        summary = {
            field: value for field, value in entry.items() if field in exposed_fields
        }
        summary["match_score"] = len(hits)
        summary["matched_keywords"] = hits
        matches.append(summary)

    ranked = sorted(
        matches,
        key=lambda row: (-int(row["match_score"]), str(row["catalog_id"])),
    )
    return {
        "match_mode": "static_catalog_keyword_hint_v1",
        "decision_effect": "none",
        "available_count": len(_CATALOG),
        "candidates": ranked,
        "unmatched_catalog_ids": misses,
    }
|
||||
|
||||
|
||||
def build_ansible_truth(
    automation_ops: list[dict[str, Any]],
    *,
    incident: dict[str, Any] | None,
    drift: dict[str, Any] | None,
) -> dict[str, Any]:
    """Build the truth-chain Ansible section from audited facts and catalog hints.

    Args:
        automation_ops: Automation-operation rows; only those recognised as
            Ansible operations are turned into records.
        incident: Incident data used for catalog keyword hints, if any.
        drift: Drift data used for catalog keyword hints, if any.

    Returns:
        A dict with ``considered`` (true only when at least one Ansible record
        exists), the records, the fixed audit contract, the catalog hints, and
        a ``not_used_reason`` that is ``None`` when records exist.
    """
    records = list(map(_ansible_record, filter(_is_ansible_operation, automation_ops)))

    audit_contract = {
        "schema_version": "ansible_executor_audit_v1",
        "operation_types": sorted(ANSIBLE_OPERATION_TYPES),
        "required_audit_fields": [
            "operation_type",
            "status",
            "actor",
            "input.executor",
            "input.playbook_path",
            "input.check_mode",
            "output.not_used_reason",
            "dry_run_result",
        ],
        "default_execution_mode": "catalog/dry-run audit only until approval execution is explicitly wired",
    }

    if records:
        not_used_reason = None
    else:
        not_used_reason = (
            "no automation_operation_log row with Ansible operation type, tag, or executor backend for this source"
        )

    return {
        "considered": bool(records),
        "records": records,
        "audit_contract": audit_contract,
        "candidate_catalog": _catalog_hints(incident, drift),
        "not_used_reason": not_used_reason,
    }
|
||||
|
||||
|
||||
def _incident_public_dict(incident: Any) -> dict[str, Any]:
|
||||
if incident is None:
|
||||
return {}
|
||||
if isinstance(incident, dict):
|
||||
return incident
|
||||
severity = getattr(incident, "severity", None)
|
||||
signals_payload: list[dict[str, Any]] = []
|
||||
for signal in getattr(incident, "signals", None) or []:
|
||||
signals_payload.append({
|
||||
"alert_name": getattr(signal, "alert_name", None),
|
||||
"labels": getattr(signal, "labels", None) or {},
|
||||
"annotations": getattr(signal, "annotations", None) or {},
|
||||
})
|
||||
return {
|
||||
"incident_id": getattr(incident, "incident_id", None),
|
||||
"project_id": getattr(incident, "project_id", None),
|
||||
"alertname": getattr(incident, "alertname", None),
|
||||
"alert_category": getattr(incident, "alert_category", None),
|
||||
"notification_type": getattr(incident, "notification_type", None),
|
||||
"severity": getattr(severity, "value", severity),
|
||||
"affected_services": getattr(incident, "affected_services", None) or [],
|
||||
"signals": signals_payload,
|
||||
}
|
||||
|
||||
|
||||
def build_ansible_decision_audit_payload(
    *,
    incident: Any,
    proposal_data: dict[str, Any],
    decision_path: str,
    not_used_reason: str,
) -> dict[str, Any] | None:
    """Return an AOL payload when Ansible has catalog candidates for a decision.

    Args:
        incident: Incident object or dict; normalised via
            ``_incident_public_dict``.
        proposal_data: Proposal mapping; only ``source``, ``risk_level``,
            ``action`` and ``kubectl_command`` are read.
        decision_path: Recorded verbatim in the input payload.
        not_used_reason: Recorded in both ``output`` and ``dry_run_result``.

    Returns:
        ``None`` when ``_catalog_hints`` yields no candidates; otherwise a dict
        with ``operation_type``, ``status``, ``input``, ``output``,
        ``dry_run_result`` and ``tags``.

    Raises:
        KeyError: if the hints lack ``match_mode`` or a candidate row lacks one
            of the projected fields.
    """
    incident_payload = _incident_public_dict(incident)
    hints = _catalog_hints(incident_payload, None)
    candidates = hints.get("candidates") or []
    if not candidates:
        return None

    match_mode = hints["match_mode"]
    candidate_fields = (
        "catalog_id",
        "playbook_path",
        "inventory_hosts",
        "risk_level",
        "match_score",
        "matched_keywords",
    )
    # Only the first five candidates are stored in the audit row.
    executor_candidates = [
        {field: row[field] for field in candidate_fields}
        for row in candidates[:5]
    ]
    action_preview = str(
        proposal_data.get("action")
        or proposal_data.get("kubectl_command")
        or ""
    )[:240]

    input_payload = {
        "incident_id": str(incident_payload.get("incident_id") or ""),
        "executor": "ansible",
        "execution_backend": "ansible",
        "decision_path": decision_path,
        "check_mode": True,
        "apply_enabled": False,
        "approval_required": True,
        "candidate_catalog_schema": match_mode,
        "executor_candidates": executor_candidates,
        "proposal_source": proposal_data.get("source", ""),
        "proposal_risk_level": proposal_data.get("risk_level", ""),
        "proposal_action_preview": action_preview,
    }
    output_payload = {
        "not_used_reason": not_used_reason,
        "decision_effect": "audit_only",
        "next_required_step": "wire approval_execution to Ansible check-mode before apply",
    }
    return {
        "operation_type": "ansible_candidate_matched",
        "status": "dry_run",
        "input": input_payload,
        "output": output_payload,
        "dry_run_result": {
            "check_mode_executed": False,
            # Counts every candidate, not just the five kept above.
            "candidate_count": len(candidates),
            "reason": not_used_reason,
        },
        "tags": ["ansible", "decision", "candidate", "check_mode_pending"],
    }
|
||||
|
||||
|
||||
async def record_ansible_decision_audit(
    *,
    incident: Any,
    proposal_data: dict[str, Any],
    decision_path: str,
    not_used_reason: str,
) -> bool:
    """Write a best-effort Ansible candidate audit row for one decision.

    At most one ``ansible_candidate_matched`` row is written per incident id:
    an existing row short-circuits the insert. The existence check and the
    insert are two separate statements.

    Returns:
        ``True`` when a row was inserted; ``False`` when there were no catalog
        candidates, a row already existed, or any exception occurred (the
        exception is logged as a warning, never raised).
    """
    payload = build_ansible_decision_audit_payload(
        incident=incident,
        proposal_data=proposal_data,
        decision_path=decision_path,
        not_used_reason=not_used_reason,
    )
    if payload is None:
        return False

    incident_id = payload["input"]["incident_id"]
    # Incidents may arrive as plain dicts (the payload builder accepts them),
    # in which case getattr() would never see the project id.
    if isinstance(incident, dict):
        raw_project_id = incident.get("project_id")
    else:
        raw_project_id = getattr(incident, "project_id", None)
    project_id = raw_project_id or "awoooi"

    try:
        async with get_db_context(str(project_id)) as db:
            existing = await db.execute(
                text("""
                    SELECT op_id
                    FROM automation_operation_log
                    WHERE operation_type = 'ansible_candidate_matched'
                      AND input ->> 'incident_id' = :incident_id
                      AND input ->> 'executor' = 'ansible'
                    LIMIT 1
                """),
                {"incident_id": incident_id},
            )
            if existing.scalar() is not None:
                return False

            await db.execute(
                text("""
                    INSERT INTO automation_operation_log (
                        operation_type, actor, status,
                        input, output, dry_run_result, tags
                    ) VALUES (
                        :operation_type,
                        'decision_manager',
                        :status,
                        CAST(:input AS jsonb),
                        CAST(:output AS jsonb),
                        CAST(:dry_run_result AS jsonb),
                        :tags
                    )
                """),
                {
                    "operation_type": payload["operation_type"],
                    "status": payload["status"],
                    "input": json.dumps(payload["input"], ensure_ascii=False),
                    "output": json.dumps(payload["output"], ensure_ascii=False),
                    "dry_run_result": json.dumps(
                        payload["dry_run_result"], ensure_ascii=False
                    ),
                    "tags": payload["tags"],
                },
            )
        return True
    except Exception as exc:
        # Audit writes are best-effort: log and report failure to the caller.
        logger.warning(
            "ansible_decision_audit_write_failed",
            incident_id=incident_id,
            error=str(exc),
        )
        return False
|
||||
@@ -46,6 +46,8 @@ from typing import Any
|
||||
|
||||
import structlog
|
||||
|
||||
from src.core.redis_client import get_redis
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
@@ -219,29 +221,23 @@ async def record_approval(
|
||||
exp = payload["exp"]
|
||||
|
||||
try:
|
||||
import aioredis
|
||||
from src.core.config import settings
|
||||
|
||||
redis = aioredis.from_url(settings.REDIS_URL)
|
||||
redis = get_redis()
|
||||
|
||||
# jti NX
|
||||
jti_key = f"{_JTI_KEY_PREFIX}{jti}"
|
||||
ttl_remaining = max(exp - int(time.time()), 1)
|
||||
ok = await redis.set(jti_key, "1", nx=True, ex=ttl_remaining)
|
||||
if not ok:
|
||||
await redis.aclose()
|
||||
raise TokenReplayError(f"jti={jti!r} 已使用")
|
||||
|
||||
# SADD approver
|
||||
sig_key = f"{_SIG_SET_PREFIX}{project_id}:{run_id}:{tool_name}"
|
||||
added = await redis.sadd(sig_key, approver_id)
|
||||
if added == 0:
|
||||
await redis.aclose()
|
||||
raise DuplicateApproverError(f"approver '{approver_id}' 已簽核")
|
||||
|
||||
await redis.expire(sig_key, _SIG_TTL_SECONDS)
|
||||
count = int(await redis.scard(sig_key))
|
||||
await redis.aclose()
|
||||
|
||||
logger.info(
|
||||
"awooop_approval_recorded",
|
||||
@@ -271,13 +267,9 @@ async def check_approval_quorum(
|
||||
檢查 quorum。Raises QuorumNotMetError if 不足。
|
||||
"""
|
||||
try:
|
||||
import aioredis
|
||||
from src.core.config import settings
|
||||
|
||||
redis = aioredis.from_url(settings.REDIS_URL)
|
||||
redis = get_redis()
|
||||
sig_key = f"{_SIG_SET_PREFIX}{project_id}:{run_id}:{tool_name}"
|
||||
count = int(await redis.scard(sig_key))
|
||||
await redis.aclose()
|
||||
|
||||
if count < required_count:
|
||||
raise QuorumNotMetError(f"簽核數不足({count}/{required_count})")
|
||||
|
||||
1516
apps/api/src/services/awooop_truth_chain_service.py
Normal file
1516
apps/api/src/services/awooop_truth_chain_service.py
Normal file
File diff suppressed because it is too large
Load Diff
@@ -336,7 +336,7 @@ async def _get_tenant_budget_limit(project_id: str) -> Decimal | None:
|
||||
try:
|
||||
from sqlalchemy import text
|
||||
from src.db.base import get_db_context
|
||||
async with get_db_context() as db:
|
||||
async with get_db_context(project_id) as db:
|
||||
row = await db.execute(
|
||||
text("SELECT budget_limit_usd FROM awooop_projects WHERE project_id = :pid"),
|
||||
{"pid": project_id},
|
||||
|
||||
@@ -280,6 +280,7 @@ async def dispatch_action(
|
||||
|
||||
# MCP registry dispatch
|
||||
from src.plugins.mcp.registry import get_provider
|
||||
from src.services.mcp_audit_context import with_mcp_audit_context
|
||||
provider_name = _resolve_provider_name(spec.mcp_provider)
|
||||
provider = get_provider(provider_name)
|
||||
if not provider:
|
||||
@@ -293,8 +294,16 @@ async def dispatch_action(
|
||||
)
|
||||
|
||||
# 執行 MCP tool with timeout
|
||||
audited_params = with_mcp_audit_context(
|
||||
resolved_params,
|
||||
session_id=f"callback:{incident_id}:{action_name}",
|
||||
incident_id=incident_id,
|
||||
flywheel_node="operate",
|
||||
agent_role="telegram_callback_dispatcher",
|
||||
operator_user_id=user_id,
|
||||
)
|
||||
mcp_result = await asyncio.wait_for(
|
||||
provider.execute(spec.mcp_tool, resolved_params),
|
||||
provider.execute(spec.mcp_tool, audited_params),
|
||||
timeout=float(spec.timeout_sec),
|
||||
)
|
||||
|
||||
|
||||
1249
apps/api/src/services/channel_event_dossier_service.py
Normal file
1249
apps/api/src/services/channel_event_dossier_service.py
Normal file
File diff suppressed because it is too large
Load Diff
@@ -28,23 +28,201 @@ from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import hashlib
|
||||
import html
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
import re
|
||||
from datetime import UTC, datetime
|
||||
from typing import Any
|
||||
from uuid import UUID
|
||||
from uuid import NAMESPACE_URL, UUID, uuid5
|
||||
|
||||
import structlog
|
||||
from sqlalchemy import select, text
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from src.db.awooop_models import AwoooPRunState
|
||||
from src.services.audit_sink import _redact_string
|
||||
from src.services.audit_sink import _redact_string, sanitize
|
||||
from src.services.platform_runtime import create_run
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
# Progressive Feedback Policy:等待超過此秒數才發 interim 訊息
|
||||
_INTERIM_WAIT_SECONDS = 30
|
||||
_INBOUND_REDACTION_VERSION = "audit_sink_v1"
|
||||
_OUTBOUND_REDACTION_VERSION = "audit_sink_v1"
|
||||
_INCIDENT_ID_RE = re.compile(r"\bINC-\d{8}-[A-Z0-9]{6}\b")
|
||||
|
||||
|
||||
def _db_timestamp_now() -> datetime:
|
||||
"""Return UTC now in the timestamp shape accepted by the production DB path."""
|
||||
return datetime.now(UTC).replace(tzinfo=None)
|
||||
|
||||
|
||||
def _compact_unique(values: list[str | None], *, limit: int = 20) -> list[str]:
|
||||
"""Return stable non-empty values without leaking duplicate source refs."""
|
||||
return sorted({str(value).strip() for value in values if str(value or "").strip()})[:limit]
|
||||
|
||||
|
||||
def build_inbound_source_envelope(
    *,
    provider: str,
    stage: str,
    provider_event_id: str,
    raw_event_id: str | None = None,
    raw_content: str | None = None,
    alertname: str | None = None,
    severity: str | None = None,
    namespace: str | None = None,
    target_resource: str | None = None,
    fingerprint: str | None = None,
    incident_id: str | None = None,
    approval_id: str | None = None,
    source_url: str | None = None,
    labels: dict[str, Any] | None = None,
    annotations: dict[str, Any] | None = None,
    extra: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Build a redaction-friendly inbound replay envelope for truth-chain use.

    The envelope carries a SHA-256 of ``raw_content`` (``None`` when the
    content is empty or missing), its length, de-duplicated source references
    and the correlation fields. Incident ids matching ``_INCIDENT_ID_RE`` in
    the content are merged into ``source_refs["incident_ids"]``. The
    ``sentry_issue_ids`` and ``signoz_alerts`` lists are filled only for the
    matching provider. ``extra`` is attached only when truthy.

    Returns:
        The envelope after ``sanitize``, with ``content_sha256`` written back.
    """
    provider_name = str(provider or "unknown").strip().lower() or "unknown"
    body = raw_content or ""
    content_sha256 = (
        hashlib.sha256(raw_content.encode()).hexdigest() if raw_content else None
    )
    incident_refs_in_text = _INCIDENT_ID_RE.findall(body)

    sentry_refs = [raw_event_id, provider_event_id] if provider_name == "sentry" else []
    signoz_refs = [raw_event_id, alertname] if provider_name == "signoz" else []

    envelope: dict[str, Any] = {
        "schema_version": "inbound_source_envelope_v1",
        "redaction_version": _INBOUND_REDACTION_VERSION,
        "adapter": f"{provider_name}_webhook",
        "provider": provider_name,
        "stage": stage,
        "provider_event_id": provider_event_id,
        "source_url": source_url,
        "content_sha256": content_sha256,
        "content_length": len(body),
        "source_refs": {
            "event_ids": _compact_unique([raw_event_id]),
            "incident_ids": _compact_unique([incident_id, *incident_refs_in_text]),
            "approval_ids": _compact_unique([approval_id]),
            "alert_ids": _compact_unique([provider_event_id, raw_event_id]),
            "fingerprints": _compact_unique([fingerprint]),
            "sentry_issue_ids": _compact_unique(sentry_refs),
            "signoz_alerts": _compact_unique(signoz_refs),
        },
        "log_correlation": {
            "alertname": alertname,
            "severity": severity,
            "namespace": namespace,
            "target_resource": target_resource,
            "fingerprint": fingerprint,
        },
        "labels": labels or {},
        "annotations": annotations or {},
    }
    if extra:
        envelope["extra"] = extra

    sanitized = sanitize(envelope)
    # NOTE(review): presumably sanitize() may rewrite hash-like strings; the
    # digest is restored explicitly so it always matches raw_content.
    sanitized["content_sha256"] = content_sha256
    return sanitized
|
||||
|
||||
|
||||
def _input_sha256(input_payload: dict[str, Any] | None) -> str | None:
|
||||
"""計算 Run input 的穩定 hash,讓 mirror run 也能保留最小完整性證據。"""
|
||||
if not input_payload:
|
||||
return None
|
||||
canonical = json.dumps(
|
||||
input_payload,
|
||||
sort_keys=True,
|
||||
separators=(",", ":"),
|
||||
ensure_ascii=False,
|
||||
)
|
||||
return hashlib.sha256(canonical.encode()).hexdigest()
|
||||
|
||||
|
||||
async def ensure_completed_shadow_run(
    db: AsyncSession,
    *,
    project_id: str,
    run_id: UUID,
    agent_id: str,
    trigger_type: str,
    trigger_ref: str | None,
    input_payload: dict[str, Any] | None = None,
) -> bool:
    """Insert a completed shadow run for mirrored legacy data, if not present.

    During the strangler phase AwoooP first mirrors legacy Telegram /
    alert-grouping data. Those events must not re-trigger the runtime, but
    they need a run_state row as the aggregation anchor for the Console, so
    the row created here is an already-completed shadow run that workers will
    not pick up.

    Returns:
        ``True`` when a new row was inserted, ``False`` when a row with the
        same ``run_id`` already existed (``ON CONFLICT DO NOTHING``).
    """
    insert_stmt = text("""
        INSERT INTO awooop_run_state (
            run_id, project_id, agent_id, state,
            trigger_type, trigger_ref, is_shadow,
            input_sha256,
            attempt_count, max_attempts, cost_usd, step_count,
            created_at, completed_at, timeout_at
        ) VALUES (
            :run_id, :project_id, :agent_id, 'completed',
            :trigger_type, :trigger_ref, TRUE,
            :input_sha256,
            0, 3, 0.0000, 0,
            NOW(), NOW(), NOW()
        )
        ON CONFLICT (run_id) DO NOTHING
        RETURNING run_id
    """)
    params = {
        "run_id": run_id,
        "project_id": project_id,
        "agent_id": agent_id,
        "trigger_type": trigger_type,
        "trigger_ref": trigger_ref,
        "input_sha256": _input_sha256(input_payload),
    }
    result = await db.execute(insert_stmt, params)

    # RETURNING yields a row only when the insert actually happened.
    created = result.fetchone() is not None
    if created:
        logger.info(
            "completed_shadow_run_created",
            project_id=project_id,
            run_id=str(run_id),
            agent_id=agent_id,
            trigger_type=trigger_type,
        )
    return created
|
||||
|
||||
|
||||
def build_grouped_alert_run_id(project_id: str, provider_event_id: str) -> UUID:
    """Derive a stable run_id for a grouped child alert so Run Monitor can look it up.

    The id is a UUIDv5 in the URL namespace, so equal inputs always give the
    same UUID.
    """
    seed = f"awooop:grouped-alert:{project_id}:{provider_event_id}"
    return uuid5(NAMESPACE_URL, seed)
|
||||
|
||||
|
||||
def build_alertmanager_provider_event_id(alert_id: str, fingerprint: str, stage: str) -> str:
    """Build the idempotent provider_event_id for an Alertmanager inbound event.

    Each part is stringified and stripped. The fingerprint and stage are cut
    to 32 characters; the alert id is not truncated. Empty parts fall back to
    ``"unknown"``, ``"no-fingerprint"`` and ``"received"`` respectively.
    """
    alert_part = str(alert_id).strip() or "unknown"
    fingerprint_part = str(fingerprint).strip()[:32] or "no-fingerprint"
    stage_part = str(stage).strip()[:32] or "received"
    return ":".join(("alertmanager", stage_part, alert_part, fingerprint_part))
|
||||
|
||||
|
||||
def build_alertmanager_run_id(project_id: str, provider_event_id: str) -> UUID:
    """Derive a stable shadow run_id for an Alertmanager inbound mirror.

    The id is a UUIDv5 in the URL namespace, so equal inputs always give the
    same UUID.
    """
    seed = f"awooop:alertmanager:{project_id}:{provider_event_id}"
    return uuid5(NAMESPACE_URL, seed)
|
||||
|
||||
|
||||
def build_external_alert_provider_event_id(provider: str, event_id: str, stage: str) -> str:
    """Build the idempotent provider_event_id for external (Sentry/SignOz etc.) alerts.

    The provider is stripped, lower-cased and cut to 32 characters, the event
    id to 96 and the stage to 32. Empty parts fall back to ``"external"``,
    ``"unknown"`` and ``"received"`` respectively.
    """
    provider_part = str(provider).strip().lower()[:32] or "external"
    event_part = str(event_id).strip()[:96] or "unknown"
    stage_part = str(stage).strip()[:32] or "received"
    return ":".join((provider_part, stage_part, event_part))
|
||||
|
||||
|
||||
def build_external_alert_run_id(project_id: str, provider_event_id: str) -> UUID:
    """Derive a stable shadow run_id for an external-alert inbound mirror.

    The id is a UUIDv5 in the URL namespace, so equal inputs always give the
    same UUID.
    """
    seed = f"awooop:external-alert:{project_id}:{provider_event_id}"
    return uuid5(NAMESPACE_URL, seed)
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
@@ -62,6 +240,7 @@ async def mirror_inbound_event(
|
||||
channel_chat_id: str | None = None,
|
||||
content_type: str = "text",
|
||||
raw_content: str | None = None,
|
||||
source_envelope: dict[str, Any] | None = None,
|
||||
attachment_sha256: str | None = None,
|
||||
provider_ts: datetime | None = None,
|
||||
run_id: UUID | None = None,
|
||||
@@ -75,12 +254,32 @@ async def mirror_inbound_event(
|
||||
"""
|
||||
content_hash: str | None = None
|
||||
content_preview: str | None = None
|
||||
content_redacted: str | None = None
|
||||
|
||||
if raw_content is not None:
|
||||
content_hash = hashlib.sha256(raw_content.encode()).hexdigest()
|
||||
# preview:redact 後截取前 256 字元
|
||||
redacted = _redact_string(raw_content)
|
||||
content_preview = redacted[:256] if len(redacted) > 256 else redacted
|
||||
content_redacted = _redact_string(raw_content)
|
||||
content_preview = (
|
||||
content_redacted[:256] if len(content_redacted) > 256 else content_redacted
|
||||
)
|
||||
|
||||
if source_envelope and source_envelope.get("schema_version") == "inbound_source_envelope_v1":
|
||||
original_content_sha256 = source_envelope.get("content_sha256")
|
||||
envelope = sanitize(source_envelope)
|
||||
envelope.setdefault("redaction_version", _INBOUND_REDACTION_VERSION)
|
||||
envelope["content_sha256"] = content_hash or original_content_sha256
|
||||
envelope.setdefault("content_length", len(raw_content) if raw_content is not None else 0)
|
||||
else:
|
||||
envelope = build_inbound_source_envelope(
|
||||
provider=channel_type,
|
||||
stage="received",
|
||||
provider_event_id=provider_event_id,
|
||||
raw_event_id=provider_event_id,
|
||||
raw_content=raw_content,
|
||||
extra=source_envelope,
|
||||
)
|
||||
source_envelope_json = json.dumps(envelope, ensure_ascii=False, default=str)
|
||||
|
||||
result = await db.execute(
|
||||
text("""
|
||||
@@ -88,16 +287,28 @@ async def mirror_inbound_event(
|
||||
project_id, channel_type, provider_event_id,
|
||||
platform_subject_id, channel_user_id, channel_chat_id,
|
||||
run_id, content_type, content_hash, content_preview,
|
||||
content_redacted, redaction_version, source_envelope,
|
||||
attachment_sha256, is_duplicate, provider_ts, received_at
|
||||
) VALUES (
|
||||
:project_id, :channel_type, :provider_event_id,
|
||||
:platform_subject_id, :channel_user_id, :channel_chat_id,
|
||||
:run_id, :content_type, :content_hash, :content_preview,
|
||||
:content_redacted, :redaction_version, CAST(:source_envelope AS jsonb),
|
||||
:attachment_sha256, :is_duplicate, :provider_ts, NOW()
|
||||
)
|
||||
ON CONFLICT (project_id, channel_type, provider_event_id) DO UPDATE SET
|
||||
is_duplicate = TRUE,
|
||||
run_id = COALESCE(EXCLUDED.run_id, awooop_conversation_event.run_id)
|
||||
run_id = COALESCE(EXCLUDED.run_id, awooop_conversation_event.run_id),
|
||||
content_redacted = COALESCE(
|
||||
awooop_conversation_event.content_redacted,
|
||||
EXCLUDED.content_redacted
|
||||
),
|
||||
redaction_version = EXCLUDED.redaction_version,
|
||||
source_envelope = CASE
|
||||
WHEN awooop_conversation_event.source_envelope = '{}'::jsonb
|
||||
THEN EXCLUDED.source_envelope
|
||||
ELSE awooop_conversation_event.source_envelope
|
||||
END
|
||||
RETURNING event_id
|
||||
"""),
|
||||
{
|
||||
@@ -111,6 +322,9 @@ async def mirror_inbound_event(
|
||||
"content_type": content_type,
|
||||
"content_hash": content_hash,
|
||||
"content_preview": content_preview,
|
||||
"content_redacted": content_redacted,
|
||||
"redaction_version": _INBOUND_REDACTION_VERSION,
|
||||
"source_envelope": source_envelope_json,
|
||||
"attachment_sha256": attachment_sha256,
|
||||
"is_duplicate": is_duplicate,
|
||||
"provider_ts": provider_ts,
|
||||
@@ -128,6 +342,544 @@ async def mirror_inbound_event(
|
||||
return event_id
|
||||
|
||||
|
||||
def build_grouped_alert_provider_event_id(alert_id: str, fingerprint: str) -> str:
    """Build the idempotent provider_event_id for a grouped child alert.

    Both parts are stringified and stripped. The fingerprint is cut to 32
    characters. Empty parts fall back to ``"unknown"`` and
    ``"no-fingerprint"``.
    """
    alert_part = str(alert_id).strip() or "unknown"
    fingerprint_part = str(fingerprint).strip()[:32] or "no-fingerprint"
    return ":".join(("alert-group", alert_part, fingerprint_part))
|
||||
|
||||
|
||||
def format_alertmanager_event_content(
|
||||
*,
|
||||
stage: str,
|
||||
alert_id: str,
|
||||
alertname: str,
|
||||
severity: str,
|
||||
namespace: str,
|
||||
target_resource: str,
|
||||
fingerprint: str,
|
||||
notification_type: str | None = None,
|
||||
alert_category: str | None = None,
|
||||
incident_id: str | None = None,
|
||||
approval_id: str | None = None,
|
||||
repeat_count: int | None = None,
|
||||
) -> str:
|
||||
"""格式化 Alertmanager inbound mirror 摘要,讓 truth-chain 可回查。"""
|
||||
head = f"Incident: {incident_id}" if incident_id else f"Fingerprint: {fingerprint}"
|
||||
return "\n".join(
|
||||
[
|
||||
f"Alertmanager inbound {stage}",
|
||||
head,
|
||||
f"Alert ID: {alert_id}",
|
||||
f"Approval: {approval_id or '-'}",
|
||||
f"Alert: {alertname}",
|
||||
f"Severity: {severity}",
|
||||
f"Namespace: {namespace or 'default'}",
|
||||
f"Target: {target_resource or '-'}",
|
||||
f"Fingerprint: {fingerprint}",
|
||||
f"Notification Type: {notification_type or '-'}",
|
||||
f"Alert Category: {alert_category or '-'}",
|
||||
f"Repeat Count: {repeat_count if repeat_count is not None else '-'}",
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
def format_grouped_alert_event_content(
|
||||
*,
|
||||
alert_id: str,
|
||||
alertname: str,
|
||||
severity: str,
|
||||
namespace: str,
|
||||
target_resource: str,
|
||||
group_key: str,
|
||||
count: int,
|
||||
parent_fingerprint: str | None,
|
||||
fingerprint: str,
|
||||
) -> str:
|
||||
"""格式化只落 AwoooP、不發 Telegram 的告警收斂事件摘要。"""
|
||||
parent = parent_fingerprint or "-"
|
||||
target = target_resource or "-"
|
||||
ns = namespace or "default"
|
||||
return "\n".join(
|
||||
[
|
||||
"告警已收斂,不發 Telegram",
|
||||
f"Alert ID: {alert_id}",
|
||||
f"Alert: {alertname}",
|
||||
f"Severity: {severity}",
|
||||
f"Namespace: {ns}",
|
||||
f"Target: {target}",
|
||||
f"Group: {group_key}",
|
||||
f"Group Count: {count}",
|
||||
f"Parent Fingerprint: {parent}",
|
||||
f"Child Fingerprint: {fingerprint}",
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
def format_grouped_alert_digest_text(
    *,
    alertname: str,
    severity: str,
    namespace: str,
    target_resource: str,
    group_key: str,
    count: int,
) -> str:
    """Format the short digest that is posted as a reply on the parent alert card.

    Every text field is HTML-escaped before being placed inside ``<code>``
    tags. Empty values fall back to ``unknown`` (``default`` for the
    namespace). ``count`` is interpolated as-is.
    """

    def esc(value: str, fallback: str = "unknown") -> str:
        return html.escape(value or fallback)

    lines = (
        "🧩 <b>告警已收斂到父卡</b>",
        f"├ 類型:<code>{esc(alertname)}</code>",
        f"├ 等級:<code>{esc(severity)}</code>",
        f"├ 範圍:<code>{esc(namespace, 'default')}</code>",
        f"├ 最新目標:<code>{esc(target_resource)}</code>",
        f"├ 群組:<code>{esc(group_key)}</code>",
        f"└ 目前視窗:<b>{count}</b> 筆同組告警",
        "",
        "完整子告警請看 AwoooP Run 監控,不再逐筆發 Telegram。",
    )
    return "\n".join(lines)
|
||||
|
||||
|
||||
async def maybe_send_grouped_alert_digest(
    *,
    project_id: str,
    alertname: str,
    severity: str,
    namespace: str,
    target_resource: str,
    group_key: str,
    count: int,
    parent_fingerprint: str | None,
) -> bool:
    """Reply with a low-frequency digest on the parent alert card, if one exists.

    The parent is the newest ``ApprovalRecord`` with the given fingerprint and
    a non-null incident id. When no parent is found the function degrades
    quietly.

    Returns:
        The gateway's send result; ``False`` when there is no parent
        fingerprint, no parent record yet, or any exception occurred (logged
        as a warning, never raised).
    """
    if not parent_fingerprint:
        return False

    try:
        from sqlalchemy import select

        from src.db.base import get_db_context
        from src.db.models import ApprovalRecord
        from src.services.telegram_gateway import get_telegram_gateway

        latest_parent_incident = (
            select(ApprovalRecord.incident_id)
            .where(ApprovalRecord.fingerprint == parent_fingerprint)
            .where(ApprovalRecord.incident_id.is_not(None))
            .order_by(ApprovalRecord.created_at.desc())
            .limit(1)
        )
        async with get_db_context(project_id) as db:
            lookup = await db.execute(latest_parent_incident)
            incident_id = lookup.scalar_one_or_none()

        if not incident_id:
            logger.info(
                "grouped_alert_digest_parent_not_ready",
                project_id=project_id,
                group_key=group_key,
                parent_fingerprint=parent_fingerprint,
            )
            return False

        digest_text = format_grouped_alert_digest_text(
            alertname=alertname,
            severity=severity,
            namespace=namespace,
            target_resource=target_resource,
            group_key=group_key,
            count=count,
        )
        gateway = get_telegram_gateway()
        sent = await gateway.append_grouped_alert_digest(
            incident_id=str(incident_id),
            group_key=group_key,
            digest_text=digest_text,
        )
        logger.info(
            "grouped_alert_digest_result",
            project_id=project_id,
            incident_id=str(incident_id),
            group_key=group_key,
            count=count,
            sent=sent,
        )
        return sent
    except Exception as exc:
        # The digest is optional; failures are logged and reported as not sent.
        logger.warning(
            "grouped_alert_digest_failed",
            project_id=project_id,
            group_key=group_key,
            parent_fingerprint=parent_fingerprint,
            error=str(exc),
        )
        return False
|
||||
|
||||
|
||||
async def record_grouped_alert_event(
    *,
    project_id: str,
    alert_id: str,
    alertname: str,
    severity: str,
    namespace: str,
    target_resource: str,
    group_key: str,
    count: int,
    parent_fingerprint: str | None,
    fingerprint: str,
) -> UUID | None:
    """Store a child alert converged by AlertGroupingService as an AwoooP conversation_event.

    This path deliberately sends no per-alert Telegram message and only keeps
    the operator-facing context:
    - the group chat is not flooded
    - the Console can still see that alerts of the same group keep firing
    - DB failures fail open and do not affect the Alertmanager webhook ACK

    After the event is stored, ``maybe_send_grouped_alert_digest`` is called.

    Returns:
        The value returned by ``mirror_inbound_event``, or ``None`` when any
        exception occurred (logged as a warning, never raised).
    """
    try:
        from src.db.base import get_db_context

        provider_event_id = build_grouped_alert_provider_event_id(alert_id, fingerprint)
        run_id = build_grouped_alert_run_id(project_id, provider_event_id)
        content = format_grouped_alert_event_content(
            alert_id=alert_id,
            alertname=alertname,
            severity=severity,
            namespace=namespace,
            target_resource=target_resource,
            group_key=group_key,
            count=count,
            parent_fingerprint=parent_fingerprint,
            fingerprint=fingerprint,
        )
        run_input = {
            "alert_id": alert_id,
            "alertname": alertname,
            "severity": severity,
            "group_key": group_key,
            "fingerprint": fingerprint,
        }

        async with get_db_context(project_id) as db:
            # The shadow run is ensured first and its run_id is passed to the event.
            await ensure_completed_shadow_run(
                db,
                project_id=project_id,
                run_id=run_id,
                agent_id="legacy-alert-grouping",
                trigger_type="grouped_alert_event",
                trigger_ref=provider_event_id,
                input_payload=run_input,
            )
            event_id = await mirror_inbound_event(
                db,
                project_id=project_id,
                channel_type="internal",
                provider_event_id=provider_event_id,
                platform_subject_id="alertmanager",
                channel_user_id="alertmanager",
                channel_chat_id=f"alert-group:{group_key}",
                content_type="text",
                raw_content=content,
                provider_ts=_db_timestamp_now(),
                run_id=run_id,
            )

        logger.info(
            "grouped_alert_event_recorded",
            project_id=project_id,
            alert_id=alert_id,
            event_id=str(event_id),
            group_key=group_key,
            count=count,
        )
        await maybe_send_grouped_alert_digest(
            project_id=project_id,
            alertname=alertname,
            severity=severity,
            namespace=namespace,
            target_resource=target_resource,
            group_key=group_key,
            count=count,
            parent_fingerprint=parent_fingerprint,
        )
        return event_id
    except Exception as exc:
        logger.warning(
            "grouped_alert_event_record_failed",
            project_id=project_id,
            alert_id=alert_id,
            group_key=group_key,
            error=str(exc),
        )
        return None
|
||||
|
||||
|
||||
async def record_alertmanager_event(
    *,
    project_id: str,
    alert_id: str,
    alertname: str,
    severity: str,
    namespace: str,
    target_resource: str,
    fingerprint: str,
    stage: str,
    notification_type: str | None = None,
    alert_category: str | None = None,
    incident_id: str | None = None,
    approval_id: str | None = None,
    repeat_count: int | None = None,
    is_duplicate: bool = False,
    source_url: str | None = None,
    labels: dict[str, Any] | None = None,
    annotations: dict[str, Any] | None = None,
    source_extra: dict[str, Any] | None = None,
) -> UUID | None:
    """Mirror an Alertmanager inbound alert into an AwoooP conversation_event.

    Telegram must not be the only source of truth: every firing alert should
    have at least a ``received`` event, and once the incident/approval exists
    an ``incident_linked`` event is added so truth-chain can look the alert up
    by incident id. DB failures fail open and do not affect the Alertmanager
    ACK.

    Returns:
        The value returned by ``mirror_inbound_event``, or ``None`` when any
        exception occurred (logged as a warning, never raised).
    """
    try:
        from src.db.base import get_db_context

        incident_ref = str(incident_id) if incident_id else None
        approval_ref = str(approval_id) if approval_id else None
        provider_event_id = build_alertmanager_provider_event_id(
            alert_id=alert_id,
            fingerprint=fingerprint,
            stage=stage,
        )
        run_id = build_alertmanager_run_id(project_id, provider_event_id)

        # Fields shared by the text summary and the source envelope.
        alert_fields = {
            "alertname": alertname,
            "severity": severity,
            "namespace": namespace,
            "target_resource": target_resource,
            "fingerprint": fingerprint,
            "incident_id": incident_ref,
            "approval_id": approval_ref,
        }
        content = format_alertmanager_event_content(
            stage=stage,
            alert_id=alert_id,
            notification_type=notification_type,
            alert_category=alert_category,
            repeat_count=repeat_count,
            **alert_fields,
        )
        source_envelope = build_inbound_source_envelope(
            provider="alertmanager",
            stage=stage,
            provider_event_id=provider_event_id,
            raw_event_id=alert_id,
            raw_content=content,
            source_url=source_url,
            labels=labels,
            annotations=annotations,
            extra={
                "notification_type": notification_type,
                "alert_category": alert_category,
                "repeat_count": repeat_count,
                # Caller-supplied extras win over the three defaults above.
                **(source_extra or {}),
            },
            **alert_fields,
        )
        run_input = {
            "stage": stage,
            "alert_id": alert_id,
            "alertname": alertname,
            "severity": severity,
            "namespace": namespace,
            "target_resource": target_resource,
            "fingerprint": fingerprint,
            "notification_type": notification_type,
            "alert_category": alert_category,
            "incident_id": incident_ref,
            "approval_id": approval_ref,
            "repeat_count": repeat_count,
        }

        async with get_db_context(project_id) as db:
            await ensure_completed_shadow_run(
                db,
                project_id=project_id,
                run_id=run_id,
                agent_id="legacy-alertmanager-webhook",
                trigger_type="alertmanager_inbound",
                trigger_ref=provider_event_id,
                input_payload=run_input,
            )
            event_id = await mirror_inbound_event(
                db,
                project_id=project_id,
                channel_type="internal",
                provider_event_id=provider_event_id,
                platform_subject_id="alertmanager",
                channel_user_id="alertmanager",
                channel_chat_id=f"alertmanager:{namespace or 'default'}",
                content_type="text",
                raw_content=content,
                source_envelope=source_envelope,
                provider_ts=_db_timestamp_now(),
                run_id=run_id,
                is_duplicate=is_duplicate,
            )

        logger.info(
            "alertmanager_event_recorded",
            project_id=project_id,
            alert_id=alert_id,
            event_id=str(event_id),
            stage=stage,
            incident_id=incident_ref,
            fingerprint=fingerprint,
        )
        return event_id
    except Exception as exc:
        logger.warning(
            "alertmanager_event_record_failed",
            project_id=project_id,
            alert_id=alert_id,
            stage=stage,
            fingerprint=fingerprint,
            error=str(exc),
        )
        return None
|
||||
|
||||
|
||||
async def record_external_alert_event(
    *,
    project_id: str,
    provider: str,
    event_id: str,
    stage: str,
    title: str,
    severity: str,
    namespace: str | None = None,
    target_resource: str | None = None,
    fingerprint: str | None = None,
    incident_id: str | None = None,
    approval_id: str | None = None,
    source_url: str | None = None,
    labels: dict[str, Any] | None = None,
    annotations: dict[str, Any] | None = None,
    payload: dict[str, Any] | None = None,
    is_duplicate: bool = False,
) -> UUID | None:
    """Mirror a non-Alertmanager alert (e.g. Sentry / SignOz) into conversation_event.

    This is the minimal shared truth-chain entry point: it only records the
    text summary plus its source_envelope, and does not change the original
    webhook's notification, approval, or automation behaviour.

    Returns:
        The value returned by ``mirror_inbound_event`` on success, or ``None``
        when anything inside the database section raises (the failure is
        logged as a warning instead of propagating).
    """
    provider_name = str(provider or "external").strip().lower() or "external"
    provider_event_id = build_external_alert_provider_event_id(provider_name, event_id, stage)

    # Stringified references are reused by the envelope, the shadow run, and the log line.
    incident_ref = str(incident_id) if incident_id else None
    approval_ref = str(approval_id) if approval_id else None

    summary_lines = [
        f"{provider_name} inbound {stage}",
        f"Event ID: {event_id}",
        f"Title: {title}",
        f"Severity: {severity}",
        f"Namespace: {namespace or '-'}",
        f"Target: {target_resource or '-'}",
        f"Fingerprint: {fingerprint or '-'}",
        f"Incident: {incident_id or '-'}",
        f"Approval: {approval_id or '-'}",
        f"Source URL: {source_url or '-'}",
    ]
    content = "\n".join(summary_lines)

    # Built outside the try block: an error here propagates to the caller.
    source_envelope = build_inbound_source_envelope(
        provider=provider_name,
        stage=stage,
        provider_event_id=provider_event_id,
        raw_event_id=event_id,
        raw_content=content,
        alertname=title,
        severity=severity,
        namespace=namespace,
        target_resource=target_resource,
        fingerprint=fingerprint,
        incident_id=incident_ref,
        approval_id=approval_ref,
        source_url=source_url,
        labels=labels,
        annotations=annotations,
        extra={"payload": payload or {}},
    )

    try:
        from src.db.base import get_db_context

        async with get_db_context(project_id) as db:
            run_id = build_external_alert_run_id(project_id, provider_event_id)
            shadow_input = {
                "provider": provider_name,
                "event_id": event_id,
                "stage": stage,
                "severity": severity,
                "namespace": namespace,
                "target_resource": target_resource,
                "fingerprint": fingerprint,
                "incident_id": incident_ref,
                "approval_id": approval_ref,
            }
            await ensure_completed_shadow_run(
                db,
                project_id=project_id,
                run_id=run_id,
                agent_id=f"legacy-{provider_name}-webhook",
                trigger_type=f"{provider_name}_inbound",
                trigger_ref=provider_event_id,
                input_payload=shadow_input,
            )
            event_uuid = await mirror_inbound_event(
                db,
                project_id=project_id,
                channel_type="internal",
                provider_event_id=provider_event_id,
                platform_subject_id=provider_name,
                channel_user_id=provider_name,
                channel_chat_id=f"{provider_name}:{namespace or 'default'}",
                content_type="text",
                raw_content=content,
                source_envelope=source_envelope,
                provider_ts=_db_timestamp_now(),
                run_id=run_id,
                is_duplicate=is_duplicate,
            )

        logger.info(
            "external_alert_event_recorded",
            project_id=project_id,
            provider=provider_name,
            event_id=event_id,
            stage=stage,
            conversation_event_id=str(event_uuid),
            incident_id=incident_ref,
            approval_id=approval_ref,
        )
        return event_uuid
    except Exception as exc:
        # Best-effort mirror: recording must never break the calling webhook.
        logger.warning(
            "external_alert_event_record_failed",
            project_id=project_id,
            provider=provider_name,
            event_id=event_id,
            stage=stage,
            error=str(exc),
        )
        return None
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# 出站訊息記錄
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
@@ -141,6 +893,7 @@ async def record_outbound_message(
|
||||
channel_chat_id: str,
|
||||
message_type: str, # 'interim' | 'final' | 'error' | 'approval_request'
|
||||
content: str | None = None,
|
||||
source_envelope: dict[str, Any] | None = None,
|
||||
provider_message_id: str | None = None,
|
||||
send_status: str = "pending",
|
||||
conversation_event_id: UUID | None = None,
|
||||
@@ -155,26 +908,61 @@ async def record_outbound_message(
|
||||
"""
|
||||
content_hash: str | None = None
|
||||
content_preview: str | None = None
|
||||
content_redacted: str | None = None
|
||||
if content is not None:
|
||||
content_hash = hashlib.sha256(content.encode()).hexdigest()
|
||||
redacted = _redact_string(content)
|
||||
content_preview = redacted[:256]
|
||||
content_redacted = _redact_string(content)
|
||||
content_preview = content_redacted[:256]
|
||||
|
||||
envelope: dict[str, Any] = sanitize(source_envelope or {})
|
||||
envelope.update({
|
||||
"schema_version": "outbound_source_envelope_v1",
|
||||
"redaction_version": _OUTBOUND_REDACTION_VERSION,
|
||||
"content_sha256": content_hash,
|
||||
"content_length": len(content) if content is not None else 0,
|
||||
})
|
||||
source_envelope_json = json.dumps(envelope, ensure_ascii=False, default=str)
|
||||
|
||||
actual_status = "shadow" if is_shadow else send_status
|
||||
sent_at = (
|
||||
_db_timestamp_now()
|
||||
if actual_status == "sent"
|
||||
else None
|
||||
)
|
||||
|
||||
await ensure_completed_shadow_run(
|
||||
db,
|
||||
project_id=project_id,
|
||||
run_id=run_id,
|
||||
agent_id="legacy-telegram-gateway",
|
||||
trigger_type="legacy_outbound",
|
||||
trigger_ref=provider_message_id,
|
||||
input_payload={
|
||||
"channel_type": channel_type,
|
||||
"channel_chat_id": channel_chat_id,
|
||||
"message_type": message_type,
|
||||
"send_status": actual_status,
|
||||
"triggered_by_state": triggered_by_state,
|
||||
},
|
||||
)
|
||||
|
||||
result = await db.execute(
|
||||
text("""
|
||||
INSERT INTO awooop_outbound_message (
|
||||
project_id, run_id, conversation_event_id,
|
||||
channel_type, channel_chat_id, message_type,
|
||||
content_hash, content_preview, provider_message_id,
|
||||
send_status, queued_at,
|
||||
content_hash, content_preview, content_redacted,
|
||||
redaction_version, source_envelope,
|
||||
provider_message_id,
|
||||
send_status, queued_at, sent_at,
|
||||
triggered_by_state, waiting_since
|
||||
) VALUES (
|
||||
:project_id, :run_id, :conversation_event_id,
|
||||
:channel_type, :channel_chat_id, :message_type,
|
||||
:content_hash, :content_preview, :provider_message_id,
|
||||
:send_status, NOW(),
|
||||
:content_hash, :content_preview, :content_redacted,
|
||||
:redaction_version, CAST(:source_envelope AS jsonb),
|
||||
:provider_message_id,
|
||||
:send_status, NOW(), :sent_at,
|
||||
:triggered_by_state, :waiting_since
|
||||
)
|
||||
RETURNING message_id
|
||||
@@ -188,8 +976,12 @@ async def record_outbound_message(
|
||||
"message_type": message_type,
|
||||
"content_hash": content_hash,
|
||||
"content_preview": content_preview,
|
||||
"content_redacted": content_redacted,
|
||||
"redaction_version": _OUTBOUND_REDACTION_VERSION,
|
||||
"source_envelope": source_envelope_json,
|
||||
"provider_message_id": provider_message_id,
|
||||
"send_status": actual_status,
|
||||
"sent_at": sent_at,
|
||||
"triggered_by_state": triggered_by_state,
|
||||
"waiting_since": waiting_since,
|
||||
},
|
||||
@@ -278,7 +1070,7 @@ async def _interim_feedback_task(
|
||||
# run 已推進(complete/failed 等),不需要 interim
|
||||
return
|
||||
|
||||
waiting_since = datetime.now(timezone.utc)
|
||||
waiting_since = datetime.now(UTC)
|
||||
interim_content = "AI 正在分析中,請稍候... ⏳"
|
||||
|
||||
await record_outbound_message(
|
||||
|
||||
@@ -26,7 +26,7 @@ from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from dataclasses import dataclass
|
||||
from typing import TYPE_CHECKING, Any, Literal
|
||||
|
||||
import httpx
|
||||
@@ -125,7 +125,7 @@ class DecisionFusionAdapter:
|
||||
# Public API
|
||||
# =========================================================================
|
||||
|
||||
async def fuse_decision(self, event: "AiGovernanceEvent") -> FusedDecision:
|
||||
async def fuse_decision(self, event: AiGovernanceEvent) -> FusedDecision:
|
||||
"""三維融合:LLM × Playbook × MCP → FusedDecision。
|
||||
|
||||
三個維度並行評估(asyncio.gather),任一失敗靜默降為 0.5。
|
||||
@@ -226,7 +226,7 @@ class DecisionFusionAdapter:
|
||||
# =========================================================================
|
||||
|
||||
async def _score_llm(
|
||||
self, event: "AiGovernanceEvent"
|
||||
self, event: AiGovernanceEvent
|
||||
) -> tuple[float, str, dict[str, Any]]:
|
||||
"""Ollama LLM 推理:治理事件情境 → 建議動作 + 信心度。
|
||||
|
||||
@@ -254,7 +254,9 @@ class DecisionFusionAdapter:
|
||||
"只輸出 CONFIDENCE 和 ACTION 兩行,不要其他解釋。"
|
||||
)
|
||||
|
||||
ollama_url = getattr(self._settings, "OLLAMA_URL", "http://192.168.0.111:11434") # 2026-05-04 ogt: ADR-110 修正 — 111 primary
|
||||
from src.services.ollama_endpoint_resolver import resolve_ollama_endpoint
|
||||
|
||||
ollama_url = resolve_ollama_endpoint("deep_rca")
|
||||
|
||||
try:
|
||||
async with httpx.AsyncClient(
|
||||
@@ -320,7 +322,7 @@ class DecisionFusionAdapter:
|
||||
# =========================================================================
|
||||
|
||||
async def _score_playbook(
|
||||
self, event: "AiGovernanceEvent"
|
||||
self, event: AiGovernanceEvent
|
||||
) -> tuple[float, str | None, float | None]:
|
||||
"""Playbook 相似度比對 → 取最高 trust_score。
|
||||
|
||||
@@ -373,7 +375,7 @@ class DecisionFusionAdapter:
|
||||
# =========================================================================
|
||||
|
||||
async def _score_mcp(
|
||||
self, event: "AiGovernanceEvent"
|
||||
self, event: AiGovernanceEvent
|
||||
) -> tuple[float, dict[str, Any]]:
|
||||
"""Prometheus 情報採集 → MCP 感官品質分數。
|
||||
|
||||
|
||||
@@ -21,6 +21,7 @@ Decision Manager - Phase 6.5 非同步決策狀態機
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import html
|
||||
import json
|
||||
from datetime import UTC, datetime
|
||||
from enum import Enum
|
||||
@@ -55,6 +56,20 @@ def _fire_and_forget(coro) -> asyncio.Task:
|
||||
return task
|
||||
|
||||
|
||||
def _incident_alertname_for_dedup(incident: Incident) -> str:
|
||||
"""Return a stable alert name for Telegram fingerprint dedup."""
|
||||
if incident.signals:
|
||||
signal = incident.signals[0]
|
||||
return (
|
||||
signal.labels.get("alertname")
|
||||
or signal.alert_name
|
||||
or signal.annotations.get("summary")
|
||||
or signal.annotations.get("description")
|
||||
or incident.incident_id
|
||||
)
|
||||
return incident.incident_id
|
||||
|
||||
|
||||
def _phase2_fallback_reason(package: Any) -> str | None:
|
||||
"""Return why a Phase 2 package should continue to Playbook/LLM fallback.
|
||||
|
||||
@@ -75,6 +90,22 @@ def _phase2_fallback_reason(package: Any) -> str | None:
|
||||
return None
|
||||
|
||||
|
||||
def _incident_llm_timeout_seconds() -> float:
    """Compute the outer timeout (seconds) wrapped around incident LLM proposals.

    Each provider already enforces its own timeout. This outer guard must not
    be shorter than the GCP Ollama lane, otherwise alert diagnosis is cut off
    before the free/local-first route has a chance to answer.

    Reads ``settings.INCIDENT_LLM_TIMEOUT_SECONDS`` (defaulting to 240 seconds
    when missing or not convertible to float) and never returns less than
    ``settings.OPENCLAW_TIMEOUT`` (default 30).
    """
    raw_value = getattr(settings, "INCIDENT_LLM_TIMEOUT_SECONDS", None)
    try:
        outer_timeout = float(raw_value)
    except (TypeError, ValueError):
        outer_timeout = 240.0

    provider_floor = float(getattr(settings, "OPENCLAW_TIMEOUT", 30))
    return max(outer_timeout, provider_floor)
|
||||
|
||||
|
||||
def _should_escalate_auto_approve_rejection(reason: Any) -> bool:
|
||||
"""Return True for manual gates that mean the automation path went blind."""
|
||||
|
||||
@@ -212,7 +243,7 @@ async def _push_decision_to_telegram(
|
||||
# 改成 alertname+target 構造的 fingerprint key + TTL 86400s,同症狀共用 dedup。
|
||||
# Incident 真正 RESOLVED/CLOSED 時走 line 220-226 的 status check 提早 return,不影響復發偵測。
|
||||
redis = get_redis()
|
||||
_alertname_fp = (incident.title or "unknown").strip().lower().replace(" ", "_")[:60]
|
||||
_alertname_fp = _incident_alertname_for_dedup(incident).strip().lower().replace(" ", "_")[:60]
|
||||
_target_fp = (
|
||||
incident.affected_services[0] if incident.affected_services else "unknown"
|
||||
).lower()[:40]
|
||||
@@ -546,6 +577,7 @@ async def _push_decision_to_telegram(
|
||||
alert_category=_alert_category,
|
||||
notification_type=_notification_type,
|
||||
playbook_name=_playbook_name,
|
||||
automation_state=proposal_data.get("automation_state", ""),
|
||||
)
|
||||
|
||||
# 2026-04-09 Claude Sonnet 4.6: 存 message_id → 後續狀態更新在原訊息延續
|
||||
@@ -607,7 +639,7 @@ async def _nemoclaw_second_opinion(incident: "Incident", primary_result: dict) -
|
||||
"""
|
||||
MCP Phase 4a: NemoClaw second opinion — 信心 < 0.7 時觸發
|
||||
============================================================
|
||||
用 deepseek-r1:14b (Ollama 188) 對同一份資料做獨立推理,
|
||||
用 deepseek-r1:14b (設定的 Ollama primary) 對同一份資料做獨立推理,
|
||||
輸出純文字 advisory_note,不執行任何操作。
|
||||
|
||||
2026-04-11 Claude Sonnet 4.6 Asia/Taipei
|
||||
@@ -666,7 +698,7 @@ async def _generate_playbook_draft_if_new(incident: "Incident") -> None:
|
||||
MCP Phase 4c: Playbook 無命中時,自動生成 AI 草稿 Playbook 寫入 KM
|
||||
=====================================================================
|
||||
- 僅在 KM 中不存在同 alertname 的 Playbook 時觸發(避免重複)
|
||||
- 用 qwen2.5:7b-instruct (Ollama 188) 生成結構化 Playbook 草稿
|
||||
- 用 qwen2.5:7b-instruct (設定的 Ollama primary) 生成結構化 Playbook 草稿
|
||||
- 寫入 KnowledgeEntry,status=DRAFT,需人工審核後升為 APPROVED
|
||||
- 寫入 AlertOperationLog PLAYBOOK_DRAFT_CREATED 事件
|
||||
|
||||
@@ -827,7 +859,6 @@ async def _resolve_target_from_k8s(incident: "Incident", namespace: str) -> str
|
||||
reason="alertname 有對應但 keywords=[],走 fallback 取第一個非 infra pod",
|
||||
)
|
||||
|
||||
import re as _re
|
||||
for line in pod_lines:
|
||||
pod = line.removeprefix("pod/").strip()
|
||||
if not pod:
|
||||
@@ -977,6 +1008,58 @@ def _format_metrics_delta(before: dict, after: dict) -> str:
|
||||
return " | ".join(parts)
|
||||
|
||||
|
||||
def _clip_telegram_field(value: str | None, limit: int) -> str:
|
||||
"""Normalize a short Telegram field without leaking multiline command noise."""
|
||||
text = " ".join(str(value or "").split())
|
||||
if len(text) <= limit:
|
||||
return text
|
||||
return f"{text[: max(0, limit - 3)]}..."
|
||||
|
||||
|
||||
def _format_auto_repair_status_line(
    *,
    incident_id: str,
    target: str,
    action: str,
    success: bool,
    error: str = "",
    metrics_delta_text: str = "",
) -> str:
    """Build the Telegram HTML card describing an auto-repair outcome.

    Every dynamic field is whitespace-normalized and length-clipped via
    ``_clip_telegram_field`` and then HTML-escaped before being embedded in
    ``<code>`` tags.

    Args:
        incident_id: Incident identifier (clipped to 40 characters).
        target: Repair target (clipped to 80; shown as ``unknown`` when empty).
        action: Action that was run (clipped to 160; has a default label when empty).
        success: Selects the "auto resolved" card when true, otherwise the
            "handoff required" card.
        error: Failure reason (clipped to 180; has a default label when empty).
            Only shown on the failure card.
        metrics_delta_text: Optional metrics summary (clipped to 120). Only
            shown on the success card, and only when non-empty.

    Returns:
        The multi-line HTML message text.
    """
    safe_incident = html.escape(_clip_telegram_field(incident_id, 40))
    safe_target = html.escape(_clip_telegram_field(target, 80) or "unknown")
    safe_action = html.escape(_clip_telegram_field(action, 160) or "已執行")
    safe_error = html.escape(_clip_telegram_field(error, 180) or "未回傳錯誤")
    divider = "──────────────────────\n"

    if not success:
        return "".join(
            [
                "🧑🔧 <b>HANDOFF REQUIRED|AI 自動修復失敗,已轉人工</b>\n",
                divider,
                f"├ 事件:<code>{safe_incident}</code>\n",
                f"├ 對象:<code>{safe_target}</code>\n",
                f"├ 嘗試:<code>{safe_action}</code>\n",
                f"├ 原因:<code>{safe_error}</code>\n",
                "├ 狀態:自動化已停止,不再重試\n",
                "└ 下一步:請 SRE 依 AwoooP Run / 原告警卡處理",
            ]
        )

    # The metrics row is optional and carries its own leading newline.
    metrics_row = ""
    if metrics_delta_text:
        safe_metrics = html.escape(_clip_telegram_field(metrics_delta_text, 120))
        metrics_row = f"\n├ 指標:<code>{safe_metrics}</code>"

    return "".join(
        [
            "✅ <b>AUTO RESOLVED|AI 自動修復完成</b>\n",
            divider,
            f"├ 事件:<code>{safe_incident}</code>\n",
            f"├ 對象:<code>{safe_target}</code>\n",
            f"├ 執行:<code>{safe_action}</code>\n",
            "├ 狀態:自動化已完成,等待後驗證觀察\n",
            "├ Actor:leWOOOgo autonomous",
            metrics_row,
        ]
    )
|
||||
|
||||
|
||||
async def _push_auto_repair_result(
|
||||
incident: Incident,
|
||||
action: str,
|
||||
@@ -1052,24 +1135,16 @@ async def _push_auto_repair_result(
|
||||
except Exception as _k8s_err:
|
||||
logger.debug("k8s_state_after_failed", incident_id=inc_id, error=str(_k8s_err))
|
||||
|
||||
# 2026-05-02 ogt + Claude Sonnet 4.6: 強制標記 [AUTO],避免事後抵賴
|
||||
# 統帥要求「就算是自動化處理,也要發告警訊息出來」—— 所有自治動作必須留痕,
|
||||
# 且 Telegram 上能明顯與人工點擊區隔。
|
||||
if success:
|
||||
delta_line = f"\n├ 指標: <code>{metrics_delta_text}</code>" if metrics_delta_text else ""
|
||||
status_line = (
|
||||
f"🤖 <b>[AUTO] AI 自動修復完成</b>\n"
|
||||
f"├ 動作: <code>{action[:100] if action else '已執行'}</code>\n"
|
||||
f"├ Actor: leWOOOgo (autonomous)"
|
||||
f"{delta_line}"
|
||||
)
|
||||
else:
|
||||
status_line = (
|
||||
f"🤖❌ <b>[AUTO] AI 自動修復失敗,已升級人工介入</b>\n"
|
||||
f"├ 動作: <code>{action[:80] if action else '未知'}</code>\n"
|
||||
f"├ Actor: leWOOOgo (autonomous)\n"
|
||||
f"└ 錯誤: {error[:100] if error else '未知錯誤'}"
|
||||
)
|
||||
# 2026-05-07 Codex: 自動化結果必須讓 SRE 一眼分辨「已自動解決」或
|
||||
# 「已停止並轉人工」,不能再用 raw command / exception 片段洗版。
|
||||
status_line = _format_auto_repair_status_line(
|
||||
incident_id=inc_id,
|
||||
target=target,
|
||||
action=action,
|
||||
success=success,
|
||||
error=error,
|
||||
metrics_delta_text=metrics_delta_text,
|
||||
)
|
||||
|
||||
# BUG-006 修復 2026-04-11: outcome + verification_result 全為 null
|
||||
# 原因:_push_auto_repair_result 只送 Telegram,沒寫 DB
|
||||
@@ -1715,6 +1790,25 @@ class DecisionManager:
|
||||
token.proposal_data["auto_approve_reason"] = auto_decision.reason_detail
|
||||
await self._save_token(token)
|
||||
|
||||
try:
|
||||
from src.services.awooop_ansible_audit_service import (
|
||||
record_ansible_decision_audit as _record_ansible_decision_audit,
|
||||
)
|
||||
|
||||
_fire_and_forget(
|
||||
_record_ansible_decision_audit(
|
||||
incident=incident,
|
||||
proposal_data=token.proposal_data,
|
||||
decision_path="auto_execute",
|
||||
not_used_reason=(
|
||||
"auto_execute selected existing executor path; "
|
||||
"Ansible check-mode is not wired yet"
|
||||
),
|
||||
)
|
||||
)
|
||||
except Exception as _ansible_audit_err:
|
||||
logger.debug("ansible_decision_audit_schedule_error", error=str(_ansible_audit_err))
|
||||
|
||||
# 觸發自動執行 (非阻塞)
|
||||
_fire_and_forget(
|
||||
self._auto_execute(incident, token)
|
||||
@@ -1738,6 +1832,24 @@ class DecisionManager:
|
||||
),
|
||||
)
|
||||
)
|
||||
try:
|
||||
from src.services.awooop_ansible_audit_service import (
|
||||
record_ansible_decision_audit as _record_ansible_decision_audit,
|
||||
)
|
||||
|
||||
_fire_and_forget(
|
||||
_record_ansible_decision_audit(
|
||||
incident=incident,
|
||||
proposal_data=token.proposal_data,
|
||||
decision_path="manual_approval",
|
||||
not_used_reason=(
|
||||
"manual approval required; Ansible check-mode "
|
||||
"is not wired to approval execution yet"
|
||||
),
|
||||
)
|
||||
)
|
||||
except Exception as _ansible_audit_err:
|
||||
logger.debug("ansible_decision_audit_schedule_error", error=str(_ansible_audit_err))
|
||||
_fire_and_forget(
|
||||
_push_decision_to_telegram(incident, token.proposal_data)
|
||||
)
|
||||
@@ -1858,14 +1970,14 @@ class DecisionManager:
|
||||
try:
|
||||
from src.core.feature_flags import aiops_flags as _p6_flags
|
||||
if _p6_flags.is_sub_flag_enabled("AIOPS_P6_SELF_DEMOTION"):
|
||||
from src.db.base import get_session_factory as _p6_sf
|
||||
from src.db.base import get_db_context as _p6_db_context
|
||||
from src.db.models import AiGovernanceEvent as _GovernanceEvent
|
||||
from sqlalchemy import select as _p6_select, func as _p6_func
|
||||
from datetime import timedelta as _p6_td
|
||||
|
||||
_now = __import__("src.utils.timezone", fromlist=["now_taipei"]).now_taipei()
|
||||
|
||||
async with _p6_sf()() as _p6_sess:
|
||||
async with _p6_db_context() as _p6_sess:
|
||||
# 過去 7 天有幾筆未解決的 slo_violation?
|
||||
_viol_7d_q = await _p6_sess.execute(
|
||||
_p6_select(_p6_func.count()).where(
|
||||
@@ -1905,8 +2017,8 @@ class DecisionManager:
|
||||
)
|
||||
# 記錄保守模式事件
|
||||
try:
|
||||
from src.db.base import get_session_factory as _p6_sf2
|
||||
async with _p6_sf2()() as _s2:
|
||||
from src.db.base import get_db_context as _p6_db_context2
|
||||
async with _p6_db_context2() as _s2:
|
||||
_s2.add(_GovernanceEvent(
|
||||
event_type="conservative_mode",
|
||||
details={
|
||||
@@ -1946,8 +2058,8 @@ class DecisionManager:
|
||||
_push_decision_to_telegram(incident, token.proposal_data)
|
||||
)
|
||||
try:
|
||||
from src.db.base import get_session_factory as _p6_sf3
|
||||
async with _p6_sf3()() as _s3:
|
||||
from src.db.base import get_db_context as _p6_db_context3
|
||||
async with _p6_db_context3() as _s3:
|
||||
_s3.add(_GovernanceEvent(
|
||||
event_type="self_demotion",
|
||||
details={
|
||||
@@ -2694,9 +2806,10 @@ class DecisionManager:
|
||||
if context_parts:
|
||||
llm_expert_context["diagnosis_context"] = "\n\n".join(context_parts)
|
||||
|
||||
# GAP-B4 (2026-04-14 Claude Sonnet 4.6): LLM 25s hard timeout,
|
||||
# 比外層 decide() 30s wait_for 更嚴格,留 5s 給 YAML risk override + NemoClaw second opinion
|
||||
# Timeout → 明確 llm_timeout_fallback 日誌,返回 expert_result 而非等外層觸發
|
||||
# 2026-05-06 Codex: The alert goal is resolution quality, not a
|
||||
# fast-but-paid card. The outer guard is configurable and must allow
|
||||
# the GCP-A → GCP-B → 111 Ollama lane to finish before cloud backup.
|
||||
llm_timeout_seconds = _incident_llm_timeout_seconds()
|
||||
llm_result, provider, success = await asyncio.wait_for(
|
||||
self._openclaw.generate_incident_proposal_with_tools(
|
||||
incident_id=incident.incident_id,
|
||||
@@ -2705,7 +2818,7 @@ class DecisionManager:
|
||||
affected_services=incident.affected_services,
|
||||
expert_context=llm_expert_context if llm_expert_context else None,
|
||||
),
|
||||
timeout=25.0,
|
||||
timeout=llm_timeout_seconds,
|
||||
)
|
||||
|
||||
if success and llm_result:
|
||||
@@ -2772,7 +2885,7 @@ class DecisionManager:
|
||||
logger.warning(
|
||||
"llm_timeout_fallback",
|
||||
incident_id=incident.incident_id,
|
||||
timeout_sec=25.0,
|
||||
timeout_sec=llm_timeout_seconds,
|
||||
action="降級 Expert System",
|
||||
)
|
||||
except Exception as e:
|
||||
@@ -2923,6 +3036,52 @@ class DecisionManager:
|
||||
|
||||
return None
|
||||
|
||||
async def _find_existing_tokens_for_incidents(
    self,
    incident_ids: list[str],
) -> dict[str, DecisionToken]:
    """Look up existing decision tokens for many incidents in one keyspace pass.

    2026-05-06 Codex: GET /api/v1/incidents is a frontend polling path, so it
    must not scan ``decision:*`` once per incident. This walks the Redis
    keyspace a single time to avoid O(N×M) latency (and a frozen frontend
    console) when there are 200+ incidents.

    Args:
        incident_ids: Incident ids to resolve; duplicates are ignored.

    Returns:
        Mapping of incident id to the first matching token encountered. Ids
        without a token are simply absent. Keys whose payload cannot be read
        or parsed are skipped.
    """
    pending = set(incident_ids)
    if not pending:
        return {}

    import json

    client = get_redis()
    tokens: dict[str, DecisionToken] = {}
    scan_cursor = 0
    while True:
        scan_cursor, batch = await client.scan(
            cursor=scan_cursor,
            match=f"{DECISION_TOKEN_PREFIX}*",
            count=500,
        )

        for redis_key in batch:
            try:
                raw = await client.get(redis_key)
                if not raw:
                    continue
                payload = json.loads(raw)
                owner = payload.get("incident_id")
                if owner not in pending or owner in tokens:
                    continue
                tokens[owner] = DecisionToken.from_dict(payload)
                # Stop scanning as soon as every requested incident is resolved.
                if len(tokens) == len(pending):
                    return tokens
            except Exception:
                continue

        # Redis signals the end of a SCAN iteration with cursor 0.
        if scan_cursor == 0:
            break

    return tokens
|
||||
|
||||
async def _persist_decision_to_db(
|
||||
self, incident_id: str, proposal_data: dict
|
||||
) -> None:
|
||||
@@ -3236,7 +3395,7 @@ class DecisionManager:
|
||||
# 與 line 217-218 同邏輯,避免 pod restart resend 路徑繞過 fingerprint dedup。
|
||||
# 原本 telegram_sent:{incident_id} TTL 600s 早就過期 → 重啟必重發;
|
||||
# 改 fingerprint + 24h TTL → 同症狀 24h 內任何 INC ID 都不會重推。
|
||||
_alertname_fp = (getattr(incident, "title", None) or "unknown").strip().lower().replace(" ", "_")[:60]
|
||||
_alertname_fp = _incident_alertname_for_dedup(incident).strip().lower().replace(" ", "_")[:60]
|
||||
_affected = getattr(incident, "affected_services", None) or []
|
||||
_target_fp = (_affected[0] if _affected else "unknown").lower()[:40]
|
||||
dedup_key = f"telegram_sent:fp:{_alertname_fp}:{_target_fp}"
|
||||
@@ -3443,6 +3602,8 @@ class DecisionManager:
|
||||
token.proposal_data["decision_state"] = DecisionState.READY.value
|
||||
token.proposal_data["auto_executed"] = False
|
||||
token.proposal_data["mcp_all_failed"] = True
|
||||
if _tool == "ssh_diagnose":
|
||||
token.proposal_data["automation_state"] = "diagnosis_failed_manual_required"
|
||||
await self._save_token(token)
|
||||
_fire_and_forget(
|
||||
_escalate_decision_auto_repair_unavailable(
|
||||
@@ -3452,14 +3613,15 @@ class DecisionManager:
|
||||
attempted_actions=f"decision_manager._ssh_execute -> {_tool}",
|
||||
)
|
||||
)
|
||||
_fire_and_forget(
|
||||
_push_auto_repair_result(
|
||||
incident,
|
||||
action,
|
||||
success=False,
|
||||
error=token.error,
|
||||
if _tool != "ssh_diagnose":
|
||||
_fire_and_forget(
|
||||
_push_auto_repair_result(
|
||||
incident,
|
||||
action,
|
||||
success=False,
|
||||
error=token.error,
|
||||
)
|
||||
)
|
||||
)
|
||||
_fire_and_forget(_push_decision_to_telegram(incident, token.proposal_data))
|
||||
return
|
||||
|
||||
@@ -3469,6 +3631,7 @@ class DecisionManager:
|
||||
token.proposal_data["auto_executed"] = False
|
||||
token.proposal_data["ssh_diagnosis_collected"] = True
|
||||
token.proposal_data["ssh_diagnosis_preview"] = output_preview
|
||||
token.proposal_data["automation_state"] = "diagnosis_collected_manual_required"
|
||||
await self._save_token(token)
|
||||
_fire_and_forget(
|
||||
_escalate_decision_auto_repair_unavailable(
|
||||
|
||||
@@ -17,11 +17,12 @@ Drift Interpreter - Phase 25 P2 Config Drift Detection
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import structlog
|
||||
|
||||
from src.models.drift import DriftIntent, DriftInterpretation, DriftItem
|
||||
from src.models.drift import DriftIntent, DriftInterpretation
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from src.models.drift import DriftReport
|
||||
@@ -52,6 +53,58 @@ _INTENT_PROMPT_TEMPLATE = """你是 AWOOOI GitOps 守門員,請分析以下 K8
|
||||
"""
|
||||
|
||||
|
||||
def _strip_think_blocks(text: str) -> str:
|
||||
"""移除 qwen/deepseek 類模型常見的 <think> 推理段。"""
|
||||
return re.sub(r"<think>[\s\S]*?</think>", "", text, flags=re.IGNORECASE).strip()
|
||||
|
||||
|
||||
def _extract_first_json_object(text: str) -> dict | None:
|
||||
"""
|
||||
從 LLM 回應中擷取第一個 JSON object。
|
||||
|
||||
Ollama qwen3/deepseek 常會在 JSON 前後加 `<think>` 或短句;這些文字不應
|
||||
讓 drift intent 直接降級成 UNKNOWN。
|
||||
"""
|
||||
cleaned = _strip_think_blocks(text)
|
||||
|
||||
candidates = [cleaned]
|
||||
candidates.extend(match.group(1).strip() for match in re.finditer(r"```(?:json)?\s*([\s\S]+?)```", cleaned))
|
||||
|
||||
start = cleaned.find("{")
|
||||
if start >= 0:
|
||||
in_string = False
|
||||
escaped = False
|
||||
depth = 0
|
||||
for idx, ch in enumerate(cleaned[start:], start=start):
|
||||
if escaped:
|
||||
escaped = False
|
||||
continue
|
||||
if ch == "\\":
|
||||
escaped = True
|
||||
continue
|
||||
if ch == '"':
|
||||
in_string = not in_string
|
||||
continue
|
||||
if in_string:
|
||||
continue
|
||||
if ch == "{":
|
||||
depth += 1
|
||||
elif ch == "}":
|
||||
depth -= 1
|
||||
if depth == 0:
|
||||
candidates.append(cleaned[start : idx + 1])
|
||||
break
|
||||
|
||||
for candidate in candidates:
|
||||
try:
|
||||
data = json.loads(candidate)
|
||||
except Exception:
|
||||
continue
|
||||
if isinstance(data, dict):
|
||||
return data
|
||||
return None
|
||||
|
||||
|
||||
class NemotronDriftInterpreter:
|
||||
"""
|
||||
使用 Nemotron 分析漂移意圖
|
||||
@@ -62,7 +115,7 @@ class NemotronDriftInterpreter:
|
||||
❌ 不直接呼叫 kubectl 或 git
|
||||
"""
|
||||
|
||||
async def analyze(self, report: "DriftReport") -> DriftInterpretation:
|
||||
async def analyze(self, report: DriftReport) -> DriftInterpretation:
|
||||
"""
|
||||
分析漂移意圖
|
||||
|
||||
@@ -85,7 +138,7 @@ class NemotronDriftInterpreter:
|
||||
result = await self._call_nemotron(prompt)
|
||||
return result
|
||||
|
||||
def _format_diff_for_prompt(self, report: "DriftReport") -> str:
|
||||
def _format_diff_for_prompt(self, report: DriftReport) -> str:
|
||||
"""格式化 diff 給 Nemotron 分析用"""
|
||||
lines = []
|
||||
for item in report.items[:10]: # 最多 10 項避免 token 過多
|
||||
@@ -111,7 +164,17 @@ class NemotronDriftInterpreter:
|
||||
try:
|
||||
from src.services.openclaw import get_openclaw
|
||||
openclaw = get_openclaw()
|
||||
response_text, _provider, success = await openclaw.call(prompt)
|
||||
response_text, _provider, success = await openclaw.call(
|
||||
prompt,
|
||||
alert_context={
|
||||
"intent_hint": "config",
|
||||
"task_type": "diagnose",
|
||||
"enforce_ollama_first": True,
|
||||
"allow_gcp_heavy_model": True,
|
||||
"target_resource": "config-drift",
|
||||
"alert_type": "ConfigDriftInternalScan",
|
||||
},
|
||||
)
|
||||
|
||||
if not success or not response_text:
|
||||
logger.warning("drift_interpreter_openclaw_failed", provider=_provider)
|
||||
@@ -125,19 +188,9 @@ class NemotronDriftInterpreter:
|
||||
|
||||
def _parse_response(self, text: str) -> DriftInterpretation:
|
||||
"""解析 Nemotron JSON 回應"""
|
||||
try:
|
||||
# 嘗試直接解析
|
||||
data = json.loads(text)
|
||||
except Exception:
|
||||
try:
|
||||
import re
|
||||
match = re.search(r"```(?:json)?\s*([\s\S]+?)```", text)
|
||||
if match:
|
||||
data = json.loads(match.group(1))
|
||||
else:
|
||||
return self._unknown_result("無法解析 JSON")
|
||||
except Exception:
|
||||
return self._unknown_result("JSON 解析失敗")
|
||||
data = _extract_first_json_object(text)
|
||||
if data is None:
|
||||
return self._unknown_result("無法解析 JSON")
|
||||
|
||||
try:
|
||||
intent_str = data.get("intent", "unknown")
|
||||
|
||||
@@ -33,10 +33,11 @@ logger = structlog.get_logger(__name__)
|
||||
# ============================================================
|
||||
# 設定
|
||||
# ============================================================
|
||||
# 2026-05-03 ogt: ADR-110 GCP-A Primary — 改從 settings 讀取,不再硬編碼 111
|
||||
# 2026-05-05 Codex: 重摘要走 111 lane,避免污染 GCP alert-fast lane
|
||||
def _get_ollama_url() -> str:
|
||||
from src.core.config import get_settings
|
||||
return get_settings().OLLAMA_URL
|
||||
from src.services.ollama_endpoint_resolver import resolve_ollama_endpoint
|
||||
|
||||
return resolve_ollama_endpoint("deep_rca")
|
||||
# D1 集中化 2026-04-11: 從 models.json providers.ollama.models.drift_summary 讀取
|
||||
NARRATOR_MODEL = get_model("ollama", "drift_summary")
|
||||
NARRATOR_TIMEOUT = 90.0 # seconds
|
||||
@@ -120,8 +121,8 @@ class DriftNarratorService:
|
||||
|
||||
async def narrate_and_notify(
|
||||
self,
|
||||
report: "DriftReport",
|
||||
interpretation: "DriftInterpretation | None" = None,
|
||||
report: DriftReport,
|
||||
interpretation: DriftInterpretation | None = None,
|
||||
) -> None:
|
||||
"""
|
||||
生成人話摘要並推送 Telegram
|
||||
@@ -147,7 +148,13 @@ class DriftNarratorService:
|
||||
# 2026-04-18 B 方案: LLM 同時產 narrative + 結構化 items(取代 str()[:30])
|
||||
# 2026-04-20 P0.2: 追加 recommendation(action/confidence/reason)
|
||||
narrative, items, recommendation = await self._generate_narrative_and_items(report, interpretation)
|
||||
await self._send_telegram(report, narrative, items, recommendation)
|
||||
repeat_state = None
|
||||
try:
|
||||
from src.repositories.drift_repository import get_drift_repository
|
||||
repeat_state = await get_drift_repository().get_repeat_state(report)
|
||||
except Exception as e:
|
||||
logger.warning("drift_repeat_state_lookup_failed", report_id=report.report_id, error=str(e))
|
||||
await self._send_telegram(report, narrative, items, recommendation, repeat_state)
|
||||
|
||||
# 寫入 DB narrative_text (Phase 30 ADR-067)
|
||||
try:
|
||||
@@ -166,7 +173,7 @@ class DriftNarratorService:
|
||||
medium=report.medium_count,
|
||||
)
|
||||
|
||||
def _should_narrate(self, report: "DriftReport") -> bool:
|
||||
def _should_narrate(self, report: DriftReport) -> bool:
|
||||
"""觸發條件:high >= 1 or medium >= 3"""
|
||||
# 過濾 HPA 白名單後重算
|
||||
non_hpa_items = [
|
||||
@@ -180,8 +187,8 @@ class DriftNarratorService:
|
||||
|
||||
async def _generate_narrative_and_items(
|
||||
self,
|
||||
report: "DriftReport",
|
||||
interpretation: "DriftInterpretation | None",
|
||||
report: DriftReport,
|
||||
interpretation: DriftInterpretation | None,
|
||||
) -> tuple[str, list[dict], dict]:
|
||||
"""
|
||||
2026-04-18 ogt + Claude Opus 4.7: B 方案 — LLM 產生 narrative + 結構化 items
|
||||
@@ -354,8 +361,8 @@ class DriftNarratorService:
|
||||
|
||||
def _fallback_recommendation(
|
||||
self,
|
||||
report: "DriftReport",
|
||||
interpretation: "DriftInterpretation | None",
|
||||
report: DriftReport,
|
||||
interpretation: DriftInterpretation | None,
|
||||
) -> dict:
|
||||
"""
|
||||
2026-04-20 P0.2 ogt + Claude Opus 4.7: LLM 沒給 recommendation 時的 Python fallback
|
||||
@@ -397,7 +404,7 @@ class DriftNarratorService:
|
||||
|
||||
async def _log_ai_action_to_db(
|
||||
self,
|
||||
report: "DriftReport",
|
||||
report: DriftReport,
|
||||
prompt: str,
|
||||
raw_response: str | None,
|
||||
narrative: str,
|
||||
@@ -416,7 +423,9 @@ class DriftNarratorService:
|
||||
- 若能找到該 drift 的 incident 關聯,設 parent_op_id
|
||||
"""
|
||||
import json as _json
|
||||
|
||||
from sqlalchemy import text as _sql
|
||||
|
||||
from src.db.base import get_db_context
|
||||
|
||||
input_json = _json.dumps({
|
||||
@@ -511,7 +520,7 @@ class DriftNarratorService:
|
||||
items_count=len(items),
|
||||
)
|
||||
|
||||
def _format_drift_for_llm(self, report: "DriftReport") -> str:
|
||||
def _format_drift_for_llm(self, report: DriftReport) -> str:
|
||||
"""
|
||||
2026-04-18 ogt + Claude Opus 4.7: B 方案 — 餵 LLM 用的 JSON 序列化
|
||||
保留更多原始 context 給 LLM 推理,不做 30 字元暴力截斷
|
||||
@@ -582,7 +591,7 @@ class DriftNarratorService:
|
||||
# 一般變化
|
||||
return f"{from_val} → {to_val}"
|
||||
|
||||
def _fallback_items(self, report: "DriftReport") -> list[dict]:
|
||||
def _fallback_items(self, report: DriftReport) -> list[dict]:
|
||||
"""
|
||||
LLM 失敗時的 Python 智能摘要 (取代舊 str()[:30])
|
||||
- 過濾白名單
|
||||
@@ -605,7 +614,7 @@ class DriftNarratorService:
|
||||
})
|
||||
return items
|
||||
|
||||
def _format_intent_summary(self, interpretation: "DriftInterpretation | None") -> str:
|
||||
def _format_intent_summary(self, interpretation: DriftInterpretation | None) -> str:
|
||||
if not interpretation:
|
||||
return "無意圖分析"
|
||||
return (
|
||||
@@ -616,8 +625,8 @@ class DriftNarratorService:
|
||||
|
||||
def _fallback_narrative(
|
||||
self,
|
||||
report: "DriftReport",
|
||||
interpretation: "DriftInterpretation | None",
|
||||
report: DriftReport,
|
||||
interpretation: DriftInterpretation | None,
|
||||
) -> str:
|
||||
"""LLM 失敗時的結構化 fallback"""
|
||||
resources = list({
|
||||
@@ -636,10 +645,11 @@ class DriftNarratorService:
|
||||
|
||||
async def _send_telegram(
|
||||
self,
|
||||
report: "DriftReport",
|
||||
report: DriftReport,
|
||||
narrative: str,
|
||||
items: list[dict],
|
||||
recommendation: dict | None = None,
|
||||
repeat_state: dict | None = None,
|
||||
) -> None:
|
||||
"""
|
||||
推送 TYPE-4D Config Drift 卡片(ADR-075)+ B 方案智能摘要
|
||||
@@ -651,7 +661,7 @@ class DriftNarratorService:
|
||||
"""
|
||||
from src.services.telegram_gateway import get_telegram_gateway
|
||||
|
||||
diff_summary = self._render_telegram_body(report, narrative, items, recommendation)
|
||||
diff_summary = self._render_telegram_body(report, narrative, items, recommendation, repeat_state)
|
||||
|
||||
try:
|
||||
tg = get_telegram_gateway()
|
||||
@@ -667,7 +677,7 @@ class DriftNarratorService:
|
||||
except Exception as e:
|
||||
logger.warning("drift_narrator_telegram_error", error=str(e))
|
||||
|
||||
def _count_nontrivial_drift(self, report: "DriftReport") -> int:
|
||||
def _count_nontrivial_drift(self, report: DriftReport) -> int:
|
||||
"""
|
||||
計算非白名單、非 trivial (K8s 自動補齊) 的 drift 數
|
||||
用於 Telegram 底部「還有 N 項」顯示實際可操作數量
|
||||
@@ -704,10 +714,11 @@ class DriftNarratorService:
|
||||
|
||||
def _render_telegram_body(
|
||||
self,
|
||||
report: "DriftReport",
|
||||
report: DriftReport,
|
||||
narrative: str,
|
||||
items: list[dict],
|
||||
recommendation: dict | None = None,
|
||||
repeat_state: dict | None = None,
|
||||
) -> str:
|
||||
"""
|
||||
組裝 Telegram 卡片 body(B 方案格式 + P0.2 AI 推薦)
|
||||
@@ -738,6 +749,10 @@ class DriftNarratorService:
|
||||
}.get(_act, _act)
|
||||
lines.append(f"🎯 AI 建議:{_emoji_action} ({int(_conf * 100)}%) — {_reason}\n")
|
||||
|
||||
repeat_line = self._render_repeat_state(repeat_state)
|
||||
if repeat_line:
|
||||
lines.append(f"{repeat_line}\n")
|
||||
|
||||
lines.append(f"🤖 AI 研判\n{narrative}\n")
|
||||
|
||||
# 用非 trivial + 非白名單 的實際可操作數顯示
|
||||
@@ -758,6 +773,23 @@ class DriftNarratorService:
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
def _render_repeat_state(self, repeat_state: dict | None) -> str:
|
||||
"""Render operator-visible repeat/stage metadata for Telegram."""
|
||||
if not repeat_state:
|
||||
return ""
|
||||
fingerprint = str(repeat_state.get("fingerprint") or "unknown")
|
||||
occurrences = int(repeat_state.get("occurrences_12h") or 0)
|
||||
window_hours = int(repeat_state.get("window_hours") or 12)
|
||||
stage = str(repeat_state.get("operator_stage") or "unknown")
|
||||
if occurrences <= 1:
|
||||
repeat_text = f"{window_hours}h 內首次出現"
|
||||
else:
|
||||
repeat_text = f"{window_hours}h 內第 {occurrences} 次同指紋"
|
||||
return (
|
||||
"流程: drift_scanned → ai_analyzed → "
|
||||
f"{stage}\n重複: {repeat_text}\n指紋: {fingerprint}"
|
||||
)
|
||||
|
||||
|
||||
# ============================================================
|
||||
# Singleton
|
||||
|
||||
180
apps/api/src/services/drift_repeat_state.py
Normal file
180
apps/api/src/services/drift_repeat_state.py
Normal file
@@ -0,0 +1,180 @@
|
||||
"""Stable repeat identity for Config Drift reports.
|
||||
|
||||
The drift scanner emits a fresh ``report_id`` for every run. Operators need a
|
||||
stable identity that answers whether two reports describe the same drift, not
|
||||
just whether they have the same HIGH/MEDIUM/INFO counts.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import Any
|
||||
|
||||
|
||||
# Emitted as "schema_version" in the dict returned by build_drift_repeat_state.
SCHEMA_VERSION = "drift_repeat_state_v1"
|
||||
# Hashed into every fingerprint payload as "version", so changing this value
# changes every fingerprint produced by build_drift_fingerprint.
FINGERPRINT_VERSION = "drift_fingerprint_v1"
|
||||
|
||||
|
||||
def _get(obj: Any, key: str, default: Any = None) -> Any:
|
||||
if isinstance(obj, dict):
|
||||
return obj.get(key, default)
|
||||
return getattr(obj, key, default)
|
||||
|
||||
|
||||
def _enum_value(value: Any) -> Any:
|
||||
return getattr(value, "value", value)
|
||||
|
||||
|
||||
def _jsonable(value: Any) -> Any:
    """Recursively convert ``value`` into JSON-friendly data with a stable layout.

    Conversions applied at every nesting level:

    * objects exposing ``.value`` (e.g. enum members) are unwrapped,
    * dicts keep their values but get string keys,
    * lists and tuples become lists,
    * sets and frozensets become lists ordered by their serialized form,
    * datetimes become ISO-8601 strings.

    Anything else is returned unchanged.

    Args:
        value: Arbitrary drift payload (scalar, container, enum, datetime).

    Returns:
        A structure built from dicts, lists and the remaining leaf values.
    """
    value = _enum_value(value)
    if isinstance(value, dict):
        return {str(k): _jsonable(v) for k, v in value.items()}
    if isinstance(value, (list, tuple)):
        return [_jsonable(v) for v in value]
    if isinstance(value, (set, frozenset)):
        # Set iteration order follows hashing, which is randomized per process
        # for strings. Ordering members by their serialized text keeps the
        # result (and any fingerprint derived from it) reproducible.
        members = [_jsonable(v) for v in value]
        members.sort(
            key=lambda member: json.dumps(
                member, ensure_ascii=False, sort_keys=True, default=str
            )
        )
        return members
    if isinstance(value, datetime):
        return value.isoformat()
    return value
|
||||
|
||||
|
||||
def _canonical_json(value: Any) -> str:
    """Serialize ``value`` to compact JSON text with sorted keys.

    The value is first passed through ``_jsonable``; non-ASCII characters are
    kept verbatim and anything still unserializable is rendered via ``str``.
    """
    normalized = _jsonable(value)
    compact_separators = (",", ":")  # no whitespace between tokens
    return json.dumps(
        normalized,
        ensure_ascii=False,
        sort_keys=True,
        separators=compact_separators,
        default=str,
    )
|
||||
|
||||
|
||||
def _parse_datetime(value: Any) -> datetime | None:
|
||||
if value is None:
|
||||
return None
|
||||
if isinstance(value, datetime):
|
||||
parsed = value
|
||||
if parsed.tzinfo is not None:
|
||||
return parsed.astimezone(timezone.utc).replace(tzinfo=None)
|
||||
return parsed
|
||||
if isinstance(value, str):
|
||||
try:
|
||||
parsed = datetime.fromisoformat(value.replace("Z", "+00:00"))
|
||||
if parsed.tzinfo is not None:
|
||||
return parsed.astimezone(timezone.utc).replace(tzinfo=None)
|
||||
return parsed
|
||||
except ValueError:
|
||||
return None
|
||||
return None
|
||||
|
||||
|
||||
def _iso(value: Any) -> str | None:
    """Return the ISO-8601 text of ``value`` as parsed by ``_parse_datetime``.

    Yields ``None`` whenever ``_parse_datetime`` cannot interpret the input.
    """
    moment = _parse_datetime(value)
    if not moment:
        return None
    return moment.isoformat()
|
||||
|
||||
|
||||
def drift_item_identity(item: Any) -> dict[str, Any]:
    """Extract the fields that identify one drift item across scans.

    ``item`` may be a dict or an object; fields are read through ``_get``.
    The four locator fields are stringified, ``drift_level`` is unwrapped
    from its enum (if any) and stringified, the two values are normalized
    with ``_jsonable`` and ``is_allowlisted`` is coerced to ``bool``.
    """
    locator_fields = ("resource_kind", "resource_name", "namespace", "field_path")
    identity: dict[str, Any] = {
        name: str(_get(item, name, "")) for name in locator_fields
    }

    level = _get(item, "drift_level", "")
    identity["drift_level"] = str(_enum_value(level))

    for side in ("git_value", "actual_value"):
        identity[side] = _jsonable(_get(item, side))

    identity["is_allowlisted"] = bool(_get(item, "is_allowlisted", False))
    return identity
|
||||
|
||||
|
||||
def build_drift_fingerprint(namespace: str, items: list[Any]) -> str:
    """Derive a deterministic fingerprint for a namespace and its drift items.

    Item identities are ordered by their canonical JSON text so the input
    order of ``items`` does not matter. The payload (fingerprint version,
    namespace, ordered identities) is hashed with SHA-256 and the first
    16 hex characters are returned behind a ``dfp_`` prefix.
    """
    ordered_identities = sorted(
        (drift_item_identity(entry) for entry in items),
        key=_canonical_json,
    )
    serialized = _canonical_json(
        {
            "version": FINGERPRINT_VERSION,
            "namespace": namespace,
            "items": ordered_identities,
        }
    )
    hex_digest = hashlib.sha256(serialized.encode("utf-8")).hexdigest()
    return f"dfp_{hex_digest[:16]}"
|
||||
|
||||
|
||||
def _report_identity(report: Any) -> dict[str, Any]:
    """Collect the identifying fields of a drift report plus its fingerprint.

    ``report`` may be a dict or an object. A missing or falsy ``items``
    value is treated as an empty list. Timestamps are passed through
    unparsed; ``status`` is unwrapped from its enum (if any) and stringified.
    """
    drift_items = _get(report, "items", []) or []
    ns = str(_get(report, "namespace", ""))

    identity: dict[str, Any] = {"report_id": _get(report, "report_id"), "namespace": ns}
    identity["status"] = str(_enum_value(_get(report, "status", "")))
    for stamp in ("scanned_at", "created_at"):
        identity[stamp] = _get(report, stamp)
    identity["fingerprint"] = build_drift_fingerprint(ns, list(drift_items))
    return identity
|
||||
|
||||
|
||||
def build_drift_repeat_state(
    report: Any,
    recent_reports: list[Any],
    *,
    window_hours: int = 12,
    max_reports: int = 20,
) -> dict[str, Any]:
    """Summarize repeat state for one drift report using stable fingerprints.

    A report counts as a repeat of ``report`` when it has the same fingerprint
    and its timestamp (``scanned_at``, else ``created_at``) is not older than
    ``window_hours`` before the current report's timestamp. Reports without a
    ``report_id`` are ignored, reports without any parseable timestamp are
    kept, and duplicates of one ``report_id`` are collapsed (last one wins).

    Args:
        report: The current drift report (dict or object).
        recent_reports: Earlier reports to compare against; ``None`` is
            treated as an empty list.
        window_hours: Size of the look-back window in hours.
        max_reports: Maximum number of matching reports listed under
            ``"reports"``; zero or a negative value lists none.

    Returns:
        A dict with ``schema_version``, ``fingerprint``, ``matching_strategy``,
        ``window_hours``, ``occurrences_12h`` (number of matches; the key name
        is fixed even when ``window_hours`` is not 12), ``first_scanned_at``,
        ``last_scanned_at``, ``operator_stage`` and ``reports`` (newest first).
    """

    def _timestamp(identity: dict[str, Any]) -> datetime | None:
        # Prefer the scan time and fall back to the creation time.
        return _parse_datetime(identity.get("scanned_at")) or _parse_datetime(
            identity.get("created_at")
        )

    current = _report_identity(report)
    # _parse_datetime normalizes aware values to naive UTC, so the fallback
    # clock must be naive UTC as well; a local-time ``datetime.now()`` would
    # shift the cutoff by the host's UTC offset.
    # NOTE(review): naive timestamps stored on reports are assumed to be UTC
    # already - confirm against the repository layer.
    current_time = _timestamp(current) or datetime.now(timezone.utc).replace(tzinfo=None)
    cutoff = current_time - timedelta(hours=window_hours)

    # Reuse the identity computed above instead of fingerprinting the current
    # report a second time.
    identities = [current]
    identities.extend(_report_identity(candidate) for candidate in (recent_reports or []))

    by_id: dict[str, dict[str, Any]] = {}
    for identity in identities:
        report_id = str(identity.get("report_id") or "")
        if not report_id:
            continue
        seen_at = _timestamp(identity)
        if seen_at is not None and seen_at < cutoff:
            continue
        if identity["fingerprint"] != current["fingerprint"]:
            continue
        by_id[report_id] = identity

    # Oldest first; reports without a timestamp sort to the front.
    matches = sorted(by_id.values(), key=lambda row: _timestamp(row) or datetime.min)
    first = matches[0] if matches else current
    last = matches[-1] if matches else current

    status = current.get("status") or "unknown"
    operator_stage = "pending_human" if status == "pending" else str(status)

    # ``matches[-0:]`` would return the whole list, so guard non-positive limits.
    listed = matches[-max_reports:] if max_reports > 0 else []

    return {
        "schema_version": SCHEMA_VERSION,
        "fingerprint": current["fingerprint"],
        "matching_strategy": "namespace_and_stable_items_v1",
        "window_hours": window_hours,
        "occurrences_12h": len(matches),
        "first_scanned_at": _iso(first.get("scanned_at") or first.get("created_at")),
        "last_scanned_at": _iso(last.get("scanned_at") or last.get("created_at")),
        "operator_stage": operator_stage,
        "reports": [
            {
                "report_id": row.get("report_id"),
                "scanned_at": _iso(row.get("scanned_at")),
                "created_at": _iso(row.get("created_at")),
                "status": row.get("status"),
            }
            for row in reversed(listed)
        ],
    }
|
||||
@@ -71,7 +71,7 @@ class BaselineState:
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, d: dict[str, Any]) -> "BaselineState":
|
||||
def from_dict(cls, d: dict[str, Any]) -> BaselineState:
|
||||
return cls(
|
||||
metric_name=d["metric_name"],
|
||||
mean=d["mean"],
|
||||
@@ -250,6 +250,7 @@ class DynamicBaselineService:
|
||||
) -> list[MetricDatapoint]:
|
||||
"""從 Prometheus query_range API 抓取歷史資料(1h 步進)。"""
|
||||
import httpx
|
||||
|
||||
from src.core.config import settings
|
||||
|
||||
end_ts = now_taipei().timestamp()
|
||||
@@ -314,7 +315,7 @@ class DynamicBaselineService:
|
||||
seasonal="add" if len(arr) >= seasonal_periods * 2 else None,
|
||||
seasonal_periods=seasonal_periods,
|
||||
initialization_method="estimated",
|
||||
).fit(optimized=True, disp=False)
|
||||
).fit(optimized=True)
|
||||
|
||||
fitted = model.fittedvalues
|
||||
residuals = arr - fitted
|
||||
@@ -423,11 +424,10 @@ class DynamicBaselineService:
|
||||
async def _pg_upsert_baseline(self, state: BaselineState, promql: str, lookback_hours: int) -> None:
|
||||
"""寫入 DynamicBaselineRecord 到 PostgreSQL(INSERT,不更新舊記錄)"""
|
||||
try:
|
||||
from src.db.base import get_session_factory
|
||||
from src.db.base import get_db_context
|
||||
from src.db.models import DynamicBaselineRecord
|
||||
|
||||
factory = get_session_factory()
|
||||
async with factory() as session:
|
||||
async with get_db_context() as session:
|
||||
record = DynamicBaselineRecord(
|
||||
metric_name=state.metric_name,
|
||||
mean=state.mean,
|
||||
@@ -447,11 +447,11 @@ class DynamicBaselineService:
|
||||
"""從 PostgreSQL 載入最新一筆基線記錄"""
|
||||
try:
|
||||
from sqlalchemy import select
|
||||
from src.db.base import get_session_factory
|
||||
|
||||
from src.db.base import get_db_context
|
||||
from src.db.models import DynamicBaselineRecord
|
||||
|
||||
factory = get_session_factory()
|
||||
async with factory() as session:
|
||||
async with get_db_context() as session:
|
||||
stmt = (
|
||||
select(DynamicBaselineRecord)
|
||||
.where(DynamicBaselineRecord.metric_name == metric_name)
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user